Patent Application
CG
Freya
/ 2019-04-12
/
#install.packages('tidyverse')
library(tidyverse)
# Build `applications` from `rawdata`: pull an application number and a date
# out of the `Journal` text column, then flag repeated application numbers.
# NOTE(review): the splitting below presumes Journal looks like
# "<front>: <code> <id> <junk> <date>;..." -- confirm against the raw data.
applications <- rawdata %>%
  # Keep only the text before the first ";" in a new `data` column;
  # the original `Journal` column is retained.
  separate(Journal, into = "data", sep = ";", extra = "drop", remove = FALSE) %>%
  # Split at ":" into the leading label and the remainder.
  separate(data, into = c("front", "number"), sep = ":") %>%
  # Split the remainder on single spaces into five pieces.
  separate(
    number,
    into = c("extra", "code", "id", "junk", "date"),
    sep = " "
  ) %>%
  # Discard the two pieces that are not used downstream.
  select(-c(extra, junk)) %>%
  mutate(id = str_trim(id)) %>%
  # Concatenate code and id with no separator.
  unite(application_number, code, id, sep = "") %>%
  mutate(
    application_number = str_trim(application_number),
    # TRUE for every occurrence of an application number after its first.
    duplicated = duplicated(application_number)
  )
#> # A tibble: 6 x 7
#> Organism Journal front Company application_num… date duplicated
#> <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
#> 1 methanocal… "JOURNAL Pat… JOURNA… Abbott … WO2011053699-A1 05-M… FALSE
#> 2 pyrococcus… "JOURNAL Pat… JOURNA… Abbott … WO2011053699-A1 05-M… TRUE
#> 3 pyrococcus… "JOURNAL Pat… JOURNA… Abbott … WO2011053699-A1 05-M… TRUE
#> 4 pyrococcus… "JOURNAL Pat… JOURNA… Abbott … WO2011053699-A1 05-M… TRUE
#> 5 pyrococcus… "JOURNAL Pat… JOURNA… Abbott … WO2011053699-A1 05-M… TRUE
#> 6 pyrococcus… "JOURNAL Pat… JOURNA… Abbott … WO2011053699-A1 05-M… TRUE
#> # A tibble: 2 x 2
#> duplicated n
#> <lgl> <int>
#> 1 FALSE 999
#> 2 TRUE 11999
#> [1] 41.625
#> [1] 47.5
| 1988 | 1 |
| 1991 | 2 |
| 1992 | 1 |
| 1993 | 2 |
| 1998 | 3 |
| 1999 | 12 |
| 2000 | 15 |
| 2001 | 66 |
| 2002 | 67 |
| 2003 | 42 |
| 2004 | 56 |
| 2005 | 66 |
| 2006 | 67 |
| 2007 | 72 |
| 2008 | 68 |
| 2009 | 90 |
| 2010 | 89 |
| 2011 | 53 |
| 2012 | 54 |
| 2013 | 30 |
| 2014 | 37 |
| 2015 | 49 |
| 2016 | 46 |
| 2017 | 11 |