協作閣

開源協作部落格

Introducing PatentsView

USPTO

Freya / 2019-05-02 /


#install.packages("patentsview")
library(patentsview)

Queries are written in the PatentsView query language, documented at http://www.patentsview.org/api/query-language.html

# Query written by hand as a JSON string: patent_year greater than 2009.
qry_1 <- '{"_gt":{"patent_year":2009}}'

# With fields = NULL, search_pv() retrieves a default set of fields.
search_pv(
  query = qry_1,
  fields = NULL
)
## $data
## #### A list with a single data frame on a patent level:
## 
## List of 1
##  $ patents:'data.frame': 25 obs. of  3 variables:
##   ..$ patent_id    : chr [1:25] "10000000" ...
##   ..$ patent_number: chr [1:25] "10000000" ...
##   ..$ patent_title : chr [1:25] "Coherent LADAR using intra-pixel quadrature "..
## 
## $query_results
## #### Distinct entity counts across all downloadable pages of output:
## 
## total_patent_count = 100,000
# Build the query with the DSL instead of raw JSON.
# All DSL functions are in the qry_funs list.
qry_2 <- qry_funs$gt(patent_year = 2009)

# Printing qry_2 shows it is the same as qry_1.
qry_2
## {"_gt":{"patent_year":2009}}
search_pv(
  query = qry_2
)
## $data
## #### A list with a single data frame on a patent level:
## 
## List of 1
##  $ patents:'data.frame': 25 obs. of  3 variables:
##   ..$ patent_id    : chr [1:25] "10000000" ...
##   ..$ patent_number: chr [1:25] "10000000" ...
##   ..$ patent_title : chr [1:25] "Coherent LADAR using intra-pixel quadrature "..
## 
## $query_results
## #### Distinct entity counts across all downloadable pages of output:
## 
## total_patent_count = 100,000
# Ask for specific fields by name. In the printed result the two inventor_*
# fields come back inside the "inventors" list column.
search_pv(
  query = qry_funs$gt(patent_year = 2009),
  fields = c(
    "patent_abstract",
    "patent_average_processing_time",
    "inventor_first_name",
    "inventor_total_num_patents"
  )
)
## $data
## #### A list with a single data frame (with list column(s) inside) on a patent level:
## 
## List of 1
##  $ patents:'data.frame': 25 obs. of  3 variables:
##   ..$ patent_abstract               : chr [1:25] "A frequency modulated (cohe"..
##   ..$ patent_average_processing_time: logi [1:25] NA ...
##   ..$ inventors                     :List of 25
## 
## $query_results
## #### Distinct entity counts across all downloadable pages of output:
## 
## total_patent_count = 100,000
# Use get_fields() to request the fields in the "patents" and "inventors"
# groups of the patents endpoint instead of typing each name out.
search_pv(
  query = qry_funs$gt(patent_year = 2009),
  fields = get_fields(
    endpoint = "patents",
    groups = c("patents", "inventors")
  )
)
## $data
## #### A list with a single data frame (with list column(s) inside) on a patent level:
## 
## List of 1
##  $ patents:'data.frame': 25 obs. of  32 variables:
##   ..$ detail_desc_length                    : chr [1:25] "18369" ...
##   ..$ patent_abstract                       : chr [1:25] "A frequency modulat"..
##   ..$ patent_average_processing_time        : logi [1:25] NA ...
##   ..$ patent_date                           : chr [1:25] "2018-06-19" ...
##   ..$ patent_firstnamed_assignee_city       : chr [1:25] "Waltham" ...
##   ..$ patent_firstnamed_assignee_country    : chr [1:25] "US" ...
##   ..$ patent_firstnamed_assignee_id         : chr [1:25] "org_XxRFmu0ugT2CoQ7"..
##   ..$ patent_firstnamed_assignee_latitude   : chr [1:25] "42.3764" ...
##   ..$ patent_firstnamed_assignee_location_id: chr [1:25] "42.3764|-71.2361" ...
##   ..$ patent_firstnamed_assignee_longitude  : chr [1:25] "-71.2361" ...
##   ..$ patent_firstnamed_assignee_state      : chr [1:25] "MA" ...
##   ..$ patent_firstnamed_inventor_city       : chr [1:25] "Manhattan Beach" ...
##   ..$ patent_firstnamed_inventor_country    : chr [1:25] "US" ...
##   ..$ patent_firstnamed_inventor_id         : chr [1:25] "5073021-1" ...
##   ..$ patent_firstnamed_inventor_latitude   : chr [1:25] "33.8847" ...
##   ..$ patent_firstnamed_inventor_location_id: chr [1:25] "33.8847|-118.41" ...
##   ..$ patent_firstnamed_inventor_longitude  : chr [1:25] "-118.41" ...
##   ..$ patent_firstnamed_inventor_state      : chr [1:25] "CA" ...
##   ..$ patent_id                             : chr [1:25] "10000000" ...
##   ..$ patent_kind                           : chr [1:25] "B2" ...
##   ..$ patent_num_cited_by_us_patents        : chr [1:25] "0" ...
##   ..$ patent_num_claims                     : chr [1:25] "20" ...
##   ..$ patent_num_combined_citations         : chr [1:25] "5" ...
##   ..$ patent_num_foreign_citations          : chr [1:25] "1" ...
##   ..$ patent_num_us_application_citations   : chr [1:25] "2" ...
##   ..$ patent_num_us_patent_citations        : chr [1:25] "2" ...
##   ..$ patent_number                         : chr [1:25] "10000000" ...
##   ..$ patent_processing_time                : chr [1:25] "1197" ...
##   ..$ patent_title                          : chr [1:25] "Coherent LADAR usin"..
##   ..$ patent_type                           : chr [1:25] "utility" ...
##   ..$ patent_year                           : chr [1:25] "2018" ...
##   ..$ inventors                             :List of 25
## 
## $query_results
## #### Distinct entity counts across all downloadable pages of output:
## 
## total_patent_count = 100,000

Example for CPC group H04L63/00: patents whose CPC subgroup ID begins with H04L63/02

# Query: CPC subgroup ID begins with "H04L63/02" AND patent_year >= 2009.
# The DSL functions are called through qry_funs explicitly.
query <- qry_funs$and(
  qry_funs$begins(cpc_subgroup_id = "H04L63/02"),
  qry_funs$gte(patent_year = 2009)
)

# Patent number and year, plus the fields in the "assignees" and "cpcs"
# groups of the patents endpoint.
fields <- c(
  "patent_number",
  "patent_year",
  get_fields(endpoint = "patents", groups = c("assignees", "cpcs"))
)

# all_pages = TRUE asks search_pv() for every page of results.
pv_res <- search_pv(query = query, fields = fields, all_pages = TRUE)
library(leaflet)
library(htmltools)
library(dplyr)
library(tidyr)

# Build the map data: one row per distinct assignee record, with the number
# of patent rows for that assignee and an HTML popup label. The stages are
# kept inside local() so the intermediates do not leak into the workspace.
data <- local({
  # One row per (patent, assignee) pair, keeping only the columns needed.
  assignee_rows <-
    pv_res$data$patents %>%
    unnest(assignees) %>%
    select(assignee_id, assignee_organization, patent_number,
           assignee_longitude, assignee_latitude)

  # Group by every column whose name does not match "pat" (i.e. all but
  # patent_number), count rows per group, then collapse to distinct rows.
  assignee_counts <-
    assignee_rows %>%
    group_by_at(vars(-matches("pat"))) %>%
    mutate(num_pats = n()) %>%
    ungroup() %>%
    select(-patent_number) %>%
    distinct()

  assignee_counts %>%
    # The organization name is escaped before being embedded in HTML.
    mutate(popup = paste0("<font color='Grey'>",
                          htmlEscape(assignee_organization),
                          "<br><br>Patents:",
                          num_pats,
                          "</font>")) %>%
    # Columns matching "_l" (longitude/latitude) are converted to numeric.
    mutate_at(vars(matches("_l")), as.numeric) %>%
    filter(!is.na(assignee_id))
})

# Draw one red circle marker per row of `data` on CartoDB.DarkMatterNoLabels
# provider tiles. Each marker uses the row's popup text, and its radius is
# the square root of num_pats.
leaflet(data) %>%
  addProviderTiles(providers$CartoDB.DarkMatterNoLabels) %>%
  addCircleMarkers(
    lng = ~assignee_longitude,
    lat = ~assignee_latitude,
    # Named explicitly: previously an unnamed argument that only reached
    # `radius` through positional matching.
    radius = ~sqrt(num_pats),
    popup = ~popup,
    color = "red"
  )
library(ggplot2)
library(RColorBrewer)

# Build the chart data: number of rows per (title, patent_year), where title
# is a simplified CPC subgroup label.
data <-
  pv_res$data$patents %>%
  unnest(cpcs) %>%
  # Drop rows for the "H04L63/02" code itself; other subgroup IDs are kept.
  filter(cpc_subgroup_id != "H04L63/02") %>%
  mutate(
    # Merge related subgroups under a shared label. case_when() uses the
    # first matching condition; unmatched rows keep their original title.
    title = case_when(
      grepl("filtering", cpc_subgroup_title, ignore.case = TRUE) ~
        "Filtering policies",
      cpc_subgroup_id %in% c("H04L63/0209", "H04L63/0218") ~
        "Architectural arrangements",
      grepl("Firewall traversal", cpc_subgroup_title, ignore.case = TRUE) ~
        "Firewall traversal",
      TRUE ~
        cpc_subgroup_title
    )
  ) %>%
  # Strip everything up to and including the last hyphen in the title.
  mutate(title = gsub(".*(?=-)-", "", title, perl = TRUE)) %>%
  # count() groups, tallies into `n`, and returns an ungrouped result.
  count(title, patent_year) %>%
  # patent_year is converted to numeric for use on a continuous axis.
  mutate(patent_year = as.numeric(patent_year))

# Smoothed trend of yearly patent counts, one coloured line per title.
ggplot(data = data) +
  geom_smooth(aes(x = patent_year, y = n, colour = title), se = FALSE) +
  scale_x_continuous(
    name = "\nPublication year",
    limits = c(2009, 2018),
    breaks = 2009:2018
  ) +
  scale_y_continuous(name = "Patents\n", limits = c(0, 700)) +
  # The brewer scale sizes the Set2 palette to the number of titles in the
  # data, instead of a fixed five colours that fails when there are more.
  scale_colour_brewer(name = "", palette = "Set2") +
  theme_bw() + # theme inspired by https://hrbrmstr.github.io/hrbrthemes/
  theme(panel.border = element_blank(), axis.ticks = element_blank())

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.