The maximum number of rows that a OnePetro query can return is 1000. This means that the user can set up a query to return up to a maximum of 1000 papers. Above that number, the query to OnePetro will return an error.

OnePetro has options to define the number of rows to display at 10, 50 and 100 rows. Additionally, through scripts like these, that number could be raised up to 1,000.

This article describes the process of reading multiple pages with thousands of papers into a single dataframe.

Retrieve the most numerous paper by type

library(petroOne)

# folder where we will save the results
outdir <- system.file("out", package = "petroOne")
# system.file() returns "" when the folder cannot be found; in that case fall
# back to the session temporary directory so that the HTML files are not
# written to the root of the file system
if (!nzchar(outdir)) {
  outdir <- tempdir()
}

What type of paper do we have?

# search for all the words of the query, without restricting the document
# type, so that the summary shows every type of paper that is available
my_url <- make_search_url(query = "pressure transient analysis", how = "all")
result <- read_onepetro(my_url)
summary_by_doctype(result)
## # A tibble: 7 x 2
##               name value
##              <chr> <dbl>
## 1          Chapter     1
## 2 Conference paper  3027
## 3          General    60
## 4    Journal paper   894
## 5            Media     5
## 6            Other     1
## 7     Presentation     7

For the time being we will retrieve only conference papers.

Collect first 1000 rows

# only "conference-paper" is requested because the other document types
# have a different dataframe structure

my_url_1 <- make_search_url(
  query = "pressure transient analysis",
  how = "all",
  dc_type = "conference-paper",
  start = 0,
  rows = 1000
)

get_papers_count(my_url_1)
## [1] 3027
# read the first page, keep a copy on disk and parse the saved copy
page_1 <- read_onepetro(my_url_1)
htm_1 <- file.path(outdir, "pta-01-conference.html")
xml2::write_html(page_1, file = htm_1)
onepetro_page_to_dataframe(htm_1)
## # A tibble: 1,000 x 6
##                                                         title_data
##                                                              <chr>
##  1                             Pressure Transient Analysis in SAGD
##  2                           Well-head Pressure Transient Analysis
##  3  Pressure Transient Analysis of Multifractured Horizontal Wells
##  4 Integrating Pressure Transient Analysis in Hydraulic Fracturing
##  5        How Wellbore Dynamics Affect Pressure Transient Analysis
##  6         Software Showcase: Pressure Transient Analysis Programs
##  7  Pressure Transient Analysis in Multilayered Faulted Reservoirs
##  8                Pressure-Transient Analysis for Perforated Wells
##  9             Numerical Solutions for Pressure Transient Analysis
## 10     Automated Pressure Transient Analysis with Smart Technology
## # ... with 990 more rows, and 5 more variables: paper_id <chr>,
## #   source <chr>, type <chr>, year <chr>, author1_data <chr>

Collect second set of 1000 rows

# same query as before, now starting after the first 1000 results
my_url_2 <- make_search_url(
  query = "pressure transient analysis",
  how = "all",
  dc_type = "conference-paper",
  start = 1000,
  rows = 1000
)

# read the second page, keep a copy on disk and parse the saved copy
page_2 <- read_onepetro(my_url_2)
htm_2 <- file.path(outdir, "pta-02-conference.html")
xml2::write_html(page_2, file = htm_2)
onepetro_page_to_dataframe(htm_2)
## # A tibble: 1,000 x 6
##                                                                     title_data
##                                                                          <chr>
##  1 Storativity Ratio, Matrix/Fracture Permeability Ratio, Fracture Partitionin
##  2 Integration Wireline Formation Testing and Well Testing Evaluation - An Exa
##  3 An Integrated Approach to Designing an Offshore Heavy Oil Well Test - North
##  4  Setting a New Milestone in Carbonate Matrix Stimulation with Coiled Tubing
##  5                             Transient Analysis in Partially Completed Wells
##  6 Performance of Plugless Toe Stages and Non-Isolated Wellbore in Multi-Stage
##  7 Comprehensive Global Model for Before-Closure Analysis of an Injection Fall
##  8 New Approach for Using Surfactants to Enhance Oil Recovery from Naturally F
##  9 New Variable Compliance Method for Estimating In-Situ Stress and Leak-Off f
## 10 Well-Reservoir Coupling on the Numerical Simulation of Horizontal Wells in 
## # ... with 990 more rows, and 5 more variables: paper_id <chr>,
## #   source <chr>, type <chr>, year <chr>, author1_data <chr>

Collect next set of 1000 rows

# same query as before, now starting after the first 2000 results
my_url_3 <- make_search_url(
  query = "pressure transient analysis",
  how = "all",
  dc_type = "conference-paper",
  start = 2000,
  rows = 1000
)

# read the third page, keep a copy on disk and parse the saved copy
page_3 <- read_onepetro(my_url_3)
htm_3 <- file.path(outdir, "pta-03-conference.html")
xml2::write_html(page_3, file = htm_3)
onepetro_page_to_dataframe(htm_3)
## # A tibble: 1,000 x 6
##                                                                     title_data
##                                                                          <chr>
##  1 Numerical Simulations Of The Combined Effects Of Wellbore Damage And Partia
##  2 An Integrated Team Approach for Improving Company-Wide Stimulation Design a
##  3 Transformation of Failure to Success in Revival of Production by Overcoming
##  4 Heat Transfer Ahead of a SAGD Steam Chamber: A Study of Thermocouple Data F
##  5 EOR Potential for Lean Gas Reinjection in Zipper Fracs in Liquid-Rich Basin
##  6 Influence of Wellbore Hydraulics on Pressure Behavior and Productivity of H
##  7        Geothermal Reservoir Testing by Piezometric Surface Drawdown/Buildup
##  8 Wildcat Hills Gas Gathering System Case Studies: An Integrated Approach Fro
##  9    Cuttings Injection And Monitoring Operations: Cashiriari Gas Field, Peru
## 10 A Synopsis on Completion Efficiency for Multi Stage Acid Fracturing Technol
## # ... with 990 more rows, and 5 more variables: paper_id <chr>,
## #   source <chr>, type <chr>, year <chr>, author1_data <chr>

Collect remaining set

# same query as before, now starting after the first 3000 results; up to 100
# rows are requested for the papers that are left
my_url_4 <- make_search_url(
  query = "pressure transient analysis",
  how = "all",
  dc_type = "conference-paper",
  start = 3000,
  rows = 100
)

# read the last page, keep a copy on disk and parse the saved copy
page_4 <- read_onepetro(my_url_4)
htm_4 <- file.path(outdir, "pta-04-conference.html")
xml2::write_html(page_4, file = htm_4)
onepetro_page_to_dataframe(htm_4)
## # A tibble: 27 x 6
##                                                                     title_data
##                                                                          <chr>
##  1 Pressure Transient and Decline Curve Behaviors in Naturally Fractured Vuggy
##  2                                  Reservoir Modeling in Shale-Gas Reservoirs
##  3 Analytical Matrix-Fracture Transfer Models For Oil Recovery by Hindered-Cap
##  4 The Significance of Non-Darcy and Multiphase Flow Effects in High-Rate, Fra
##  5 Rate Transient and Decline Curve Analyses for Continuously (Dual-Porosity) 
##  6      Effect Of Drainage Area Shapes On The Productivity Of Horizontal Wells
##  7 Vertical Fracture Growth Considerations in the Mission Canyon/Ratcliffe For
##  8                    Analysis of Injection/Falloff Data From Horizontal Wells
##  9 Innovative Use of Open-Hole Wireline Formation Pressure Testing in Waterflo
## 10 Key Parameters Affecting Successful Hydraulic Fracture Design and Optimized
## # ... with 17 more rows, and 5 more variables: paper_id <chr>,
## #   source <chr>, type <chr>, year <chr>, author1_data <chr>

Binding tables in one dataframe

# parse each of the saved pages into its own table
p1 <- onepetro_page_to_dataframe(htm_1)
p2 <- onepetro_page_to_dataframe(htm_2)
p3 <- onepetro_page_to_dataframe(htm_3)
p4 <- onepetro_page_to_dataframe(htm_4)

# stack the four tables, in page order, into one dataframe and print it
papers <- do.call(rbind, list(p1, p2, p3, p4))
papers
## # A tibble: 3,027 x 6
##                                                         title_data
##                                                              <chr>
##  1                             Pressure Transient Analysis in SAGD
##  2                           Well-head Pressure Transient Analysis
##  3  Pressure Transient Analysis of Multifractured Horizontal Wells
##  4 Integrating Pressure Transient Analysis in Hydraulic Fracturing
##  5        How Wellbore Dynamics Affect Pressure Transient Analysis
##  6         Software Showcase: Pressure Transient Analysis Programs
##  7  Pressure Transient Analysis in Multilayered Faulted Reservoirs
##  8                Pressure-Transient Analysis for Perforated Wells
##  9             Numerical Solutions for Pressure Transient Analysis
## 10     Automated Pressure Transient Analysis with Smart Technology
## # ... with 3,017 more rows, and 5 more variables: paper_id <chr>,
## #   source <chr>, type <chr>, year <chr>, author1_data <chr>

Find which papers have the search word in the title

# case-insensitive search of the query text in the paper titles
pattern <- "pressure transient analysis"
rows <- which(grepl(pattern, papers[["title_data"]], ignore.case = TRUE))
# keep only the papers whose title matched
papers[rows, ]
## # A tibble: 161 x 6
##                                                                     title_data
##                                                                          <chr>
##  1                                         Pressure Transient Analysis in SAGD
##  2                                       Well-head Pressure Transient Analysis
##  3              Pressure Transient Analysis of Multifractured Horizontal Wells
##  4             Integrating Pressure Transient Analysis in Hydraulic Fracturing
##  5                    How Wellbore Dynamics Affect Pressure Transient Analysis
##  6                     Software Showcase: Pressure Transient Analysis Programs
##  7              Pressure Transient Analysis in Multilayered Faulted Reservoirs
##  8                         Numerical Solutions for Pressure Transient Analysis
##  9                 Automated Pressure Transient Analysis with Smart Technology
## 10 Pressure Transient Analysis as an Element of Permanent Reservoir Monitoring
## # ... with 151 more rows, and 5 more variables: paper_id <chr>,
## #   source <chr>, type <chr>, year <chr>, author1_data <chr>