# Code to pull the species list from the ForestGEO website (https://forestgeo.si.edu/sites/)

# 1. Install the required packages (only those that are not installed yet),
#    then load them. The HTML table parser lives in the "XML" package.
required_packages <- c("RCurl", "XML", "rlist")
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(XML)
library(RCurl)
library(rlist)

 

# 2. Start with an example: pull the data from the Korup species-list page
#    requested with `page=1`.
# NOTE(review): ssl.verifypeer = FALSE disables certificate verification for
# this request -- confirm it is still required.
korup_example_url <- "https://forestgeo.si.edu/korup-species-list?page=1"
theurl <- getURL(korup_example_url, .opts = list(ssl.verifypeer = FALSE))

# Parse the HTML tables in the downloaded page and drop any NULL entries.
tables <- list.clean(readHTMLTable(theurl), fun = is.null, recursive = FALSE)

 

# Many sites spread their species list over more than one page (only 25 species
# are displayed per page), so use a loop to fetch as many pages as the website
# shows. Korup, for example, has 465 species displayed across 19 pages.

 

# 3. Set the number of pages the website shows for the site you need (the only
#    manual part). Korup has 19 pages.
n_pages <- 19

# 4. Download every page. The page numbers in the URL start at 0, so n_pages
#    pages are numbered 0 to n_pages - 1. Each page's tables are stored in a
#    preallocated list instead of growing a data frame inside the loop.
page_tables <- vector("list", n_pages)

for (i in seq_len(n_pages)) {
  page <- i - 1
  print(page)

  # NOTE(review): ssl.verifypeer = FALSE disables certificate verification for
  # this request -- confirm it is still required.
  theurl <- getURL(
    paste0("https://forestgeo.si.edu/korup-species-list?page=", page),
    .opts = list(ssl.verifypeer = FALSE)
  )

  # Parse the HTML tables on this page and drop any NULL entries.
  tables <- readHTMLTable(theurl)
  tables <- list.clean(tables, fun = is.null, recursive = FALSE)

  # Stack this page's tables into a single data frame.
  page_tables[[i]] <- do.call(rbind.data.frame, tables)
}

# 5. Combine the per-page data frames into the full species list in one step.
korup <- do.call(rbind, page_tables)