Web scraping w R za pomocą RSelenium
# Load the CSV of ZIP codes.
zips.df <- read.csv(file = "zip_code_data.csv")

# Start a Selenium driver for Firefox on port 4557 and keep a handle on the
# browser client it exposes.
rD <- rsDriver(port = 4557L, browser = "firefox")
remDr <- rD$client

# Point the browser at the FCC DTV maps page.
remDr$navigate("https://www.fcc.gov/media/engineering/dtvmaps")
# Search the FCC DTV maps page for one ZIP code and scrape the result tables.
#
# Uses `remDr`, the RSelenium client created at the top of this script; the
# browser must already be on the DTV maps page.
#
# Args:
#   zip: ZIP code to type into the search box. Coerced to character first.
#
# Returns:
#   A data frame with columns callsign, network, ch_num, band, strength and
#   cont.strength. If the page raises an alert after the search is submitted,
#   a single row of NA values is returned instead.
scrape.zips <- function(zip) {
  # Send the ZIP code as text even if the caller passes a number or factor.
  search_box <- remDr$findElement("id", "startpoint")
  search_box$sendKeysToElement(list(as.character(zip)))
  remDr$findElements("id", "btnSub")[[1]]$clickElement()

  # A try-error here is treated as "no alert was raised by the search".
  alert <- try(remDr$getAlertText(), silent = TRUE)

  if (!inherits(alert, "try-error")) {
    # An alert popped up: dismiss it and return a placeholder row.
    signals <- data.frame(
      callsign = NA, network = NA, ch_num = NA, band = NA,
      strength = NA, cont.strength = NA
    )
    remDr$acceptAlert()
  } else {
    # Fixed pause before reading the page source.
    Sys.sleep(2)

    # Parse the page once and reuse the document for every query below.
    page <- read_html(remDr$getPageSource()[[1]])

    # Numeric value following "RX Strength: " in each call sign's onclick
    # attribute.
    cont.strength <- page %>%
      html_nodes(".callsign") %>%
      html_attr("onclick") %>%
      str_extract("(?<=RX Strength: )\\s*\\-*[0-9.]+")

    # The third reception table holds the station list.
    tables <- html_nodes(page, "table.tbl_mapReception")
    if (length(tables) < 3) {
      stop(
        "Expected at least 3 'table.tbl_mapReception' tables for zip ",
        zip, ", found ", length(tables),
        call. = FALSE
      )
    }
    signals <- html_table(tables[[3]], fill = TRUE)
    names(signals) <- c("rm", "callsign", "network", "ch_num", "band", "rm2")

    # Drop the first row with slice(-1); 2:n() would count backwards and keep
    # that row when the table has only one row.
    signals <- signals %>%
      slice(-1) %>%
      filter(callsign != "") %>%
      select(callsign:band)

    # Image source of each strength icon in the table.
    strength <- page %>%
      html_nodes("table.tbl_mapReception:nth-child(3) .ae-img") %>%
      html_attr("src")

    # Fall back to a placeholder so cbind() still has a value to attach.
    if (length(strength) == 0) {
      strength <- "none"
    }
    if (length(cont.strength) == 0) {
      cont.strength <- "none"
    }

    signals <- cbind(signals, strength, cont.strength)
    # Keep only the "strength" + one-character token from the image source.
    signals <- mutate(signals, strength = str_extract(strength, "strength."))
  }

  # Look the search box up again before clearing it for the next query.
  remDr$findElement("id", "startpoint")$clearElement()

  # Random 1-3 second pause between requests. This must run before the value
  # is returned; placed after return() it would never execute.
  Sys.sleep(runif(1, 1, 3))

  signals
}
Bushra