Looping in RSelenium and Scraping - for-loop

I'm trying to scrape data from a website using RSelenium. I am able to navigate through the drop-downs individually, but when I run them in a loop I get an error.
Also, after selecting all the values in the drop-downs, I want to store the name of the facility and its contact details in a table, which I have not been able to do so far.
# Question script: drive the dependent State -> District -> KVK drop-downs with
# RSelenium and try to read the facility details after each submit.
rm(list=ls())
setwd("D:\\work_codes\\kvk\\data")
getwd()
library(RSelenium)
library(rvest)
library(XML)
library(RCurl)
library(magrittr)
library(stringr)
# Create the driver and keep its "client" element for all browser commands.
rd<-rsDriver()
remDr<-rd[["client"]]
remDr$navigate("https://kvk.icar.gov.in/facilities_list.aspx")
remDr$refresh()
# Read the state drop-down: split its text on newlines (one entry per option)
# and drop the ' --Select--' placeholder entry.
stateEle<-remDr$findElement("id", "ContentPlaceHolder1_ddlState")
states<-stateEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist %>% setdiff(., ' --Select--')
states<-str_trim(states, 'left')
stateEle$clickElement()
for (i in 1:length(states)) {
# NOTE(review): stateEle was located before this refresh; confirm that the
# element handle is still usable once the page has been reloaded.
remDr$refresh()
stateEle$clickElement()
stateEle$sendKeysToElement(list(states[i]))
stateEle$clickElement()
# District options are re-read for the state typed into the drop-down above.
districts<-NULL
distEle<-remDr$findElement("id", "ContentPlaceHolder1_ddlDistrict")
districts<-distEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist %>% setdiff(., ' --Select--')
districts<-str_trim(districts, 'left')
for (j in 1:length(districts)) {
distEle$clickElement()
distEle$sendKeysToElement(list(districts[j]))
distEle$clickElement()
# KVK options are re-read for the district typed into the drop-down above.
kvk<-NULL
kvkEle<-remDr$findElement("id", "ContentPlaceHolder1_ddlKvk")
kvk<-kvkEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist %>% setdiff(., ' --Select--')
kvk<-str_trim(kvk, 'left')
for (k in 1:length(kvk)) {
kvkEle$clickElement()
# NOTE(review): this sends kvk[[1]] on every iteration and never uses the loop
# index k; confirm whether kvk[k] was intended.
kvkEle$sendKeysToElement(list(kvk[[1]]))
kvkEle$clickElement()
submitEle<-remDr$findElement("id", "ContentPlaceHolder1_btnSubmit")
submitEle$clickElement()
doc<-remDr$findElement('id', 'ContentPlaceHolder1_rptfacility_f_name_1')
# The text returned here is not assigned to anything, so it is not kept.
doc$getElementText()
doc$clickElement()
# NOTE(review): 'Contact details:' is passed as a class name although it looks
# like visible label text; the result of this lookup is also discarded.
remDr$findElement('class name','Contact details:')
}
}
}

# Scrape the contact text for every State / District / KVK combination by
# re-submitting the page's form with rvest (no browser session required).
# NOTE: html_session(), set_values() and submit_form() are the pre-1.0 rvest
# names; rvest >= 1.0.0 calls them session(), html_form_set() and
# session_submit().
library(rvest)
url <- "https://kvk.icar.gov.in/facilities_list.aspx"
page <- html_session(url)
form <- html_form(page)[[1]]

# Option values are what the form submits; option texts are the display names.
# The first <option> (presumably the "--Select--" placeholder) is dropped.
state_opts <- html_nodes(page, css = "#ContentPlaceHolder1_ddlState > option")
states <- html_attr(state_opts, "value")[-1]
states_name <- html_text(state_opts)[-1]

# One data frame per (state, district, KVK) request is appended to this list.
# Appending avoids an i * j * k index, which is not unique (1*2*1 == 2*1*1)
# and would let later results overwrite earlier ones.
final_df <- list()

#### STATES LOOP ####
# seq_along() skips the loop body entirely when a drop-down has no options,
# whereas 1:length(x) would iterate over c(1, 0).
for (i in seq_along(states)) {
  filled_form <- set_values(
    form,
    "ctl00$ContentPlaceHolder1$ddlState" = states[i]
  )
  page1 <- submit_form(page, filled_form)
  district_opts <- html_nodes(
    page1,
    css = "#ContentPlaceHolder1_ddlDistrict > option"
  )
  district <- html_attr(district_opts, "value")[-1]
  district_name <- html_text(district_opts)[-1]

  #### DISTRICT LOOP ####
  for (j in seq_along(district)) {
    filled_form1 <- set_values(
      html_form(page1)[[1]],
      "ctl00$ContentPlaceHolder1$ddlState" = states[i],
      "ctl00$ContentPlaceHolder1$ddlDistrict" = district[j]
    )
    page2 <- submit_form(page1, filled_form1)
    kvk_opts <- html_nodes(page2, css = "#ContentPlaceHolder1_ddlKvk > option")
    kvk <- html_attr(kvk_opts, "value")[-1]
    kvk_name <- html_text(kvk_opts)[-1]

    #### KVK LOOP ####
    for (k in seq_along(kvk)) {
      filled_form2 <- set_values(
        html_form(page2)[[1]],
        "ctl00$ContentPlaceHolder1$ddlState" = states[i],
        "ctl00$ContentPlaceHolder1$ddlDistrict" = district[j],
        "ctl00$ContentPlaceHolder1$ddlKvk" = kvk[k]
      )
      page3 <- submit_form(page2, filled_form2)

      # Text of every .panel-body node with CR/LF characters removed.
      contact_text <- gsub(
        "[\r\n]", "",
        html_text(html_nodes(page3, css = ".panel-body"))
      )
      if (length(contact_text) == 0) {
        contact_text <- ""
      }

      # The KVK column holds the submitted option value, as in the original.
      df <- data.frame(
        STATE = states_name[i],
        DISTRICT = district_name[j],
        KVK = kvk[k],
        CONTACT_TEXT = contact_text
      )
      final_df[[length(final_df) + 1L]] <- df

      ### WAITTIME TO AVOID HTTP 500 error - So the server is not overloaded
      # Sys.sleep() is the base R pause function; a bare sleep() does not exist.
      Sys.sleep(5)
    }
  }
}
output_df <- data.table::rbindlist(final_df, fill = TRUE)
# After this perform some string operations to extract the exact information required from the CONTACT_TEXT variable
The above answer does not use the RSelenium package at all, and I think this approach is more trustworthy than RSelenium.

Related

create new columns with mean of specific other columns

I'm trying to create 5 new columns in my data frame by calculating the mean of specific, already existing columns.
So far I have tried the following code:
`data_new2 <- data1 %>%
# NOTE(review): every line below is an assignment (data1$... <- ...) chained
# with %>%; the error quoted after this snippet shows R looking for a function
# named "%>%<-".
# NOTE(review): data1[,33] selects a single column; if data1 is a base
# data.frame this drops to a vector - confirm that apply() accepts it.
data1$chronisch <-apply(data1[,c(22,28,31,25)],1,mean) %>%
data1$Sozial <-apply(data1[,33],1,mean) %>%
data1$überforderung <-apply(data1[,c(30,24,27)],1,mean) %>%
data1$Anerkennung <-apply(data1[,c(26,29)],1,mean) %>%
data1$Arbeit <-apply(data1[,c(23,32)],1,mean)`
But it gives the error message: Fehler in apply(data1[, c(26, 29)], 1, mean) %>% data1$Arbeit <- apply(data1[, : konnte Funktion "%>%<-" nicht finden (in English: could not find function "%>%<-")
I cannot figure out what the problem is. And yes, dplyr is installed and loaded.
I would appreciate any help!

Error in (function (classes, fdef, mtable) unable to find an inherited method for function ‘krige’ for signature ‘"formula", "tbl_df"’

I have a strange Error and actually don't know how to solve it, even after checking other posts. Everything runs until the Kriging and then I receive the error: Error in (function (classes, fdef, mtable) unable to find an inherited method for function ‘krige’ for signature ‘"formula", "tbl_df"’
The strange thing is that everything worked a few days ago; I did not change anything in the code, and now it doesn't run anymore. Some other posts related the problem to the raster, but I could not find any discrepancies. Could it be caused by recent package updates? I use, for example, the sp package.
Unfortunately I cannot provide the data I use, hopefully it can be solved without.
How can I solve the issue? Thank you in advance for the help.
# Question script: read one year of groundwater measurements, plot them, fit
# variogram models and interpolate the "level" variable with krige().
homeDir = "D:/Folder/DataXYyear/"
y = 1992
# NOTE(review): paste() separates its arguments with a space by default, so
# Source contains spaces around the year - confirm this matches the file name.
Source = paste("Year", y, ".csv")
File = file.path(homeDir,Source)
GWMeas <- read_csv(File)
# Drop every row that contains an NA.
GWMeasX <- na.omit(GWMeas)
ggplot(
data = GWMeasX,
mapping = aes(x = X, y = Y, color = level)
) +
geom_point(size = 3) +
scale_color_viridis(option = "B") +
theme_classic()
# Build an sf object from the X/Y columns and bind the coordinates back on as
# ordinary columns.
GWMX_sf <- st_as_sf(GWMeasX, coords = c("X", "Y"), crs = 25832) %>%
cbind(st_coordinates(.))
v_emp_OK <- gstat::variogram(
level~1,
as(GWMX_sf, "Spatial") # switch from {sf} to {sp}
)
v_mod_OK <- automap::autofitVariogram(level~1, as(GWMX_sf, "Spatial"), model = "Sph")$var_model
GWMeasX %>% as.data.frame %>% glimpse
GW.vgm <- variogram(level~1, locations = ~X+Y, data = GWMeasX) # calculates sample variogram values
GW.fit <- fit.variogram(GW.vgm, model=vgm(model = "Gau")) # fit model
# NOTE(review): this object uses EPSG 25833 while GWMX_sf above uses 25832 -
# confirm which CRS is intended.
sf_GWlevel <- st_as_sf(GWMeasX, coords = c("X", "Y"), crs = 25833)
# Grid of cell centres covering the bounding box of the measurements.
grd_sf <- sf_GWlevel %>%
st_bbox() %>%
st_as_sfc() %>%
st_make_grid(
cellsize = c(5000, 5000), # 5000m pixel size
what = "centers"
) %>%
st_as_sf() %>%
cbind(., st_coordinates(.))
grid <- as(grd_sf, "Spatial")
gridded(grid) <- TRUE
grid <- as(grid, "SpatialPixels")
# NOTE(review): the function header below has no braces, so the following
# crs(grid) line is parsed as its body and is not executed here - confirm intent.
createGrid <- function(XY.Spacing)
crs(grid) <- crs(GWMX_sf)
# NOTE(review): the variogram calls above convert with as(GWMX_sf, "Spatial"),
# but this call passes the sf object GWMX_sf directly; the reported error
# mentions the signature "formula", "tbl_df" - confirm whether the same
# conversion is needed here.
OK3 <- krige(formula = level~1, # variable to interpolate
data = GWMX_sf, # gauge data
newdata = grid, # grid to interpolate on
model = v_mod_OK, # variogram model to use
nmin = 4, # minimum number of points to use for the interpolation
nmax = 20, # maximum number of points to use for the interpolation
maxdist = 120e3 # maximum distance of points to use for the interpolation
)

The P-Value and significance level I added to R ggplot, are incorrectly placed

I tried to add significance levels (package: ggpubr) to my t_test plot (package: rstatix) and got a plot in which the significance lines are "pulled" to the right of the plot.
I copied the code from this link [https://www.datanovia.com/en/blog/how-to-perform-multiple-t-test-in-r-for-different-variables/][1] but still got the same plot.
Here is the code:
# Question script: per-variable t-tests on two iris species, drawn as faceted
# box plots with significance labels.
library(tidyverse)
library(rstatix)
library(ggpubr)
# Prepare the data and inspect a random sample of the data
mydata <- iris %>%
filter(Species != "setosa") %>%
as_tibble()
mydata %>% sample_n(6)
# Reshape to long format: one row per (Species, variable, value).
mydata.long <- mydata %>%
pivot_longer(-Species, names_to = "variables", values_to = "value")
mydata.long %>% sample_n(6)
# One t-test per variable, followed by BH adjustment of the p-values and
# significance labels.
stat.test <- mydata.long %>%
group_by(variables) %>%
t_test(value ~ Species) %>%
adjust_pvalue(method = "BH") %>%
add_significance()
stat.test
# Box plots of value by Species, one facet per variable.
myplot <- ggboxplot(
mydata.long, x = "Species", y = "value",
fill = "Species", palette = "npg", legend = "none",
ggtheme = theme_pubr(border = TRUE)) +
facet_wrap(~variables)
# Add statistical test p-values
# NOTE(review): the follow-up to this question reports that changing the xmin
# and xmax values of stat.test fixed the misplaced significance lines.
stat.test <- stat.test %>% add_xy_position(x = "Species")
myplot + stat_pvalue_manual(stat.test, label = "p.adj.signif")`
[this is the result from the site:][2]
[and this is what i got:][4]
Any idea what I did wrong?
My RStudio version is 1.4.1103.
[1]: https://www.datanovia.com/en/blog/how-to-perform-multiple-t-test-in-r-for-different-variables/
[2]: https://i.stack.imgur.com/tzPo6.png
[3]: https://i.stack.imgur.com/1rtAO.jpg
[4]: https://i.stack.imgur.com/MJolk.png
I found it:
I changed the "xmin" and "xmax" values of "stat.test".

SparklyR Lubridate

This code was still working yesterday but no longer works. Below are the code and the error message. Can someone help me?
# Show the schema entry (name and type) of the date_heure_debut column of the
# "donnees1" table.
tbl(connexion, "donnees1") %>%
select(date_heure_debut) %>%
sdf_schema()
$date_heure_debut
$date_heure_debut$name
[1] "date_heure_debut"
$date_heure_debut$type
[1] "StringType"
# Add annee_debut (the year of date_heure_debut) and register the result under
# the name "donnees1".
# NOTE(review): the schema output printed before this snippet lists
# date_heure_debut as StringType, and the error suggests lubridate::year() is
# being evaluated locally in R rather than translated for Spark - TODO confirm
# whether the namespaced call or the string column type is the cause.
tbl(connexion, "donnees1") %>%
dplyr::mutate(
annee_debut = lubridate::year(date_heure_debut)
) %>%
sdf_register("donnees1")
Error in lubridate::year(date_heure_debut) :
object 'date_heure_debut' not found

Edit a datatable in shiny throwing an error : Can't subset with `[` using an object of class NULL

I am trying to edit a data table (DT: datatable) on the server side, and I am using a reactive data table which is rendered on the front end. Now I want to edit the data table and retrieve the information from the edited data table. Here is a reproducible example of what I am trying to achieve, as described in the comments in the reprex:
# Question app: table1 shows iris, table2 shows the rows selected in table1 and
# is editable; an edit should be written back and Species set to NA.
library(shiny)
library(DT)
library(tidyverse)
d <- iris
ui <- fluidPage(
dataTableOutput("table1"),
dataTableOutput("table2")
)
server <- function(input,output,session){
# This is the main table I would want to display in its full context
output$table1 <- renderDataTable(
datatable(d)
)
# Rows of d currently selected in table1.
get_row <-reactive({
d %>% slice(input$table1_rows_selected)})
# Here the table with row selected from table 1 is displayed
output$table2 <- renderDataTable({
datatable(get_row(),
editable = TRUE)
})
# Now as the cell gets edited in table 2, i want the edited value to show and make the last column values = NA
proxy <- dataTableProxy("table2")
observeEvent(eventExpr = input$table2_cell_edit, handlerExpr = {
x <- isolate(get_row())
info = input$table2_cell_edit
i = info$row
# NOTE(review): the answer to this question states that the edit info exposes
# the column index as 'col', not 'column'.
j = info$column
v = info$value
# NOTE(review): <<- assigns outside this handler's local scope, while x is
# created locally a few lines up; this is the line the reported
# "object 'x' not found" error refers to.
x[i, j] <<- DT::coerceValue(v, x[i, j])
x <- x %>% mutate(Species = NA)
replaceData(proxy, x, resetPaging = FALSE)
})
}
shinyApp(ui = ui, server = server)
I am getting the error "Error in <<-: object 'x' not found". I am not sure where I went wrong.
The solution came down to the names in your input$table2_cell_edit: it uses col, not column. The error created an empty x that couldn't accept your assignment of the new value. I used a simpler assignment of the new value.
In the future, use the print() function to print out variables in your app to figure out what's being passed or not passed downstream. That's how I figured out this error.
# Answer app: table1 shows iris, table2 shows the rows selected in table1 and
# is editable. After an edit, the new value is written into a local copy of
# the selected rows, Species is set to NA, and table2 is refreshed in place.
library(shiny)
library(DT)
library(tidyverse)

d <- iris

ui <- fluidPage(
  dataTableOutput("table1"),
  dataTableOutput("table2")
)

server <- function(input, output, session) {
  # This is the main table I would want to display in its full context
  output$table1 <- renderDataTable(
    datatable(d)
  )

  # Rows of d currently selected in table1; req() stops here silently until
  # at least one row is selected.
  get_row <- reactive({
    req(input$table1_rows_selected)
    d %>% slice(input$table1_rows_selected)
  })

  # Here the table with row selected from table 1 is displayed
  output$table2 <- renderDataTable({
    # Call the reactive: req(get_row) would test the function object itself,
    # which is always truthy, so it would never hold the render back.
    selected <- req(get_row())
    datatable(selected, editable = TRUE)
  })

  # Now as the cell gets edited in table 2, I want the edited value to show
  # and make the last column values = NA
  proxy <- dataTableProxy("table2")
  observeEvent(eventExpr = input$table2_cell_edit, handlerExpr = {
    x <- isolate(get_row())
    info <- input$table2_cell_edit
    i <- info$row
    ### info uses 'col' not 'column'
    j <- info$col
    v <- info$value
    ### plain local assignment (no <<-); coerceValue() converts the edited
    ### value to the type of the existing cell so that, for example, a
    ### numeric column is not turned into character
    x[i, j] <- DT::coerceValue(v, x[i, j])
    x <- x %>% mutate(Species = NA)
    replaceData(proxy, x, resetPaging = FALSE)
  })
}

shinyApp(ui = ui, server = server)

Resources