Downloading multiple PDFs using Rselenium

Downloading multiple PDFs using Rselenium - download

I'm trying to download multiple PDFs by navigating on a page. Even though I'm able to navigate on the page using drop down and download PDF at the end. I am getting this error:
An element command failed because the referenced element is no longer attached to the DOM.
Below is my code:
library(RSelenium)
library(stringr)
rd<-rsDriver()
remDr<-rd[["client"]]
remDr$navigate("http://secc.gov.in/lgdStateList")
#First drop down
stateEle<-remDr$findElement("id", "lgdState")
states<-stateEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist %>% str_trim('left')
states<-states[-1]
for (i in 1:length(states)) {
stateEle$clickElement()
stateEle$sendKeysToElement(list(states[i]))
stateEle$clickElement()
#Second drop down
distEle<-remDr$findElement("id", "lgdDistrict")
districts<-distEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist%>%str_trim('left')
districts<-districts[-1]
for (j in 1:length(districts)) {
distEle$clickElement()
distEle$sendKeysToElement(list(districts[[j]]))
distEle$clickElement()
#Third drop down
blockEle<-remDr$findElement("id", "lgdBlock")
block<-blockEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist%>%str_trim('left')
block<-block[-1]
for (k in 1:length(block)) {
blockEle$clickElement()
blockEle$sendKeysToElement(list(block[[k]]))
blockEle$clickElement()
gpEle<-remDr$findElements('class', 'statesrow')
for (m in 1:length(gpEle)) {
h<-unlist(gpEle[[m]]$getElementAttribute('innerHTML'))
h<-unlist(h%>% strsplit(., '<td>'))
h<-h[-1]
for (n in 1:length(h)) {
xpath1<-paste('//*[#id="example"]/tbody/tr[',m,']/td[',n,']/a')
pdfEle<-remDr$findElement('xpath', xpath1)
pdfEle$clickElement()
Sys.sleep(5)
}
}
}
}
}

As per your request
library(rvest)
url<-"http://secc.gov.in/lgdStateList"
page<-html_session(url)
## STATE LOOP ##
state <- html_nodes(page,css="#lgdState > option") %>% html_text()
state <- state[-1]
state_id <- html_nodes(page,css="#lgdState > option") %>% html_attr('value')
state_id <- state_id[-1]
for(i in 1:length(state)){
page1<-rvest:::request_POST(page, url="http://secc.gov.in/lgdDistrictList",
body=list(
"stateCode"=state_id[i]
),
encode="form")
## DISTRICT LOOP ##
district <- html_nodes(page1,css="#lgdDistrict > option") %>% html_text()
district <- district[-1]
district_id <- html_nodes(page1,css="#lgdDistrict > option") %>% html_attr('value')
district_id <- district_id[-1]
for(j in 1:length(district)){
page2<-rvest:::request_POST(page1,url="http://secc.gov.in/lgdBlockList",
body=list(
"stateCode"=state_id[i],
"districtCode"=district_id[j]
),
encode = "form")
## BLOCK LOOP ##
block <- html_nodes(page2, css="#lgdBlock > option") %>% html_text()
block <- block [-1]
block_id <- html_nodes(page2, css="#lgdBlock > option") %>% html_attr('value')
block_id <- block_id[-1]
for(k in 1:length(block)){
page3<-rvest:::request_POST(page2,url="http://secc.gov.in/lgdGpList",
body=list(
"stateCode"=state_id[i],
"districtCode"=district_id[j],
"blockCode"=block_id[k]
),
encode = "form")
txt <- html_nodes(page3,css="#example a") %>% html_attr("onclick")
library(stringr)
gpcode<-sapply(txt,function(x){
k <- str_extract_all(x, "\\([^()]+\\)")[[1]]
k <- substring(k, 2, nchar(k)-1)
regexp <- "[[:digit:]]+"
k <- str_extract(strsplit(k, ",")[[1]][4], regexp)
})
## GP CODE LOOP to download file ##
for(l in 1:length(gpcode)){
page4<-rvest:::request_POST(page3,url="http://secc.gov.in/downloadLgdwisePdfFile",
body=list(
"stateCode"=state_id[i],
"districtCode"=district_id[j],
"blockCode"=block_id[k],
"gpCode"=gpcode[l]
),
encode = "form")
error = "PDF File for this Gram Panchayat is not available."
error_displayed = try(html_nodes(page4,css=".error") %>% html_text())
if(error != error_displayed){
filename<-gsub("attachment;filename=","",page4$response$headers$`content-disposition`)
filename<-str_replace_all(filename, '"', "")
writeBin(page4$response$content,filename)
}
}
}
}
}
This is again without RSelenium. :)

Related

Using dplyr:filter to create new variables in for loop for rMarkdown

I have some R code that is meant to run a .rmd file so that I can create a unique report for multiple individuals at the same time.
Simplistically, my issue is that code chunk 1 creates a PDF output that is correct, however, will only create one PDF, and not one for each athlete in my dataset.
Code chunk 2 will create multiple PDF outputs (one for each athlete in the dataset), however, the variables created within the for loop (i.e., Athlete_full_name_i, Athlete_Sport_i) do not work. In the PDF output, these values are presented as "c("values","in","the","list")".
I think the code is close to working well but I would Love some advice to make this run smoother.
Thank you!
code chunk 1:
'''
#libraries
library(tidyverse)
library(rmarkdown)
library(dplyr)
#extra information
Folder_path1 <- "path to folder"
Excel_sheet1 <- "Data"
StartDate1 <- "2021-09-01"
EndDate1 <- Sys.Date()
#create athlete dataframe to run one, or multiple athlete reports at once.
Athlete_full_name <- c("John Smith", "Joe Tree") #as the name should appear in the report
Athlete_file_name <- c("SmithJ", "TreeJ") #Last name, first initial
Athlete_Team <- c("Men's Football", "Men's Darts") #Sport as it should appear in the report
Athlete_df <- data.frame(Athlete_full_name, Athlete_file_name, Athlete_Team)
Names <- unique(Athlete_df$Athlete_file_name)
#For loop - each athlete in Athlete_df gets unique report
for (i in 1:length(Names)) {
#creating new variables for each iteration
#determine athlete full name
Athlete_full_name_i <- Athlete_df %>%
filter(Athlete_file_name == Names[i]) %>%
subset(select = 1)
#determine athlete sport
Athlete_Sport_i <- Athlete_df %>%
filter(Athlete_file_name == Names[i]) %>%
subset(select = 3)
#create Athlete report with unique params and unique filename
rmarkdown::render("R2S_Dinos_JumpV3.Rmd",
params = list(Athlete_csvname = Names[i],
Athlete_fullname = Athlete_full_name_i,
Team = Athlete_Sport_i,
Folder_path = Folder_path1,
Excel_sheet = Excel_sheet1,
StartDate = StartDate1,
EndDate = EndDate1),
output_format = "pdf_document",
output_file=paste0("Athlete Reports/", Names[i], "_Report_", Sys.Date(), ".pdf"))
}
'''
code chunk 2:
'''
'''
#libraries
library(tidyverse)
library(rmarkdown)
library(dplyr)
#extra information
Folder_path1 <- "path to folder"
Excel_sheet1 <- "Data"
StartDate1 <- "2021-09-01"
EndDate1 <- Sys.Date()
#create athlete dataframe to run one, or multiple athlete reports at once.
Athlete_full_name <- c("John Smith", "Joe Tree") #as the name should appear in the report
Athlete_file_name <- c("SmithJ", "TreeJ") #Last name, first initial
Athlete_Team <- c("Men's Football", "Men's Darts") #Sport as it should appear in the report
Athlete_df <- data.frame(Athlete_full_name, Athlete_file_name, Athlete_Team)
#For loop - each athlete in Athlete_df gets unique report
for (Athlete_file_name in Athlete_df$Athlete_file_name) {
#creating new variables for each iteration
#determine athlete full name
Athlete_full_name_i <- Athlete_df %>%
filter(Athlete_file_name == Athlete_file_name) %>%
subset(select = 1)
#determine athlete sport
Athlete_Sport_i <- Athlete_df %>%
filter(Athlete_file_name == Athlete_file_name) %>%
subset(select = 3)
#create Athlete report with unique params and unique filename
rmarkdown::render("R2S_Dinos_JumpV3.Rmd",
params = list(Athlete_csvname = Athlete_file_name,
Athlete_fullname = Athlete_full_name_i,
Team = Athlete_Sport_i,
Folder_path = Folder_path1,
Excel_sheet = Excel_sheet1,
StartDate = StartDate1,
EndDate = EndDate1),
output_format = "pdf_document",
output_file=paste0("Athlete Reports/", Athlete_file_name, "_Report_", Sys.Date(), ".pdf"))
}
'''

When I run Code chunk 1 with a test Rmd output file it works as expected (two pdf files):
test_out.Rmd file:
---
title: "test_out"
params:
name: Bob
personid: 1
---
`r params$name` is person number `r params$personid`
Shortened slightly for clarity:
library(tidyverse)
library(rmarkdown)
Athlete_full_name <- c("John Smith", "Joe Tree")
Athlete_file_name <- c("SmithJ", "TreeJ")
Athlete_Team <- c("Men's Football", "Men's Darts")
Athlete_df <- data.frame(Athlete_full_name, Athlete_file_name, Athlete_Team)
Names <- unique(Athlete_df$Athlete_file_name)
for (i in 1:length(Names)) {
Athlete_full_name_i <- Athlete_df %>%
filter(Athlete_file_name == Names[i]) %>%
subset(select = 1)
Athlete_Sport_i <- Athlete_df %>%
filter(Athlete_file_name == Names[i]) %>%
subset(select = 3)
rmarkdown::render(
"test_out.Rmd",
params = list(Athlete_fullname = Athlete_full_name_i,
Team = Athlete_Sport_i),
output_format = "pdf_document",
output_file = paste0("test_out/", Names[i], "_Report_", Sys.Date(), ".pdf")
)
}
You could make this tidier and easier to debug with an anonymous function inside group_walk - this is the way I'd recommend doing it:
Athlete_df |>
rowwise() |>
group_walk(function(data, key) {
rmarkdown::render(
"test_out.Rmd",
params = list(
Athlete_fullname = data$Athlete_full_name,
Team = data$Athlete_Team
),
output_format = "pdf_document",
output_file = paste0("test_out/", data$Athlete_file_name, "_Report", ".pdf")
)
})
(same result)
In your code chunk 2 the problem comes when you filter with Athlete_file_name == Athlete_file_name. This tests whether these variables are equal within the dataframe, i.e. keep every row where the Athlete_file_name column equals the Athlete_file_name column, which means every row every time! Change the name of the iterating variable to e.g. in_name:
for (in_name in Athlete_df$Athlete_file_name) {
...
filter(Athlete_file_name == in_name) %>% # twice
...
output_file = paste0("test_out/", in_name,
"_Report_", Sys.Date(), ".pdf"
...
}

Thanks for the feedback everyone, there was an error in the for loop. The solution is as follows:
'''
#libraries
library(tidyverse)
library(rmarkdown)
library(dplyr)
#extra information
Folder_path1 <- "path to folder"
Excel_sheet1 <- "Data"
StartDate1 <- "2021-09-01"
EndDate1 <- Sys.Date()
#create athlete dataframe to run one, or multiple athlete reports at once.
Athlete_full_name <- c("John Smith", "Joe Tree") #as the name should appear in the report
Athlete_file_name <- c("SmithJ", "TreeJ") #Last name, first initial
Athlete_Team <- c("Men's Football", "Men's Darts") #Sport as it should appear in the report
Athlete_df <- data.frame(Athlete_full_name, Athlete_file_name, Athlete_Team)
Names <- unique(Athlete_df$Athlete_file_name)
#For loop - each athlete in Athlete_df gets unique report
for (i in Names) {
#creating new variables for each iteration
#determine athlete full name
Athlete_full_name_i <- Athlete_df %>%
filter(Athlete_file_name == i) %>%
subset(select = 1)
#determine athlete sport
Athlete_Sport_i <- Athlete_df %>%
filter(Athlete_file_name == i) %>%
subset(select = 3)
#create Athlete report with unique params and unique filename
rmarkdown::render("R2S_Dinos_JumpV3.Rmd",
params = list(Athlete_csvname = i,
Athlete_fullname = Athlete_full_name_i,
Team = Athlete_Sport_i,
Folder_path = Folder_path1,
Excel_sheet = Excel_sheet1,
StartDate = StartDate1,
EndDate = EndDate1),
output_format = "pdf_document",
output_file=paste0("Athlete Reports/", i, "_Report_", Sys.Date(), ".pdf"))
}

How do I get implied volatility from TWS into R using IBrokers?

Currently I have modified some code I found here to read in bid/ask prices for options in R. Then I feed those back to TWS using calculateImpliedVolatility to get implied volatility. It seems I should be able to get them without the second step using .twsTickType$MODEL_OPTION. I have tried to modify the same code I used for bid/ask prices but have been unable to get it to work. This is what I have tried:
eWrapper.data.Opt_Model <- function(n) {
eW <- eWrapper(NULL) # use basic template
eW$assign.Data("data", rep(list(structure(.xts(matrix(rep(NA_real_,8),nc=8),0),
.Dimnames=list(NULL,c("ImpVol","Delta","tv","pvdiv","gamma","vega",'theta','spot')))),n))
eW$tickPrice <- function(curMsg, msg, timestamp, file, ...)
{
tickType = msg[3]
msg <- as.numeric(msg)
id <- msg[2] #as.numeric(msg[2])
data <- eW$get.Data("data") #[[1]] # list position of symbol (by id == msg[2])
attr(data[[id]],"index") <- as.numeric(Sys.time())
nr.data <- NROW(data[[id]])
if(tickType == .twsTickType$MODEL_OPTION) {
data[[id]][nr.data,1:8] <- msg[4:11]
}
#else
# if(tickType == .twsTickType$ASK) {
# data[[id]][nr.data,2] <- msg[4]
# }
eW$assign.Data("data", data)
c(curMsg, msg)
}
return(eW)
}

It took some time, but I got it to work.
> eWrapper.data.Opt_Model <- function(n) { eW <- eWrapper(NULL) # use
> basic template eW$assign.Data("data",
> rep(list(structure(.xts(matrix(rep(NA_real_,8),nc=8),0),
> .Dimnames=list(NULL,c('modelOption: impVol: ',' delta: ',' modelPrice:
> ',' pvDiv ',' gamma: ',' vega: ',' theta: ',' undPrice: ')))),n))
> eW$tickOptionComputation <- function(curMsg, msg, timestamp, file, ...) {
> tickType = msg[3]
> msg <- as.numeric(msg)
> id <- msg[2] #as.numeric(msg[2])
> data <- eW$get.Data("data") #[[1]] # list position of symbol (by id == msg[2])
> attr(data[[id]],"index") <- as.numeric(Sys.time())
> nr.data <- NROW(data[[id]])
> if(tickType == .twsTickType$MODEL_OPTION) {
> data[[id]][nr.data,1:8] <- msg[4:11]
> }
> #else
> # if(tickType == .twsTickType$ASK) {
> # data[[id]][nr.data,2] <- msg[4]
> # }
> eW$assign.Data("data", data)
> c(curMsg, msg) }
> return(eW) }

R Shiny/Shinydashboard: Hiding the last part of a string in a table

I have a data table that contains some very wide columns and I want to add a scrolling-bar to make it more presentable. So far I have found examples using a scrolling-bar for the entire table - but ideally I would like to have a scrolling-bar for EACH column in the table if that is possible. Below there is an illustrating example. In this code I want a scrolling-bar for both "This_is_a_very_long_name_1", "This_is_a_very_long_name_2" etc.
library("shinydashboard")
library("shiny")
body <- dashboardBody(
fluidPage(
column(width = 4,
box(
title = "Box title", width = NULL, status = "primary",
div(style = 'overflow-x: scroll', tableOutput('table'))
)
)
)
)
ui <- dashboardPage(
dashboardHeader(title = "Column layout"),
dashboardSidebar(),
body
)
server <- function(input, output) {
test.table <- data.frame(lapply(1:8, function(x) {1:10}))
names(test.table) <- paste0('This_is_a_very_long_name_', 1:8)
output$table <- renderTable({
test.table
})
}
# Preview the UI in the console
shinyApp(ui = ui, server = server)
I thought about splitting the table into 8 tables, making a scrolling table for each of them and then putting them next to each other, but space was added betweeen them and it did not look that nice. I think it would be preferable to keeping it as one table (but suggestions are very welcome!).
Does anyone whether this is possible - and how to solve it?
Thanks in advance!

I would not recommend scrolling column header, i think it would not be very clear to read it or so. Here is the code which You can use to get the header in 2 lines so the columns are not too wide:
library("shinydashboard")
library("shiny")
library(DT)
test.table <- data.frame(lapply(1:8, function(x) {1:10}))
names(test.table) <- paste0('This_is_a_very_long_name_', 1:8)
body <- dashboardBody(
fluidPage(
column(width = 8,
box(
title = "Box title", width = NULL, status = "primary",
div(style = 'overflow-x: scroll', dataTableOutput('table'))
)
)
)
)
ui <- dashboardPage(
dashboardHeader(title = "Column layout"),
dashboardSidebar(),
body
)
server <- function(input, output) {
output$table <- renderDataTable({
names(test.table) <- gsub("_"," ",names(test.table))
datatable(test.table, options = list(columnDefs = list(list(width = '100px', targets = c(1:8)))))
})
}
# Preview the UI in the console
shinyApp(ui = ui, server = server)
[UPDATE] --> Column text rendering
Here is a one solution which can be usefull for You. There is no scrolling, however Your row text displays only first three characters (the number of characters displayed can be changed) and ..., with mouse over the row You get the pop up with whole variable name in this row:
library("shinydashboard")
library("shiny")
library(DT)
x <- c("aaaaaaaaaaaaaa", "bbbbbbbbbbbb", "ccccccccccc")
y <- c("aaaaaaaaaaaaaa", "bbbbbbbbbbbb", "ccccccccccc")
z <- c(1:3)
data <- data.frame(x,y,z)
body <- dashboardBody(
fluidPage(
column(width = 4,
box(
title = "Box title", width = NULL, status = "primary",
div(style = 'overflow-x: scroll', dataTableOutput('table'))
)
)
)
)
ui <- dashboardPage(
dashboardHeader(title = "Column layout"),
dashboardSidebar(),
body
)
server <- function(input, output) {
output$table <- renderDataTable({
datatable(data, options = list(columnDefs = list(list(
targets = c(1:3),
render = JS(
"function(data, type, row, meta) {",
"return type === 'display' && data.length > 3 ?",
"'<span title=\"' + data + '\">' + data.substr(0, 3) + '...</span>' : data;",
"}")),list(width = '100px', targets = c(1:3)))))
})
}
# Preview the UI in the console
shinyApp(ui = ui, server = server)

How to download edited table from selected tab in Shiny?

I have created a Shiny app which takes .csv file as input and generates tabs based on the Grade column.
The data looks like this
Name Age Score Grade
Jane 13 89 A
Hanna 14 67 B
Jack 13 80 A
Leena 14 78 B
Amy 12 65 B
Nina 14 90 A
Steven 12 45 C
Omy 13 59 C
The code will generate tables in each tab containing only rows of the dataset that match the Grade.
These tables are editable. I am trying to download the edited table from the active tabPanel, but I am stuck on what should be the content in the downloadHandler. I have attached my code for reference. Sorry for the messy code, I am fairly new to shiny.
library(shiny)
library(xlsx)
library(rhandsontable)
ui <- fluidPage(
titlePanel("Scores"),
sidebarLayout(
sidebarPanel(
fileInput("file", "Upload the file"),
br(),
downloadButton('downloadData', 'Save as excel')
),
mainPanel(uiOutput("op"))
)
)
server <- function(input, output, session) {
data <- reactive({
file1 <- input$file
if (is.null(file1)) {
return()
}
read.csv(file = file1$datapath)
})
fun1 <- function(x) {
mydf <- data()
DF <- mydf[(mydf$Grade == x), c(1:3)]
table <- renderRHandsontable({
newtable<- rhandsontable(DF, rowHeaders = NULL)
})
tabPanel(x, table)
}
output$op <- renderUI({
if (is.null(data()))
helpText("File not uploaded!")
else{
mydf <- data()
Tabs <- lapply((unique(mydf$Grade)), fun1)
do.call(tabsetPanel, c(id = "tabs", Tabs))
}
})
output$downloadData <- downloadHandler(
filename = function() {
'Edited table.xls'
},
# what should go in place of table
content = function(file) {
write.xlsx(table, file)
}
)
}
shinyApp(ui, server)

Echo Timestamp in R Batch Mode

I'd like to better understand the execution duration of statements within an R script when run in batch mode. Is there a good way to do this?
I had one thought on how I'd love to see this done. When executing in batch made the source is echoed to the specified log file. Is there a way for it to echo a timestamp next to the source code in this log file?
> R CMD BATCH script.R script.Rout
Here is the output that I see today.
> tail -f script.Rout
...
> # features related to the date
> trandateN <- as.integer(trandate)
> dayOfWeek <- as.integer(wday(trandate))
> holiday <- mapply(isHoliday, trandate)
I'd like to see something like...
> tail -f script.Rout
...
2013-06-27 11:18:01 > # features related to the date
2013-06-27 11:18:01 > trandateN <- as.integer(trandate)
2013-06-27 11:18:05 > dayOfWeek <- as.integer(wday(trandate))
2013-06-27 11:19:02 > holiday <- mapply(isHoliday, trandate)

You can use addTaskCallback as follows to create a log of each top level execution.
.log <- data.frame(time=character(0), expr=character(0))
.logger <- function(expr, value, ok, visible) { # formals described in ?addTaskCallback
time <- as.character(Sys.time())
expr <- deparse(expr)
.log <<- rbind(.log, data.frame(time, expr))
return(TRUE) # required of task callback functions
}
.save.log <- function() {
if (exists('.logger')) write.csv(.log, 'log.csv')
}
addTaskCallback(.logger)
x <- 1:10
y <- mean(x)
.save.log()
.log
# time expr
# 1 2013-06-27 12:01:45.837 addTaskCallback(.logger)
# 2 2013-06-27 12:01:45.866 x <- 1:10
# 3 2013-06-27 12:01:45.876 y <- mean(x)
# 4 2013-06-27 12:01:45.900 .save.log()
Of course instead of committing the cardinal sin of growing a data.frame row-wise, as I have here, you could just leave a connection open and write directly to file, closing the connection with on.exit.
And if you want to be tidy about it, you can pack the logging setup into a function pretty nicely.
.log <- function() {
.logger <<- local({
log <- data.frame(time=character(0), expr=character(0))
function(expr, value, ok, visible) {
time <- as.character(Sys.time())
expr <- deparse(expr)
log <<- rbind(log, data.frame(time, expr))
return(TRUE)
}
})
invisible(addTaskCallback(.logger))
}
.save.log <- function() {
if (exists('.logger'))
write.csv(environment(.logger)$log, 'log.csv')
}
.log()
x <- 1:10
y <- mean(x)
.save.log()

See ?Sys.time. It returns a POSIXct datetime, which you'll need to format when outputting to a log file.
cat(format(Sys.time()), " is the current time\n")

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Downloading multiple PDFs using Rselenium - download

Related

Using dplyr:filter to create new variables in for loop for rMarkdown

How do I get implied volatility from TWS into R using IBrokers?

R Shiny/Shinydashboard: Hiding the last part of a string in a table

How to download edited table from selected tab in Shiny?

Echo Timestamp in R Batch Mode

Categories

Resources