How to add extra variables to TSLM model - forecast

I'm trying to add some extra variables to my model, as shown below.
I have no problem supplying new values for the numeric variables, but when I try to add the extra (categorical) variables I run into an issue:
library(fpp3)

google_2015 <- gafa_stock %>%
  filter(Symbol == "GOOG", year(Date) == 2015) %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE) %>%
  mutate(qt = quarter(Date)) %>%
  mutate(dow = as.factor(weekdays(Date)))

google_2015_stretch <- google_2015 %>%
  stretch_tsibble(.init = 3, .step = 1) %>%
  filter(.id != max(.id))

fit_cv <- google_2015_stretch %>%
  model(tslm = TSLM(Close ~ Volume + Low + qt + dow))

f <- new_data(google_2015_stretch, 1) %>%
  mutate(Volume = 1447601,
         Low = 700,
         qt = quarter(Date),
         dow = as.factor(weekdays(Date)))

fc <- forecast(fit_cv, new_data = f)

fc %>%
  accuracy(
    google_2015,
    list(rmse = RMSE, mae = MAE, mape = MAPE, mase = MASE, crps = CRPS, winkler = winkler_score)
  ) %>%
  arrange(rmse)
If someone could help me, it would be much appreciated.
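One likely cause (a hedged guess, since the exact error message isn't shown above): new_data() returns only the key (.id) and the index (trading_day), so there is no Date column available when mutate() tries to compute qt and dow. A minimal, untested sketch that joins Date back in from google_2015 before deriving those variables:

f <- new_data(google_2015_stretch, 1) %>%
  # bring Date back in by trading day; new_data() does not carry the other columns
  left_join(google_2015 %>% as_tibble() %>% select(trading_day, Date),
            by = "trading_day") %>%
  mutate(Volume = 1447601,
         Low = 700,
         qt = quarter(Date),
         dow = as.factor(weekdays(Date)))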

Related

Error on filtering on data for R Shiny app

I am trying to create a Shiny app for price forecasting. The price curves are categorized based on three fields: 1) curve name, 2) peak type and 3) delivery.
I have a question about the server function for my app. The code is below; the ui section is working fine, but I think the filtering of the data in the server function has a problem. I would really appreciate any help with this.
Data set:
head(data,10)
date curve_code curve peak_code delivery period peak price
1 0001-10-20 3 ICE NYISO A Monthly Futures 0 2024-05 17 OFF_PEAK 19.05
2 0001-10-20 4 ICE NYISO C Monthly Futures 1 2024-02 14 PEAK 66.90
3 0001-10-20 3 ICE NYISO A Monthly Futures 0 2023-05 5 OFF_PEAK 19.85
4 0001-10-20 3 ICE NYISO A Monthly Futures 1 2023-03 3 PEAK 35.30
5 0001-10-20 3 ICE NYISO A Monthly Futures 0 2023-08 8 OFF_PEAK 39.20
6 0001-10-20 4 ICE NYISO C Monthly Futures 0 2023-11 11 OFF_PEAK 24.30
7 0001-10-20 5 ICE NYISO F Monthly Futures 0 2023-03 3 OFF_PEAK 72.25
8 0001-10-20 3 ICE NYISO A Monthly Futures 1 2023-07 7 PEAK 56.45
9 0001-10-20 6 ICE NYISO G Monthly Futures 1 2024-07 19 PEAK 69.75
10 0001-10-20 5 ICE NYISO F Monthly Futures 0 2023-07 7 OFF_PEAK 53.25
And here is my code:
## app.R ##
library(shinydashboard)
ui <- dashboardPage(
  header <- dashboardHeader(title = "Price Forecasting"),

  ## Sidebar content
  sidebar <- dashboardSidebar(
    sidebarMenu(
      menuItem("Price forecasting", tabName = "dashboard1", icon = icon("chart-bar")),
      menuItem("Log-Return forecasting", tabName = "dashboard2", icon = icon("chart-bar")),
      menuItem("Choose Curve Name", tabName = "controller", icon = icon("th"))
    )
  ),

  ## Body content
  body <- dashboardBody(
    tabItems(
      # First tab content
      tabItem(tabName = "dashboard1",
        fluidRow(
          box(title = "Forecast Auto-Arima Method", status = "primary", solidHeader = TRUE,
              collapsible = TRUE, plotOutput("plot1", height = 400),
              verbatimTextOutput("detail1")),
          box(title = "Timeseries Decomposition", status = "primary", solidHeader = TRUE,
              collapsible = TRUE, plotOutput("plot2", height = 400),
              verbatimTextOutput("detail2"))
        )
      ),
      # Second tab content
      tabItem(tabName = "dashboard2",
        fluidRow(
          box(title = "Log-return Forecast Auto-Arima Method", status = "primary", solidHeader = TRUE,
              collapsible = TRUE, plotOutput("plot4", height = 400),
              verbatimTextOutput("detail3")),
          box(title = "Log-return Timeseries Decomposition", status = "primary", solidHeader = TRUE,
              collapsible = TRUE, plotOutput("plot5", height = 400),
              verbatimTextOutput("detail4"))
        )
      ),
      # Third tab content
      tabItem(tabName = "controller",
        fluidRow(
          box(title = "Curve_name", status = "primary", solidHeader = TRUE, collapsible = TRUE,
              selectInput("reg", label = "Choose Curve name",
                          choices = list('ICE NYISO A Monthly Futures' = 1,
                                         'ICE NYISO C Monthly Futures' = 2,
                                         'ICE NYISO F Monthly Futures' = 3,
                                         'ICE NYISO G Monthly Futures' = 4),
                          selected = "ICE NYISO A Monthly Futures")),
          box(title = "Peak_type", status = "primary", solidHeader = TRUE, collapsible = TRUE,
              selectInput("reg", label = "Choose Peak Type",
                          choices = list("PEAK" = 1,
                                         "OFF_PEAK" = 2),
                          selected = "PEAK")),
          box(title = "Delivery", status = "primary", solidHeader = TRUE, collapsible = TRUE,
              selectInput("reg", label = "Choose Delivery Period",
                          choices = list("2023-01" = 1,  "2023-02" = 2,  "2023-03" = 3,  "2023-04" = 4,
                                         "2023-05" = 5,  "2023-06" = 6,  "2023-07" = 7,  "2023-08" = 8,
                                         "2023-09" = 9,  "2023-10" = 10, "2023-11" = 11, "2023-12" = 12,
                                         "2024-01" = 13, "2024-02" = 14, "2024-03" = 15, "2024-04" = 16,
                                         "2024-05" = 17, "2024-06" = 18, "2024-07" = 19, "2024-08" = 20,
                                         "2024-09" = 21, "2024-10" = 22, "2024-11" = 23, "2024-12" = 24),
                          selected = "2024-02"))
        )
      )
    )
  ),

  # Integrate dashboard
  dashboardPage(
    header,
    sidebar,
    body
  )
)
# Our server to get IO from the user
data <- read.csv("ICE data.csv", header = TRUE)

library(dplyr)
# data <- dplyr::filter(data, peak_type == c("PEAK", "OFF_PEAK"))
data <- data[order(data$last_updated_date, decreasing = FALSE), ]
data <- data.frame(date = as.Date(data$last_updated_date, origin = "1900-01-01"),
                   curve = data$ICE.Curve.Name, delivery = data$delivery, period = data$period,
                   peak = data$peak_type, price = data$ICE.price)
library(forecast)   # needed for auto.arima() and forecast()

server <- function(input, output) {
  # server logic based on user input
  observeEvent(c(input$Curve_name, input$Peak_type, input$Delivery), {
    data <- data %>%
      filter(curve %in% input$Curve_name)

    if (input$Delivery == 1)  { data <- data %>% filter(delivery == "2023-01") }
    if (input$Delivery == 2)  { data <- data %>% filter(delivery == "2023-02") }
    if (input$Delivery == 3)  { data <- data %>% filter(delivery == "2023-03") }
    if (input$Delivery == 4)  { data <- data %>% filter(delivery == "2023-04") }
    if (input$Delivery == 5)  { data <- data %>% filter(delivery == "2023-05") }
    if (input$Delivery == 6)  { data <- data %>% filter(delivery == "2023-06") }
    if (input$Delivery == 7)  { data <- data %>% filter(delivery == "2023-07") }
    if (input$Delivery == 8)  { data <- data %>% filter(delivery == "2023-08") }
    if (input$Delivery == 9)  { data <- data %>% filter(delivery == "2023-09") }
    if (input$Delivery == 10) { data <- data %>% filter(delivery == "2023-10") }
    if (input$Delivery == 11) { data <- data %>% filter(delivery == "2023-11") }
    if (input$Delivery == 12) { data <- data %>% filter(delivery == "2023-12") }
    if (input$Delivery == 13) { data <- data %>% filter(delivery == "2024-01") }
    if (input$Delivery == 14) { data <- data %>% filter(delivery == "2024-02") }
    if (input$Delivery == 15) { data <- data %>% filter(delivery == "2024-03") }
    if (input$Delivery == 16) { data <- data %>% filter(delivery == "2024-04") }
    if (input$Delivery == 17) { data <- data %>% filter(delivery == "2024-05") }
    if (input$Delivery == 18) { data <- data %>% filter(delivery == "2024-06") }
    if (input$Delivery == 19) { data <- data %>% filter(delivery == "2024-07") }
    if (input$Delivery == 20) { data <- data %>% filter(delivery == "2024-08") }
    if (input$Delivery == 21) { data <- data %>% filter(delivery == "2024-09") }
    if (input$Delivery == 22) { data <- data %>% filter(delivery == "2024-10") }
    if (input$Delivery == 23) { data <- data %>% filter(delivery == "2024-11") }
    if (input$Delivery == 24) { data <- data %>% filter(delivery == "2024-12") }

    if (input$Peak_type == 1) { data <- data %>% filter(peak == "PEAK") }
    if (input$Peak_type == 2) { data <- data %>% filter(peak == "OFF_PEAK") }

    # models
    m1 <- auto.arima(data$price)
    f1 <- forecast(m1, h = 30)
    plot(f1)
    detail1 <- accuracy(f1)

    m2 <- auto.arima(data$price)
    f2 <- forecast(m2, h = 20)
    plot(f2)
    detail2 <- accuracy(f2)

    m3 <- auto.arima(diff(log(data$price), lag = 1))
    f3 <- forecast(m3, h = 30)
    plot(f3)
    detail3 <- accuracy(f3)

    m4 <- auto.arima(diff(log(data$price), lag = 1))
    f4 <- forecast(m4, h = 20)
    plot(f4)
    detail4 <- accuracy(f4)

    output$detail1 <- renderText({ detail1[, "MAPE"] })
    output$detail2 <- renderText({ detail2[, "MAPE"] })
    output$detail3 <- renderText({ detail3[, "MAPE"] })
    output$detail4 <- renderText({ detail4[, "MAPE"] })

    # Create plots
    output$plot1 <- renderPlot({ plot(f1) })
    output$plot2 <- renderPlot({ plot(f2) })
    output$plot3 <- renderPlot({ plot(f3) })
    output$plot4 <- renderPlot({ plot(f4) })
  })
}
shinyApp(ui, server)
I am expecting to get an ARIMA forecast plot for each curve, delivery and peak type.
Don't nest all of your output$.. <- calls within an observeEvent: they are defined once, at which point none of them react to changing data.
input$Delivery is never defined; it should be input$reg (the box title does nothing for input$-based variables). Note also that all three selectInput()s currently share the inputId "reg", so they need distinct ids.
I suggest a mydata <- reactive(..) pattern that filters the data based on the inputs, and then model/forecast/plot from that.
We can also reduce the code significantly: all 24 of the if (input$Delivery == ..) {...} blocks can be replaced with a single expression. Change your box(title = "Delivery", ...) to:
box(title = "Delivery", status = "primary", solidHeader = TRUE, collapsible = TRUE,
selectInput("reg",label = "Choose Delivery Period",
choices =
c("2023-01", "2023-02", "2023-03", "2023-04", "2023-05", "2023-06",
"2023-07", "2023-08", "2023-09", "2023-10", "2023-11", "2023-12",
"2024-01", "2024-02", "2024-03", "2024-04", "2024-05", "2024-06",
"2024-07", "2024-08", "2024-09", "2024-10", "2024-11", "2024-12"),
selected = "2024-02")
)
Similarly, change the Peak type choices to choices = c("PEAK", "OFF_PEAK"). Note that if the choices= vector/list is named, then it shows the names and returns the value; if it is not named, it shows and returns the values.
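For example (a small illustration added here; the inputId "peak" is hypothetical): with a named list the menu shows the names but the input receives the values, whereas an unnamed vector shows and returns the same strings.

# named: the menu shows "PEAK"/"OFF_PEAK", but input$peak is "1" or "2"
selectInput("peak", "Choose Peak Type", choices = list("PEAK" = 1, "OFF_PEAK" = 2))
# unnamed: the menu shows and returns "PEAK"/"OFF_PEAK" directly
selectInput("peak", "Choose Peak Type", choices = c("PEAK", "OFF_PEAK"))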
Try this (untested):
server <- function(input, output) {
  # data <- read.csv(...)
  mydata <- eventReactive(input$reg, {
    req(input$reg)
    data %>%
      filter(delivery == input$reg, peak == input$Peak_type)
  })
  mdl1 <- eventReactive(mydata(), {
    req(mydata())
    mdl <- auto.arima(mydata()$price)
    fcst <- forecast(mdl, h = 30)
    detail <- accuracy(fcst)
    list(mdl = mdl, fcst = fcst, detail = detail)
  })
  mdl2 <- eventReactive(mydata(), {
    req(mydata())
    mdl <- auto.arima(mydata()$price)
    fcst <- forecast(mdl, h = 20)
    detail <- accuracy(fcst)
    list(mdl = mdl, fcst = fcst, detail = detail)
  })
  mdl3 <- eventReactive(mydata(), {
    req(mydata())
    mdl <- auto.arima(diff(log(mydata()$price), lag = 1))
    fcst <- forecast(mdl, h = 30)
    detail <- accuracy(fcst)
    list(mdl = mdl, fcst = fcst, detail = detail)
  })
  mdl4 <- eventReactive(mydata(), {
    req(mydata())
    mdl <- auto.arima(diff(log(mydata()$price), lag = 1))
    fcst <- forecast(mdl, h = 20)
    detail <- accuracy(fcst)
    list(mdl = mdl, fcst = fcst, detail = detail)
  })
  output$detail1 <- renderText({ req(mdl1()); mdl1()$detail[, "MAPE"] })
  output$detail2 <- renderText({ req(mdl2()); mdl2()$detail[, "MAPE"] })
  output$detail3 <- renderText({ req(mdl3()); mdl3()$detail[, "MAPE"] })
  output$detail4 <- renderText({ req(mdl4()); mdl4()$detail[, "MAPE"] })
  output$plot1 <- renderPlot({ req(mdl1()); plot(mdl1()$fcst) })
  output$plot2 <- renderPlot({ req(mdl2()); plot(mdl2()$fcst) })
  output$plot3 <- renderPlot({ req(mdl3()); plot(mdl3()$fcst) })
  output$plot4 <- renderPlot({ req(mdl4()); plot(mdl4()$fcst) })
}
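One assumption in the sketch above (my note, not part of the original answer): the three selectInput()s get distinct inputIds, since the server references input$reg and input$Peak_type while the posted UI reuses "reg" for all three. Something along these lines would work (the ids are illustrative):

# hypothetical ids -- keep the server references in sync with whatever ids you choose
selectInput("Curve_name", label = "Choose Curve name",
            choices = c("ICE NYISO A Monthly Futures", "ICE NYISO C Monthly Futures",
                        "ICE NYISO F Monthly Futures", "ICE NYISO G Monthly Futures"))
selectInput("Peak_type", label = "Choose Peak Type", choices = c("PEAK", "OFF_PEAK"))
selectInput("reg", label = "Choose Delivery Period", choices = c("2023-01", "2023-02"))  # etc.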

No output from for loop eventReactive: Shiny

Awesome people out there! I need some quick help with Shiny's eventReactive().
I have two data frames (df1, df2), which are outputs of previous calculations in my Shiny app. I need to run a for loop on these two data frames inside eventReactive(): when I click the actionButton("simulate") in my UI, the for loop below should run and create a data table in the app. The problem is that nothing happens in the UI; it stays blank (while I expect to see a datatable), and there is no error message to debug.
The for loop runs fine in the console but not in the Shiny app. I also tried lapply, but that did not work either; it likewise produced an empty screen without any output.
Any help is much appreciated!
The for loop in the console, which runs fine:
library(tidyverse)

df1 <- data.frame(Year = rep(c(2018:2020), each = 12),
                  Treatment = rep(c("AA", "BB", "CC"), each = 4, times = 3),
                  Source = rep(c("Ran", "Ban", "Dam", "Sam"), times = 9),
                  Value = sample(36))
df2 <- data.frame(Year = rep(c(2018:2020), each = 3),
                  Treatment = rep(c("AA", "BB", "CC"), times = 3),
                  Value2 = sample(9))

tnames <- unique(df1$Treatment)
simdata <- data.frame()
for (i in 1:length(tnames)) {
  vcmp <- df1 %>%
    filter(Treatment == tnames[i]) %>%
    group_by(Source) %>%
    summarise(median = median(Value)) %>%
    mutate(Trait = tnames[i])
  preSD <- df2 %>%
    filter(Treatment == tnames[i]) %>%
    summarize(Value2 = median(Value2))
  simuS <- tibble(nLo = rep(5:15, times = 3),
                  nRe = rep(1:3, each = 11),
                  nPlo = nLo * nRe,
                  Vg = vcmp[[3, 2]],
                  Ve = vcmp[[4, 2]],
                  ycept = preSD[[1, 1]],
                  sm = sqrt(Vg / nLo + Ve / nPlo),
                  sd = sqrt(2 * (Vg / nLo + Ve / nPlo)),
                  ld = 2 * sd,
                  Rp = factor(nRe),
                  Treatment = tnames[i])
  simdata <- rbind(simdata, simuS)
}
MRE, Shiny implementation. I have kind of half-solved it: I can see output now, but it only generates one iteration, not the entire for loop.
library(shiny)
library(tidyverse)
library(DT)

ui <- fluidPage(
  sidebarLayout(
    sidebarPanel(width = 2,
                 selectInput("Parameter", "Select Parameter", "", ""),
                 actionButton("generate", "Generate Data"),
                 hr(),
                 hr(),
                 actionButton("simulate", "Simulate Data")),
    mainPanel(width = 7,
              DT::dataTableOutput("table1"),
              DT::dataTableOutput("table2"),
              DT::dataTableOutput("table3"))
  )
)

server <- function(input, output, session) {
  table1 <- eventReactive(input$generate, {
    df1 <- data.frame(Year = rep(c(2018:2020), each = 12),
                      Treatment = rep(c("AA", "BB", "CC"), each = 4, times = 3),
                      Source = rep(c("Ran", "Ban", "Dam", "Sam"), times = 9),
                      Value = sample(36))
  })
  table2 <- eventReactive(input$generate, {
    df2 <- data.frame(Year = rep(c(2018:2020), each = 3),
                      Treatment = rep(c("AA", "BB", "CC"), times = 3),
                      Value2 = sample(9))
  })
  simudata <- eventReactive(input$simulate, {
    tnames <- unique(table1()$Treatment)
    simudata <- tibble()
    for (i in 1:length(tnames)) {
      vcmp <- table1() %>%
        filter(Treatment == tnames[i]) %>%
        group_by(Source) %>%
        summarise(median = median(Value)) %>%
        mutate(Trait = tnames[i])
      preSD <- table2() %>%
        filter(Treatment == tnames[i]) %>%
        summarize(Value2 = median(Value2))
      new_Row <- tibble(nLo = rep(5:15, times = 3),
                        nRe = rep(1:3, each = 11),
                        nPlo = nLo * nRe,
                        Vg = vcmp[[3, 2]],
                        Ve = vcmp[[4, 2]],
                        ycept = preSD[[1, 1]],
                        sm = sqrt(Vg / nLo + Ve / nPlo),
                        sd = sqrt(2 * (Vg / nLo + Ve / nPlo)),
                        ld = 2 * sd,
                        Rp = factor(nRe),
                        Treatment = tnames[i])
      simudata <- rbind(simudata, new_Row)
      return(simudata)   # note: this return is inside the for loop
    }
  })
  output$table1 <- DT::renderDataTable(table1(), options = list(paging = TRUE, pageLength = 5))
  output$table2 <- DT::renderDataTable(table2(), options = list(paging = TRUE, pageLength = 5))
  output$table3 <- DT::renderDataTable(simudata(), options = list(paging = TRUE, pageLength = 5))
}

shinyApp(ui, server)
You can address your issue by using local() as shown below.
simudata <- eventReactive(input$simulate, {
  tnames <- unique(table1()$Treatment)
  simudata <- tibble()
  for (i in 1:length(tnames)) {
    local({
      i <- i
      vcmp <- table1() %>%
        dplyr::filter(Treatment == tnames[i]) %>%
        group_by(Source) %>%
        dplyr::summarise(median = median(Value)) %>%
        dplyr::mutate(Trait = tnames[i])
      preSD <- table2() %>%
        dplyr::filter(Treatment == tnames[i]) %>%
        dplyr::summarize(Value2 = median(Value2))
      new_Row <- tibble(nLo = rep(5:15, times = 3),
                        nRe = rep(1:3, each = 11),
                        nPlo = nLo * nRe,
                        Vg = vcmp[[3, 2]],
                        Ve = vcmp[[4, 2]],
                        ycept = preSD[[1, 1]],
                        sm = sqrt(Vg / nLo + Ve / nPlo),
                        sd = sqrt(2 * (Vg / nLo + Ve / nPlo)),
                        ld = 2 * sd,
                        Rp = factor(nRe),
                        Treatment = tnames[i])
      simudata <<- rbind(simudata, new_Row)
    })
  }
  return(simudata)
})
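Note that the decisive change above is that return(simudata) now sits after the for loop rather than inside it, which is why all treatments appear instead of just the first. As an alternative, here is an untested sketch (my addition, not part of the original answer) that sidesteps the loop, local() and <<- entirely by building one tibble per treatment and row-binding them:

simudata <- eventReactive(input$simulate, {
  purrr::map_dfr(unique(table1()$Treatment), function(trt) {
    vcmp <- table1() %>%
      dplyr::filter(Treatment == trt) %>%
      dplyr::group_by(Source) %>%
      dplyr::summarise(median = median(Value))
    preSD <- table2() %>%
      dplyr::filter(Treatment == trt) %>%
      dplyr::summarize(Value2 = median(Value2))
    tibble(nLo = rep(5:15, times = 3),
           nRe = rep(1:3, each = 11),
           nPlo = nLo * nRe,
           Vg = vcmp[[3, 2]],   # same positional lookups as in the original loop
           Ve = vcmp[[4, 2]],
           ycept = preSD[[1, 1]],
           sm = sqrt(Vg / nLo + Ve / nPlo),
           sd = sqrt(2 * (Vg / nLo + Ve / nPlo)),
           ld = 2 * sd,
           Rp = factor(nRe),
           Treatment = trt)
  })
})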

Queries run in R Markdown much slower than queries run in R Console?

I'm creating a report in R that estimates and plots the level of transmission of COVID19, and am finding that it runs MUCH more slowly in R Markdown than it does in the Console. I am using RStudio for Mac v1.3.1093, and R v4.0.3.
This chart, which uses a Markov Chain Monte Carlo (MCMC) methodology to estimate infections and the level of spread, first obtains data showing new cases by day in the American state of Oklahoma. This is very quick in both the Console and R Markdown. The next part is the MCMC modelling, which is where the delays come from.
I ran it this morning and it took 2.5 hours in R Markdown, compared to 30 minutes when running the query as an R Script in the Console.
I can't work out why it's SO much slower in the R Markdown version. This becomes more important because, if I run the query at a county level (say, for 10 counties), then it models this 10 times, and would therefore take 10 times longer.
As a shortcut, I can simply run 2 queries, with the output of the modelling query being added into a bigger R Markdown query (e.g., by saving/reading in an RDS file). However, I just can't understand why this is happening and so wanted to better understand before I took that route.
Below is code for the R Console version:
library(tidyverse)
library(EpiNow2)
library(tools)
library(zoo)
# Paths for output files (used as reference for Daily Dashboard and other reports)
# ok_rt_output <- '/Users/tony/Dropbox/Chickasaw R Queries/ok_rt_output.rds'
# cn_rt_output <- '/Users/tony/Dropbox/Chickasaw R Queries/cn_rt_output.rds'
# cn_counties_rt_output <- '/Users/tony/Dropbox/Chickasaw R Queries/cn_counties_output'
# Define defaults for generation time, incubation period and reporting delay for COVID19.
generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")
#set number of cores to use
options(mc.cores = ifelse(interactive(), 4, 1))
# construct example distributions
generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")
# Reporting Delay
reporting_delay <- list(mean = convert_to_logmean(3, 1),
                        mean_sd = 0.1,
                        sd = convert_to_logsd(3, 1),
                        sd_sd = 0.1,
                        max = 10)
# Obtain data from JHU Repository
jhu_county_COVID_confirmed <- read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv", na="")
# Put dates into a single column
jhu_county_COVID_confirmed_data <- jhu_county_COVID_confirmed %>% select(c(-UID, -iso2, -iso3, -code3, -FIPS, -Lat, -Long_, -Combined_Key, -Country_Region, County=Admin2))
jhu_county_COVID_confirmed_data <- jhu_county_COVID_confirmed_data %>% pivot_longer(c(-County, -Province_State), names_to = "Date", values_to = "Cases")
jhu_county_COVID_confirmed_data$Date <- strptime(trimws(jhu_county_COVID_confirmed_data$Date), format='%m/%d/%y')
jhu_county_COVID_confirmed_data$Date <- as.Date(jhu_county_COVID_confirmed_data$Date)
# Limit to Oklahoma
ok_jhu_county_COVID_confirmed <- jhu_county_COVID_confirmed_data %>% filter(Province_State=='Oklahoma')
ok_jhu_county_COVID_confirmed <- ok_jhu_county_COVID_confirmed %>% rename(county=County, state=Province_State, date=Date, cases_total=Cases)
ok_jhu_county_COVID_confirmed <- ok_jhu_county_COVID_confirmed %>% select(state, county, date, cases_total)
# Add in VERY latest OK data from OK DoH.
# Download latest day of *Oklahoma* COVID data. Defines fields as needed.
ok_covid_data_latest_for_daily_dashboard <- read_csv("https://storage.googleapis.com/ok-covid-gcs-public-download/oklahoma_cases_county.csv", na="")
ok_covid_data_latest_for_daily_dashboard$ReportDate <- as.Date(ok_covid_data_latest_for_daily_dashboard$ReportDate)
ok_covid_data_latest_for_daily_dashboard$County <- as.factor((ok_covid_data_latest_for_daily_dashboard$County))
# Adjust format from state data (counties are in all caps at first; so make them lower case and then adjust the "Mc" counties)
ok_covid_data_latest_for_daily_dashboard <- mutate(ok_covid_data_latest_for_daily_dashboard, County = toTitleCase(tolower(`County`)))
ok_covid_data_latest_for_daily_dashboard$County[ok_covid_data_latest_for_daily_dashboard$County == "Mccurtain"] <- "McCurtain"
ok_covid_data_latest_for_daily_dashboard$County[ok_covid_data_latest_for_daily_dashboard$County == "Mcclain"] <- "McClain"
ok_covid_data_latest_for_daily_dashboard$County[ok_covid_data_latest_for_daily_dashboard$County == "Mcintosh"] <- "McIntosh"
ok_covid_data_latest_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard %>% rename(`Date`=`ReportDate`)
ok_covid_data_latest_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard %>% rename(`Recoveries`=`Recovered`)
ok_covid_data_latest_for_daily_dashboard$Province_State <- "Oklahoma"
ok_covid_data_latest_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard[,c("County", "Province_State", "Date", "Cases", "Deaths")]
ok_latest_day_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard %>% group_by(Province_State, County, Date) %>% summarise(OK_Cases=sum(Cases))
ok_latest_day_for_daily_dashboard <- ok_latest_day_for_daily_dashboard %>% rename(cases_total=OK_Cases, date=Date, county=County, state=Province_State)
# Append latest data file for Oklahoma to existing COVID (after first testing it is in fact new)
if(min(ok_latest_day_for_daily_dashboard$date) > max(ok_jhu_county_COVID_confirmed$date)) {ok_cases_by_day <- rbind(ok_jhu_county_COVID_confirmed, ok_latest_day_for_daily_dashboard) }
if(min(ok_latest_day_for_daily_dashboard$date) <= max(ok_jhu_county_COVID_confirmed$date)) {ok_cases_by_day <- ok_jhu_county_COVID_confirmed}
# Oklahoma Cases by Day
ok_cases_by_day <- ok_cases_by_day %>% arrange(state, county, date)
# New Cases in Oklahoma for Rt calculation
ok_cases_by_day_for_Rt <- ok_cases_by_day %>% group_by(date) %>% summarise(OK_Cases = sum(cases_total, na.rm = TRUE))
ok_cases_by_day_for_Rt <- ok_cases_by_day_for_Rt %>% mutate(OK_New_Cases=OK_Cases - lag(OK_Cases,1))
ok_cases_by_day_for_Rt <- ok_cases_by_day_for_Rt %>% select(-OK_Cases) %>% rename(confirm=OK_New_Cases)
# Limit cases to dates from 1 April
ok_cases_by_day_for_Rt <- ok_cases_by_day_for_Rt %>% filter(date >= '2020-04-01')
# estimate Rt and nowcast/forecast cases by date of infection for Oklahoma
out_ok <- epinow(reported_cases = ok_cases_by_day_for_Rt, generation_time = generation_time,
                 rt = rt_opts(prior = list(mean = 1, sd = 0.2)), # replaces the earlier mean of 2 and sd of 0.1 used in the initial approach by Abbott et al.
                 delays = delay_opts(incubation_period, reporting_delay), return_output = TRUE,
                 verbose = TRUE, horizon = 14)
# summary of the latest estimates
summary(out_ok)
# plot estimates
plot(out_ok)
Below is code for the R Markdown version of this query
---
title: "Rt Sample"
output:
  word_document: default
  html_document:
    df_print: paged
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, collapse = TRUE, warning=FALSE, message=FALSE)
library(tidyverse)
library(zoo)
library(readxl)
library(tools)
library(EpiNow2)
```
## R Markdown
This is a query that graphs the transmission rate of COVID19 in the American state of Oklahoma. It uses the EpiNow2 package to do this.
More info can be found on epiforecasts.io
```{r ok_rt_chart}
# Note this is a sample report so the commentary (e.g., progress) will appear in the report.
generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")
#set number of cores to use
options(mc.cores = ifelse(interactive(), 4, 1))
# construct example distributions
generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer")
# Reporting Delay
reporting_delay <- list(mean = convert_to_logmean(3, 1),
                        mean_sd = 0.1,
                        sd = convert_to_logsd(3, 1),
                        sd_sd = 0.1,
                        max = 10)
# Obtain data from JHU Repository
jhu_county_COVID_confirmed <- read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv", na="")
# Put dates into a single column
jhu_county_COVID_confirmed_data <- jhu_county_COVID_confirmed %>% select(c(-UID, -iso2, -iso3, -code3, -FIPS, -Lat, -Long_, -Combined_Key, -Country_Region, County=Admin2))
jhu_county_COVID_confirmed_data <- jhu_county_COVID_confirmed_data %>% pivot_longer(c(-County, -Province_State), names_to = "Date", values_to = "Cases")
jhu_county_COVID_confirmed_data$Date <- strptime(trimws(jhu_county_COVID_confirmed_data$Date), format='%m/%d/%y')
jhu_county_COVID_confirmed_data$Date <- as.Date(jhu_county_COVID_confirmed_data$Date)
# Limit to Oklahoma
ok_jhu_county_COVID_confirmed <- jhu_county_COVID_confirmed_data %>% filter(Province_State=='Oklahoma')
ok_jhu_county_COVID_confirmed <- ok_jhu_county_COVID_confirmed %>% rename(county=County, state=Province_State, date=Date, cases_total=Cases)
ok_jhu_county_COVID_confirmed <- ok_jhu_county_COVID_confirmed %>% select(state, county, date, cases_total)
# Add in VERY latest OK data from OK DoH.
# Download latest day of *Oklahoma* COVID data. Defines fields as needed.
ok_covid_data_latest_for_daily_dashboard <- read_csv("https://storage.googleapis.com/ok-covid-gcs-public-download/oklahoma_cases_county.csv", na="")
ok_covid_data_latest_for_daily_dashboard$ReportDate <- as.Date(ok_covid_data_latest_for_daily_dashboard$ReportDate)
ok_covid_data_latest_for_daily_dashboard$County <- as.factor((ok_covid_data_latest_for_daily_dashboard$County))
# Adjust format from state data (counties are in all caps at first; so make them lower case and then adjust the "Mc" counties)
ok_covid_data_latest_for_daily_dashboard <- mutate(ok_covid_data_latest_for_daily_dashboard, County = toTitleCase(tolower(`County`)))
ok_covid_data_latest_for_daily_dashboard$County[ok_covid_data_latest_for_daily_dashboard$County == "Mccurtain"] <- "McCurtain"
ok_covid_data_latest_for_daily_dashboard$County[ok_covid_data_latest_for_daily_dashboard$County == "Mcclain"] <- "McClain"
ok_covid_data_latest_for_daily_dashboard$County[ok_covid_data_latest_for_daily_dashboard$County == "Mcintosh"] <- "McIntosh"
ok_covid_data_latest_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard %>% rename(`Date`=`ReportDate`)
ok_covid_data_latest_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard %>% rename(`Recoveries`=`Recovered`)
ok_covid_data_latest_for_daily_dashboard$Province_State <- "Oklahoma"
ok_covid_data_latest_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard[,c("County", "Province_State", "Date", "Cases", "Deaths")]
ok_latest_day_for_daily_dashboard <- ok_covid_data_latest_for_daily_dashboard %>% group_by(Province_State, County, Date) %>% summarise(OK_Cases=sum(Cases))
ok_latest_day_for_daily_dashboard <- ok_latest_day_for_daily_dashboard %>% rename(cases_total=OK_Cases, date=Date, county=County, state=Province_State)
# Append latest data file for Oklahoma to existing COVID (after first testing it is in fact new)
if(min(ok_latest_day_for_daily_dashboard$date) > max(ok_jhu_county_COVID_confirmed$date)) {ok_cases_by_day <- rbind(ok_jhu_county_COVID_confirmed, ok_latest_day_for_daily_dashboard) }
if(min(ok_latest_day_for_daily_dashboard$date) <= max(ok_jhu_county_COVID_confirmed$date)) {ok_cases_by_day <- ok_jhu_county_COVID_confirmed}
# Oklahoma Cases by Day
ok_cases_by_day <- ok_cases_by_day %>% arrange(state, county, date)
# New Cases in Oklahoma for Rt calculation
ok_cases_by_day_for_Rt <- ok_cases_by_day %>% group_by(date) %>% summarise(OK_Cases = sum(cases_total, na.rm = TRUE))
ok_cases_by_day_for_Rt <- ok_cases_by_day_for_Rt %>% mutate(OK_New_Cases=OK_Cases - lag(OK_Cases,1))
ok_cases_by_day_for_Rt <- ok_cases_by_day_for_Rt %>% select(-OK_Cases) %>% rename(confirm=OK_New_Cases)
# Limit cases to dates from 1 April
ok_cases_by_day_for_Rt <- ok_cases_by_day_for_Rt %>% filter(date >= '2020-04-01')
# estimate Rt and nowcast/forecast cases by date of infection for Oklahoma
out_ok <- epinow(reported_cases = ok_cases_by_day_for_Rt, generation_time = generation_time,
                 rt = rt_opts(prior = list(mean = 1, sd = 0.2)), # replaces the earlier mean of 2 and sd of 0.1 used in the initial approach by Abbott et al.
                 delays = delay_opts(incubation_period, reporting_delay), return_output = TRUE,
                 verbose = TRUE, horizon = 14)
summary(out_ok)
plot(out_ok)
```
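One likely explanation (a hedged guess, not something established in the question): interactive() returns FALSE while knitr/rmarkdown renders a document, so options(mc.cores = ifelse(interactive(), 4, 1)) gives the MCMC sampler four cores when the script runs in the console but only one core when the report is knitted, which by itself would account for a several-fold slowdown. Setting the core count explicitly keeps the two runs comparable, e.g.:

# assumed fix: use the same number of cores in both environments (adjust to your machine)
options(mc.cores = 4)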

Is there a way to create a for loop with char variables to create several plots?

I am new to R and cannot find a solution to my problem. I guess the problem is pretty simple. I have a df with 4 variables: date, SKU_code, SKU_category and sales_amount. I want to create a for loop to plot n figures, with n equal to the number of SKU_category levels. In other words, this is the code I want to turn into a for loop. It works, but I have more than 50 categories, so it is not efficient:
dfsales_red_cat <- dfsales %>%
  group_by(date, SKU_code, SKU_category) %>%
  summarize(y = sum(sales_amount))

dfsales_red_C01 <- dfsales_red_cat %>% filter(SKU_category == "C01")
dfsales_red_C01 <- dfsales_red_C01[, c(1, 2, 4)]
ggplot(dfsales_red_C01, aes(x = date, y = y, colour = SKU_code, group = SKU_code)) +
  theme(legend.position = "none") + geom_line() +
  labs(title = "C01", y = 'Sales', x = 'Year')

dfsales_red_C02 <- dfsales_red_cat %>% filter(SKU_category == "C02")
dfsales_red_C02 <- dfsales_red_C02[, c(1, 2, 4)]
ggplot(dfsales_red_C02, aes(x = date, y = y, colour = SKU_code, group = SKU_code)) +
  theme(legend.position = "none") + geom_line() +
  labs(title = "C02", y = 'Sales', x = 'Year')
...and so on...
I tried this, but it didn't work:
dfsales_red_cat <- dfsales %>%
  group_by(date, SKU_code, SKU_category) %>%
  summarize(y = sum(sales_amount))

cat <- unique(dfsales_red_cat$SKU_category)
for (i in cat) {
  dfsales_red_i <- dfsales_red_cat %>% filter(SKU_category == i)
  dfsales_red_i <- dfsales_red_i[, c(1, 2, 4)]
  ggplot(dfsales_red_i, aes(x = date, y = y, colour = SKU_code, group = SKU_code)) +
    theme(legend.position = "none") + geom_line() +
    labs(title = i, y = 'Sales', x = 'Year')
}
Thank you for your help.
This is part of the original table dfsales_red_cat (>10000 rows) and formats are date, char, char, number:
DATE SKU_code SKU_category sales_amount
1 2016-01-03 Z0003 C13 298380.0
2 2016-01-03 Z0005 C10 225433.6
3 2016-01-03 Z0006 C10 2246883.8
4 2016-01-03 Z0007 C10 653144.4
5 2016-01-03 Z0009 C15 170233.4
You need to either print() the plot inside the loop, store the plots in a list and print them, or simply use facet_wrap(). First, to get something like your data:
library(ggplot2)

sample_dates <- seq(as.Date("2016-01-03"), as.Date("2016-12-03"), length.out = 50)
df <- expand.grid(
  date = sample_dates,
  SKU_code = c("Z0003", "Z0005", "Z0006"),
  SKU_category = c("C13", "C10")
)
df$date <- as.Date(df$date)
df$sales_amount <- runif(nrow(df))
cat <- unique(df$SKU_category)
Just print:
for (i in cat) {
  df_i <- subset(df, SKU_category == i)
  g <- ggplot(df_i, aes(x = date, y = sales_amount, colour = SKU_code, group = SKU_code)) +
    theme(legend.position = "none") + geom_line() +
    labs(title = i, y = 'Sales', x = 'Year')
  print(g)
}
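If the goal is one image file per category rather than plots in the viewer, you can also save each figure inside the same loop (an illustrative addition, not from the original answer; the file name pattern is made up):

ggsave(paste0("sales_", i, ".png"), g, width = 8, height = 5)  # add after print(g)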
Store in list:
plts <- lapply(cat, function(i) {
  df_i <- subset(df, SKU_category == i)   # subset inside the function so each plot gets its own data
  g <- ggplot(df_i, aes(x = date, y = sales_amount, colour = SKU_code, group = SKU_code)) +
    theme(legend.position = "none") + geom_line() +
    labs(title = i, y = 'Sales', x = 'Year')
  return(g)
})
plts[[1]]
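To render every stored plot at once (for example in an R Markdown chunk), you can print them in a loop; this is a usage note added here, not part of the original answer:

invisible(lapply(plts, print))  # draw each stored ggplot in turn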
Or:
ggplot(df, aes(x = date, y = sales_amount, colour = SKU_code, group = SKU_code)) +
  theme(legend.position = "none") + geom_line() +
  labs(y = 'Sales', x = 'Year') +
  facet_wrap(~ SKU_category)

Add multiple parallel planes to 3D plot with plotly package (datacamp exercise)

I am trying to visualize two parallel planes in a 3D plot with the plotly package (for those that have access to Datacamp, it is this exercise: https://campus.datacamp.com/courses/multiple-and-logistic-regression/multiple-regression?ex=9)
At some point in the construction of this graph, you need to create a matrix of fitted values for each plane, matching the categorical variable (plane_new = condition is new, plane_used = condition is used). The problem is that the two datasets I used as input (corresponding to the two levels of condition) have different numbers of observations. I can't seem to figure out how to make these matrices comparable, so that the planes are correctly fitted as geometric objects in the figure.
I hope an R-wizard can help me out ;). Here's my code:
# libraries
library(openintro)  # exemplary datasets
library(modelr)     # multivariate methods
library(broom)      # tidy
library(ggplot2)    # visualizing data
library(plotly)     # visualizing models in 3D

## Fit the model
# Interpretation: with every 1-unit increase of auction duration (unit = day), the price of the game
# decreases by .51 units in the response variable (total price), keeping startPr constant. The eventual
# predicted value also depends on condition (categorical), which gets its own y-intercept.
lm_ext <- lm(totalPr ~ duration + startPr + cond,
             data = marioKart)

## Visualize the model (including predictions)
# To visualize planes in a 3D plotly graph, the data frame needs to be split by the levels of the categorical variable
marioKart_ss_new  <- subset(marioKart, cond == "new")
marioKart_ss_used <- subset(marioKart, cond == "used")

duration_new  <- as.vector(marioKart_ss_new$duration)   # These vectors represent the linear model for condition = new
startPr_new   <- as.vector(marioKart_ss_new$startPr)
duration_used <- as.vector(marioKart_ss_used$duration)  # These vectors represent the linear model for condition = used
startPr_used  <- as.vector(marioKart_ss_used$startPr)

lm_new <- lm(totalPr ~ duration + startPr,               # Create two linear models
             data = marioKart_ss_new)
lm_used <- lm(totalPr ~ duration + startPr,
              data = marioKart_ss_used)

grid_new <- marioKart_ss_new %>%                         # Make two grids with all combinations of the levels of the two numerical explanatory variables
  data_grid(duration = seq_range(duration, by = 1),
            startPr  = seq_range(startPr, by = 1))
grid_used <- marioKart_ss_used %>%
  data_grid(duration = seq_range(duration, by = 1),
            startPr  = seq_range(startPr, by = 1))

lm_new <- lm(totalPr ~ duration + startPr,               # Make two separate models based on the two levels of the categorical explanatory variable
             data = marioKart_ss_new)
lm_used <- lm(totalPr ~ duration + startPr,
              data = marioKart_ss_used)

pred_new  <- augment(lm_new, newdata = grid_new)         # Predictions
pred_used <- augment(lm_used, newdata = grid_used)

plane_new <- matrix(pred_new$.fitted,                    # Matrix of predictions as input for the planes
                    nrow = 70,
                    ncol = 70)
plane_used <- matrix(pred_used$.fitted,
                     nrow = 55,
                     ncol = 55)

plot <- plot_ly(data = marioKart,                        # 3D plot of data points
                z = ~totalPr,
                x = ~duration,
                y = ~startPr,
                opacity = 0.6) %>%
  add_markers(color = ~cond)

plot %>%                                                 # Add planes
  add_surface(x = ~duration_new,                         ### NOT WORKING, WAIT FOR DATACAMP
              y = ~startPr_new,
              z = ~plane_new,
              showscale = FALSE) %>%
  add_surface(x = ~duration_used,
              y = ~startPr_used,
              z = ~plane_used,
              showscale = FALSE)
No code wizard here, but I asked for the same thing:
library(tidyverse)
library(modelr)

# mario_kart and mod below correspond to marioKart and lm_ext in the question
grid <- mario_kart %>%
  modelr::data_grid(
    duration = seq_range(duration, n = 70),
    startPr = seq_range(startPr, n = 70),
    cond
  )

library(broom)
tidy_planes <- mod %>%
  augment(newdata = grid)

x <- unique(grid$duration)
y <- unique(grid$startPr)

plane0 <- tidy_planes %>%
  filter(cond == "new") %>%
  pull(.fitted) %>%
  matrix(nrow = length(x), byrow = TRUE)

plane1 <- tidy_planes %>%
  filter(cond == "used") %>%
  pull(.fitted) %>%
  matrix(nrow = length(x), byrow = TRUE)
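To then draw the planes, something along these lines should work (an untested sketch added here, not part of the original answer). Because both planes come from the same 70 x 70 grid, they now have identical dimensions, which was the original sticking point; depending on how data_grid() orders the rows, you may need to transpose the z matrices (t(plane0), t(plane1)) to get the orientation right:

plot_ly(data = mario_kart, x = ~duration, y = ~startPr, z = ~totalPr, opacity = 0.6) %>%
  add_markers(color = ~cond) %>%
  add_surface(x = x, y = y, z = plane0, showscale = FALSE) %>%
  add_surface(x = x, y = y, z = plane1, showscale = FALSE)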
