Error in custom measure parsing external function while using parallelization - parallel-processing

I defined a custom measure that transforms prediction$data with an external function before evaluating a standard measure such as rmse. If I tune the parameters without parallelization everything goes smoothly, but as soon as I start a parallelized session the external function can no longer be found, although it is declared in the global environment.
library(compiler)
library(mlr)
library(parallelMap)
library(parallel)
# define function
inverse_fun = function(x){x^2}
inverse_fun = Vectorize(inverse_fun)
inverse_fun = cmpfun(inverse_fun, options=list(suppressUndefined=T))
assign('inverse_fun', inverse_fun, envir = .GlobalEnv)
tuning_criterion = 'rmse'
# define a new measure that applies inverse_fun to prediction and evaluates rmse
original_measure = eval(parse(text=tuning_criterion))
# Measure function: applies inverse_fun to the truth and response columns of
# pred$data, then delegates to original_measure$fun with the modified pred.
# Note: inverse_fun and original_measure are not arguments; they are free
# variables that are looked up outside this function when it is called.
transf_measure_fun = function(task, model, pred, feats, extra.args){
# transform back to original value
pred$data$truth = inverse_fun(pred$data$truth)
pred$data$response = inverse_fun(pred$data$response)
# score the back-transformed prediction with the wrapped measure
return(original_measure$fun(task, model, pred, feats, extra.args))
}
transf_measure = makeMeasure(
id = 'ii', name = 'ccc',
properties = original_measure$properties,
minimize = original_measure$minimize, best = original_measure$best, worst = original_measure$worst,
fun = transf_measure_fun)
transf_measure = setAggregation(transf_measure, original_measure$aggr)
aggregated_measure = list(transf_measure, setAggregation(transf_measure, test.sd), setAggregation(transf_measure, train.mean), setAggregation(transf_measure, train.sd))
# train and predict
lrn.lm = makeLearner("regr.ksvm")
mod.lm = train(lrn.lm, bh.task)
task.pred.lm = predict(mod.lm, task = bh.task)
# inverse function on prediction
inv_pred = task.pred.lm
inv_pred$data$truth = inverse_fun(inv_pred$data$truth)
inv_pred$data$response = inverse_fun(inv_pred$data$response)
# check for performance match
performance(task.pred.lm, transf_measure)
performance(inv_pred, rmse)
# tuning
discrete_ps = makeParamSet(
makeDiscreteParam("C", values = c(0.5, 1.0, 1.5, 2.0)),
makeDiscreteParam("sigma", values = c(0.5, 1.0, 1.5, 2.0))
)
ctrl = makeTuneControlGrid()
rdesc = makeResampleDesc("CV", iters = 3L)
# this works
res = tuneParams(lrn.lm, task = bh.task, resampling = rdesc,
par.set = discrete_ps, control = ctrl, measures = transf_measure)
# try with parallelization - doesn't work
current_os = Sys.info()[['sysname']] # detect OS
if (current_os == "Windows"){
set.seed(1, "L'Ecuyer-CMRG")
parallelStart(mode = "socket", cpus = detectCores(), show.info = F)
parallel::clusterSetRNGStream(iseed = 1)
} else if (current_os == "Linux"){
set.seed(1, "L'Ecuyer-CMRG")
parallelStart(mode = "multicore", cpus = detectCores(), show.info = F)
} else {
cat('\n\n#### OS not recognized, check parallelization init\n\n')
}
res = tuneParams(lrn.lm, task = bh.task, resampling = rdesc,
par.set = discrete_ps, control = ctrl, measures = transf_measure)
parallelStop()
getting the following error:
Error in stopWithJobErrorMessages(inds, vcapply(result.list[inds], as.character)) :
Errors occurred in 16 slave jobs, displaying at most 10 of them:
00001: Error in inverse_fun(pred$data$truth) :
cannot find "inverse_fun"
I tried to pass the function with extra.args but I get an error
original_measure = eval(parse(text=tuning_criterion))
transf_measure_fun = function(task, model, pred, feats, extra.args){
# transform back to original value
pred$data$truth = extra.args$inv_fun(pred$data$truth)
pred$data$response = extra.args$inv_fun(pred$data$response)
return(original_measure$fun(task, model, pred, feats, extra.args))
}
transf_measure = makeMeasure(
id = 'ii', name = 'ccc',
properties = original_measure$properties,
minimize = original_measure$minimize, best = original_measure$best, worst = original_measure$worst,
fun = transf_measure_fun(extra.args = list(inv_fun = inverse_fun))
)
and I get
Error in FUN(X[[i]], ...) : argument "pred" is missing, with no default
Thanks in advance

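You need to export your custom objects using parallelMap::parallelExport(). In socket mode each worker is a separate R process that does not share the master's global environment, so objects such as inverse_fun are only visible on the workers after they have been exported.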
library(mlr)
#> Loading required package: ParamHelpers
library(parallelMap)
library(compiler)
# define function
inverse_fun = function(x){x^2}
inverse_fun = Vectorize(inverse_fun)
inverse_fun = cmpfun(inverse_fun, options=list(suppressUndefined=T))
assign('inverse_fun', inverse_fun, envir = .GlobalEnv)
tuning_criterion = 'rmse'
# define a new measure that applies inverse_fun to prediction and evaluates rmse
original_measure = eval(parse(text=tuning_criterion))
# Measure function handed to makeMeasure(): back-transforms the truth and
# response columns with inverse_fun, then scores the result with the wrapped
# original_measure. Both helpers are looked up outside this function.
transf_measure_fun = function(task, model, pred, feats, extra.args) {
  # work on the prediction table, then put it back into pred
  pred_data = pred$data
  pred_data$truth = inverse_fun(pred_data$truth)
  pred_data$response = inverse_fun(pred_data$response)
  pred$data = pred_data
  original_measure$fun(task, model, pred, feats, extra.args)
}
transf_measure = makeMeasure(
id = 'ii', name = 'ccc',
properties = original_measure$properties,
minimize = original_measure$minimize, best = original_measure$best, worst = original_measure$worst,
fun = transf_measure_fun)
transf_measure = setAggregation(transf_measure, original_measure$aggr)
# tuning
discrete_ps = makeParamSet(
makeDiscreteParam("C", values = c(0.5, 1.0, 1.5, 2.0)),
makeDiscreteParam("sigma", values = c(0.5, 1.0, 1.5, 2.0))
)
ctrl = makeTuneControlGrid()
rdesc = makeResampleDesc("CV", iters = 3L)
lrn.lm = makeLearner("regr.ksvm")
set.seed(1, "L'Ecuyer-CMRG")
parallelStart(mode = "socket", cpus = 2, show.info = F)
parallelExport("inverse_fun", "original_measure")
res = tuneParams(lrn.lm, task = bh.task, resampling = rdesc,
par.set = discrete_ps, control = ctrl, measures = transf_measure)
#> [Tune] Started tuning learner regr.ksvm for parameter set:
#> Type len Def Constr Req Tunable Trafo
#> C discrete - - 0.5,1,1.5,2 - TRUE -
#> sigma discrete - - 0.5,1,1.5,2 - TRUE -
#> With control class: TuneControlGrid
#> Imputation value: Inf
#> [Tune] Result: C=2; sigma=0.5 : ii.test.rmse=270.8008465
parallelStop()
Created on 2019-10-08 by the reprex package (v0.3.0)
Session info
devtools::session_info()
#> ─ Session info ──────────────────────────────────────────────────────────
#> setting value
#> version R version 3.6.1 (2019-07-05)
#> os Arch Linux
#> system x86_64, linux-gnu
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Europe/Berlin
#> date 2019-10-08
#>
#> ─ Packages ──────────────────────────────────────────────────────────────
#> ! package * version date lib
#> assertthat 0.2.1 2019-03-21 [1]
#> backports 1.1.5 2019-10-02 [1]
#> BBmisc 1.11 2017-03-10 [1]
#> callr 3.3.2 2019-09-22 [1]
#> checkmate 1.9.4 2019-07-04 [1]
#> cli 1.1.0 2019-03-19 [1]
#> colorspace 1.4-1 2019-03-18 [1]
#> crayon 1.3.4 2017-09-16 [1]
#> data.table 1.12.4 2019-10-03 [1]
#> desc 1.2.0 2018-05-01 [1]
#> devtools 2.2.1 2019-09-24 [1]
#> digest 0.6.21 2019-09-20 [1]
#> dplyr 0.8.3 2019-07-04 [1]
#> ellipsis 0.3.0 2019-09-20 [1]
#> evaluate 0.14 2019-05-28 [1]
#> fastmatch 1.1-0 2017-01-28 [1]
#> fs 1.3.1 2019-05-06 [1]
#> ggplot2 3.2.1 2019-08-10 [1]
#> glue 1.3.1 2019-03-12 [1]
#> gtable 0.3.0 2019-03-25 [1]
#> highr 0.8 2019-03-20 [1]
#> htmltools 0.4.0 2019-10-04 [1]
#> kernlab 0.9-27 2018-08-10 [1]
#> knitr 1.25 2019-09-18 [1]
#> lattice 0.20-38 2018-11-04 [1]
#> lazyeval 0.2.2 2019-03-15 [1]
#> magrittr 1.5 2014-11-22 [1]
#> Matrix 1.2-17 2019-03-22 [1]
#> memoise 1.1.0 2017-04-21 [1]
#> mlr * 2.15.0.9000 2019-10-08 [1]
#> munsell 0.5.0 2018-06-12 [1]
#> parallelMap * 1.4 2019-05-17 [1]
#> ParamHelpers * 1.12 2019-01-18 [1]
#> pillar 1.4.2 2019-06-29 [1]
#> pkgbuild 1.0.5 2019-08-26 [1]
#> pkgconfig 2.0.3 2019-09-22 [1]
#> pkgload 1.0.2 2018-10-29 [1]
#> prettyunits 1.0.2 2015-07-13 [1]
#> processx 3.4.1 2019-07-18 [1]
#> ps 1.3.0 2018-12-21 [1]
#> purrr 0.3.2 2019-03-15 [1]
#> R6 2.4.0 2019-02-14 [1]
#> Rcpp 1.0.2 2019-07-25 [1]
#> remotes 2.1.0 2019-06-24 [1]
#> rlang 0.4.0 2019-06-25 [1]
#> rmarkdown 1.16 2019-10-01 [1]
#> rprojroot 1.3-2 2018-01-03 [1]
#> scales 1.0.0 2018-08-09 [1]
#> sessioninfo 1.1.1 2018-11-05 [1]
#> stringi 1.4.3 2019-03-12 [1]
#> stringr 1.4.0 2019-02-10 [1]
#> R survival 2.44-1.1 <NA> [2]
#> testthat 2.2.1 2019-07-25 [1]
#> tibble 2.1.3 2019-06-06 [1]
#> tidyselect 0.2.5 2018-10-11 [1]
#> usethis 1.5.1.9000 2019-10-07 [1]
#> withr 2.1.2 2018-03-15 [1]
#> xfun 0.10 2019-10-01 [1]
#> XML 3.98-1.20 2019-06-06 [1]
#> yaml 2.2.0 2018-07-25 [1]
#> source
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> local
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> <NA>
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#> Github (r-lib/usethis#3015465)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.1)
#> CRAN (R 3.6.0)
#> CRAN (R 3.6.0)
#>
#> [1] /home/pjs/R/x86_64-pc-linux-gnu-library/3.6
#> [2] /usr/lib/R/library
#>
#> R ── Package was removed from disk.

Related

data.table row value depend on previous value in R

I have a data.table x, it has 2 columns a, b.
I want to calculate a column c.
library(data.table)
x = data.table(a = c(1:5), b = c(1,0,2,3,6), c = NA)
x$a[1] = NA
x$b[1] = NA
x
#> a b c
#> <int> <num> <lgcl>
#> 1: NA NA NA
#> 2: 2 0 NA
#> 3: 3 2 NA
#> 4: 4 3 NA
#> 5: 5 6 NA
The algorithm is:
c[i] = ifelse(a[i] < b[i] & c[i-1] < b[i], a[i], b[i])
I don't want to use a for loop, because it's too slow.
I want to use data.table functions, or a fast method like this:
x$c = fifelse(x$a < x$b & lag(x$c) < x$b, x$a, x$b)
But it's not working, because x$c is still being calculated at that point.
Any solution for this?
Thanks for help
Janos
You could use Reduce with the accumulate=T option:
library(data.table)
x = data.table(a = c(1:5), b = c(1,0,2,3,6), c = NA)
x$a[1] = NA
x$b[1] = NA
x[,c:=Reduce(f = function(prev,val) ifelse((val$a < val$b & prev<val$b),val$a,val$b),
x = split(.SD[-1],seq_len(.N-1)), init = NA
,accumulate = T)][]
#> a b c
#> <int> <num> <num>
#> 1: NA NA NA
#> 2: 2 0 0
#> 3: 3 2 2
#> 4: 4 3 3
#> 5: 5 6 5
Reduce passes the result of the previous row calculation to calculate the next row.
accumulate=T returns the intermediate results instead of only the last row.

Parallelization on resampling within a stacked learner (ensemble/stack of classification learners) doesn't work

The code below works fine; however, I would like to run it in parallel. I have tried different plans with future and future.apply but couldn't get it to work. Any help is appreciated.
I am running on Windows, with 8 cores.
library(mlr3verse)
library(future.apply)
#> Warning: package 'future.apply' was built under R version 3.6.3
#> Loading required package: future
#> Warning: package 'future' was built under R version 3.6.3
library(future)
future::plan(multicore)
tsk_clf = tsk("sonar")
tsk_clf$col_roles$stratum = tsk_clf$target_names #stratification
lda = lrn("classif.lda", predict_type = "response")
svm = lrn("classif.svm", type = "C-classification", kernel= "radial",predict_type = "response")
xgb = lrn("classif.xgboost", predict_type = "response")
ranger_lrn = lrn("classif.ranger", predict_type = "response",importance ="permutation")
level_1 =
gunion(list(
PipeOpLearnerCV$new(lda, id = "lda_cv_l1"),
PipeOpLearnerCV$new(svm, id = "svm_cv_l1"),
PipeOpLearnerCV$new(xgb, id = "xgb_cv_l1")
))
level_2 = level_1 %>>%
PipeOpFeatureUnion$new(3, id = "u2") %>>%
PipeOpLearner$new(ranger_lrn,
id = "ranger_l2")
lrn = GraphLearner$new(level_2)
lrn$
train(tsk_clf)$
predict(tsk_clf)$
score()
#> INFO [17:04:06.984] Applying learner 'classif.lda' on task 'sonar' (iter 3/3)
#> INFO [17:04:07.052] Applying learner 'classif.lda' on task 'sonar' (iter 1/3)
#> INFO [17:04:07.097] Applying learner 'classif.lda' on task 'sonar' (iter 2/3)
#> INFO [17:04:07.340] Applying learner 'classif.svm' on task 'sonar' (iter 1/3)
#> INFO [17:04:07.382] Applying learner 'classif.svm' on task 'sonar' (iter 2/3)
#> INFO [17:04:07.430] Applying learner 'classif.svm' on task 'sonar' (iter 3/3)
#> INFO [17:04:08.627] Applying learner 'classif.xgboost' on task 'sonar' (iter 3/3)
#> INFO [17:04:08.672] Applying learner 'classif.xgboost' on task 'sonar' (iter 2/3)
#> INFO [17:04:08.715] Applying learner 'classif.xgboost' on task 'sonar' (iter 1/3)
#> classif.ce
#> 0.01923077
Created on 2020-12-15 by the reprex package (v0.3.0)
devtools::session_info()
#> - Session info ----------------------------------------------------------
#> setting value
#> version R version 3.6.1 (2019-07-05)
#> os Windows 10 x64
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.1252
#> ctype English_United States.1252
#> tz Europe/Berlin
#> date 2020-12-15
#>
#> - Packages --------------------------------------------------------------
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0)
#> backports 1.1.4 2019-04-10 [1] CRAN (R 3.6.0)
#> bbotk 0.2.0 2020-07-24 [1] CRAN (R 3.6.3)
#> callr 3.5.1 2020-10-13 [1] CRAN (R 3.6.3)
#> checkmate 2.0.0 2020-02-06 [1] CRAN (R 3.6.3)
#> class 7.3-17 2020-04-26 [1] CRAN (R 3.6.3)
#> cli 2.1.0 2020-10-12 [1] CRAN (R 3.6.3)
#> codetools 0.2-16 2018-12-24 [1] CRAN (R 3.6.0)
#> colorspace 1.4-1 2019-03-18 [1] CRAN (R 3.6.3)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0)
#> data.table 1.13.0 2020-07-24 [1] CRAN (R 3.6.3)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 3.6.3)
#> devtools 2.3.2 2020-09-18 [1] CRAN (R 3.6.3)
#> digest 0.6.18 2018-10-10 [1] CRAN (R 3.6.0)
#> dplyr 1.0.2 2020-08-18 [1] CRAN (R 3.6.3)
#> e1071 1.7-3 2019-11-26 [1] CRAN (R 3.6.3)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 3.6.3)
#> evaluate 0.13 2019-02-12 [1] CRAN (R 3.6.0)
#> fansi 0.4.0 2018-10-05 [1] CRAN (R 3.6.0)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 3.6.3)
#> future * 1.18.0 2020-07-09 [1] CRAN (R 3.6.3)
#> future.apply * 1.6.0 2020-07-01 [1] CRAN (R 3.6.3)
#> generics 0.1.0 2020-10-31 [1] CRAN (R 3.6.3)
#> ggplot2 3.3.2 2020-06-19 [1] CRAN (R 3.6.3)
#> globals 0.12.5 2019-12-07 [1] CRAN (R 3.6.1)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 3.6.3)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 3.6.3)
#> highr 0.8 2019-03-20 [1] CRAN (R 3.6.0)
#> htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.6.0)
#> knitr 1.22 2019-03-08 [1] CRAN (R 3.6.0)
#> lattice 0.20-41 2020-04-02 [1] CRAN (R 3.6.3)
#> lgr 0.3.4 2020-03-20 [1] CRAN (R 3.6.3)
#> lifecycle 0.2.0 2020-03-06 [1] CRAN (R 3.6.3)
#> listenv 0.8.0 2019-12-05 [1] CRAN (R 3.6.3)
#> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0)
#> MASS 7.3-52 2020-08-18 [1] CRAN (R 3.6.3)
#> Matrix 1.2-18 2019-11-27 [1] CRAN (R 3.6.3)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 3.6.3)
#> mlr3 * 0.5.0 2020-08-07 [1] CRAN (R 3.6.3)
#> mlr3filters * 0.3.0 2020-07-18 [1] CRAN (R 3.6.3)
#> mlr3learners * 0.3.0 2020-08-29 [1] CRAN (R 3.6.3)
#> mlr3measures 0.2.0 2020-06-27 [1] CRAN (R 3.6.3)
#> mlr3misc 0.5.0 2020-08-13 [1] CRAN (R 3.6.3)
#> mlr3pipelines * 0.2.1 2020-08-18 [1] CRAN (R 3.6.3)
#> mlr3tuning * 0.2.0 2020-07-28 [1] CRAN (R 3.6.3)
#> mlr3verse * 0.1.3 2020-07-06 [1] CRAN (R 3.6.3)
#> mlr3viz * 0.4.0 2020-10-05 [1] CRAN (R 3.6.3)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 3.6.3)
#> paradox * 0.4.0 2020-07-21 [1] CRAN (R 3.6.3)
#> pillar 1.4.6 2020-07-10 [1] CRAN (R 3.6.3)
#> pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 3.6.3)
#> pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.6.0)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 3.6.3)
#> prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.6.0)
#> processx 3.4.4 2020-09-03 [1] CRAN (R 3.6.3)
#> ps 1.3.4 2020-08-11 [1] CRAN (R 3.6.3)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 3.6.3)
#> R6 2.4.1 2019-11-12 [1] CRAN (R 3.6.3)
#> ranger 0.12.1 2020-01-10 [1] CRAN (R 3.6.3)
#> Rcpp 1.0.1 2019-03-17 [1] CRAN (R 3.6.0)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 3.6.3)
#> rlang 0.4.7 2020-07-09 [1] CRAN (R 3.6.3)
#> rmarkdown 1.12 2019-03-14 [1] CRAN (R 3.6.0)
#> rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.6.0)
#> scales 1.1.1 2020-05-11 [1] CRAN (R 3.6.3)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.3)
#> stringi 1.4.3 2019-03-12 [1] CRAN (R 3.6.0)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0)
#> testthat 2.3.2 2020-03-02 [1] CRAN (R 3.6.3)
#> tibble 3.0.4 2020-10-12 [1] CRAN (R 3.6.3)
#> tidyselect 1.1.0 2020-05-11 [1] CRAN (R 3.6.3)
#> usethis 1.6.3 2020-09-17 [1] CRAN (R 3.6.3)
#> uuid 0.1-4 2020-02-26 [1] CRAN (R 3.6.3)
#> vctrs 0.3.4 2020-08-29 [1] CRAN (R 3.6.3)
#> withr 2.3.0 2020-09-22 [1] CRAN (R 3.6.3)
#> xfun 0.6 2019-04-02 [1] CRAN (R 3.6.0)
#> xgboost 1.2.0.1 2020-09-02 [1] CRAN (R 3.6.3)
#> yaml 2.2.0 2018-07-25 [1] CRAN (R 3.6.0)
#>
#> [1] C:/Users/mshey/Anaconda3/envs/rstudio/lib/R/library
Looks fine to me.
Note that multicore mode is not available on Windows and falls back to sequential. Might this be the culprit here?
PS: Next time you face a parallelization/runtime issue, benchmarking the runtime might help ;)
library(mlr3verse)
#> Loading required package: mlr3
#> Loading required package: mlr3filters
#> Loading required package: mlr3learners
#> Loading required package: mlr3pipelines
#> Loading required package: mlr3tuning
#> Loading required package: mlr3viz
#> Loading required package: paradox
library(future.apply)
#> Loading required package: future
library(future)
library(lgr)
lgr::get_logger("mlr3")$set_threshold("fatal")
tsk_clf <- tsk("sonar")
tsk_clf$col_roles$stratum <- tsk_clf$target_names # stratification
lda <- lrn("classif.lda", predict_type = "response")
svm <- lrn("classif.svm", type = "C-classification", kernel = "radial", predict_type = "response")
xgb <- lrn("classif.xgboost", predict_type = "response")
ranger_lrn <- lrn("classif.ranger", predict_type = "response", importance = "permutation")
level_1 <-
gunion(list(
PipeOpLearnerCV$new(lda, id = "lda_cv_l1"),
PipeOpLearnerCV$new(svm, id = "svm_cv_l1"),
PipeOpLearnerCV$new(xgb, id = "xgb_cv_l1")
))
level_2 <- level_1 %>>%
PipeOpFeatureUnion$new(3, id = "u2") %>>%
PipeOpLearner$new(ranger_lrn,
id = "ranger_l2"
)
lrn <- GraphLearner$new(level_2)
# parallel
plan(multicore)
time <- Sys.time()
lrn$
train(tsk_clf)$
predict(tsk_clf)$
score()
#> classif.ce
#> 0.01923077
Sys.time() - time
#> Time difference of 2.994049 secs
# sequential
plan(sequential)
lrn$
train(tsk_clf)$
predict(tsk_clf)$
score()
#> classif.ce
#> 0.01923077
# NOTE(review): `time` was set before the parallel run and is not reset in
# this section, so the difference below also includes the parallel run's
# duration rather than the sequential run alone.
Sys.time() - time
#> Time difference of 4.276779 secs
Created on 2020-12-20 by the reprex package (v0.3.0)
Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.0.3 Patched (2020-12-10 r79607)
#> os macOS Big Sur 10.16
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Europe/Berlin
#> date 2020-12-20
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.3)
#> backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.3)
#> bbotk 0.2.2 2020-12-20 [1] Github (mlr-org/bbotk#5acf598)
#> callr 3.5.1 2020-10-13 [1] CRAN (R 4.0.3)
#> checkmate 2.0.0 2020-02-06 [1] CRAN (R 4.0.3)
#> class 7.3-17 2020-04-26 [2] CRAN (R 4.0.3)
#> cli 2.2.0 2020-11-20 [1] CRAN (R 4.0.3)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.0.3)
#> colorspace 2.0-0 2020-11-11 [1] CRAN (R 4.0.3)
#> crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.3)
#> data.table 1.13.4 2020-12-08 [1] CRAN (R 4.0.3)
#> desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.3)
#> devtools 2.3.2 2020-09-18 [1] CRAN (R 4.0.3)
#> digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.3)
#> dplyr 1.0.2 2020-08-18 [1] CRAN (R 4.0.3)
#> e1071 1.7-4 2020-10-14 [1] CRAN (R 4.0.3)
#> ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.3)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.3)
#> fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.3)
#> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.3)
#> future * 1.21.0 2020-12-10 [1] CRAN (R 4.0.3)
#> future.apply * 1.6.0 2020-07-01 [1] CRAN (R 4.0.3)
#> generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.3)
#> ggplot2 3.3.2 2020-06-19 [1] CRAN (R 4.0.3)
#> globals 0.14.0 2020-11-22 [1] CRAN (R 4.0.3)
#> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.3)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.3)
#> highr 0.8 2019-03-20 [1] CRAN (R 4.0.3)
#> htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.3)
#> knitr 1.30 2020-09-22 [1] CRAN (R 4.0.3)
#> lattice 0.20-41 2020-04-02 [2] CRAN (R 4.0.3)
#> lgr * 0.4.1 2020-10-20 [1] CRAN (R 4.0.3)
#> lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.3)
#> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.0.3)
#> magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.3)
#> MASS 7.3-53 2020-09-09 [2] CRAN (R 4.0.3)
#> Matrix 1.2-18 2019-11-27 [2] CRAN (R 4.0.3)
#> memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.3)
#> mlr3 * 0.9.0-9000 2020-12-20 [1] Github (mlr-org/mlr3#f9ac360)
#> mlr3filters * 0.4.0 2020-11-10 [1] CRAN (R 4.0.3)
#> mlr3learners * 0.4.3 2020-12-08 [1] CRAN (R 4.0.3)
#> mlr3measures 0.3.0 2020-10-05 [1] CRAN (R 4.0.3)
#> mlr3misc 0.6.0 2020-11-17 [1] CRAN (R 4.0.3)
#> mlr3pipelines * 0.3.2 2020-12-17 [1] CRAN (R 4.0.3)
#> mlr3tuning * 0.5.0 2020-12-07 [1] CRAN (R 4.0.3)
#> mlr3verse * 0.1.3 2020-07-06 [1] CRAN (R 4.0.3)
#> mlr3viz * 0.5.0 2020-11-02 [1] CRAN (R 4.0.3)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.3)
#> paradox * 0.6.0-9000 2020-12-20 [1] Github (mlr-org/paradox#99cfc0f)
#> parallelly 1.22.0 2020-12-13 [1] CRAN (R 4.0.3)
#> pillar 1.4.7 2020-11-20 [1] CRAN (R 4.0.3)
#> pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 4.0.3)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.3)
#> pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.3)
#> prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.3)
#> processx 3.4.5 2020-11-30 [1] CRAN (R 4.0.3)
#> ps 1.5.0 2020-12-05 [1] CRAN (R 4.0.3)
#> purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.3)
#> R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.2)
#> ranger 0.12.1 2020-01-10 [1] CRAN (R 4.0.3)
#> Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.3)
#> remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.3)
#> rlang 0.4.9 2020-11-26 [1] CRAN (R 4.0.3)
#> rmarkdown 2.6 2020-12-14 [1] CRAN (R 4.0.3)
#> rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.3)
#> scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.3)
#> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.3)
#> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.3)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.3)
#> testthat 3.0.1 2020-12-20 [1] Github (r-lib/testthat#e99155a)
#> tibble 3.0.4 2020-10-12 [1] CRAN (R 4.0.3)
#> tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.3)
#> usethis 2.0.0.9000 2020-12-20 [1] Github (r-lib/usethis#c1e8ed6)
#> uuid 0.1-4 2020-02-26 [1] CRAN (R 4.0.3)
#> vctrs 0.3.6 2020-12-17 [1] CRAN (R 4.0.3)
#> withr 2.3.0 2020-09-22 [1] CRAN (R 4.0.3)
#> xfun 0.19 2020-10-30 [1] CRAN (R 4.0.3)
#> xgboost 1.2.0.1 2020-09-02 [1] CRAN (R 4.0.3)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.3)
#>
#> [1] /Users/pjs/Library/R/4.0/library
#> [2] /Library/Frameworks/R.framework/Versions/4.0/Resources/library

Trying to write the division algorithm in Sympy which apparently has its own truth (Boolean variables)

So I was trying to write out the code to perform the one variable polynomial division algorithm using the open source SymPy. It already has a leading term function, so I thought it would be easy. The book, Ideals, Varieties, and Algorithms by Cox, Little and O'Shea gives the pseudo-code to be:
Input: g, f
Output: q, r
q := 0; r := f
WHILE r <> 0 AND LT (g) divides LT (r ) DO
q := q + LT (r )/ LT (g)
r := r − ( LT (r )/ LT (g))g
So my python code is:
from sympy import *
x = symbols('x')
f= x**4-5*x**2-2*x+7
g = 3*x**2 + 4*x - 2
q = 0
r = f
while (r != 0 & degree(g) < LT(r ) ):
q = q + LT (r )/ LT (g)
r = r - ( LT (r )/ LT (g))*g
print(q,r)
But this gives an error message: TypeError: unsupported operand type(s) for &: 'int' and 'Integer'. So in the console, I query type(r != 0) and it gives bool, but for type(degree(g) < degree(r)), it gives sympy.logic.boolalg.BooleanTrue. I tried to look up the documentation for sympy.logic.boolalg.BooleanTrue, and I couldn't find how to make it an object so that I can combine with a bool using a logical &.
Why does sympy have its own boolean variables and how do I get it to play well with the normal boolean variables?
SymPy has its own Boolean class because it needs to be usable in Basic expression construction and to support methods like subs etc., so that it can be used consistently within the SymPy architecture.
The Boolean type can be used with bool e.g.:
In [7]: S.true & True
Out[7]: True
In [8]: S.true and True
Out[8]: True
The problem in your example is actually to do with operator precedence:
In [9]: r != 0 & degree(g) < LT(r )
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-9-f2737cccd83e> in <module>
----> 1 r != 0 & degree(g) < LT(r )
TypeError: unsupported operand type(s) for &: 'int' and 'Integer'
In [10]: (r != 0) & (degree(g) < LT(r))
Out[10]:
4
2 < x
In [11]: r != (0 & degree(g)) < LT(r)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-11-c7a6030e1401> in <module>
----> 1 r != (0 & degree(g)) < LT(r)
TypeError: unsupported operand type(s) for &: 'int' and 'Integer'
It is 0 & degree(g) which is evaluated first and SymPy's Integer type can not be used with & which is used for symbolic "and" in SymPy:
In [14]: (x<1) & (x>-1)
Out[14]: x > -1 ∧ x < 1
You probably meant to use Python's and operator which has a lower precedence than &:
In [15]: r != 0 and degree(g) < LT(r)
Out[15]:
4
2 < x
This will not actually work though in the while loop because the Boolean expression is indeterminate:
---> 10 while (r != 0 and degree(g) < LT(r ) ):
11 q = q + LT (r )/ LT (g)
12 r = r - ( LT (r )/ LT (g))*g
~/current/sympy/sympy/sympy/core/relational.py in __nonzero__(self)
382
383 def __nonzero__(self):
--> 384 raise TypeError("cannot determine truth value of Relational")
385
386 __bool__ = __nonzero__
TypeError: cannot determine truth value of Relational
This is because the truth of the condition is unknowable:
In [18]: degree(g) < LT(r)
Out[18]:
4
2 < x
I think what you meant to check there is rem(LT(g), LT(r)) == 0.
from sympy import *
x = symbols('x')
f= x**4-5*x**2-2*x+7
g = 3*x**2 + 4*x - 2
q = 0
r = f
while (r != 0 and rem(LT(g), LT(r)) == 0):
q = q + LT (r )/ LT (g)
r = r - ( LT (r )/ LT (g))*g
print(q,r)
Output:
0 x**4 - 5*x**2 - 2*x + 7
Thanks so much, Oscar (though I still don't understand the need for the sympy boolean to be different). Your response allowed me to find other issues and the following code works (which is in function form):
from sympy import *
x = symbols('x')
# the division algorithm, returns the quotient and remainder
def DivAlg(f, g):
    """Divide the one-variable polynomial f by g.

    Repeatedly cancels the leading term of the remainder against the leading
    term of g while the remainder is nonzero and deg(g) <= deg(remainder).

    Returns the list [q, r] holding the quotient q and the remainder r.
    """
    q = 0  # quotient
    r = f  # remainder
    # `and` short-circuits, so degree(r) is only evaluated for a nonzero r
    # (the bitwise `&` always evaluated both sides).
    while r != 0 and degree(g) <= degree(r):
        term = LT(r) / LT(g)  # ratio of the leading terms, computed once
        q = q + term
        r = r - expand(term * g)  # the expand command is necessary!
    return [q, r]
Can try DivAlg(x**4-x**3-4*x**2-5*x-3, x**2+3*x-5).
I also used this to write a function that uses the Euclidean algorithm to find the GCD of two one-variable polynomials:
def GCD(f, g):
    """Euclidean algorithm for two one-variable polynomials f and g.

    Returns the last nonzero remainder divided by its leading coefficient.
    """
    a, b = f, g
    # replace (a, b) by (b, remainder of a divided by b) until b vanishes
    while b != 0:
        a, b = b, DivAlg(a, b)[1]
    return a / LC(a)  # LC(a) is the coefficient of highest order term.
You can try it out with print(GCD(x**3-x**2-x-2,x**4-x**3-4*x**2-5*x-3)).
This is the code needed to do problem 1.5.8 (p. 46) of the fun book, Ideals, Varieties, and Algorithms by Cox, Little, and O'Shea. I should note that sympy - of course - already has an implementation of this with the following command: gcd(x**3-x**2-x-2,x**4-x**3-4*x**2-5*x-3,domain=QQ)

Cannot download daily data with weathercan

I am using weathercan package (https://github.com/ropensci/weathercan/issues)
I ran some simple code to download daily data from station 28011 (Regina RCS),
but I got an error message:
Error in 1:grep("Date/Time", preamble$V1) : argument of length 0
Need help.
library(weathercan)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
####################################################
#download data
#46367 = Hudson Bay
#####################################################################
stations_search("Regina RCS", interval="hour")
#> # A tibble: 1 x 14
#> prov station_name station_id climate_id WMO_id TC_id lat lon elev
#> <chr> <chr> <int> <fct> <int> <fct> <dbl> <dbl> <dbl>
#> 1 SK REGINA RCS 28011 4016699 71514 WDJ 50.4 -105. 577.
#> # ... with 5 more variables: tz <chr>, interval <chr>, start <int>,
#> # end <int>, normals <lgl>
#stations_search("Kamloops", interval = "hour")
#kam <- weather_dl(station_ids = 10805, start = "2018-02-01", end = "2018-04-15")
Regina<- weather_dl(station_ids = 28011, start = "2019-02-01", interval = "day", quiet = TRUE)
#> Error in 1:grep("Date/Time", preamble$V1): argument of length 0
Created on 2019-09-27 by the reprex package (v0.3.0)

Why is a lot of junk information printed by gem, rails and bundle under rvm?

$rvm -v
rvm 1.21.15 (stable) by Wayne E. Seguin <wayneeseguin#gmail.com>, Michal Papis <mpapis#gmail.com> [https://rvm.io/]
$which gem
/Users/ad9075/.rvm/rubies/ruby-2.0.0-p247/bin/gem
$rvm -v
rvm 1.21.15 (stable) by Wayne E. Seguin <wayneeseguin#gmail.com>, Michal Papis <mpapis#gmail.com> [https://rvm.io/]
The /Users/ad9075/.rvm/rubies/ruby-2.0.0-p247/bin/gem file is like this:
#!/Users/ad9075/.rvm/rubies/ruby-2.0.0-p247/bin/ruby
#--
# Copyright 2006 by Chad Fowler, Rich Kilmer, Jim Weirich and others.
# All rights reserved.
# See LICENSE.txt for permissions.
#++
require 'rubygems'
require 'rubygems/gem_runner'
require 'rubygems/exceptions'
required_version = Gem::Requirement.new ">= 1.8.7"
unless required_version.satisfied_by? Gem.ruby_version then
abort "Expected Ruby Version #{required_version}, is #{Gem.ruby_version}"
end
args = ARGV.clone
begin
Gem::GemRunner.new.run args
rescue Gem::SystemExitException => e
exit e.exit_code
end
When I type in gem, the output is like this:
$gem
>= 0
>= 0
>= 0
>= 0
>= 0
>= 1.9.2
>= 0
>= 0
>= 0
>= 0
>= 1.9.2
>= 0
>= 0
>= 1.9.3
= 4.0.0
~> 2.5.3
>= 0
>= 1.9.3
= 4.0.0
~> 3.1.0
~> 1.5.2
~> 0.6.2
~> 2.7.0
= 4.0.0
~> 0.3.37
>= 0
>= 1.9.3
= 4.0.0
~> 3.1.0
>= 0
>= 1.9.3
= 4.0.0
= 4.0.0
~> 4.0.0
~> 1.0.2
>= 0
>= 3
< 5
>= 4.0.0.beta
~> 1.3
>= 0
>= 1.9.3
>= 0.6.4
~> 0.6
~> 1.3
~> 0.3.37
~> 4.2
~> 0.1
>= 0
~> 4.4
~> 4.0
~> 3.5
>= 0
>= 0
>= 0
>= 0
>= 0
>= 1.3.5
>= 3.0.0
>= 0
>= 0
>= 0
>= 0
~> 1.16
~> 1.4.8
>= 1.0.3
> 0.8.7
~> 2.12.0
>= 0
>= 0
>= 2.0.4
~> 4.7
~> 4.0
~> 1.2
~> 1.0
~> 1.1
~> 1.5
~> 1.0
~> 1.2
~> 1.5
~> 10.0
~> 3.6
>= 1.3.5
~> 1.0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 1.0
>= 1.8.11
>= 1.9.3
= 4.0.0
= 4.0.0
= 4.0.0
= 4.0.0
= 4.0.0
< 2.0
>= 1.3.0
~> 2.0.0
>= 0
>= 1.9.3
= 4.0.0
= 4.0.0
>= 0.8.7
< 2.0
>= 0.18.1
>= 0
~> 1.2
~> 1.0
~> 1.0
!= 1.3.0
~> 1.1
>= 0
~> 2.0
~> 1.2
~> 1.0
~> 1.0
~> 1.0
>= 0
>= 0
>= 0
~> 3.1
>= 0
>= 0
>= 0
~> 2.8
>= 3.0
>= 3.0
>= 0
>= 1.3.6
~> 1.0
>= 0
>= 0
>= 0
>= 0.1.0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 2.2.11
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
~> 0.5.0
~> 1.0
>= 2.0.0
>= 0
>= 0.3.1
>= 0
>= 1.3.6
>= 1.8.7
~> 0.7.3
~> 2.11
>= 0
>= 0
>= 1.3.2
>= 1.8.6
~> 2.1
>= 0
>= 1.0.1
>= 0
>= 0
>= 0
>= 0
>= 0
>= 1.8.7
~> 1.2
>= 1.2.1
RubyGems is a sophisticated package manager for Ruby. This is a
basic help message containing pointers to more information.
Usage:
gem -h/--help
gem -v/--version
gem command [arguments...] [options...]
Examples:
gem install rake
gem list --local
gem build package.gemspec
gem help install
Further help:
gem help commands list all 'gem' commands
gem help examples show some examples of usage
gem help platforms show information about platforms
gem help <COMMAND> show help on COMMAND
(e.g. 'gem help install')
gem server present a web page at
http://localhost:8808/
with info about installed gems
Further information:
http://guides.rubygems.org
It is not only gem: rails and bundle also print out lots of junk information:
$rails
>= 0
>= 0
>= 0
>= 0
>= 0
>= 1.9.2
>= 0
>= 0
>= 0
>= 0
>= 1.9.2
>= 0
>= 0
>= 1.9.3
= 4.0.0
~> 2.5.3
>= 0
>= 1.9.3
= 4.0.0
~> 3.1.0
~> 1.5.2
~> 0.6.2
~> 2.7.0
= 4.0.0
~> 0.3.37
>= 0
>= 1.9.3
= 4.0.0
~> 3.1.0
>= 0
>= 1.9.3
= 4.0.0
= 4.0.0
~> 4.0.0
~> 1.0.2
>= 0
>= 3
< 5
>= 4.0.0.beta
~> 1.3
>= 0
>= 1.9.3
>= 0.6.4
~> 0.6
~> 1.3
~> 0.3.37
~> 4.2
~> 0.1
>= 0
~> 4.4
~> 4.0
~> 3.5
>= 0
>= 0
>= 0
>= 0
>= 0
>= 1.3.5
>= 3.0.0
>= 0
>= 0
>= 0
>= 0
~> 1.16
~> 1.4.8
>= 1.0.3
> 0.8.7
~> 2.12.0
>= 0
>= 0
>= 2.0.4
~> 4.7
~> 4.0
~> 1.2
~> 1.0
~> 1.1
~> 1.5
~> 1.0
~> 1.2
~> 1.5
~> 10.0
~> 3.6
>= 1.3.5
~> 1.0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 1.0
>= 1.8.11
>= 1.9.3
= 4.0.0
= 4.0.0
= 4.0.0
= 4.0.0
= 4.0.0
< 2.0
>= 1.3.0
~> 2.0.0
>= 0
>= 1.9.3
= 4.0.0
= 4.0.0
>= 0.8.7
< 2.0
>= 0.18.1
>= 0
~> 1.2
~> 1.0
~> 1.0
!= 1.3.0
~> 1.1
>= 0
~> 2.0
~> 1.2
~> 1.0
~> 1.0
~> 1.0
>= 0
>= 0
>= 0
~> 3.1
>= 0
>= 0
>= 0
~> 2.8
>= 3.0
>= 3.0
>= 0
>= 1.3.6
~> 1.0
>= 0
>= 0
>= 0
>= 0.1.0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 2.2.11
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
~> 0.5.0
~> 1.0
>= 2.0.0
>= 0
>= 0.3.1
>= 0
>= 1.3.6
>= 1.8.7
~> 0.7.3
~> 2.11
>= 0
>= 0
>= 1.3.2
>= 1.8.6
~> 2.1
>= 0
>= 1.0.1
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
>= 0
Usage:
rails new APP_PATH [options]
It seems that this is junk information about versions. Is it normal? Does anyone have any ideas about this?

Resources