R语言:代码需要很长时间才能运行 [英] R Language: Code Taking a Long Time to Run
问题描述
我发布了(v0.3.0)创建于2021-01-05 sup>
其他附带的软件包:[1] plotly_4.9.2.2 dplyr_1.0.2 ggplot2_3.3.3 xts_0.12.1 zoo_1.8-8
I posted another question on how to plot interactive time series in R using the "plotly" library. I received an answer and tried running the code - however, this code has been running for the past 3 hours. The data is not that big, and I tried a similar example from the plotly website ( https://plotly.com/r/cumulative-animations/ and https://plotly.com/r/custom-buttons/) and they seem to have run fine.
Here is the code (from my previous answer) that I am trying to run:
#load libraries and generate artificial time series data (this part works)
library(xts)
library(ggplot2)
library(dplyr)
library(plotly)
#create data
#time series 1
date_decision_made = seq(as.Date("2014/1/1"), as.Date("2016/1/1"),by="day")
date_decision_made <- format(as.Date(date_decision_made), "%Y/%m/%d")
property_damages_in_dollars <- rnorm(731,100,10)
final_data <- data.frame(date_decision_made, property_damages_in_dollars)
final_data %>%
mutate(date_decision_made = as.Date(date_decision_made)) %>%
add_count(week = format(date_decision_made, "%W-%y"))
final_data$class = "time_series_1"
#time series 2
date_decision_made = seq(as.Date("2014/1/1"), as.Date("2016/1/1"),by="day")
date_decision_made <- format(as.Date(date_decision_made), "%Y/%m/%d")
property_damages_in_dollars <- rnorm(731,10,10)
final_data_2 <- data.frame(date_decision_made, property_damages_in_dollars)
final_data_2 %>%
mutate(date_decision_made = as.Date(date_decision_made)) %>%
add_count(week = format(date_decision_made, "%W-%y"))
final_data_2$class = "time_series_2"
#combine
data = rbind(final_data, final_data_2)
Part 1:
#part 1:
data <- data %>%
mutate(tmp_date = as.numeric(as.Date(date_decision_made, format = "%Y/%m/%d")))
accumulate_by <- function(dat, var) {
var <- lazyeval::f_eval(var, dat)
lvls <- plotly:::getLevels(var)
dats <- lapply(seq_along(lvls), function(x) {
cbind(dat[var %in% lvls[seq(1, x)], ], frame = lvls[[x]])
})
dplyr::bind_rows(dats)
}
data <- data %>% accumulate_by(~tmp_date)
fig <- data %>%
plot_ly(
x = ~tmp_date,
y = ~property_damages_in_dollars,
split = ~class,
frame = ~frame,
type = 'scatter',
mode = 'lines',
line = list(simplyfy = F)
)
fig
Part 2:
#part 2
updatemenus <- list(
list(
active = -1,
type= 'buttons',
buttons = list(
list(
label = "time_series_1",
method = "update",
args = list(list(visible = c(FALSE, TRUE)),
list(title = "series 1",
annotations = list(c(), high_annotations)))),
list(
label = "time_series_2",
method = "update",
args = list(list(visible = c(TRUE, FALSE)),
list(title = "series 2",
annotations = list(low_annotations, c() )))),
)
)
)
fig <- data %>% plot_ly(type = 'scatter', mode = 'lines')
fig <- fig %>% add_lines(x=~date_decision_made,
y=~property_damages_in_dollars, name="High",
line=list(color="#33CFA5"))
fig <- fig %>% add_lines(x=~date_decision_made,
y=~property_damage_in_dollars, name="Low",
line=list(color="#F06A6A"))
fig <- fig %>% layout(title = "Apple", showlegend=FALSE,
xaxis=list(title="Date"),
yaxis=list(title="Price ($)"),
updatemenus=updatemenus)
fig
Can someone please tell me what I am doing wrong? Or is there an issue with my computer and R console? The data is not that big and I have run similar procedures on data of similar size in the past without too much trouble.
Thanks
NOTE: session info
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)
Matrix products: default
locale:
[1] LC_COLLATE=English_Canada.1252 LC_CTYPE=English_Canada.1252 LC_MONETARY=English_Canada.1252 LC_NUMERIC=C
[5] LC_TIME=English_Canada.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] plotly_4.9.2.1 dplyr_1.0.2 ggplot2_3.3.2 xts_0.12.1 zoo_1.8-8
loaded via a namespace (and not attached):
[1] tinytex_0.26 tidyselect_1.1.0 xfun_0.15 purrr_0.3.4 reshape2_1.4.4 splines_4.0.2
[7] lattice_0.20-41 colorspace_1.4-1 vctrs_0.3.2 generics_0.0.2 viridisLite_0.3.0 htmltools_0.5.0
[13] stats4_4.0.2 yaml_2.2.1 survival_3.2-7 prodlim_2019.11.13 rlang_0.4.7 ModelMetrics_1.2.2.2
[19] pillar_1.4.6 glue_1.4.1 withr_2.3.0 xgboost_1.1.1.1 foreach_1.5.1 lifecycle_0.2.0
[25] plyr_1.8.6 lava_1.6.8 stringr_1.4.0 timeDate_3043.102 munsell_0.5.0 gtable_0.3.0
[31] recipes_0.1.13 htmlwidgets_1.5.2 codetools_0.2-16 crosstalk_1.1.0.1 caret_6.0-86 class_7.3-17
[37] Rcpp_1.0.5 scales_1.1.1 ipred_0.9-9 jsonlite_1.7.1 digest_0.6.25 stringi_1.4.6
[43] grid_4.0.2 tools_4.0.2 magrittr_1.5 lazyeval_0.2.2 tibble_3.0.3 tidyr_1.1.0
[49] crayon_1.3.4 pkgconfig_2.0.3 MASS_7.3-53 ellipsis_0.3.1 Matrix_1.2-18 data.table_1.12.8
[55] pROC_1.16.2 lubridate_1.7.9 gower_0.2.2 httr_1.4.2 rstudioapi_0.11 iterators_1.0.13
[61] R6_2.4.1 rpart_4.1-15 nnet_7.3-14 nlme_3.1-149 compiler_4.0.2
While the data is not small (535,092 rows following the accumulate_by
transformation), it should not take hours to generate those plotly graphs. On my machine the whole thing takes under 2 minutes. It appears to leave behind some data that only a restart of R seems to get rid of, so you might want to check the memory footprint.
The code below should be reproducible:
tic <- Sys.time()
suppressPackageStartupMessages(invisible(
lapply(c("xts", "ggplot2", "dplyr", "plotly"),
require, character.only = TRUE)))
#create data
#time series 1
date_decision_made <- seq(as.Date("2014/1/1"), as.Date("2016/1/1"), by="day") %>%
{format(as.Date(.), "%Y/%m/%d")}
property_damages_in_dollars <- rnorm(731,100,10)
final_data <- data.frame(date_decision_made, property_damages_in_dollars) %>%
mutate(date_decision_made = as.Date(date_decision_made),
class = "time_series_1") %>%
add_count(week = format(date_decision_made, "%W-%y"))
#time series 2
date_decision_made <- seq(as.Date("2014/1/1"), as.Date("2016/1/1"), by="day") %>%
{format(as.Date(.), "%Y/%m/%d")}
property_damages_in_dollars <- rnorm(731,10,10)
final_data_2 <- data.frame(date_decision_made, property_damages_in_dollars) %>%
mutate(date_decision_made = as.Date(date_decision_made),
class = "time_series_2") %>%
add_count(week = format(date_decision_made, "%W-%y"))
#combine
data <- rbind(final_data, final_data_2)
#part 1:
data <- data %>%
mutate(tmp_date = as.numeric(as.Date(date_decision_made, format = "%Y/%m/%d")))
accumulate_by <- function(dat, var) {
var <- lazyeval::f_eval(var, dat)
lvls <- plotly:::getLevels(var)
dats <- lapply(seq_along(lvls), function(x) {
cbind(dat[var %in% lvls[seq(1, x)], ], frame = lvls[[x]])
})
dplyr::bind_rows(dats)
}
data <- data %>% accumulate_by(~tmp_date)
lvls <- plotly:::getLevels(data$tmp_date)
## edit: defined custom tick marks with date labels
myticks <- floor(do.call(seq, as.list(c(range(data$tmp_date),length.out=8))))
fig <- data %>%
plot_ly(
x = ~tmp_date,
y = ~property_damages_in_dollars,
split = ~class,
frame = ~frame,
type = 'scatter',
mode = 'lines' #,
# line = list(simplify = FALSE)
) %>% # edit: added custom tick labels and text
layout(xaxis = list(
tickmode = "array",
nticks = 8,
tickvals = myticks,
ticktext = data$date_decision_made[myticks]
))
myticks <- floor(do.call(seq, as.list(c(range(data$tmp_date),length.out=8))))
fig <- data %>%
plot_ly(
x = ~tmp_date,
y = ~property_damages_in_dollars,
text=~date_decision_made, # edit: added hovertext label
split = ~class,
frame = ~frame,
type = 'scatter',
mode = 'lines'
) %>% # edit: added custom tick labels and text
layout(xaxis = list(
tickmode = "array",
nticks = 8,
tickvals = myticks,
ticktext = data$date_decision_made[myticks])
)
fig
#part 2
high_annotations <- list(
x=unique(data$date_decision_made[data$property_damages_in_dollars == max(data$property_damages_in_dollars)]),
y=max(data$property_damages_in_dollars),
xref='x', yref='y',
text=paste0('High: $',max(data$property_damages_in_dollars)),
ax=0, ay=-40
)
low_annotations <- list(
x=unique(data$date_decision_made[data$property_damages_in_dollars == min(data$property_damages_in_dollars)]),
y=min(data$property_damages_in_dollars),
xref='x', yref='y',
text=paste0('Low: $',min(data$property_damages_in_dollars)),
ax=0, ay=40
)
updatemenus <- list(
list(
active = -1,
type= 'buttons',
buttons = list(
list(
label = "time_series_1",
method = "update",
args = list(list(visible = c(FALSE, TRUE)),
list(title = "series 1",
annotations = list(c(), high_annotations)))),
list(
label = "time_series_2",
method = "update",
args = list(list(visible = c(TRUE, FALSE)),
list(title = "series 2",
annotations = list(low_annotations, c() ))))
)
)
)
fig <- data %>% plot_ly(type = 'scatter', mode = 'lines')
fig <- fig %>% add_lines(x=~date_decision_made,
y=~property_damages_in_dollars, name="High",
line=list(color="#33CFA5"))
fig <- fig %>% add_lines(x=~date_decision_made,
y=~property_damages_in_dollars, name="Low",
line=list(color="#F06A6A"))
fig <- fig %>% layout(title = "Apple", showlegend=FALSE,
xaxis=list(title="Date"),
yaxis=list(title="Price ($)"),
updatemenus=updatemenus)
fig
toc <- Sys.time()
toc-tic
#> Time difference of 1.61937 mins
Created on 2021-01-05 by the reprex package (v0.3.0)
other attached packages:
[1] plotly_4.9.2.2 dplyr_1.0.2 ggplot2_3.3.3 xts_0.12.1 zoo_1.8-8
这篇关于R语言:代码需要很长时间才能运行的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!