R:在“循环"中遇到的错误:x输入的“名称"不能回收到100号 [英] R: Errors encountered during "loops": x Input `name` can't be recycled to size 100
问题描述
我正在使用R编程语言.我发表了较早的文章(
注释
由于其中一些过程运行时间较长(相对而言),我们将 microbenchmark() 中的迭代次数从默认值100减少到10,这足以证明代码按预期工作.由于该值已在代码顶部设置为参数,因此可以增大此数字以增加每个基准测试的执行次数.
I am using the R programming language. I made an earlier post (R: Using "microbenchmark" and ggplot2 to plot runtimes) where I am learning how to use loops and functions to iterate procedures (7 procedures) in R for sample sizes. Once this is done, I want to produce a plot.
Based on the previous answer, I tried to write a few of these loops in R:
library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)
# Simulate 1000 observations: two normally distributed numeric variables
# and two categorical variables drawn with unequal class probabilities.
var_1 <- rnorm(1000,1,4)
var_2<-rnorm(1000,10,5)
var_3 <- sample( LETTERS[1:4], 1000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
var_4 <- sample( LETTERS[1:2], 1000, replace=TRUE, prob=c(0.4, 0.6) )
# Put them into a data frame called "f"; ID (column 5) is a row index 1..1000.
f <- data.frame(var_1, var_2, var_3, var_4,ID=1:1000)
# Convert the two categorical columns (var_3 and var_4) to factors.
f$var_3 = as.factor(f$var_3)
f$var_4 = as.factor(f$var_4)
# Sample sizes to benchmark; each procedure is called once per entry.
sizes <- c(100,200,300,400,500,600,700,800,900,1000)
# Procedure 1: Gower dissimilarities for the first `size` rows of f.
# Column 5 (ID) is dropped before calling daisy(). Two objects are written
# to the global environment, both keyed by the sample size:
#   gower_dist_<size> - the object returned by daisy()
#   gower_mat_<size>  - that object converted with as.matrix()
proc1 <- function(size){
assign(paste0("gower_dist_",size), daisy(f[1:size,-5],
metric = "gower"),envir = .GlobalEnv)
# Read back the object stored just above and keep its matrix form as well.
assign(paste0("gower_mat_",size),as.matrix(get(paste0("gower_dist_",size),envir = .GlobalEnv)),
envir = .GlobalEnv)
}
# Benchmark proc1() once per entry of `sizes`. No `times` argument is given,
# so microbenchmark() uses its default number of iterations. Each timing
# table gets an `obs` column holding the sample size it was measured at.
proc1List <- lapply(sizes,function(x){
b <- microbenchmark(proc1(x))
b$obs <- x
b
})
# Stack the per-size timing tables into one table.
proc1summary <- do.call(rbind,(proc1List))
# Procedure 2: call lof() with k = 3 and store the result globally as
# lof_<size>.
# NOTE(review): `gower_dist` is not created anywhere in this listing; proc1
# stores its output as gower_dist_<size>, so this presumably relies on an
# object left over from an earlier session and does not vary with `size`.
proc2 <- function(size){
assign(paste0("lof_",size), lof(gower_dist, k=3),envir = .GlobalEnv)}
# Benchmark proc2() once per entry of `sizes` (default iteration count) and
# record the sample size of each run in the `obs` column.
proc2List <- lapply(sizes,function(x){
b <- microbenchmark(proc2(x))
b$obs <- x
b
})
# Stack the per-size timing tables into one table.
proc2summary <- do.call(rbind,(proc2List))
# Procedure 3: same as procedure 2 but with k = 5. It writes to the same
# global name, lof_<size>, so it overwrites whatever proc2 stored there.
# NOTE(review): `gower_dist` is not created anywhere in this listing (proc1
# stores gower_dist_<size>), so the input does not depend on `size`.
proc3 <- function(size){
assign(paste0("lof_",size), lof(gower_dist, k=5),envir = .GlobalEnv)}
# Benchmark proc3() once per entry of `sizes` (default iteration count) and
# record the sample size of each run in the `obs` column.
proc3List <- lapply(sizes,function(x){
b <- microbenchmark(proc3(x))
b$obs <- x
b
})
# Stack the per-size timing tables into one table.
proc3summary <- do.call(rbind,(proc3List))
# Procedure 4: run Rtsne() on a distance object and store the result as
# tsne_obj_<size>; then turn an embedding into an (X, Y) data frame with a
# `name` column and store it as tsne_data_<size>.
# NOTE(review): `gower_dist` and `tsne_obj` are not created anywhere in this
# listing. proc1 stores gower_dist_<size> and the first assign() below stores
# tsne_obj_<size>, so the unsuffixed names presumably refer to objects left
# over from an earlier session - confirm.
proc4 <- function(size){
assign(paste0("tsne_obj_",size),Rtsne(gower_dist, is_distance = TRUE),envir = .GlobalEnv)
assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
# f$ID always has 1000 elements (f is built with ID=1:1000), whatever `size`
# is. mutate() only accepts a column of length 1 or of the same length as the
# data frame, so this is the `name` input named in the recycling error.
name = f$ID) ,envir = .GlobalEnv)}
# Benchmark proc4() once per entry of `sizes` (default iteration count) and
# record the sample size of each run in the `obs` column.
proc4List <- lapply(sizes,function(x){
b <- microbenchmark(proc4(x))
b$obs <- x
b
})
# Stack the per-size timing tables into one table.
proc4summary <- do.call(rbind,(proc4List))
# Procedure 5: same steps as procedure 4, but Rtsne() is called with
# perplexity = 10. Results are stored under the same global names
# (tsne_obj_<size>, tsne_data_<size>) and so overwrite proc4's output.
# NOTE(review): `gower_dist` and `tsne_obj` are not created anywhere in this
# listing; only the size-suffixed variants are - confirm where they come from.
proc5 <- function(size){
assign(paste0("tsne_obj_",size),Rtsne(gower_dist, perplexity = 10, is_distance = TRUE),envir = .GlobalEnv)
assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
# f$ID has 1000 elements regardless of `size`, so it only fits an embedding
# with exactly 1000 rows (same length mismatch as in procedure 4).
name = f$ID) ,envir = .GlobalEnv)}
# Benchmark proc5() once per entry of `sizes` (default iteration count) and
# record the sample size of each run in the `obs` column.
proc5List <- lapply(sizes,function(x){
b <- microbenchmark(proc5(x))
b$obs <- x
b
})
# Stack the per-size timing tables into one table.
proc5summary <- do.call(rbind,(proc5List))
# Procedure 6: build a scatter plot of the embedding (X vs Y) and store the
# ggplot object globally as plot_<size>. The plot is built, not printed.
# NOTE(review): `tsne_data` is read without a size suffix. proc4/proc5 only
# assign that name inside their own function bodies and publish
# tsne_data_<size>, so this presumably picks up a global from an earlier
# session and plots the same data for every `size` - confirm.
proc6 <- function(size){
assign(paste0("plot_",size),ggplot(aes(x = X, y = Y), data = tsne_data) + geom_point(aes()),envir = .GlobalEnv)}
# Benchmark proc6() once per entry of `sizes` (default iteration count) and
# record the sample size of each run in the `obs` column.
proc6List <- lapply(sizes,function(x){
b <- microbenchmark(proc6(x))
b$obs <- x
b
})
# Stack the per-size timing tables into one table.
proc6summary <- do.call(rbind,(proc6List))
# Procedure 7: t-SNE embedding plus an interactive plot. Stores four global
# objects keyed by size: tsne_obj_<size>, tsne_data_<size>, p1_<size> and
# plotly_<size>.
# NOTE(review): `gower_dist`, `tsne_obj` and `p1` are read without a size
# suffix, but this listing only ever creates the suffixed variants - confirm
# where the unsuffixed objects come from.
proc7 <- function(size) {
assign(paste0 ("tsne_obj_", size), Rtsne(gower_dist, is_distance = TRUE), envir = .GlobalEnv)
assign(paste0 ("tsne_data_", size), tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
# f$ID and the f$var_* columns below always have 1000 elements, so they only
# fit when the embedding has exactly 1000 rows.
name = f$ID,
# NOTE(review): no data column or local variable is called `lof` here, so
# this resolves to whatever object named `lof` is in scope; the stored
# scores are called lof_<size> - presumably those were intended.
lof=lof,
var1=f$var_1,
var2=f$var_2,
var3=f$var_3
), envir = .GlobalEnv)
# Static ggplot: point size mapped to lof; the remaining aesthetics (key,
# var1..var3) are listed again in the tooltip of the ggplotly() call below.
assign(paste0 ("p1_", size), ggplot(aes(x = X, y = Y, size=lof, key=name, var1=var1,
var2=var2, var3=var3), data = tsne_data) +
geom_point(shape=1, col="red") + theme_minimal(), envir = .GlobalEnv)
# Convert to an interactive plotly widget showing the listed fields.
assign(paste0 ("plotly_", size),
ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3")
), envir = .GlobalEnv)
}
# Benchmark proc7() once per entry of `sizes` (default iteration count) and
# record the sample size of each run in the `obs` column.
proc7List <- lapply(sizes,function(x){
b <- microbenchmark(proc7(x))
b$obs <- x
b
})
# Stack the per-size timing tables into one table.
proc7summary <- do.call(rbind,(proc7List))
# Combine the timing tables of all seven procedures, then average the
# timings per benchmarked expression (`expr`) and sample size (`obs`).
# The mean of `time` is scaled by 1e-6 and stored as `time_ms`
# (microbenchmark reports `time` in nanoseconds, so this yields milliseconds).
do.call(rbind,list(proc1summary,proc2summary,proc3summary, proc4summary, proc5summary, proc6summary, proc7summary)) %>%
group_by(expr,obs) %>%
summarise(.,time_ms = mean(time) * .000001) -> proc_time
# Plot mean runtime against sample size: one grey line per procedure, with
# points coloured by procedure.
ggplot(proc_time,aes(obs,time_ms,group = expr)) +
geom_line(aes(group = expr),color = "grey80") +
geom_point(aes(color = expr))
However, for some of these procedures, when I call them through a list, I keep getting an error:
# Running the procedure 4 benchmark on its own: calls proc4() for each entry
# of `sizes` and tags the timings with the sample size in `obs`.
proc4List <- lapply(sizes,function(x){
b <- microbenchmark(proc4(x))
b$obs <- x
b
})
Error: Problem with `mutate()` input `name`.
x Input `name` can't be recycled to size 100.
i Input `name` is `f$ID`.
i Input `name` must be size 100 or 1, not 1000.
I tried reading other stackoverflow posts (Input `typ` can't be recycled to size in R), but I could not understand why this "recycling error" keeps showing up. Is it because "size = 100" is too small? Is it because some of the variables have been named improperly?
Could someone please tell me what I am doing wrong?
Thanks
In order to make procedures 4 - 7 work we needed to make the adjustments listed in the conclusions section of Using microbenchmark and ggplot2 to plot runtimes:
- Wrap the original procedure in a function that we can use as the unit of analysis for microbenchmark(), and include a size argument
- Modify the procedure to use size as a variable where necessary
- Modify the procedure to access objects from previous steps, based on the size argument
- Modify the procedure to write its outputs with assign() and size if these are needed for subsequent procedure steps
The modified code looks like this:
library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)
# Simulate the benchmark data: 1000 observations with two numeric and two
# categorical variables. The draws are made in a fixed order (var_1, var_2,
# var_3, var_4) so the data are reproducible under a given seed.
var_1 <- rnorm(n = 1000, mean = 1, sd = 4)
var_2 <- rnorm(n = 1000, mean = 10, sd = 5)
var_3 <- sample(
  c("A", "B", "C", "D"),
  size = 1000,
  replace = TRUE,
  prob = c(0.1, 0.2, 0.65, 0.05)
)
var_4 <- sample(
  c("A", "B"),
  size = 1000,
  replace = TRUE,
  prob = c(0.4, 0.6)
)

# Assemble the data frame "f"; ID (column 5) is a plain row index.
f <- data.frame(var_1, var_2, var_3, var_4, ID = seq_len(1000))

# Store both categorical columns as factors.
f[c("var_3", "var_4")] <- lapply(f[c("var_3", "var_4")], as.factor)

# Sample sizes to benchmark (number of leading rows of f used per run).
sizes <- c(10, 50, 100, 200, 500, 1000)

# Number of timed iterations per benchmark (microbenchmark's `times`).
time_ct <- 10
# Procedure 1: Gower dissimilarities for the first `size` rows of `f`.
#
# Column 5 (ID) is dropped before calling daisy(). Two objects are published
# in the global environment so later procedures can look them up by size:
#   gower_dist_<size> - the dissimilarity object returned by daisy()
#   gower_mat_<size>  - the same dissimilarities converted with as.matrix()
# Returns the matrix invisibly (the value of the last assign()).
proc1 <- function(size) {
  dist_name <- paste0("gower_dist_", size)
  mat_name <- paste0("gower_mat_", size)

  gower_dist <- daisy(f[1:size, -5], metric = "gower")
  assign(dist_name, gower_dist, envir = .GlobalEnv)
  assign(mat_name, as.matrix(gower_dist), envir = .GlobalEnv)
}
# Benchmark proc1() at every sample size, `time_ct` iterations each, and tag
# the timings with the sample size in `obs`.
# The lambda argument stays named `x`: microbenchmark() labels timings with
# the text of the benchmarked expression, here "proc1(x)".
proc1List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc1(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

# One stacked timing table covering all sample sizes.
proc1summary <- do.call(rbind, proc1List)
# Procedure 2: local outlier factor with k = 3 on the Gower dissimilarities
# that proc1() stored for this sample size (gower_dist_<size>).
# Nothing is written to the global environment; the scores are returned
# invisibly.
proc2 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  invisible(lof(gower_dist, k = 3))
}
# Benchmark proc2() at every sample size, `time_ct` iterations each, and tag
# the timings with the sample size in `obs`.
# The lambda argument stays named `x`: microbenchmark() labels timings with
# the text of the benchmarked expression, here "proc2(x)".
proc2List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc2(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

# One stacked timing table covering all sample sizes.
proc2summary <- do.call(rbind, proc2List)
# Procedure 3: local outlier factor with k = 5 on the Gower dissimilarities
# stored for this sample size (gower_dist_<size>).
# The scores are published globally as lof_<size> because proc7() reads them.
# Returns the scores invisibly (the value of assign()).
proc3 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  scores <- lof(gower_dist, k = 5)
  assign(paste0("lof_", size), scores, envir = .GlobalEnv)
}
# Benchmark proc3() at every sample size, `time_ct` iterations each, and tag
# the timings with the sample size in `obs`.
# The lambda argument stays named `x`: microbenchmark() labels timings with
# the text of the benchmarked expression, here "proc3(x)".
proc3List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc3(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

# One stacked timing table covering all sample sizes.
proc3summary <- do.call(rbind, proc3List)
# Procedure 4: t-SNE embedding of the Gower dissimilarities stored for this
# sample size (gower_dist_<size>).
# Perplexity is capped at (size - 1) / 3 so that small samples stay within
# what Rtsne() accepts; larger samples use 30.
# Returns, invisibly, a data frame with columns X, Y and `name` (1..size).
# Nothing is written to the global environment.
proc4 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  tsne_obj <- Rtsne(
    gower_dist,
    perplexity = min(30, (size - 1) / 3),
    is_distance = TRUE
  )

  embedding <- setNames(data.frame(tsne_obj$Y), c("X", "Y"))
  # `name` is sized to the sample, so it always matches the embedding rows.
  invisible(mutate(embedding, name = 1:size))
}
# Benchmark proc4() at every sample size, `time_ct` iterations each, and tag
# the timings with the sample size in `obs`.
# The lambda argument stays named `x`: microbenchmark() labels timings with
# the text of the benchmarked expression, here "proc4(x)".
proc4List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc4(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

# One stacked timing table covering all sample sizes.
proc4summary <- do.call(rbind, proc4List)
# Procedure 5: t-SNE embedding like procedure 4, but with the perplexity
# capped at 10 (or (size - 1) / 3 for small samples).
# The resulting data frame (columns X, Y, name) is published globally as
# tsne_data_<size> because proc6() reads it. Returns it invisibly.
proc5 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  tsne_obj <- Rtsne(
    gower_dist,
    perplexity = min(10, (size - 1) / 3),
    is_distance = TRUE
  )

  embedding <- setNames(data.frame(tsne_obj$Y), c("X", "Y"))
  # `name` is sized to the sample, so it always matches the embedding rows.
  tsne_data <- mutate(embedding, name = 1:size)
  assign(paste0("tsne_data_", size), tsne_data, envir = .GlobalEnv)
}
# Benchmark proc5() at every sample size, `time_ct` iterations each, and tag
# the timings with the sample size in `obs`.
# The lambda argument stays named `x`: microbenchmark() labels timings with
# the text of the benchmarked expression, here "proc5(x)".
proc5List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc5(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

# One stacked timing table covering all sample sizes.
proc5summary <- do.call(rbind, proc5List)
# Procedure 6: scatter plot (X vs Y) of the embedding that proc5() stored as
# tsne_data_<size>.
# The ggplot object is built and returned invisibly; it is never printed
# here, so nothing is drawn.
proc6 <- function(size) {
  tsne_data <- get(paste0("tsne_data_", size), envir = .GlobalEnv)
  scatter <- ggplot(data = tsne_data, mapping = aes(x = X, y = Y)) +
    geom_point(aes())
  invisible(scatter)
}
# Benchmark proc6() at every sample size, `time_ct` iterations each, and tag
# the timings with the sample size in `obs`.
# The lambda argument stays named `x`: microbenchmark() labels timings with
# the text of the benchmarked expression, here "proc6(x)".
proc6List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc6(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

# One stacked timing table covering all sample sizes.
proc6summary <- do.call(rbind, proc6List)
# Procedure 7: t-SNE embedding joined with outlier scores and the original
# variables, rendered as an interactive plotly scatter plot.
#
# Reads gower_dist_<size> (from proc1) and lof_<size> (from proc3) out of the
# global environment. Every added column is cut to the first `size` rows so
# it matches the embedding. Returns the plotly widget; nothing is stored
# globally.
proc7 <- function(size) {
  rows <- 1:size

  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  tsne_obj <- Rtsne(
    gower_dist,
    perplexity = min(30, (size - 1) / 3),
    is_distance = TRUE
  )

  embedding <- setNames(data.frame(tsne_obj$Y), c("X", "Y"))
  tsne_data <- mutate(
    embedding,
    name = rows,
    lof = get(paste0("lof_", size), envir = .GlobalEnv),
    var1 = f$var_1[rows],
    var2 = f$var_2[rows],
    var3 = f$var_3[rows]
  )

  # Point size follows the outlier score; key/var1..var3 are carried as
  # extra aesthetics so that ggplotly() can show them in the tooltip.
  point_mapping <- aes(
    x = X, y = Y, size = lof, key = name,
    var1 = var1, var2 = var2, var3 = var3
  )
  p1 <- ggplot(data = tsne_data, mapping = point_mapping) +
    geom_point(shape = 1, col = "red") +
    theme_minimal()

  ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3"))
}
# Benchmark proc7() at every sample size, `time_ct` iterations each, and tag
# the timings with the sample size in `obs`.
# The lambda argument stays named `x`: microbenchmark() labels timings with
# the text of the benchmarked expression, here "proc7(x)".
proc7List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc7(x), times = time_ct)
  timings[["obs"]] <- x
  timings
})

# One stacked timing table covering all sample sizes.
proc7summary <- do.call(rbind, proc7List)
# Stack the timing tables of all seven procedures and average the timings
# per benchmarked expression (`expr`) and sample size (`obs`).
# microbenchmark reports `time` in nanoseconds; scaling the mean by 1e-6
# gives milliseconds.
proc_time <- rbind(
  proc1summary, proc2summary, proc3summary, proc4summary,
  proc5summary, proc6summary, proc7summary
) %>%
  group_by(expr, obs) %>%
  summarise(time_ms = mean(time) * 1e-6)

head(proc_time)

# Mean runtime against sample size: one grey line per procedure, with the
# points coloured by procedure.
ggplot(proc_time, aes(x = obs, y = time_ms, group = expr)) +
  geom_line(aes(group = expr), color = "grey80") +
  geom_point(aes(color = expr))
...and the output:
Notes
Since some of these procedures take a long time to run (relatively speaking), we reduced the number of iterations in microbenchmark()
from the default of 100 to 10, which is more than sufficient to demonstrate that the code works as intended. Since this was set as a parameter at the top of the code, one can increase this number to increase the number of times each benchmark is executed.
这篇关于R:在“循环"中遇到的错误:x输入的“名称"不能回收到100号的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!