R:在“循环”中遇到的错误:x 输入 `name` 无法循环补齐(recycle)到长度 100 [英] R: Errors encountered during "loops": x Input `name` can't be recycled to size 100

查看:52
本文介绍了 R 在“循环”中遇到的错误(x 输入 `name` 无法循环补齐到长度 100)的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

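我正在使用 R 编程语言。我之前发过一篇帖子(R: Using "microbenchmark" and ggplot2 to plot runtimes),在其中学习如何使用循环和函数,针对不同的样本量重复执行若干过程(共 7 个过程)。完成之后,我想生成一张图。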

注释

由于其中一些过程的运行时间相对较长,我们将 microbenchmark() 的迭代次数从默认值 100 减少到 10,这已足以证明代码按预期工作。由于该次数已在代码顶部设置为参数,因此可以增大这个数字来增加每个基准测试的执行次数。

I am using the R programming language. I made an earlier post (R: Using "microbenchmark" and ggplot2 to plot runtimes) where I am learning how to use loops and functions to iterate procedures (7 procedures) in R for sample sizes. Once this is done, I want to produce a plot.

Based on the previous answer, I tried to write a few of these loops in R:

library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)

# Simulate data: two numeric variables and two categorical variables,
# 1000 observations each.

var_1 <- rnorm(1000,1,4)
var_2<-rnorm(1000,10,5)
var_3 <- sample( LETTERS[1:4], 1000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
var_4 <- sample( LETTERS[1:2], 1000, replace=TRUE, prob=c(0.4, 0.6) )

# Put them into a data frame called "f"; ID (column 5) numbers the rows.
f <- data.frame(var_1, var_2, var_3, var_4,ID=1:1000)

# Declare var_3 and var_4 as factors.
f$var_3 = as.factor(f$var_3)
f$var_4 = as.factor(f$var_4)

# Configure run sizes: the number of leading rows of `f` used per run.
sizes <- c(100,200,300,400,500,600,700,800,900,1000)

# Procedure 1: compute Gower dissimilarities for the first `size` rows of
# `f` and store them in the global environment as gower_dist_<size>
# (daisy() result) and gower_mat_<size> (the same values as a matrix).
proc1 <- function(size){
    # Column 5 (ID) is excluded from the distance computation.
    assign(paste0("gower_dist_",size), daisy(f[1:size,-5],
                        metric = "gower"),envir = .GlobalEnv)
        
    assign(paste0("gower_mat_",size),as.matrix(get(paste0("gower_dist_",size),envir = .GlobalEnv)),
           envir = .GlobalEnv)
        
}     

# Benchmark proc1 for every entry of `sizes`; `obs` records the size used.
proc1List <- lapply(sizes,function(x){
        b <- microbenchmark(proc1(x))
        b$obs <- x
        b
})

# Stack the per-size benchmark tables into one data frame.
proc1summary <- do.call(rbind,(proc1List))

# Procedure 2: local outlier factor with k = 3, stored as lof_<size>.
# NOTE(review): `gower_dist` is never created in this script (proc1 stores
# its result as gower_dist_<size>), so this only runs if a `gower_dist`
# object already exists in the session, and it ignores `size` for the input.


proc2 <- function(size){
    assign(paste0("lof_",size), lof(gower_dist, k=3),envir = .GlobalEnv)}

# Benchmark proc2 for every entry of `sizes`; `obs` records the size used.
proc2List <- lapply(sizes,function(x){
        b <- microbenchmark(proc2(x))
        b$obs <- x
        b
})


# Stack the per-size benchmark tables into one data frame.
proc2summary <- do.call(rbind,(proc2List))


# Procedure 3: local outlier factor with k = 5, stored as lof_<size>
# (the same name proc2 writes, so it replaces proc2's result).
# NOTE(review): `gower_dist` is never created in this script (proc1 stores
# its result as gower_dist_<size>); the input does not depend on `size`.


proc3 <- function(size){
    assign(paste0("lof_",size), lof(gower_dist, k=5),envir = .GlobalEnv)}

# Benchmark proc3 for every entry of `sizes`; `obs` records the size used.
proc3List <- lapply(sizes,function(x){
        b <- microbenchmark(proc3(x))
        b$obs <- x
        b
})


# Stack the per-size benchmark tables into one data frame.
proc3summary <- do.call(rbind,(proc3List))

# Procedure 4: t-SNE on the distance object, stored as tsne_obj_<size>,
# plus a data frame of the embedding stored as tsne_data_<size>.
# NOTE(review): `gower_dist` and `tsne_obj` are never created in this
# script (the results are stored under size-suffixed names), so both must
# already exist in the session for this to run.
# NOTE(review): `name = f$ID` always supplies 1000 values, while the
# embedding has one row per observation in the distance object; this is
# the input named in the recycling error quoted after the script.

proc4 <- function(size){
    assign(paste0("tsne_obj_",size),Rtsne(gower_dist,  is_distance = TRUE),envir = .GlobalEnv)
        
    # The value expression also binds a function-local `tsne_data`.
    assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
    data.frame() %>%
    setNames(c("X", "Y")) %>%
    mutate(
           name = f$ID) ,envir = .GlobalEnv)}


# Benchmark proc4 for every entry of `sizes`; `obs` records the size used.
proc4List <- lapply(sizes,function(x){
        b <- microbenchmark(proc4(x))
        b$obs <- x
        b
})


# Stack the per-size benchmark tables into one data frame.
proc4summary <- do.call(rbind,(proc4List))


# Procedure 5: same as procedure 4 but with perplexity = 10; it writes to
# the same tsne_obj_<size> and tsne_data_<size> names as procedure 4.
# NOTE(review): `gower_dist` and `tsne_obj` are never created in this
# script, and `name = f$ID` always supplies 1000 values regardless of
# `size`.

proc5 <- function(size){
    assign(paste0("tsne_obj_",size),Rtsne(gower_dist, perplexity = 10,  is_distance = TRUE),envir = .GlobalEnv)
        
    # The value expression also binds a function-local `tsne_data`.
    assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
    data.frame() %>%
    setNames(c("X", "Y")) %>%
    mutate(
           name = f$ID) ,envir = .GlobalEnv)}


# Benchmark proc5 for every entry of `sizes`; `obs` records the size used.
proc5List <- lapply(sizes,function(x){
        b <- microbenchmark(proc5(x))
        b$obs <- x
        b
})


# Stack the per-size benchmark tables into one data frame.
proc5summary <- do.call(rbind,(proc5List))


# Procedure 6: scatter plot of the embedding, stored as plot_<size>.
# NOTE(review): `tsne_data` is not a global in this script; procedures 4
# and 5 store their data frames as tsne_data_<size>, and the `tsne_data`
# they bind is local to those functions.

proc6 <- function(size){
    assign(paste0("plot_",size),ggplot(aes(x = X, y = Y), data = tsne_data) + geom_point(aes()),envir = .GlobalEnv)}
        

# Benchmark proc6 for every entry of `sizes`; `obs` records the size used.
proc6List <- lapply(sizes,function(x){
        b <- microbenchmark(proc6(x))
        b$obs <- x
        b
})


# Stack the per-size benchmark tables into one data frame.
proc6summary <- do.call(rbind,(proc6List))

# Procedure 7: t-SNE, an annotated embedding data frame, a ggplot and its
# plotly conversion, stored as tsne_obj_<size>, tsne_data_<size>,
# p1_<size> and plotly_<size>.
# NOTE(review): `gower_dist`, `tsne_obj` and `p1` are never created in this
# script (results are stored under size-suffixed names only).
# NOTE(review): `f$ID`, `f$var_1`, `f$var_2`, `f$var_3` always supply 1000
# values regardless of `size`, and `lof=lof` has no `lof` data object in
# this script to refer to (earlier steps store lof_<size>).

proc7 <- function(size) {

assign(paste0 ("tsne_obj_", size),  Rtsne(gower_dist,  is_distance = TRUE), envir = .GlobalEnv)

# The value expression also binds a function-local `tsne_data`, which the
# ggplot() call below reads.
assign(paste0 ("tsne_data_", size),  tsne_data <- tsne_obj$Y %>%
  data.frame() %>%
  setNames(c("X", "Y")) %>%
  mutate(
    name = f$ID, 
    lof=lof,
    var1=f$var_1,
    var2=f$var_2,
    var3=f$var_3
    ), envir = .GlobalEnv)

# Point size is mapped to `lof`; key/var1/var2/var3 are mapped so they can
# be listed in the plotly tooltip below.
assign(paste0 ("p1_", size),  ggplot(aes(x = X, y = Y, size=lof, key=name, var1=var1, 
  var2=var2, var3=var3), data = tsne_data) + 
  geom_point(shape=1, col="red") + theme_minimal(), envir = .GlobalEnv)


assign(paste0 ("plotly_", size),  
ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3")
 ), envir = .GlobalEnv)


}


# Benchmark proc7 for every entry of `sizes`; `obs` records the size used.
proc7List <- lapply(sizes,function(x){
    b <- microbenchmark(proc7(x))
    b$obs <- x
    b
})



# Stack the per-size benchmark tables into one data frame.
proc7summary <- do.call(rbind,(proc7List))


# Combine all seven benchmark tables, then average the raw `time` values
# per (expression, sample size) and scale them by 1e-6 into `time_ms`.
# The result is right-assigned to `proc_time`.
do.call(rbind,list(proc1summary,proc2summary,proc3summary, proc4summary, proc5summary, proc6summary, proc7summary)) %>% 
    group_by(expr,obs) %>%
    summarise(.,time_ms = mean(time) * .000001) -> proc_time 



# Plot mean runtime against sample size: one grey line per benchmarked
# expression, with points colored by expression.
ggplot(proc_time,aes(obs,time_ms,group = expr)) +
    geom_line(aes(group = expr),color = "grey80") + 
    geom_point(aes(color = expr))

However, for some of these procedures, when I call them through a list, I keep getting an error:

proc4List <- lapply(sizes,function(x){
     b <- microbenchmark(proc4(x))
     b$obs <- x
     b
 })

 Error: Problem with `mutate()` input `name`.
x Input `name` can't be recycled to size 100.
i Input `name` is `f$ID`.
i Input `name` must be size 100 or 1, not 1000.

I tried reading other stackoverflow posts (Input `typ` can't be recycled to size in R), but I could not understand why this "recycling error" keeps showing up. Is it because "size = 100" is too small? Is it because some of the variables have been named improperly?

Could someone please tell me what I am doing wrong?

Thanks

解决方案

In order to make procedures 4 - 7 work we needed to make the adjustments listed in the conclusions section of Using microbenchmark and ggplot2 to plot runtimes:

  1. Wrap the original procedure in a function that we can use as the unit of analysis for microbenchmark(), and include a size argument
  2. Modify the procedure to use size as a variable where necessary
  3. Modify the procedure to access objects from previous steps, based on the size argument
  4. Modify the procedure to write its outputs with assign() and size if these are needed for subsequent procedure steps

The modified code looks like this:

library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)

# Simulate data: two numeric variables and two categorical variables,
# 1000 observations each.

var_1 <- rnorm(1000,1,4)
var_2<-rnorm(1000,10,5)
var_3 <- sample( LETTERS[1:4], 1000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
var_4 <- sample( LETTERS[1:2], 1000, replace=TRUE, prob=c(0.4, 0.6) )


# Put them into a data frame called "f"; ID (column 5) numbers the rows.
f <- data.frame(var_1, var_2, var_3, var_4,ID=1:1000)

# Declare var_3 and var_4 as factors.
f$var_3 = as.factor(f$var_3)
f$var_4 = as.factor(f$var_4)

# Configure run sizes: the number of leading rows of `f` used per run.
sizes <- c(10,50,100,200,500,1000)

# Configure the number of benchmark runs; passed as `times` to every
# microbenchmark() call below.
time_ct <- 10

# Procedure 1: Gower dissimilarities for the first `size` rows of `f`.
#
# Runs daisy(metric = "gower") on rows 1..size of the global data frame
# `f` (column 5, the ID, is excluded) and publishes two objects in the
# global environment for the later procedures:
#   gower_dist_<size> - the daisy() dissimilarity object
#   gower_mat_<size>  - the same dissimilarities as a matrix
#
# size: number of leading rows of `f` to use; a single number between 1
#       and nrow(f).
# Returns (invisibly) the value stored as gower_mat_<size>.
proc1 <- function(size) {
  # Fail fast on sizes that would silently feed daisy() the wrong rows:
  # 1:0 selects row 1, and indices past nrow(f) produce all-NA rows.
  stopifnot(
    is.numeric(size), length(size) == 1L,
    size >= 1, size <= nrow(f)
  )

  dist_name <- paste0("gower_dist_", size)
  assign(dist_name,
         daisy(f[seq_len(size), -5], metric = "gower"),
         envir = .GlobalEnv)

  # Read the distance back from the global environment so the matrix is
  # built from exactly the object later procedures will see.
  assign(paste0("gower_mat_", size),
         as.matrix(get(dist_name, envir = .GlobalEnv)),
         envir = .GlobalEnv)
}

# Benchmark proc1 at every configured sample size, time_ct runs each.
# The lambda argument stays `x` so the benchmark label remains "proc1(x)".
proc1List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc1(x), times = time_ct)
  timings$obs <- x  # tag every timing row with the sample size used
  timings
})
# Stack the per-size timing tables into one data frame.
proc1summary <- do.call(rbind, proc1List)

# Procedure 2: local outlier factor with k = 3.
#
# Reads gower_dist_<size> from the global environment and runs lof() on
# it. Nothing is stored; the result is only returned (invisibly).
proc2 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  invisible(lof(gower_dist, k = 3))
}
# Benchmark proc2 at every configured sample size, time_ct runs each.
# The lambda argument stays `x` so the benchmark label remains "proc2(x)".
proc2List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc2(x), times = time_ct)
  timings$obs <- x  # tag every timing row with the sample size used
  timings
})
# Stack the per-size timing tables into one data frame.
proc2summary <- do.call(rbind, proc2List)

# Procedure 3: local outlier factor with k = 5.
#
# Reads gower_dist_<size> from the global environment, runs lof() on it
# and stores the result globally as lof_<size>.
proc3 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  lof_scores <- lof(gower_dist, k = 5)
  assign(paste0("lof_", size), lof_scores, envir = .GlobalEnv)
}
# Benchmark proc3 at every configured sample size, time_ct runs each.
# The lambda argument stays `x` so the benchmark label remains "proc3(x)".
proc3List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc3(x), times = time_ct)
  timings$obs <- x  # tag every timing row with the sample size used
  timings
})
# Stack the per-size timing tables into one data frame.
proc3summary <- do.call(rbind, proc3List)

# Procedure 4: t-SNE embedding of gower_dist_<size>.
#
# Perplexity is capped at (size - 1) / 3, with 30 as the upper limit, so
# that it shrinks along with the sample size. The embedding is turned
# into a data frame with columns X, Y and a 1..size `name` column, which
# is returned invisibly; nothing is stored globally.
proc4 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  tsne_fit <- Rtsne(
    gower_dist,
    perplexity = min(30, (size - 1) / 3),
    is_distance = TRUE
  )
  embedding <- setNames(data.frame(tsne_fit$Y), c("X", "Y"))
  invisible(mutate(embedding, name = seq_len(size)))
}

# Benchmark proc4 at every configured sample size, time_ct runs each.
# The lambda argument stays `x` so the benchmark label remains "proc4(x)".
proc4List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc4(x), times = time_ct)
  timings$obs <- x  # tag every timing row with the sample size used
  timings
})

# Stack the per-size timing tables into one data frame.
proc4summary <- do.call(rbind, proc4List)

# Procedure 5: t-SNE embedding of gower_dist_<size> with a perplexity of
# at most 10 (capped at (size - 1) / 3 for small samples).
#
# The embedding data frame (columns X, Y, name) is stored globally as
# tsne_data_<size>.
proc5 <- function(size) {
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)
  tsne_fit <- Rtsne(
    gower_dist,
    perplexity = min(10, (size - 1) / 3),
    is_distance = TRUE
  )
  embedding <- setNames(data.frame(tsne_fit$Y), c("X", "Y"))
  embedding <- mutate(embedding, name = seq_len(size))
  assign(paste0("tsne_data_", size), embedding, envir = .GlobalEnv)
}

# Benchmark proc5 at every configured sample size, time_ct runs each.
# The lambda argument stays `x` so the benchmark label remains "proc5(x)".
proc5List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc5(x), times = time_ct)
  timings$obs <- x  # tag every timing row with the sample size used
  timings
})

# Stack the per-size timing tables into one data frame.
proc5summary <- do.call(rbind, proc5List)

# Procedure 6: build a scatter plot of tsne_data_<size> (X against Y).
#
# The plot object is only constructed and returned invisibly; it is not
# stored globally.
proc6 <- function(size) {
  tsne_data <- get(paste0("tsne_data_", size), envir = .GlobalEnv)
  scatter <- ggplot(tsne_data, aes(x = X, y = Y)) +
    geom_point(aes())
  invisible(scatter)
}

# Benchmark proc6 at every configured sample size, time_ct runs each.
# The lambda argument stays `x` so the benchmark label remains "proc6(x)".
proc6List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc6(x), times = time_ct)
  timings$obs <- x  # tag every timing row with the sample size used
  timings
})

# Stack the per-size timing tables into one data frame.
proc6summary <- do.call(rbind, proc6List)

# Procedure 7: t-SNE embedding annotated with LOF scores and the source
# variables, rendered as an interactive plotly chart.
#
# Reads gower_dist_<size> and lof_<size> from the global environment and
# the first `size` values of var_1, var_2 and var_3 from `f`. Returns the
# ggplotly() object; nothing is stored globally.
proc7 <- function(size) {
  rows <- seq_len(size)
  gower_dist <- get(paste0("gower_dist_", size), envir = .GlobalEnv)

  tsne_fit <- Rtsne(
    gower_dist,
    perplexity = min(30, (size - 1) / 3),
    is_distance = TRUE
  )

  # One row per observation: embedding coordinates plus annotations.
  coords <- setNames(data.frame(tsne_fit$Y), c("X", "Y"))
  tsne_data <- mutate(
    coords,
    name = rows,
    lof = get(paste0("lof_", size), envir = .GlobalEnv),
    var1 = f$var_1[rows],
    var2 = f$var_2[rows],
    var3 = f$var_3[rows]
  )

  # key/var1/var2/var3 are mapped so they can be listed in the tooltip.
  point_mapping <- aes(
    x = X, y = Y, size = lof, key = name,
    var1 = var1, var2 = var2, var3 = var3
  )
  p1 <- ggplot(tsne_data, point_mapping) +
    geom_point(shape = 1, col = "red") +
    theme_minimal()

  ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3"))
}

# Benchmark proc7 at every configured sample size, time_ct runs each.
# The lambda argument stays `x` so the benchmark label remains "proc7(x)".
proc7List <- lapply(sizes, function(x) {
  timings <- microbenchmark(proc7(x), times = time_ct)
  timings$obs <- x  # tag every timing row with the sample size used
  timings
})

# Stack the per-size timing tables into one data frame.
proc7summary <- do.call(rbind, proc7List)

# Combine all seven benchmark tables and average the raw timings for each
# (expression, sample size) pair. microbenchmark records `time` in
# nanoseconds, so scaling by 0.000001 yields milliseconds.
proc_time <- do.call(
  rbind,
  list(
    proc1summary, proc2summary, proc3summary, proc4summary,
    proc5summary, proc6summary, proc7summary
  )
) %>%
  group_by(expr, obs) %>%
  # Drop the grouping explicitly: otherwise the result stays grouped by
  # `expr` and summarise() prints a regrouping message.
  summarise(time_ms = mean(time) * 0.000001, .groups = "drop")

head(proc_time)

# Mean runtime against sample size: one grey line per benchmarked
# expression, with points colored by expression.
ggplot(proc_time, aes(obs, time_ms, group = expr)) +
  geom_line(aes(group = expr), color = "grey80") +
  geom_point(aes(color = expr))

...and the output:

Notes

Since some of these procedures take a long time to run (relatively speaking), we reduced the number of iterations in microbenchmark() from the default of 100 to 10, which is more than sufficient to demonstrate that the code works as intended. Since this was set as a parameter at the top of the code, one can increase this number to increase the number of times each benchmark is executed.

这篇关于 R 在“循环”中遇到的错误(x 输入 `name` 无法循环补齐到长度 100)的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆