无法使用r data.table包执行计算 [英] unable to perform calculations using r data.table package

查看:134
本文介绍了无法使用r data.table包执行计算的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有一个巨大的数据框,最后30行如下:

  library(data.table)

dput(p)

  structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075,
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255,
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435,
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615,
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795,
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt"
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002",
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002",
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L,
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L,
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8,
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6,
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7,
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6,
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21,
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45,
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94,
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L,
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L,
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53,
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39,
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44,
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887,
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054,
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488,
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702,
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT",
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE",
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA,
-30L))

当我尝试使用 data.table 计算一些值时,如下所示:

>

  p <- data.table(p)
p <- p[, RELATIVE_PERCENT := ifelse(ENT_PCT > 100, (USED_CORES / ENT) * 100, USR_SYS_CPU_PCT), by = c("DATE", "LPAR")]

我得到这个错误:

 Error in `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100,  :
  RHS 的类型('integer')必须与 LHS 的类型('double')匹配。
  在最快的情况下,检查并强制转换类型会对性能造成过大的影响。
  请更改目标列的类型,或者自行对 := 的 RHS 进行强制转换
  (例如使用 1L 而不是 1)

此错误是什么意思?我该如何解决这个错误?

解决方案

问题在于你的 ifelse 语句对某些值返回 integer 类型,而对另一些条目返回 numeric(double)类型。data.table 会报告列类型不匹配,因为它要求由用户自行完成类型强制转换(出于性能原因,如错误信息中所述)。因此,只需用 as.numeric 将其包裹起来,使所有值都转换为双精度即可。

 p <- p[, RELATIVE_PERCENT := as.numeric(ifelse(ENT_PCT > 100, (USED_CORES / ENT) * 100,
                       USR_SYS_CPU_PCT)), by = c("DATE", "LPAR")]


I have a huge data frame, last 30 rows are below:

library(data.table)

dput(p)

structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075, 
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255, 
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435, 
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615, 
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795, 
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt"
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002", 
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002", 
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L, 
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L, 
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8, 
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6, 
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7, 
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6, 
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21, 
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45, 
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94, 
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L, 
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L, 
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53, 
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887, 
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054, 
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488, 
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702, 
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT", 
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE", 
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA, 
-30L))

When I try to calculate some values using data.table as below:

p<-data.table(p)
p<-p[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, USR_SYS_CPU_PCT), by= c("DATE", "LPAR")]

I get this error:

Error in `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100,  : 
  Type of RHS ('integer') must match LHS ('double'). To check and coerce would
  impact performance too much for the fastest cases. Either change the type of
  the target column, or coerce the RHS of := yourself (e.g. by using 1L instead
  of 1)

what does this error mean? How can I get around this error?

解决方案

The problem is that your ifelse statement returns integer type for some values and numeric (double) for some other entries. And data.table complains about the mismatch in the column type as it expects the coercion to be performed by the user (for performance reasons as given in the error). So, just wrap it around with as.numeric so that all values will be converted to double.

p <- p[,RELATIVE_PERCENT := as.numeric(ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, 
                      USR_SYS_CPU_PCT)), by= c("DATE", "LPAR")]

这篇关于无法使用r data.table包执行计算的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆