无法使用r data.table包执行计算 [英] unable to perform calculations using r data.table package
问题描述
我有一个巨大的数据框,最后30行如下:
library(data.table)
dput(p)
structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075,
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255,
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435,
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615,
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795,
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt"
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002",
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002",
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L,
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L,
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8,
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6,
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7,
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6,
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21,
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45,
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94,
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L,
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L,
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53,
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39,
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44,
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887,
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054,
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488,
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702,
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT",
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE",
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA,
-30L))
当我想使用 data.table 计算一些值时,如下所示:
p<-data.table(p)
p<-p[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, USR_SYS_CPU_PCT), by= c("DATE", "LPAR")]
我得到这个错误:
错误于 `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100, :
RHS 的类型('integer')必须与 LHS 的类型('double')匹配。对最快的情况而言,
检查并强制转换会过多地影响性能。请更改目标列的类型,
或者自行强制转换 := 的 RHS(例如使用 1L 而不是 1)
此错误是什么意思?我该如何解决这个错误?
解决方案
问题在于你的 ifelse 语句对某些值返回 integer 类型,而对另一些条目返回 numeric(double)类型。data.table 会对列类型不匹配报错,因为它期望由用户自行执行强制转换(出于性能原因,如错误信息中所述)。因此,只需用 as.numeric 将其包裹起来,这样所有值都会被转换为双精度。
p <- p[,RELATIVE_PERCENT := as.numeric(ifelse(ENT_PCT>100, (USED_CORES/ENT)*100,
USR_SYS_CPU_PCT)), by= c("DATE", "LPAR")]
I have a huge data frame, last 30 rows are below:
library(data.table)
dput(p)
structure(list(DATE = structure(c(1367516015, 1367516045, 1367516075,
1367516105, 1367516135, 1367516165, 1367516195, 1367516225, 1367516255,
1367516285, 1367516315, 1367516345, 1367516375, 1367516405, 1367516435,
1367516465, 1367516495, 1367516525, 1367516555, 1367516585, 1367516615,
1367516645, 1367516675, 1367516705, 1367516735, 1367516765, 1367516795,
1367516825, 1367516855, 1367516885), class = c("POSIXct", "POSIXt"
), tzone = ""), LPAR = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("cigp01a4a004", "cigp01b4a002",
"cigp01b4a004", "cigp04a4a002", "cigp04a4a004", "cigp04b4a002",
"cigp04b4a004"), class = "factor"), ENT = c(0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5
), USR_SYS_CPU_PCT = c(79L, 80L, 77L, 77L, 77L, 76L, 79L, 82L,
81L, 80L, 79L, 77L, 77L, 77L, 79L, 79L, 80L, 82L, 82L, 83L, 80L,
81L, 80L, 78L, 78L, 83L, 86L, 87L, 88L, 87L), ENT_PCT = c(706.8,
693.8, 570.1, 641.5, 558.5, 601.5, 674.3, 742.3, 668.9, 722.6,
679.1, 677.2, 548.5, 644.6, 689.3, 716.1, 709.5, 767.3, 753.7,
786.4, 684.2, 735.1, 688.2, 676.6, 645.6, 788, 859.5, 832.6,
883.1, 872.2), PHYSICAL_CPU_USED = c(3.53, 3.47, 2.85, 3.21,
2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39, 2.74, 3.22, 3.45,
3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44, 3.38, 3.23, 3.94,
4.3, 4.16, 4.42, 4.36), PROC_QUE = c(12L, 13L, 19L, 16L, 11L,
13L, 17L, 14L, 9L, 10L, 12L, 13L, 16L, 14L, 22L, 17L, 17L, 17L,
26L, 26L, 15L, 43L, 9L, 11L, 12L, 7L, 31L, 26L, 27L, 23L), RELATIVE_CORES = c(3.53,
3.47, 2.85, 3.21, 2.79, 3.01, 3.37, 3.71, 3.34, 3.61, 3.4, 3.39,
2.74, 3.22, 3.45, 3.58, 3.55, 3.84, 3.77, 3.93, 3.42, 3.68, 3.44,
3.38, 3.23, 3.94, 4.3, 4.16, 4.42, 4.36), USED_CORES = c(2.7887,
2.776, 2.1945, 2.4717, 2.1483, 2.2876, 2.6623, 3.0422, 2.7054,
2.888, 2.686, 2.6103, 2.1098, 2.4794, 2.7255, 2.8282, 2.84, 3.1488,
3.0914, 3.2619, 2.736, 2.9808, 2.752, 2.6364, 2.5194, 3.2702,
3.698, 3.6192, 3.8896, 3.7932)), .Names = c("DATE", "LPAR", "ENT",
"USR_SYS_CPU_PCT", "ENT_PCT", "PHYSICAL_CPU_USED", "PROC_QUE",
"RELATIVE_CORES", "USED_CORES"), class = "data.frame", row.names = c(NA,
-30L))
When I try to calculate some values using data.table as below:
p<-data.table(p)
p<-p[,RELATIVE_PERCENT:=ifelse(ENT_PCT>100, (USED_CORES/ENT)*100, USR_SYS_CPU_PCT), by= c("DATE", "LPAR")]
I get this error:
Error in `[.data.table`(x, , `:=`(RELATIVE_PERCENT, ifelse(ENT_PCT > 100, :
Type of RHS ('integer') must match LHS ('double'). To check and coerce would
impact performance too much for the fastest cases. Either change the type of
the target column, or coerce the RHS of := yourself (e.g. by using 1L instead
of 1)
what does this error mean? How can I get around this error?
解决方案
The problem is that your ifelse statement returns integer type for some values and numeric (double) for some other entries. And data.table complains about the mismatch in the column type, as it expects the coercion to be performed by the user (for performance reasons, as given in the error). So, just wrap it in as.numeric so that all values will be converted to double.
p <- p[,RELATIVE_PERCENT := as.numeric(ifelse(ENT_PCT>100, (USED_CORES/ENT)*100,
USR_SYS_CPU_PCT)), by= c("DATE", "LPAR")]
这篇关于无法使用r data.table包执行计算的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!