从循环中计算函数,将其应用于数据框列表 [英] Compute a function from a loop to apply it in a list of dataframe

查看:155
本文介绍了从循环中计算函数,将其应用于数据框列表的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有一个dataframe df

  structure(list =结构(c(8L,8L,8L,8L,8L,8L,8L,9L,
9L,9L,10L,10L,10L,10L,11L,11L,11L,11L,11L,12L,12L)
13L 13L 13L 14L 14L 14L 14L 15L 15L 15L 16L $ 16L 16L 16L 16L 16L 16L 16L 17L,17L,17L,17L,17L,
17L,17L,18L,18L,18L,18L,18L,18L,20L,20L,20L,40L,40L,$ 40L,40L,40L ,40L,40L,57L,57L,62L,62L,62L,70L,70L,70L,
70L,70L,2L,2L,2L,43L,43L,43L,43L,43L,43L,43L,46L
46L,46L,46L,60L,60L,60L,60L,60L,60L,60L,60L,60L)。标签= c(AU-Tum,
BE-胸罩 ,BR-Sa3,CA-Ca1,CA-Ca2,CA-Ca3,CA-Gro,Ca-Man,
CA- NS2,CA-NS3,CA-NS4,CA-NS5,CA-NS6,CA-NS7,
CA-Oas, CA-SFJ,CA-SF2,CA-SF1,CA-SF2,CA-QJ,CA- ,CA-SJ3,CA-TP1,CA-TP2,CA-TP4,
,CZ-Bk1,DE-Har,DE-Wet,DK- Sor,FI-Hyy FR-Hes,FR-Pue,
,ID-Pag,IT-Ro1,IT-Ro2,IT-Sro,JP-Tak,JP ,SE-Sk1,SE-Sk2,SE-St1,N-Lo,
SE-Abi,SE-Fla UK-Gri,
US-Blo,US-Bn1,US-Bn2,Us-Bn3,US-Dk3,US-Fmf,US-Fwf ,
US-Ha1,US-Ha2,US-Ho1,US-Ho2,US-Lph,US-Me1,US-Me3,
US-Nc2,US-NR1,US-Sp1,US-Sp2,US-Sp3,US-Umb,US-Wcr,
US (156,157,160,162),美国Wi1,美国Wi2,美国-Wi4,美国-Wi8),class =factor),
x = c ,163,164,165,153,154,155,71,
72,73,74,37,38,39,40,41,39,40,22,23,24,12,13 b $ b 14,15,16,4,5,6,7,74,75,76,77,78,79,80,81,82 $ b $ 126,127,128,129,130​​,131 ,132,71,72,73,74,75,76,
99,100,101,49,50,51,52,53,54,56,9,10,46,47,
48,84,85,86,87,88,77,78,79,101,105,106,107,108,$ b $ 109,110,81,82,84,88,131,132,133 ,134,135,136,137,
138,139),y = c(50.047 2381226718,706.825824817992,729.621982051409,
593.225827791495,685.154353165934,574.088067465695,650.30821636616,
494.185166497016,436.312162090908,631.891738044098,280.949480787385,
641.231373456365,412.116433330579,416.824746264203,415.905685925856,
494.374217984441,201.745910386788, 486.030122926459,647.782697262242,
389.839577941515,256.552344558528,605.790549736819,483.045965372879,
668.017897433514,35.2706101682852,265.693628564011,285.116345260642,
291.023782284086,357.428790589795,205.920375034591,229.606221692753,
230.952761338012,241.641164634028,1089.06303295676, 1255.88808925333,
1087.75402177912,1068.248897182,1212.17254891642,884.222588171535,
938.887718005513,863.582247020735,1065.91969416523,902.338968377328,
790.570635510725,834.500908313203,710.755041345197,814.002362551197,
726.814950022846,828.559687148314,611.564 698476112,603.238720579422,
524.322001078981,565.296378873638,532.431853589369,597.174114277044,
606.075737104722,686.408686154056,705.914347674276,1858.98340779543,
1893.38468471169,1819.83262739703,1827.31409981102,1640.5816780664,
1689.0365549922,2112.67917439342,479.374777290737, 326.663507855032,
1184.81825619942,1281.2920902365,1269.12480160726,1265.48484068702,
1193.29000986667,1156.81486114406,1199.7373066445,1116.24029749935,
1100.47051284742,1072.57190890331,1228.25697739795,1576.32775748242,
1631.14609672129,1796.87265141308,1712.90461264737,1844.87409764528,
1938.56225809082,1663.52108450048,1626.12740687071,1333.52924151719,
1349.01338642137,1376.41668179166,1362.32371946308,1317.75608457439,
1519.12511487596,1558.26111694807,1588.8933303128,1624.50100837374,
1433.10019567201,1371.01498340943,1439.94849821774)),.Names = c(ID,
x,y),row.names = c(290L,291L,292L,293L,294L,295L,
296L,297L,298L,299L,300L ,301L,302L,303L,304L,305L,306L,
307L,308L,309L,310L,311L,312L,313L,314L,315L,316L,317L,$ b $ 318L,319L,320L, ,322L,323L,324L,325L,326L,327L,328L,
329L,330L,331L,332L,333L,334L,335L,336L,337L,338L,339L,$ 340L,341L, 343L,344L,351L,352L,353L,424L,425L,426L,
427L,428L,429L,430L,471L,472L,493L,494L,495L,512L,513L,$ b $ 514L,515L ,516L,266L,267L,268L,438L,439L,440L,441L,442L,
443L,444L,451L,452L,453L,454L,484L,485L,486L,487L,488L,
489L ,490L,491L,492L),class =data.frame)

交叉验证中的模型效率通过使用一个非线性函数在这个数据框中留下了一个主题出模式,我已经实现了这个功能:

  library(stats)
library(hydroGOF)

出& lt; - c()
id< - unique(df $ ID)
for(i in id){
fit1 < - try(nls(y_A * x ^ 3 + B * x ^ 2 + C * x + D,数据= df [df $ ID!= i,],start = list(A = 0.02,B = -0.6,C = 50,D = 200) = TRUE)
Out [i] < - if(inherits(fit1,nls))sim = predict(fit1,newdata = df [df $ ID == i,])
}

MEF < - NSE(Out,df $ y)

然而,想从中创建一个函数,以便将其应用于具有相同结构的n个数据帧中,但是也要在循环中包含两个非线性函数。

  stat < -  function(dat){
id< - unique(dat $ ID)
Out< -c()
Out2< -c()
for(i in id){
fit1< - try (A = 1000,B = 0.170,k = - 1,n = 1,...,n) 0.00295)),silent = TRUE);
Out [i] < - if(inherits(fit1,nls))sim = predict(fit1,newdata = dat [dat $ ID == i,])else NA; (A = -0.4,B = 0,B = 1,...,B = 50,C = 300)),silent = TRUE);
Out2 [i] < - if(inherits(fit1,nls))sim = predict(fit1,newdata = dat [dat $ ID == i,])else NA;
c(Out,Out2)
}}

df.list< -list(df)#这里我只放了一个数据帧,但是它不止一个。

res< -lapply(df.list,stat)

解决我的问题?

解决方案

 输出<  -  sapply(
seq_len
函数(i){
模型< - list(
try(
nls(
y〜A *(x ^ B)* (exp(k * x)),
data = df [-i,],
start = list(A = 1000,B = 0.170,k = -0.00295)
),
silent = TRUE
),
try(
nls(
y〜A * x ^ 2 + B * x + C,
data = df [ - i,],
start = list(A = -0.4,B = 50,C = 300)
),
silent = TRUE


sapply(model,function(x){
ifelse(
class(x)==try-error),
NA,
predict(x,newdata = df [i,])

})
}


I have a dataframe df

    structure(list(ID = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 
9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 
13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 
17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 20L, 20L, 20L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 57L, 57L, 62L, 62L, 62L, 70L, 70L, 70L, 
70L, 70L, 2L, 2L, 2L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 46L, 
46L, 46L, 46L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L), .Label = c("AU-Tum", 
"BE-Bra", "BR-Sa3", "CA-Ca1", "CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", 
"CA-NS1", "CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", 
"CA-Oas", "CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", 
"CA-SF3", "CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", 
"CZ-Bk1", "DE-Har", "DE-Wet", "DK-Sor", "FI-Hyy", "FR-Hes", "FR-Pue", 
"ID-Pag", "IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef", "NL-Loo", 
"SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1", "UK-Gri", 
"US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3", "US-Dk3", "US-Fmf", "US-Fwf", 
"US-Ha1", "US-Ha2", "US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3", 
"US-Nc2", "US-NR1", "US-Sp1", "US-Sp2", "US-Sp3", "US-Umb", "US-Wcr", 
"US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8"), class = "factor"), 
    x = c(156, 157, 160, 162, 163, 164, 165, 153, 154, 155, 71, 
    72, 73, 74, 37, 38, 39, 40, 41, 39, 40, 22, 23, 24, 12, 13, 
    14, 15, 16, 4, 5, 6, 7, 74, 75, 76, 77, 78, 79, 80, 81, 82, 
    126, 127, 128, 129, 130, 131, 132, 71, 72, 73, 74, 75, 76, 
    99, 100, 101, 49, 50, 51, 52, 53, 54, 56, 9, 10, 46, 47, 
    48, 84, 85, 86, 87, 88, 77, 78, 79, 101, 105, 106, 107, 108, 
    109, 110, 81, 82, 84, 88, 131, 132, 133, 134, 135, 136, 137, 
    138, 139), y = c(50.0472381226718, 706.825824817992, 729.621982051409, 
    593.225827791495, 685.154353165934, 574.088067465695, 650.30821636616, 
    494.185166497016, 436.312162090908, 631.891738044098, 280.949480787385, 
    641.231373456365, 412.116433330579, 416.824746264203, 415.905685925856, 
    494.374217984441, 201.745910386788, 486.030122926459, 647.782697262242, 
    389.839577941515, 256.552344558528, 605.790549736819, 483.045965372879, 
    668.017897433514, 35.2706101682852, 265.693628564011, 285.116345260642, 
    291.023782284086, 357.428790589795, 205.920375034591, 229.606221692753, 
    230.952761338012, 241.641164634028, 1089.06303295676, 1255.88808925333, 
    1087.75402177912, 1068.248897182, 1212.17254891642, 884.222588171535, 
    938.887718005513, 863.582247020735, 1065.91969416523, 902.338968377328, 
    790.570635510725, 834.500908313203, 710.755041345197, 814.002362551197, 
    726.814950022846, 828.559687148314, 611.564698476112, 603.238720579422, 
    524.322001078981, 565.296378873638, 532.431853589369, 597.174114277044, 
    606.075737104722, 686.408686154056, 705.914347674276, 1858.98340779543, 
    1893.38468471169, 1819.83262739703, 1827.31409981102, 1640.5816780664, 
    1689.0365549922, 2112.67917439342, 479.374777290737, 326.663507855032, 
    1184.81825619942, 1281.2920902365, 1269.12480160726, 1265.48484068702, 
    1193.29000986667, 1156.81486114406, 1199.7373066445, 1116.24029749935, 
    1100.47051284742, 1072.57190890331, 1228.25697739795, 1576.32775748242, 
    1631.14609672129, 1796.87265141308, 1712.90461264737, 1844.87409764528, 
    1938.56225809082, 1663.52108450048, 1626.12740687071, 1333.52924151719, 
    1349.01338642137, 1376.41668179166, 1362.32371946308, 1317.75608457439, 
    1519.12511487596, 1558.26111694807, 1588.8933303128, 1624.50100837374, 
    1433.10019567201, 1371.01498340943, 1439.94849821774)), .Names = c("ID", 
"x", "y"), row.names = c(290L, 291L, 292L, 293L, 294L, 295L, 
296L, 297L, 298L, 299L, 300L, 301L, 302L, 303L, 304L, 305L, 306L, 
307L, 308L, 309L, 310L, 311L, 312L, 313L, 314L, 315L, 316L, 317L, 
318L, 319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L, 327L, 328L, 
329L, 330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 
340L, 341L, 342L, 343L, 344L, 351L, 352L, 353L, 424L, 425L, 426L, 
427L, 428L, 429L, 430L, 471L, 472L, 493L, 494L, 495L, 512L, 513L, 
514L, 515L, 516L, 266L, 267L, 268L, 438L, 439L, 440L, 441L, 442L, 
443L, 444L, 451L, 452L, 453L, 454L, 484L, 485L, 486L, 487L, 488L, 
489L, 490L, 491L, 492L), class = "data.frame")

I wanted to compute a the model efficiency in a cross validation leave one subject out mode on this dataframe by using one non-linear function, which I have implemented as such:

library(stats)
library (hydroGOF)

Out <- c()
id <- unique(df$ID)
for (i in id){
  fit1 <- try(nls(y~A*x^3+B*x^2+C*x+D, data = df[df$ID != i,], start = list(A=0.02, B=-0.6, C= 50, D=200)), silent=TRUE)
  Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=df[df$ID==i,])
}

MEF<- NSE(Out, df$y)

However, I would like to create a function out of it in order to apply it in n dataframe with the same structure but by also including two non-linear functions in the loop. I have started to implement this line of codes but without success.

stat <- function(dat) {
  id <- unique(dat$ID)
  Out<-c()
  Out2<-c()
  for (i in id){
    fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
    Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
    Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
c(Out, Out2)
}}

df.list<-list(df) # Here I put only one dataframe but it will be more than one. 

res<-lapply(df.list, stat) 

Anyone could solve my issue?

解决方案

output <- sapply(
  seq_len(nrow(df)), 
  function(i){
    model <- list(
      try(
        nls(
          y ~ A * (x ^ B) * (exp(k * x)), 
          data = df[-i,], 
          start = list(A = 1000, B = 0.170, k = -0.00295)
        ), 
        silent=TRUE
      ),
      try(
        nls(
          y ~ A * x ^ 2 + B * x + C, 
          data = df[-i,], 
          start = list(A = -0.4, B = 50, C= 300)
        ), 
        silent=TRUE
      )
    )
    sapply(model, function(x){
      ifelse(
        class(x) == "try-error",
        NA,
        predict(x, newdata = df[i, ])
      )
    })
  }
)

这篇关于从循环中计算函数,将其应用于数据框列表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆