使用while循环存储purrr:map_dfr和dplyr :: group_split的输出 [英] Store output from purrr:map_dfr and dplyr::group_split with while loop
问题描述
我想使用 map_dfr
和 group_split
通过while循环运行data.frame组,存储结果。
我可以像下面这样对单个分组执行此操作。
# df dput below
# For one (grp, VAR) slice: repeatedly pick the closest remaining match
# (smallest DIFF) between a Sample.x and a Sample.y, store it, then drop that
# Sample.x and Sample.y so the next pass finds the next closest match.
# The loop runs once per unique Sample.y in the slice.
df_f <- df %>% filter(grp == "AB" & VAR == "Var1")
HowMany <- length(unique(df_f$Sample.y))
i <- 1
MyList <- list()
while (i <= HowMany) {
  # Best (minimum DIFF) candidate row for every Sample.x, ranked by DIFF.
  res1 <- df_f %>%
    group_by(grp, VAR, Sample.x) %>%
    filter(DIFF == min(DIFF)) %>%
    ungroup() %>%
    mutate(Rank1 = dense_rank(DIFF))
  # Keep only the single best-ranked row (first one wins on ties).
  res2 <- res1 %>% group_by(grp, VAR) %>% filter(rank(Rank1, ties.method = "first") == 1)
  SY <- as.numeric(res2$Sample.y)
  SX <- as.numeric(res2$Sample.x)
  # Remove the matched Sample.y and Sample.x from the candidate pool.
  res3 <- df_f %>% filter(Sample.y != SY)
  res4 <- res3 %>% filter(Sample.x != SX)
  df_f <- res4
  MyList[[i]] <- res2
  i <- i + 1
}
# Stack the per-iteration matches into one data frame.
df.result <- do.call("rbind", MyList)
但是,当我尝试把这个 while 循环写成函数,以便与 map_dfr
和 group_split 一起使用时,
我无法(或者说不确定如何)存储输出。
# Split df by (grp, VAR), apply fun to each piece, and row-bind the results.
MyResult <- df %>%
  dplyr::group_split(grp, VAR) %>%
  map_dfr(fun) # fun is defined below
df.store <- data.frame() # attempt to store results
# The question's NON-WORKING attempt, kept as asked.
# Each pass computes the best remaining match (res2), but res2 is never stored
# (both storage lines are commented out) and nothing is returned: the last
# expression in the body is the while loop, whose value is an invisible NULL.
fun <- function(df) {
  HowMany <- length(unique(df$Sample.y))
  i <- 1
  MyList_FF <- list()
  ThisDF <- df
  while (i <= HowMany) {
    res1 <- ThisDF %>%
      group_by(grp, VAR, Sample.x) %>%
      filter(DIFF == min(DIFF)) %>%
      ungroup() %>%
      mutate(Rank1 = dense_rank(DIFF))
    res2 <- res1 %>% group_by(grp, VAR) %>% filter(rank(Rank1, ties.method = "first") == 1)
    # print(res2) # when printed to screen the desired output looks correct
    SY <- as.numeric(res2$Sample.y)
    SX <- as.numeric(res2$Sample.x)
    res3 <- ThisDF %>% filter(Sample.y != SY)
    res4 <- res3 %>% filter(Sample.x != SX)
    # df.store <- rbind(df.store, res4)
    # MyList_FF[[i]] <- res2
    ThisDF <- res4
    i <- i + 1
  }
}
我试过 rbind
或使用列表
存储输出,但我的尝试都不正确。如果把 res2 打印到屏幕上,可以看到所需的输出一次一行地出现。如何存储每个 group_split
的 fun
的输出?
# df dput
# Example data: a 42-row data.frame with columns Location.x, Sample.x, VAR,
# value.x, Location.y, Sample.y, value.y, DIFF and grp.
df <- structure(list(Location.x = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("A", "C", "B"), class = "factor"),
Sample.x = c(6L, 6L, 10L, 10L, 9L, 9L, 6L, 6L, 10L, 10L,
9L, 9L, 6L, 6L, 6L, 10L, 10L, 10L, 9L, 9L, 9L, 6L, 6L, 6L,
10L, 10L, 10L, 9L, 9L, 9L, 1L, 1L, 1L, 9L, 9L, 9L, 1L, 1L,
1L, 9L, 9L, 9L), VAR = c("Var1", "Var1", "Var1", "Var1",
"Var1", "Var1", "Var2", "Var2", "Var2", "Var2", "Var2", "Var2",
"Var1", "Var1", "Var1", "Var1", "Var1", "Var1", "Var1", "Var1",
"Var1", "Var2", "Var2", "Var2", "Var2", "Var2", "Var2", "Var2",
"Var2", "Var2", "Var1", "Var1", "Var1", "Var1", "Var1", "Var1",
"Var2", "Var2", "Var2", "Var2", "Var2", "Var2"), value.x = c(56.48,
56.48, 57.03, 57.03, 55.04, 55.04, 6, 6, 10, 10, 9, 9, 56.48,
56.48, 56.48, 57.03, 57.03, 57.03, 55.04, 55.04, 55.04, 6,
6, 6, 10, 10, 10, 9, 9, 9, 55.62, 55.62, 55.62, 55.65, 55.65,
55.65, 1, 1, 1, 9, 9, 9), Location.y = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A",
"C", "B"), class = "factor"), Sample.y = c(1L, 9L, 1L, 9L,
1L, 9L, 1L, 9L, 1L, 9L, 1L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L,
7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L,
7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L), value.y = c(55.62, 55.65,
55.62, 55.65, 55.62, 55.65, 1, 9, 1, 9, 1, 9, 1.4, 111.6,
111.8, 1.4, 111.6, 111.8, 1.4, 111.6, 111.8, 10.2, 14.4,
20.9, 10.2, 14.4, 20.9, 10.2, 14.4, 20.9, 1.4, 111.6, 111.8,
1.4, 111.6, 111.8, 10.2, 14.4, 20.9, 10.2, 14.4, 20.9), DIFF = c(0.859999999999999,
0.829999999999998, 1.41, 1.38, 0.579999999999998, 0.609999999999999,
5, 3, 9, 1, 8, 0, 55.08, 55.12, 55.32, 55.63, 54.57, 54.77,
53.64, 56.56, 56.76, 4.2, 8.4, 14.9, 0.199999999999999, 4.4,
10.9, 1.2, 5.4, 11.9, 54.22, 55.98, 56.18, 54.25, 55.95,
56.15, 9.2, 13.4, 19.9, 1.2, 5.4, 11.9), grp = c("AC", "AC",
"AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC",
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB",
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "CB", "CB",
"CB", "CB", "CB", "CB", "CB", "CB", "CB", "CB", "CB", "CB"
)), row.names = c(NA, -42L), class = "data.frame")
唯一缺少的是:被映射的函数 fun
没有返回值。它确实正确地计算并构建了临时列表 MyList_FF
,这一点可以通过 print()
调用看到,但由于没有返回值,结果就丢失了。
# Greedy matcher for one (grp, VAR) slice of the data.
# df: a data frame with columns grp, VAR, Sample.x, Sample.y and DIFF.
# Returns the selected matches (one per iteration) row-bound into a single
# data frame, with an added Rank1 column.
fun <- function(df) {
  HowMany <- length(unique(df$Sample.y))
  i <- 1
  MyList_FF <- list()
  df_f <- df
  while (i <= HowMany) {
    # Best (minimum DIFF) candidate row for every Sample.x, ranked by DIFF.
    res1 <- df_f %>%
      group_by(grp, VAR, Sample.x) %>%
      filter(DIFF == min(DIFF)) %>%
      ungroup() %>%
      mutate(Rank1 = dense_rank(DIFF))
    # Keep only the single best-ranked row (first one wins on ties).
    res2 <- res1 %>% group_by(grp, VAR) %>% filter(rank(Rank1, ties.method = "first") == 1)
    SY <- as.numeric(res2$Sample.y)
    SX <- as.numeric(res2$Sample.x)
    # Remove the matched Sample.y and Sample.x from the candidate pool.
    res3 <- df_f %>% filter(Sample.y != SY)
    res4 <- res3 %>% filter(Sample.x != SX)
    df_f <- res4
    MyList_FF[[i]] <- res2
    i <- i + 1
  }
  # this is the magic line
  do.call("rbind", MyList_FF)
  # as the last expression, the row-bound result of the list built inside the
  # function is what the function returns
}
神奇之处在于最后一行,类似于您在单个示例之后所做的,将中间结果列表绑定在一起。在R中,仅当您尝试提早返回时,才需要 return()
函数,因为默认情况下,R函数将返回最后一个值。因此,在这里我们无需明确地说 return(do.call( rbind,MyList_FF))
,不过加上也无妨。在不能工作的示例中,函数体的最后一个表达式是 while 循环(以对 i
的赋值结束),其值是不可见的 NULL,因此您没有得到任何对象,但也没有收到任何错误。
完整的示例:
# Split df by (grp, VAR), run fun on each piece, and row-bind the results.
MyResult <- df %>%
  dplyr::group_split(grp, VAR) %>%
  map_df(fun)
# Print the combined result.
MyResult
# A tibble: 16 x 10
# Groups: grp, VAR [1]
Location.x Sample.x VAR value.x Location.y Sample.y value.y DIFF grp Rank1
<fct> <int> <chr> <dbl> <fct> <int> <dbl> <dbl> <chr> <int>
1 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
2 A 10 Var1 57.0 B 7 112. 54.6 AB 1
3 A 6 Var1 56.5 B 9 112. 55.3 AB 1
4 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
5 A 10 Var1 57.0 B 7 112. 54.6 AB 1
6 A 6 Var1 56.5 B 9 112. 55.3 AB 1
7 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
8 A 10 Var1 57.0 B 7 112. 54.6 AB 1
9 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
10 A 10 Var1 57.0 B 7 112. 54.6 AB 1
11 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
12 A 10 Var1 57.0 B 7 112. 54.6 AB 1
13 A 6 Var1 56.5 B 9 112. 55.3 AB 1
14 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
15 A 10 Var1 57.0 B 7 112. 54.6 AB 1
16 A 6 Var1 56.5 B 9 112. 55.3 AB 1
旁注:如果您经常使用 do.call("xbind", list)
,您可能会喜欢 dplyr :: bind_rows(list)
和 dplyr :: bind_cols(list)
。
I would like to use map_dfr
and group_split
to run groups of a data.frame through a while loop and store the results.
I can do this for one group like this.
# df dput below
# For one (grp, VAR) slice: repeatedly pick the closest remaining match
# (smallest DIFF) between a Sample.x and a Sample.y, store it, then drop that
# Sample.x and Sample.y so the next pass finds the next closest match.
# The loop runs once per unique Sample.y in the slice.
df_f <- df %>% filter(grp == "AB" & VAR == "Var1")
HowMany <- length(unique(df_f$Sample.y))
i <- 1
MyList <- list()
while (i <= HowMany){
# Best (minimum DIFF) candidate row for every Sample.x, ranked by DIFF.
res1 <- df_f %>%
group_by(grp, VAR, Sample.x) %>%
filter(DIFF == min(DIFF)) %>%
ungroup() %>%
mutate(Rank1 = dense_rank(DIFF))
# Keep only the single best-ranked row (first one wins on ties).
res2 <- res1 %>% group_by(grp, VAR) %>% filter(rank(Rank1, ties.method="first")==1)
SY <- as.numeric(res2$Sample.y)
SX <- as.numeric(res2$Sample.x)
# Remove the matched Sample.y and Sample.x from the candidate pool.
res3 <- df_f %>% filter(Sample.y != SY)
res4 <- res3 %>% filter(Sample.x != SX)
df_f <- res4
MyList[[i]] <- res2
i <- i + 1
}
# Stack the per-iteration matches into one data frame.
df.result <- do.call("rbind", MyList)
But when trying to make a function with the while loop to use with map_dfr
and group_split
I am unable and/or unsure on how to store the output.
# Split df by (grp, VAR), apply fun to each piece, and row-bind the results.
MyResult <- df %>%
dplyr::group_split(grp, VAR) %>%
map_dfr(fun) # fun is defined below
df.store <- data.frame() # attempt to store results
# The question's NON-WORKING attempt.
# Each pass computes the best remaining match (res2), but res2 is never stored
# (both storage lines below are commented out) and nothing is returned: the
# last expression in the body is the while loop, whose value is an invisible
# NULL.
fun <- function(df){
HowMany <- length(unique(df$Sample.y))
i <- 1
MyList_FF <- list()
ThisDF <- df
while (i <= HowMany){
# Best (minimum DIFF) candidate row for every Sample.x, ranked by DIFF.
res1 <- ThisDF %>%
group_by(grp, VAR, Sample.x) %>%
filter(DIFF == min(DIFF)) %>%
ungroup() %>%
mutate(Rank1 = dense_rank(DIFF))
# Keep only the single best-ranked row (first one wins on ties).
res2 <- res1 %>% group_by(grp, VAR) %>% filter(rank(Rank1, ties.method="first")==1)
# print(res2) # when printed to screen the desired output looks correct
SY <- as.numeric(res2$Sample.y)
SX <- as.numeric(res2$Sample.x)
# Remove the matched Sample.y and Sample.x from the candidate pool.
res3 <- ThisDF %>% filter(Sample.y != SY)
res4 <- res3 %>% filter(Sample.x != SX)
# NOTE(review): this attempt bound res4 (the leftover rows), not res2 (the
# selected match); a plain <- inside the function would also only change a
# local copy of df.store.
# df.store <- rbind(df.store, res4)
# MyList_FF[[i]] <- res2
ThisDF <- res4
i <- i + 1
}
}
I've tried to rbind
or use a list
to store the output, but my attempts have not been correct. If I print "res2" to screen, I can see the desired output one row at a time. How do I store the output from fun
from each group_split
?
# df dput
# Example data: a 42-row data.frame with columns Location.x, Sample.x, VAR,
# value.x, Location.y, Sample.y, value.y, DIFF and grp.
# Location.x / Location.y are factors with levels "A", "C", "B"; VAR and grp
# are character; Sample.x / Sample.y are integer.
# NOTE(review): DIFF looks like abs(value.x - value.y) -- confirm.
df <- structure(list(Location.x = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("A", "C", "B"), class = "factor"),
Sample.x = c(6L, 6L, 10L, 10L, 9L, 9L, 6L, 6L, 10L, 10L,
9L, 9L, 6L, 6L, 6L, 10L, 10L, 10L, 9L, 9L, 9L, 6L, 6L, 6L,
10L, 10L, 10L, 9L, 9L, 9L, 1L, 1L, 1L, 9L, 9L, 9L, 1L, 1L,
1L, 9L, 9L, 9L), VAR = c("Var1", "Var1", "Var1", "Var1",
"Var1", "Var1", "Var2", "Var2", "Var2", "Var2", "Var2", "Var2",
"Var1", "Var1", "Var1", "Var1", "Var1", "Var1", "Var1", "Var1",
"Var1", "Var2", "Var2", "Var2", "Var2", "Var2", "Var2", "Var2",
"Var2", "Var2", "Var1", "Var1", "Var1", "Var1", "Var1", "Var1",
"Var2", "Var2", "Var2", "Var2", "Var2", "Var2"), value.x = c(56.48,
56.48, 57.03, 57.03, 55.04, 55.04, 6, 6, 10, 10, 9, 9, 56.48,
56.48, 56.48, 57.03, 57.03, 57.03, 55.04, 55.04, 55.04, 6,
6, 6, 10, 10, 10, 9, 9, 9, 55.62, 55.62, 55.62, 55.65, 55.65,
55.65, 1, 1, 1, 9, 9, 9), Location.y = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A",
"C", "B"), class = "factor"), Sample.y = c(1L, 9L, 1L, 9L,
1L, 9L, 1L, 9L, 1L, 9L, 1L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L,
7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L, 3L,
7L, 9L, 3L, 7L, 9L, 3L, 7L, 9L), value.y = c(55.62, 55.65,
55.62, 55.65, 55.62, 55.65, 1, 9, 1, 9, 1, 9, 1.4, 111.6,
111.8, 1.4, 111.6, 111.8, 1.4, 111.6, 111.8, 10.2, 14.4,
20.9, 10.2, 14.4, 20.9, 10.2, 14.4, 20.9, 1.4, 111.6, 111.8,
1.4, 111.6, 111.8, 10.2, 14.4, 20.9, 10.2, 14.4, 20.9), DIFF = c(0.859999999999999,
0.829999999999998, 1.41, 1.38, 0.579999999999998, 0.609999999999999,
5, 3, 9, 1, 8, 0, 55.08, 55.12, 55.32, 55.63, 54.57, 54.77,
53.64, 56.56, 56.76, 4.2, 8.4, 14.9, 0.199999999999999, 4.4,
10.9, 1.2, 5.4, 11.9, 54.22, 55.98, 56.18, 54.25, 55.95,
56.15, 9.2, 13.4, 19.9, 1.2, 5.4, 11.9), grp = c("AC", "AC",
"AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC", "AC",
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB",
"AB", "AB", "AB", "AB", "AB", "AB", "AB", "AB", "CB", "CB",
"CB", "CB", "CB", "CB", "CB", "CB", "CB", "CB", "CB", "CB"
)), row.names = c(NA, -42L), class = "data.frame")
The only piece missing was your mapped function fun
wasn't returning a value. It was computing and building the temporary list, MyList_FF
properly, you could see with the print()
calls, but without a return, it was disappearing.
# Greedy matcher for one (grp, VAR) slice of the data.
#
# df: a data frame with columns grp, VAR, Sample.x, Sample.y and DIFF.
#
# Runs one round per unique Sample.y. Each round selects the row with the
# smallest DIFF among the rows still in the pool, records it, and then removes
# every row that shares its Sample.y or its Sample.x.
#
# Returns the recorded rows row-bound into one data frame (with an added Rank1
# column). Because this is the last expression, no explicit return() is needed.
fun <- function(df) {
  n_rounds <- length(unique(df$Sample.y))
  picks <- list()
  pool <- df
  for (k in seq_len(n_rounds)) {
    # Per-Sample.x minimum-DIFF rows, dense-ranked, reduced to the top row.
    best <- pool %>%
      group_by(grp, VAR, Sample.x) %>%
      filter(DIFF == min(DIFF)) %>%
      ungroup() %>%
      mutate(Rank1 = dense_rank(DIFF)) %>%
      group_by(grp, VAR) %>%
      filter(rank(Rank1, ties.method = "first") == 1)
    picks[[k]] <- best
    used_y <- as.numeric(best$Sample.y)
    used_x <- as.numeric(best$Sample.x)
    # Retire the matched Sample.y and Sample.x before the next round.
    pool <- pool %>%
      filter(Sample.y != used_y) %>%
      filter(Sample.x != used_x)
  }
  # this is the magic line: bind the per-round picks and return them
  do.call("rbind", picks)
}
The magic is in that last line, similar to what you did after your single example, binding up the intermediate results list. In R the return()
function is only needed if you are trying to return early, because by default R functions will return the last value. So here we don't need to explicitly say return(do.call("rbind", MyList_FF))
, although it wouldn't hurt anything if you did. In the non-working example the last expression in the function body was the while loop (ending with the assignment to i
), and a while loop always evaluates to an invisible NULL, so you were not getting any objects back, but were not getting any errors either.
For a full working example:
# Split df into one data frame per (grp, VAR) combination, run fun on each
# piece, and row-bind the per-group results into a single data frame.
MyResult <- df %>%
dplyr::group_split(grp, VAR) %>%
map_df(fun)
# Print the combined result.
MyResult
# A tibble: 16 x 10
# Groups: grp, VAR [1]
Location.x Sample.x VAR value.x Location.y Sample.y value.y DIFF grp Rank1
<fct> <int> <chr> <dbl> <fct> <int> <dbl> <dbl> <chr> <int>
1 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
2 A 10 Var1 57.0 B 7 112. 54.6 AB 1
3 A 6 Var1 56.5 B 9 112. 55.3 AB 1
4 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
5 A 10 Var1 57.0 B 7 112. 54.6 AB 1
6 A 6 Var1 56.5 B 9 112. 55.3 AB 1
7 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
8 A 10 Var1 57.0 B 7 112. 54.6 AB 1
9 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
10 A 10 Var1 57.0 B 7 112. 54.6 AB 1
11 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
12 A 10 Var1 57.0 B 7 112. 54.6 AB 1
13 A 6 Var1 56.5 B 9 112. 55.3 AB 1
14 A 9 Var1 55.0 B 3 1.4 53.6 AB 1
15 A 10 Var1 57.0 B 7 112. 54.6 AB 1
16 A 6 Var1 56.5 B 9 112. 55.3 AB 1
Side note if you use do.call("xbind", list)
a lot you might enjoy dplyr::bind_rows(list)
and dplyr::bind_cols(list)
.
这篇关于使用while循环存储purrr:map_dfr和dplyr :: group_split的输出的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!