如何在 tibble 中取列表的子集 [英] How to take subsets of lists in a tibble
问题描述
我有几只股票的年度财务数据。我需要把它展开成月度数据;多亏了我之前提的一个问题所得到的回答,我有了一个解决方案,即把 date
列变成日期列表:
library(tidyverse)
library(lubridate)
# Annual sample data: 78 rows covering two securities (1572, 1676) and two
# metrics (EPS_GROWTH, ND_EQUITY). Within each metric there are 22 yearly
# observations for security 1572 followed by 17 for security 1676.
# The dates are stored as day counts since 1970-01-01 and are identical for
# both metrics; note the multi-year gap in security 1676 (11565 -> 13756).
factors.subset.raw = structure(list(
  sec_id = rep(rep(c(1572L, 1676L), times = c(22L, 17L)), times = 2L),
  metric = rep(c("EPS_GROWTH", "ND_EQUITY"), each = 39L),
  date = structure(rep(c(
    # security 1572
    9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752, 13117,
    13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402, 16769, 17135,
    # security 1676
    9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487, 14852,
    15217, 15583, 15947, 16311, 16678, 17044
  ), times = 2L), class = "Date"),
  value = c(
    # EPS_GROWTH, security 1572
    0.250468, 0.091548, -0.100863, 0.058375, 0.24784, 0.178765, 0.099276,
    0.25472, -0.033291, 0.124165, 0.050947, 0.243008, 0.1205, -0.239625,
    -0.231221, 0.365649, 0.163779, 0.024976, 0.08388, 0.154777, 0.016473,
    -0.272928,
    # EPS_GROWTH, security 1676
    -0.018711, -0.162076, -0.599241, -4.071504, -0.37761, 1.694085, 0.045113,
    0.329818, 0.199564, -0.616418, 1.164773, 0.877078, -0.325099, -0.294199,
    0.272016, -0.706077, -2.57027,
    # ND_EQUITY, security 1572
    4.500261, 4.734375, 4.090376, 3.322846, 3.640895, 4.645253, 4.783054,
    3.946184, 3.847828, 4.077601, 4.778736, 5.453883, 5.14355, 5.084551,
    3.370378, 3.076065, 2.812879, 2.87688, 2.430692, 3.029766, 3.062665,
    3.349906,
    # ND_EQUITY, security 1676
    0.396299, 0.60174, 0.527478, 1.048755, 1.136417, 0.668333, 0.523115,
    0.259175, 0.164024, 0.118469, 0.061141, 0.096251, 0.346829, 0.401832,
    0.300988, 0.344943, 0.432505
  )),
  row.names = c(NA, -78L), class = c("tbl_df", "tbl", "data.frame"))
# Turn every annual row into a list of monthly dates: each date is rounded up
# to a month boundary, then a monthly sequence is generated up to the day
# before the next observation of the same (sec_id, metric) group.
factors.subset.monthly = factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(date = ceiling_date(date, 'month')) %>%
  # The last row of a group has no successor, so its sequence runs to today().
  mutate(date = map2(date, lead(date - 1, default = today()), seq, by = 'month'))
现在只需在上面的代码后面添加 %>% unnest() %>% mutate(date = date - 1)
,即可将我的年度数据转换为月度数据,并且所有日期都是月底。
当数据存在很大差距时,我的问题出现。发生这种情况时,我只想填写最多18个月。
我尝试过添加管道步骤来截断 date
列,但到目前为止还没能弄明白。例如,下面这段代码会报大小不兼容(incompatible size)的错误:
# The asker's failing attempt, kept as written: it is reported to stop with an
# "incompatible size" error. `1:count.cutoff` is built from a whole column and
# is used to index the list column itself rather than each element inside it.
factors.subset.monthly %>%
  mutate(count.date = as.numeric(lapply(date, length))) %>%
  mutate(count.cutoff = ifelse(count.date <= 18, count.date, 18)) %>%
  mutate(date = date[1:count.cutoff])
您需要使用 map
/lapply
来遍历列表列,然后只需使用 head
将其限制为 18 个观察值:
library(tidyverse)
library(lubridate)

# Keep at most the first 18 dates of every element of the list column.
df <- factors.subset.monthly %>% mutate(date = map(date, head, 18))

# lengths() gives the length of each list element; length() would only return
# the number of rows and could not distinguish the two results below.
any(lengths(factors.subset.monthly$date) > 18)
#> [1] TRUE
any(lengths(df$date) > 18)
#> [1] FALSE
您还可以包括 head
当你做 factors.subset.monthly
:
# Build the monthly date lists and cap each one at 18 entries in one step.
factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(date = ceiling_date(date, 'month'),
         # .x is the start date, .y the day before the next observation
         # (today() for the last row of a group).
         date = map2(date, lead(date - 1, default = today()),
                     ~head(seq(.x, .y, by = 'month'), 18)))
您也可以取目标日期与起始日期之后 18 个月这两者中较早的一个,作为 seq
的 to 参数
,但由于各月份长度不一,加上 18 个月有些麻烦。
I have annual financial data for several stocks. I needed to blow it out to become monthly data and, thanks to an answer to this question I'd asked earlier, I have a solution which involves mutating the date
column into lists of dates:
library(tidyverse)
library(lubridate)
# Annual sample data: 78 rows covering two securities (1572, 1676) and two
# metrics (EPS_GROWTH, ND_EQUITY). Within each metric there are 22 yearly
# observations for security 1572 followed by 17 for security 1676.
# The dates are stored as day counts since 1970-01-01 and are identical for
# both metrics; note the multi-year gap in security 1676 (11565 -> 13756).
factors.subset.raw <- structure(
  list(
    sec_id = rep(rep(c(1572L, 1676L), times = c(22L, 17L)), times = 2L),
    metric = rep(c("EPS_GROWTH", "ND_EQUITY"), each = 39L),
    date = structure(
      rep(
        c(
          # security 1572
          9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752,
          13117, 13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402,
          16769, 17135,
          # security 1676
          9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487,
          14852, 15217, 15583, 15947, 16311, 16678, 17044
        ),
        times = 2L
      ),
      class = "Date"
    ),
    value = c(
      # EPS_GROWTH, security 1572
      0.250468, 0.091548, -0.100863, 0.058375, 0.24784, 0.178765, 0.099276,
      0.25472, -0.033291, 0.124165, 0.050947, 0.243008, 0.1205, -0.239625,
      -0.231221, 0.365649, 0.163779, 0.024976, 0.08388, 0.154777, 0.016473,
      -0.272928,
      # EPS_GROWTH, security 1676
      -0.018711, -0.162076, -0.599241, -4.071504, -0.37761, 1.694085, 0.045113,
      0.329818, 0.199564, -0.616418, 1.164773, 0.877078, -0.325099, -0.294199,
      0.272016, -0.706077, -2.57027,
      # ND_EQUITY, security 1572
      4.500261, 4.734375, 4.090376, 3.322846, 3.640895, 4.645253, 4.783054,
      3.946184, 3.847828, 4.077601, 4.778736, 5.453883, 5.14355, 5.084551,
      3.370378, 3.076065, 2.812879, 2.87688, 2.430692, 3.029766, 3.062665,
      3.349906,
      # ND_EQUITY, security 1676
      0.396299, 0.60174, 0.527478, 1.048755, 1.136417, 0.668333, 0.523115,
      0.259175, 0.164024, 0.118469, 0.061141, 0.096251, 0.346829, 0.401832,
      0.300988, 0.344943, 0.432505
    )
  ),
  row.names = c(NA, -78L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Turn every annual row into a list of monthly dates: each date is rounded up
# to a month boundary, then a monthly sequence is generated up to the day
# before the next observation of the same (sec_id, metric) group.
factors.subset.monthly <- factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(
    date = ceiling_date(date, "month"),
    # The last row of a group has no successor, so its sequence runs to today().
    date = map2(
      date,
      lead(date - 1, default = today()),
      function(from, to) seq(from, to, by = "month")
    )
  )
Now it suffices to add %>% unnest() %>% mutate(date = date - 1)
to the above to transform my annual data to monthly, with all dates being end of month.
My problem occurs when there is a big gap in the data. When this happens, I only want to fill forward at most 18 months.
I've tried adding pipes which cutoff the date
column, but so far I can't seem to figure it out. This little gem gives me incompatible size errors, for example:
# Failing attempt, quoted as-is: the asker reports that it stops with an
# "incompatible size" error. count.date holds the length of each list element
# and count.cutoff caps that length at 18, but `1:count.cutoff` is then built
# from the whole count.cutoff column and used to index the list column `date`
# itself rather than the dates inside each element.
factors.subset.monthly %>%
mutate(count.date = as.numeric(lapply(date, length))) %>%
mutate(count.cutoff = ifelse(count.date <= 18, count.date, 18)) %>%
mutate(date = date[1:count.cutoff])
You need to use map
/lapply
to iterate over the list column, but then you can simply use head
to limit it to 18 observations:
library(tidyverse)
library(lubridate)
# Truncate every element of the list column to its first 18 dates.
df <- factors.subset.monthly %>%
  mutate(date = map(date, ~ head(.x, 18)))

# Before truncation at least one element holds more than 18 dates ...
any(lengths(factors.subset.monthly$date) > 18)
#> [1] TRUE
# ... afterwards none does.
any(lengths(df$date) > 18)
#> [1] FALSE
You could also just include head
when you make factors.subset.monthly
:
# Build the monthly date lists and cap each one at 18 entries in one step.
factors.subset.raw %>%
  group_by(sec_id, metric) %>%
  mutate(
    date = ceiling_date(date, "month"),
    date = map2(
      date,
      # End of each range: the day before the next observation in the group,
      # or today() for the group's last row.
      lead(date - 1, default = today()),
      function(from, to) head(seq(from, to, by = "month"), 18)
    )
  )
You could also use the minimum of the target date or 18 months past the start date for the to
parameter of seq
, but adding 18 months is somewhat difficult due to their irregular length.
这篇关于如何在列表中取得列表子集的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!