如何创建一个列,根据条件累计添加前两行的总和? [英] How can I create a column that cumulatively adds the sum of two previous rows based on conditions?

查看:145
本文介绍了如何创建一个列,根据条件累计添加前两行的总和?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我曾试着问过这个问题,但是这个问题不好说。这是一个新的尝试,因为我还没有解决它。



我有一个数据集与赢家,输家,日期,winner_points和loser_points。
$ b

对于每一行,我需要两个新的列,一个是赢家,一个是失败者,表示他们迄今获得了多少分(赢家和输家)。



示例数据:

 赢家<-c(1,2,3, 1,2,3,1,2,3)
loser <-c(3,1,1,2,1,1,3,1,2)
date <-c( 2017年10月1日, 2017年10月2日, 2017年10月3日, 2017年10月4日, 2017年10月5日, 2017年10月6日,2017年-10-07,2017-10-08,2017-10-09)
winner_points <-c(2,1,2,1,2,1,2,1,2)
loser_points< -c(1,0,1,0,1,0,1,0,1)
test_data< - data.frame(winner,loser,date = as.Date日期),winner_points,loser_points)

我想输出为:

pre $ winner_points_sum <-c(0,0,1,3,1,3,5,3,5)
loser_points_sum <-c (0,2,1,4,5,4,7,4)
test_data< - data.frame(赢家,输家,日期= as.Date(日期),赢家点,输家点,赢家点数,输家点数)
$ b

到目前为止我已经解决了这个问题,就是做一个for循环,例如:

  library(dplyr) 
test_data $ winner_points_sum_loop < - 0
test_data $ loser_points_sum_loop < - 0

for(i in row.names(test_data)){
test_data [i, ] $ winner_points_sum_loop < -

test_data%>%
dplyr :: filter(winner == test_data [i,] $ winner&日期< test_data [i,] $ date)%>%
dplyr :: summarize(points = sum(winner_points,na.rm = TRUE))
+
test_data%>%
dplyr :: filter(loser == test_data [i,] $ winner& date< test_data [i,] $ date)%>%
dplyr :: summarize(points = sum(loser_points,na .rm = TRUE))

}

test_data $ winner_points_sum_loop< - unlist(test_data $ winner_points_sum_loop)

有什么建议如何解决这个问题?当行数加起来时,查询需要相当长的一段时间。我已经试着用AVE函数来详细说明,我可以做一列来作为赢家,但是不能弄清楚如何将他们的积分作为失败者加入。



解决方案

code>赢者<-c(1,2,3,1,2,3,1,2,3)
失败者<-c(3,1,1,2,1,1 ,3,1,2)
日期< - c(2017-10-01,2017-10-02,2017-10-03,2017-10-04, 2017-10-05,2017-10-06,2017-10-07,2017-10-08,2017-10-09)
winner_points< - c(2 ,1,2,1,2,1,2,1,2)
loser_points< - c(1,0,1,0,1,0,1,0,1)
test_data < - data.frame(赢家,输家,日期= as.Date(日期),winner_points,loser_points)


图书馆(dplyr)
图书馆(tidyr)

test_data%>%
联合(赢家,赢家,赢家点数)%>%#联赛赢家列
单位(失败者,失败者,失败者点)%>%#统一失败者列
collect(类型,pl_pts,赢家,失败者,-date)%>%#重塑
separate(pl_pts,c( (玩家,分数),convert = T)%>%#分隔列
排列(日期)%>%#订单日期(如果不是)
group_by(玩家)%> ;每个玩家的%#
mutate(sum_points = cumsum(points) - points)%>%#获得积分到该日期
ungroup()%>%#忘记分组$ b $ (pl_pts_sumpts,player,points,sum_points)%>%#unite columns
spread(type,pl_pts_sumpts)%>%#整形
separate(loser,c(loser,loser_points ,loser_points_sum),convert = T)%>%#分隔列并给出适当的名字
separate(winner,c(winner,winner_poin转换= T)%>%
select(赢家,输家,日期,赢家点数,输家点数,赢家点数,输家点数)#选择你喜欢的订单


##A tibble:9 x 7
#赢家失败日期winner_points loser_points winner_points_sum loser_points_sum
#*< int> < INT> <日期> < INT> < INT> < INT> < INT>
#1 1 3 2017-10-01 2 1 0 0
#2 2 1 2017-10-02 1 0 0 2
#3 3 1 2017-10-03 2 1 1 2
#4 1 2 2017-10-04 1 0 3 1
#5 2 1 2017-10-05 2 1 1 4
#6 3 1 2017-10-06 1 0 3 5
#7 1 3 2017-10-07 2 1 5 4
#8 2 1 2017-10-08 1 0 3 7
#9 3 2 2017-10-09 2 1 5 4


I tried asking this question before but was it was poorly stated. This is a new attempt cause I haven't solved it yet.

I have a dataset with winners, losers, date, winner_points and loser_points.

For each row, I want two new columns, one for the winner and one for the loser that shows how many points they have scored so far (as both winners and losers).

Example data:

winner <- c(1,2,3,1,2,3,1,2,3)
loser <-  c(3,1,1,2,1,1,3,1,2)
date <- c("2017-10-01","2017-10-02","2017-10-03","2017-10-04","2017-10-05","2017-10-06","2017-10-07","2017-10-08","2017-10-09")
winner_points <- c(2,1,2,1,2,1,2,1,2)
loser_points <- c(1,0,1,0,1,0,1,0,1)
test_data <- data.frame(winner, loser, date = as.Date(date), winner_points, loser_points)

I want the output to be:

winner_points_sum <- c(0, 0, 1, 3, 1, 3, 5, 3, 5)
loser_points_sum <- c(0, 2, 2, 1, 4, 5, 4, 7, 4)
test_data <- data.frame(winner, loser, date = as.Date(date), winner_points, loser_points, winner_points_sum, loser_points_sum)

How I've solved it thus far is to do a for loop such as:

library(dplyr)
test_data$winner_points_sum_loop <- 0
test_data$loser_points_sum_loop <- 0

for(i in row.names(test_data)) {
  test_data[i,]$winner_points_sum_loop <-
    (
    test_data %>%
      dplyr::filter(winner == test_data[i,]$winner & date < test_data[i,]$date) %>%
      dplyr::summarise(points = sum(winner_points, na.rm = TRUE))
  +
    test_data %>%
      dplyr::filter(loser == test_data[i,]$winner & date < test_data[i,]$date) %>%
      dplyr::summarise(points = sum(loser_points, na.rm = TRUE))
    )
}

test_data$winner_points_sum_loop <- unlist(test_data$winner_points_sum_loop)

Any suggestions how to tackle this problem? The queries take quite some time when the row numbers add up. I've tried elaborating with the AVE function, I can do it for one column to sum a players point as winner but can't figure out how to add their points as loser.

解决方案

winner <- c(1,2,3,1,2,3,1,2,3)
loser <-  c(3,1,1,2,1,1,3,1,2)
date <- c("2017-10-01","2017-10-02","2017-10-03","2017-10-04","2017-10-05","2017-10-06","2017-10-07","2017-10-08","2017-10-09")
winner_points <- c(2,1,2,1,2,1,2,1,2)
loser_points <- c(1,0,1,0,1,0,1,0,1)
test_data <- data.frame(winner, loser, date = as.Date(date), winner_points, loser_points)


library(dplyr)
library(tidyr)

test_data %>%
  unite(winner, winner, winner_points) %>%                    # unite winner columns
  unite(loser, loser, loser_points) %>%                       # unite loser columns
  gather(type, pl_pts, winner, loser, -date) %>%              # reshape
  separate(pl_pts, c("player","points"), convert = T) %>%     # separate columns
  arrange(date) %>%                                           # order dates (in case it's not)
  group_by(player) %>%                                        # for each player
  mutate(sum_points = cumsum(points) - points) %>%            # get points up to that date
  ungroup() %>%                                               # forget the grouping
  unite(pl_pts_sumpts, player, points, sum_points) %>%        # unite columns
  spread(type, pl_pts_sumpts) %>%                             # reshape
  separate(loser, c("loser", "loser_points", "loser_points_sum"), convert = T) %>%                # separate columns and give appropriate names
  separate(winner, c("winner", "winner_points", "winner_points_sum"), convert = T) %>%
  select(winner, loser, date, winner_points, loser_points, winner_points_sum, loser_points_sum)   # select the order you prefer


# # A tibble: 9 x 7
#   winner loser       date winner_points loser_points winner_points_sum loser_points_sum
# *  <int> <int>     <date>         <int>        <int>             <int>            <int>
# 1      1     3 2017-10-01             2            1                 0                0
# 2      2     1 2017-10-02             1            0                 0                2
# 3      3     1 2017-10-03             2            1                 1                2
# 4      1     2 2017-10-04             1            0                 3                1
# 5      2     1 2017-10-05             2            1                 1                4
# 6      3     1 2017-10-06             1            0                 3                5
# 7      1     3 2017-10-07             2            1                 5                4
# 8      2     1 2017-10-08             1            0                 3                7
# 9      3     2 2017-10-09             2            1                 5                4

这篇关于如何创建一个列,根据条件累计添加前两行的总和?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆