根据定义的 i 创建具有动态列名和重复值的循环 [英] Create loop with dynamic column names and repeating values based on defined i

查看:35
本文介绍了根据定义的 i 创建具有动态列名和重复值的循环的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有以下数据框:

# Example data: one row per id, with three numeric value columns.
id <- c("A", "B", "C")
col1 <- c(1, 3, 5)
col2 <- c(6, 12, 9)
col3 <- c(2, 4, 30)

# Name every column explicitly instead of relying on the variable names.
df <- data.frame(id = id, col1 = col1, col2 = col2, col3 = col3)

从本质上讲,我希望每个i都被20、25、30、35、40取代.此循环有效,但是非常非常慢.

Essentially, I want every i to be replaced by 20, 25, 30, 35, 40. This loop works but it works very, very slowly.

library(dplyr)
library(tibble)
library(foreach)
library(tidyverse)
library(purrr)

# Input data for the loop below: an id column plus three value columns.
id <- c("A", "B", "C")
col1 <- c(1, 3, 5)
col2 <- c(6, 12, 9)
col3 <- c(2, 4, 30)
df <- data.frame(id = id, col1 = col1, col2 = col2, col3 = col3)

# Thresholds to evaluate: 20, 25, 30, 35, 40.
vals <- seq(from = 20, to = 40, by = 5)

# Loop-invariant lookup: the weight D attached to each value column.
col_weights <- data.frame(
  id2 = c("col1", "col2", "col3"),
  D = c(3, 4, 5)
)

# After transposing, each id becomes a column; D is divided by the number of
# ids (3 for the example data: A, B, C).
id_cols <- as.character(df$id)
n_ids <- length(id_cols)

# For every threshold i, build one column `sum_<i>` with one row per id plus a
# "sum" row and a "frac" row; the per-threshold columns are combined by cbind.
final <- foreach(i = vals, .combine = "cbind") %do% {

  # code each cell as 1 when it is below i, and 0 otherwise
  flags <- df %>% mutate(across(starts_with("col"), ~ +(. < i)))

  # transpose the dataset: rows become the value columns, columns the ids
  rownames(flags) <- flags$id
  flags$id <- NULL
  by_col <- as.data.frame(t(flags))
  by_col <- cbind(id = rownames(by_col), by_col)
  rownames(by_col) <- seq_len(nrow(by_col))

  # sum the rows: number of ids below the threshold for each value column
  by_col$sum <- rowSums(by_col[id_cols])

  # merge the weight column and divide it by the number of ids
  by_col <- left_join(by_col, col_weights, by = c("id" = "id2"))
  by_col$frac <- by_col$D / n_ids

  # recode all 1s to the frac (column by column, no matrix coercion)
  by_col[id_cols] <- lapply(
    by_col[id_cols],
    function(flag) ifelse(flag == 1, by_col$frac, 0)
  )

  # D is no longer needed once frac has been derived from it
  by_col$D <- NULL

  # transpose again: rows are now the ids followed by "sum" and "frac"
  rownames(by_col) <- by_col$id
  by_col$id <- NULL
  by_id <- as.data.frame(t(by_col))

  # Total across the value columns. The row labels are set explicitly so that
  # the combined result keeps them after the loop.
  totals <- rowSums(select(by_id, starts_with("col")))
  setNames(
    data.frame(totals, row.names = rownames(by_id)),
    paste0("sum_", i)
  )
}

# Build the display table from the combined result. The loop's locals are not
# used here: they would only hold the values of the last iteration.
abc <- cbind(list_name = rownames(final), final)
rownames(abc) <- seq_len(nrow(abc))

View(abc)
  

如果有任何方法可以加快速度,欢迎提出建议!

If there's any way to speed it up, suggestions are welcome!

推荐答案

下面是使用 map_dfc 实现此操作的一种方法:

Here's a way to do this with map_dfc:

library(dplyr)
library(purrr)

# Thresholds to evaluate: 20, 25, 30, 35, 40.
vals <- seq(from = 20, to = 40, by = 5)

# Append one `sum_<threshold>` column per threshold to df. Each column holds,
# for every row, the number of `col*` values that fall below that threshold.
bind_cols(
  df,
  map_dfc(vals, function(threshold) {
    # 1 where the value is below the threshold, 0 otherwise
    below <- mutate(df, across(starts_with("col"), ~ +(.x < threshold)))
    transmute(
      below,
      !!paste0("sum_", threshold) := rowSums(select(below, starts_with("col")))
    )
  })
)

或者使用基础 R(base R):

Or in base R:

# Positions of the value columns: names that start with "col". A prefix match
# is used so that unrelated columns whose names merely contain "col" are not
# counted.
cols <- which(startsWith(names(df), "col"))

# For each threshold, count per row how many value columns are below it and
# store the counts in a new column named sum_<threshold>. rowSums() accepts the
# logical comparison result directly.
df[paste0("sum_", vals)] <- lapply(
  vals,
  function(x) rowSums(df[cols] < x)
)
df

#  id col1 col2 col3 sum_20 sum_25 sum_30 sum_35 sum_40
#1  A    1    6    2      3      3      3      3      3
#2  B    3   12    4      3      3      3      3      3
#3  C    5    9   30      2      2      2      3      3

这篇关于根据定义的 i 创建具有动态列名和重复值的循环的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆