在数据框底部生成汇总表 [英] Generating summary table at bottom of dataframe

查看:48
本文介绍了在数据框底部生成汇总表的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

请帮助!

我有以下数据框(名为Final_APOL1)。我需要生成一个汇总表,如所示的第二个数据框。生成后,是否可以将其另存为单独的输出csv,并将其保存到同一目录?

I have the following dataframe (named Final_APOL1). I need to generate a summary table like the second dataframe shown. Once generated is it possible to save this as a separate output csv that will be saved to the same directory?

汇总表会遍历风险等位基因计数变量,并将它们归入相应的类别,以便计算每种突变的群体频率。

The summary table runs through the risk allele count variables and places them into categories so population frequencies can be calculated for each mutation.

 "no APOL1 Risk Alleles" = ifelse(`Final genotype of APOL1` == "G0/G0", 1, NA),

    "1 APOL1 Risk Alleles" = 
      ifelse(`Final genotype of APOL1` %in% c("G0/G2", "G1^{GM}/G0", "G1^{G+}/G0"), 1, NA),

    "2 APOL1 Risk Alleles" =
      ifelse(`Final genotype of APOL1` %in% c("G1^{GM}/G1^{GM}", "G1^{GM}/G2", "G2/G2"), 1, NA))



Final_APOL1中的基因型对应Summary_table的以下类别:

The genotypes within Final_APOL1 fit into the following categories for Summary_table

G1^{GM}/G2 and G1^{G+}/G2 = G1/G2 for Summary_table
G1^{GM}/G1^{GM} = G1/G1 for Summary_table
G1^{GM}/G0 and G1^{G+}/G0 = G1/G0 for Summary_table
G2/G2 = G2/G2 for Summary_table
G0/G2 = G0/G2 for Summary_table



数据框(Final_APOL1)



dataframe (Final_APOL1)

structure(list(Well = structure(1:96, .Label = c("A01", "A02", 
"A03", "A04", "A05", "A06", "A07", "A08", "A09", "A10", "A11", 
"A12", "B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", 
"B09", "B10", "B11", "B12", "C01", "C02", "C03", "C04", "C05", 
"C06", "C07", "C08", "C09", "C10", "C11", "C12", "D01", "D02", 
"D03", "D04", "D05", "D06", "D07", "D08", "D09", "D10", "D11", 
"D12", "E01", "E02", "E03", "E04", "E05", "E06", "E07", "E08", 
"E09", "E10", "E11", "E12", "F01", "F02", "F03", "F04", "F05", 
"F06", "F07", "F08", "F09", "F10", "F11", "F12", "G01", "G02", 
"G03", "G04", "G05", "G06", "G07", "G08", "G09", "G10", "G11", 
"G12", "H01", "H02", "H03", "H04", "H05", "H06", "H07", "H08", 
"H09", "H10", "H11", "H12"), class = "factor"), G1_1_1 = c("Blank", 
"Blank", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", "+", 
"+", "G1^{S342G}", "G1^{S342G}", "Blank", "Blank", "+", "+", 
"+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", "+", "+", "Blank", 
"Blank", "+", "+", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"Blank", "Blank", "+", "+", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "+", "+", "G1^{S342G}", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "+", "+", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", 
"+", "+", "G1^{S342G}", "G1^{S342G}", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", 
"G1^{S342G}", "G1^{S342G}", "+", "+", "+", "+", "+", "+", "G1^{S342G}", 
"G1^{S342G}", "G1^{S342G}", "G1^{S342G}", "+", "+", "+", "+", 
"+", "+", "G1^{S342G}", "G1^{S342G}"), G1_1_2 = c("Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "G1^{S342G}", "G1^{S342G}", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "G1^{S342G}", 
"G1^{S342G}", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+"
), G1_2_1 = c("Blank", "Blank", "+", "+", "+", "+", "G1^{I384M}", 
"G1^{I384M}", "+", "+", "G1^{I384M}", "G1^{I384M}", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"+", "+", "Blank", "Blank", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "Blank", "Blank", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "+", "+", "+", "+", "+", "+", "G1^{I384M}", 
"G1^{I384M}", "+", "+", "G1^{I384M}", "G1^{I384M}", "+", "+", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "G1^{I384M}", 
"G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "+", "+", "+", "+", 
"+", "+", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", "G1^{I384M}", 
"+", "+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}"), G1_2_2 = c("Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "Blank", 
"Blank", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", 
"+", "+", "+", "+", "+", "+", "Blank", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", "+", "+", "+", 
"+", "+", "+", "+", "+", "G1^{I384M}", "G1^{I384M}", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+"), G2_1 = c("Blank", "Blank", 
"+", "+", "+", "+", "G2", "G2", "+", "+", "G2", "G2", "Blank", 
"Blank", "+", "+", "G2", "G2", "G2", "G2", "+", "+", "+", "+", 
"Blank", "Blank", "G2", "G2", "+", "+", "+", "+", "+", "+", "+", 
"+", "Blank", "Blank", "G2", "G2", "G2", "G2", "+", "+", "+", 
"+", "+", "+", "G2", "G2", "G2", "G2", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "G2", "G2", "G2", 
"G2", "G2", "G2", "G2", "G2", "+", "+", "+", "+", "+", "+", "+", 
"+", "G2", "G2", "+", "+", "+", "+", "G2", "G2", "+", "+", "+", 
"+", "+", "+"), G2_2 = c("Blank", "Blank", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "Blank", "Blank", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "Blank", "Blank", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", 
"+", "+", "+", "+", "+"), `Final genotype of APOL1` = c("NA", 
"NA", "G0/G0", "G0/G0", "G0/G0", "G0/G0", "G1^{GM}/G2", "G1^{GM}/G2", 
"G0/G0", "G0/G0", "G1^{GM}/G2", "G1^{GM}/G2", "NA", "NA", "G0/G0", 
"G0/G0", "G0/G2", "G0/G2", "G0/G2", "G0/G2", "G1^{GM}/G0", "G1^{GM}/G0", 
"G0/G0", "G0/G0", "NA", "NA", "G0/G2", "G0/G2", "G1^{GM}/G0", 
"G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", 
"G1^{GM}/G0", "G1^{GM}/G0", "NA", "NA", "G0/G2", "G0/G2", "G1^{GM}/G2", 
"G1^{GM}/G2", "G1^{GM}/G0", "G1^{GM}/G0", "G0/G0", "G0/G0", "G1^{GM}/G0", 
"G1^{GM}/G0", "G1^{GM}/G2", "G1^{GM}/G2", "G0/G2", "G0/G2", "G0/G0", 
"G0/G0", "G0/G0", "G0/G0", "G1^{GM}/G1^{GM}", "G1^{GM}/G1^{GM}", 
"G0/G0", "G0/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G0/G0", "G0/G0", 
NA, "G0/G0", "G0/G2", "G0/G2", "G0/G2", "G0/G2", "G0/G2", "G0/G2", 
"G0/G2", "G0/G2", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", "G1^{GM}/G0", 
"G0/G0", "G0/G0", "G0/G0", "G0/G0", "G0/G2", "G0/G2", "G1^{GM}/G1^{GM}", 
"G1^{GM}/G1^{GM}", "G1^{GM}/G0", "G1^{GM}/G0", "G0/G2", "G0/G2", 
"G0/G0", "G0/G0", "G0/G0", "G0/G0", "G1^{GM}/G0", "G1^{GM}/G0"
), `no APOL1 Risk Alleles` = c(NA, NA, 1, 1, 1, 1, NA, NA, 1, 
1, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 1, 1, 1, 1, NA, NA, 1, 
1, NA, NA, 1, 1, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, 1, 
1, NA, NA), `1 APOL1 Risk Alleles` = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, 1, 1, 1, 1, NA, 
NA, NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, NA, 1, 1, NA, NA, 
1, 1, NA, NA, 1, 1, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, 
NA, 1, 1, NA, NA, NA, NA, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
NA, NA, NA, NA, 1, 1, NA, NA, 1, 1, 1, 1, NA, NA, NA, NA, 1, 
1), `2 APOL1 Risk Alleles` = c(NA, NA, NA, NA, NA, NA, 1, 1, 
NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
1, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 1, 
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA), Sample_Flag = c(FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE)), row.names = c(NA, -96L), class = "data.frame")



数据框(Summary_table)



dataframe (Summary_table)

structure(list(X__1 = c("Total number", "Percentage", "G1/G1 %", 
"G2/G2 %", "G1/G2 %", "G0/G1 %", "G0/G2 %"), `no APOL1 Risk Alleles` = c(29, 
33.33, NA, NA, NA, NA, NA), `1 APOL1 Risk Alleles` = c(46, 52.88, 
NA, NA, NA, 27.59, 25.29), `2 APOL1 Risk Alleles` = c(12, 13.79, 
4.6, NA, 9.19, NA, NA), `Total Number` = c(87, NA, NA, NA, NA, 
NA, NA)), row.names = c(NA, -7L), class = c("tbl_df", "tbl", 
"data.frame"))


推荐答案

我通常不喜欢把行汇总(例如总计)存放在数据框本身之中,因为后续任何按行的汇总都会把这个数字也算进去;我假设这里纯粹是出于美观考虑,所以把它包含进来也无伤大雅。

I'm generally not a fan of storing row summaries (e.g., total) within the frame itself, as any follow-on row summarizing will include that number; I'm assuming that this is purely for aesthetics, and so including it is not horrible

library(dplyr)

# Per-genotype counts for each risk-allele category.
#
# Input:  `Final_APOL1`, the data frame from the question (one row per well).
# Output: `tmp1`, one row per collapsed genotype label (column `X__1`) and one
#         column per "<n> APOL1 Risk Alleles" indicator.
tmp1 <- as_tibble(Final_APOL1) %>%
  # discard the G* allele-call fields; they are not needed for the summary
  select(-starts_with("G")) %>%
  # remove any reference to NA or the literal string "NA"; those wells have
  # no genotype call and must not be counted
  filter(
    !is.na(`Final genotype of APOL1`),
    `Final genotype of APOL1` != "NA"
  ) %>%
  mutate(
    # collapse the detailed genotype calls into the summary-table labels;
    # any genotype not listed here (e.g. "G0/G0", "G0/G2", "G2/G2") is kept
    # unchanged by the TRUE fallback
    `X__1` = case_when(
      `Final genotype of APOL1` %in% c("G1^{GM}/G2", "G1^{G+}/G2") ~ "G1/G2",
      `Final genotype of APOL1` == "G1^{GM}/G1^{GM}"               ~ "G1/G1",
      `Final genotype of APOL1` %in% c("G1^{GM}/G0", "G1^{G+}/G0") ~ "G1/G0",
      TRUE ~ `Final genotype of APOL1`
    )
  ) %>%
  select(-Well, -`Final genotype of APOL1`, -Sample_Flag) %>%
  group_by(X__1) %>%
  # deliberately no na.rm: the indicator columns hold 1 or NA, so a category
  # a genotype does not belong to stays NA instead of becoming 0
  summarize_all(~ sum(.))

# One-row frame of column totals over all genotype groups (NAs ignored),
# extended with the grand total and a row label.
totals <- tmp1 %>%
  summarize_at(vars(-X__1), function(counts) sum(counts, na.rm = TRUE)) %>%
  # `.` is the one-row frame of numeric column totals, so this is their sum
  mutate(`Total Number` = sum(.)) %>%
  mutate(X__1 = "Total number")
# Same shape as `totals`, with each category total expressed as a percentage
# of the grand total; the grand-total cell itself is blanked out.
percents <- totals %>%
  mutate_at(
    vars(-X__1, -`Total Number`),
    # formula lambda on purpose: `Total Number` must resolve to the column of
    # the piped data, not to a variable in the calling environment
    ~ 100 * .x / `Total Number`
  ) %>%
  mutate(
    X__1 = "Percentage",
    `Total Number` = NA_real_
  )

最后的步骤和输出:

# Turn the per-genotype counts into percentages of the grand total, then stack
# the "Total number" and "Percentage" rows underneath. `tmp1` has no
# `Total Number` column, so bind_rows() fills it with NA for those rows.
# To keep the table (e.g. for writing to CSV), assign the result to a name.
tmp1 %>%
  mutate_at(vars(-X__1), ~ 100 * .x / totals[["Total Number"]]) %>%
  bind_rows(totals, percents)
# # A tibble: 7 x 5
#   X__1         `no APOL1 Risk Allele~ `1 APOL1 Risk Allele~ `2 APOL1 Risk Allele~ `Total Number`
#   <chr>                         <dbl>                 <dbl>                 <dbl>          <dbl>
# 1 G0/G0                          33.3                  NA                   NA                NA
# 2 G0/G2                          NA                    25.3                 NA                NA
# 3 G1/G0                          NA                    27.6                 NA                NA
# 4 G1/G1                          NA                    NA                    4.60             NA
# 5 G1/G2                          NA                    NA                    9.20             NA
# 6 Total number                   29                    46                   12                87
# 7 Percentage                     33.3                  52.9                 13.8              NA

这篇关于在数据框底部生成汇总表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆