结合使用dplyr :: quos()和列表参数而不是省略号参数 [英] Using dplyr::quos() with a list argument rather than the ellipsis argument

查看:79
本文介绍了结合使用dplyr :: quos()和列表参数而不是省略号参数的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用dplyr并尝试创建一个基于分组参数来计算p.values的函数.我希望能够有一个参数,该参数可以是要分组的任何长度的变量的列表.这是示例数据集:

I am using dplyr and trying to create a function to calculate p-values based on grouping arguments. I would like to have an argument that is a list, of any length, of variables to group by. Here is the example dataset:

# Example data set for the question: a 144-row data.frame (see `row.names`)
# with the columns Experiment, Sample, Genotype, variable, value and Day.
# Genotype and Day are stored as factors (their levels are given in `.Label`);
# Sample and value contain some NA entries.
dataset <- structure(list(Experiment = c(170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170222, 170222, 170222, 170222, 
170222, 170222, 170222, 170222, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824, 170824, 170824, 170824, 170824, 
170824, 170824, 170824, 170824), Sample = c("1: FL_496", "1: FL_496", 
"1: FL_496", "1: FL_496", "1: FL_496", "1: FL_496", "1: FL_496", 
"1: FL_496", "2: FL_505", "2: FL_505", "2: FL_505", "2: FL_505", 
"2: FL_505", "2: FL_505", "2: FL_505", "2: FL_505", "3: FL_509", 
"3: FL_509", "3: FL_509", "3: FL_509", "3: FL_509", "3: FL_509", 
"3: FL_509", "3: FL_509", "4: FL_514", "4: FL_514", "4: FL_514", 
"4: FL_514", "4: FL_514", "4: FL_514", "4: FL_514", "4: FL_514", 
"5: cKO_497", "5: cKO_497", "5: cKO_497", "5: cKO_497", "5: cKO_497", 
"5: cKO_497", "5: cKO_497", "5: cKO_497", "6: cKO_504", "6: cKO_504", 
"6: cKO_504", "6: cKO_504", "6: cKO_504", "6: cKO_504", "6: cKO_504", 
"6: cKO_504", "7: cKO_510", "7: cKO_510", "7: cKO_510", "7: cKO_510", 
"7: cKO_510", "7: cKO_510", "7: cKO_510", "7: cKO_510", "8: cKO_515", 
"8: cKO_515", "8: cKO_515", "8: cKO_515", "8: cKO_515", "8: cKO_515", 
"8: cKO_515", "8: cKO_515", "9: cKO_517", "9: cKO_517", "9: cKO_517", 
"9: cKO_517", "9: cKO_517", "9: cKO_517", "9: cKO_517", "9: cKO_517", 
NA, NA, NA, NA, NA, NA, NA, NA, "1: FL_627", "1: FL_627", "1: FL_627", 
"1: FL_627", "1: FL_627", "1: FL_627", "2: FL_628", "2: FL_628", 
"2: FL_628", "2: FL_628", "2: FL_628", "2: FL_628", "3: FL_633", 
"3: FL_633", "3: FL_633", "3: FL_633", "3: FL_633", "3: FL_633", 
"4: FL_636", "4: FL_636", "4: FL_636", "4: FL_636", "4: FL_636", 
"4: FL_636", "5: cKO_620", "5: cKO_620", "5: cKO_620", "5: cKO_620", 
"5: cKO_620", "5: cKO_620", "6: cKO_625", "6: cKO_625", "6: cKO_625", 
"6: cKO_625", "6: cKO_625", "6: cKO_625", "7: cKO_626", "7: cKO_626", 
"7: cKO_626", "7: cKO_626", "7: cKO_626", "7: cKO_626", "8: cKO_634", 
"8: cKO_634", "8: cKO_634", "8: cKO_634", "8: cKO_634", "8: cKO_634", 
"cKO_620", "cKO_620", "cKO_625", "cKO_625", "cKO_626", "cKO_626", 
"cKO_634", "cKO_634", "FL_627", "FL_627", "FL_628", "FL_628", 
"FL_633", "FL_633", "FL_636", "FL_636"), Genotype = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("miR-15/16 FL", 
"miR-15/16 cKO"), class = "factor"), variable = c("% CD127+", 
"% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", 
"% KLRG1+", "% KLRG1+", "% KLRG1+", "% CD127+", "% CD127+", "% CD127+", 
"% CD127+", "% KLRG1+", "% KLRG1+", "% KLRG1+", "% KLRG1+", "% CD127+", 
"% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", 
"% KLRG1+", "% KLRG1+", "% KLRG1+", "% CD127+", "% CD127+", "% CD127+", 
"% CD127+", "% KLRG1+", "% KLRG1+", "% KLRG1+", "% KLRG1+", "% CD127+", 
"% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", 
"% KLRG1+", "% KLRG1+", "% KLRG1+", "% CD127+", "% CD127+", "% CD127+", 
"% CD127+", "% KLRG1+", "% KLRG1+", "% KLRG1+", "% KLRG1+", "% CD127+", 
"% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% CD127+", "% CD127+", "% KLRG1+", "% KLRG1+", 
"% KLRG1+", "% CD127+", "% KLRG1+", "% CD127+", "% KLRG1+", "% CD127+", 
"% KLRG1+", "% CD127+", "% KLRG1+", "% CD127+", "% KLRG1+", "% CD127+", 
"% KLRG1+", "% CD127+", "% KLRG1+", "% CD127+", "% KLRG1+"), 
    value = c(1, 28.7, 40.1, 47.4, 64.1, 69.9, 73.1, 79.42, 0.99, 
    21.72, 33, 56.6, 55.5, 82.9, 84.96, 86.7, 3.94, 43.4, 49.5, 
    60.8, 57.1, 69.8, 71.4, 77.72, 1, 20.56, 28.77, 35.1, 71.07, 
    71.2, 78.16, 84.04, 3.77, 56.9, 60.5, 66.5, 43.7, 50.36, 
    50.8, 51.8, 3.24, 58.2, 59.8, 70.8, 47.9, 58.5, 59.5, 61.3, 
    4.21, 62, 65.7, 73.8, 40, 51.5, 53.1, 55.69, 9.48, 41.7, 
    44, 63, 53.7, 57.31, 60.4, 60.8, 3.84, 34.1, 41.1, 53.2, 
    55.07, 55.3, 62.2, 76.6, NA, NA, NA, NA, NA, NA, NA, NA, 
    12.01, 18.5, 20.99, 66.39, 77.2, 85.6, 12.8, 31.3, 35.11, 
    59.8, 85.5, 89.7, 32.1, 33.3, 34.7, 63.2, 71.6, 80.5, 15.3, 
    17.02, 33.5, 65.54, 82.7, 85.8, 41.61, 51.3, 69.3, 39.81, 
    59, 62, 46.6, 52.1, 67.8, 39.5, 58.8, 66, 52.2, 52.9, 68.7, 
    46, 55.9, 61.6, 45.17, 59.9, 74.3, 31.87, 48.4, 51.2, 6.2, 
    56.34, 4.17, 70.85, 3.54, 59.89, 5.61, 49.71, 1.87, 77.09, 
    0.51, 86.05, 1.8, 80.69, 2.15, 79.43), Day = structure(c(1L, 
    2L, 3L, 4L, 4L, 3L, 2L, 1L, 1L, 3L, 4L, 2L, 2L, 4L, 1L, 3L, 
    1L, 3L, 2L, 4L, 4L, 2L, 3L, 1L, 1L, 3L, 4L, 2L, 4L, 2L, 3L, 
    1L, 1L, 3L, 2L, 4L, 4L, 1L, 2L, 3L, 1L, 3L, 2L, 4L, 4L, 2L, 
    3L, 1L, 1L, 3L, 2L, 4L, 4L, 3L, 2L, 1L, 1L, 3L, 4L, 2L, 2L, 
    1L, 4L, 3L, 1L, 2L, 3L, 4L, 1L, 4L, 3L, 2L, 2L, 3L, 4L, 1L, 
    2L, 3L, 4L, 1L, 3L, 2L, 4L, 3L, 2L, 4L, 2L, 3L, 4L, 3L, 2L, 
    4L, 2L, 3L, 4L, 3L, 2L, 4L, 2L, 3L, 4L, 3L, 4L, 2L, 3L, 2L, 
    4L, 3L, 2L, 4L, 3L, 2L, 4L, 3L, 2L, 4L, 3L, 2L, 4L, 3L, 2L, 
    4L, 3L, 2L, 4L, 3L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("8", "15", "22", 
    "30+"), class = "factor")), class = "data.frame", row.names = c(NA, 
-144L), .Names = c("Experiment", "Sample", "Genotype", "variable", 
"value", "Day"))

这是我使用 ... 编写的、可以正常运行的函数:

And here is the function I have made, which works using `...`:

# Run t.test() on `value` between the first two levels of the `comparison`
# column, separately for each group given by the bare column names in `...`.
#
# dataset      data frame holding `variable`, `value`, the comparison column
#              and the grouping columns.
# subset.plot  values of `variable` to keep; NULL means use every value found
#              in dataset[['variable']].
# comparison   name (string) of the column whose first two levels() define the
#              two samples handed to t.test().
# ...          unquoted grouping columns, captured with quos().
#
# Returns the grouped per-group tidy(t.test()) rows with an added
# `p.value.format` significance-symbol column, sorted by the grouping columns.
grouped.t.test <- function(dataset, subset.plot, comparison, ...) {
  grouping <- quos(...)

  if (is.null(subset.plot)) {
    subset.plot <- dataset[['variable']]
  }

  # Per-group worker: split `value` by the first and second level of the
  # comparison column and tidy the resulting test.
  compare_levels <- function(grp) {
    lv <- levels(grp[[comparison]])
    tidy(t.test(
      x = grp$value[grp[comparison] == lv[1]],
      y = grp$value[grp[comparison] == lv[2]]
    ))
  }

  dataset %>%
    filter(variable %in% subset.plot) %>%
    group_by(!!!grouping) %>%
    do(compare_levels(.)) %>%
    mutate(
      p.value.format = symnum(
        p.value,
        corr = FALSE,
        na = FALSE,
        cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
        symbols = c("****", "***", "**", "*", NA)
      )
    ) %>%
    arrange(!!!grouping)
}

# Demo: compare genotypes within every variable/Day combination.
View(
  grouped.t.test(
    dataset = dataset, subset.plot = NULL, comparison = 'Genotype',
    variable, Day
  )
)

我希望能够将...替换为一个参数(例如group_vars)并像这样调用它:

I would like to be able to replace ... with an argument (e.g., group_vars) and call it like this:

# Desired call shape: pass the grouping columns through a single `group_vars`
# argument instead of `...` (the question reports this does not work with quos()).
View(grouped.t.test(dataset = dataset, subset.plot = NULL, comparison = 'Genotype', group_vars = c(variable, Day)))

这似乎不适用于 quos(),但我不明白为什么。如果能使用多个各自被引用(quote)并独立使用的列表参数就更好了(例如,创建一个参数 "arrange.by",它是在函数末尾传递给 arrange 的变量列表)。

This does not seem to work with quos(), but I don't understand why. It would be nice to be able to use multiple list arguments that get quosed and used independently (e.g., creating an argument "arrange.by" that would be a list of variables to pass to arrange at the end of the function).

非常感谢您能帮助我理解为什么它不起作用以及我能做些什么!

I'd greatly appreciate any help understanding why this doesn't work and what I could do instead!

推荐答案

正如 dplyr 的主要开发者之一 @lionel 在这条评论中所提到的:

As mentioned by @lionel, one of the lead developers of dplyr, in this comment:

您希望引用是外部的,并且由用户显式完成,而不是由函数隐式执行.为此,您可以要求用户使用base :: alist(),rlang :: exprs()或dplyr :: vars()

You want the quoting to be external and explicitly done by the user rather than implicitly by your function. To this end you can ask your users to quote with base::alist(), rlang::exprs(), or dplyr::vars()

您可以针对自己的问题做类似的事情

You can do something like this for your question

# Variant of the grouped t-test that receives its grouping columns as one
# already-quoted list instead of `...`.
#
# dataset      data frame holding `variable`, `value`, the comparison column
#              and the grouping columns.
# subset.plot  values of `variable` to keep; NULL means use every value found
#              in dataset[['variable']].
# comparison   name (string) of the column whose first two levels() define the
#              two samples handed to t.test().
# group_vars   list of quoted grouping columns (e.g. built by the caller with
#              alist() or dplyr::vars()); spliced with `!!!`.
#
# Returns the grouped per-group tidy(t.test()) rows with an added
# `p.value.format` significance-symbol column, sorted by `group_vars`.
grouped.t.test2 <- function(dataset, subset.plot, comparison, group_vars) {
  if (is.null(subset.plot)) {
    subset.plot <- dataset[['variable']]
  }

  # t-test of `value` between the first two levels of the comparison column
  # for one group of rows.
  two_sample <- function(rows) {
    first_level <- levels(rows[[comparison]])[1]
    second_level <- levels(rows[[comparison]])[2]
    fit <- t.test(x = rows$value[rows[comparison] == first_level],
                  y = rows$value[rows[comparison] == second_level])
    tidy(fit)
  }

  # Map p-values onto significance symbols.
  star_label <- function(p) {
    symnum(p, corr = FALSE, na = FALSE,
           cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
           symbols = c("****", "***", "**", "*", NA))
  }

  selected <- filter(dataset, variable %in% subset.plot)
  grouped <- group_by(selected, !!!group_vars)
  tested <- do(grouped, two_sample(.))
  starred <- mutate(tested, p.value.format = star_label(p.value))
  arrange(starred, !!!group_vars)
}

# The grouping columns are quoted by the caller with base::alist() and passed
# positionally as `group_vars`.
grouped.t.test2(dataset = dataset, subset.plot = NULL, comparison = 'Genotype', 
               alist(variable, Day))

# or quote the grouping columns with dplyr::vars() instead

grouped.t.test2(dataset = dataset, subset.plot = NULL, comparison = 'Genotype', 
               dplyr::vars(variable, Day))

# A tibble: 8 x 13
# Groups:   variable, Day [8]
  variable Day   estimate estimate1 estimate2 statistic p.value parameter
  <fct>    <fct>    <dbl>     <dbl>     <dbl>     <dbl>   <dbl>     <dbl>
1 % CD127+ 8        -3.24      1.66      4.90     -4.26 9.93e-4      12.6
2 % CD127+ 15      -24.4      31.1      55.5      -3.80 2.88e-3      11.2
3 % CD127+ 22      -22.1      27.4      49.5      -4.60 5.54e-4      12.5
4 % CD127+ 30+     -28.6      36.8      65.4      -5.23 1.36e-4      13.7
5 % KLRG1+ 8        23.8      81.2      57.4       9.79 3.11e-7      12.5
6 % KLRG1+ 15       16.5      73.7      57.2       3.78 2.08e-3      13.8
7 % KLRG1+ 22       20.9      70.1      49.2       4.44 4.82e-4      14.9
8 % KLRG1+ 30+      22.5      76.7      54.2       4.46 6.01e-4      13.4
# ... with 5 more variables: conf.low <dbl>, conf.high <dbl>,
#   method <fct>, alternative <fct>, p.value.format <chr>              

这篇关于结合使用dplyr :: quos()和列表参数而不是省略号参数的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆