如何使用dplyr对x中的元素进行分组,并按y的区间统计x的出现频率? [英] How to use dplyr to group elements in x ,count frequency of x for an interval of y?

查看:131
本文介绍了如何使用dplyr对x中的元素进行分组、并按y的区间统计x出现频率的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

# Observed letters; several letters occur more than once (79 values in total).
x <- c(
  "a", "v", "c", "a", "d", "e", "g", "f", "h", "y",
  "u", "r", "s", "w", "s", "d", "g", "j", "u", "r",
  "s", "s", "s", "v", "b", "g", "e", "w", "s", "d",
  "g", "h", "j", "i", "t", "e", "w", "w", "q", "q",
  "d", "v", "b", "m", "m", "k", "l", "u", "p", "o",
  "r", "t", "n", "e", "w", "w", "j", "f", "c", "g",
  "h", "t", "r", "d", "e", "w", "w", "w", "z", "f",
  "g", "f", "h", "h", "y", "r", "f", "f", "l"
)

# One value per element of x, drawn from 1..40 with replacement.
# No seed is set, so each run produces a different y.
y <- sample(1:40, 79, replace = TRUE)



y
[1] 38 18 19 19 37 38 26 4 32 23 11 24 36 15 22 19 6 24 13 36 2 26 35 39 8 33 20 19 23 28 5 17 40 26 18 21
[37] 35 23 27 12 3 33 16 32 11 19 4 5 8 19 5 19 33 33 33 13 12 32 21 4 14 8 28 34 33 22 34 19 39 23 6 8
[73] 37 17 21 16 38 15 36




我有两个变量'x'和'y'。'x'中的某些观察值出现不止一次。'x'中的每个观察值在'y'中都有一个对应的值。



我想实现分组,并将y值分割为间隔。



换句话说,每个字母出现的次数,要根据该字母每次出现时对应的y值,划分到指定的区间中分别统计。



示例: -





我无法在这里正确地展示表格,因为我找不到更好的输入方式。

我希望很清楚。如果需要,我会尝试重申。
我希望在这方面有任何帮助。

解决方案

使用 dplyr

  library(dplyr)
library(tidyr)

# Count rows for every (x, y-interval) pair, then widen the result to one
# column per interval. cut() bins y using the edges 0, 10, 20, 30, 40;
# spread() fills combinations that never occur with 0.
res <- tally(group_by(df, x, y = cut(y, breaks = seq(0, 40, by = 10)))) %>%
  ungroup() %>%
  spread(y, n, fill = 0)

或使用 data.table

  library(data.table)
# setDT() turns df into a data.table by reference; .N is the row count of
# each (x, y1) group, which dcast then spreads into one column per interval.
res1 <- dcast.data.table(
  setDT(df)[, list(.N),
            by = list(x, y1 = cut(y, breaks = seq(0, 40, by = 10)))],
  x ~ y1, value.var = "N", fill = 0L
)

# Check that the dplyr and data.table results agree.
all.equal(as.data.frame(res), as.data.frame(res1))
#[1] TRUE

注意: cut 有一个 labels 参数,所以如果你想让列标题为 freq0-10 等,可以这样写:

  # Same count as before, but the labels argument of cut() renames the four
  # intervals, so the widened columns are called freq0-10, freq10-20, ...
tally(group_by(df, x, y = cut(y, breaks = seq(0, 40, by = 10),
  labels = paste0("freq", c("0-10", "10-20", "20-30", "30-40"))))) %>%
  ungroup() %>%
  spread(y, n, fill = 0) %>%
  head(2)

#   x freq0-10 freq10-20 freq20-30 freq30-40
#1 a        0         1         1         0
#2 b        1         1         0         0



数据



  # Example data: 79 rows. x is a factor of letters stored as integer codes
  # into .Label; y holds one integer per row.
df <- structure(list(x = structure(c(1L, 22L, 3L, 1L, 4L, 5L, 7L, 6L,
8L, 24L, 21L, 18L, 19L, 23L, 19L, 4L, 7L, 10L, 21L, 18L, 19L,
19L, 19L, 22L, 2L, 7L, 5L, 23L, 19L, 4L, 7L, 8L, 10L, 9L, 20L,
5L, 23L, 23L, 17L, 17L, 4L, 22L, 2L, 13L, 13L, 11L, 12L, 21L,
16L, 15L, 18L, 20L, 14L, 5L, 23L, 23L, 10L, 6L, 3L, 7L, 8L, 20L,
18L, 4L, 5L, 23L, 23L, 23L, 25L, 6L, 7L, 6L, 8L, 8L, 24L, 18L,
6L, 6L, 12L), .Label = c("a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "y", "z"), class = "factor"), y = c(12L, 9L, 29L, 21L,
27L, 37L, 12L, 31L, 33L, 11L, 25L, 15L, 27L, 27L, 13L, 37L, 8L,
2L, 21L, 6L, 4L, 23L, 30L, 6L, 9L, 28L, 4L, 24L, 26L, 2L, 13L,
10L, 15L, 6L, 38L, 9L, 30L, 26L, 28L, 39L, 19L, 16L, 11L, 9L,
2L, 4L, 16L, 15L, 11L, 14L, 19L, 35L, 19L, 29L, 22L, 40L, 19L,
12L, 7L, 6L, 20L, 10L, 12L, 6L, 30L, 13L, 38L, 39L, 30L, 20L,
6L, 9L, 1L, 40L, 26L, 14L, 23L, 33L, 2L)), .Names = c("x", "y"
), row.names = c(NA, -79L), class = "data.frame")


# Observed letters; several letters occur more than once (79 values in total).
x <- c(
  "a", "v", "c", "a", "d", "e", "g", "f", "h", "y",
  "u", "r", "s", "w", "s", "d", "g", "j", "u", "r",
  "s", "s", "s", "v", "b", "g", "e", "w", "s", "d",
  "g", "h", "j", "i", "t", "e", "w", "w", "q", "q",
  "d", "v", "b", "m", "m", "k", "l", "u", "p", "o",
  "r", "t", "n", "e", "w", "w", "j", "f", "c", "g",
  "h", "t", "r", "d", "e", "w", "w", "w", "z", "f",
  "g", "f", "h", "h", "y", "r", "f", "f", "l"
)

# One value per element of x, drawn from 1..40 with replacement.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# No seed is set, so each run produces a different y.
y <- sample(1:40, 79, replace = TRUE)

y
 [1] 38 18 19 19 37 38 26 4 32 23 11 24 36 15 22 19 6 24 13 36 2 26 35 39 8 33 20 19 23 28 5 17 40 26 18 21
[37] 35 23 27 12 3 33 16 32 11 19 4 5 8 19 5 19 33 33 33 13 12 32 21 4 14 8 28 34 33 22 34 19 39 23 6 8
[73] 37 17 21 16 38 15 36

I have two variables, 'x' and 'y'. Some observations occur more than once in 'x', and every observation in 'x' has a corresponding value in 'y'.

I would like to achieve grouping and also partitioning of y values into intervals .

To put it differently: the number of times each letter occurred should be broken down by interval, where the interval is determined by the value assigned to that letter at each of its occurrences.

example :-

I could not represent the table properly, as I could not find a better way to type it here.

I hope it is clear. I shall try to restate it if needed. I would appreciate any help in this regard.

解决方案

Using dplyr

library(dplyr)
library(tidyr)

# Bin y at the edges 0, 10, 20, 30, 40, count the rows in every (x, bin)
# pair, and widen to one column per bin; pairs that never occur become 0.
res <- df %>%
  group_by(x, y = cut(y, breaks = seq(0, 40, by = 10))) %>%
  tally() %>%
  ungroup() %>%
  spread(y, n, fill = 0)

Or using data.table

library(data.table)
# data.table version of the same table. setDT() converts df by reference,
# the grouped j expression counts rows per (x, y1) pair, and the cast puts
# each interval in its own column with 0L where a pair never occurs.
res1 <- dcast.data.table(
  setDT(df)[
    ,
    .(N = .N),
    by = .(x, y1 = cut(y, breaks = seq(0, 40, by = 10)))
  ],
  x ~ y1,
  value.var = "N",
  fill = 0L
)

# Confirm that both approaches give the same table.
all.equal(as.data.frame(res), as.data.frame(res1))
#[1] TRUE

Note: `cut` has a `labels` argument, so if you want the column headings to be freq0-10, etc., you can write:

 # Same count, but cut() is given explicit labels so the widened columns are
 # named freq0-10, freq10-20, ...; only the first two rows are shown.
 df %>%
   group_by(
     x,
     y = cut(
       y,
       breaks = seq(0, 40, by = 10),
       labels = paste0("freq", c("0-10", "10-20", "20-30", "30-40"))
     )
   ) %>%
   tally() %>%
   ungroup() %>%
   spread(y, n, fill = 0) %>%
   head(2)

  #   x freq0-10 freq10-20 freq20-30 freq30-40
  #1 a        0         1         1         0
  #2 b        1         1         0         0

data

 # Example data: 79 rows. x is a factor over 25 letter levels, y an integer
 # column. local() keeps the helper vectors out of the calling environment.
 df <- local({
   lvls <- c(
     "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
     "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "y", "z"
   )
   # Position of each observation's letter within lvls.
   codes <- c(
     1L, 22L, 3L, 1L, 4L, 5L, 7L, 6L, 8L, 24L,
     21L, 18L, 19L, 23L, 19L, 4L, 7L, 10L, 21L, 18L,
     19L, 19L, 19L, 22L, 2L, 7L, 5L, 23L, 19L, 4L,
     7L, 8L, 10L, 9L, 20L, 5L, 23L, 23L, 17L, 17L,
     4L, 22L, 2L, 13L, 13L, 11L, 12L, 21L, 16L, 15L,
     18L, 20L, 14L, 5L, 23L, 23L, 10L, 6L, 3L, 7L,
     8L, 20L, 18L, 4L, 5L, 23L, 23L, 23L, 25L, 6L,
     7L, 6L, 8L, 8L, 24L, 18L, 6L, 6L, 12L
   )
   data.frame(
     x = factor(lvls[codes], levels = lvls),
     y = c(
       12L, 9L, 29L, 21L, 27L, 37L, 12L, 31L, 33L, 11L,
       25L, 15L, 27L, 27L, 13L, 37L, 8L, 2L, 21L, 6L,
       4L, 23L, 30L, 6L, 9L, 28L, 4L, 24L, 26L, 2L,
       13L, 10L, 15L, 6L, 38L, 9L, 30L, 26L, 28L, 39L,
       19L, 16L, 11L, 9L, 2L, 4L, 16L, 15L, 11L, 14L,
       19L, 35L, 19L, 29L, 22L, 40L, 19L, 12L, 7L, 6L,
       20L, 10L, 12L, 6L, 30L, 13L, 38L, 39L, 30L, 20L,
       6L, 9L, 1L, 40L, 26L, 14L, 23L, 33L, 2L
     )
   )
 })

这篇关于如何使用dplyr对x中的元素进行分组,x的间隔为y的计数频率?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆