使用 ggplot2 绘制带分位数的分半小提琴图 [英] Split violin plot with ggplot2 with quantiles

查看:886
本文介绍了用分位数将ggplot2拆分为小提琴曲线的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

为了绘制一半的密度,我使用了这篇文章中描述的函数:拆分小提琴用ggplot2绘图



然而,当我想像普通的 geom_violin() 或 geom_boxplot() 那样在密度曲线上绘制分位数时,却得到了一条错误信息。



我还希望在每个半密度图的上方标注观测值的数量。

下面是我想要获得的例子:

<











# Function described in a previous post.
# GeomSplitViolin draws one half of a violin per group: odd-numbered groups
# use the left outline (xminv), even-numbered groups the right one (xmaxv).
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  # data:           rows of one group; x, xmin, xmax, y, violinwidth and group
  #                 are read from it.
  # ...:            forwarded unchanged to GeomPolygon$draw_panel() and
  #                 GeomPath$draw_panel().
  # draw_quantiles: NULL (default) or probabilities in [0, 1] at which
  #                 quantile lines are requested.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # x positions of the left and right outline of the violin.
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Keep a single outline and order it by y (ascending for odd groups,
    # descending for even groups).
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Close the polygon: repeat the first and last vertex and move the added
    # vertices to round(x) (presumably the integer position of the category
    # on a discrete x axis).
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # `&&` because this is a scalar condition inside `if`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # NOTE(review): create_quantile_segment_frame() is not defined in this
      # snippet; the question reports that this draw_quantiles path errors.
      quantiles <- create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[
        rep(1, nrow(quantiles)),
        setdiff(names(data), c("x", "y")),
        drop = FALSE
      ]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      # grobTree() lives in grid, which library(ggplot2) does not attach.
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

# Layer constructor for GeomSplitViolin.
# trim, scale, draw_quantiles, na.rm and anything passed through `...` are
# collected into the layer's `params`; the remaining arguments are handed to
# layer() under the same names.
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                              position = "identity", ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomSplitViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      trim = trim,
      scale = scale,
      draw_quantiles = draw_quantiles,
      na.rm = na.rm,
      ...
    )
  )
}

# Restrict the data to the cuts "Fair" and "Good".
tmp <- diamonds[which(diamonds$cut %in% c("Fair", "Good")), ]

# Obtained plot
ggplot(tmp, aes(as.factor(color), carat, fill = cut)) +
  geom_split_violin()

# Error due to internal functions (interleave, ...)
ggplot(tmp, aes(as.factor(color), carat, fill = cut)) +
  geom_split_violin(draw_quantiles = 0.5)

# Function to return the number of observations.
# x:    the values being summarised; only length(x) is used.
# y_up: value that is multiplied by 1.06 to give the label's y position.
#       NOTE(review): the default `y_upper` is not defined in this snippet, so
#       it must exist in the calling environment or y_up must be supplied.
# Returns a data frame with columns y and label ("n = <count>").
give_n <- function(x, y_up = y_upper) {
  data.frame(
    y = y_up * 1.06,
    label = paste("n =", length(x))
  )
}

# Code to add the number of observations above each half density.
# NOTE(review): `given_plot` and `variable` are placeholders that are not
# defined in this snippet.
new_plot <- given_plot +
  # Give back only the length of the data
  stat_summary(fun.data = give_n, aes(x = as.factor(variable)), geom = "text")


解决方案

我们可以对 @YAK 的函数做进一步调整,并对 create_quantile_segment_frame 做一些修改:

  # GeomSplitViolin draws one half of a violin per group: odd-numbered groups
  # use the left outline (xminv), even-numbered groups the right one (xmaxv).
  # Quantile segments come from create_quantile_segment_frame(), defined
  # elsewhere in this file.
  GeomSplitViolin <- ggproto(
    "GeomSplitViolin", GeomViolin,
    # data:           rows of one group; x, xmin, xmax, y, violinwidth and
    #                 group are read from it.
    # ...:            forwarded unchanged to GeomPolygon$draw_panel() and
    #                 GeomPath$draw_panel().
    # draw_quantiles: NULL (default) or probabilities in [0, 1] at which
    #                 quantile lines are drawn.
    draw_group = function(self, data, ..., draw_quantiles = NULL) {
      # Original function by Jan Gleixner (@jan-glx)
      # Adjustments by Wouter van der Bijl (@Axeman)
      # x positions of the left and right outline of the violin.
      data <- transform(
        data,
        xminv = x - violinwidth * (x - xmin),
        xmaxv = x + violinwidth * (xmax - x)
      )
      grp <- data[1, "group"]
      # Keep a single outline and order it by y (ascending for odd groups,
      # descending for even groups).
      newdata <- plyr::arrange(
        transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
        if (grp %% 2 == 1) y else -y
      )
      # Close the polygon: repeat the first and last vertex and move the added
      # vertices to round(x) (presumably the integer position of the category
      # on a discrete x axis).
      newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
      newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
      # `&&` because this is a scalar condition inside `if`.
      if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
        stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
        quantiles <- create_quantile_segment_frame(
          data, draw_quantiles,
          split = TRUE, grp = grp
        )
        aesthetics <- data[
          rep(1, nrow(quantiles)),
          setdiff(names(data), c("x", "y")),
          drop = FALSE
        ]
        aesthetics$alpha <- rep(1, nrow(quantiles))
        both <- cbind(quantiles, aesthetics)
        quantile_grob <- GeomPath$draw_panel(both, ...)
        ggplot2:::ggname(
          "geom_split_violin",
          grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
        )
      } else {
        ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
      }
    }
  )


# Build the line segments that mark quantiles on a (split) violin.
#
# data:           data frame of one group with columns density, y, xminv,
#                 xmaxv and (when grp is given) x.
# draw_quantiles: probabilities at which segments are wanted.
# split:          accepted for call compatibility; it only matters together
#                 with a missing grp (see below).
# grp:            group number. Even -> segment from x to xmaxv, odd ->
#                 segment from xminv to x. NULL -> full-width segment from
#                 xminv to xmaxv.
# Returns a data frame with two rows per quantile (the two segment ends) and
# columns x, y and group.
create_quantile_segment_frame <- function(data, draw_quantiles, split = FALSE, grp = NULL) {
  # A half segment cannot be chosen without knowing the group.
  if (is.null(grp) && isTRUE(split)) {
    stop("`grp` must be supplied when `split = TRUE`", call. = FALSE)
  }
  # Cumulative density, interpolated to find the y value of each quantile.
  dens <- cumsum(data$density) / sum(data$density)
  ecdf <- stats::approxfun(dens, data$y)
  ys <- ecdf(draw_quantiles)
  # Interpolate one column of `data` at the quantile heights.
  x_at <- function(column) stats::approxfun(data$y, data[[column]])(ys)
  if (is.null(grp)) {
    x_from <- x_at("xminv")
    x_to <- x_at("xmaxv")
  } else if (grp %% 2 == 0) {
    x_from <- x_at("x")
    x_to <- x_at("xmaxv")
  } else {
    x_from <- x_at("xminv")
    x_to <- x_at("x")
  }
  data.frame(
    # rbind() + as.vector() interleaves the two vectors (from1, to1, from2,
    # to2, ...) without relying on the internal ggplot2:::interleave().
    x = as.vector(rbind(x_from, x_to)),
    y = rep(ys, each = 2),
    group = rep(ys, each = 2)
  )
}

# Layer constructor for GeomSplitViolin.
# trim, scale, draw_quantiles, na.rm and anything passed through `...` are
# collected into the layer's `params`; the remaining arguments are handed to
# layer() under the same names.
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                              position = "identity", ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomSplitViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      trim = trim,
      scale = scale,
      draw_quantiles = draw_quantiles,
      na.rm = na.rm,
      ...
    )
  )
}

然后简单地画出:

  # Split violins for the cuts "Fair" and "Good", with quartile lines.
  ggplot(diamonds[which(diamonds$cut %in% c("Fair", "Good")), ],
         aes(as.factor(color), carat, fill = cut)) +
    geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75))

<img src="https://i.stack.imgur.com/h0Q9m.png" alt="在这里输入图片描述">


In order to plot half densities, I am using the function described in this post: Split violin plot with ggplot2

However, when I want to draw the quantiles on the densities, like on a normal geom_violin() or geom_boxplot(), I obtain an error message.

I would also be interested in adding the number of observations above each half density.

Here is an example of what I would like to obtain:

# Attach ggplot2 before loading the data: diamonds ships with ggplot2, and
# data() only searches packages that are attached unless `package` is given.
library(ggplot2)
data("diamonds", package = "ggplot2")

# Function described in a previous post
# GeomSplitViolin draws one half of a violin per group: odd-numbered groups
# use the left outline (xminv), even-numbered groups the right one (xmaxv).
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  # data:           rows of one group; x, xmin, xmax, y, violinwidth and group
  #                 are read from it.
  # ...:            forwarded unchanged to GeomPolygon$draw_panel() and
  #                 GeomPath$draw_panel().
  # draw_quantiles: NULL (default) or probabilities in [0, 1] at which
  #                 quantile lines are requested.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # x positions of the left and right outline of the violin.
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Keep a single outline and order it by y (ascending for odd groups,
    # descending for even groups).
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Close the polygon: repeat the first and last vertex and move the added
    # vertices to round(x) (presumably the integer position of the category
    # on a discrete x axis).
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # `&&` because this is a scalar condition inside `if`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # NOTE(review): create_quantile_segment_frame() is not defined in this
      # snippet; the question reports that this draw_quantiles path errors.
      quantiles <- create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[
        rep(1, nrow(quantiles)),
        setdiff(names(data), c("x", "y")),
        drop = FALSE
      ]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      # grobTree() lives in grid, which library(ggplot2) does not attach.
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

# Layer constructor for GeomSplitViolin.
# trim, scale, draw_quantiles, na.rm and anything passed through `...` become
# the layer's `params`; every other argument is handed to layer() under the
# same name.
geom_split_violin <- function(mapping = NULL,
                              data = NULL,
                              stat = "ydensity",
                              position = "identity",
                              ...,
                              draw_quantiles = NULL,
                              trim = TRUE,
                              scale = "area",
                              na.rm = FALSE,
                              show.legend = NA,
                              inherit.aes = TRUE) {
  layer_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    ...
  )
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    data = data,
    mapping = mapping,
    position = position,
    params = layer_params,
    show.legend = show.legend,
    inherit.aes = inherit.aes
  )
}

# Restrict the data to the cuts "Fair" and "Good".
# (%in% never yields NA, so the logical mask selects the same rows as which().)
tmp <- diamonds[diamonds$cut %in% c("Fair", "Good"), ]

# Plot obtained without quantiles
ggplot(
  data = tmp,
  mapping = aes(x = as.factor(color), y = carat, fill = cut)
) +
  geom_split_violin()

# Requesting a quantile errors, due to internal functions (interleave, ...)
ggplot(
  data = tmp,
  mapping = aes(x = as.factor(color), y = carat, fill = cut)
) +
  geom_split_violin(draw_quantiles = 0.5)

# Function to return number of observation
# Summary function returning the number of observations as a text label.
# x:    the values being summarised; only length(x) is used.
# y_up: value that is multiplied by 1.06 to give the label's y position.
#       NOTE(review): the default `y_upper` is not defined in this snippet, so
#       it must exist in the calling environment or y_up must be supplied.
# Returns a data frame with columns y and label ("n = <count>").
give_n <- function(x, y_up = y_upper) {
  data.frame(
    y = y_up * 1.06,
    label = paste("n =", length(x))
  )
}

# Code to add number of observations above each half density
# NOTE(review): `given_plot` and `variable` are placeholders that are not
# defined in this snippet.
new_plot <- given_plot +
  # Draw the labels built by give_n (the data length only) as text
  stat_summary(
    mapping = aes(x = as.factor(variable)),
    geom = "text",
    fun.data = give_n
  )

解决方案

We can make further adjustments to the function by @YAK, and add some adjustments to create_quantile_segment_frame:

# GeomSplitViolin draws one half of a violin per group: odd-numbered groups
# use the left outline (xminv), even-numbered groups the right one (xmaxv).
# Quantile segments come from create_quantile_segment_frame(), defined
# elsewhere in this file.
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  # data:           rows of one group; x, xmin, xmax, y, violinwidth and group
  #                 are read from it.
  # ...:            forwarded unchanged to GeomPolygon$draw_panel() and
  #                 GeomPath$draw_panel().
  # draw_quantiles: NULL (default) or probabilities in [0, 1] at which
  #                 quantile lines are drawn.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Original function by Jan Gleixner (@jan-glx)
    # Adjustments by Wouter van der Bijl (@Axeman)
    # x positions of the left and right outline of the violin.
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Keep a single outline and order it by y (ascending for odd groups,
    # descending for even groups).
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Close the polygon: repeat the first and last vertex and move the added
    # vertices to round(x) (presumably the integer position of the category
    # on a discrete x axis).
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # `&&` because this is a scalar condition inside `if`.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      quantiles <- create_quantile_segment_frame(
        data, draw_quantiles,
        split = TRUE, grp = grp
      )
      aesthetics <- data[
        rep(1, nrow(quantiles)),
        setdiff(names(data), c("x", "y")),
        drop = FALSE
      ]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)

# Build the line segments that mark quantiles on a (split) violin.
#
# data:           data frame of one group with columns density, y, xminv,
#                 xmaxv and (when grp is given) x.
# draw_quantiles: probabilities at which segments are wanted.
# split:          accepted for call compatibility; it only matters together
#                 with a missing grp (see below).
# grp:            group number. Even -> segment from x to xmaxv, odd ->
#                 segment from xminv to x. NULL -> full-width segment from
#                 xminv to xmaxv.
# Returns a data frame with two rows per quantile (the two segment ends) and
# columns x, y and group.
create_quantile_segment_frame <- function(data, draw_quantiles, split = FALSE, grp = NULL) {
  # A half segment cannot be chosen without knowing the group.
  if (is.null(grp) && isTRUE(split)) {
    stop("`grp` must be supplied when `split = TRUE`", call. = FALSE)
  }
  # Cumulative density, interpolated to find the y value of each quantile.
  dens <- cumsum(data$density) / sum(data$density)
  ecdf <- stats::approxfun(dens, data$y)
  ys <- ecdf(draw_quantiles)
  # Interpolate one column of `data` at the quantile heights.
  x_at <- function(column) stats::approxfun(data$y, data[[column]])(ys)
  if (is.null(grp)) {
    x_from <- x_at("xminv")
    x_to <- x_at("xmaxv")
  } else if (grp %% 2 == 0) {
    x_from <- x_at("x")
    x_to <- x_at("xmaxv")
  } else {
    x_from <- x_at("xminv")
    x_to <- x_at("x")
  }
  data.frame(
    # rbind() + as.vector() interleaves the two vectors (from1, to1, from2,
    # to2, ...) without relying on the internal ggplot2:::interleave().
    x = as.vector(rbind(x_from, x_to)),
    y = rep(ys, each = 2),
    group = rep(ys, each = 2)
  )
}

# Layer constructor for GeomSplitViolin.
# trim, scale, draw_quantiles, na.rm and anything passed through `...` become
# the layer's `params`; every other argument is handed to layer() under the
# same name.
geom_split_violin <- function(mapping = NULL,
                              data = NULL,
                              stat = "ydensity",
                              position = "identity",
                              ...,
                              draw_quantiles = NULL,
                              trim = TRUE,
                              scale = "area",
                              na.rm = FALSE,
                              show.legend = NA,
                              inherit.aes = TRUE) {
  layer_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    ...
  )
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    data = data,
    mapping = mapping,
    position = position,
    params = layer_params,
    show.legend = show.legend,
    inherit.aes = inherit.aes
  )
}

Then simply plot:

# Split violins for the cuts "Fair" and "Good", with quartile lines.
# (%in% never yields NA, so the logical mask selects the same rows as which().)
ggplot(
  data = diamonds[diamonds$cut %in% c("Fair", "Good"), ],
  mapping = aes(x = as.factor(color), y = carat, fill = cut)
) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75))

这篇关于用分位数将ggplot2拆分为小提琴曲线的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆