使用ggplot2绘制带分位数的拆分小提琴图 [英] Split violin plot with ggplot2 with quantiles
问题描述
为了绘制一半的密度,我使用了这篇文章中描述的函数:用ggplot2绘制拆分小提琴图
然而,当我想在密度上绘制分位数时,就像普通的 geom_violin() 或 geom_boxplot() 那样,我得到一个错误信息。
我也有兴趣在每个半密度的上方添加观察数目。
下面是我想要获得的例子:
# Setup (these two lines were lost when the page was extracted).
library(ggplot2)
data("diamonds")
# Function described in a previous post.
# Split-violin geom: every group is drawn as one half of a violin. Groups
# with an odd index use the xmin side (xminv), groups with an even index use
# the xmax side (xmaxv).
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  # data: one group's rows; the columns x, xmin, xmax, y, violinwidth and
  #   group are read.
  # ...: forwarded unchanged to GeomPolygon$draw_panel() and
  #   GeomPath$draw_panel().
  # draw_quantiles: NULL, or probabilities in [0, 1] at which to draw a line.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Keep only this group's side of the outline; the sort direction along y
    # depends on the side.
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Pad the outline with copies of its first and last rows, then set x of
    # the first and the two last rows to round(first x) so the polygon is
    # closed along a straight edge (presumably the violin's centre on a
    # discrete axis).
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # `&&`: both operands are scalars and the range check is skipped when no
    # quantiles were requested.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # NOTE: ggplot2 does not export create_quantile_segment_frame(); a
      # definition must be in scope when this branch runs, otherwise the
      # call fails (this is the error the question is about).
      quantiles <- create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      # grid is not attached by library(ggplot2), so qualify grobTree().
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)
# Layer constructor for the split violin.
# trim, scale, draw_quantiles, na.rm and anything in `...` are handed to
# layer() through its `params` list; all other arguments are passed to
# layer() under the same name.
geom_split_violin <- function(mapping = NULL, data = NULL,
                              stat = "ydensity", position = "identity",
                              ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer(
    data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
    position = position, show.legend = show.legend, inherit.aes = inherit.aes,
    params = list(
      trim = trim, scale = scale, draw_quantiles = draw_quantiles,
      na.rm = na.rm, ...
    )
  )
}
# Keep only the rows whose cut is "Fair" or "Good".
tmp <- diamonds[which(diamonds$cut %in% c("Fair", "Good")), ]
# Obtained plot
ggplot(tmp, aes(as.factor(color), carat, fill = cut)) +
  geom_split_violin()
# Error due to internal functions (interleave, ...)
ggplot(tmp, aes(as.factor(color), carat, fill = cut)) +
  geom_split_violin(draw_quantiles = 0.5)
# Function to return the number of observations.
# x: the values being summarised; only length(x) is used.
# y_up: position reference; the label is placed at y_up * 1.06. The default
#   `y_upper` is not defined in the visible code, so it has to exist when
#   the default is evaluated, or y_up must be passed explicitly.
# Returns a data.frame with columns y and label ("n = <count>").
give_n <- function(x, y_up = y_upper) {
  data.frame(
    y = y_up * 1.06,
    label = paste("n =", length(x))
  )
}
# Code to add the number of observations above each half density.
# `given_plot` and `variable` are not defined in the visible code; they look
# like placeholders for an existing plot and its x variable.
new_plot <- given_plot +
  # give_n only reports the length of the data it receives
  stat_summary(fun.data = give_n, aes(x = as.factor(variable)), geom = "text")
我们可以进一步调整 @YAK 给出的函数,并对 create_quantile_segment_frame 做一些调整:
# Split-violin geom: every group is drawn as one half of a violin. Groups
# with an odd index use the xmin side (xminv), groups with an even index use
# the xmax side (xmaxv).
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  # data: one group's rows; the columns x, xmin, xmax, y, violinwidth and
  #   group are read.
  # ...: forwarded unchanged to GeomPolygon$draw_panel() and
  #   GeomPath$draw_panel().
  # draw_quantiles: NULL, or probabilities in [0, 1] at which to draw a line.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Original function by Jan Gleixner (@jan-glx)
    # Adjustments by Wouter van der Bijl (@Axeman)
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Keep only this group's side of the outline; the sort direction along y
    # depends on the side.
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Pad the outline with copies of its first and last rows, then set x of
    # the first and the two last rows to round(first x) so the polygon is
    # closed along a straight edge (presumably the violin's centre on a
    # discrete axis).
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # `&&`: both operands are scalars and the range check is skipped when no
    # quantiles were requested.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # grp tells the helper which half of the violin the segments belong to.
      quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)
# Build the line segments that mark the requested quantiles on a violin.
#
# data: one group's violin data with numeric columns y, density, x, xminv
#   and xmaxv.
# draw_quantiles: probabilities at which a segment is wanted.
# split: if TRUE, grp is required. Which half receives the segment is decided
#   by grp alone.
# grp: group index. Odd -> segment from xminv to x, even -> segment from x to
#   xmaxv. NULL (only with split = FALSE) -> segment from xminv to xmaxv.
#
# Returns a data.frame with columns x, y and group, two rows per quantile
# (the two end points of its segment). group holds the quantile's y value, so
# the rows sharing a group form one segment.
create_quantile_segment_frame <- function(data, draw_quantiles, split = FALSE, grp = NULL) {
  if (is.null(grp) && split) {
    stop("`grp` must be supplied when `split = TRUE`", call. = FALSE)
  }
  dens <- cumsum(data$density) / sum(data$density)
  # dens is a running sum, hence non-decreasing for non-negative densities;
  # "ordered" tells approxfun() not to re-sort or collapse tied values.
  ecdf <- stats::approxfun(dens, data$y, ties = "ordered")
  ys <- ecdf(draw_quantiles)

  # Pick the two outlines between which each segment is drawn.
  if (is.null(grp)) {
    from <- data$xminv
    to <- data$xmaxv
  } else if (grp %% 2 == 0) {
    from <- data$x
    to <- data$xmaxv
  } else {
    from <- data$xminv
    to <- data$x
  }
  x_from <- stats::approxfun(data$y, from)(ys)
  x_to <- stats::approxfun(data$y, to)(ys)

  data.frame(
    # c(rbind(a, b)) interleaves a and b: a1, b1, a2, b2, ...
    x = c(rbind(x_from, x_to)),
    y = rep(ys, each = 2),
    group = rep(ys, each = 2)
  )
}
# Layer constructor for the split violin.
# trim, scale, draw_quantiles, na.rm and anything in `...` are handed to
# layer() through its `params` list; all other arguments are passed to
# layer() under the same name.
geom_split_violin <- function(mapping = NULL, data = NULL,
                              stat = "ydensity", position = "identity",
                              ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer(
    data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
    position = position, show.legend = show.legend, inherit.aes = inherit.aes,
    params = list(
      trim = trim, scale = scale, draw_quantiles = draw_quantiles,
      na.rm = na.rm, ...
    )
  )
}
然后简单地画出:
# Split violins for the "Fair" and "Good" cuts with quartile lines.
ggplot(diamonds[which(diamonds$cut %in% c("Fair", "Good")), ],
       aes(as.factor(color), carat, fill = cut)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75))
<img src="https://i.stack.imgur.com/h0Q9m.png" alt="在这里输入图片描述">
In order to plot half densities, I am using the function described in this post: Split violin plot with ggplot2
However, when I want to draw the quantiles on the densities, as on a normal geom_violin() or geom_boxplot(), I obtain an error message.
I would also be interested in adding the number of observations above each half density.
Here is an example of what I would like to obtain:
# Attach ggplot2 first: the diamonds data set ships with ggplot2, so data()
# can only find it once the package is attached.
library(ggplot2)
data("diamonds")
# Function described in a previous post.
# Split-violin geom: every group is drawn as one half of a violin. Groups
# with an odd index use the xmin side (xminv), groups with an even index use
# the xmax side (xmaxv).
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  # data: one group's rows; the columns x, xmin, xmax, y, violinwidth and
  #   group are read.
  # ...: forwarded unchanged to GeomPolygon$draw_panel() and
  #   GeomPath$draw_panel().
  # draw_quantiles: NULL, or probabilities in [0, 1] at which to draw a line.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Keep only this group's side of the outline; the sort direction along y
    # depends on the side.
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Pad the outline with copies of its first and last rows, then set x of
    # the first and the two last rows to round(first x) so the polygon is
    # closed along a straight edge (presumably the violin's centre on a
    # discrete axis).
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # `&&`: both operands are scalars and the range check is skipped when no
    # quantiles were requested.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # NOTE: ggplot2 does not export create_quantile_segment_frame(); a
      # definition must be in scope when this branch runs, otherwise the
      # call fails (this is the error the question is about).
      quantiles <- create_quantile_segment_frame(data, draw_quantiles)
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      # grid is not attached by library(ggplot2), so qualify grobTree().
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)
# Layer constructor for the split violin.
# trim, scale, draw_quantiles, na.rm and anything in `...` are handed to
# layer() through its `params` list; all other arguments are passed to
# layer() under the same name.
geom_split_violin <- function(mapping = NULL, data = NULL,
                              stat = "ydensity", position = "identity",
                              ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    ...
  )
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    position = position,
    mapping = mapping,
    data = data,
    params = layer_params,
    show.legend = show.legend,
    inherit.aes = inherit.aes
  )
}
# Keep only the rows whose cut is "Fair" or "Good" (%in% never yields NA, so
# the logical vector can index the rows directly).
tmp <- diamonds[diamonds$cut %in% c("Fair", "Good"), ]
# Obtained plot
ggplot(data = tmp, mapping = aes(x = as.factor(color), y = carat, fill = cut)) +
  geom_split_violin()
# Error due to internal functions (interleave, ...)
ggplot(data = tmp, mapping = aes(x = as.factor(color), y = carat, fill = cut)) +
  geom_split_violin(draw_quantiles = 0.5)
# Build a one-row label giving the number of observations.
# x: the values being summarised; only length(x) is used.
# y_up: position reference; the label is placed at y_up * 1.06. The default
#   `y_upper` is not defined in the visible code, so it has to exist when
#   the default is evaluated, or y_up must be passed explicitly.
# Returns a data.frame with columns y and label ("n = <count>").
give_n <- function(x, y_up = y_upper) {
  n_label <- paste("n =", length(x))
  label_height <- y_up * 1.06
  data.frame(y = label_height, label = n_label)
}
# Add the number of observations above each half density.
# `given_plot` and `variable` are not defined in the visible code; they look
# like placeholders for an existing plot and its x variable.
# give_n only reports the length of the data it receives.
new_plot <- given_plot +
  stat_summary(
    mapping = aes(x = as.factor(variable)),
    geom = "text",
    fun.data = give_n
  )
We can make further adjustments to the function by @YAK, and add some adjustments to create_quantile_segment_frame:
# Split-violin geom: every group is drawn as one half of a violin. Groups
# with an odd index use the xmin side (xminv), groups with an even index use
# the xmax side (xmaxv).
GeomSplitViolin <- ggproto(
  "GeomSplitViolin", GeomViolin,
  # data: one group's rows; the columns x, xmin, xmax, y, violinwidth and
  #   group are read.
  # ...: forwarded unchanged to GeomPolygon$draw_panel() and
  #   GeomPath$draw_panel().
  # draw_quantiles: NULL, or probabilities in [0, 1] at which to draw a line.
  draw_group = function(self, data, ..., draw_quantiles = NULL) {
    # Original function by Jan Gleixner (@jan-glx)
    # Adjustments by Wouter van der Bijl (@Axeman)
    data <- transform(
      data,
      xminv = x - violinwidth * (x - xmin),
      xmaxv = x + violinwidth * (xmax - x)
    )
    grp <- data[1, "group"]
    # Keep only this group's side of the outline; the sort direction along y
    # depends on the side.
    newdata <- plyr::arrange(
      transform(data, x = if (grp %% 2 == 1) xminv else xmaxv),
      if (grp %% 2 == 1) y else -y
    )
    # Pad the outline with copies of its first and last rows, then set x of
    # the first and the two last rows to round(first x) so the polygon is
    # closed along a straight edge (presumably the violin's centre on a
    # discrete axis).
    newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
    newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
    # `&&`: both operands are scalars and the range check is skipped when no
    # quantiles were requested.
    if (length(draw_quantiles) > 0 && !scales::zero_range(range(data$y))) {
      stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
      # grp tells the helper which half of the violin the segments belong to.
      quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
      aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
      aesthetics$alpha <- rep(1, nrow(quantiles))
      both <- cbind(quantiles, aesthetics)
      quantile_grob <- GeomPath$draw_panel(both, ...)
      ggplot2:::ggname(
        "geom_split_violin",
        grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob)
      )
    } else {
      ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
    }
  }
)
# Build the line segments that mark the requested quantiles on a violin.
#
# data: one group's violin data with numeric columns y, density, x, xminv
#   and xmaxv.
# draw_quantiles: probabilities at which a segment is wanted.
# split: if TRUE, grp is required. Which half receives the segment is decided
#   by grp alone.
# grp: group index. Odd -> segment from xminv to x, even -> segment from x to
#   xmaxv. NULL (only with split = FALSE) -> segment from xminv to xmaxv.
#
# Returns a data.frame with columns x, y and group, two rows per quantile
# (the two end points of its segment). group holds the quantile's y value, so
# the rows sharing a group form one segment.
create_quantile_segment_frame <- function(data, draw_quantiles, split = FALSE, grp = NULL) {
  if (is.null(grp) && split) {
    stop("`grp` must be supplied when `split = TRUE`", call. = FALSE)
  }
  dens <- cumsum(data$density) / sum(data$density)
  # dens is a running sum, hence non-decreasing for non-negative densities;
  # "ordered" tells approxfun() not to re-sort or collapse tied values.
  ecdf <- stats::approxfun(dens, data$y, ties = "ordered")
  ys <- ecdf(draw_quantiles)

  # Pick the two outlines between which each segment is drawn.
  if (is.null(grp)) {
    from <- data$xminv
    to <- data$xmaxv
  } else if (grp %% 2 == 0) {
    from <- data$x
    to <- data$xmaxv
  } else {
    from <- data$xminv
    to <- data$x
  }
  x_from <- stats::approxfun(data$y, from)(ys)
  x_to <- stats::approxfun(data$y, to)(ys)

  data.frame(
    # c(rbind(a, b)) interleaves a and b: a1, b1, a2, b2, ...
    x = c(rbind(x_from, x_to)),
    y = rep(ys, each = 2),
    group = rep(ys, each = 2)
  )
}
# Layer constructor for the split violin.
# trim, scale, draw_quantiles, na.rm and anything in `...` are handed to
# layer() through its `params` list; all other arguments are passed to
# layer() under the same name.
geom_split_violin <- function(mapping = NULL, data = NULL,
                              stat = "ydensity", position = "identity",
                              ...,
                              draw_quantiles = NULL, trim = TRUE,
                              scale = "area", na.rm = FALSE,
                              show.legend = NA, inherit.aes = TRUE) {
  layer_params <- list(
    trim = trim,
    scale = scale,
    draw_quantiles = draw_quantiles,
    na.rm = na.rm,
    ...
  )
  layer(
    geom = GeomSplitViolin,
    stat = stat,
    position = position,
    mapping = mapping,
    data = data,
    params = layer_params,
    show.legend = show.legend,
    inherit.aes = inherit.aes
  )
}
Then simply plot:
# Split violins for the "Fair" and "Good" cuts with quartile lines
# (%in% never yields NA, so the logical vector can index the rows directly).
ggplot(
  data = diamonds[diamonds$cut %in% c("Fair", "Good"), ],
  mapping = aes(x = as.factor(color), y = carat, fill = cut)
) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75))
这篇关于用分位数将ggplot2拆分为小提琴曲线的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!