在ggplot boxplot中没有与facet_wrap异常值 [英] No outliers in ggplot boxplot with facet_wrap

查看:867
本文介绍了在ggplot boxplot中没有与facet_wrap异常值的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想用ggplot来绘制没有异常值的箱型图,只关注箱子和胡须。

例如:

  p1 < -  ggplot(diamonds,aes(x = cut,y = price,fill = cut))
p1 + geom_boxplot()+ facet_wrap(〜清晰度,scale =免费)

给出了具有异常值的多面箱图



<我可以用outlier.size = NA来压制离群值。

  p1 < -  ggplot(diamonds,aes(x = cut ,y = price,fill = cut))
p1 + geom_boxplot(outlier.size = NA)+ facet_wrap(〜clarity,scales =free)

给出



我可以像这样重置ylim

  ylim1 = boxplot.stats (diamonds $ price)$ stats [c(1,5)] 

然后重新绘制

  p1 + geom_boxplot(outlier.size = NA)
+ facet_wrap(〜clarity,scales =free)
+ coord_cartesian(ylim = ylim1 * 1.05)

但这不起作用:





有没有办法facet_wrapboxplots.stats函数?

编辑:

我试图动态计算boxplot统计信息,但这似乎不起作用。

  give .stats<  -  function(x){return(boxplot.stats(x)$ stats [c(1,5)])} 

p1 + geom_boxplot(outlier.size = NA)+
facet_wrap(〜clarity,scales =free)+
coord_cartesian(ylim = give.stats)

> min(x,na.rm = na.rm)中的错误:参数



无效'type'

解决方案

好吧,我想出了一个更简单的方法来做到这一点,通过评论在原始的ggplot boxplot函数中删除一些行并调用修改过的函数。



我不是程序员,不知道这是一件好事还是强健的事情,但现在看起来工作得很好。



这是我正在使用的修改后的功能:

 #geom_boxplot的修改版本

require(ggplot2)
geom_boxplot_noOutliers< - 函数(mapping = NULL,data = NULL,stat =boxplot,
position =dodge,outlier.colour = NULL,
outlier.shape = NULL,outlier.size = NULL,
notch = FALSE,notchwidth = .5,varwidth = FALSE,
...){

#outlier_defaults < - ggplot2 ::: Geom $ find('point')$ default_aes()

#outlier.colour< - outlier.colour%||%outlier_defaults $ color
#outlier .shape< - outlier.shape%||%outlier_defaults $ shape
#outlier.size< - outlier.size%||%outlier_defaults $ size

GeomBoxplot_noOutliers $ new(mapping = MAPP data = data,stat = stat,
position = position,outlier.colour = outlier.colour,
outlier.shape = outlier.shape,outlier.size = outlier.size,notch = notch,
notchwidth = notchwidth,varwidth = varwidth,...)
}

GeomBoxplot_noOutliers< - proto(ggplot2 ::: Geom,{
objname< - boxplot_noOutliers

reparameterise< - function(。,df,params){
df $ width< - df $ width%||%
params $ width%| |%(分辨率(df $ x,FALSE)* 0.9)

#if(!is.null(df $ outliers)){
#suppressWarnings({
#out_min < - vapply(df $ outliers,min,numeric(1))
#out_max < - vapply(df $ outliers,max,numeric(1))
#})

#df $ ymin_final <-pmin(out_min,df $ ymin)
#df $ ymax_final <-pmax(out_max,df $ ymax)
#}

#如果`varwidth`没有被请求或不可用,请不要使用
if(i s.null(params)|| is.null(params $ varwidth)|| !params $ varwidth || is.null(df $ relvarwidth)){
df $ xmin< - df $ x - df $ width / 2
df $ xmax< - df $ x + df $ width / 2
} else {
#make`relvarwidth`相对于最大组的大小
df $ relvarwidth< - df $ relvarwidth / max(df $ relvarwidth)
df $ xmin< ; - df $ x - df $ relvarwidth * df $ width / 2
df $ xmax < - df $ x + df $ relvarwidth * df $ width / 2
}
df $ width < - NULL
if(!is.null(df $ relvarwidth))df $ relvarwidth< - NULL

df
}

draw < - 函数(。,data,...,fatten = 2,outlier.colour = NULL,outlier.shape = NULL,outlier.size = 2,
notch = FALSE,notchwidth = .5,varwidth = FALSE){
common < - data.frame(
color = data $ color,
size = data $ size,
linetype = data $ linetype,
fill = alpha(data $ fill,data $ alpha),
group = data $ group,
stringsAsFactors = FALSE

$ b $数据框架(
x =数据$ x,
xend =数据$ x,
y = c(数据$上面,数据$下面),
yend = c(data $ ymax,data $ ymin),
alpha = NA,
common)

box < - data.frame(
xmin = data $ xmin ,
xmax = data $ xmax,
ymin = data $ lower,
y = data $ middle,
ymax = data $ upper,
ynotchlower = ifelse(notch,数据$ notchlower,NA),
ynotchupper = ifelse(缺口,数据$ notchupper,NA),
notchwidth =缺口宽度,
alpha =数据$ alpha,
common)

#if(!is.null(data $ outliers)&& (数据$离群值[[1]]> 1)){
#离群值< - 数据帧(
#y =数据$离群值[[1]],
#x = data $ x [1],
#color = outlier.colour%||%data $ color [1],
#shape = outlier.shape%||%data $ shape [1 ],
#size = outlier.size%||%data $ size [1],
#fill = NA,
#alpha = NA,
#stringsAsFactors = FALSE)
#outliers_grob< - GeomPoint $ draw(outliers,...)
#} else {
outliers_grob< - NULL
#}

ggname(。$ my_name(),grobTree(
outliers_grob,
)GeomSegment $ draw(胡须,...),
GeomCrossbar $ draw(box,fatten = fatten,...)
$)b

$ b guide_geom< - 函数(。)boxplot_noOutliers
draw_legend< - 函数(。,data,...){$ b (数据,gpar(col = color,fill = alpha(fill,alpha),..., lwd = size * .pt,lty = lin
gTree(gp = gp,children = gList(
linesGrob(0.5,c(0.1,0.25)),
linesGrob(0.5,c(0.75,0.9)),
rectGrob(height = 0.5,width = 0.75),
linesGrob(c(0.125,0.875),0.5)
))
}

default_stat< ;函数(。)StatBoxplot
default_pos< - function(。)PositionDodge
default_aes< - function(。)aes(weight = 1,color =grey20,fill =white,大小= 0.5,alpha = NA,shape = 16,linetype =solid)
required_aes <-c(x,lower,upper,middle,ymin,ymax )

})

我将它保存为r文件并使用 source 加载它:

  library(ggplot2)
库(比例)

#载入函数
源(D:/ Eigene Dateien / Scripte / R-Scripte / myfunctions / geomBoxplot_noOutliers.r)
geom_boxplot_noOutliers
来绘制没有异常值的图片,并且一切正常,甚至无线 - )

  p1 < -  ggplot(diamonds,aes(x = cut,y = price,fill = cut ))
p1 + geom_boxplot_noOutliers()+ facet_wrap(〜clarity,scales =free)


I would like to plot boxplots without outliers with ggplot, giving focus on the boxes and whiskers only

For example:

p1 <- ggplot(diamonds, aes(x=cut, y=price, fill=cut))
p1 + geom_boxplot() + facet_wrap(~clarity, scales="free")

gives facetted boxplots with outliers

I can suppress outliers with outlier.size=NA:

p1 <- ggplot(diamonds, aes(x=cut, y=price, fill=cut))
p1 + geom_boxplot(outlier.size=NA) + facet_wrap(~clarity, scales="free")

which gives

Here, the y-axis scale is the same as in the original plot, just the outliers don't show up. How can I now modify the scale to "zoom in" on each panel according to the whisker ends?

I can reset ylim like this

ylim1 = boxplot.stats(diamonds$price)$stats[c(1, 5)]

and then replot

p1 + geom_boxplot(outlier.size=NA) 
   + facet_wrap(~clarity, scales="free") 
   +  coord_cartesian(ylim = ylim1*1.05)

but this doesn't work on the facets:

Is there a way to "facet_wrap" the boxplots.stats function?

Edit:

I've tried to calculate the boxplot statistics dynamically, but this doesn't seem to work.

give.stats <- function(x){return(boxplot.stats(x)$stats[c(1,5)])}

p1 + geom_boxplot(outlier.size=NA) + 
  facet_wrap(~clarity, scales="free") + 
  coord_cartesian(ylim = give.stats)

> Error in min(x, na.rm = na.rm) : invalid 'type' (list) of argument

Any more ideas would be much appreciated.

解决方案

Ok, I figured out a more easy way to do this by commenting out some lines in the original ggplot boxplot function and calling the modified function.

I am not a programmer, no idea if this is a good or robust thing to do, but it seems to work fine for now.

This is the modified function I am using:

#modified version of geom_boxplot

require(ggplot2)
geom_boxplot_noOutliers <- function (mapping = NULL, data = NULL, stat = "boxplot",
                          position = "dodge", outlier.colour = NULL,
                          outlier.shape = NULL, outlier.size = NULL,
                          notch = FALSE, notchwidth = .5, varwidth = FALSE,
                          ...) {

  #outlier_defaults <- ggplot2:::Geom$find('point')$default_aes()

  #outlier.colour   <- outlier.colour %||% outlier_defaults$colour
  #outlier.shape    <- outlier.shape  %||% outlier_defaults$shape
  #outlier.size     <- outlier.size   %||% outlier_defaults$size

  GeomBoxplot_noOutliers$new(mapping = mapping, data = data, stat = stat,
                  position = position, outlier.colour = outlier.colour,
                  outlier.shape = outlier.shape, outlier.size = outlier.size, notch = notch,
                  notchwidth = notchwidth, varwidth = varwidth, ...)
}

GeomBoxplot_noOutliers <- proto(ggplot2:::Geom, {
  objname <- "boxplot_noOutliers"

  reparameterise <- function(., df, params) {
    df$width <- df$width %||%
      params$width %||% (resolution(df$x, FALSE) * 0.9)

  # if (!is.null(df$outliers)) {
  #    suppressWarnings({
  #      out_min <- vapply(df$outliers, min, numeric(1))
  #      out_max <- vapply(df$outliers, max, numeric(1))
  #    })
  #    
  #    df$ymin_final <- pmin(out_min, df$ymin)
  #    df$ymax_final <- pmax(out_max, df$ymax)
  #   }

    # if `varwidth` not requested or not available, don't use it
    if (is.null(params) || is.null(params$varwidth) || !params$varwidth || is.null(df$relvarwidth)) {
      df$xmin <- df$x - df$width / 2
      df$xmax <- df$x + df$width / 2
    } else {
      # make `relvarwidth` relative to the size of the largest group
      df$relvarwidth <- df$relvarwidth / max(df$relvarwidth)
      df$xmin <- df$x - df$relvarwidth * df$width / 2
      df$xmax <- df$x + df$relvarwidth * df$width / 2
    }
    df$width <- NULL
    if (!is.null(df$relvarwidth)) df$relvarwidth <- NULL

    df
  }

  draw <- function(., data, ..., fatten = 2, outlier.colour = NULL, outlier.shape = NULL, outlier.size = 2,
                   notch = FALSE, notchwidth = .5, varwidth = FALSE) {
    common <- data.frame(
      colour = data$colour,
      size = data$size,
      linetype = data$linetype,
      fill = alpha(data$fill, data$alpha),
      group = data$group,
      stringsAsFactors = FALSE
    )

    whiskers <- data.frame(
      x = data$x,
      xend = data$x,
      y = c(data$upper, data$lower),
      yend = c(data$ymax, data$ymin),
      alpha = NA,
      common)

    box <- data.frame(
      xmin = data$xmin,
      xmax = data$xmax,
      ymin = data$lower,
      y = data$middle,
      ymax = data$upper,
      ynotchlower = ifelse(notch, data$notchlower, NA),
      ynotchupper = ifelse(notch, data$notchupper, NA),
      notchwidth = notchwidth,
      alpha = data$alpha,
      common)

  #  if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) {
  #    outliers <- data.frame(
  #      y = data$outliers[[1]],
  #      x = data$x[1],
  #      colour = outlier.colour %||% data$colour[1],
  #      shape = outlier.shape %||% data$shape[1],
  #      size = outlier.size %||% data$size[1],
  #      fill = NA,
  #      alpha = NA,
  #      stringsAsFactors = FALSE)
  #    outliers_grob <- GeomPoint$draw(outliers, ...)
  #  } else {
      outliers_grob <- NULL
  #  }

    ggname(.$my_name(), grobTree(
      outliers_grob,
      GeomSegment$draw(whiskers, ...),
      GeomCrossbar$draw(box, fatten = fatten, ...)
    ))
  }

  guide_geom <- function(.) "boxplot_noOutliers"
  draw_legend <- function(., data, ...)  {
    data <- aesdefaults(data, .$default_aes(), list(...))
    gp <- with(data, gpar(col=colour, fill=alpha(fill, alpha), lwd=size * .pt, lty = linetype))
    gTree(gp = gp, children = gList(
      linesGrob(0.5, c(0.1, 0.25)),
      linesGrob(0.5, c(0.75, 0.9)),
      rectGrob(height=0.5, width=0.75),
      linesGrob(c(0.125, 0.875), 0.5)
    ))
  }

  default_stat <- function(.) StatBoxplot
  default_pos <- function(.) PositionDodge
  default_aes <- function(.) aes(weight=1, colour="grey20", fill="white", size=0.5, alpha = NA, shape = 16, linetype = "solid")
  required_aes <- c("x", "lower", "upper", "middle", "ymin", "ymax")

})

I saved it as an r file and use source to load it:

library(ggplot2)
library(scales)

#load functions
source("D:/Eigene Dateien/Scripte/R-Scripte/myfunctions/geomBoxplot_noOutliers.r")

Now I can just plot without outliers using geom_boxplot_noOutliers and everything works fine even with facets :-)

p1 <- ggplot(diamonds, aes(x=cut, y=price, fill=cut))
p1 + geom_boxplot_noOutliers() + facet_wrap(~clarity, scales="free")

这篇关于在ggplot boxplot中没有与facet_wrap异常值的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆