GGPLOT可以制作2D摘要数据吗? [英] Can GGPLOT make 2D summaries of data?

查看:116
本文介绍了GGPLOT可以制作2D摘要数据吗?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我希望绘制反应时间的平均值(或其他函数)作为x y平面中目标位置的函数。
作为测试数据:

  library(ggplot2)
# Simulated test data: 100 target positions drawn uniformly from [-1, 1] on
# each axis, plus a standard-normal "reaction time" per target.
# (Restored: the assignment arrows had been split into `< -`, which R parses
# as a comparison rather than an assignment.)
xs <- runif(100, -1, 1)
ys <- runif(100, -1, 1)
rts <- rnorm(100)
testDF <- data.frame(x = xs, y = ys, rt = rts)

我知道我可以这样做: p <- ggplot(data = testDF, aes(x = x, y = y)) + geom_bin2d(bins = 10)

我希望能够做的是同样的事情,但绘制每个箱子里数据的某个函数值,而不是计数。我可以这样做吗?或者是否需要先在R中计算条件均值(例如 drt <- tapply(testDF$rt, list(cut(testDF$x, 10), cut(testDF$y, 10)), mean)),然后再绘制这个结果?



谢谢。

解决方案

更新:随着 ggplot2 0.9.0 的发布,新增的 stat_summary2d 和 stat_summary_bin 已经涵盖了这里的大部分功能。



这里是这个答案对应的 gist: https://gist.github.com/1341218

下面是对 stat_bin2d 的轻微修改,使其可以接受任意函数:



  # Stat that bins observations on a rectangular x/y grid and summarises the
  # `z` aesthetic inside every bin with an arbitrary function `fun`. It is
  # built on ggplot2's proto-based `Stat` object.
  # (Restored from machine-translation damage: split `<-` arrows, translated
  # keywords, shuffled lines and dropped parentheses made this unparseable.)
  StatAggr2d <- proto(Stat, {
    objname <- "aggr2d"
    # The per-bin summary is exposed as `value` and mapped to fill by default.
    default_aes <- function(.) aes(fill = ..value..)
    required_aes <- c("x", "y", "z")
    default_geom <- function(.) GeomRect

    # Compute one row per occupied bin.
    #
    # data     - data frame with columns x, y and z.
    # scales   - scales object; scales$x$output_set() / scales$y$output_set()
    #            are used as the x and y ranges (presumably the trained scale
    #            ranges; confirm against the ggplot2 version in use).
    # binwidth - optional length-2 numeric (x width, y width). When NULL, the
    #            width is 1 for integer data, otherwise range / bins.
    # bins     - number of bins per axis used to derive binwidth.
    # breaks   - optional list of two numeric vectors of bin edges (x, y).
    # origin   - optional length-2 numeric; when given (and breaks is NULL),
    #            edges start at origin and step by binwidth.
    # drop     - accepted for signature compatibility; not used in this body.
    # fun      - summary function applied to the z values of each bin.
    #
    # Returns a data frame with xbin, ybin, value and the bin indices/edges
    # xint, xmin, xmax, yint, ymin, ymax.
    calculate <- function(., data, scales, binwidth = NULL, bins = 30,
                          breaks = NULL, origin = NULL, drop = TRUE,
                          fun = mean, ...) {
      range <- list(
        x = scales$x$output_set(),
        y = scales$y$output_set()
      )

      # Determine binwidth, if omitted
      if (is.null(binwidth)) {
        binwidth <- c(NA, NA)
        if (is.integer(data$x)) {
          binwidth[1] <- 1
        } else {
          binwidth[1] <- diff(range$x) / bins
        }
        if (is.integer(data$y)) {
          binwidth[2] <- 1
        } else {
          binwidth[2] <- diff(range$y) / bins
        }
      }
      stopifnot(is.numeric(binwidth))
      stopifnot(length(binwidth) == 2)

      # Determine breaks, if omitted
      if (is.null(breaks)) {
        if (is.null(origin)) {
          breaks <- list(
            fullseq(range$x, binwidth[1]),
            fullseq(range$y, binwidth[2])
          )
        } else {
          breaks <- list(
            seq(origin[1], max(range$x) + binwidth[1], binwidth[1]),
            seq(origin[2], max(range$y) + binwidth[2], binwidth[2])
          )
        }
      }
      stopifnot(is.list(breaks))
      stopifnot(length(breaks) == 2)
      stopifnot(all(vapply(breaks, is.numeric, logical(1))))
      names(breaks) <- c("x", "y")
      # Sort once so that the factor codes produced by cut() index the same
      # edge vector that is used for xmin/xmax/ymin/ymax below.
      breaks <- lapply(breaks, sort)

      xbin <- cut(data$x, breaks$x, include.lowest = TRUE)
      ybin <- cut(data$y, breaks$y, include.lowest = TRUE)

      ans <- ddply(
        data.frame(data, xbin, ybin),
        .(xbin, ybin),
        function(d) data.frame(value = fun(d$z))
      )

      # Translate bin labels back into numeric rectangle edges.
      within(ans, {
        xint <- as.numeric(xbin)
        xmin <- breaks$x[xint]
        xmax <- breaks$x[xint + 1]

        yint <- as.numeric(ybin)
        ymin <- breaks$y[yint]
        ymax <- breaks$y[yint + 1]
      })
    }
  })

  stat_aggr2d <- StatAggr2d$build_accessor()

和用法:

  # Mean of rt in each cell of a 3-bins-per-axis grid (fun defaults to mean).
  # `z` is mapped to the `rt` column of testDF rather than to the free-standing
  # `rts` vector, so the plot depends only on the data frame it is given.
  ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
    stat_aggr2d(bins = 3)

  # Same grid, but summarising each cell with a custom function.
  ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
    stat_aggr2d(bins = 3, fun = function(x) sum(x^2))



以下是对stat_binhex的一些修改:

  # Stat that bins observations into hexagons (via the hexbin package) and
  # summarises the `z` aesthetic inside every hexagon with an arbitrary
  # function `fun`. It is built on ggplot2's proto-based `Stat` object.
  # (Restored from machine-translation damage: split `<-` arrows, translated
  # keywords and dropped parentheses made this unparseable.)
  StatAggrhex <- proto(Stat, {
    objname <- "aggrhex"

    # The per-hexagon summary is exposed as `value` and mapped to fill.
    default_aes <- function(.) aes(fill = ..value..)
    required_aes <- c("x", "y", "z")
    default_geom <- function(.) GeomHex

    # Compute one row per occupied hexagon.
    #
    # data     - data frame with columns x, y and z.
    # scales   - scales object; input_set() of the x and y scales supplies the
    #            ranges used for the default binwidth.
    # binwidth - optional length-2 numeric (x width, y width); defaults to
    #            range / bins on each axis.
    # bins     - number of bins per axis used to derive binwidth.
    # na.rm    - forwarded to remove_missing() for rows with missing x or y.
    # fun      - summary function applied to the z values of each hexagon.
    #
    # Returns a data frame of hexagon centres (from hcell2xy) plus `value`.
    calculate <- function(., data, scales, binwidth = NULL, bins = 30,
                          na.rm = FALSE, fun = mean, ...) {
      try_require("hexbin")
      data <- remove_missing(data, na.rm, c("x", "y"), name = "stat_aggrhex")

      if (is.null(binwidth)) {
        binwidth <- c(
          diff(scales$x$input_set()) / bins,
          diff(scales$y$input_set()) / bins
        )
      }

      x <- data$x
      y <- data$y

      # Convert binwidths into bounds + nbins. The 1e-6 padding widens the
      # bounds slightly beyond the rounded data extremes.
      xbnds <- c(
        round_any(min(x), binwidth[1], floor) - 1e-6,
        round_any(max(x), binwidth[1], ceiling) + 1e-6
      )
      xbins <- diff(xbnds) / binwidth[1]

      # Both y bounds are rounded to the y bin width (binwidth[2]).
      ybnds <- c(
        round_any(min(y), binwidth[2], floor) - 1e-6,
        round_any(max(y), binwidth[2], ceiling) + 1e-6
      )
      ybins <- diff(ybnds) / binwidth[2]

      # Call hexbin; IDs = TRUE keeps the cell id of every observation in
      # hb@cID so z can be grouped by hexagon.
      hb <- hexbin(
        x, xbnds = xbnds, xbins = xbins,
        y, ybnds = ybnds, shape = ybins / xbins,
        IDs = TRUE
      )
      value <- tapply(data$z, hb@cID, fun)

      # Convert to data frame.
      # NOTE(review): relies on tapply's group order matching the cell order
      # returned by hcell2xy(hb); confirm against the hexbin documentation.
      data.frame(hcell2xy(hb), value)
    }
  })

  stat_aggrhex <- StatAggrhex$build_accessor()

和用法:

  # Mean of rt per hexagon (fun defaults to mean).
  # `z` is mapped to the `rt` column of testDF rather than to the free-standing
  # `rts` vector, so the plot depends only on the data frame it is given.
  ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
    stat_aggrhex(bins = 3)

  # Same binning, but summarising each hexagon with a custom function.
  ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
    stat_aggrhex(bins = 3, fun = function(x) sum(x^2))


I wish to plot mean (or other function) of reaction time as a function of the location of the target in the x y plane. As test data:

library(ggplot2)
# Simulated test data: 100 target positions drawn uniformly from [-1, 1] on
# each axis, plus a standard-normal "reaction time" per target.
xs <- runif(100, min = -1, max = 1)
ys <- runif(100, min = -1, max = 1)
rts <- rnorm(100)

# Columns: x, y (target position) and rt (reaction time).
testDF <- data.frame(x = xs, y = ys, rt = rts)

I know I can do this:

# Baseline plot: 2D binning of the target positions with bins = 10; the fill
# of geom_bin2d reflects the count per bin, not a summary of rt.
p <- ggplot(testDF, aes(x = x, y = y)) +
  geom_bin2d(bins = 10)

What I would like to be able to do, is the same thing but plot a function of the data in each bin rather than counts. Can I do this?

Or do I need to generate the conditional means first in R (e.g. drt <- tapply(testDF$rt,list(cut(testDF$x,10),cut(testDF$y,10)),mean)) and then plot that?

Thank you.

解决方案

Update: With the release of ggplot2 0.9.0, much of this functionality is covered by the new additions of stat_summary2d and stat_summary_bin.

here is a gist for this answer: https://gist.github.com/1341218

Here is a slight modification of stat_bin2d so that it accepts an arbitrary function:

# Stat that bins observations on a rectangular x/y grid and summarises the
# `z` aesthetic inside every bin with an arbitrary function `fun`. It is
# built on ggplot2's proto-based `Stat` object.
StatAggr2d <- proto(Stat, {
  objname <- "aggr2d"
  # The per-bin summary is exposed as `value` and mapped to fill by default.
  default_aes <- function(.) aes(fill = ..value..)
  required_aes <- c("x", "y", "z")
  default_geom <- function(.) GeomRect

  # Compute one row per occupied bin.
  #
  # data     - data frame with columns x, y and z.
  # scales   - scales object; scales$x$output_set() / scales$y$output_set()
  #            are used as the x and y ranges (presumably the trained scale
  #            ranges; confirm against the ggplot2 version in use).
  # binwidth - optional length-2 numeric (x width, y width). When NULL, the
  #            width is 1 for integer data, otherwise range / bins.
  # bins     - number of bins per axis used to derive binwidth.
  # breaks   - optional list of two numeric vectors of bin edges (x, y).
  # origin   - optional length-2 numeric; when given (and breaks is NULL),
  #            edges start at origin and step by binwidth.
  # drop     - accepted for signature compatibility; not used in this body.
  # fun      - summary function applied to the z values of each bin.
  #
  # Returns a data frame with xbin, ybin, value and the bin indices/edges
  # xint, xmin, xmax, yint, ymin, ymax.
  calculate <- function(., data, scales, binwidth = NULL, bins = 30,
                        breaks = NULL, origin = NULL, drop = TRUE,
                        fun = mean, ...) {
    range <- list(
      x = scales$x$output_set(),
      y = scales$y$output_set()
    )

    # Determine binwidth, if omitted
    if (is.null(binwidth)) {
      binwidth <- c(NA, NA)
      if (is.integer(data$x)) {
        binwidth[1] <- 1
      } else {
        binwidth[1] <- diff(range$x) / bins
      }
      if (is.integer(data$y)) {
        binwidth[2] <- 1
      } else {
        binwidth[2] <- diff(range$y) / bins
      }
    }
    stopifnot(is.numeric(binwidth))
    stopifnot(length(binwidth) == 2)

    # Determine breaks, if omitted
    if (is.null(breaks)) {
      if (is.null(origin)) {
        breaks <- list(
          fullseq(range$x, binwidth[1]),
          fullseq(range$y, binwidth[2])
        )
      } else {
        breaks <- list(
          seq(origin[1], max(range$x) + binwidth[1], binwidth[1]),
          seq(origin[2], max(range$y) + binwidth[2], binwidth[2])
        )
      }
    }
    stopifnot(is.list(breaks))
    stopifnot(length(breaks) == 2)
    stopifnot(all(vapply(breaks, is.numeric, logical(1))))
    names(breaks) <- c("x", "y")
    # Sort once so that the factor codes produced by cut() index the same
    # edge vector that is used for xmin/xmax/ymin/ymax below. Previously cut()
    # saw sorted edges while the lookup used the caller's original order.
    breaks <- lapply(breaks, sort)

    xbin <- cut(data$x, breaks$x, include.lowest = TRUE)
    ybin <- cut(data$y, breaks$y, include.lowest = TRUE)

    ans <- ddply(
      data.frame(data, xbin, ybin),
      .(xbin, ybin),
      function(d) data.frame(value = fun(d$z))
    )

    # Translate bin labels back into numeric rectangle edges.
    within(ans, {
      xint <- as.numeric(xbin)
      xmin <- breaks$x[xint]
      xmax <- breaks$x[xint + 1]

      yint <- as.numeric(ybin)
      ymin <- breaks$y[yint]
      ymax <- breaks$y[yint + 1]
    })
  }
})

stat_aggr2d <- StatAggr2d$build_accessor()

and usage:

# Mean of rt in each cell of a 3-bins-per-axis grid (fun defaults to mean).
# `z` is mapped to the `rt` column of testDF rather than to the free-standing
# `rts` vector, so the plot depends only on the data frame it is given.
ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
  stat_aggr2d(bins = 3)

# Same grid, but summarising each cell with a custom function.
ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
  stat_aggr2d(bins = 3, fun = function(x) sum(x^2))

As well, here is a slight modification of stat_binhex:

# Stat that bins observations into hexagons (via the hexbin package) and
# summarises the `z` aesthetic inside every hexagon with an arbitrary
# function `fun`. It is built on ggplot2's proto-based `Stat` object.
StatAggrhex <- proto(Stat, {
  objname <- "aggrhex"

  # The per-hexagon summary is exposed as `value` and mapped to fill.
  default_aes <- function(.) aes(fill = ..value..)
  required_aes <- c("x", "y", "z")
  default_geom <- function(.) GeomHex

  # Compute one row per occupied hexagon.
  #
  # data     - data frame with columns x, y and z.
  # scales   - scales object; input_set() of the x and y scales supplies the
  #            ranges used for the default binwidth.
  # binwidth - optional length-2 numeric (x width, y width); defaults to
  #            range / bins on each axis.
  # bins     - number of bins per axis used to derive binwidth.
  # na.rm    - forwarded to remove_missing() for rows with missing x or y.
  # fun      - summary function applied to the z values of each hexagon.
  #
  # Returns a data frame of hexagon centres (from hcell2xy) plus `value`.
  calculate <- function(., data, scales, binwidth = NULL, bins = 30,
                        na.rm = FALSE, fun = mean, ...) {
    try_require("hexbin")
    # Label warnings with this stat's own name rather than stat_hexbin's.
    data <- remove_missing(data, na.rm, c("x", "y"), name = "stat_aggrhex")

    if (is.null(binwidth)) {
      binwidth <- c(
        diff(scales$x$input_set()) / bins,
        diff(scales$y$input_set()) / bins
      )
    }

    x <- data$x
    y <- data$y

    # Convert binwidths into bounds + nbins. The 1e-6 padding widens the
    # bounds slightly beyond the rounded data extremes.
    xbnds <- c(
      round_any(min(x), binwidth[1], floor) - 1e-6,
      round_any(max(x), binwidth[1], ceiling) + 1e-6
    )
    xbins <- diff(xbnds) / binwidth[1]

    # Both y bounds are rounded to the y bin width; the lower bound previously
    # used binwidth[1] (the x width), misplacing it when the widths differ.
    ybnds <- c(
      round_any(min(y), binwidth[2], floor) - 1e-6,
      round_any(max(y), binwidth[2], ceiling) + 1e-6
    )
    ybins <- diff(ybnds) / binwidth[2]

    # Call hexbin; IDs = TRUE keeps the cell id of every observation in
    # hb@cID so z can be grouped by hexagon.
    hb <- hexbin(
      x, xbnds = xbnds, xbins = xbins,
      y, ybnds = ybnds, shape = ybins / xbins,
      IDs = TRUE
    )
    value <- tapply(data$z, hb@cID, fun)

    # Convert to data frame.
    # NOTE(review): relies on tapply's group order matching the cell order
    # returned by hcell2xy(hb); confirm against the hexbin documentation.
    data.frame(hcell2xy(hb), value)
  }
})

stat_aggrhex <- StatAggrhex$build_accessor()

and usage:

# Mean of rt per hexagon (fun defaults to mean).
# `z` is mapped to the `rt` column of testDF rather than to the free-standing
# `rts` vector, so the plot depends only on the data frame it is given.
ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
  stat_aggrhex(bins = 3)

# Same binning, but summarising each hexagon with a custom function.
ggplot(data = testDF, aes(x = x, y = y, z = rt)) +
  stat_aggrhex(bins = 3, fun = function(x) sum(x^2))

这篇关于GGPLOT可以制作2D摘要数据吗?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆