用ggplot2重现drc :: plot.drc [英] Reproducing drc::plot.drc with ggplot2

查看:131
本文介绍了用ggplot2重现drc :: plot.drc的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想用 ggplot2

重现以下 drc :: plot.drc 图。 b
$ b

  df1 < - 
结构(list(TempV = structure(c(1L,1L,1L,1L,1L,1L,1L,
1L,1L,1L,5L,5L,5L,5L,5L,5L, 5L,5L,5L,5L,3L,3L,3L,
3L,3L,3L,3L,3L,3L,3L,7L,7L,7L,7L,7L,7L,7L,7L,
7L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,13L,13L,13L,13L,
13L,13L,13L,13L,13L,13L, 11L,11L,11L,11L,11L,11L,11L,
11L,11L,11L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,6L,6L, $ b 6L,6L,6L,6L,6L,6L,6L,6L,4L,4L,4L,4L,4L,4L,4L,4L,
4L,4L,8L,8L,8L,8L, 8L,8L,8L,8L,8L,8L,10L,10L,10L,
10L,10L,10L,10L,10L,10L,10L,14L,14L,14L,14L,14L,14L, $ b 14L,14L,14 L,14L,12L,12L,12L,12L,12L,12L,12L,12L,12L,
12L),标签= c(22.46FH-142,27.59FH-142,26.41 FH-142,
29.71FH-142,31.66FH-142,34.11FH-142,33.22FH-142,22.46FH-942,
27.59 FH-942,26.41FH-942,29.71FH-942,31.66FH-942,34.11FH-942,
33.22FH-942),class =factor )Start = c(0L,24L,48L,72L,
96L,120L,144L,168L,192L,216L,0L,24L,48L,72L,96L,120L,
144L,168L, 192L,216L,0L,24L,48L,72L,96L,120L,144L,168L,
192L,216L,0L,24L,48L,72L,96L,120L,144L,168L,192L,216L, $ b 0L,24L,48L,72L,96L,120L,144L,168L,192L,216L,0L,24L,
48L,72L,96L,120L,144L,168L,192L,216L, 48L,72L,
96L,120L,144L,168L,192L,216L,0L,24L,48L,72L,96L,120L,
144L,168L,192L,216L,0L,24L,48L, 72L,96L,120L,144L,168L,
192L,216L,0L,24L,48L,72L,96L,120L,144L,168L,192L,216L,
0L,24L,48L,72L, 9 6L,120L,144L,168L,192L,216L,0L,24L,
48L,72L,96L,120L,144L,168L,192L,216L,0L,24L,48L,72L, 120L,144L,168L,192L,216L,0L,24L,48L,72L,96L,120L,
144L,168L,192L,216L),End = c(24,48,72,96,120,144 ,168,
192,216,Inf,24,48,72,96,120,144,168,192,216,Inf,
24,48,72,96,120,144,168 ,192,216,Inf,24,48,72,96,
120,144,168,192,216,Inf,24,48,72,96,120,144,168,
192 ,216,Inf,24,48,72,96,120,144,168,192,216,Inf,
24,48,72,96,120,144,168,192,216,Inf,24 ,48,72,96,
120,144,168,192,216,Inf,24,48,72,96,120,144,168,
192,216,Inf,24,48 ,72,96,120,144,168,192,216,Inf,
24,48,72,96,120,144,168,192,216,Inf,24,48,72,96,
120,144,168,192,216,Inf,24,48,72,96,120,144,168,
192,216,Inf,24,48,72,96,120,144 ,168,192,216,Inf),
Germin ated = c(0L,0L,0L,0L,3L,67L,46L,12L,101L,221L,
0L,0L,0L,0L,57L,50L,44L,31L,32L,236L, 0L,0L,
31L,68L,50L,31L,34L,29L,207L,0L,0L,8L,30L,31L,
55L,27L,22L,4L,273L,0L, 46L,64L,16L,8L,15L,
15L,20L,266L,0L,0L,0L,0L,4L,13L,63L,51L,147L,
172L,0L,0L, 26L,92L,31L,91L,14L,7L,185L,0L,
0L,0L,0L,0L,32L,59L,36L,50L,273L,0L,0L,0L,4L, 13L,32L,42L,52L,42L,265L,0L,0L,0L,6L,22L,40L,
57L,44L,73L,208L,0L,1L,2L,24L,55L,41L, 24L,
33L,202L,0L,0L,18L,31L,26L,30L,61L,25L,58L,201L,
0L,0L,36L,54L,33L,55L,12L,27L, 55L,178L,0L,0L,
6L,28L,26L,31L,53L,48L,33L,225L)),.Names = c(TempV,
Start ,发芽),row.names = c(NA,-140L),class =data.frame)

library(data.table)

dt1< ; - data.tabl e(df1)

库(drc)

dt1fm1 < -
drm(
公式=发芽〜开始+结束
, curveid = TempV
#,pmodels =
#,权重=
,data = dt1
#,子集=
,fct = LL.2()
,type =event
,bcVal = NULL
,bcAdd = 0
#,start =
,na.action = na.fail
,robust = mean
,logDose = NULL
,control = drmc(
constr = FALSE
,errorm = TRUE
,maxIt = 1500
,method = BFGS
,noMessage = FALSE
,relTol = 1e-07
,rmNA = FALSE
,useD = FALSE
, trace = FALSE
,otrace = FALSE
,warnVal = -1
,dscaleThres = 1e-15
,rscaleThres = 1e-15

, lowerl = NULL
,upperl = NULL
,separate = FALSE
,pshifts = NULL




## ---- dt1fm1Plot1 ----
plot(
x = dt1fm1
,xlab =时间(小时)
,ylab =发芽比例(\\% )
#,ylab =比例发芽(%)
,add = FALSE
,level = NULL
,type =average#c(average, all,bars,none,obs,confidence)
,broken = FALSE
#,bp
,bcontrol = NULL
,conName = NULL
,axes = TRUE
,gridsize = 100
,log =
#,xtsty
,x ttrim = TRUE
,xt = NULL
,xtField = NULL
,xField =时间(小时)
,xlim = c(0,200)
, yt = NULL
,ytField = NULL
,yField =发芽比例
,ylim = c(0,1.05)
,lwd = 1
,cex = 1.2
,cex.axis = 1
,col = TRUE
#,lty
#,pch
,legend = TRUE
#,legendText
,legendPos = c(40,1.1)
,cex.legend = 0.6
,normal = FALSE
,normRef = 1
,confidence.level = 0.95



## ---- dt1fm1Plot2 ----
dt1fm1Means1 <-dt1 [,。(发芽=平均值(发芽)/ 450),by = (TempV,Start,End)]
dt1fm1Means2 <-dt1fm1Means1 [,。(Start = Start,End = End,Cum_Germinated = cumsum(Germinated)),by =。(TempV)]
dt1fm1Means < - data.table(dt1fm1Means2 [End!= Inf],Pred = predict(object = dt1fm1))

dt1fm1Plot2 < -
ggplot(data = dt1fm1Means,mapping = aes(x = End,y = Cum_Germinated,group = TempV,color = TempV,shape = TempV))+
geom_point()+
geom_line(aes(y = Pred))+
scale_shape_manual(values = seq(0,13))+
labs(x =时间(小时),y =比例发芽,shape =Temp Temp)+
theme_bw()+
scale_x_continuous(expand = c(0,0),breaks = c(0,unique(dt1fm1Means $ End))+
scale_y_continuous(expand = c(0,0),labels = function(x)paste0(100 * x,\\%))+
#scale_y_continuous(expand = c(0,0),labels = percent) +
expand_limits(x = c(0,max(dt1fm1Means $ End)+20),y = c(0,max(dt1fm1Means $ Pred)+0.1))+
theme(axis.title。 x = element_text(size = 12,hjust = 0.54,vjust = 0),
axis.title.y = element_text(size = 12,angle = 90,vjust = 0.25))
print(dt1fm1Plot2)



问题

ggplot2 输出中几乎没有差异。这些差异是由于 predict 函数以不同的模式给出输出,而不是数据中给定的水平。 >编辑



实际上 drm 函数改变了 TempV级别的顺序,从 summary(dt1fm1)输出和 drc :: plot.drc output。

解决方案

如问题所述,有一个与 drm 混洗因子水平的顺序。不要洗牌这个烂摊子证明比我预料的更棘手。

最后,我通过调用每个因子级别的 drm 函数来构建一个结果表一次一个因素级别。

以这种冗长的方式做这件事,发现你的第一张图从 plot.drc 和ggplot版本都不正确

让我们首先将函数调用包装到另一个包装器函数中,以便于重复调用它,使其在 drm()每个轨迹:

  drcmod < -  function(dt1){
drm(formula = Germinated_Start + End
,curveid = TempV
,data = dt1
,fct = LL.2()
,type =event
,bcVal = NULL
, bcAdd = 0
,na.action = na.fail
,robust =mean
,logDose = NULL
,control = drmc(
constr = FALSE
,errorm = TRUE
,maxIt = 1500
,method =BFGS
,noMessage = FALSE
,relTol = 1e-07
,rmNA = FALSE
,useD = FALSE
,trace = FALSE
,otrace = FALSE
,warnVal = -1
,dscaleThres = 1e-15
,rscaleThres = 1e-15

,lowerl = NULL
,upperl = NULL
,separate = FALSE
,pshifts = NULL

}

现在我们可以使用这个包装函数将drc模型依次适用于每个因子级别:

<$ p对于(i in 1:nlevels(dt1 $ TempV)){
dt < - dt1 [TempV == levels),$ p> dt2 < - data.table (TempV)[i]]
dt [,TempV:= as.character(TempV)]
dt [,Germ_frac:= mean(发芽)/ 450,by =。(Start)]
$ dt [,cum_Germinated:= cumsum(Germ_frac)]
dt [,Pred:= c(predic(object = drcmod(dt)),NA)]
dt2 < - rbind(dt2, dt)
}

和plot:

  ggplot(dt2 [End!= Inf],aes(x = End,y = cum_Germinated,group = TempV,color = TempV,shape = TempV)+ 
geom_point()+
geom_line(aes(y = Pred))+
scale_shape_manual(values = seq(0,13))+
labs(x =Time(Hours) y =发芽比例,shape =Temp,color =Temp)+
theme_bw()



< h2> Edit

如果我们在问题中使用少数因子级别的数据子集运行原始代码,例如使用

  dt1 <-dt1 [TempV%in%levels(TempV)[1:5],] 
dt1 < - droplevels(dt1)

所有图(OP中的2个版本以及此答案中的版本)给出了相同的结果。只有在使用大量因子水平时才会出现差异。事实上,OP中的ggplot和plot.drc提供了跟踪与因子级别的不正确匹配,这表明问题很可能出现在 drm()函数中,而不是在plot.drc中。


I want to reproduce the following drc::plot.drc graphs with ggplot2.

df1 <-
      structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L, 
    13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
    11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L, 
    14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
    12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142", 
    "29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942", 
    "27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942", 
    "33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf), 
        Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L, 
        0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L, 
        31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L, 
        55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L, 
        15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L, 
        172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L, 
        0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L, 
        13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L, 
        57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L, 
        33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L, 
        0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L, 
        6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV", 
    "Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")

library(data.table)

dt1 <- data.table(df1)

library(drc)

dt1fm1 <- 
  drm(
        formula   = Germinated ~ Start + End
      , curveid   = TempV
  #   , pmodels   = 
  #   , weights   = 
      , data      = dt1
  #   , subset    = 
      , fct       = LL.2()
      , type      = "event"
      , bcVal     = NULL
      , bcAdd     = 0
  #   , start     =
      , na.action = na.fail
      , robust    = "mean"
      , logDose   = NULL
      , control   = drmc(
                            constr      = FALSE
                            , errorm      = TRUE
                            , maxIt       = 1500
                            , method      = "BFGS"
                            , noMessage   = FALSE
                            , relTol      = 1e-07
                            , rmNA        = FALSE
                            , useD        = FALSE
                            , trace       = FALSE
                            , otrace      = FALSE
                            , warnVal     = -1
                            , dscaleThres = 1e-15
                            , rscaleThres = 1e-15
                            )
      , lowerl    = NULL
      , upperl    = NULL
      , separate  = FALSE
      , pshifts   = NULL 
      )



## ----dt1fm1Plot1----
plot(
        x      = dt1fm1
    , xlab     = "Time (Hours)"
    , ylab     = "Proportion Germinated (\\%)"    
  # , ylab     = "Proportion Germinated (%)"    
    , add      = FALSE
    , level    = NULL
    , type     = "average" # c("average", "all", "bars", "none", "obs", "confidence")
    , broken   = FALSE
  # , bp
    , bcontrol = NULL
    , conName  = NULL
    , axes     = TRUE
    , gridsize = 100
    , log      = ""
  # , xtsty
    , xttrim   = TRUE
    , xt       = NULL
    , xtField    = NULL
    , xField     = "Time (Hours)"
    , xlim     = c(0, 200)
    , yt       = NULL
    , ytField    = NULL
    , yField     = "Proportion Germinated"
    , ylim     = c(0, 1.05)
    , lwd      = 1
    , cex      = 1.2
    , cex.axis = 1
    , col      = TRUE
  # , lty
  # , pch
    , legend     = TRUE
  # , legendText  
    , legendPos  = c(40, 1.1)
    , cex.legend = 0.6
    , normal     = FALSE
    , normRef    = 1
    , confidence.level = 0.95
    )


## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means  <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))

dt1fm1Plot2 <- 
       ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
        geom_point() +
        geom_line(aes(y = Pred)) +
        scale_shape_manual(values=seq(0, 13)) +
        labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
        theme_bw() +
        scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
        scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
      # scale_y_continuous(expand = c(0, 0), labels = percent) +
        expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
        theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
              axis.title.y = element_text(size = 12, angle = 90,  vjust = 0.25))
print(dt1fm1Plot2)

Question

There are few discrepancies in ggplot2 output. These discrepancies occur because the predict function gives output in different pattern than the given levels in the data.

Edited

Actually drm function changed the order of levels of TempV and this is clear from summary(dt1fm1) output and the graph of drc::plot.drc output.

解决方案

As noted in the question, there is an issue related to drm shuffling the order of factor levels. Un-shuffling this mess proved more tricky than I expected.

In the end I approached this by calling the drm function once per factor level to build up a table of results one factor level at a time.

Doing it this long-winded way uncovered the fact that your 1st plot from plot.drc and the ggplot version are both incorrect.

Let's start by wrapping your function call to drm() inside another wrapper function, to facilitate calling it repeatedly for each trace:

drcmod <- function(dt1){
  drm(formula   = Germinated ~ Start + End
    , curveid   = TempV
    , data      = dt1
    , fct       = LL.2()
    , type      = "event"
    , bcVal     = NULL
    , bcAdd     = 0
    , na.action = na.fail
    , robust    = "mean"
    , logDose   = NULL
    , control   = drmc(
      constr      = FALSE
      , errorm      = TRUE
      , maxIt       = 1500
      , method      = "BFGS"
      , noMessage   = FALSE
      , relTol      = 1e-07
      , rmNA        = FALSE
      , useD        = FALSE
      , trace       = FALSE
      , otrace      = FALSE
      , warnVal     = -1
      , dscaleThres = 1e-15
      , rscaleThres = 1e-15
    )
    , lowerl    = NULL
    , upperl    = NULL
    , separate  = FALSE
    , pshifts   = NULL 
  )
}

Now we can use this wrapper to fit the drc model to each factor level in turn:

dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
  dt <- dt1[TempV==levels(TempV)[i]]
  dt[, TempV:=as.character(TempV)]
  dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
  dt[, cum_Germinated := cumsum(Germ_frac)]
  dt[, Pred := c(predict(object=drcmod(dt)), NA)] 
  dt2 <- rbind(dt2, dt)
}

and plot:

ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
  geom_point() +
  geom_line(aes(y = Pred)) +
  scale_shape_manual(values=seq(0, 13)) +
  labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
  theme_bw()

Edit

If we run the original code in the question using a subset of the data with fewer factor levels, for example using

dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)

all the plots (the 2 versions in OP, and the version in this answer) give the same result. The discrepancies only seem to arise when a large number of factor levels are used. The fact that both the ggplot and the plot.drc in OP give incorrect matching of traces to factor levels indicates that the problem is most likely to be in the drm() function, rather than in plot.drc.

这篇关于用ggplot2重现drc :: plot.drc的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆