用ggplot2重现drc :: plot.drc [英] Reproducing drc::plot.drc with ggplot2
问题描述
我想用 ggplot2
。
drc :: plot.drc
图。 b $ b
df1 < -
结构(list(TempV = structure(c(1L,1L,1L,1L,1L,1L,1L,
1L,1L,1L,5L,5L,5L,5L,5L,5L, 5L,5L,5L,5L,3L,3L,3L,
3L,3L,3L,3L,3L,3L,3L,7L,7L,7L,7L,7L,7L,7L,7L,
7L,9L,9L,9L,9L,9L,9L,9L,9L,9L,9L,13L,13L,13L,13L,
13L,13L,13L,13L,13L,13L, 11L,11L,11L,11L,11L,11L,11L,
11L,11L,11L,2L,2L,2L,2L,2L,2L,2L,2L,2L,2L,6L,6L, $ b 6L,6L,6L,6L,6L,6L,6L,6L,4L,4L,4L,4L,4L,4L,4L,4L,
4L,4L,8L,8L,8L,8L, 8L,8L,8L,8L,8L,8L,10L,10L,10L,
10L,10L,10L,10L,10L,10L,10L,14L,14L,14L,14L,14L,14L, $ b 14L,14L,14 L,14L,12L,12L,12L,12L,12L,12L,12L,12L,12L,
12L),标签= c(22.46FH-142,27.59FH-142,26.41 FH-142,
29.71FH-142,31.66FH-142,34.11FH-142,33.22FH-142,22.46FH-942,
27.59 FH-942,26.41FH-942,29.71FH-942,31.66FH-942,34.11FH-942,
33.22FH-942),class =factor )Start = c(0L,24L,48L,72L,
96L,120L,144L,168L,192L,216L,0L,24L,48L,72L,96L,120L,
144L,168L, 192L,216L,0L,24L,48L,72L,96L,120L,144L,168L,
192L,216L,0L,24L,48L,72L,96L,120L,144L,168L,192L,216L, $ b 0L,24L,48L,72L,96L,120L,144L,168L,192L,216L,0L,24L,
48L,72L,96L,120L,144L,168L,192L,216L, 48L,72L,
96L,120L,144L,168L,192L,216L,0L,24L,48L,72L,96L,120L,
144L,168L,192L,216L,0L,24L,48L, 72L,96L,120L,144L,168L,
192L,216L,0L,24L,48L,72L,96L,120L,144L,168L,192L,216L,
0L,24L,48L,72L, 9 6L,120L,144L,168L,192L,216L,0L,24L,
48L,72L,96L,120L,144L,168L,192L,216L,0L,24L,48L,72L, 120L,144L,168L,192L,216L,0L,24L,48L,72L,96L,120L,
144L,168L,192L,216L),End = c(24,48,72,96,120,144 ,168,
192,216,Inf,24,48,72,96,120,144,168,192,216,Inf,
24,48,72,96,120,144,168 ,192,216,Inf,24,48,72,96,
120,144,168,192,216,Inf,24,48,72,96,120,144,168,
192 ,216,Inf,24,48,72,96,120,144,168,192,216,Inf,
24,48,72,96,120,144,168,192,216,Inf,24 ,48,72,96,
120,144,168,192,216,Inf,24,48,72,96,120,144,168,
192,216,Inf,24,48 ,72,96,120,144,168,192,216,Inf,
24,48,72,96,120,144,168,192,216,Inf,24,48,72,96,
120,144,168,192,216,Inf,24,48,72,96,120,144,168,
192,216,Inf,24,48,72,96,120,144 ,168,192,216,Inf),
Germin ated = c(0L,0L,0L,0L,3L,67L,46L,12L,101L,221L,
0L,0L,0L,0L,57L,50L,44L,31L,32L,236L, 0L,0L,
31L,68L,50L,31L,34L,29L,207L,0L,0L,8L,30L,31L,
55L,27L,22L,4L,273L,0L, 46L,64L,16L,8L,15L,
15L,20L,266L,0L,0L,0L,0L,4L,13L,63L,51L,147L,
172L,0L,0L, 26L,92L,31L,91L,14L,7L,185L,0L,
0L,0L,0L,0L,32L,59L,36L,50L,273L,0L,0L,0L,4L, 13L,32L,42L,52L,42L,265L,0L,0L,0L,6L,22L,40L,
57L,44L,73L,208L,0L,1L,2L,24L,55L,41L, 24L,
33L,202L,0L,0L,18L,31L,26L,30L,61L,25L,58L,201L,
0L,0L,36L,54L,33L,55L,12L,27L, 55L,178L,0L,0L,
6L,28L,26L,31L,53L,48L,33L,225L)),.Names = c(TempV,
Start ,发芽),row.names = c(NA,-140L),class =data.frame)
library(data.table)
dt1< ; - data.tabl e(df1)
库(drc)
dt1fm1 < -
drm(
公式=发芽〜开始+结束
, curveid = TempV
#,pmodels =
#,权重=
,data = dt1
#,子集=
,fct = LL.2()
,type =event
,bcVal = NULL
,bcAdd = 0
#,start =
,na.action = na.fail
,robust = mean
,logDose = NULL
,control = drmc(
constr = FALSE
,errorm = TRUE
,maxIt = 1500
,method = BFGS
,noMessage = FALSE
,relTol = 1e-07
,rmNA = FALSE
,useD = FALSE
, trace = FALSE
,otrace = FALSE
,warnVal = -1
,dscaleThres = 1e-15
,rscaleThres = 1e-15
)
, lowerl = NULL
,upperl = NULL
,separate = FALSE
,pshifts = NULL
)
## ---- dt1fm1Plot1 ----
plot(
x = dt1fm1
,xlab =时间(小时)
,ylab =发芽比例(\\% )
#,ylab =比例发芽(%)
,add = FALSE
,level = NULL
,type =average#c(average, all,bars,none,obs,confidence)
,broken = FALSE
#,bp
,bcontrol = NULL
,conName = NULL
,axes = TRUE
,gridsize = 100
,log =
#,xtsty
,x ttrim = TRUE
,xt = NULL
,xtField = NULL
,xField =时间(小时)
,xlim = c(0,200)
, yt = NULL
,ytField = NULL
,yField =发芽比例
,ylim = c(0,1.05)
,lwd = 1
,cex = 1.2
,cex.axis = 1
,col = TRUE
#,lty
#,pch
,legend = TRUE
#,legendText
,legendPos = c(40,1.1)
,cex.legend = 0.6
,normal = FALSE
,normRef = 1
,confidence.level = 0.95
)
## ---- dt1fm1Plot2 ----
dt1fm1Means1 <-dt1 [,。(发芽=平均值(发芽)/ 450),by = (TempV,Start,End)]
dt1fm1Means2 <-dt1fm1Means1 [,。(Start = Start,End = End,Cum_Germinated = cumsum(Germinated)),by =。(TempV)]
dt1fm1Means < - data.table(dt1fm1Means2 [End!= Inf],Pred = predict(object = dt1fm1))
dt1fm1Plot2 < -
ggplot(data = dt1fm1Means,mapping = aes(x = End,y = Cum_Germinated,group = TempV,color = TempV,shape = TempV))+
geom_point()+
geom_line(aes(y = Pred))+
scale_shape_manual(values = seq(0,13))+
labs(x =时间(小时),y =比例发芽,shape =Temp Temp)+
theme_bw()+
scale_x_continuous(expand = c(0,0),breaks = c(0,unique(dt1fm1Means $ End))+
scale_y_continuous(expand = c(0,0),labels = function(x)paste0(100 * x,\\%))+
#scale_y_continuous(expand = c(0,0),labels = percent) +
expand_limits(x = c(0,max(dt1fm1Means $ End)+20),y = c(0,max(dt1fm1Means $ Pred)+0.1))+
theme(axis.title。 x = element_text(size = 12,hjust = 0.54,vjust = 0),
axis.title.y = element_text(size = 12,angle = 90,vjust = 0.25))
print(dt1fm1Plot2)
问题
在 ggplot2
输出中几乎没有差异。这些差异是由于 predict 函数以不同的模式给出输出,而不是数据中给定的水平。 >编辑
实际上 drm
函数改变了 TempV级别的顺序
,从 summary(dt1fm1)
输出和 drc :: plot.drc
output。
如问题所述,有一个与 drm
混洗因子水平的顺序。不要洗牌这个烂摊子证明比我预料的更棘手。
最后,我通过调用每个因子级别的 drm
函数来构建一个结果表一次一个因素级别。
以这种冗长的方式做这件事,发现你的第一张图从 plot.drc
和ggplot版本都不正确。
让我们首先将函数调用包装到另一个包装器函数中,以便于重复调用它,使其在 drm()
每个轨迹:
drcmod < - function(dt1){
drm(formula = Germinated_Start + End
,curveid = TempV
,data = dt1
,fct = LL.2()
,type =event
,bcVal = NULL
, bcAdd = 0
,na.action = na.fail
,robust =mean
,logDose = NULL
,control = drmc(
constr = FALSE
,errorm = TRUE
,maxIt = 1500
,method =BFGS
,noMessage = FALSE
,relTol = 1e-07
,rmNA = FALSE
,useD = FALSE
,trace = FALSE
,otrace = FALSE
,warnVal = -1
,dscaleThres = 1e-15
,rscaleThres = 1e-15
)
,lowerl = NULL
,upperl = NULL
,separate = FALSE
,pshifts = NULL
)
}
现在我们可以使用这个包装函数将drc模型依次适用于每个因子级别:
<$ p对于(i in 1:nlevels(dt1 $ TempV)){
dt < - dt1 [TempV == levels),$ p> dt2 < - data.table (TempV)[i]]
dt [,TempV:= as.character(TempV)]
dt [,Germ_frac:= mean(发芽)/ 450,by =。(Start)]
$ dt [,cum_Germinated:= cumsum(Germ_frac)]
dt [,Pred:= c(predic(object = drcmod(dt)),NA)]
dt2 < - rbind(dt2, dt)
}
和plot:
ggplot(dt2 [End!= Inf],aes(x = End,y = cum_Germinated,group = TempV,color = TempV,shape = TempV)+
geom_point()+
geom_line(aes(y = Pred))+
scale_shape_manual(values = seq(0,13))+
labs(x =Time(Hours) y =发芽比例,shape =Temp,color =Temp)+
theme_bw()
< h2> Edit
如果我们在问题中使用少数因子级别的数据子集运行原始代码,例如使用
dt1 <-dt1 [TempV%in%levels(TempV)[1:5],]
dt1 < - droplevels(dt1)
所有图(OP中的2个版本以及此答案中的版本)给出了相同的结果。只有在使用大量因子水平时才会出现差异。事实上,OP中的ggplot和plot.drc提供了跟踪与因子级别的不正确匹配,这表明问题很可能出现在 drm()
函数中,而不是在plot.drc中。
I want to reproduce the following drc::plot.drc
graphs with ggplot2
.
df1 <-
structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142",
"29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942",
"27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942",
"33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L,
192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L,
0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L,
48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L,
96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L,
144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf,
24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96,
120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168,
192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf),
Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L,
0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L,
31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L,
55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L,
15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L,
172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L,
0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L,
13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L,
57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L,
33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L,
0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L,
6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV",
"Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")
library(data.table)
dt1 <- data.table(df1)
library(drc)
dt1fm1 <-
drm(
formula = Germinated ~ Start + End
, curveid = TempV
# , pmodels =
# , weights =
, data = dt1
# , subset =
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
# , start =
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
## ----dt1fm1Plot1----
plot(
x = dt1fm1
, xlab = "Time (Hours)"
, ylab = "Proportion Germinated (\\%)"
# , ylab = "Proportion Germinated (%)"
, add = FALSE
, level = NULL
, type = "average" # c("average", "all", "bars", "none", "obs", "confidence")
, broken = FALSE
# , bp
, bcontrol = NULL
, conName = NULL
, axes = TRUE
, gridsize = 100
, log = ""
# , xtsty
, xttrim = TRUE
, xt = NULL
, xtField = NULL
, xField = "Time (Hours)"
, xlim = c(0, 200)
, yt = NULL
, ytField = NULL
, yField = "Proportion Germinated"
, ylim = c(0, 1.05)
, lwd = 1
, cex = 1.2
, cex.axis = 1
, col = TRUE
# , lty
# , pch
, legend = TRUE
# , legendText
, legendPos = c(40, 1.1)
, cex.legend = 0.6
, normal = FALSE
, normRef = 1
, confidence.level = 0.95
)
## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))
dt1fm1Plot2 <-
ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw() +
scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
# scale_y_continuous(expand = c(0, 0), labels = percent) +
expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
axis.title.y = element_text(size = 12, angle = 90, vjust = 0.25))
print(dt1fm1Plot2)
Question
There are few discrepancies in ggplot2
output. These discrepancies occur because the predict
function gives output in different pattern than the given levels in the data.
Edited
Actually drm
function changed the order of levels of TempV
and this is clear from summary(dt1fm1)
output and the graph of drc::plot.drc
output.
As noted in the question, there is an issue related to drm
shuffling the order of factor levels. Un-shuffling this mess proved more tricky than I expected.
In the end I approached this by calling the drm
function once per factor level to build up a table of results one factor level at a time.
Doing it this long-winded way uncovered the fact that your 1st plot from plot.drc
and the ggplot version are both incorrect.
Let's start by wrapping your function call to drm()
inside another wrapper function, to facilitate calling it repeatedly for each trace:
drcmod <- function(dt1){
drm(formula = Germinated ~ Start + End
, curveid = TempV
, data = dt1
, fct = LL.2()
, type = "event"
, bcVal = NULL
, bcAdd = 0
, na.action = na.fail
, robust = "mean"
, logDose = NULL
, control = drmc(
constr = FALSE
, errorm = TRUE
, maxIt = 1500
, method = "BFGS"
, noMessage = FALSE
, relTol = 1e-07
, rmNA = FALSE
, useD = FALSE
, trace = FALSE
, otrace = FALSE
, warnVal = -1
, dscaleThres = 1e-15
, rscaleThres = 1e-15
)
, lowerl = NULL
, upperl = NULL
, separate = FALSE
, pshifts = NULL
)
}
Now we can use this wrapper to fit the drc model to each factor level in turn:
dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
dt <- dt1[TempV==levels(TempV)[i]]
dt[, TempV:=as.character(TempV)]
dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
dt[, cum_Germinated := cumsum(Germ_frac)]
dt[, Pred := c(predict(object=drcmod(dt)), NA)]
dt2 <- rbind(dt2, dt)
}
and plot:
ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) +
geom_point() +
geom_line(aes(y = Pred)) +
scale_shape_manual(values=seq(0, 13)) +
labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
theme_bw()
Edit
If we run the original code in the question using a subset of the data with fewer factor levels, for example using
dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)
all the plots (the 2 versions in OP, and the version in this answer) give the same result. The discrepancies only seem to arise when a large number of factor levels are used. The fact that both the ggplot and the plot.drc in OP give incorrect matching of traces to factor levels indicates that the problem is most likely to be in the drm()
function, rather than in plot.drc.
这篇关于用ggplot2重现drc :: plot.drc的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!