如何设置“风险号码”表使用ggplot2在Kaplan-Meier图下 [英] How to place a "number at risk" table beneath a Kaplan-Meier plot using ggplot2

查看:231
本文介绍了如何设置“风险号码”表使用ggplot2在Kaplan-Meier图下的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想用ggplot2创建一个Kaplan-Meier图,下面的风险表中的数字表示每个时间点上每个组的风险数(即x轴刻度)。处于危险中的人数应该与相应的勾号一致。数字风险表应该是行名称,表示危险数字所属的组。

我写了下面的例子。我学习如何确定这个



更新#1



正如我所建议的,我用gtable和所得到的数字。我对变体a(来自baptiste的示例代码)的布局不满意,所以我尝试了其他方法。然而,版本B还有另一个缺点:标签在主图的图层的x维内。

a)如何创建合理的布局图b)b)b)另外,如何在主要情节和表格之间放置一个题为处于危险中的数字的标题?标题有风险的数字应该与 tbl 的标签A组和B组的左端一致。



c)tbl中风险数字的字体大小以及相应标签组A和组B应与主图中的刻度标签相同。

 库(生存)
库(重新生成2)
数据(冒号)
library(Hmisc)

d< - colon [,Cs(时间,状态,rx)]
rm(冒号)
名称(d)< - c(days,event,group)
d $ group < - ifelse(d $ group ==Obs,1,2)

fit < (Surv(天,事件)〜组,数据= d)
diff < - 存活率(Surv(天,事件)〜组,数据= d)

风险集< (na.omit(d [,Cs(days,event,group)]),table(group,cut(days,seq(0,max(days),by = 365))))
number。 at.risk< - sapply(1:nrow(risksets),function(i)Reduce( - ,risksets [i,],init = rowSums(risksets)[i],accumulate = TRUE))
number.at.risk< - data.frame(number.at.risk)
names(number.at.risk)< - c(Group.A,Group.B)$ b $ (1 - pchisq(diff $ chisq,1),digits = 4)
p。值<< - ifelse(p.value< 0.001,< 0.001,paste(=,p.value))

d.mortality< - data.frame(time =配合$ T ime,surv = fit $ surv,strata = summary(fit,censored = T)$ strata)
zeros< - data.frame(time = 0,surv = 1,strata = unique(d.mortality $ strata ))
d.mortality <-rbind(d.mortality,zeros)
levels(d.mortality $ strata)< -c(Group A,Group B)
d.mortality $ surv < - (1-d.mortality $ surv)* 100#事件对事件免费且在%
###
g < - ggplot(d.mortality,aes(时间,幸存,组=阶层))+
geom_step(aes(color = strata),size = 1)+
#theme_bw()+#白色背景
主题(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
legend.position =none,
axis.line = element_line(color ='black'),
axis.text.x = element_text(size = 15),
axis。 text.y = element_text(size = 15),
axis.title.x = element_text(size = 17,hjust = .5,vjust = .25,face =bold),
axis.title.y = element_text(size = 17,hjust = .5,vjust = 4,face =bold),
plot.title = element_text(size = 20,hjust = - 。1,vjust = 1,face =bold)
)+
scale_y_continuous(Cumulative event rate [%],limits = c(0,60 ))+
scale_x_continuous(Time [years],limits = c(0,1825),breaks = seq(0,1825,365),labels = c(0,1,2,3,4, 5))+
注释(text,x = 1000,y = 45,label =Group A)+
注释(text,x = 1000,y = 30,label = group B)+
annotate(text,x = 1000,y = 55,label = paste(P,p.value,by log-rank test,collapse =))

number.at.risk = number.at.risk [1:6,]
df_nums = melt(number.at.risk)
df_nums $ year = 1:6
str(df_nums)

tbl < - ggplot(df_nums,aes(x = year,y = factor(variable),color = variable,label = value))+
geom_text()+
#theme_bw()+
主题(
panel.grid.major = element_blank(),
legend.position =none,
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
legend.position =none,
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
plot.title = element_blank( )
)+
scale_y_discrete(breaks = c(Group.B,Group.A),labels = c(Group B,Group A))

库(gtable)

#版本A
both = rbind(ggplotGrob(g),ggplotGrob(tbl),size =last)
grid.newpage )
grid.draw(两者)

#版本B
a <-gtable(单位(15,c(cm)),单位(c(10,3 ),cm))
a <-gtable_add_grob(a,ggplotGrob(g),1,1)
a <-gtable_add_grob(a,ggplotGrob(tbl),2,1)
grid.newpage()
grid.draw(a)



版本#1 (风险数字与主图的x轴蜱密切对应,但布局错误





版本#2(旋转对齐,但布局更好)





更新#2



现在它几乎完美了。两件小事:a)如何在剧情中添加一个标题(知道GIMP完成)Number of risk如下图所示? / p>

b)为什么B组位于A组以上? A组的df_nums中的标签为1,B组为2.我如何在风险表中设置组B以上的组B?

 > str(df_nums $ variable)
因子w / 2级别Group.A,Group.B:1 1 1 1 1 2 2 2 2 ...




$ b

这里更新的代码:

  library(生存)
库(reshape2)
数据(冒号)
库(Hmisc)

d < - 冒号[,Cs(时间,状态,rx)] $ (d)<-c(天,事件,组)
d $组<-bel ,1,2)

fit < - survfit(Surv(days,event)〜group,data = d)
diff < - survdiff(Surv(days,event)〜组,数据= d)

risksets < - with(na.omit(d [,Cs(days,event,group)])),table(group,cut(days,seq(0, max(days),by = 365))))
number.at.risk < - sapply(1:nrow(risksets),function(i)Reduce( - ,risksets [i,],init = rowSums(risksets)[i],accumulate = TRUE))
number.at.risk< - data.frame(number.at.risk)
names(number.at.risk)< - c(Group.A,Group.B)
number.at.risk

###
p.value< - round(1 - pchisq (diff $ chisq,1),digits = 4)
p.value< - ifelse(p.value< 0.001,<0.001,paste(=,p.value))

d.mortality < - data.frame(time = fit $ time,surv = fit $ surv,strata =摘要(fit,censored = T)$ strata)
zeros< - data.frame(time = 0,surv = 1,strata = unique(d.mortality $ strata))
d.mortality < - rbind(d.mortality,0)
levels(d.mortality $ strata)< -c(Group A,Group B)
d.mortality $ surv< - (1-d.mortality $ surv)* 100#事件免费给事件和%
###
g < - ggplot(d.mortality,aes(time,surv,group = strata)) +
geom_step(aes(color = strata),size = 1)+
#theme_bw()+#白色背景
主题(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
legend.position =none ,
axis.line = element_line(color ='black'),
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 17,hjust = .5,vjust = .25,face =bold),
axis.title.y = element_text(size = 17,hjust = .5,vjust = 4,face =bold),
plot.title = element_text(size = 20,hjust = - 。1,vjust = 1,face =bold )
)+
scale_y_continuous(Cumulative event rate [%],limits = c(0,60))+
scale_x_continuous(Time [years],limits = c 0,1825),break = seq(0,1825,365),labels = c(0,1,2,3,4,5))+
annotate(text,x = 1000,y = 45,label =Group A)+
annotate(text,x = 1000,y = 30,label =Group B)+
annotate(text,x = 1000, y = 55,label = paste(P,p.value,by log-rank test,collapse =))

number.at.risk = number.at.risk [ 1:6,]
df_nums = melt(number.at.risk)
str(df_nums $ variable)
df_nums
df_nums $ year = 1:6
str (df_nums)

tbl< - ggplot(df_nums,aes(x = year,y =因子(变量),color = variable,label = valu e))+
geom_text()+
#theme_bw()+
主题(
panel.grid.major = element_blank(),
legend.position = none,
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
legend.position =none,
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.text (),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
plot.title = element_blank()
)+
scale_y_discrete(breaks = c(Group.A,Group.B ),labels = c(Group A,Group B))

library(gtable)

#Version C
both = rbind(ggplotGrob g),ggplotGrob(tbl),size =last)
panels&l t; - $ layout $ t [grep(panel,$ layout $ name)]
both $ heights [panels]< - list(unit(1,null),unit(2, lines))
grid.newpage()
grid.draw(两者)


解决方案

b

  both = rbind(ggplotGrob(g),ggplotGrob(tbl),size =last)
面板< - $ layout $ t [grep(panel,$ layout $ name)]
both $ heights [panels]< - list(unit(1,null),unit(2,lines))
都<-gtable_add_rows(高度=单位(1,行),8)
均<< -gtable_add_grob(均为
textGrob(处于危险中的数字,hjust = 0,
grid.drawpage())
x = 0),
t = 9,l = 2,r = 4)


I would like to create a Kaplan-Meier plot using ggplot2 with a number at risk table beneath indicating the number at risk for each group at each time point (i.e. x-axis tick). The number at risk should be aligned to the corresponding tick. Left to the number at risk table should be row names indicating the group to which the numbers at risk belong.

I wrote the following example. I learn how to determine the numbers at risk from this question. However, I do not know how to create a nice, well aligned number at risk table beneath the Kaplan-Meier plot. A friend helped me to create the number of risk table in the following example. However, the resulting figure of my example is insufficient.

library(survival)
library(reshape2)
data(colon)
library(Hmisc)

d <- colon[, Cs(time, status, rx)]
rm(colon)
names(d) <- c("days", "event", "group")
d$group <- ifelse(d$group == "Obs", 1, 2)

fit <- survfit(Surv(days,event)~group, data=d)
diff <- survdiff(Surv(days,event)~group, data=d)

risksets <- with(na.omit(d[, Cs(days, event, group)]), table(group, cut(days, seq(0, max(days), by=365) ) ))
number.at.risk <- sapply(1:nrow(risksets), function(i) Reduce("-",  risksets[i,], init=rowSums(risksets)[i], accumulate=TRUE))
number.at.risk <- data.frame(number.at.risk)
names(number.at.risk) <- c("Group.A", "Group.B")
number.at.risk

###
p.value <- round(1 - pchisq(diff$chisq, 1), digits=4)
p.value <- ifelse(p.value < 0.001, "<0.001", paste("= ", p.value))

d.mortality <- data.frame(time=fit$time, surv=fit$surv, strata=summary(fit, censored=T)$strata)
zeros <- data.frame(time=0, surv=1, strata=unique(d.mortality$strata))
d.mortality <- rbind(d.mortality, zeros)
levels(d.mortality$strata) <- c("Group A", "Group B")
d.mortality$surv <- (1-d.mortality$surv)*100 # event free to events and in %
###
g <- ggplot(d.mortality, aes(time, surv, group=strata)) + 
     geom_step(aes(colour=strata), size=1) +
     theme_bw() + # white background
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_line(color = 'black'),
          axis.text.x = element_text(size=15),
          axis.text.y = element_text(size=15),
          axis.title.x = element_text(size=17, hjust=.5, vjust=.25, face="bold"),
          axis.title.y = element_text(size=17, hjust=.5, vjust=1.5, face="bold"),
          plot.title = element_text(size=20, hjust=-.1, vjust=1, face="bold")
     ) +
     scale_y_continuous("Cumulative event rate [%]", limits=c(0, 60)) + 
     scale_x_continuous("Time [years]", limits=c(0, 1825), breaks=seq(0, 1825, 365), labels=c(0, 1, 2, 3, 4, 5)) +
     annotate("text", x = 1000, y = 45, label = "Group A") +
     annotate("text", x = 1000, y = 30, label = "Group B") +
     annotate("text", x = 1000, y = 55, label = paste("P ", p.value, "by log-rank test", collapse=""))

number.at.risk = number.at.risk[1:6,]
df_nums = melt(number.at.risk)
df_nums$year = 1:6
tbl = ggplot(df_nums, aes(x = year, y = factor(variable), colour = variable,label=value)) +
     geom_text(size = 3.5) + theme(panel.grid.major = element_blank(), legend.position = "none") +      theme_bw() + 
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_blank()
     ) + scale_y_discrete(breaks=c("Group.B","Group.A"), labels=c("Number at Risk\nGroup B", "Group A"))

Layout <- grid.layout(nrow = 2, ncol = 1, heights = unit(c(2, 0.55), c("null", "null")))
 grid.show.layout(Layout)
 vplayout <- function(...) {
    grid.newpage()
    pushViewport(viewport(layout = Layout))
}

subplot <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)
mmplot <- function(a, b) {
     vplayout()
     print(a, vp = subplot(1, 1))
     print(b, vp = subplot(2, 1))
 }

 dev.new()
mmplot(g, tbl)

UPDATE #1

As suggested I used gtable with the resulting figure. I was not satisfied with the layout of variant a (example code from baptiste), so I tried something else. However, version B does have another drawback: the labels are within the x-dimensions of the plot layer of the main plot.

a) How can I create reasonable layouted figure with well aligned risk numbers.

b) Moreover, how can I place a title "Numbers at risk" between the main plot and the table? The title "Numbers at risk" should be aligned with the left end of the labels "Group A" and "Group B" of tbl.

c) The font size of the risk numbers in tbl and the corresponding labels "Group A" and "Group B" should be the same as the tick labels in the main plot. How can I do this?

library(survival)
library(reshape2)
data(colon)
library(Hmisc)

d <- colon[, Cs(time, status, rx)]
rm(colon)
names(d) <- c("days", "event", "group")
d$group <- ifelse(d$group == "Obs", 1, 2)

fit <- survfit(Surv(days,event)~group, data=d)
diff <- survdiff(Surv(days,event)~group, data=d)

risksets <- with(na.omit(d[, Cs(days, event, group)]), table(group, cut(days, seq(0, max(days), by=365) ) ))
number.at.risk <- sapply(1:nrow(risksets), function(i) Reduce("-",  risksets[i,], init=rowSums(risksets)[i], accumulate=TRUE))
number.at.risk <- data.frame(number.at.risk)
names(number.at.risk) <- c("Group.A", "Group.B")
number.at.risk

###
p.value <- round(1 - pchisq(diff$chisq, 1), digits=4)
p.value <- ifelse(p.value < 0.001, "<0.001", paste("= ", p.value))

d.mortality <- data.frame(time=fit$time, surv=fit$surv, strata=summary(fit, censored=T)$strata)
zeros <- data.frame(time=0, surv=1, strata=unique(d.mortality$strata))
d.mortality <- rbind(d.mortality, zeros)
levels(d.mortality$strata) <- c("Group A", "Group B")
d.mortality$surv <- (1-d.mortality$surv)*100 # event free to events and in %
###
g <- ggplot(d.mortality, aes(time, surv, group=strata)) + 
     geom_step(aes(colour=strata), size=1) +
#           theme_bw() + # white background
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_line(color = 'black'),
          axis.text.x = element_text(size=15),
          axis.text.y = element_text(size=15),
          axis.title.x = element_text(size=17, hjust=.5, vjust=.25, face="bold"),
          axis.title.y = element_text(size=17, hjust=.5, vjust=4, face="bold"),
          plot.title = element_text(size=20, hjust=-.1, vjust=1, face="bold")
     ) +
     scale_y_continuous("Cumulative event rate [%]", limits=c(0, 60)) + 
     scale_x_continuous("Time [years]", limits=c(0, 1825), breaks=seq(0, 1825, 365), labels=c(0, 1, 2, 3, 4, 5)) +
     annotate("text", x = 1000, y = 45, label = "Group A") +
     annotate("text", x = 1000, y = 30, label = "Group B") +
     annotate("text", x = 1000, y = 55, label = paste("P ", p.value, "by log-rank test", collapse=""))

number.at.risk = number.at.risk[1:6,]
df_nums = melt(number.at.risk)
df_nums$year = 1:6
str(df_nums)

tbl <- ggplot(df_nums, aes(x = year, y = factor(variable), colour = variable, label=value)) +
     geom_text() +
#           theme_bw() + 
     theme(
          panel.grid.major = element_blank(), 
          legend.position = "none",
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks=element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_blank()
     ) + 
     scale_y_discrete(breaks=c("Group.B","Group.A"), labels=c("Group B", "Group A"))

library(gtable)

# Version A
both = rbind(ggplotGrob(g), ggplotGrob(tbl), size="last")
grid.newpage()
grid.draw(both)

# Version B
a <- gtable(unit(15, c("cm")), unit(c(10,3), "cm"))
a <- gtable_add_grob(a, ggplotGrob(g), 1, 1)
a <- gtable_add_grob(a, ggplotGrob(tbl), 2, 1)
grid.newpage()
grid.draw(a)

Version #1 (risk numbers well-aligned to x-axis ticks of main plot but bad layout

Version #2 (screwed alignement but better layout)

UPDATE #2

Now it's almost perfect. Two small things:

a) How can I add a the title (know done with GIMP) "Number at risk" to the plot as shown in the figure below?

b) Why is Group B in the table above Group A? The label in df_nums for Group A is 1 and for Group B 2. How can I set Group A above Group B in the number at risk table?

> str(df_nums$variable)
 Factor w/ 2 levels "Group.A","Group.B": 1 1 1 1 1 1 2 2 2 2 ...

Here the updated code:

library(survival)
library(reshape2)
data(colon)
library(Hmisc)

d <- colon[, Cs(time, status, rx)]
rm(colon)
names(d) <- c("days", "event", "group")
d$group <- ifelse(d$group == "Obs", 1, 2)

fit <- survfit(Surv(days,event)~group, data=d)
diff <- survdiff(Surv(days,event)~group, data=d)

risksets <- with(na.omit(d[, Cs(days, event, group)]), table(group, cut(days, seq(0, max(days), by=365) ) ))
number.at.risk <- sapply(1:nrow(risksets), function(i) Reduce("-",  risksets[i,], init=rowSums(risksets)[i], accumulate=TRUE))
number.at.risk <- data.frame(number.at.risk)
names(number.at.risk) <- c("Group.A", "Group.B")
number.at.risk

###
p.value <- round(1 - pchisq(diff$chisq, 1), digits=4)
p.value <- ifelse(p.value < 0.001, "<0.001", paste("= ", p.value))

d.mortality <- data.frame(time=fit$time, surv=fit$surv, strata=summary(fit, censored=T)$strata)
zeros <- data.frame(time=0, surv=1, strata=unique(d.mortality$strata))
d.mortality <- rbind(d.mortality, zeros)
levels(d.mortality$strata) <- c("Group A", "Group B")
d.mortality$surv <- (1-d.mortality$surv)*100 # event free to events and in %
###
g <- ggplot(d.mortality, aes(time, surv, group=strata)) + 
     geom_step(aes(colour=strata), size=1) +
#           theme_bw() + # white background
     theme(
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_line(color = 'black'),
          axis.text.x = element_text(size=15),
          axis.text.y = element_text(size=15),
          axis.title.x = element_text(size=17, hjust=.5, vjust=.25, face="bold"),
          axis.title.y = element_text(size=17, hjust=.5, vjust=4, face="bold"),
          plot.title = element_text(size=20, hjust=-.1, vjust=1, face="bold")
     ) +
     scale_y_continuous("Cumulative event rate [%]", limits=c(0, 60)) + 
     scale_x_continuous("Time [years]", limits=c(0, 1825), breaks=seq(0, 1825, 365), labels=c(0, 1, 2, 3, 4, 5)) +
     annotate("text", x = 1000, y = 45, label = "Group A") +
     annotate("text", x = 1000, y = 30, label = "Group B") +
     annotate("text", x = 1000, y = 55, label = paste("P ", p.value, "by log-rank test", collapse=""))

number.at.risk = number.at.risk[1:6,]
df_nums = melt(number.at.risk)
str(df_nums$variable)
df_nums
df_nums$year = 1:6
str(df_nums)

tbl <- ggplot(df_nums, aes(x = year, y = factor(variable), colour = variable, label=value)) +
     geom_text() +
#           theme_bw() + 
     theme(
          panel.grid.major = element_blank(), 
          legend.position = "none",
          plot.background = element_blank(), 
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          legend.position="none",
          axis.line = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_text(size=15, face="bold", color = 'black'),
          axis.ticks=element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          plot.title = element_blank()
     ) + 
     scale_y_discrete(breaks=c("Group.A", "Group.B"), labels=c("Group A", "Group B"))

library(gtable)

# Version C
both = rbind(ggplotGrob(g), ggplotGrob(tbl), size="last")
panels <- both$layout$t[grep("panel", both$layout$name)]
both$heights[panels] <- list(unit(1,"null"), unit(2, "lines"))
grid.newpage()
grid.draw(both)

解决方案

you could do

both = rbind(ggplotGrob(g), ggplotGrob(tbl), size="last")
panels <- both$layout$t[grep("panel", both$layout$name)]
both$heights[panels] <- list(unit(1,"null"), unit(2, "lines"))
both <- gtable_add_rows(both, heights = unit(1,"line"), 8)
both <- gtable_add_grob(both, 
                        textGrob("Number at risk", hjust=0, x=0), 
                        t=9, l=2, r=4)
grid.newpage()
grid.draw(both)

这篇关于如何设置“风险号码”表使用ggplot2在Kaplan-Meier图下的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆