如何绘制混合箱线图:另一半有抖动点的半箱线图? [英] How to plot a hybrid boxplot: half boxplot with jitter points on the other half?

查看:26
本文介绍了如何绘制混合箱线图:另一半有抖动点的半箱线图?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我试图绘制一幅与今年发表在 Nature 上的一篇文章中的图 2d-f 类似的图。它基本上是半个箱线图,另一半是数据点。

这些是我的数据和代码,它们产生了里面有点的完整盒子

require(magrittr)需要(tidyverse)数据 <- 结构(列表(p1 = c(0.0854261831077604,0.408418657218253,0.577793646477315, 0.578028229977424, 0.48933166218204, 0.53117814324334,0.526653494462464, 0.00687616283435221, 0.444300425796509, 0.00287319455358522,0.949821402532831, 0.96832469523368, 0.953281969982759, 0.360125244759434,0.407921095422844, 0.885776732104954, 0.159882184516691, 0.911094990767761,0.0444367172734037, 0.144888951725151, 0.508858686640707, 0.694913731085945,0.117270366119258, 0.78227546070467, 0.980457304886186, 0.711464034564424,0.753944466390685, 0.0474210438747038, 0.00344183466223558, 0.0290017465534545,0.75092385236303, 0.868873921257987, 0.744396990487425, 0.0140007244233847,0.0332266395043963, 0.482897084793009, 0.0535516646483004, 0.452926358923891,0.0144057727301603, 0.171918034525543), p2 = c(0.101262675229211,0.196913109208586, 0.37814311161382, 0.0677625689405156, 0.12517090579686,0.409083554335168, 0.158886941347288, 0.847394861862651, 0.180560031076741,0.967122694294885, 0.000901627067665116, 0.00039495110143705,9.70707318411806e-05, 0.546200038486894, 0.435475454787648, 5.95555269800323e-06,0.0178837768834925, 8.42690065415846e-06, 0.00777059697751842,0.0020397073541544, 0.486699073016371, 0.283679673247571, 0.857183359146641,0.200712003853458, 0.0164911141652784, 0.0542250670734297, 0.232340206984506,0.948523714169708, 0.169881661474024, 0.968983592882272, 0.00250367590158291,0.000792323746977033, 0.000185068166140097, 0.0193600071757997,0.114775271592724, 4.65931778380389e-05, 0.000754760900847164,2.07521623816406e-05, 0.00782764273312856, 0.00276993826117348), p3 = c(0.0118642223785376, 0.0267362912322735, 6.60753171741111e-08,0.053576051466652, 0.00375873110094442, 9.85095078844696e-08,0.0525436528683484, 0.0193735809639814, 8.44717454802822e-07,0.00608007737576027, 0.0205563904131287, 0.0104638062130591,0.0249997053664864, 0.0587924727726031, 0.0443600964770995, 0.067125687916273,0.758612877724648, 0.0618158334848203, 0.0251025592849138, 
0.790905778949543,0.00126904829915329, 0.00760772364901772, 0.00119821088328392,0.0115117347754715, 0.000863676435448072, 0.000996891439583434,0.0115279148630096, 0.00249122388568909, 5.21508620418823e-05,0.00144050407848742, 0.120373444447631, 0.0534773096149069, 0.110284261289338,0.571243879053544, 0.438152084363961, 0.364887514202121, 0.696293189762153,0.414870716968937, 0.0557358576822093, 0.783929426716999), p4 = c(0.000107231042599948,0.000379648762557529, 8.25102162601208e-06, 0.000343829024899591,0.000140680688077216, 1.90076798696051e-06, 0.000214507212681323,1.38587688080716e-05, 3.48104084092359e-06, 6.50782599216903e-07,0.0114584884733498, 0.00652170746426181, 0.0143309604192116,0.0275718029789144, 0.0352327288308957, 0.022950800779703, 0.0569939247302654,0.0190248244391564, 0.0305921420687752, 0.00589871320676732,0.000805515847378872, 1.97674357551495e-05, 8.30853708305541e-06,1.32462751169762e-06, 4.8731965929686e-05, 0.0057411315642433,4.82406700397824e-05, 0.000204633566379066, 0.0552263911781015,0.000181994007177494, 0.0585729576787707, 0.0273685460128338,0.0568746134466117, 0.299309335625926, 0.278980446497419, 0.105600715225359,0.176549247514501, 0.101420411455169, 0.01003894550707, 0.0010803018725911), p5 = c(0.786823338804824, 0.151956168584644, 0.0433468890359269,0.19556481029922, 0.380808150243027, 0.0389798680141623, 0.260481184897901,0.101147673996922, 0.0184624278061585, 0.0222416874775066, 0.000113517761014704,0.00329593083795693, 0.000476682365422989, 0.00571997662739322,0.0697473913851358, 0.0216803412883361, 0.00631472476841249,0.00628215584877364, 0.540944692186543, 0.0135127011440213, 0.00235752761214414,3.10282042735927e-06, 0.0239147204208516, 4.97334784773176e-05,0.00213837866453402, 0.000212207014031345, 0.00180443364400107,8.15954685083038e-05, 0.00445169398173509, 0.000391265642772285,0.0676128522356959, 0.0494864355994384, 0.0882575475549674, 0.0960799089263987,0.134853114895623, 0.0465661014986807, 0.0728456746626632, 
0.0307607877988244,0.476388236185883, 0.00831263646470973), p6 = c(0.0145163494370677,0.215596124993685, 0.00070803577599434, 0.104724510291289, 0.000789869989050939,0.0207564351298348, 0.00122021921131791, 0.0251938615732845,0.356672789562296, 0.00168169551566413, 0.0171485737520108, 0.0109989091496048,0.00681361113427885, 0.00159046437476052, 0.00726323309637717,0.00246048235803604, 0.000312511376490686, 0.00177376855883463,0.351153292208846, 0.0427541476203625, 1.01485842454486e-05,0.0137760017612841, 0.000425034892882118, 0.0054497425604112,7.93882623673471e-07, 0.227360668344289, 0.000334737447758259,0.0012777890350116, 0.766946267841861, 8.96835836820999e-07,1.32173732897771e-05、1.46376785664669e-06、1.51905551715105e-06、6.14479494697213e-06, 1.24431458762028e-05, 1.99110299298599e-06,5.46251153509928e-06, 9.72690797485877e-07, 0.435603545161549,0.0319896621589845), type = c("small", "small", "small", "small",小",小",小",小",小",小",小",小",小",小",小",小",小",小",小",小",小"、小"、大"、大"、大"、大"、大"、大"、大"、大"、大"、大"、大"、大"、大"、大"、大"、大"、"big", "big", "big", "big"), loc = c("abro", "abro", "abro","阿布罗", "阿布罗", "阿布罗", "阿布罗", "阿布罗", "阿布罗", "阿布罗", "圆顶","圆顶", "圆顶", "圆顶", "圆顶", "圆顶", "圆顶", "圆顶", "圆顶","圆顶", "abro", "abro", "abro", "abro", "abro", "abro", "abro","abro", "abro", "abro", "圆顶", "圆顶", "圆顶", "圆顶", "圆顶","圆顶", "圆顶", "圆顶", "圆顶", "圆顶")), .Names = c("p1", "p2","p3", "p4", "p5", "p6", "type", "loc"), class = c("tbl_df", "tbl","data.frame"), row.names = c(NA, -40L))一瞥(数据)#>观察:40#>变量:8#>$ p1 <dbl>0.085426183, 0.408418657, 0.577793646, 0.578028230, 0.489...#>$ p2 <dbl>1.012627e-01、1.969131e-01、3.781431e-01、6.776257e-02、1...#>$ p3 <dbl>1.186422e-02、2.673629e-02、6.607532e-08、5.357605e-02、3...#>$ p4 <dbl>1.072310e-04, 3.796488e-04, 8.251022e-06, 3.438290e-04, 1...#>$ p5 <dbl>7.868233e-01、1.519562e-01、4.334689e-02、1.955648e-01、3...#>$ p6 <dbl>1.451635e-02、2.155961e-01、7.080358e-04、1.047245e-01、7...#>$ 类型 小",小",小",小",小",小",小",小"……#>$ loc 阿布罗"、阿布罗"、阿布罗"、阿布罗"、阿布罗"、阿布罗"、阿布罗"、……

将数据转换为长格式

dat_long <- dat %>%
  gather(key, value, 1:6) %>%
  mutate(loc = factor(loc, levels = c("abro", "dome")),
         type = factor(type),
         key = factor(key))

绘制带点的箱线图

ggplot(dat_long, aes(x = type, y = value, color = key)) +
  facet_grid(loc ~ key) +
  geom_point(position = position_jitter(width = 0.3), alpha = 0.3, size = 2) +
  geom_boxplot(outlier.color = NA) +
  theme_light() +
  theme(legend.position = "bottom") +
  guides(col = guide_legend(nrow = 1))

解决方案

一个非常快速的解决方案是使用 position_nudge 添加一些微调.

dat_long %>%
  ggplot(aes(x = type, y = value, fill = key)) +
  geom_boxplot(outlier.color = NA) +
  geom_point(position = position_nudge(x = 0.5), shape = 21, size = 2) +
  facet_grid(loc ~ key)

或将 x 轴因子转换为数值并添加一些值

dat_long %>%
  ggplot(aes(x = type, y = value, fill = key)) +
  geom_boxplot(outlier.color = NA) +
  geom_point(aes(as.numeric(type) + 0.5), shape = 21, size = 2) +
  facet_grid(loc ~ key)

关于 x 轴位置的更通用的方法如下.简而言之,这个想法是添加相同框的第二个数据层.第二个框使用合适的线型和 alpha 隐藏(参见 scale_),但很容易被点覆盖.

dat_long <- dat %>%
  gather(key, value, 1:6) %>%
  mutate(loc = factor(loc, levels = c("abro", "dome")),
         type = factor(type),
         key = factor(key)) %>%
  mutate(gr = 1) # 为第一层添加因子水平

dat_long %>%
  mutate(gr = 2) %>% # 为第二个不可见层添加因子水平
  bind_rows(dat_long) %>% # 添加相同的数据
  ggplot(aes(x = type, y = value, fill = key, alpha = factor(gr), linetype = factor(gr))) +
  geom_boxplot(outlier.color = NA) +
  facet_grid(loc ~ key) +
  geom_point(data = . %>% filter(gr == 1), position = position_nudge(y = 0, x = 0.2), shape = 21, size = 2) +
  scale_alpha_discrete(range = c(1, 0)) +
  scale_linetype_manual(values = c("solid", "blank")) +
  guides(alpha = "none", linetype = "none")

使用 zankuralt 在下面发布的代码,并针对分面(faceting)进行优化,你可以尝试:

I'm trying to make a plot similar to Fig. 2d-f in an article published in Nature this year. It's basically a half boxplot with points on the other half.

Can anyone give me some hints? Thank you very much!

These are my data and code which produced full boxes with points inside

# Attach the packages used below. library() stops with an error when a
# package is not installed, whereas require() only returns FALSE and lets the
# script fail later with a less obvious message.
library(magrittr)
library(tidyverse)

# Example data for the plots below: a tibble-classed data frame with 40 rows
# (row.names = c(NA, -40L)), six numeric columns p1..p6 and two character
# columns, `type` ("small"/"big") and `loc` ("abro"/"dome").
# NOTE(review): the literal has the shape of dput() output - presumably it was
# generated that way rather than typed by hand.
dat <- structure(list(p1 = c(0.0854261831077604, 0.408418657218253, 
  0.577793646477315, 0.578028229977424, 0.48933166218204, 0.53117814324334, 
  0.526653494462464, 0.00687616283435221, 0.444300425796509, 0.00287319455358522, 
  0.949821402532831, 0.96832469523368, 0.953281969982759, 0.360125244759434, 
  0.407921095422844, 0.885776732104954, 0.159882184516691, 0.911094990767761, 
  0.0444367172734037, 0.144888951725151, 0.508858686640707, 0.694913731085945, 
  0.117270366119258, 0.78227546070467, 0.980457304886186, 0.711464034564424, 
  0.753944466390685, 0.0474210438747038, 0.00344183466223558, 0.0290017465534545, 
  0.75092385236303, 0.868873921257987, 0.744396990487425, 0.0140007244233847, 
  0.0332266395043963, 0.482897084793009, 0.0535516646483004, 0.452926358923891, 
  0.0144057727301603, 0.171918034525543), p2 = c(0.101262675229211, 
  0.196913109208586, 0.37814311161382, 0.0677625689405156, 0.12517090579686, 
  0.409083554335168, 0.158886941347288, 0.847394861862651, 0.180560031076741, 
  0.967122694294885, 0.000901627067665116, 0.00039495110143705, 
  9.70707318411806e-05, 0.546200038486894, 0.435475454787648, 5.95555269800323e-06, 
  0.0178837768834925, 8.42690065415846e-06, 0.00777059697751842, 
  0.0020397073541544, 0.486699073016371, 0.283679673247571, 0.857183359146641, 
  0.200712003853458, 0.0164911141652784, 0.0542250670734297, 0.232340206984506, 
  0.948523714169708, 0.169881661474024, 0.968983592882272, 0.00250367590158291, 
  0.000792323746977033, 0.000185068166140097, 0.0193600071757997, 
  0.114775271592724, 4.65931778380389e-05, 0.000754760900847164, 
  2.07521623816406e-05, 0.00782764273312856, 0.00276993826117348
  ), p3 = c(0.0118642223785376, 0.0267362912322735, 6.60753171741111e-08, 
  0.053576051466652, 0.00375873110094442, 9.85095078844696e-08, 
  0.0525436528683484, 0.0193735809639814, 8.44717454802822e-07, 
  0.00608007737576027, 0.0205563904131287, 0.0104638062130591, 
  0.0249997053664864, 0.0587924727726031, 0.0443600964770995, 0.067125687916273, 
  0.758612877724648, 0.0618158334848203, 0.0251025592849138, 0.790905778949543, 
  0.00126904829915329, 0.00760772364901772, 0.00119821088328392, 
  0.0115117347754715, 0.000863676435448072, 0.000996891439583434, 
  0.0115279148630096, 0.00249122388568909, 5.21508620418823e-05, 
  0.00144050407848742, 0.120373444447631, 0.0534773096149069, 0.110284261289338, 
  0.571243879053544, 0.438152084363961, 0.364887514202121, 0.696293189762153, 
  0.414870716968937, 0.0557358576822093, 0.783929426716999), p4 = c(0.000107231042599948, 
  0.000379648762557529, 8.25102162601208e-06, 0.000343829024899591, 
  0.000140680688077216, 1.90076798696051e-06, 0.000214507212681323, 
  1.38587688080716e-05, 3.48104084092359e-06, 6.50782599216903e-07, 
  0.0114584884733498, 0.00652170746426181, 0.0143309604192116, 
  0.0275718029789144, 0.0352327288308957, 0.022950800779703, 0.0569939247302654, 
  0.0190248244391564, 0.0305921420687752, 0.00589871320676732, 
  0.000805515847378872, 1.97674357551495e-05, 8.30853708305541e-06, 
  1.32462751169762e-06, 4.8731965929686e-05, 0.0057411315642433, 
  4.82406700397824e-05, 0.000204633566379066, 0.0552263911781015, 
  0.000181994007177494, 0.0585729576787707, 0.0273685460128338, 
  0.0568746134466117, 0.299309335625926, 0.278980446497419, 0.105600715225359, 
  0.176549247514501, 0.101420411455169, 0.01003894550707, 0.0010803018725911
  ), p5 = c(0.786823338804824, 0.151956168584644, 0.0433468890359269, 
  0.19556481029922, 0.380808150243027, 0.0389798680141623, 0.260481184897901, 
  0.101147673996922, 0.0184624278061585, 0.0222416874775066, 0.000113517761014704, 
  0.00329593083795693, 0.000476682365422989, 0.00571997662739322, 
  0.0697473913851358, 0.0216803412883361, 0.00631472476841249, 
  0.00628215584877364, 0.540944692186543, 0.0135127011440213, 0.00235752761214414, 
  3.10282042735927e-06, 0.0239147204208516, 4.97334784773176e-05, 
  0.00213837866453402, 0.000212207014031345, 0.00180443364400107, 
  8.15954685083038e-05, 0.00445169398173509, 0.000391265642772285, 
  0.0676128522356959, 0.0494864355994384, 0.0882575475549674, 0.0960799089263987, 
  0.134853114895623, 0.0465661014986807, 0.0728456746626632, 0.0307607877988244, 
  0.476388236185883, 0.00831263646470973), p6 = c(0.0145163494370677, 
  0.215596124993685, 0.00070803577599434, 0.104724510291289, 0.000789869989050939, 
  0.0207564351298348, 0.00122021921131791, 0.0251938615732845, 
  0.356672789562296, 0.00168169551566413, 0.0171485737520108, 0.0109989091496048, 
  0.00681361113427885, 0.00159046437476052, 0.00726323309637717, 
  0.00246048235803604, 0.000312511376490686, 0.00177376855883463, 
  0.351153292208846, 0.0427541476203625, 1.01485842454486e-05, 
  0.0137760017612841, 0.000425034892882118, 0.0054497425604112, 
  7.93882623673471e-07, 0.227360668344289, 0.000334737447758259, 
  0.0012777890350116, 0.766946267841861, 8.96835836820999e-07, 
  1.32173732897771e-05, 1.46376785664669e-06, 1.51905551715105e-06, 
  6.14479494697213e-06, 1.24431458762028e-05, 1.99110299298599e-06, 
  5.46251153509928e-06, 9.72690797485877e-07, 0.435603545161549, 
  0.0319896621589845), type = c("small", "small", "small", "small", 
  "small", "small", "small", "small", "small", "small", "small", 
  "small", "small", "small", "small", "small", "small", "small", 
  "small", "small", "big", "big", "big", "big", "big", "big", "big", 
  "big", "big", "big", "big", "big", "big", "big", "big", "big", 
  "big", "big", "big", "big"), loc = c("abro", "abro", "abro", 
  "abro", "abro", "abro", "abro", "abro", "abro", "abro", "dome", 
  "dome", "dome", "dome", "dome", "dome", "dome", "dome", "dome", 
  "dome", "abro", "abro", "abro", "abro", "abro", "abro", "abro", 
  "abro", "abro", "abro", "dome", "dome", "dome", "dome", "dome", 
  "dome", "dome", "dome", "dome", "dome")), .Names = c("p1", "p2", 
  "p3", "p4", "p5", "p6", "type", "loc"), class = c("tbl_df", "tbl", 
  "data.frame"), row.names = c(NA, -40L))
# Print a compact, one-line-per-column overview of `dat`. The `#>` lines below
# are the output recorded with the example: 40 observations, 8 variables,
# p1..p6 as <dbl> and type/loc as <chr>.
glimpse(dat)
#> Observations: 40
#> Variables: 8
#> $ p1   <dbl> 0.085426183, 0.408418657, 0.577793646, 0.578028230, 0.489...
#> $ p2   <dbl> 1.012627e-01, 1.969131e-01, 3.781431e-01, 6.776257e-02, 1...
#> $ p3   <dbl> 1.186422e-02, 2.673629e-02, 6.607532e-08, 5.357605e-02, 3...
#> $ p4   <dbl> 1.072310e-04, 3.796488e-04, 8.251022e-06, 3.438290e-04, 1...
#> $ p5   <dbl> 7.868233e-01, 1.519562e-01, 4.334689e-02, 1.955648e-01, 3...
#> $ p6   <dbl> 1.451635e-02, 2.155961e-01, 7.080358e-04, 1.047245e-01, 7...
#> $ type <chr> "small", "small", "small", "small", "small", "small", "sm...
#> $ loc  <chr> "abro", "abro", "abro", "abro", "abro", "abro", "abro", "...

Convert data to long format

# Reshape to long format: the six measurement columns p1..p6 (columns 1 to 6
# of `dat`) are stacked into a `key` (former column name) / `value` pair, and
# the three grouping columns are converted to factors. `loc` gets its level
# order spelled out; `type` and `key` use the default level order.
dat_long <- dat %>%
  gather(key = "key", value = "value", p1:p6) %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key)
  )

Plot boxplot with points

# Baseline plot: jittered raw points with a full boxplot drawn on top of
# them, one panel per `loc` (rows) and `key` (columns).
ggplot(data = dat_long, mapping = aes(x = type, y = value, colour = key)) +
  geom_jitter(width = 0.3, alpha = 0.3, size = 2) +
  # NA colour makes the boxplot's own outlier markers invisible; the raw
  # observations are already shown by the jitter layer.
  geom_boxplot(outlier.colour = NA) +
  facet_grid(loc ~ key) +
  theme_light() +
  theme(legend.position = "bottom") +
  # keep the colour legend on a single row
  guides(colour = guide_legend(nrow = 1))

解决方案

A very fast solution would be to add some nudge using position_nudge.

# Quick variant: keep the full boxplot and shift every point 0.5 x-units to
# the right of its category with position_nudge().
dat_long %>%
  ggplot(mapping = aes(x = type, y = value, fill = key)) +
  geom_boxplot(outlier.colour = NA) +
  geom_point(
    shape = 21,
    size = 2,
    position = position_nudge(x = 0.5)
  ) +
  facet_grid(loc ~ key)

Or convert the x-axis factor to numeric and add an offset:

# Variant without position_nudge(): the point layer overrides x with the
# factor's integer code plus 0.5, so the points sit to the right of the box.
dat_long %>%
  ggplot(mapping = aes(x = type, y = value, fill = key)) +
  geom_boxplot(outlier.colour = NA) +
  geom_point(
    mapping = aes(x = as.numeric(type) + 0.5),
    shape = 21,
    size = 2
  ) +
  facet_grid(loc ~ key)

A more general approach to controlling the x-axis position is the following. In brief, the idea is to add a second data layer containing the same boxes. The second set of boxes is hidden using a suitable linetype and alpha (see the scale_ calls), so the points can easily be plotted over them.

# Long-format data as before (p1..p6 stacked into key/value, grouping columns
# as factors), plus a constant column `gr = 1` that marks these rows as the
# first copy of the data.
dat_long <- dat %>%
  gather(key = "key", value = "value", p1:p6) %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key),
    gr = 1 # marker for the first layer
  )

# Stack a second, identical copy of the data marked gr = 2 on top of the
# gr = 1 rows. `gr` drives both alpha and linetype, and the scales below map
# the second level to alpha 0 and a blank outline, so that copy's boxes are
# invisible.
# NOTE(review): presumably geom_boxplot dodges the two copies side by side,
# which is what frees the space the nudged points are drawn into - confirm.
dat_long %>%
  mutate(gr = 2) %>%
  bind_rows(dat_long) %>%
  ggplot(
    mapping = aes(
      x = type,
      y = value,
      fill = key,
      alpha = factor(gr),
      linetype = factor(gr)
    )
  ) +
  geom_boxplot(outlier.colour = NA) +
  # Points are drawn for the first copy only, shifted 0.2 to the right.
  geom_point(
    data = function(d) filter(d, gr == 1),
    shape = 21,
    size = 2,
    position = position_nudge(x = 0.2, y = 0)
  ) +
  facet_grid(loc ~ key) +
  scale_alpha_discrete(range = c(1, 0)) +
  scale_linetype_manual(values = c("solid", "blank")) +
  # the alpha/linetype legends only describe the helper column, so hide them
  guides(alpha = "none", linetype = "none")

Using the code zankuralt posted below and optimising it for faceting, you can try:

# Half boxplot with jittered points beside it, assembled by hand so that it
# works with facet_grid(). The x position is made numeric (`type2`) so the
# box, the points and the whisker segments can each be shifted independently
# around the category position.
dat %>%
  gather(key = "key", value = "value", p1:p6) %>%
  mutate(
    loc = factor(loc, levels = c("abro", "dome")),
    type = factor(type),
    key = factor(key),
    # integer code of the `type` factor, used as the numeric x position
    type2 = as.numeric(type)
  ) %>%
  # Summary statistics per type/loc/key combination; mutate() (rather than
  # summarise()) keeps every observation and repeats the statistics on each row.
  group_by(type, loc, key) %>%
  mutate(
    d_ymin = min(value),
    d_ymax = max(value),
    d_lower = quantile(value, probs = 0.25),
    d_middle = median(value),
    d_upper = quantile(value, probs = 0.75)
  ) %>%
  ggplot() +

  # box: centred at type2 - 0.2 with width 0.4. ymin/ymax are set to the
  # quartiles, so this layer adds no whiskers; they are drawn by the
  # segments further down.
  geom_boxplot(
    aes(
      x = type2 - 0.2,
      ymin = d_lower,
      lower = d_lower,
      middle = d_middle,
      upper = d_upper,
      ymax = d_upper,
      width = 2 * 0.2,
      fill = key
    ),
    stat = "identity"
  ) +

  # raw observations: centred at type2 + 0.2, jittered horizontally only
  geom_jitter(
    aes(
      x = type2 + 0.2,
      y = value,
      color = key
    ),
    width = 0.2 - 0.25 * 0.2,
    height = 0
  ) +

  # vertical whisker line from the minimum to the maximum at x = type2
  geom_segment(
    aes(
      x = type2,
      xend = type2,
      y = d_ymin,
      yend = d_ymax
    )
  ) +

  # horizontal cap at the maximum
  geom_segment(
    aes(
      x = type2 - 0.1,
      xend = type2,
      y = d_ymax,
      yend = d_ymax
    )
  ) +

  # horizontal cap at the minimum
  geom_segment(
    aes(
      x = type2 - 0.1,
      xend = type2,
      y = d_ymin,
      yend = d_ymin
    )
  ) +

  # x was made numeric for the shifting, so the category labels have to be
  # put back by hand: breaks 1 and 2 are the integer codes of the `type` levels
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("big", "small")
  ) +
  facet_grid(loc ~ key)

这篇关于如何绘制混合箱线图:另一半有抖动点的半箱线图?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆