使用TukeyHSD的输出将重要字母自动添加到gglot条形图 [英] Automatically adding letters of significance to a ggplot barplot using output from TukeyHSD

查看:38
本文介绍了使用TukeyHSD的输出将重要字母自动添加到gglot条形图的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

使用此数据.



hogs.sample<-structure(list(Zone = c("B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "D", 
"D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", 
"D", "D", "D", "D", "D", "D"), Levelname = c("Medium", "High", 
"Low", "Med.High", "Med.High", "Med.High", "Med.High", "Med.High", 
"Med.High", "Medium", "Med.High", "Medium", "Med.High", "High", 
"Medium", "High", "Low", "Med.High", "Low", "High", "Medium", 
"Medium", "Med.High", "Low", "Low", "Med.High", "Low", "Low", 
"High", "High", "Med.High", "High", "Med.High", "Med.High", "Medium", 
"High", "Low", "Low", "Med.High", "Low"), hogs.fit = c(-0.122, 
-0.871, -0.279, -0.446, 0.413, 0.011, 0.157, 0.131, 0.367, -0.23, 
0.007, 0.05, 0.04, -0.184, -0.265, -1.071, -0.223, 0.255, -0.635, 
-1.103, 0.008, -0.04, 0.831, 0.042, -0.005, -0.022, 0.692, 0.402, 
0.615, 0.785, 0.758, 0.738, 0.512, 0.222, -0.424, 0.556, -0.128, 
-0.495, 0.591, 0.923)), row.names = c(NA, -40L), groups = structure(list(
    Zone = c("B", "D"), .rows = structure(list(1:20, 21:40), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

我正在尝试将基于Tukey HSD的重要字母添加到下面的绘图中.

library(agricolae)
library(tidyverse)
hogs.plot <- ggplot(hogs.sample, aes(x = Zone, y = exp(hogs.fit), 
                                     fill = factor(Levelname, levels = c("High", "Med.High", "Medium", "Low")))) +  
  stat_summary(fun = mean, geom = "bar", position = position_dodge(0.9), color = "black") +  
  stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(0.9), width = 0.2) + 
  labs(x = "", y = "CPUE (+/-1SE)", legend = NULL) + 
  scale_y_continuous(expand = c(0,0), labels = scales::number_format(accuracy = 0.1)) + 
  scale_fill_manual(values = c("midnightblue", "dodgerblue4", "steelblue3", 'lightskyblue')) + 
  scale_x_discrete(breaks=c("B", "D"), labels=c("Econfina", "Steinhatchee"))+
  scale_color_hue(l=40, c = 100)+
 # coord_cartesian(ylim = c(0, 3.5)) +
  labs(title = "Hogs", x = "", legend = NULL) + 
  theme(panel.border = element_blank(), panel.grid.major = element_blank(), panel.background = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(),
        axis.text.x = element_text(), axis.title.x = element_text(vjust = 0),
        axis.title.y = element_text(size = 8))+
  theme(legend.title = element_blank(), 
        legend.position = "none")
hogs.plot

我的理想输出应该是这样的.

我不确定在我的样本图上这些字母是否100%准确,但它们表示哪些组彼此之间有很大的不同。区域是独立的,因此我不希望在这两个区域之间进行任何比较,因此我使用以下代码单独运行它们。

hogs.aov.b <- aov(hogs.fit ~Levelname, data = filter(hogs.sample, Zone == "B"))
hogs.aov.summary.b <- summary(hogs.aov.b)
hogs.tukey.b <- TukeyHSD(hogs.aov.b)
hogs.tukey.b

hogs.aov.d <- aov(hogs.fit ~ Levelname, data = filter(hogs.sample, Zone == "D"))
hogs.aov.summary.d <- summary(hogs.aov.d)
hogs.tukey.d <- TukeyHSD(hogs.aov.d)
hogs.tukey.d

我尝试过这条路线,但除了猪之外,我还有许多其他物种可以应用这一路线。 Show statistically significant difference in a graph

我可以一次获取一个区域的字母,但我不确定如何将两个区域都添加到绘图中。它们也是不合乎规程的。我从一个网页修改了此代码,字母不能很好地放置在栏的顶部。

library(agricolae)
library(tidyverse)
# get highest point overall
abs_max <- max(bass.dat.d$bass.fit)
# get the highest point for each class
maxs <- bass.dat.d %>%
  group_by(Levelname) %>%
  # I like to add a little bit to each value so it rests above
  # the highest point. Using a percentage of the highest point
  # overall makes this code a bit more general
  summarise(bass.fit=max(mean(exp(bass.fit))))
# get Tukey HSD results
Tukey_test <- aov(bass.fit ~ Levelname, data=bass.dat.d) %>%
  HSD.test("Levelname", group=TRUE) %>%
  .$groups %>%
  as_tibble(rownames="Levelname") %>%
  rename("Letters_Tukey"="groups") %>%
  select(-bass.fit) %>%
  # and join it to the max values we calculated -- these are
  # your new y-coordinates
  left_join(maxs, by="Levelname")

类似的示例也有很多https://www.staringatr.com/3-the-grammar-of-graphics/bar-plots/3_tukeys/,但它们都只是手动添加文本。如果有代码可以自动获取Tukey输出并将重要字母添加到绘图中,那就太好了。

谢谢

推荐答案

我不理解您的数据/分析(例如,您为什么在hogs.fit上使用exp(),这些字母应该是什么?)所以我不确定这是否正确,但是没有其他人回答,所以我最好的猜测是:

正确示例:

## Source: Rosane Rech
## https://statdoe.com/cld-customisation/#adding-the-letters-indicating-significant-differences
## https://www.youtube.com/watch?v=Uyof3S1gx3M

library(tidyverse)
library(ggthemes)
library(multcompView)

# analysis of variance
anova <- aov(weight ~ feed, data = chickwts)

# Tukey's test
tukey <- TukeyHSD(anova)

# compact letter display
cld <- multcompLetters4(anova, tukey)

# table with factors and 3rd quantile
dt <- group_by(chickwts, feed) %>%
  summarise(w=mean(weight), sd = sd(weight)) %>%
  arrange(desc(w))

# extracting the compact letter display and adding to the Tk table
cld <- as.data.frame.list(cld$feed)
dt$cld <- cld$Letters

print(dt)
#> # A tibble: 6 × 4
#>   feed          w    sd cld  
#>   <fct>     <dbl> <dbl> <chr>
#> 1 sunflower  329.  48.8 a    
#> 2 casein     324.  64.4 a    
#> 3 meatmeal   277.  64.9 ab   
#> 4 soybean    246.  54.1 b    
#> 5 linseed    219.  52.2 bc   
#> 6 horsebean  160.  38.6 c

ggplot(dt, aes(feed, w)) + 
  geom_bar(stat = "identity", aes(fill = w), show.legend = FALSE) +
  geom_errorbar(aes(ymin = w-sd, ymax=w+sd), width = 0.2) +
  labs(x = "Feed Type", y = "Average Weight Gain (g)") +
  geom_text(aes(label = cld, y = w + sd), vjust = -0.5) +
  ylim(0,410) +
  theme_few()

我对您的数据的"最佳猜测":

hogs.sample <- structure(list(Zone = c("B", "B", "B", "B", "B", "B", "B", "B", 
                                       "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "D", 
                                       "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", 
                                       "D", "D", "D", "D", "D", "D"), Levelname = c("Medium", "High", 
                                                                                    "Low", "Med.High", "Med.High", "Med.High", "Med.High", "Med.High", 
                                                                                    "Med.High", "Medium", "Med.High", "Medium", "Med.High", "High", 
                                                                                    "Medium", "High", "Low", "Med.High", "Low", "High", "Medium", 
                                                                                    "Medium", "Med.High", "Low", "Low", "Med.High", "Low", "Low", 
                                                                                    "High", "High", "Med.High", "High", "Med.High", "Med.High", "Medium", 
                                                                                    "High", "Low", "Low", "Med.High", "Low"), hogs.fit = c(-0.122, 
                                                                                                                                           -0.871, -0.279, -0.446, 0.413, 0.011, 0.157, 0.131, 0.367, -0.23, 
                                                                                                                                           0.007, 0.05, 0.04, -0.184, -0.265, -1.071, -0.223, 0.255, -0.635, 
                                                                                                                                           -1.103, 0.008, -0.04, 0.831, 0.042, -0.005, -0.022, 0.692, 0.402, 
                                                                                                                                           0.615, 0.785, 0.758, 0.738, 0.512, 0.222, -0.424, 0.556, -0.128, 
                                                                                                                                           -0.495, 0.591, 0.923)), row.names = c(NA, -40L), groups = structure(list(
                                                                                                                                             Zone = c("B", "D"), .rows = structure(list(1:20, 21:40), ptype = integer(0), class = c("vctrs_list_of", 
                                                                                                                                                                                                                                    "vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
                                                                                                                                                                                                                                                                                               "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
                                                                                                                                                                                                                                                                                                                                              "tbl_df", "tbl", "data.frame"))

# anova
anova <- aov(hogs.fit ~ Levelname * Zone, data = hogs.sample)

# Tukey's test
tukey <- TukeyHSD(anova)

# compact letter display
cld <- multcompLetters4(anova, tukey)

# table with factors and 3rd quantile
dt <- hogs.sample %>% 
  group_by(Zone, Levelname) %>%
  summarise(w=mean(exp(hogs.fit)), sd = sd(exp(hogs.fit)) / sqrt(n())) %>%
  arrange(desc(w)) %>% 
  ungroup() %>% 
  mutate(Levelname = factor(Levelname,
                            levels = c("High",
                                       "Med.High",
                                       "Medium",
                                       "Low"),
                            ordered = TRUE))

# extracting the compact letter display and adding to the Tk table
cld2 <- data.frame(letters = cld$`Levelname:Zone`$Letters)
dt$cld <- cld2$letters

print(dt)
#> # A tibble: 8 × 5
#>   Zone  Levelname     w     sd cld  
#>   <chr> <ord>     <dbl>  <dbl> <chr>
#> 1 D     High      1.97  0.104  a    
#> 2 D     Med.High  1.69  0.206  ab   
#> 3 D     Low       1.36  0.258  abc  
#> 4 B     Med.High  1.14  0.0872 abc  
#> 5 B     Medium    0.875 0.0641 bcd  
#> 6 D     Medium    0.874 0.111  bcd  
#> 7 B     Low       0.696 0.0837 cd   
#> 8 B     High      0.481 0.118  d

ggplot(dt, aes(x = Levelname, y = w)) + 
  geom_bar(stat = "identity", aes(fill = Levelname), show.legend = FALSE) +
  geom_errorbar(aes(ymin = w - sd, ymax = w + sd), width = 0.2) +
  labs(x = "Levelname", y = "Average hogs.fit") +
  geom_text(aes(label = cld, y = w + sd), vjust = -0.5) +
  facet_wrap(~Zone)

reprex package(v2.0.1)创建于2021-10-01

这篇关于使用TukeyHSD的输出将重要字母自动添加到gglot条形图的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆