R:ggplot对聚类摘要进行微调 [英] R: ggplot slight adjustment for clustering summary

查看:128
本文介绍了R:ggplot对聚类摘要进行微调的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述



  X = t(USArrests)
$ b $请检查我的可复制示例和结果图表。 b plot_color_clust =函数(X,N = N,
cols = c(red,blue,orange,darkgreen,green,yellow,gray,black ,white)
){
library(ggplot2)
library(gridExtra)
library(gtable)
library(scales)
library(ggdendro )
library(grid)
library(plyr)

if(N> length(cols))stop(N too big,Colour not in colours。)$如果(N> ncol(X))停止(N太大,数据中没有足够的列),那么b $ b如果(N> ncol(X))停止$ b dd.row = as.dendrogram(fit)
ddata_x< - dendro_data(dd.row)
temp = cutree(fit,k = N)
lab< - ggdendro: :label(ddata_x)

x = c()
for(i in 1:nrow(lab)){
x [i] = paste(clust,as.vector (temp [lab $ label [i] == names(temp)]),sep =)
}

lab $ group < - x

p1 < - ggp (数据=实验室,
aes(label = label,x = x,y = 0,color = group),hjust = 1)+
theme(legend.position =none,
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 0,hjust = 0),
axis.title.x = element_text (),
theme(axis.text = element_blank =单位(0,lines),
axis.ticks.length =单位(0,cm))+
scale_colour_manual(values = cols)+ coord_flip()+
scale_y_continuous (限制= c(-0.1,2.1))

df2 <-data.frame(cluster = cutree(fit,N),states = factor(fit $ labels,levels = fit $ labels [fit (df2,。(cluster),summarize,pos = mean(as.numeric(states)))
p2 = ggplot(df2,aes(states,y = 1, fill = factor(cluster)))+ geom_tile()+
scale_y_continuous(expand = c(0,0))+
theme(axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position =none)+ coord_flip()+
geom_text(data = df3,aes(x = pos, label = cluster))+
scale_fill_manual(name =This is my title,values = cols)

gp1< -ggplotGrob(p1)
gp2< -ggplotGrob(p2 )
maxHeight = grid :: unit.pmax(gp1 $ heights [2:5],gp2 $ heights [2:5])
gp1 $ heights [2:5]< - as.list (maxHeight)
gp2 $ heights [2:5]< - as.list(maxHeight)
#grid.arrange(gp2,gp1,ncol = 2,widths = c(1 / 6,5 / 6))
R = arrangeGrob(gp2,gp1,ncol = 2,widths = c(1 / 6,5 / 6))
R

}

plot_color_clust(X,6)


Please check my reproducible example and the result chart.

# Transpose the built-in USArrests data so that the states become the columns.
X <- t(USArrests)

# Build a two-panel figure: a strip of coloured cluster tiles on the left and
# the dendrogram of the clustered variables (labels coloured by cluster) on
# the right.
#
# Arguments:
#   X    - data whose columns are the variables to cluster; it is passed to
#          ClustOfVar::hclustvar() as `X.quanti`.
#   N    - number of clusters to cut the tree into. Must be supplied (the
#          declared default `N = N` refers to itself and cannot be evaluated).
#   cols - colours for the clusters; needs at least N entries.
#
# Returns the object created by gridExtra::arrangeGrob() (tiles first,
# dendrogram second, widths 1/6 and 5/6).
#
# Stops with an error when N is missing, N > length(cols) or N > ncol(X).
plot_color_clust <- function(X, N = N,
                             cols = c("red", "blue", "orange", "darkgreen",
                                      "green", "yellow", "grey", "black",
                                      "white")) {
  # Validate the arguments before any package is attached so that bad input
  # fails fast and with a readable message.
  if (missing(N)) {
    stop("N (the number of clusters) must be supplied.", call. = FALSE)
  }
  if (N > length(cols)) {
    stop("N too big. Not enough colors in cols.", call. = FALSE)
  }
  if (N > ncol(X)) {
    stop("N too big. Not enough columns in data.", call. = FALSE)
  }

  library(ggplot2)
  library(gridExtra)
  library(gtable)
  library(scales)
  library(ggdendro)
  library(grid)
  library(plyr)

  fit <- ClustOfVar::hclustvar(X.quanti = X)
  ddata_x <- dendro_data(as.dendrogram(fit))
  membership <- cutree(fit, k = N)
  lab <- ggdendro::label(ddata_x)

  # Look up each dendrogram label's cluster in one vectorised step instead of
  # growing a vector inside a loop.
  lab$group <- paste0(
    "clust",
    as.vector(membership[match(as.character(lab$label), names(membership))])
  )

  # Dendrogram. The original passed `axis.title.x` twice to one theme() call,
  # set axis.text.x / axis.title.x only to blank them again in the next
  # theme() call, used the deprecated `axis.ticks.margin`, and added
  # coord_flip() twice. All axis decoration is simply switched off here.
  p1 <- ggplot(segment(ddata_x)) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_text(data = lab,
              aes(label = label, x = x, y = 0, colour = group), hjust = 1) +
    scale_colour_manual(values = cols) +
    scale_y_continuous(limits = c(-0.1, 2.1)) +
    coord_flip() +
    theme(legend.position = "none",
          axis.text = element_blank(),
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.ticks.length = unit(0, "cm"))

  # One tile per variable, ordered as in the dendrogram, filled by cluster.
  df2 <- data.frame(
    cluster = cutree(fit, N),
    states = factor(fit$labels, levels = fit$labels[fit$order])
  )
  # Position of each cluster's number: the mean tile position of its members.
  df3 <- ddply(df2, .(cluster), summarise, pos = mean(as.numeric(states)))
  p2 <- ggplot(df2, aes(states, y = 1, fill = factor(cluster))) +
    geom_tile() +
    scale_y_continuous(expand = c(0, 0)) +
    theme(axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          legend.position = "none") +
    coord_flip() +
    geom_text(data = df3, aes(x = pos, label = cluster)) +
    scale_fill_manual(name = "This is my title", values = cols)

  gp1 <- ggplotGrob(p1)
  gp2 <- ggplotGrob(p2)
  # Give rows 2:5 of both gtables the same (larger) heights.
  # NOTE(review): 2:5 are hard-coded layout rows and the as.list() assignment
  # is kept exactly as written - confirm both against the installed
  # ggplot2 / grid versions.
  maxHeight = grid::unit.pmax(gp1$heights[2:5], gp2$heights[2:5])
  gp1$heights[2:5] <- as.list(maxHeight)
  gp2$heights[2:5] <- as.list(maxHeight)

  # Use grid.arrange() with the same arguments to draw immediately instead.
  arrangeGrob(gp2, gp1, ncol = 2, widths = c(1 / 6, 5 / 6))
}

# Run the example with six clusters.
plot_color_clust(X, N = 6)

Questions:

  1. These two parts (the coloured tiles on the left and the clustering tree on the right) have inconsistent heights. How do we adjust their heights so that they match each other?

  2. How can we make the tree on the right side shorter, so that the state names (the clustered subjects) have enough space to be fully displayed?

  3. Is there a way to make the white space between those two parts smaller?

Your tweaking of the code is appreciated. Thanks.

解决方案

One major change: rather than matching the heights of the two charts, I extract the plot panel from gp2 and insert it into a column of gp1 to the left of the dendrogram panel. The extracted panel has no margins around it, which partly takes care of your point 3.

With respect to point 2: expand the limits of the axis to make room for the labels (see point 2. in the code below). The parameters for points 2 and 3 were set by trial and error; adjusting one of them means the other needs to be adjusted too.

With respect to point 1: expand the axis using the additive component of `expand` to add half a unit to each end of the axis (see point 1. in the code below).

Minor edit: updated for ggplot2 2.2.0 and R 3.3.2.
`axis.ticks.margin` is deprecated, so it is no longer used in the code below.

# Transpose the built-in USArrests data so that the states become the columns.
X <- t(USArrests)

# Draw a dendrogram of the clustered variables with a strip of coloured
# cluster tiles attached directly to its left-hand side.
#
# Arguments:
#   X    - data whose columns are the variables to cluster; it is passed to
#          ClustOfVar::hclustvar() as `X.quanti`.
#   N    - number of clusters to cut the tree into. Must be supplied (the
#          declared default `N = N` refers to itself and cannot be evaluated).
#   cols - colours for the clusters; needs at least N entries. Defaults to
#          rainbow(N); pass an explicit vector such as
#          c("red", "blue", "orange") to pick the colours by hand.
#
# Side effect: starts a new grid page and draws the combined plot on it.
# Returns the combined gtable invisibly so it can be re-drawn or saved.
#
# Stops with an error when N is missing, N > length(cols) or N > ncol(X).
plot_color_clust <- function(X, N = N,
                             cols = rainbow(N)) {
  # Validate the arguments before any package is attached so that bad input
  # fails fast and with a readable message.
  if (missing(N)) {
    stop("N (the number of clusters) must be supplied.", call. = FALSE)
  }
  if (N > length(cols)) {
    stop("N too big. Not enough colors in cols.", call. = FALSE)
  }
  if (N > ncol(X)) {
    stop("N too big. Not enough columns in data.", call. = FALSE)
  }

  library(ggplot2)
  library(gtable)
  library(grid)
  library(ggdendro)
  library(plyr)

  fit <- ClustOfVar::hclustvar(X.quanti = X)
  ddata_x <- dendro_data(as.dendrogram(fit))
  membership <- cutree(fit, k = N)
  lab <- ggdendro::label(ddata_x)

  # Look up each dendrogram label's cluster in one vectorised step instead of
  # growing a vector inside a loop.
  lab$group <- paste0(
    "clust",
    as.vector(membership[match(as.character(lab$label), names(membership))])
  )

  p1 <- ggplot(segment(ddata_x)) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    # y = -.05 adds a little space between label and tree
    geom_text(data = lab,
              aes(label = label, x = x, y = -.05, colour = group),
              size = 4, hjust = 1) +
    # 1. Add half a unit to each end of the vertical axis
    scale_x_continuous(expand = c(0, .5)) +
    # 2. Make room for labels
    expand_limits(y = -0.4) +
    theme_classic() +
    scale_colour_manual(values = cols) +
    coord_flip() +
    theme(legend.position = "none", axis.line = element_blank(),
          axis.text = element_blank(), axis.title = element_blank(),
          axis.ticks = element_blank(),
          axis.ticks.length = unit(0, "cm"))

  # One tile per variable, ordered as in the dendrogram, filled by cluster.
  df2 <- data.frame(
    cluster = cutree(fit, N),
    states = factor(fit$labels, levels = fit$labels[fit$order])
  )
  # Position of each cluster's number: the mean tile position of its members.
  df3 <- ddply(df2, .(cluster), summarise, pos = mean(as.numeric(states)))
  p2 <- ggplot(df2, aes(states, y = 1,
                        # 'as.character' - so that colours match with 10 or
                        # more clusters
                        fill = factor(as.character(cluster)))) +
    geom_tile() +
    scale_y_continuous(expand = c(0, 0)) +
    scale_x_discrete(expand = c(0, 0)) +
    coord_flip() +
    # The text size is a fixed setting, so it belongs outside aes(); inside
    # aes() the constant was mapped through a size scale. 12 / .pt converts
    # 12 points to the millimetre units that geom_text() expects.
    geom_text(data = df3, aes(x = pos, label = cluster), size = 12 / .pt) +
    scale_fill_manual(values = cols)

  gp1 <- ggplotGrob(p1)  # Get ggplot grobs
  gp2 <- ggplotGrob(p2)

  # 3. Grab the plot panel only from the tiles plot (thus, no margins).
  # The panel is located by name rather than by fixed row/column numbers.
  tile_pos <- gp2$layout[gp2$layout$name == "panel", ]
  tiles <- gp2[tile_pos$t, tile_pos$l]

  # 3. Insert it into the dendrogram plot: add a 1 cm wide column directly to
  # the left of the dendrogram panel and place the tiles in that new column,
  # on the panel's row.
  tree_pos <- gp1$layout[gp1$layout$name == "panel", ]
  gp1 <- gtable_add_cols(gp1, unit(1, "cm"), pos = tree_pos$l - 1)
  gp1 <- gtable_add_grob(gp1, tiles, t = tree_pos$t, l = tree_pos$l,
                         name = "tiles")

  grid.newpage()
  grid.draw(gp1)
  invisible(gp1)
}

# Run the example with six clusters.
plot_color_clust(X, N = 6)

这篇关于R:ggplot对聚类摘要进行微调的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆