R:ggplot对聚类摘要进行微调 [英] R: ggplot slight adjustment for clustering summary
问题描述
X = t(USArrests)
$ b $请检查我的可复制示例和结果图表。 b plot_color_clust =函数(X,N = N,
cols = c(red,blue,orange,darkgreen,green,yellow,gray,black ,white)
){
library(ggplot2)
library(gridExtra)
library(gtable)
library(scales)
library(ggdendro )
library(grid)
library(plyr)
if(N> length(cols))stop(N too big,Colour not in colours。)$如果(N> ncol(X))停止(N太大,数据中没有足够的列),那么b $ b如果(N> ncol(X))停止$ b dd.row = as.dendrogram(fit)
ddata_x< - dendro_data(dd.row)
temp = cutree(fit,k = N)
lab< - ggdendro: :label(ddata_x)
x = c()
for(i in 1:nrow(lab)){
x [i] = paste(clust,as.vector (temp [lab $ label [i] == names(temp)]),sep =)
}
lab $ group < - x
p1 < - ggp (数据=实验室,
aes(label = label,x = x,y = 0,color = group),hjust = 1)+
theme(legend.position =none,
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 0,hjust = 0),
axis.title.x = element_text (),
theme(axis.text = element_blank =单位(0,lines),
axis.ticks.length =单位(0,cm))+
scale_colour_manual(values = cols)+ coord_flip()+
scale_y_continuous (限制= c(-0.1,2.1))
df2 <-data.frame(cluster = cutree(fit,N),states = factor(fit $ labels,levels = fit $ labels [fit (df2,。(cluster),summarize,pos = mean(as.numeric(states)))
p2 = ggplot(df2,aes(states,y = 1, fill = factor(cluster)))+ geom_tile()+
scale_y_continuous(expand = c(0,0))+
theme(axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position =none)+ coord_flip()+
geom_text(data = df3,aes(x = pos, label = cluster))+
scale_fill_manual(name =This is my title,values = cols)
gp1< -ggplotGrob(p1)
gp2< -ggplotGrob(p2 )
maxHeight = grid :: unit.pmax(gp1 $ heights [2:5],gp2 $ heights [2:5])
gp1 $ heights [2:5]< - as.list (maxHeight)
gp2 $ heights [2:5]< - as.list(maxHeight)
#grid.arrange(gp2,gp1,ncol = 2,widths = c(1 / 6,5 / 6))
R = arrangeGrob(gp2,gp1,ncol = 2,widths = c(1 / 6,5 / 6))
R
}
plot_color_clust(X,6)
Please check my reproducible example and the result chart.
# Transpose the built-in USArrests data so that each state becomes a column.
X <- t(USArrests)
#' Build a cluster-coloured dendrogram next to a strip of cluster tiles.
#'
#' Runs `ClustOfVar::hclustvar()` on `X`, cuts the resulting tree into `N`
#' groups and builds two ggplot2 plots: a flipped dendrogram whose leaf labels
#' are coloured by group, and a single strip of tiles filled by group with the
#' group number printed at the mean position of its members. The two plots are
#' placed side by side (tiles in the left 1/6, dendrogram in the right 5/6).
#'
#' @param X Data passed to `hclustvar()` as `X.quanti`; `N` may not exceed
#'   `ncol(X)`.
#' @param N Number of groups to cut the tree into. It must always be supplied:
#'   the default written in the signature refers to itself and can never be
#'   evaluated, so it is kept only to leave the signature unchanged.
#' @param cols Colours used for the groups; at least `N` entries are required.
#' @return The object returned by `arrangeGrob()`; it is returned, not drawn.
plot_color_clust <- function(X, N = N,
                             cols = c("red", "blue", "orange", "darkgreen",
                                      "green", "yellow", "grey", "black",
                                      "white")) {
  # Validate before attaching any package so that bad input fails fast and
  # with a readable message instead of "promise already under evaluation".
  if (missing(N)) {
    stop("N must be supplied: the number of clusters to cut the tree into.")
  }
  if (N > length(cols)) stop("N too big. Not enough colors in cols.")
  if (N > ncol(X)) stop("N too big. Not enough columns in data.")

  library(ggplot2)
  library(gridExtra)
  library(gtable)
  library(scales)
  library(ggdendro)
  library(grid)
  library(plyr)

  fit <- ClustOfVar::hclustvar(X.quanti = X)
  ddata_x <- dendro_data(as.dendrogram(fit))
  clusters <- cutree(fit, k = N)

  # Tag every dendrogram label with its cluster ("clust1", "clust2", ...) in
  # one vectorised lookup by name instead of growing a vector in a loop.
  lab <- ggdendro::label(ddata_x)
  label_pos <- match(as.character(lab$label), names(clusters))
  lab$group <- paste0("clust", clusters[label_pos])

  # Dendrogram. Compared with the earlier draft of this plot: coord_flip() is
  # added once (a second one only replaces the first and emits a message),
  # `axis.title.x` is given once (passing it twice to one theme() call is an
  # error when theme() matches it as a named argument), and the deprecated
  # `axis.ticks.margin` setting is dropped.
  p1 <- ggplot(segment(ddata_x)) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_text(
      data = lab,
      aes(label = label, x = x, y = 0, colour = group),
      hjust = 1
    ) +
    scale_colour_manual(values = cols) +
    scale_y_continuous(limits = c(-0.1, 2.1)) +
    coord_flip() +
    theme(
      legend.position = "none",
      axis.title.y = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_text(angle = 0, hjust = 0)
    ) +
    theme(
      axis.text = element_blank(),
      axis.title = element_blank(),
      axis.ticks = element_blank(),
      axis.ticks.length = unit(0, "cm")
    )

  # One tile per label, ordered as in the dendrogram, filled by cluster.
  df2 <- data.frame(
    cluster = clusters,
    states = factor(fit$labels, levels = fit$labels[fit$order])
  )
  # Mean position of each cluster's members: where its number is printed.
  df3 <- ddply(df2, .(cluster), summarise, pos = mean(as.numeric(states)))

  p2 <- ggplot(df2, aes(states, y = 1, fill = factor(cluster))) +
    geom_tile() +
    scale_y_continuous(expand = c(0, 0)) +
    theme(
      axis.title = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      legend.position = "none"
    ) +
    coord_flip() +
    geom_text(data = df3, aes(x = pos, label = cluster)) +
    scale_fill_manual(name = "This is my title", values = cols)

  gp1 <- ggplotGrob(p1)
  gp2 <- ggplotGrob(p2)

  # Give both grobs the same heights for layout rows 2 to 5.
  # NOTE(review): rows 2:5 are hard-coded; presumably they were the rows
  # around the panel in the ggplot2 version this was written for - confirm
  # against the installed ggplot2 layout.
  maxHeight <- grid::unit.pmax(gp1$heights[2:5], gp2$heights[2:5])
  gp1$heights[2:5] <- as.list(maxHeight)
  gp2$heights[2:5] <- as.list(maxHeight)

  # To draw immediately instead of returning the grob:
  # grid.arrange(gp2, gp1, ncol = 2, widths = c(1 / 6, 5 / 6))
  arrangeGrob(gp2, gp1, ncol = 2, widths = c(1 / 6, 5 / 6))
}
# Build the figure with the tree cut into six clusters.
plot_color_clust(X, N = 6)
Questions:
These two parts (the colored tiles on the left and the clustering tree on the right) have inconsistent heights. How do we adjust their heights so that they match each other?
How can we make the tree on the right side shorter so that the state names (the clustered subjects) have enough space to be fully displayed?
Is there a way to make the white space between those two parts smaller?
Your tweaking of the code is appreciated. Thanks.
One major change: rather than matching the heights of the two charts, I extract the plot panel from gp2 and then insert it into column 2 of gp1. The extracted panel has no surrounding margins, which partly takes care of your point 3.
With respect to point 2: expand the limits of the axis to make room for the labels (see point 2. in the code below). The parameters for points 2 and 3 were set by trial and error; adjusting one of them means the other needs to be adjusted too.
With respect to point 1: expand the axis using the additive component of `expand`
to add half a unit to each end of the axis (see point 1. in the code below).
Minor edit: updating to ggplot2 2.2.0 and R 3.3.2
axis.ticks.margin
is deprecated
# Transpose the built-in USArrests data so that each state becomes a column.
X <- t(USArrests)
#' Draw a dendrogram with cluster-coloured labels and a strip of cluster tiles.
#'
#' Runs `ClustOfVar::hclustvar()` on `X`, cuts the tree into `N` groups and
#' builds a flipped dendrogram whose labels are coloured by group. A second
#' plot of tiles filled by group is built only for its panel, which is
#' inserted to the left of the dendrogram panel, so both share one layout
#' row. The result is drawn on a new grid page.
#'
#' @param X Data passed to `hclustvar()` as `X.quanti`; `N` may not exceed
#'   `ncol(X)`.
#' @param N Number of groups to cut the tree into. It must always be supplied:
#'   the default written in the signature refers to itself and can never be
#'   evaluated, so it is kept only to leave the signature unchanged.
#' @param cols Colours used for the groups; at least `N` entries are required.
#'   Defaults to `rainbow(N)`; an earlier version used a fixed palette of nine
#'   named colours.
#' @return Invisibly, the combined gtable that was drawn.
plot_color_clust <- function(X, N = N, cols = rainbow(N)) {
  # Validate before attaching any package so that bad input fails fast and
  # with a readable message instead of "promise already under evaluation".
  if (missing(N)) {
    stop("N must be supplied: the number of clusters to cut the tree into.")
  }
  if (N > length(cols)) stop("N too big. Not enough colors in cols.")
  if (N > ncol(X)) stop("N too big. Not enough columns in data.")

  library(ggplot2)
  library(gtable)
  library(grid)
  library(ggdendro)
  library(plyr)

  fit <- ClustOfVar::hclustvar(X.quanti = X)
  ddata_x <- dendro_data(as.dendrogram(fit))
  clusters <- cutree(fit, k = N)

  # Tag every dendrogram label with its cluster ("clust1", "clust2", ...) in
  # one vectorised lookup by name instead of growing a vector in a loop.
  lab <- ggdendro::label(ddata_x)
  label_pos <- match(as.character(lab$label), names(clusters))
  lab$group <- paste0("clust", clusters[label_pos])

  p1 <- ggplot(segment(ddata_x)) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    # y = -.05 adds a little space between label and tree
    geom_text(
      data = lab,
      aes(label = label, x = x, y = -.05, colour = group),
      size = 4, hjust = 1
    ) +
    # 1. Add half a unit to each end of the vertical axis
    scale_x_continuous(expand = c(0, .5)) +
    # 2. Make room for labels
    expand_limits(y = -0.4) +
    theme_classic() +
    scale_colour_manual(values = cols) +
    coord_flip() +
    theme(
      legend.position = "none", axis.line = element_blank(),
      axis.text = element_blank(), axis.title = element_blank(),
      axis.ticks = element_blank(),
      axis.ticks.length = unit(0, "cm")
    )

  # One tile per label, ordered as in the dendrogram, filled by cluster.
  df2 <- data.frame(
    cluster = clusters,
    states = factor(fit$labels, levels = fit$labels[fit$order])
  )
  # Mean position of each cluster's members: where its number is printed.
  df3 <- ddply(df2, .(cluster), summarise, pos = mean(as.numeric(states)))

  # 'as.character' - so that colours match with 10 or more clusters.
  # NOTE(review): `size = 12` sits inside aes(), so it is mapped through the
  # size scale rather than used as a literal text size; it is left unchanged
  # here so the rendered figure stays the same.
  p2 <- ggplot(df2, aes(states, y = 1,
                        fill = factor(as.character(cluster)))) +
    geom_tile() +
    scale_y_continuous(expand = c(0, 0)) +
    scale_x_discrete(expand = c(0, 0)) +
    coord_flip() +
    geom_text(data = df3, aes(x = pos, label = cluster, size = 12)) +
    scale_fill_manual(values = cols)

  gp1 <- ggplotGrob(p1) # Get ggplot grobs
  gp2 <- ggplotGrob(p2)

  # Locate layout cells by name rather than by fixed index: the row/column
  # numbers of the panel differ between ggplot2 releases (the original
  # indices [6, 4] and column 2 match the ggplot2 2.2.0 layout).
  cell_of <- function(gt, name) {
    hit <- gt$layout[gt$layout$name == name, c("t", "l"), drop = FALSE]
    if (nrow(hit) != 1L) {
      stop("Expected exactly one '", name, "' cell in the plot layout.")
    }
    hit
  }
  tiles_cell <- cell_of(gp2, "panel")
  tree_panel <- cell_of(gp1, "panel")
  tree_ylab <- cell_of(gp1, "ylab-l") # blank axis-title column left of tree

  # 3. Grab plot panel only from tiles plot (thus, no margins)
  tiles <- gp2[tiles_cell$t, tiles_cell$l]
  # 3. Insert it into dendrogram plot, on the same row as its panel
  gp1 <- gtable_add_grob(gp1, tiles,
                         t = tree_panel$t, l = tree_ylab$l, name = "tiles")
  # 3. Set width of column containing tiles
  gp1$widths[tree_ylab$l] <- unit(1, "cm")

  grid.newpage()
  grid.draw(gp1)
  invisible(gp1)
}
# Draw the figure with the tree cut into six clusters.
plot_color_clust(X, N = 6)
这篇关于R:ggplot对聚类摘要进行微调的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!