在 ggplot2 中按聚类分组为密度图着色 [英] Colour Density plots in ggplot2 by cluster groups

查看:97
本文介绍了Color Density通过群组在ggplot2中绘图的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我对一些数值进行了聚类,然后对它们进行分组。接着使用 ggplot2 绘制密度图,并把各个聚类叠加在一起。示例图如下:



对于群集中的每个组,我绘制一个密度图并覆盖它们。密度图中的着色对应于聚类中的分组。

我的问题是,我已经根据分组手动分割数据,并将它们放入各自的文本表(请参阅下面的代码)。这是非常低效的,对于大数据集可能会非常繁琐。我怎样才能在 ggplot2 中动态地绘制密度图,而不将它们分离到各自的文本表中?



原始输入表在分割前看起来像这样:

 # Per-file score summary, one row per input file. The translated snippet had
 # lost its quotes, its assignment arrow and its column names; this is the
 # runnable form. `text =` lets read.table() manage its own connection instead
 # of leaving an unclosed textConnection behind.
 scores <- read.table(text = "
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
", header = TRUE)

我用来生成图的代码:
请注意,将基本图形与网格结合在一起仍然无法正常工作。


library(ggplot2)
library(grid)

# Base-graphics layout: a 2 x 3 grid in which region 1 fills the whole first
# column and regions 2-5 fill the remaining cells row by row.
layout(matrix(c(1, 2, 3, 1, 4, 5), 2, 3, byrow = TRUE))

# Start a fresh grid page and push a viewport whose layout has `x` rows and
# `y` columns, so later plots can be placed into individual cells.
# (The translated snippet had `< -`, which R parses as a comparison rather
# than an assignment, so the function was never defined.)
vp.setup <- function(x, y) {
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(x, y)))
}

# Build a viewport that targets the layout cell at row `x`, column `y`.
# (The translated snippet had lost the leading `#` of its comment and used
# `< -`, which R parses as a comparison rather than an assignment.)
vp.layout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}

# Open a new grid page laid out as 2 rows by 3 columns.
vp.setup(2,3)

# Per-file average used as the clustering input.
# (Restored from the mangled translation: quotes, assignment arrows, column
# headers and the rows that had been fused together with `$ b $`.)
file_vals <- read.table(text = "
file        avg_vals
133         1.5923
132         1.6351
1010        1.6532
104         1.6824
105         1.6087
39          1.8694
32          1.9934
31          1.9919
37          1.8638
36          1.9691
35          1.9802
1           1.7283
5           1.7637
", header = TRUE)

# Hand-split copies of the score table, one per cluster colour.
red <- read.table(text = "
file        max        min        avg               lowest
31          4328.0     5559.0     4858.0            4839.0
32          4334.0     5558.0     4864.0            4843.0
36          4360.0     5580.0     4891.0            4867.0
35          4304.0     5578.0     4842.0            4831.0
", header = TRUE)

blue <- read.table(text = "
file        max        min        avg               lowest
133         4720.0     6064.0     5299.0            5277.0
105         4636.0     5905.0     5193.0            5165.0
104         4329.0     5554.0     4858.0            4838.0
132         5112.0     6520.0     5728.0            5699.0
1010        5008.0     6291.0     5591.0            5545.0
", header = TRUE)

green <- read.table(text = "
file        max        min        avg               lowest
39          4486.0     5736.0     5031.0            5006.0
37          4444.0     5663.0     4979.0            4952.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
", header = TRUE)


# Perform clustering: hierarchical clustering of the per-file averages, cut
# into three groups.
d <- dist(file_vals$avg_vals, method = "euclidean")
# "ward" was renamed to "ward.D" in R 3.1.0; the old spelling runs the same
# algorithm but emits a message on every call.
fit <- hclust(d, method = "ward.D")
plot(fit, labels = file_vals$file)
groups <- cutree(fit, k = 3)

cols <- c("red", "blue", "green", "purple", "orange", "magenta", "brown",
          "chartreuse4", "darkgray", "cyan1")
rect.hclust(fit, k = 3, border = cols)


# Density plots: one panel per score column, coloured by cluster.
# The fill values are named so each cluster keeps its own colour regardless of
# how the levels of factor(Cluster) are ordered (alphabetical when Cluster is
# a character column, the data.frame() default since R 4.0).
# NOTE: `max` and `min` mask the base functions of the same name as variables;
# the names are kept so existing references keep working.
cluster_fill <- c(Red = "red", Blue = "blue", Green = "green")

dat <- rbind(
  data.frame(Cluster = "Red", max_vals = red$max),
  data.frame(Cluster = "Blue", max_vals = blue$max),
  data.frame(Cluster = "Green", max_vals = green$max)
)
max <- ggplot(dat, aes(x = max_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(3500, 5500)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(max, vp = vp.layout(1, 2))

dat <- rbind(
  data.frame(Cluster = "Red", min_vals = red$min),
  data.frame(Cluster = "Blue", min_vals = blue$min),
  data.frame(Cluster = "Green", min_vals = green$min)
)
min <- ggplot(dat, aes(x = min_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(5000, 7000)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(min, vp = vp.layout(1, 3))

dat <- rbind(
  data.frame(Cluster = "Red", avg_vals = red$avg),
  data.frame(Cluster = "Blue", avg_vals = blue$avg),
  data.frame(Cluster = "Green", avg_vals = green$avg)
)
avg <- ggplot(dat, aes(x = avg_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(4000, 6000)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(avg, vp = vp.layout(2, 2))

dat <- rbind(
  data.frame(Cluster = "Red", lowest_vals = red$lowest),
  data.frame(Cluster = "Blue", lowest_vals = blue$lowest),
  data.frame(Cluster = "Green", lowest_vals = green$lowest)
)
lowest <- ggplot(dat, aes(x = lowest_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(4000, 6000)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(lowest, vp = vp.layout(2, 3))

p

解决方案

通过这种方式,您可以使用4个面板自动创建所需的绘图。

首先,数据:

 # Per-file score summary, one row per input file (restored from the mangled
 # translation). `text =` lets read.table() manage its own connection.
 scores <- read.table(text = "
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
", header = TRUE)

# Per-file average used as the clustering input (restored from the mangled
# translation; the row for file 104 had been corrupted to 104.8024 and is
# 1.6824 in the English original).
file_vals <- read.table(text = "
file        avg_vals
133         1.5923
132         1.6351
1010        1.6532
104         1.6824
105         1.6087
39          1.8694
32          1.9934
31          1.9919
37          1.8638
36          1.9691
35          1.9802
1           1.7283
5           1.7637
", header = TRUE)



两个数据框可以合并为一个:









# Join the score columns and the per-file average on the shared `file` key.
dat <- merge(scores, file_vals, by = "file")

拟合(聚类):

# Hierarchical clustering of the per-file averages, cut into three groups.
d <- dist(dat$avg_vals, method = "euclidean")
# "ward" was renamed to "ward.D" in R 3.1.0; the old spelling runs the same
# algorithm but emits a message on every call.
fit <- hclust(d, method = "ward.D")
groups <- cutree(fit, k = 3)
cols <- c("red", "blue", "green", "purple", "orange", "magenta", "brown",
          "chartreuse4", "darkgray", "cyan1")

添加一个包含颜色名称的列(基于fit) :

  # Look up one colour name per row from its cluster number.
  dat$group <- cols[groups]

$ b

将数据重新整形为长格式:

  # Stack the four score columns into a single `value` column; `time` records
  # which score each row came from. `file` and `avg_vals` are dropped.
  score_cols <- c("max", "min", "avg", "lowest")
  dat_re <- reshape(
    dat,
    varying = score_cols,
    v.names = "value",
    times = score_cols,
    idvar = "group",
    drop = c("file", "avg_vals"),
    direction = "long",
    # Row names are numbered from the frame actually being reshaped.
    new.row.names = seq_len(nrow(dat) * length(score_cols))
  )

Plot:

  # `group` already holds the colour name of each cluster, so the identity
  # scale paints every density with exactly the colour it is labelled with.
  # (A manual scale with unnamed values assigns colours by sorted level order,
  # which would draw the "blue" group in red.)
  p <- ggplot(dat_re, aes(x = value)) +
    geom_density(aes(fill = group), alpha = .3) +
    scale_fill_identity(guide = "legend") +
    labs(fill = "Clusters") +
    facet_wrap(~ time)

  print(p)


I have been clustering some of my values and then grouping them. I then plot some density plots using ggplot2 and overlay the clusters. An example image is below:

For each group in the cluster I plot a density plot and overlay them. The colouring in the density plots corresponds to the groupings in the clustering.

My problem is, I have split the data manually based on the groupings and put them in their own individual text tables (see code below). This is very inefficient and can become very tedious for large data sets. How can I dynamically plot the density plots in ggplot2 without separating the clusters into their own individual text tables?

The original input table looks like this before it was split:

# Per-file score summary, one row per input file. Passing the table through
# `text =` lets read.table() manage its own connection instead of leaving an
# unclosed textConnection behind.
scores <- read.table(text = "
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
", header = TRUE)

The code I used to generate the plot: Please note combining the base graphics with grid is still not working correctly

library(ggplot2)
library(grid)

# Base-graphics layout: a 2 x 3 grid in which region 1 fills the whole first
# column and regions 2-5 fill the remaining cells row by row.
# NOTE(review): layout() configures base graphics only; the ggplot2 panels
# further down are positioned through grid viewports, and the author reports
# that the two do not combine correctly yet.
layout(matrix(c(1,2,3,1,4,5), 2, 3, byrow = TRUE))

# Start a fresh grid page and push a viewport whose layout has `x` rows and
# `y` columns, so later plots can be placed into individual cells.
vp.setup <- function(x, y) {
  grid.newpage()
  page_layout <- grid.layout(nrow = x, ncol = y)
  pushViewport(viewport(layout = page_layout))
}

# Build a viewport that targets a single layout cell: row `x`, column `y`.
vp.layout <- function(x, y) {
  viewport(
    layout.pos.row = x,
    layout.pos.col = y
  )
}

# Open a new grid page laid out as 2 rows by 3 columns.
vp.setup(2,3)

# Per-file average used as the clustering input. `text =` lets read.table()
# manage its own connection instead of leaving an unclosed textConnection.
file_vals <- read.table(text = "
file        avg_vals
133         1.5923
132         1.6351
1010        1.6532
104         1.6824
105         1.6087
39          1.8694
32          1.9934
31          1.9919
37          1.8638
36          1.9691
35          1.9802
1           1.7283
5           1.7637
", header = TRUE)

# Hand-split copies of the score table, one per cluster colour.
red <- read.table(text = "
file        max        min        avg               lowest
31          4328.0     5559.0     4858.0            4839.0
32          4334.0     5558.0     4864.0            4843.0
36          4360.0     5580.0     4891.0            4867.0
35          4304.0     5578.0     4842.0            4831.0
", header = TRUE)

blue <- read.table(text = "
file        max        min        avg               lowest
133         4720.0     6064.0     5299.0            5277.0
105         4636.0     5905.0     5193.0            5165.0
104         4329.0     5554.0     4858.0            4838.0
132         5112.0     6520.0     5728.0            5699.0
1010        5008.0     6291.0     5591.0            5545.0
", header = TRUE)

green <- read.table(text = "
file        max        min        avg               lowest
39          4486.0     5736.0     5031.0            5006.0
37          4444.0     5663.0     4979.0            4952.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
", header = TRUE)


# Perform clustering: hierarchical clustering of the per-file averages, cut
# into three groups.
d <- dist(file_vals$avg_vals, method = "euclidean")
# "ward" was renamed to "ward.D" in R 3.1.0; the old spelling runs the same
# algorithm but emits a message on every call.
fit <- hclust(d, method = "ward.D")
plot(fit, labels = file_vals$file)
groups <- cutree(fit, k = 3)

# Border colours for the cluster rectangles; only the first three are used
# with k = 3.
cols <- c("red", "blue", "green", "purple", "orange", "magenta", "brown",
          "chartreuse4", "darkgray", "cyan1")
rect.hclust(fit, k = 3, border = cols)


# Density plots: one panel per score column, coloured by cluster.
# The fill values are named so each cluster keeps its own colour regardless of
# how the levels of factor(Cluster) are ordered (alphabetical when Cluster is
# a character column, the data.frame() default since R 4.0). With unnamed
# values the colours are handed out in level order and end up on the wrong
# clusters.
# NOTE: `max` and `min` mask the base functions of the same name as variables;
# the names are kept so existing references keep working.
cluster_fill <- c(Red = "red", Blue = "blue", Green = "green")

dat <- rbind(
  data.frame(Cluster = "Red", max_vals = red$max),
  data.frame(Cluster = "Blue", max_vals = blue$max),
  data.frame(Cluster = "Green", max_vals = green$max)
)
max <- ggplot(dat, aes(x = max_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(3500, 5500)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(max, vp = vp.layout(1, 2))

dat <- rbind(
  data.frame(Cluster = "Red", min_vals = red$min),
  data.frame(Cluster = "Blue", min_vals = blue$min),
  data.frame(Cluster = "Green", min_vals = green$min)
)
min <- ggplot(dat, aes(x = min_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(5000, 7000)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(min, vp = vp.layout(1, 3))

dat <- rbind(
  data.frame(Cluster = "Red", avg_vals = red$avg),
  data.frame(Cluster = "Blue", avg_vals = blue$avg),
  data.frame(Cluster = "Green", avg_vals = green$avg)
)
avg <- ggplot(dat, aes(x = avg_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(4000, 6000)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(avg, vp = vp.layout(2, 2))

dat <- rbind(
  data.frame(Cluster = "Red", lowest_vals = red$lowest),
  data.frame(Cluster = "Blue", lowest_vals = blue$lowest),
  data.frame(Cluster = "Green", lowest_vals = green$lowest)
)
lowest <- ggplot(dat, aes(x = lowest_vals)) +
  geom_density(aes(fill = factor(Cluster)), alpha = .3) +
  xlim(c(4000, 6000)) +
  scale_fill_manual(values = cluster_fill) +
  labs(fill = "Clusters")
print(lowest, vp = vp.layout(2, 3))

解决方案

In this way you can automatically create your desired plot with 4 panels.

First, the data:

# Per-file score summary, one row per input file. `text =` lets read.table()
# manage its own connection instead of leaving an unclosed textConnection.
scores <- read.table(text = "
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
", header = TRUE)

# Per-file average used as the clustering input. `text =` lets read.table()
# manage its own connection instead of leaving an unclosed textConnection.
file_vals <- read.table(text = "
file        avg_vals
133         1.5923
132         1.6351
1010        1.6532
104         1.6824
105         1.6087
39          1.8694
32          1.9934
31          1.9919
37          1.8638
36          1.9691
35          1.9802
1           1.7283
5           1.7637
", header = TRUE)

Both data frames can be merged into a single one:

# Join the score columns and the per-file average on the shared `file` key.
dat <- merge(scores, file_vals, by = "file")

Fit:

# Hierarchical clustering of the per-file averages, cut into three groups.
d <- dist(dat$avg_vals, method = "euclidean")
# "ward" was renamed to "ward.D" in R 3.1.0; the old spelling runs the same
# algorithm but emits a message on every call.
fit <- hclust(d, method = "ward.D")
groups <- cutree(fit, k = 3)
# Candidate colour names, indexed by cluster number.
cols <- c("red", "blue", "green", "purple", "orange", "magenta", "brown",
          "chartreuse4", "darkgray", "cyan1")

Add a column with the colour names (based on the fit):

# Look up one colour name per row from its cluster number.
dat$group <- cols[groups]

Reshape data from wide to long format:

# Stack the four score columns into a single `value` column; `time` records
# which score each row came from. `file` and `avg_vals` are dropped.
score_cols <- c("max", "min", "avg", "lowest")
dat_re <- reshape(
  dat,
  varying = score_cols,
  v.names = "value",
  times = score_cols,
  idvar = "group",
  drop = c("file", "avg_vals"),
  direction = "long",
  # Row names are numbered from the frame actually being reshaped (not from
  # `scores`), so they stay in step with `dat` even if merge() changed the
  # row count; seq_len() also behaves correctly for an empty frame.
  new.row.names = seq_len(nrow(dat) * length(score_cols))
)

Plot:

# `group` already holds the colour name of each cluster, so the identity
# scale paints every density with exactly the colour it is labelled with.
# (A manual scale with unnamed values assigns colours by sorted level order,
# which would draw the "blue" group in red.)
p <- ggplot(dat_re, aes(x = value)) +
  geom_density(aes(fill = group), alpha = .3) +
  scale_fill_identity(guide = "legend") +
  labs(fill = "Clusters") +
  facet_wrap(~ time)

print(p)

这篇关于Color Density通过群组在ggplot2中绘图的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆