在facet_wrap中绘制平均线 [英] Plot average line in a facet_wrap
问题描述
我有以下资料集:
# Example data: one row per cluster (6 rows). The first 14 columns hold
# per-cluster values between 0 and 1 (plotted below as percentages); `count`
# and `cluster` are the last two columns.
# Bound to `nbhpp` because the reshaping snippet below reads that name.
nbhpp <- data.frame(
  Geschaeft = c(
    0.0961028525512254, 0.0753516756309475, 0,
    0.0722803347280335, 0, 0.000877706260971328
  ),
  Gaststaette = c(
    0.0981116914423463, 0.0789718659495242, 0.0336538461538462,
    0.0905857740585774, 0, 0.00175541252194266
  ),
  Bank = c(
    0.100843712334271, 0.0717832023169218, 0.00480769230769231,
    0.025, 0.00571428571428572, 0.00965476887068461
  ),
  Hausarzt = c(
    0.0633989554037766, 0.0589573851882499, 0.0288461538461538,
    0.0217573221757322, 0.00685714285714286, 0.0128730251609128
  ),
  Einr..F..Aeltere = c(
    0.0337484933708317, 0.0550268928423666, 0.00480769230769231,
    0, 0.00114285714285714, 0.000292568753657109
  ),
  Park = c(
    0.0738449176376055, 0.0726623913942904, 0.0625,
    0.0846234309623431, 0.00228571428571429, 0.112053832650673
  ),
  Sportstaette = c(
    0.0449979911611089, 0.0612846503930492, 0.00480769230769231,
    0.0619246861924686, 0.00114285714285714, 0
  ),
  OEPNV = c(
    0.10847730012053, 0.089056681836988, 0.264423076923077,
    0.135669456066946, 0, 0.185488589818607
  ),
  Mangel.an.Gruenflaechen = c(
    0.0867818400964243, 0.071369466280513, 0.144230769230769,
    0.117259414225941, 0.260571428571429, 0.186951433586893
  ),
  Kriminalitaet = c(
    0.108316593009241, 0.083678113363674, 0.389423076923077,
    0.139330543933054, 0.334857142857143, 0.216500877706261
  ),
  Auslaender = c(
    0.00715146645239052, 0.0212039718659495, 0.0480769230769231,
    0.0550209205020921, 0.0114285714285714, 0
  ),
  Umweltbelastung = c(
    0.108879067898755, 0.0846607364501448, 0,
    0.143828451882845, 0.376, 0.228203627852545
  ),
  Einr..f..Kinder = c(
    0.0693451185214946, 0.0825403392635499, 0.0144230769230769,
    0.0527196652719665, 0, 0.0444704505558806
  ),
  Einr..f..Jugendliche = c(
    0, 0.0934526272238312, 0,
    0, 0, 0.000877706260971328
  ),
  count = c(1466, 1821, 81, 1491, 330, 793),
  cluster = c(1, 2, 3, 4, 5, 6)
)
我用以下代码对其进行排序:
# Reshape to long format: one row per (cluster, variable) pair. The `count`
# column is dropped by name rather than by position (it is column 15).
mdf <- melt(nbhpp[, setdiff(names(nbhpp), "count")], id.vars = "cluster")
# Order the `variable` factor by its mean value so the x axis runs from the
# lowest to the highest average; reorder() also stores those means in the
# factor's "scores" attribute.
mdf <- transform(mdf, variable = reorder(variable, value, mean), y = cluster)
并用以下代码绘图:
# Line plot of each variable's value, one facet per cluster.
ggplot(
  mdf,
  aes(x = variable, y = value, group = cluster, colour = factor(cluster))
) +
  geom_line() +
  # `labels = scales::percent` replaces the removed `formatter = "percent"`.
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # theme()/element_text() replace the defunct opts()/theme_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3)
如果我理解正确,转换函数按平均值对数据进行排序。但是,如何将这些平均值作为灰线添加到每个图中?
感谢您的帮助
更新:
只是澄清:
如果查看 reorder 语句的输出
# Show the reordered factor; its "scores" attribute holds the mean of `value`
# for each level of `variable`.
reorder(mdf$variable, mdf$value, FUN = mean)
那么我会得到以下属性:
attr(,"scores")
Einr..f..Jugendliche Einr..F..Aeltere Auslaender Sportstaette
0.01572172 0.01583642 0.02381364 0.02902631
Hausarzt Bank Geschaeft Einr..f..Kinder
0.03211500 0.03630061 0.04076876 0.04391644
Gaststaette Park OEPNV Mangel.an.Gruenflaechen
0.05051310 0.06799505 0.13051918 0.14452739
Umweltbelastung Kriminalitaet
0.15692865 0.21201772
它们在图中按从左(最低)到右(最高)的顺序排列。
问题是,如何利用这些属性绘制一条线...
编辑答案
要添加一条表示各变量平均值的线,您需要构建一个包含这些数据的 data.frame
。您可以从 mdf
中提取这些值:
# Per-variable means computed by reorder(), stored as the "scores" attribute
# of the reordered factor.
meanscores <- attr(mdf$variable, "scores", exact = TRUE)
clusters <- sort(unique(mdf$cluster))
# Repeat the full set of means once per cluster so every facet gets its own
# copy of the reference line. Sizes are derived from the data instead of
# hard-coding 6 clusters x 14 variables, and the factor levels are aligned
# with mdf$variable so the x-axis order is kept.
meandf <- data.frame(
  variable = factor(
    rep(names(meanscores), times = length(clusters)),
    levels = levels(mdf$variable)
  ),
  value = rep(unname(meanscores), times = length(clusters)),
  cluster = rep(clusters, each = length(meanscores))
)
然后使用 geom_line
:
# Faceted line plot per cluster, with the per-variable means from `meandf`
# drawn as a grey reference line in every facet.
ggplot(
  mdf,
  aes(x = variable, y = value, group = cluster, colour = factor(cluster))
) +
  geom_line() +
  # `labels = scales::percent` replaces the removed `formatter = "percent"`.
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # theme()/element_text() replace the defunct opts()/theme_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3) +
  # The constant colour overrides the per-cluster colour mapping.
  geom_line(data = meandf, aes(x = variable, y = value), colour = "grey50")
原始答案
我最初的理解是,你想要一条表示总体平均值的水平线。
只需在图中添加 geom_hline
图层,然后将 yintercept
映射到 mean(value)
:
# Faceted line plot per cluster, with a grey horizontal line at the mean of
# `value`.
ggplot(
  mdf,
  aes(x = variable, y = value, group = cluster, colour = factor(cluster))
) +
  geom_line() +
  # `labels = scales::percent` replaces the removed `formatter = "percent"`.
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # theme()/element_text() replace the defunct opts()/theme_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3) +
  geom_hline(aes(yintercept = mean(value)), colour = "grey50")
I have the following data set:
# Example data: one row per cluster (6 rows). The first 14 columns hold
# per-cluster values between 0 and 1 (plotted below as percentages); `count`
# and `cluster` are the last two columns.
# Bound to `nbhpp` because the reshaping snippet below reads that name.
nbhpp <- data.frame(
  Geschaeft = c(
    0.0961028525512254, 0.0753516756309475, 0,
    0.0722803347280335, 0, 0.000877706260971328
  ),
  Gaststaette = c(
    0.0981116914423463, 0.0789718659495242, 0.0336538461538462,
    0.0905857740585774, 0, 0.00175541252194266
  ),
  Bank = c(
    0.100843712334271, 0.0717832023169218, 0.00480769230769231,
    0.025, 0.00571428571428572, 0.00965476887068461
  ),
  Hausarzt = c(
    0.0633989554037766, 0.0589573851882499, 0.0288461538461538,
    0.0217573221757322, 0.00685714285714286, 0.0128730251609128
  ),
  Einr..F..Aeltere = c(
    0.0337484933708317, 0.0550268928423666, 0.00480769230769231,
    0, 0.00114285714285714, 0.000292568753657109
  ),
  Park = c(
    0.0738449176376055, 0.0726623913942904, 0.0625,
    0.0846234309623431, 0.00228571428571429, 0.112053832650673
  ),
  Sportstaette = c(
    0.0449979911611089, 0.0612846503930492, 0.00480769230769231,
    0.0619246861924686, 0.00114285714285714, 0
  ),
  OEPNV = c(
    0.10847730012053, 0.089056681836988, 0.264423076923077,
    0.135669456066946, 0, 0.185488589818607
  ),
  Mangel.an.Gruenflaechen = c(
    0.0867818400964243, 0.071369466280513, 0.144230769230769,
    0.117259414225941, 0.260571428571429, 0.186951433586893
  ),
  Kriminalitaet = c(
    0.108316593009241, 0.083678113363674, 0.389423076923077,
    0.139330543933054, 0.334857142857143, 0.216500877706261
  ),
  Auslaender = c(
    0.00715146645239052, 0.0212039718659495, 0.0480769230769231,
    0.0550209205020921, 0.0114285714285714, 0
  ),
  Umweltbelastung = c(
    0.108879067898755, 0.0846607364501448, 0,
    0.143828451882845, 0.376, 0.228203627852545
  ),
  Einr..f..Kinder = c(
    0.0693451185214946, 0.0825403392635499, 0.0144230769230769,
    0.0527196652719665, 0, 0.0444704505558806
  ),
  Einr..f..Jugendliche = c(
    0, 0.0934526272238312, 0,
    0, 0, 0.000877706260971328
  ),
  count = c(1466, 1821, 81, 1491, 330, 793),
  cluster = c(1, 2, 3, 4, 5, 6)
)
which I sort with
# Reshape to long format: one row per (cluster, variable) pair. The `count`
# column is dropped by name rather than by position (it is column 15).
mdf <- melt(nbhpp[, setdiff(names(nbhpp), "count")], id.vars = "cluster")
# Order the `variable` factor by its mean value so the x axis runs from the
# lowest to the highest average; reorder() also stores those means in the
# factor's "scores" attribute.
mdf <- transform(mdf, variable = reorder(variable, value, mean), y = cluster)
and plot with
# Line plot of each variable's value, one facet per cluster.
ggplot(
  mdf,
  aes(x = variable, y = value, group = cluster, colour = factor(cluster))
) +
  geom_line() +
  # `labels = scales::percent` replaces the removed `formatter = "percent"`.
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # theme()/element_text() replace the defunct opts()/theme_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3)
If I understand it correctly, the transform call sorts the data by the average values. But how can I include these average values as a grey line in each plot?
Thanks for your help
UPDATE:
Just for clarification:
If I take a look at the output of the reorder statement
# Show the reordered factor; its "scores" attribute holds the mean of `value`
# for each level of `variable`.
reorder(mdf$variable, mdf$value, FUN = mean)
then I get the following attributes:
attr(,"scores")
Einr..f..Jugendliche Einr..F..Aeltere Auslaender Sportstaette
0.01572172 0.01583642 0.02381364 0.02902631
Hausarzt Bank Geschaeft Einr..f..Kinder
0.03211500 0.03630061 0.04076876 0.04391644
Gaststaette Park OEPNV Mangel.an.Gruenflaechen
0.05051310 0.06799505 0.13051918 0.14452739
Umweltbelastung Kriminalitaet
0.15692865 0.21201772
These are sorted in the plot from left (lowest) to right (highest). The question is how to draw a line with these attributes...
Edited answer
To add a line with the per-variable averages (the means taken across all clusters), you need to construct a data.frame
that contains the data. You can extract the values from mdf
:
# Per-variable means computed by reorder(), stored as the "scores" attribute
# of the reordered factor.
meanscores <- attr(mdf$variable, "scores", exact = TRUE)
clusters <- sort(unique(mdf$cluster))
# Repeat the full set of means once per cluster so every facet gets its own
# copy of the reference line. Sizes are derived from the data instead of
# hard-coding 6 clusters x 14 variables, and the factor levels are aligned
# with mdf$variable so the x-axis order is kept.
meandf <- data.frame(
  variable = factor(
    rep(names(meanscores), times = length(clusters)),
    levels = levels(mdf$variable)
  ),
  value = rep(unname(meanscores), times = length(clusters)),
  cluster = rep(clusters, each = length(meanscores))
)
Then plot using geom_line
:
# Faceted line plot per cluster, with the per-variable means from `meandf`
# drawn as a grey reference line in every facet.
ggplot(
  mdf,
  aes(x = variable, y = value, group = cluster, colour = factor(cluster))
) +
  geom_line() +
  # `labels = scales::percent` replaces the removed `formatter = "percent"`.
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # theme()/element_text() replace the defunct opts()/theme_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3) +
  # The constant colour overrides the per-cluster colour mapping.
  geom_line(data = meandf, aes(x = variable, y = value), colour = "grey50")
Original answer
My original interpretation was that you wanted a horizontal line with overall means.
Simply add a geom_hline
layer to your plot, and map the yintercept
to mean(value)
:
# Faceted line plot per cluster, with a grey horizontal line at the mean of
# `value`.
ggplot(
  mdf,
  aes(x = variable, y = value, group = cluster, colour = factor(cluster))
) +
  geom_line() +
  # `labels = scales::percent` replaces the removed `formatter = "percent"`.
  scale_y_continuous("Anteile", labels = scales::percent) +
  scale_colour_hue(name = "Cluster") +
  xlab("Infrastrukturmerkmal") +
  theme_bw() +
  # theme()/element_text() replace the defunct opts()/theme_text().
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  facet_wrap(~cluster, ncol = 3) +
  geom_hline(aes(yintercept = mean(value)), colour = "grey50")
这篇关于在facet_wrap中绘制平均线的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!