为ggplot添加图例和构造数据 [英] Adding legend and structuring data for ggplot
问题描述
数据通过dput()包含在这里
help <- structure(list(StudyArea = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("AAA", "BBB", "CCC"), class = "factor"),
Ind = structure(1:26, .Label = c("AAA_F01", "AAA_F17", "AAA_F33",
"AAA_F49", "AAA_F65", "AAA_F81", "AAA_F97", "BBB_P01", "BBB_P02",
"BBB_P03", "BBB_P04", "BBB_P05", "BBB_P06", "BBB_P07", "BBB_P08",
"BBB_P09", "BBB_P10", "BBB_P11", "BBB_P12", "CCC_F02", "CCC_F03",
"CCC_F04", "CCC_F05", "CCC_F06", "CCC_F07", "CCC_F08"), class = "factor"),
ObsValues = c(22L, 50L, 8L, 15L, 54L, 30L, 11L, 90L, 6L,
53L, 9L, 42L, 72L, 40L, 60L, 58L, 1L, 20L, 37L, 2L, 50L,
68L, 20L, 19L, 58L, 5L), AAAPred = c(28L, 52L, 6L, 15L, 35L,
31L, 13L, 79L, 6L, 58L, 5L, 42L, 88L, 49L, 68L, 60L, 1L,
26L, 46L, 0L, 34L, 71L, 20L, 15L, 35L, 5L), AAAPredSE = c(3.5027829,
4.7852191, 1.231803, 2.5244013, 4.873907, 3.8854192, 2.3532752,
6.3444402, 1.7387295, 5.605111, 1.667818, 4.4709107, 7.0437967,
5.447496, 6.0840486, 5.4371275, 0.8156916, 3.5153847, 4.698754,
0, 3.8901103, 5.993616, 3.1720272, 2.6777869, 4.5647313,
1.4864128), BBBPred = c(14L, 43L, 5L, 13L, 26L, 32L, 14L,
80L, 5L, 62L, 4L, 44L, 67L, 44L, 55L, 42L, 1L, 20L, 47L,
0L, 26L, 51L, 15L, 16L, 34L, 6L), BBBPredSE = c(3.1873435,
4.8782831, 1.3739863, 2.5752273, 4.4155679, 3.8102168, 2.3419518,
6.364606, 1.7096028, 5.6333421, 1.5861323, 4.4951428, 6.6046699,
5.302902, 5.9244328, 5.1887055, 0.8268689, 3.4014041, 4.6600598,
0, 3.8510512, 5.5776686, 3.0569531, 2.6358433, 4.5273782,
1.4263518), CCCPred = c(29L, 53L, 7L, 15L, 44L, 32L, 15L,
86L, 8L, 61L, 5L, 46L, 99L, 54L, 74L, 67L, 1L, 30L, 51L,
1L, 37L, 94L, 21L, 17L, 36L, 6L), CCCPredSE = c(3.4634488,
4.7953389, 0.9484051, 2.5207022, 5.053452, 3.8072731, 2.2764727,
6.3605968, 1.6044067, 5.590048, 1.6611899, 4.4183913, 7.0124638,
5.6495918, 6.1091934, 5.4797929, 0.8135164, 3.4353934, 4.6261147,
0.8187396, 3.7936333, 5.6512378, 3.1686123, 2.633179, 4.5841921,
1.3989955)), .Names = c("StudyArea", "Ind", "ObsValues",
"AAAPred", "AAAPredSE", "BBBPred", "BBBPredSE", "CCCPred", "CCCPredSE"
), class = "data.frame", row.names = c(NA, -26L))
help 的 head() 和 dim() 的输出也列在下面:
head(help)
StudyArea Ind ObsValues AAAPred AAAPredSE BBBPred BBBPredSE CCCPred CCCPredSE
1 AAA AAA_F01 22 28 3.502783 14 3.187343 29 3.4634488
2 AAA AAA_F17 50 52 4.785219 43 4.878283 53 4.7953389
3 AAA AAA_F33 8 6 1.231803 5 1.373986 7 0.9484051
4 AAA AAA_F49 15 15 2.524401 13 2.575227 15 2.5207022
5 AAA AAA_F65 54 35 4.873907 26 4.415568 44 5.0534520
6 AAA AAA_F81 30 31 3.885419 32 3.810217 32 3.8072731
dim(help)
> dim(help)
[1] 26 9
我是ggplot的新手,我试图制作一个图表,为每个StudyArea使用不同的颜色显示每个人的观察值和预测值。我可以手动添加点并用下面的代码强制颜色,但是这样感觉很笨重,也不会产生图例,因为我没有在aes()中指定颜色。
require(ggplot2)
ggplot(help, aes(x=Ind, y=ObsValues))+
geom_point(color="red", pch = "*", cex = 10)+
geom_point(aes(y = AAAPred), color="blue")+
geom_errorbar(aes(ymin=AAAPred-AAAPredSE, ymax=AAAPred+AAAPredSE), color = "blue")+
geom_point(aes(y = BBBPred), color="darkgreen")+
geom_errorbar(aes(ymin=BBBPred-BBBPredSE, ymax=BBBPred+BBBPredSE), color = "darkgreen")+
geom_point(aes(y = CCCPred), color="black")+
geom_errorbar(aes(ymin=CCCPred-CCCPredSE, ymax=CCCPred+CCCPredSE), color = "black")+
theme(axis.text.x=element_text(angle=30, hjust=1))
在上图中,星号是观察值,带误差线的彩色点是预测值,每个 StudyArea 各有一组。
我尝试过用 melt() 处理数据,但在绘图时遇到了更多问题。话虽如此,我猜想 melt() 或 reshape() 才是最好的选择。
关于如何最好地改变/重构 help
数据的任何建议,以便我可以为每个个体绘制观察值和预测值,并让每个 StudyArea 使用不同的颜色,我都将不胜感激。
我也希望产生一个图例 - 数据格式正确后可能的默认值
注意:得到的图确实非常杂乱,等我对 ggplot 更熟悉之后可能会加以简化。
在此先感谢。
试试这个:
library(reshape2)
x.value <- melt(help,id.vars=1:3, measure.vars=c(4,6,8))
x.se <- melt(help,id.vars=1:3, measure.vars=c(5,7,9))
gg <- data.frame(x.value,se=x.se$value)
ggplot(gg)+
geom_point(aes(x=Ind, y=ObsValues),size=5,shape=18)+
geom_point(aes(x=Ind, y=value, color=variable),size=3, shape=1)+
geom_errorbar(aes(x=Ind, ymin=value-se, ymax=value+se, color=variable))+
theme(axis.text.x=element_text(angle=-90))
产生此:
编辑::回复@ B.Davis的问题:
您必须将 ObsValues
分组为 StudyArea
,而不是变量
。但是当你这样做的时候,你会得到六种颜色,三种用于 StudyArea
,三种用于预测组(变量
)。如果我们给予预测组(例如, AAAPred
等)与StudyArea组相同的名称(例如 AAA
等等),那么 ggplot
只会产生三种颜色。
gg$variable <- substring(gg$variable,1,3) # 从组名称中删除 "Pred"
ggplot(gg)+
geom_point(aes(x=Ind, y=ObsValues, color=StudyArea),size=5,shape=18)+
geom_point(aes(x=Ind, y=value, color=variable),size=3, shape=1)+
geom_errorbar(aes(x=Ind, ymin=value-se, ymax=value+se, color=variable))+
theme(axis.text.x=element_text(angle=-90))
产生此:
In the data included below I have three sites (AAA,BBB,CCC) and individuals within each site (7, 12, 7 respectively). For each individual I have observed values (ObsValues) and three sets of predicted values each with a standard error. I have 26 rows (i.e. 26 individuals) and 9 columns.
The data is included here through dput()
# Example data set recreated from dput() output: a data.frame with 26 rows and
# 9 columns. StudyArea and Ind are factors (3 and 26 levels), ObsValues holds
# the observed integer values, and AAAPred/BBBPred/CCCPred hold integer
# predictions, each paired with a numeric *PredSE standard-error column.
# NOTE(review): `help` is also the name of R's built-in help() function; a more
# descriptive object name would avoid confusion.
help <- structure(list(StudyArea = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("AAA", "BBB", "CCC"), class = "factor"),
Ind = structure(1:26, .Label = c("AAA_F01", "AAA_F17", "AAA_F33",
"AAA_F49", "AAA_F65", "AAA_F81", "AAA_F97", "BBB_P01", "BBB_P02",
"BBB_P03", "BBB_P04", "BBB_P05", "BBB_P06", "BBB_P07", "BBB_P08",
"BBB_P09", "BBB_P10", "BBB_P11", "BBB_P12", "CCC_F02", "CCC_F03",
"CCC_F04", "CCC_F05", "CCC_F06", "CCC_F07", "CCC_F08"), class = "factor"),
ObsValues = c(22L, 50L, 8L, 15L, 54L, 30L, 11L, 90L, 6L,
53L, 9L, 42L, 72L, 40L, 60L, 58L, 1L, 20L, 37L, 2L, 50L,
68L, 20L, 19L, 58L, 5L), AAAPred = c(28L, 52L, 6L, 15L, 35L,
31L, 13L, 79L, 6L, 58L, 5L, 42L, 88L, 49L, 68L, 60L, 1L,
26L, 46L, 0L, 34L, 71L, 20L, 15L, 35L, 5L), AAAPredSE = c(3.5027829,
4.7852191, 1.231803, 2.5244013, 4.873907, 3.8854192, 2.3532752,
6.3444402, 1.7387295, 5.605111, 1.667818, 4.4709107, 7.0437967,
5.447496, 6.0840486, 5.4371275, 0.8156916, 3.5153847, 4.698754,
0, 3.8901103, 5.993616, 3.1720272, 2.6777869, 4.5647313,
1.4864128), BBBPred = c(14L, 43L, 5L, 13L, 26L, 32L, 14L,
80L, 5L, 62L, 4L, 44L, 67L, 44L, 55L, 42L, 1L, 20L, 47L,
0L, 26L, 51L, 15L, 16L, 34L, 6L), BBBPredSE = c(3.1873435,
4.8782831, 1.3739863, 2.5752273, 4.4155679, 3.8102168, 2.3419518,
6.364606, 1.7096028, 5.6333421, 1.5861323, 4.4951428, 6.6046699,
5.302902, 5.9244328, 5.1887055, 0.8268689, 3.4014041, 4.6600598,
0, 3.8510512, 5.5776686, 3.0569531, 2.6358433, 4.5273782,
1.4263518), CCCPred = c(29L, 53L, 7L, 15L, 44L, 32L, 15L,
86L, 8L, 61L, 5L, 46L, 99L, 54L, 74L, 67L, 1L, 30L, 51L,
1L, 37L, 94L, 21L, 17L, 36L, 6L), CCCPredSE = c(3.4634488,
4.7953389, 0.9484051, 2.5207022, 5.053452, 3.8072731, 2.2764727,
6.3605968, 1.6044067, 5.590048, 1.6611899, 4.4183913, 7.0124638,
5.6495918, 6.1091934, 5.4797929, 0.8135164, 3.4353934, 4.6261147,
0.8187396, 3.7936333, 5.6512378, 3.1686123, 2.633179, 4.5841921,
1.3989955)), .Names = c("StudyArea", "Ind", "ObsValues",
"AAAPred", "AAAPredSE", "BBBPred", "BBBPredSE", "CCCPred", "CCCPredSE"
), class = "data.frame", row.names = c(NA, -26L))
The head()
and dim()
of help
are below too
head(help)
StudyArea Ind ObsValues AAAPred AAAPredSE BBBPred BBBPredSE CCCPred CCCPredSE
1 AAA AAA_F01 22 28 3.502783 14 3.187343 29 3.4634488
2 AAA AAA_F17 50 52 4.785219 43 4.878283 53 4.7953389
3 AAA AAA_F33 8 6 1.231803 5 1.373986 7 0.9484051
4 AAA AAA_F49 15 15 2.524401 13 2.575227 15 2.5207022
5 AAA AAA_F65 54 35 4.873907 26 4.415568 44 5.0534520
6 AAA AAA_F81 30 31 3.885419 32 3.810217 32 3.8072731
dim(help)
> dim(help)
[1] 26 9
I am a relative newcomer to ggplot and am trying to make a plot that displays the observed and predicted values for each individual with a different color for each StudyArea. I can manually add points and force the color with the code below; however, this feels rather clunky and also does not produce a legend, as I have not specified color in aes().
# Baseline plot of the wide-format data: observed values drawn as large red
# asterisks, plus one point layer and one error-bar layer (prediction +/- SE)
# for each of the three prediction columns.
# Every colour is a fixed constant set outside aes(), so ggplot2 has no mapped
# colour scale and therefore draws no legend.

# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later with "could not find function".
library(ggplot2)

ggplot(help, aes(x = Ind, y = ObsValues)) +
  # `shape`/`size` are ggplot2's names for the base-graphics `pch`/`cex`.
  geom_point(color = "red", shape = "*", size = 10) +
  geom_point(aes(y = AAAPred), color = "blue") +
  geom_errorbar(
    aes(ymin = AAAPred - AAAPredSE, ymax = AAAPred + AAAPredSE),
    color = "blue"
  ) +
  geom_point(aes(y = BBBPred), color = "darkgreen") +
  geom_errorbar(
    aes(ymin = BBBPred - BBBPredSE, ymax = BBBPred + BBBPredSE),
    color = "darkgreen"
  ) +
  geom_point(aes(y = CCCPred), color = "black") +
  geom_errorbar(
    aes(ymin = CCCPred - CCCPredSE, ymax = CCCPred + CCCPredSE),
    color = "black"
  ) +
  # Tilt the individual labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
In the figure above, the asterisks are the observed values and the colored points with error bars are the predicted values, one set from each StudyArea.
I tried to melt() the data, but ran into more problems plotting. That being said, I suspect melt()ing or reshape()ing is the best option.
Any suggestions on how to best alter/restructure the help
data so that I can plot the observed and predicted values for each individual with a different color for each StudyArea would be greatly appreciated.
I also hope to produce a legend - the likely default once the data is correctly formatted
Note: Indeed, the resulting figure is very busy and will likely be simplified once I get a better handle on ggplot.
thanks in advance.
Try this:
# Reshape the wide data to long format so that colour can be mapped inside
# aes() and ggplot2 builds the legend itself.
library(reshape2)

# One row per individual x prediction column; the identifying columns
# (StudyArea, Ind, ObsValues) are repeated for every prediction.
x.value <- melt(
  help,
  id.vars = c("StudyArea", "Ind", "ObsValues"),
  measure.vars = c("AAAPred", "BBBPred", "CCCPred")
)
# Same reshape for the standard errors; rows come out in the same order as
# x.value, so the two can be combined positionally.
x.se <- melt(
  help,
  id.vars = c("StudyArea", "Ind", "ObsValues"),
  measure.vars = c("AAAPredSE", "BBBPredSE", "CCCPredSE")
)

# Long table of predictions with their standard error attached as `se`.
gg <- x.value
gg$se <- x.se[["value"]]

# Observed values as filled diamonds; predictions as open circles with
# +/- SE error bars, coloured by prediction column (`variable`).
ggplot(gg, aes(x = Ind)) +
  geom_point(aes(y = ObsValues), size = 5, shape = 18) +
  geom_point(aes(y = value, colour = variable), size = 3, shape = 1) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, colour = variable)) +
  theme(axis.text.x = element_text(angle = -90))
Produces this:
Edit:: Response to @B.Davis' questions below:
You have to group the ObsValues
by StudyArea
, not variable
. But when you do that you get six colors, three for StudyArea
and three for the predictor groups (variable
). If we give the predictor groups (e.g., AAAPred
, etc.) the same names as the StudyArea groups (e.g. AAA
, etc.), then ggplot
just generates three colors.
# Keep only the first three characters of each prediction label (dropping
# "Pred") so the prediction groups carry the same names as the StudyArea
# levels and both colour mappings share one three-colour scale.
gg$variable <- substr(as.character(gg$variable), 1, 3)

# Observed values are now coloured by StudyArea; predictions and their
# +/- SE error bars are coloured by the shortened prediction label.
ggplot(gg, aes(x = Ind)) +
  geom_point(aes(y = ObsValues, colour = StudyArea), size = 5, shape = 18) +
  geom_point(aes(y = value, colour = variable), size = 3, shape = 1) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, colour = variable)) +
  theme(axis.text.x = element_text(angle = -90))
Produces this:
这篇关于为ggplot添加图例和构造数据的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!