In the data included below I have three sites (AAA,BBB,CCC) and individuals within each site (7, 12, 7 respectively). For each individual I have observed values (ObsValues) and three sets of predicted values each with a standard error. I have 26 rows (i.e. 26 individuals) and 9 columns.
The data is included here through dput()
help <- structure(list(StudyArea = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("AAA", "BBB", "CCC"), class = "factor"),
Ind = structure(1:26, .Label = c("AAA_F01", "AAA_F17", "AAA_F33",
"AAA_F49", "AAA_F65", "AAA_F81", "AAA_F97", "BBB_P01", "BBB_P02",
"BBB_P03", "BBB_P04", "BBB_P05", "BBB_P06", "BBB_P07", "BBB_P08",
"BBB_P09", "BBB_P10", "BBB_P11", "BBB_P12", "CCC_F02", "CCC_F03",
"CCC_F04", "CCC_F05", "CCC_F06", "CCC_F07", "CCC_F08"), class = "factor"),
ObsValues = c(22L, 50L, 8L, 15L, 54L, 30L, 11L, 90L, 6L,
53L, 9L, 42L, 72L, 40L, 60L, 58L, 1L, 20L, 37L, 2L, 50L,
68L, 20L, 19L, 58L, 5L), AAAPred = c(28L, 52L, 6L, 15L, 35L,
31L, 13L, 79L, 6L, 58L, 5L, 42L, 88L, 49L, 68L, 60L, 1L,
26L, 46L, 0L, 34L, 71L, 20L, 15L, 35L, 5L), AAAPredSE = c(3.5027829,
4.7852191, 1.231803, 2.5244013, 4.873907, 3.8854192, 2.3532752,
6.3444402, 1.7387295, 5.605111, 1.667818, 4.4709107, 7.0437967,
5.447496, 6.0840486, 5.4371275, 0.8156916, 3.5153847, 4.698754,
0, 3.8901103, 5.993616, 3.1720272, 2.6777869, 4.5647313,
1.4864128), BBBPred = c(14L, 43L, 5L, 13L, 26L, 32L, 14L,
80L, 5L, 62L, 4L, 44L, 67L, 44L, 55L, 42L, 1L, 20L, 47L,
0L, 26L, 51L, 15L, 16L, 34L, 6L), BBBPredSE = c(3.1873435,
4.8782831, 1.3739863, 2.5752273, 4.4155679, 3.8102168, 2.3419518,
6.364606, 1.7096028, 5.6333421, 1.5861323, 4.4951428, 6.6046699,
5.302902, 5.9244328, 5.1887055, 0.8268689, 3.4014041, 4.6600598,
0, 3.8510512, 5.5776686, 3.0569531, 2.6358433, 4.5273782,
1.4263518), CCCPred = c(29L, 53L, 7L, 15L, 44L, 32L, 15L,
86L, 8L, 61L, 5L, 46L, 99L, 54L, 74L, 67L, 1L, 30L, 51L,
1L, 37L, 94L, 21L, 17L, 36L, 6L), CCCPredSE = c(3.4634488,
4.7953389, 0.9484051, 2.5207022, 5.053452, 3.8072731, 2.2764727,
6.3605968, 1.6044067, 5.590048, 1.6611899, 4.4183913, 7.0124638,
5.6495918, 6.1091934, 5.4797929, 0.8135164, 3.4353934, 4.6261147,
0.8187396, 3.7936333, 5.6512378, 3.1686123, 2.633179, 4.5841921,
1.3989955)), .Names = c("StudyArea", "Ind", "ObsValues",
"AAAPred", "AAAPredSE", "BBBPred", "BBBPredSE", "CCCPred", "CCCPredSE"
), class = "data.frame", row.names = c(NA, -26L))
The head() and dim() of help are below too
head(help)
StudyArea Ind ObsValues AAAPred AAAPredSE BBBPred BBBPredSE CCCPred CCCPredSE
1 AAA AAA_F01 22 28 3.502783 14 3.187343 29 3.4634488
2 AAA AAA_F17 50 52 4.785219 43 4.878283 53 4.7953389
3 AAA AAA_F33 8 6 1.231803 5 1.373986 7 0.9484051
4 AAA AAA_F49 15 15 2.524401 13 2.575227 15 2.5207022
5 AAA AAA_F65 54 35 4.873907 26 4.415568 44 5.0534520
6 AAA AAA_F81 30 31 3.885419 32 3.810217 32 3.8072731
dim(help)
> dim(help)
[1] 26 9
I am a relative newcomer to ggplot and am trying to make a plot that displays the observed and predicted values for each individual, with a different color for each StudyArea. I can manually add points and force the color with the code below; however, this feels rather clunky and also does not produce a legend, because I have not specified color inside aes().
# Load ggplot2. library() stops with an error when the package is missing,
# whereas require() only returns FALSE and lets the script fail later.
library(ggplot2)

# Observed values are drawn as large red asterisks; each prediction set adds
# one point layer and one error-bar layer (prediction +/- its SE).
# All colours are fixed constants outside aes(), so no legend is generated.
ggplot(help, aes(x = Ind, y = ObsValues)) +
  geom_point(color = "red", shape = "*", size = 10) +
  geom_point(aes(y = AAAPred), color = "blue") +
  geom_errorbar(aes(ymin = AAAPred - AAAPredSE, ymax = AAAPred + AAAPredSE),
                color = "blue") +
  geom_point(aes(y = BBBPred), color = "darkgreen") +
  geom_errorbar(aes(ymin = BBBPred - BBBPredSE, ymax = BBBPred + BBBPredSE),
                color = "darkgreen") +
  geom_point(aes(y = CCCPred), color = "black") +
  geom_errorbar(aes(ymin = CCCPred - CCCPredSE, ymax = CCCPred + CCCPredSE),
                color = "black") +
  # Tilt the individual labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
In the figure above, the asterisks are the observed values and the points with error bars are the predicted values, one set from each StudyArea.
I tried to melt() the data, but ran into more problems plotting. That being said, I suspect melt()ing or reshape()ing is the best option.
Any suggestions on how to best alter/restructure the help data so that I can plot the observed and predicted values for each individual with a different color for each StudyArea would be greatly appreciated.
I also hope to produce a legend - the likely default once the data is correctly formatted
Note: the resulting figure is indeed very busy and will likely be simplified once I get a better handle on ggplot.
thanks in advance.
Try this:
library(reshape2)

# Columns that identify a row and must be repeated for every melted value.
# Long table of predictions: one row per individual per prediction set.
x.value <- melt(help,
                id.vars = c("StudyArea", "Ind", "ObsValues"),
                measure.vars = c("AAAPred", "BBBPred", "CCCPred"))
# Matching long table of standard errors, melted in the same column order so
# that row i here belongs to row i of x.value.
x.se <- melt(help,
             id.vars = c("StudyArea", "Ind", "ObsValues"),
             measure.vars = c("AAAPredSE", "BBBPredSE", "CCCPredSE"))

# Attach the standard errors as an extra column of the prediction table.
gg <- x.value
gg$se <- x.se$value

# Filled diamonds: observed values. Open circles and error bars: predictions,
# coloured by prediction set (variable).
ggplot(gg, aes(x = Ind)) +
  geom_point(aes(y = ObsValues), shape = 18, size = 5) +
  geom_point(aes(y = value, colour = variable), shape = 1, size = 3) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, colour = variable)) +
  theme(axis.text.x = element_text(angle = -90))
Produces this:
Edit: Response to @B.Davis' questions below:
You have to group the ObsValues by StudyArea, not variable. But when you do that you get six colors, three for StudyArea and three for the predictor groups (variable). If we give the predictor groups (e.g., AAAPred, etc.) the same names as the StudyArea groups (e.g. AAA, etc.), then ggplot just generates three colors.
# Drop the trailing "Pred" from the group names so that they coincide with
# the StudyArea labels and both mappings share one three-colour scale.
gg$variable <- sub("Pred$", "", gg$variable)

# Observed values coloured by the individual's StudyArea; predictions and
# their error bars coloured by the prediction set they come from.
ggplot(gg, aes(x = Ind)) +
  geom_point(aes(y = ObsValues, colour = StudyArea), shape = 18, size = 5) +
  geom_point(aes(y = value, colour = variable), shape = 1, size = 3) +
  geom_errorbar(aes(ymin = value - se, ymax = value + se, colour = variable)) +
  theme(axis.text.x = element_text(angle = -90))
Produces this:
Similar to @jlhoward's solution, but I chose to treat ObsValues as a variable so that it appears in the legend.
# Works directly on the `help` data frame from the question.
# ObsValues (column 3) is melted together with the three prediction columns,
# so it becomes one more level of `variable` and gets its own legend entry.
x.value <- melt(help, id.vars = 1:2, measure.vars = c(3, 4, 6, 8))
# Column 3 is repeated here only as a placeholder so that the SE table has the
# same rows, in the same order, as x.value; the placeholder rows are never
# drawn because the error-bar layer below excludes variable == "ObsValues".
x.se <- melt(help, id.vars = 1:2, measure.vars = c(3, 5, 7, 9))
gg <- data.frame(x.value, se = x.se$value)

ggplot(gg) +
  geom_point(aes(x = Ind, y = value, color = variable), size = 3, shape = 1) +
  # Error bars only for the predicted series.
  geom_errorbar(data = subset(gg, variable != "ObsValues"),
                aes(x = Ind, ymin = value - se, ymax = value + se,
                    color = variable)) +
  theme(axis.text.x = element_text(angle = -90))
This is a little clumsy, but gets you what you want:
# jlhoward's melting is more elegant.
library(reshape2)

# Long table of point values: one row per individual per series
# (the observed series plus the three prediction sets).
melted.points <- melt(
  help[, c("Ind", "ObsValues", "AAAPred", "BBBPred", "CCCPred")],
  id.vars = "Ind"
)
# Flag each row as observed or predicted; used later for size and shape.
melted.points$observed <- ifelse(melted.points$variable == "ObsValues",
                                 "observed", "predicted")

# Long table of standard errors for the three prediction sets.
melted.points.se <- melt(
  help[, c("Ind", "AAAPredSE", "BBBPredSE", "CCCPredSE")],
  id.vars = "Ind"
)
# Strip the "SE" suffix so the keys match the names used in melted.points.
melted.points.se$variable <- sub("SE$", "", melted.points.se$variable)

# Left join: every point keeps its row; series without a standard error
# (the observed values) get NA in the SE column.
help2 <- merge(melted.points, melted.points.se,
               by = c("Ind", "variable"), all.x = TRUE)

# Give the merged value columns meaningful names. Base R is used because
# rename() is not attached by reshape2 or ggplot2.
names(help2)[names(help2) == "value.x"] <- "value"
names(help2)[names(help2) == "value.y"] <- "se"
And now the actual plot:
# Colour identifies the series; point size and shape separate observed from
# predicted values. Error bars span value +/- se and use a fixed width of 1
# instead of the size mapping.
ggplot(
  help2,
  aes(x = Ind, y = value, ymin = value - se, ymax = value + se,
      colour = variable, shape = observed, size = observed)
) +
  geom_point() +
  geom_errorbar(size = 1) +
  scale_colour_manual(values = c("red", "blue", "darkgreen", "black")) +
  scale_size_manual(values = c(observed = 4, predicted = 3)) +
  scale_shape_manual(values = c(observed = 8, predicted = 16))
Related
This question already has answers here:
Fitting a linear model with multiple LHS
(1 answer)
Fast pairwise simple linear regression between variables in a data frame
(1 answer)
Closed 6 months ago.
Using the following code
# Fit the power law y = a * x^b by least squares on the log-log scale:
# log(y) = log(a) + b * log(x), so the slope is b and exp(intercept) is a.
model <- lm(log(y)~log(x))
I am able to get the coefficients for a power-law fit of the form y = ax^b. The fitted intercept and slope give the coefficient and exponent of that equation: the coefficient of log(x) in the model is b, and e^intercept is a.
For V1 and V2 I get: Intercept=0.4272 log(x)=0.6009
Then: y = (e^0.4272)x^0.6009 = 1.5330x^0.6009
For the data:
data
structure(list(V1 = c(900L, 450L, 225L, 113L, 56L, 28L, 14L),
V2 = c(3L, 3L, 3L, 3L, 2L, 2L, 2L), V3 = c(27L, 30L, 17L,
14L, 9L, 7L, 5L), V4 = c(15L, 11L, 8L, 6L, 4L, 3L, 2L), V5 = c(50L,
38L, 23L, 14L, 8L, 5L, 4L), V6 = c(75L, 38L, 38L, 23L, 19L,
7L, 5L), V7 = c(82L, 50L, 45L, 38L, 19L, 9L, 7L), V8 = c(60L,
50L, 23L, 14L, 11L, 7L, 5L), V9 = c(129L, 64L, 56L, 38L,
19L, 28L, 14L), V10 = c(180L, 150L, 75L, 56L, 56L, 28L, 14L
), V11 = c(900L, 450L, 225L, 113L, 56L, 28L, 14L)), row.names = c(NA,
7L), class = "data.frame")
I would like the program to produce a data frame with the a and b values, always taking V1 as x and each of V2 to V11 in turn as y.
I have an R program and am struggling to get all of the points to show on the map.
library(ggmap)
library(ggplot2)

setwd("d:/GIS/")
sep <- read.csv("SEP_assets_csv.csv")

# Percentage of each row's total (columns 4 to 7) that comes from the columns
# whose names match "SEP.12".
Sub1 <- sep[grep("SEP.12", names(sep))]
sep$newCol <- 100 * rowSums(Sub1) / rowSums(sep[4:7])

# Two-level grouping label taken from column 8 of sep (the newCol column
# when the CSV itself has seven columns).
Percent_SEP12_Assets <- ifelse(sep[[8]] < 50, "Under 50", "Over 50")

# Base map fetched for a fixed place name and zoom level.
map <- get_map(location = "Kissena Park, Queens", zoom = 13,
               maptype = "roadmap")

# Overlay the points, coloured by the grouping label.
ggmap(map) +
  geom_point(
    data = sep,
    mapping = aes(x = Longitude, y = Latitude, colour = Percent_SEP12_Assets),
    size = 9, alpha = 0.6
  ) +
  scale_colour_manual(breaks = c("Over 50", "Under 50"),
                      values = c("green", "red"))
And here is output map
I want to zoom in as far as possible without cutting off data points, but no matter which location I pick for the map, some of the data keeps getting cut off, i.e. I get the warning "Removed 2 rows containing missing values (geom_point)".
Is there a way to set boundaries based on the extremities of latitude and longitude? The csv I import at
sep <- read.csv("SEP_assets_csv.csv")
Has list of latitude and longitude.
Help!
Coordinates
Latitude Longitude
40.758365 -73.824407
40.774168 -73.818543
40.761748 -73.811379
40.765602 -73.828293
40.751762 -73.81778
40.764834 -73.789712
40.777951 -73.842932
40.76501 -73.794319
40.785959 -73.817349
40.755764 -73.799256
40.745593 -73.829283
40.789929 -73.839501
40.760072 -73.783908
40.726437 -73.807592
40.741093 -73.808757
40.720926 -73.823358
40.729642 -73.81781
40.724191 -73.80937
40.782346 -73.77844
40.778164 -73.799841
40.775122 -73.8185
40.760344 -73.817909
40.792326 -73.809516
40.78322 -73.806977
40.73106 -73.805449
40.736521 -73.813001
40.783714 -73.795027
40.770194 -73.82762
40.735855 -73.823583
40.74943 -73.82141
40.769753 -73.832001
40.754465 -73.826204
40.738775 -73.823892
40.764868 -73.826819
40.738332 -73.82028
40.735017 -73.821339
40.72535 -73.811325
40.721466 -73.820401
dput
> dput(sep)
structure(list(School = structure(1:38, .Label = c("Queens\\25Q020",
"Queens\\25Q021", "Queens\\25Q022", "Queens\\25Q023", "Queens\\25Q024",
"Queens\\25Q025", "Queens\\25Q029", "Queens\\25Q032", "Queens\\25Q079",
"Queens\\25Q107", "Queens\\25Q120", "Queens\\25Q129", "Queens\\25Q130",
"Queens\\25Q154", "Queens\\25Q163", "Queens\\25Q164", "Queens\\25Q165",
"Queens\\25Q168", "Queens\\25Q169", "Queens\\25Q184", "Queens\\25Q185",
"Queens\\25Q189", "Queens\\25Q193", "Queens\\25Q194", "Queens\\25Q200",
"Queens\\25Q201", "Queens\\25Q209", "Queens\\25Q214", "Queens\\25Q219",
"Queens\\25Q237", "Queens\\25Q242", "Queens\\25Q244", "Queens\\25Q425",
"Queens\\25Q460", "Queens\\25Q499", "Queens\\25Q515", "Queens\\25Q707",
"Queens\\25Q792"), class = "factor"), Latitude = c(40.758365,
40.774168, 40.761748, 40.765602, 40.751762, 40.764834, 40.777951,
40.76501, 40.785959, 40.755764, 40.745593, 40.789929, 40.760072,
40.726437, 40.741093, 40.720926, 40.729642, 40.724191, 40.782346,
40.778164, 40.775122, 40.760344, 40.792326, 40.78322, 40.73106,
40.736521, 40.783714, 40.770194, 40.735855, 40.74943, 40.769753,
40.754465, 40.738775, 40.764868, 40.738332, 40.735017, 40.72535,
40.721466), Longitude = c(-73.824407, -73.818543, -73.811379,
-73.828293, -73.81778, -73.789712, -73.842932, -73.794319, -73.817349,
-73.799256, -73.829283, -73.839501, -73.783908, -73.807592, -73.808757,
-73.823358, -73.81781, -73.80937, -73.77844, -73.799841, -73.8185,
-73.817909, -73.809516, -73.806977, -73.805449, -73.813001, -73.795027,
-73.82762, -73.823583, -73.82141, -73.832001, -73.826204, -73.823892,
-73.826819, -73.82028, -73.821339, -73.811325, -73.820401), Windows.SEP.11 = c(48L,
154L, 11L, 62L, 20L, 72L, 9L, 37L, 8L, 22L, 9L, 47L, 44L, 99L,
78L, 91L, 42L, 122L, 55L, 14L, 162L, 108L, 89L, 87L, 23L, 14L,
75L, 74L, 141L, 73L, 43L, 14L, 534L, 189L, 128L, 10L, 79L, 38L
), Mac.SEP.11 = c(49L, 0L, 180L, 2L, 202L, 116L, 41L, 1L, 17L,
22L, 33L, 43L, 1L, 28L, 2L, 0L, 238L, 13L, 76L, 55L, 76L, 42L,
0L, 1L, 12L, 0L, 16L, 10L, 1L, 7L, 0L, 1L, 1L, 67L, 16L, 7L,
31L, 24L), Windows.SEP.12 = c(52L, 252L, 1L, 2L, 12L, 45L, 108L,
15L, 14L, 4L, 19L, 21L, 46L, 90L, 10L, 86L, 15L, 76L, 122L, 2L,
9L, 52L, 39L, 120L, 43L, 17L, 9L, 54L, 19L, 199L, 40L, 25L, 64L,
164L, 14L, 27L, 45L, 2L), Mac.SEP.12 = c(73L, 2L, 91L, 53L, 288L,
6L, 2L, 107L, 109L, 97L, 41L, 18L, 12L, 16L, 2L, 2L, 270L, 32L,
45L, 92L, 54L, 190L, 1L, 4L, 19L, 53L, 1L, 10L, 0L, 61L, 50L,
27L, 27L, 25L, 3L, 1L, 43L, 0L), newCol = c(56.3063063063063,
62.2549019607843, 32.5088339222615, 46.218487394958, 57.4712643678161,
21.3389121338912, 68.75, 76.25, 83.1081081081081, 69.6551724137931,
58.8235294117647, 30.2325581395349, 56.3106796116505, 45.4935622317597,
13.0434782608696, 49.1620111731844, 50.4424778761062, 44.4444444444444,
56.0402684563758, 57.6687116564417, 20.9302325581395, 61.734693877551,
31.0077519379845, 58.4905660377358, 63.9175257731959, 83.3333333333333,
9.9009900990099, 43.2432432432432, 11.8012422360248, 76.4705882352941,
67.6691729323308, 77.6119402985075, 14.5367412140575, 42.4719101123596,
10.5590062111801, 62.2222222222222, 44.4444444444444, 3.125)), .Names = c("School",
"Latitude", "Longitude", "Windows.SEP.11", "Mac.SEP.11", "Windows.SEP.12",
"Mac.SEP.12", "newCol"), row.names = c(NA, -38L), class = "data.frame")
You haven't provided us with any of the data, so I'm going to give an example using a dataset in the historydata package. Instead of getting a map based on a location and a zoom, you can get a map based on the bounding box of the latitudes and longitudes in your dataset.
library(historydata)
library(ggmap)

data("catholic_dioceses")

# Bounding box spanning every longitude/latitude in the data; f adds a small
# margin around the extremes.
bbox <- make_bbox(lon = long, lat = lat, data = catholic_dioceses, f = 0.01)

# Request the map for that box instead of for a place name plus zoom level.
map <- get_map(location = bbox)

ggmap(map) +
  geom_point(mapping = aes(x = long, y = lat), data = catholic_dioceses)
Note that the f = argument to make_bbox() lets you control how much padding there is around your map.
In your case, I think this will work:
library(ggmap)

# Bounding box computed from the extremes of the coordinates in sep, with a
# small margin (f) so that edge points are not clipped.
bbox <- make_bbox(lon = Longitude, lat = Latitude, data = sep, f = 0.01)
map <- get_map(location = bbox)

# Same point layer and manual colours as in the question, drawn on the
# bounding-box map.
ggmap(map) +
  geom_point(
    data = sep,
    mapping = aes(x = Longitude, y = Latitude, colour = Percent_SEP12_Assets),
    size = 9, alpha = 0.6
  ) +
  scale_colour_manual(breaks = c("Over 50", "Under 50"),
                      values = c("green", "red"))
I'm trying to plot the graph below, and want to manually specify colours.
I need to plot by genotype, since there are multiple genotypes belonging to the same Bgrnd_All, and I want them to come up separately in the lines plotted.
However, I want to colour the lines by Bgrnd_All, and specifically in the order/colour I use in scale_fill_manual.
When I do this, the values in scale_fill_manual do not overwrite the existing colour as defined in geom_line. How can I do this?
I'd be grateful for pointers.
[Data for graph below](https://www.dropbox.com/s/9nmu87wkh2yqfxn/summary_200_exp2.csv?dl=0)
# Shift overlapping lines and error bars sideways by one x unit.
pd <- position_dodge(1)

# One line per Genotype, coloured by Bgrnd_All, with black +/- se error bars.
# NOTE(review): the lines use the colour aesthetic, while the palette below is
# attached to the fill scale, which no layer here maps; the palette only takes
# effect through scale_colour_manual().
ggplot(data = summary.200.exp2,
       aes(x = Time, y = Length, colour = Genotype, group = Genotype)) +
  geom_errorbar(aes(ymin = Length - se, ymax = Length + se),
                colour = "black", width = 1, position = pd) +
  geom_line(aes(colour = Bgrnd_All), position = pd, size = 1) +
  scale_x_continuous(breaks = c(0, 17, 22, 41, 89)) + # using breaks of when sampled
  # Hex colours must start with "#"; a bare "CC0033" is not a valid colour.
  scale_fill_manual(values = c(Avalon = "#000066", Av_A = "#663399",
                               Av_B = "#339999", Cadenza = "#CC0033",
                               Cad_A = "#FF6600", Cad_B = "#FF9933")) +
  ylab("leaf segment width (mm)") +
  xlab("Time") +
  theme(axis.title = element_text(size = 14, face = "bold"),
        axis.text = element_text(size = 14),
        strip.text.y = element_text(size = 14))
A dput of the data:
summary.200.exp2 <- structure(list(X = 1:40,
Genotype = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L), .Label = c("4.18", "4.41", "7.50", "7.59", "8.51", "8.77", "Avalon", "Cadenza"), class = "factor"),
Time = c(0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L, 0L, 17L, 22L, 41L, 89L),
Bgrnd_All = structure(c(4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L), .Label = c("Av_A", "Av_B", "Avalon", "Cad_A", "Cad_B", "Cadenza"), class = "factor"),
N = c(43L, 48L, 44L, 47L, 48L, 22L, 21L, 26L, 27L, 25L, 36L, 24L, 44L, 48L, 45L, 50L, 26L, 52L, 54L, 53L, 38L, 52L, 52L, 49L, 50L, 39L, 39L, 42L, 38L, 42L, 84L, 42L, 84L, 42L, 42L, 50L, 26L, 53L, 27L, 27L),
Length = c(1.17423255813953, 1.58852083333333, 1.71263636363636, 1.86736170212766, 2.0331875, 1.07563636363636, 1.49866666666667, 1.48734615384615, 1.66796296296296, 2.15416, 1.08716666666667, 1.09858333333333, 1.24593181818182, 1.30827083333333, 1.81537777777778, 1.15672, 1.8475, 1.96815384615385, 2.01822222222222, 2.5057358490566, 1.14697368421053, 1.40276923076923, 1.49832692307692, 1.76981632653061, 2.27954, 1.18312820512821, 1.75928205128205, 1.86195238095238, 1.91426315789474, 2.26883333333333, 1.10839285714286, 1.97902380952381, 2.03271428571429, 2.15685714285714, 2.8227380952381, 1.08658, 1.68880769230769, 1.7277358490566, 1.9232962962963, 2.13466666666667),
sd = c(0.218740641945063, 0.357307960001092, 0.377931031662453, 0.416137123383518, 0.440003996899158, 0.176915784499843, 0.426273190962478, 0.305677731254037, 0.450036449932454, 0.48642939535627, 0.15212823538055, 0.175160775008132, 0.293836087650785, 0.282464815326021, 0.346608194369436, 0.211422397593258, 0.408328617659845, 0.413460118977535, 0.419730221832425, 0.508692484972064, 0.217587942685885, 0.207510416973071, 0.245473270071832, 0.377310585673427, 0.536134471785516, 0.159925670150259, 0.298319411009668, 0.338847829173593, 0.296186727462412, 0.445638589029855, 0.162594700328365, 0.308723610551514, 0.318831396748337, 0.381781291715339, 0.402059458017902, 0.167826451905484, 0.257140275994371, 0.338637947743116, 0.362428434825926, 0.343680867174096),
se = c(0.0333576351702583, 0.0515729617225566, 0.0569752467571038, 0.0606998379642952, 0.06350910651356, 0.0377185719899813, 0.0930204363959963, 0.0599483352513503, 0.0866095551712153, 0.097285879071254, 0.0253547058967583, 0.0357545434766975, 0.0442974569365289, 0.040770284291269, 0.0516692989445678, 0.0298996422065822, 0.0800798303617661, 0.0573366022820362, 0.0571180485063685, 0.0698742866122227, 0.0352974252834232, 0.0287765172534354, 0.0340410177692235, 0.053901512239061, 0.0758208641254813, 0.0256086023072023, 0.0477693365291991, 0.052285355168868, 0.0480478318490224, 0.0687635271596866, 0.0177405362346046, 0.0476370873204908, 0.0347873573697084, 0.0589101322645314, 0.0620391212561054, 0.0237342444409691, 0.0504293571163821, 0.046515499476421, 0.0697493848029077, 0.0661414137260961),
ci = c(0.0673184331863912, 0.103751416510302, 0.114901535684132, 0.122182436693452, 0.127763842564108, 0.0784400645137227, 0.194037230170767, 0.123465907623535, 0.178028490322197, 0.200788185881879, 0.0514727894594648, 0.0739639084701291, 0.0893343358495282, 0.0820192326650262, 0.104132629687123, 0.0600855805773719, 0.164927497928001, 0.11510803218647, 0.11456429705202, 0.140213013986381, 0.0715193770736051, 0.0577712690042106, 0.0683401947985261, 0.108376253996364, 0.152367731004308, 0.0518419050566429, 0.0967039660836575, 0.105592416917608, 0.0973541547573791, 0.138870760371045, 0.0352852130493688, 0.0962050495562246, 0.06919065466693, 0.118971425682342, 0.125290547146885, 0.0476957499005439, 0.103861205171753, 0.0933401784102089, 0.143371913789607, 0.135955623027448)),
.Names = c("X", "Genotype", "Time", "Bgrnd_All", "N", "Length", "sd", "se", "ci"), class = "data.frame", row.names = c(NA, -40L))
As stated by @juba in the comments, you should use scale_colour_manual instead of scale_fill_manual. Moreover, you are trying to plot too many lines and error bars in one plot. They overlap each other too much, and it is therefore hard to distinguish between the lines/error bars.
An example with the use of facetting (and some simplification of your code):
# One panel per Bgrnd_All; within a panel, one line per Genotype with
# +/- se error bars. Lines and bars share the manual Bgrnd_All palette.
ggplot(summary.200.exp2,
       aes(x = Time, y = Length, group = Genotype, colour = Bgrnd_All)) +
  geom_line(size = 1) +
  geom_errorbar(aes(ymin = Length - se, ymax = Length + se), width = 2) +
  scale_x_continuous(name = "Time", breaks = c(0, 17, 22, 41, 89)) +
  scale_colour_manual(
    values = c(Avalon = "#000066", Av_A = "#663399", Av_B = "#339999",
               Cadenza = "#CC0033", Cad_A = "#FF6600", Cad_B = "#FF9933")
  ) +
  labs(y = "leaf segment width (mm)") +
  theme_bw() +
  theme(axis.title = element_text(size = 14, face = "bold"),
        axis.text = element_text(size = 10)) +
  facet_wrap(~ Bgrnd_All, ncol = 3)
this gives:
# library() fails immediately if ggplot2 is not installed; require() would
# only return FALSE and let the first ggplot() call fail instead.
library(ggplot2)
The data: It's shark incidents grouped by shark species. It's actually a real dataset, already summarized.
D <- structure(list(FL_FATAL = structure(c(2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L), .Label = c("FATAL",
"NO FATAL"), class = "factor"), spec = structure(c(26L, 24L,
6L, 26L, 25L, 16L, 2L, 11L, 27L, 5L, 24L, 29L, 12L, 21L, 13L,
15L, 28L, 1L, 17L, 19L, 8L, 3L, 6L, 13L, 22L, 18L, 27L, 14L,
23L, 20L, 7L, 4L, 8L, 9L, 10L), .Label = c("blacknose", "blacktip",
"blue", "bonnethead", "bronze", "bull", "caribbean", "draughtsboard",
"dusky", "galapagos", "ganges", "hammerhead", "involve", "leon",
"mako", "nurse", "porbeagle", "recovered", "reef", "sand", "sandtiger",
"sevengill", "spinner", "tiger", "unconfired", "white", "whitespotted",
"whitetip", "wobbegong"), class = "factor"), N = c(368L, 169L,
120L, 107L, 78L, 77L, 68L, 59L, 56L, 53L, 46L, 42L, 35L, 35L,
33L, 30L, 29L, 29L, 26L, 25L, 25L, 25L, 24L, 24L, 21L, 21L, 20L,
20L, 17L, 16L, 16L, 15L, 11L, 11L, 11L)), .Names = c("FL_FATAL",
"spec", "N"), row.names = c(NA, -35L), class = "data.frame")
.
head(D)
# FL_FATAL spec N Especies
# 1 NO FATAL white 368 white
# 2 NO FATAL tiger 169 tiger
# 3 NO FATAL bull 120 bull
# 4 FATAL white 107 white
# 5 NO FATAL unconfired 78 unconfired
# 6 NO FATAL nurse 77 nurse
Reordering a factor variable by a numeric making a new variable.
# Especies holds the same labels as spec, but with the factor levels arranged
# in the order the species appear when the rows are sorted by ascending N.
D$Especies <- factor(D$spec, levels = unique(D$spec[order(D$N)]))

# These two plots work as expected: all incidents in a single panel ...
ggplot(D, aes(x = N, y = Especies, size = N, colour = FL_FATAL)) +
  geom_point()

# ... and one panel per FL_FATAL level.
ggplot(D, aes(x = N, y = Especies, size = N, colour = FL_FATAL)) +
  geom_point() +
  facet_grid(. ~ FL_FATAL)
Reordering using reorder()
# Attempt with reorder() called on D$spec / D$N inside aes().
# This is the version that does not give the expected ordering
# (using reorder isn't working, or am I missing something?).
ggplot(D, aes(x=N, y=reorder(D$spec, D$N))) +
geom_point(aes(size = N, color = FL_FATAL))
# Same attempt with one panel per FL_FATAL level; adding facets makes it worse.
ggplot(D, aes(x=N, y=reorder(D$spec, D$N))) +
geom_point(aes(size = N, color = FL_FATAL)) +
facet_grid(. ~ FL_FATAL)
Which would be the correct approach for producing the plots with reorder()?
The problem is that by using D$ in your reorder call, you're reordering spec independent of the data frame, so the values no longer match up with the corresponding x values. You need to use it directly on the variables:
# reorder() is given the bare column names, so it is evaluated on the data
# ggplot uses for the layer; species are ordered by their summed N.
ggplot(
  D,
  aes(x = N, y = reorder(spec, N, sum), size = N, colour = FL_FATAL)
) +
  geom_point() +
  facet_grid(. ~ FL_FATAL)
I'm surprised you like your first way--it's a happy coincidence that it worked out. Most of your species have one N value (NO FATAL only), but a few have both FATAL and NO FATAL. Whenever there are two or more numeric values corresponding to a factor level, reorder uses a function of those values to do the final sort. The default function is mean, but you probably want sum, to sort by the total number of incidents.
# Store the ordering once as a column: species levels sorted by total N.
D$spec_order <- with(D, reorder(spec, N, sum))

# All incidents in one panel.
ggplot(D, aes(x = N, y = spec_order, size = N, colour = FL_FATAL)) +
  geom_point()

# One panel per FL_FATAL level, sharing the same species order.
ggplot(D, aes(x = N, y = spec_order, size = N, colour = FL_FATAL)) +
  geom_point() +
  facet_grid(. ~ FL_FATAL)
I want to create a boxplot from a matrix with several variables grouped by two levels of a factor.
Some sample data:
mymatrix = structure(list(Treatment = structure(c(1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 2L), .Label = c("con", "treat"), class = "factor"),
c1 = c(13L, 93L, 6L, 3L, 45L, 1L, 69L, 38L, 23L, 48L, 82L
), c5 = c(33L, 79L, 3L, 5L, 17L, 22L, 94L, 99L, 85L, 74L,
9L), c3 = c(96L, 52L, 0L, 6L, 60L, 14L, 69L, 96L, 57L, 99L,
39L), c8 = c(40L, 27L, 94L, 68L, 76L, 73L, 88L, 45L, 67L,
95L, 85L), c12 = c(20L, 14L, 53L, 9L, 93L, 1L, 12L, 45L,
59L, 38L, 25L)), .Names = c("Treatment", "c1", "c5", "c3",
"c8", "c12"), class = "data.frame", row.names = c("1a", "1b",
"2a", "2b", "3a", "3b", "4a", "4b", "5a", "5b", "5c"))
I was able to get a boxplot for each variable, but I cannot manage to group them at the same time:
# One box per variable (columns 2 to 6), ignoring Treatment.
boxplot(as.matrix(mymatrix[,2:6]))
# Attempt to split the same columns by Treatment as well; this is the call
# that does not produce the grouped plot.
boxplot(as.matrix(mymatrix[,2:6])~Treatment, data=mymatrix)
Thanks in advance for any help.
# Stack every column except the first into two long columns:
# `values` (the numbers) and `ind` (the name of the source column).
v <- stack(mymatrix[-1])
# Add the treatment labels; the shorter column is recycled down the stacked
# rows, one full copy per source column.
v[["Treatment"]] <- mymatrix[["Treatment"]]
# One box for each Treatment x variable combination.
boxplot(values ~ Treatment + ind, data = v)
The first part will give us a data.frame like this:
values ind
1 13 c1
2 93 c1
...
11 82 c1
12 33 c5
...
22 9 c5
23 96 c3
...
55 25 c12
Then we append the Treatment column, and just plot as usual.
update: using the reshape package as suggested by Drew.
# Long format in a single step: Treatment is kept as the id column and every
# other column is melted into the variable/value pair.
v <- melt(mymatrix, id.vars = "Treatment")
# One box for each Treatment x variable combination.
boxplot(value ~ Treatment + variable, data = v)
Personally I like to use the ggplot2/reshape2 approach - it is maybe a little tougher to learn at first, but once you get good at it, I think it makes things much easier.
Note that your 'matrix' is not actually a matrix, it is a data frame. This is convenient, because the approach I suggest only works with data frames.
str(mymatrix)
'data.frame': 11 obs. of 6 variables:
...
First, 'reshape' it to 'long' format, where each row represents a different observation
# Melt to long format: Treatment stays as the id column; all other columns
# are gathered into a `variable` (source column name) / `value` pair.
dfm <- melt(mymatrix, id.vars="Treatment")
(My convention is to append any melted data frame with the letter m).
Next, make the plot using ggplot2. I've mapped the Treatment column to the x axis, and the c1-c12 columns (named variable after reshaping) to the fill color, but the syntax of ggplot2 allows you to easily change that up:
# Treatment on the x axis, one box per melted variable within each treatment,
# told apart by fill colour.
ggplot(data = dfm, mapping = aes(x = Treatment, y = value, fill = variable)) +
  geom_boxplot()