I would like to plot the following dataset
structure(list(X = structure(c(3L, 12L, 11L, 7L, 13L, 2L, 1L,
10L, 5L, 4L, 8L, 14L, 9L, 6L), .Label = c("BUM", "DDR", "ETB",
"EXP", "HED", "HEDOS", "KON", "LEIT", "MAIN", "MAT", "PER", "PMA",
"TRA", "TRADITION"), class = "factor"), Geschaeft = c(0.0468431771894094,
0.0916666666666667, 0.0654761904761905, 0.0905432595573441, 0.0761904761904762,
0.0672097759674134, 0.0869565217391304, 0.0650887573964497, 0.0762250453720508,
0.0518234165067179, 0.0561330561330561, 0.060077519379845, 0.0865384615384615,
0.0628683693516699), Gaststaette = c(0.0855397148676171, 0.0604166666666667,
0.0555555555555556, 0.0764587525150905, 0.0895238095238095, 0.0712830957230143,
0.075098814229249, 0.0631163708086785, 0.0780399274047187, 0.0383877159309021,
0.0561330561330561, 0.0581395348837209, 0.0596153846153846, 0.0648330058939096
), Bank = c(0.065173116089613, 0.0854166666666667, 0.0972222222222222,
0.0824949698189135, 0.060952380952381, 0.0529531568228106, 0.0731225296442688,
0.0828402366863905, 0.0725952813067151, 0.0806142034548944, 0.0686070686070686,
0.0503875968992248, 0.0807692307692308, 0.0550098231827112),
Hausarzt = c(0.0712830957230143, 0.0833333333333333, 0.0912698412698413,
0.0704225352112676, 0.0628571428571429, 0.0672097759674134,
0.106719367588933, 0.0710059171597633, 0.108892921960073,
0.0940499040307102, 0.0852390852390852, 0.0794573643410853,
0.0826923076923077, 0.110019646365422), Einr..F..Aeltere = c(0.10183299389002,
0.104166666666667, 0.107142857142857, 0.100603621730382,
0.12, 0.116089613034623, 0.112648221343874, 0.112426035502959,
0.121597096188748, 0.0998080614203455, 0.118503118503119,
0.131782945736434, 0.121153846153846, 0.104125736738703),
Park = c(0.0855397148676171, 0.0666666666666667, 0.0912698412698413,
0.0804828973843058, 0.0704761904761905, 0.0672097759674134,
0.0731225296442688, 0.0670611439842209, 0.0834845735027223,
0.0806142034548944, 0.0686070686070686, 0.0658914728682171,
0.0884615384615385, 0.0609037328094303), Sportstaette = c(0.0855397148676171,
0.0791666666666667, 0.0952380952380952, 0.0824949698189135,
0.0933333333333333, 0.114052953156823, 0.0810276679841897,
0.0788954635108481, 0.0780399274047187, 0.0825335892514395,
0.0831600831600832, 0.0852713178294574, 0.0884615384615385,
0.1237721021611), OEPNV = c(0.0529531568228106, 0.05625,
0.0456349206349206, 0.0583501006036217, 0.0666666666666667,
0.0366598778004073, 0.0434782608695652, 0.0571992110453649,
0.0344827586206897, 0.0633397312859885, 0.0478170478170478,
0.062015503875969, 0.0519230769230769, 0.0235756385068762
), Mangel.an.Gruenflaechen = c(0.0692464358452139, 0.0645833333333333,
0.0694444444444444, 0.0422535211267606, 0.0666666666666667,
0.0692464358452139, 0.0711462450592885, 0.0749506903353057,
0.0598911070780399, 0.0959692898272553, 0.0623700623700624,
0.0717054263565891, 0.0653846153846154, 0.0746561886051081
), Kriminalitaet = c(0.0672097759674134, 0.0541666666666667,
0.0476190476190476, 0.0422535211267606, 0.0628571428571429,
0.0509164969450102, 0.0454545454545455, 0.0532544378698225,
0.058076225045372, 0.072936660268714, 0.0602910602910603,
0.063953488372093, 0.0461538461538462, 0.0648330058939096
), Auslaender = c(0.0244399185336049, 0.04375, 0.0416666666666667,
0.0663983903420523, 0.0228571428571429, 0.0509164969450102,
0.0237154150197628, 0.0236686390532544, 0.0217785843920145,
0.0441458733205374, 0.024948024948025, 0.0232558139534884,
0.0230769230769231, 0.0451866404715128), Umweltbelastung = c(0.0468431771894094,
0.0479166666666667, 0.0476190476190476, 0.0402414486921529,
0.0438095238095238, 0.0468431771894094, 0.0454545454545455,
0.0512820512820513, 0.0417422867513612, 0.0518234165067179,
0.0478170478170478, 0.0445736434108527, 0.0442307692307692,
0.0451866404715128), Einr..f..Kinder = c(0.0753564154786151,
0.075, 0.0555555555555556, 0.0724346076458753, 0.0533333333333333,
0.0794297352342159, 0.075098814229249, 0.0788954635108481,
0.0598911070780399, 0.0460652591170825, 0.0977130977130977,
0.0930232558139535, 0.0634615384615385, 0.0451866404715128
), Einr..f..Jugendliche = c(0.122199592668024, 0.0875, 0.0892857142857143,
0.0945674044265594, 0.11047619047619, 0.109979633401222,
0.0869565217391304, 0.120315581854043, 0.105263157894737,
0.0978886756238004, 0.122661122661123, 0.11046511627907,
0.0980769230769231, 0.119842829076621)), .Names = c("X",
"Geschaeft", "Gaststaette", "Bank", "Hausarzt", "Einr..F..Aeltere",
"Park", "Sportstaette", "OEPNV", "Mangel.an.Gruenflaechen", "Kriminalitaet",
"Auslaender", "Umweltbelastung", "Einr..f..Kinder", "Einr..f..Jugendliche"
), row.names = c(NA, -14L), class = "data.frame")
So that it looks like this picture (or, better, with each line in a separate plot) that I created with Excel.
But I can't figure out how...
Thanks a lot for your help.
Dominik
UPDATE: Here is just a map of what the groups (BUM,DDR,ETB etc.) mean.
This is an extension to @Andrie's solution. It combines the faceting idea with that of overplotting (stolen liberally from the learnr blog), which I find results in a cool visualization. Here is the code and the resulting output. Comments are welcome.
# Reshape to long format: one row per (milieu X, survey item) pair.
mdf <- melt(df, id.vars = "X")
# Order the items by their mean value and keep a copy of the milieu in Y.
# Y survives when X is dropped below, so the grey background layer is not
# tied to the facetting variable.
mdf <- transform(mdf, variable = reorder(variable, value, mean), Y = X)
ggplot(mdf, aes(x = variable, y = value)) +
  # Background: every milieu in grey; without X this layer is drawn in
  # every panel.
  geom_line(
    data = transform(mdf, X = NULL),
    aes(group = Y),
    colour = "grey80"
  ) +
  # Foreground: only the milieu that belongs to the panel.
  geom_line(aes(group = X)) +
  facet_wrap(~X) +
  # opts()/theme_text() are no longer part of ggplot2; theme() and
  # element_text() are their replacements.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
EDIT: If you have groupings of milieus, then a better way to present might be the following
# Fourteen line colours drawn from four sequential palettes:
# 4 oranges, 4 greens, 3 blues and 3 purple-reds, in that order.
mycols <- c(
  brewer.pal(4, "Oranges"),
  brewer.pal(4, "Greens"),
  brewer.pal(3, "Blues"),
  brewer.pal(3, "PuRd")
)
# Lookup table mapping each milieu code (V1) to its milieu group (V2).
# `text =` lets read.table() manage (and close) its own connection, and
# strip.white = TRUE drops the blank after each comma so that V2 holds
# "LEIT" rather than " LEIT".
mdf2 <- read.table(text = "
V1, V2
ETB, LEIT
PMA, LEIT
PER, LEIT
LEIT, LEIT
KON, TRADITION
TRA, TRADITION
DDR, TRADITION
TRADITION, TRADITION
BUM, MAIN
MAT, MAIN
MAIN, MAIN
EXP, HEDOS
HED, HEDOS
HEDOS, HEDOS", sep = ",", header = TRUE, stringsAsFactors = FALSE,
  strip.white = TRUE)
# Attach one colour per lookup row, then join group and colour onto the
# long-format data by milieu code.
mdf2 <- data.frame(mdf2, mycols = mycols, stringsAsFactors = FALSE)
mdf3 <- merge(mdf, mdf2, by.x = "X", by.y = "V1")
p1 <- ggplot(mdf3, aes(x = variable, y = value, group = X, colour = mycols)) +
  # The `subset = .()` layer argument no longer exists in ggplot2, so each
  # layer gets its own pre-filtered data. Three-letter codes are drawn thin,
  # the longer codes (LEIT, TRADITION, MAIN, HEDOS) thick.
  geom_line(data = subset(mdf3, nchar(as.character(X)) == 3)) +
  geom_line(data = subset(mdf3, nchar(as.character(X)) != 3), size = 1.5) +
  facet_wrap(~V2) +
  # Identity scales draw no legend unless a guide is requested explicitly.
  scale_color_identity(
    name = "Milieus",
    breaks = mdf2$mycols,
    labels = mdf2$V1,
    guide = "legend"
  ) +
  theme_bw() +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
The trick is to reshape your data into tall format before you pass it to ggplot. This is easy when using the melt function in package reshape2:
Assuming your data is a variable called df:
library(reshape2)
library(ggplot2)
# Long format: one row per (X, variable) pair with the number in `value`.
mdf <- melt(df, id.vars = "X")
str(mdf)
# One coloured line per level of X across the melted variables.
ggplot(mdf, aes(x = variable, y = value, colour = X, group = X)) +
  geom_line() +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Edit: As @Chase suggests, you can use facetting to make the plot more readable:
# One panel per melted variable, with a point for each level of X.
ggplot(mdf, aes(x = X, y = value)) +
  geom_point() +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_wrap(~variable)
First, melt the data to put it in a long format.
# Long format keyed by X: every other column becomes variable/value rows.
melted_data <- melt(data = the_data, id.vars = "X")
Now draw the plot with a numeric x axis, and fix up the labels.
# Plot the variable factor by its integer codes so that x is continuous,
# then put the level names back on the axis as tick labels.
p <- ggplot(melted_data, aes(as.numeric(variable), value, colour = X)) +
  geom_line() +
  scale_x_continuous(
    breaks = seq_len(nlevels(melted_data$variable)),
    labels = levels(melted_data$variable)
  ) +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(angle = 90))
p
Having answered this, I'm not sure what the plot tells you – it's just a jumble of lines to me. You might be better off greying out most of the lines and highlighting one or two interesting ones.
Add a column that picks out, e.g., EXP.
# Logical flag: TRUE on the rows whose X is "EXP".
melted_data$is_EXP <- melted_data$X == "EXP"
Ignore my previous answer; Andrie's is better. Use manual colour and size scales to highlight your new column.
# Draw every X as its own line; colour and size follow the is_EXP flag, so
# flagged lines are black and thick, all others light grey and thin.
p <- ggplot(
  melted_data,
  aes(variable, value, colour = is_EXP, size = is_EXP, group = X)
) +
  geom_line() +
  scale_colour_manual(values = c("grey80", "black")) +
  scale_size_manual(values = c(0.5, 1.5)) +
  # theme()/element_text() replace the removed opts()/theme_text().
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
p
Related
I have this dataframe:
structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
It is a dataframe where each row is a category, and the columns correspond to a time series (from T0 to T5).
I want to do a bar chart for each category (taxon) for their time (T0-T5):
# Long format: every non-taxon column becomes a (timepoint, value) row.
melted_df <- reshape2::melt(
  taxonFC1,
  id.vars = "taxon",
  variable.name = "timepoint",
  value.name = "value"
)
# One bar panel per taxon, three panels per row.
ggplot(melted_df, aes(x = timepoint, y = value, fill = taxon)) +
  geom_col() +
  facet_wrap(~ taxon, ncol = 3) +
  labs(
    title = "Bar Chart for Different Time Series",
    x = "Time Point",
    y = "Value",
    fill = "Category"
  )
The question is if it is possible to assign the standard error (se columns) to their logFC value (lfc columns) for each time series.
Update:
I did this, but only for T0:
# Bars for the lfc_StatusChronic.ACST0. column only, one per taxon.
ggplot(
  data = taxonFC1,
  aes(x = taxon, y = lfc_StatusChronic.ACST0., fill = taxon)
) +
  geom_col() +
  # Error bars reach from lfc - se to lfc + se, using the matching se column.
  geom_errorbar(
    aes(
      ymin = lfc_StatusChronic.ACST0. - se_StatusChronic.ACST0.,
      ymax = lfc_StatusChronic.ACST0. + se_StatusChronic.ACST0.
    ),
    width = 0.4
  ) +
  labs(
    x = "Category",
    y = "lfc_StatusChronic.ACST0.",
    title = "Bar Plot of lfc_StatusChronic.ACST0. by Category"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Output expected (the image is from other data):
Is this what you're looking for?
library(dplyr)
library(tidyr)
library(ggplot2)
dat <- structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
# Give the T0 columns the same "<measure>_Time.f<period>" shape as T1-T5 so
# that one pattern can split every column name.
dat %>%
  rename(lfc_Time.fT0 = lfc_StatusChronic.ACST0.,
         se_Time.fT0 = se_StatusChronic.ACST0.) %>%
  # ".value" turns the first capture (lfc / se) into output columns; the
  # second capture (T0 ... T5) fills the `time` column.
  pivot_longer(-taxon, names_pattern = "(.*)_[Tt]ime\\.f(.*)",
               names_to = c(".value", "time")) %>%
  ggplot(aes(x = time, y = lfc, ymin = lfc - se, ymax = lfc + se,
             fill = taxon)) +
  geom_col() +
  geom_errorbar(width = 0.4) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_brewer(palette = "Set1") +
  # The labels now name what is plotted: time on x, lfc on y, a panel per
  # taxon.
  xlab("Time point") +
  ylab("logFC (+/- SE)") +
  facet_wrap(~taxon, ncol = 1) +
  ggtitle("Bar Plot of logFC by Time Point for each Taxon")
If so, the key is to rename the T0 variables to have the same format as the other time-period variables and then use pivot_longer() to put all the lfc measures in a single column and all the se measures in a single column. The rest is accomplished with faceting on the taxon variable. The pivot_longer() documentation has some good examples of retaining multiple columns; see in particular the last example on the page.
I have a dataset where the errors have the following x1 (age min), x2 (age max), y1 (height min), y2 (height max) and make a trapezium shape like this plot.
I want to do the same and plot these as errors and then have the gaussian process mean and error from a different model showing. To plot the errors as trapezium shapes I think I can do this using geom_polygon but I can't work out how to get the polygons to plot. It looks like you have to manually specify all of the coordinates see https://ggplot2.tidyverse.org/reference/geom_polygon.html . This seems extremely time-consuming to do for over 20 data points. Does anyone know of a more concise way to do this?
N.B. I have flipped the coordinates for the plot - this can be a bit confusing
Thanks,
library(ggplot2)
library(tidypalaeo)
### Create graph
# Base plot from WAPRSL: RSLc mapped to x and Age to y; coord_flip() below
# swaps the two axes when drawing.
ggplot(WAPRSL, aes(x =RSLc, y = Age))+
labs(x = "RSL (m)",y="Age (AD)")+
theme_classic()+
# Mean curve taken from the second data set, WAPRSLgp.
# NOTE(review): geom_lineh() is not a ggplot2 function; presumably it comes
# from the other attached package - confirm.
geom_lineh(data = WAPRSLgp, aes(x=mean,y=Age),col="#227988")+
coord_flip()+
# Two bands around the mean: +/- 1 x error and +/- 2 x error.
geom_ribbon(data = WAPRSLgp, aes(x=mean, xmax=mean+error, xmin=mean-error), fill="#227988",alpha=.5)+
geom_ribbon(data = WAPRSLgp, aes(x=mean, xmax=mean+error*2, xmin=mean-error*2), fill="#227988",alpha=.7)+
# Attempted trapezium layer. c(x1,x2,x2,x1) concatenates whole columns, so
# each aesthetic is four times as long as WAPRSL and no group is given.
# NOTE(review): this is the part the question reports as not working.
geom_polygon(data=WAPRSL, aes(c(x1,x2,x2,x1),c(y1,y1,y2,y2))) ### something like this?
current plot without polygons
Data
### WAPRSL data
structure(list(depths = c(0.5, 1.5, 2.5, 3.5, 4.5, 5.5), RSL = c(0.162319931,
0.170053941, 0.166157744, 0.268604159, 0.173369111, 0.207652794
), RSLerror = c(0.084355046, 0.084524909, 0.084307832, 0.084389419,
0.0838797, 0.083901714), Age = c(2017.393323, 2015.935137, 2013.065412,
2008.534508, 2004.853771, 2001.797776), Ageerror = c(0.183297248,
0.303357588, 0.566892665, 1.183257304, 2.427930603, 2.481236284
), RSLc = c(0.162319931, 0.16973314, 0.165205604, 0.26665522,
0.17061041, 0.204221774), y1 = c(2017.210026, 2015.631779, 2012.498519,
2007.351251, 2002.42584, 1999.31654), y2 = c(2017.57662, 2016.238495,
2013.632305, 2009.717765, 2007.281702, 2004.279012), x1 = c(0.162360256,
0.169799879, 0.16533032, 0.266915536, 0.171144554, 0.204767646
), x2 = c(0.162279606, 0.169666401, 0.165080887, 0.266394903,
0.170076265, 0.203675902)), row.names = c(NA, 6L), class = "data.frame")
### WAPRSLgp data
structure(list(Age = 1832:1837, mean = c(-0.098482271, -0.09855201,
-0.098622714, -0.098572523, -0.098894533, -0.099396926), error = c(0.054412551,
0.053483911, 0.052543897, 0.051595228, 0.05064071, 0.049683294
), min = c(-0.152894822, -0.152035921, -0.151166611, -0.150167751,
-0.149535243, -0.14908022), max = c(-0.04406972, -0.045068098,
-0.046078817, -0.046977296, -0.048253822, -0.049713632)), row.names = c(NA,
6L), class = "data.frame")
Your x1, x2, y1 and y2 points describe a perfect rectangle. Hence, the easiest thing is to simply use geom_rect(). I've commented out some lines since the WAPRSLgp data seems to describe a different part of the x-axis. The examples assume the WAPRSL data is in the global environment.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.0.5
# Outline one rectangle per row of WAPRSL, spanning x1-x2 and y1-y2.
ggplot(WAPRSL, aes(x = RSLc, y = Age)) +
  geom_rect(
    aes(xmin = x1, xmax = x2, ymin = y1, ymax = y2),
    colour = "black",
    fill = "transparent"
  ) +
  # Left disabled: the WAPRSLgp layers (mean line and the two error ribbons).
  # geom_line(data = WAPRSLgp, aes(x=mean,y=Age),col="#227988", orientation = "x")+
  # geom_ribbon(data = WAPRSLgp, aes(x=mean, xmax=mean+error, xmin=mean-error), fill="#227988",alpha=.5)+
  # geom_ribbon(data = WAPRSLgp, aes(x=mean, xmax=mean+error*2, xmin=mean-error*2), fill="#227988",alpha=.7) +
  coord_flip() +
  theme_classic() +
  labs(x = "RSL (m)", y = "Age (AD)")
However, if you insist on polygons, you'd need to reshape your data a bit.
# Number the rows so every polygon can be identified after reshaping.
WAPRSL$id <- seq_len(nrow(WAPRSL))
# Stack y1/y2 and then x2/x1 into long form: each original row becomes four
# rows, one per (y, x) corner combination.
poly <- tidyr::pivot_longer(WAPRSL, y1:y2, names_to = "y_var", values_to = "y_val")
poly <- tidyr::pivot_longer(poly, x2:x1, names_to = "x_var", values_to = "x_val")
# Correct for the order: within each id's block of four rows, swap the third
# and fourth row (1,2,4,3). The index relies on the rows being grouped by id
# in blocks of four.
# NOTE(review): presumably this puts the corners in drawing order so the
# outline does not cross itself - confirm against the plotted result.
poly <- poly[(poly$id - 1) * 4 + rep(c(1,2,4,3), max(poly$id)), ]
# Same base plot as before, with one polygon per id drawn from the
# reshaped corner table.
ggplot(WAPRSL, aes(x =RSLc, y = Age))+
labs(x = "RSL (m)",y="Age (AD)")+
theme_classic()+
# geom_line(data = WAPRSLgp, aes(x=mean,y=Age),col="#227988", orientation = "x")+
coord_flip()+
# geom_ribbon(data = WAPRSLgp, aes(x=mean, xmax=mean+error, xmin=mean-error), fill="#227988",alpha=.5)+
# geom_ribbon(data = WAPRSLgp, aes(x=mean, xmax=mean+error*2, xmin=mean-error*2), fill="#227988",alpha=.7) +
geom_polygon(
data = poly,
aes(x = x_val, y = y_val, group = id),
fill = NA, colour = "black"
)
Created on 2021-07-07 by the reprex package (v1.0.0)
I am trying to customize colors, size, and alpha of a ggplot with lines using data from a data frame. Specifically I want to highlight an average line superimposed over the other lines. I can get the colors to work, but neither alpha nor size parameters. The structure and code are below.
Appreciate any advice!
library(ggplot2)
library(reshape2)
# Example data: three series (V20-V22), an `avg` column and a `time` index.
df <- structure(list(V20 = c(0.021331, 0.034844, 0.023003, -0.001772,
-0.028383, -0.016064, 0, 0.016824, 0.045668, 0.029395), V21 = c(0.088426,
0.096454, 0.064565, 0.03234, 0.03058, 0.00994800000000001, 0,
0, 0, 0), V22 = c(0.091629, 0.061091, 0.039268, 0.00933999999999996,
0.005019, 0.000396000000000007, 0.009496, -0.013611, -0.020616,
-0.010839), avg = c(0.0130733014285714, -0.00393914761904762,
-0.00299403809523809, 0.00435276190476191, 0.00829649999999999,
0.00264482857142857, -0.0120784133333333, -0.00930774761904762,
0.00377989047619047, 0.00355469323809524), time = 1:10), row.names = c(NA,
10L), class = "data.frame")
# One row of line aesthetics per plotted series, i.e. every column except
# `time`. Selecting by name avoids `1:length(x)-1`, which parses as
# `(1:length(x)) - 1` and only works because index 0 is silently dropped.
# The scalar defaults are recycled to the length of `key`.
colors.df <- data.frame(
  key = setdiff(colnames(df), "time"),
  value = "gray",
  alpha = 0.2,
  size = 1.0,
  stringsAsFactors = FALSE
)
# Make the average line stand out: red, fully opaque and thicker.
is_avg <- colors.df$key == "avg"
colors.df$value[is_avg] <- "red"
colors.df$alpha[is_avg] <- 1.0
colors.df$size[is_avg] <- 3.0
# Long format: one row per (time, spike) pair with the reading in `voltage`.
df.melt <- melt(df, id='time', variable.name = "spike", value.name = "voltage")
# First attempt: only `colour` is mapped to `spike` in aes(). The alpha and
# size scales below are added, but no aesthetic is mapped to them here.
ggplot(df.melt, aes(x=time, y=voltage, colour=spike)) +
geom_line() +
# Each scale takes the matching column of colors.df; the 1:length(...) row
# index selects every row.
scale_alpha_manual(values = colors.df[1:length(colors.df$value),]$alpha) +
scale_color_manual(values = colors.df[1:length(colors.df$value),]$value) +
scale_size_manual(values= colors.df[1:length(colors.df$value),]$size)
Thanks @PoGibas for the hint!
# Map alpha and size to `spike` as well as colour, so that all three manual
# scales have an aesthetic to act on.
ggplot(
  df.melt,
  aes(x = time, y = voltage, colour = spike, alpha = spike, size = spike)
) +
  geom_line() +
  # Named vectors tie each value to its series by name rather than by the
  # position of the factor level.
  scale_alpha_manual(values = setNames(colors.df$alpha, colors.df$key)) +
  scale_color_manual(values = setNames(colors.df$value, colors.df$key)) +
  scale_size_manual(values = setNames(colors.df$size, colors.df$key))
I'd like to plot 2 different charts side by side with a common y-axis using ggplot2:
p1) an "area rectangle" graph using geom_rect()
p2) a density plot using geom_density()
I've come close by using grid.arrange(), but the y-axes don't quite line up. I don't think facet_wrap or facet_grid will work here but I could be wrong.
rm(list=ls())
library(ggplot2)
library(gridExtra)
library(dplyr)
df<-structure(list(total_ulr = c(0.442, 0.679, 0.74, 0.773, 0.777,
0.8036, 0.87, 0.871, 0.895, 0.986, 1.003, 1.2054, 1.546, 1.6072
), width = c(4222L, 14335L, 2572L, 2460L, 1568L, 8143L, 3250L,
17119L, 3740L, 3060L, 2738L, 1L, 1L, 790L), w = c(4222L, 18557L,
21129L, 23589L, 25157L, 33300L, 36550L, 53669L, 57409L, 60469L,
63207L, 63208L, 63209L, 63999L), wm = c(0L, 4222L, 18557L, 21129L,
23589L, 25157L, 33300L, 36550L, 53669L, 57409L, 60469L, 63207L,
63208L, 63209L), wt = c(2111, 11389.5, 19843, 22359, 24373, 29228.5,
34925, 45109.5, 55539, 58939, 61838, 63207.5, 63208.5, 63604),
mainbuckets = c(" 4,222", "14,335", " 2,572", " 2,460", " 1,568",
" 8,143", " 3,250", "17,119", " 3,740", " 3,060", " 2,738",
"", "", " 790"), mainbucketsULR = c("0.44", "0.68", "0.74",
"0.77", "0.78", "0.80", "0.87", "0.87", "0.90", "0.99", "1.00",
"", "", "1.61")), .Names = c("total_ulr", "width", "w", "wm",
"wt", "mainbuckets", "mainbucketsULR"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -14L))
textsize <- 4
# p1: one rectangle per row of df, spanning wm to w on x and 0 to total_ulr
# on y, with the x axis reversed. Two text layers label each rectangle.
p1 <- ggplot(df, aes(ymin = 0)) +
  geom_rect(aes(xmin = wm, xmax = w, ymax = total_ulr, fill = total_ulr)) +
  scale_x_reverse() +
  geom_text(aes(x = wt, y = total_ulr + 0.02, label = mainbuckets),
            size = textsize, color = "black") +
  geom_text(aes(x = wt, y = 0.02, label = mainbucketsULR),
            size = textsize + 1, color = "white", hjust = 0, angle = 90) +
  xlab("Frequency") +
  ylab("Ratio") +
  ggtitle("My Title") +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_blank())
# p2: density of total_ulr, flipped so total_ulr runs vertically, with the
# density axis reversed.
p2 <- ggplot(df, aes(total_ulr, fill = width, ymin = 0)) +
  geom_density(color = "grey", fill = "grey") +
  ggtitle("Density") +
  xlab("") +
  ylab("") +
  theme_bw() +
  coord_flip() +
  scale_y_reverse() +
  theme(text = element_text(size = 10),
        axis.text.x = element_blank(),
        legend.position = "none"
        # , axis.text.y = element_blank()
  )
# Arrange the two plots built above. The original call used p1.common.y and
# p2.common.y, which are not defined at this point in the script.
grid.arrange(p2, p1, ncol = 2, widths = c(1, 5))
Here you go. You need to define a common axis with the scale_y_* functions, then set the plot heights to match.
# Shared range and tick positions for the axis the two plots have in common.
limits <- c(0, 2)
breaks <- seq(from = limits[1], to = limits[2], by = 0.5)
# The shared axis is y in p1 but x in p2 (p2 uses coord_flip()), hence the
# two different scale functions. ggplotGrob() builds each plot and renders
# it to a gtable in one step.
p1.common.y <- ggplotGrob(
  p1 + scale_y_continuous(limits = limits, breaks = breaks)
)
p2.common.y <- ggplotGrob(
  p2 + scale_x_continuous(limits = limits, breaks = breaks)
)
# Same axis, but still unequal axis lengths: copy p1's row heights onto p2.
p2.common.y$heights <- p1.common.y$heights
grid.arrange(p2.common.y, p1.common.y, ncol = 2, widths = c(1, 5))
I am making a plot in ggplot2 that contains a geom_pointrange and a geom_line. I see that when I change the order of the geoms, either the points are plotted on top of the line, or vice versa. The legend also changes which geom is plotted on top of the other based on the same ordering of the geoms. However, I would like for the line to plot first, then the pointrange on top, in the plot itself, with the opposite in the legend. Is this possible? I would greatly appreciate any input.
Here is the code I used to make the figure.
md.figd2 <- structure(list(date = c("2013-05-28", "2013-07-11", "2013-09-22",
"2013-05-28", "2013-07-11", "2013-09-22", "2013-05-28", "2013-07-11",
"2013-09-22"), trt = structure(c(3L, 3L, 3L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("- Fescue", "- Random", "Control"), class = "factor"),
means = c(1, 0.921865257043089, 0.793438250521971, 1, 0.878305313846414,
0.85698797555687, 1, 0.840679145697309, 0.798547331410388
), mins = c(1, 0.87709562979756, 0.72278951032918, 1, 0.816185624483356,
0.763720265496049, 1, 0.780804129401513, 0.717089626439849
), maxes = c(1, 0.966634884288619, 0.864086990714762, 1,
0.940425003209472, 0.950255685617691, 1, 0.900554161993105,
0.880005036380927)), .Names = c("date", "trt", "means", "mins",
"maxes"), row.names = c(NA, 9L), class = "data.frame")
library(ggplot2)
dplot1.ysc <- scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .2),
                                 name = "Proportion mass lost")
# The data frame is md.figd2 and its `date` column is character, so the
# breaks are converted to Date. Each date occurs once per treatment; unique()
# leaves exactly the three dates that the three labels describe.
dplot1.xsc <- scale_x_date(limits = as.Date(c("2013-05-23", "2013-10-03")),
                           labels = c("May 28", "July 11", "Sep 22"),
                           breaks = unique(as.Date(md.figd2$date)),
                           name = "Date")
dplot1.csc <- scale_color_manual(values = c("grey20", "grey50", "grey80"))
dplot1.lsc <- scale_linetype_manual(values = c("solid", "dotted", "dashed"))
# Horizontal offset in days: 0 for the first three rows, -1 for the next
# three and +1 for the last three.
djitter <- rep(c(0, -1, 1), each = 3)
# This one produces the plot with the legend I want.
# Stored under its own name so that the next assignment does not discard it.
dplot1a <- ggplot(md.figd2,
                  aes(x = as.Date(date) + djitter, y = means, group = trt)) +
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2) +
  geom_line(aes(linetype = trt), size = 1)
# This one produces the plot with the points on the main plot that I want.
dplot1b <- ggplot(md.figd2,
                  aes(x = as.Date(date) + djitter, y = means, group = trt)) +
  geom_line(aes(linetype = trt), size = 1) +
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2)
dplot1b + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
You can use gtable::gtable_filter to extract the legend from the plot you want, and then gridExtra::grid.arrange to recreate the plot you want
# the legend I want
# Assigned as dplot1a, the name used further down when the legend is built.
# `date` is character in md.figd2, so it is converted for the date scale.
dplot1a <- ggplot(md.figd2, aes(x = as.Date(date), y = means, group = trt)) +
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2,
                  position = position_dodge(width = 1)) +
  geom_line(aes(linetype = trt), size = 1)
# This one produces the plot with the points on the main plot that I want.
dplot1b <- ggplot(md.figd2, aes(x = as.Date(date), y = means, group = trt)) +
  geom_line(aes(linetype = trt), size = 1) +
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2)
w <- dplot1b + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
# legend
l <- dplot1a + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
library(gtable)
library(gridExtra)
# extract legend ("guide-box" element)
leg <- gtable_filter(ggplot_gtable(ggplot_build(l)), "guide-box")
# plot the two components, adjusting the widths as you see fit.
grid.arrange(w + theme(legend.position = "none"), leg, ncol = 2,
             widths = c(3, 1))
An alternative is to simply replace the legend in the plot you want with the legend you want that you have extracted (using gtable_filter)
# Render the plot whose panel is wanted (w) into a grob table.
wGrob <- ggplotGrob(w)
# Overwrite the first grob whose layout name is exactly "guide-box" with the
# previously extracted legend `leg`.
wGrob$grobs[wGrob$layout$name == "guide-box"][[1]] <- leg
# Draw the modified grob table.
# NOTE(review): grid.draw() belongs to the grid package, which is not loaded
# explicitly in this snippet - confirm it is attached.
grid.draw(wGrob)
Quick and dirty. To get the correct plotting order in both figure and legend, add the layers like this: (1) geom_pointrange, (2) geom_line, and then (3) a second geom_pointrange without legend (show.legend = FALSE).
# `date` is character in md.figd2; scale_x_date() needs Date values, so it
# is converted inside aes().
ggplot(md.figd2, aes(x = as.Date(date), y = means, group = trt)) +
  # 1) Point ranges first, so the legend key is built in the wanted order.
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt),
                  position = position_dodge(width = 5), size = 2) +
  # 2) Lines on top of them.
  geom_line(aes(linetype = trt), size = 1) +
  # 3) The same point ranges again, drawn over the lines in the panel but
  #    kept out of the legend.
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt),
                  position = position_dodge(width = 5), size = 2,
                  show.legend = FALSE) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2),
                     name = "Proportion mass lost") +
  scale_x_date(limits = as.Date(c("2013-05-23", "2013-10-03")),
               name = "Date") +
  scale_color_manual(values = c("grey20", "grey50", "grey80")) +
  scale_linetype_manual(values = c("solid", "dotted", "dashed"))