Related
I have this dataframe:
structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
It is a data frame where each row is a category (taxon), and the columns hold the log fold change (lfc) and standard error (se) for each point of a time series (from T0 to T5).
I want to do a bar chart for each category (taxon) for their time (T0-T5):
# Reshape to long format: one row per taxon/column pair, with the original
# column name stored in `timepoint` and its number in `value`.
melted_df <- reshape2::melt(
  taxonFC1,
  id.vars = "taxon",
  variable.name = "timepoint",
  value.name = "value"
)

# One panel per taxon, three panels per row; bar heights are the raw values.
ggplot(melted_df, aes(x = timepoint, y = value, fill = taxon)) +
  geom_col() +
  facet_wrap(~ taxon, ncol = 3) +
  labs(
    title = "Bar Chart for Different Time Series",
    x = "Time Point",
    y = "Value",
    fill = "Category"
  )
The question is if it is possible to assign the standard error (se columns) to their logFC value (lfc columns) for each time series.
Update:
I did this, but only for T0:
# Bars of the T0 log fold change per taxon, with +/- one standard error.
ggplot(
  data = taxonFC1,
  aes(x = taxon, y = lfc_StatusChronic.ACST0., fill = taxon)
) +
  geom_col() +
  # Error bars run from lfc - se to lfc + se, using the matching se column
  geom_errorbar(
    aes(
      ymin = lfc_StatusChronic.ACST0. - se_StatusChronic.ACST0.,
      ymax = lfc_StatusChronic.ACST0. + se_StatusChronic.ACST0.
    ),
    width = 0.4
  ) +
  labs(
    x = "Category",
    y = "lfc_StatusChronic.ACST0.",
    title = "Bar Plot of lfc_StatusChronic.ACST0. by Category"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Output expected (the image is from other data):
Is this what you're looking for?
library(dplyr)
library(tidyr)
library(ggplot2)
dat <- structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
# Plot the log fold change (lfc) +/- its standard error (se) for every taxon
# at every time point, one panel per taxon.
dat %>%
  # Give the T0 columns the same "<measure>_Time.f<time>" naming as T1-T5 so
  # that a single pattern can split every column name.
  rename(lfc_Time.fT0 = lfc_StatusChronic.ACST0.,
         se_Time.fT0 = se_StatusChronic.ACST0.) %>%
  # ".value" turns the first capture group (lfc / se) into output columns;
  # the second capture group (T0 ... T5) is stored in `time`.
  pivot_longer(-taxon, names_pattern = "(.*)_[Tt]ime\\.f(.*)",
               names_to = c(".value", "time")) %>%
  ggplot(aes(x = time, y = lfc, ymin = lfc - se, ymax = lfc + se, fill = taxon)) +
  geom_bar(stat = "identity") +
  # ymin / ymax are inherited from the plot-level aes()
  geom_errorbar(width = .4) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_brewer(palette = "Set1") +
  # Labels describe what is actually drawn: time on x, lfc on y
  xlab("Time point") +
  ylab("Log fold change (lfc)") +
  facet_wrap(~taxon, ncol = 1) +
  ggtitle("Bar plot of lfc (+/- se) by time point for each taxon")
If so, the key is to rename the T0 variables to have the same format as the other time-period variables and then use pivot_longer() to put all the lfc measures in a single column and all the se measures in a single column. The rest is accomplished by mapping the time variable to the x-axis and faceting on taxon. The pivot_longer() documentation has some good examples of retaining multiple value columns; see in particular the last example on the page.
I have this data frame where I want to create multiple plots at the same time in a loop, but when I run the code it gives me an error. Can anyone please tell me what I am doing wrong!
Data:
structure(list(Date = structure(c(289094400, 297043200, 304992000,
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305,
4.75015422578655, 4.59363957597172, 3.15315315315317), YORKSANDTHEHUMBER = c(4.0121120363361,
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299,
5.42099192618225), NORTHWEST = c(6.57894736842105, 6.95256660168939,
6.50060753341436, 5.5904164289789, 4.59211237169096, 4.70041322314051
), EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127,
5.22388059701494, 5.61465721040189, 4.64465584778958), WESTMIDS = c(4.65838509316771,
4.74777448071216, 8.66855524079319, 6.56934306569344, 3.22896281800389,
3.17535545023698), EASTANGLIA = c(6.74525212835624, 8.58895705521476,
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835
), OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589,
8.56512141280351, 3.82269215128102, 2.11515863689776), OUTERMET = c(4.54545454545458,
6.58505698607005, 7.36633663366336, 7.08225746956843, 4.3747847054771,
1.68316831683168), LONDON = c(8.11719500480309, 10.3065304309196,
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978
), SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407,
9.45489628557649, 2.46804759806079, 2.19354838709679), WALES = c(6.09418282548476,
8.35509138381203, 7.40963855421687, 7.01065619742007, 1.15303983228513,
3.47150259067357), SCOTLAND = c(5.15222482435597, 4.12026726057908,
5.40106951871658, 8.67579908675796, -0.280112044817908, 2.94943820224719
), NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145,
2.96397628818967, 6.06731620903454, 0.0835073068893502), UK = c(5.76890543055322,
7.20302836425676, 7.39543442582184, 7.22885986848197, 3.23472252213347,
2.95766398929048)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Code:
# Print one line chart per column of data2, skipping column 1 (Date), and
# pause one second between plots.
# NOTE(review): data2 is a tibble (class "tbl_df"), so data2[, i] yields a
# one-column tibble rather than a plain vector; this is presumably what
# triggers the "Don't know how to automatically pick scale" error.
for (i in 2:ncol(data2)) { # Printing ggplot within for-loop
print(ggplot(data2, aes(x = Date, y = data2[, i])) + # Basic ggplot2 plot of x & y's
geom_line() +
labs(title = "Uk States",
y = "",
x = "") +
theme_bw() +
geom_hline(yintercept = 0))
Sys.sleep(1)
}
Error:
Don't know how to automatically pick scale for object of type tbl_df/tbl/data.frame. Defaulting to continuous.
Error in is.finite(x) : default method not implemented for type 'list'
I would suggest looping over the column names instead of the column indices. You can then use the .data pronoun, i.e. .data[[i]], to select the y variable by name.
library(tidyverse)
# Print one line chart per column of data2 except the first one (Date),
# pausing one second between plots.
value_cols <- names(data2)[-1]
for (col_name in value_cols) {
  # .data[[col_name]] selects the y column by the name held in `col_name`
  col_plot <- ggplot(data2, aes(x = Date, y = .data[[col_name]])) +
    geom_line() +
    labs(title = "Uk States", y = "", x = "") +
    theme_bw() +
    geom_hline(yintercept = 0)
  print(col_plot)
  Sys.sleep(1)
}
You may also try facet_wrap to combine multiple plots together.
# Long format: every column except Date is stacked, with the former column
# name in `name` and the number in `value`, so each series gets its own facet.
long_data <- pivot_longer(data2, cols = -Date)

ggplot(long_data, aes(Date, value)) +
  geom_line() +
  geom_hline(yintercept = 0) +
  facet_wrap(~name) +
  theme_bw() +
  labs(title = "Uk States", x = "", y = "")
Another way of generating ggplots in a loop is to use lapply, where we iterate over the column names and use aes_string for the aesthetic mapping.
Here the results are saved to the list ggplot_list, where you can extract individual plot by indexing (e.g. plot for NORTH is stored in ggplot_list[[1]])
Note that I've changed labs(title = i) so that the plot title would be your column names.
library(ggplot2)
# Build one line chart per column of data2 except the first (Date) and
# collect the plots in a list, in column order.
ggplot_list <- lapply(colnames(data2[-1]), \(i) {
  # `i` holds the column name as a string; aes_string() takes its aesthetics
  # as strings, so the y variable is `i` itself.
  ggplot(data2, aes_string("Date", i)) +
    geom_line() +
    # Title each plot with the column it shows
    labs(title = i, y = "", x = "") +
    theme_bw() +
    geom_hline(yintercept = 0)
})
Data:
df1 <- structure(list(Index = 1:11, Duration = structure(c(1487577655,
1487577670, 1487577675, 1487577680, 1487577685, 1487577680, 1487577700,
1487577705, 1487577695, 1487577700, 1487577680), class = c("POSIXct",
"POSIXt"), tzone = "")), .Names = c("Index", "Duration"), class = "data.frame", row.names = 3:13)
Now I construct the graph as follows:
# Base plot: Duration (POSIXct) against Index, drawn as points plus a
# connecting line; Duration is also mapped to colour.
g1 <- ggplot(df1, aes(x = Index, y = Duration, color = Duration))+
geom_point()+
geom_line()+
# Show the y-axis labels as minutes:seconds
scale_y_datetime(labels = date_format("%M:%S"))
As it is now, the color scale is set to the default "Black" to "Blue" gradient.
The problem is, I get an error trying to assign a custom gradient to the data.
For a non-POSIXct object:
scale_color_gradient("Duration", low = "#D80427", high = "#07a0ff", space = "Lab")
works, but I get the following error with the POSIXct object df1$Duration as the explanatory variable:
Error in Ops.POSIXt((x - from[1]), diff(from)) : '/' not defined
for "POSIXt" objects
Is there a different gradient function I need to use when graphing a POSIXct object?
You may use trans = time_trans():
library(ggplot2)
library(scales)
# Add a custom two-colour gradient to g1. The trans = time_trans() argument
# is what lets the continuous colour scale accept the POSIXct values mapped
# to colour.
g1 +
scale_color_gradient("Duration", low = "#D80427", high = "#07a0ff",
trans = time_trans())
If you wish another format of the labels in the legend, add e.g. labels = format(pretty(df1$Duration), "%M:%S").
We can convert date to number for colour:
library(ggplot2)
library(scales)
# Map colour to the numeric value of Duration so that the ordinary
# continuous gradient scale can be used.
ggplot(df1, aes(x = Index, y = Duration, color = as.numeric(Duration))) +
geom_point() +
geom_line() +
scale_y_datetime(labels = date_format("%M:%S")) +
# NOTE(review): these five labels are hard-coded, so they presumably only
# work while the scale produces exactly five breaks for this data.
scale_color_gradient("Duration", low = "#D80427", high = "#07A0FF",
labels = c("00", "10", "20", "30", "40"))
As suggested by @Henrik, to avoid hardcoding the labels, use the code below:
# Avoid hardcoding labels using pretty().
# Compute the break positions once, on the date-time values, and derive both
# the numeric breaks and their labels from that single vector. Calling
# pretty() separately on the numeric and on the POSIXct values uses two
# different methods, which need not return the same number of breaks.
duration_breaks <- pretty(df1$Duration)

ggplot(df1, aes(x = Index, y = Duration, color = as.numeric(Duration))) +
  geom_point() +
  geom_line() +
  scale_y_datetime(labels = date_format("%M:%S")) +
  scale_color_gradient("Duration", low = "#D80427", high = "#07A0FF",
                       breaks = as.numeric(duration_breaks),
                       labels = format(duration_breaks, "%M:%S"))
I am having certain issues while converting ggplot2 plots to plotly plots.
Here is my code:
> dput(dat.c)
structure(list(Cell_Line = structure(1:15, .Label = c("NBLS",
"NBSD", "NGP", "NLF", "RPE1", "RPE1MYCN40HT", "RPE1MYCNWT", "RPE1WT40HT",
"SKNAS", "SKNDZ", "SKNFI", "SKNKAN", "SKNSH", "SMSSAN", "SY5Y"
), class = "factor"), A1CF = c(5.10772389542502, 5.04909249557583,
5.16367852798093, 5.14220860530212, 5.25310652067225, 5.26436607107436,
5.230991944454, 5.4310065318786, 5.18630235564568, 5.02696275142877,
5.04518295317946, 5.15650800484188, 5.18630235564568, 5.18630235564568,
5.04905014785891), A2M = c(5.95668979157631, 5.59054925920344,
5.87084903365957, 5.85359773104682, 5.94551823960579, 5.82444459419149,
5.69488212149351, 5.70563676209623, 5.81016207843128, 5.66186721932247,
5.62134775395947, 5.62471305571508, 5.67165736680416, 5.76130826308792,
5.88006576048066), A2ML1 = c(5.56172395998964, 5.50076886952901,
5.7884753846352, 5.86613223339835, 5.82836474266047, 5.62750510524894,
5.76666636363946, 5.95103526370421, 5.58407662670697, 5.44780492507868,
5.35529657578242, 5.58813057293296, 5.67254168845041, 5.68685275370324,
5.6859273460443), A4GALT = c(6.73058652581215, 6.57480531818191,
6.70607981578649, 6.97173508307211, 7.0975112557987, 6.8286006127757,
6.56835917368749, 7.07629253436335, 6.66209247382635, 6.5876785423068,
6.59571996076717, 6.46673750407667, 6.70110916967979, 6.85058340238055,
6.59506833206593), A4GNT = c(4.87275116647384, 4.60002647258705,
4.99494601675408, 4.69477600401491, 4.7985530619801, 4.8349540959233,
4.77659739577691, 4.95071744980212, 4.77868342368918, 4.8025955817638,
4.87887068068956, 4.84258505663777, 4.84258505663777, 4.84616620572434,
4.66050997534254)), .Names = c("Cell_Line", "A1CF", "A2M", "A2ML1",
"A4GALT", "A4GNT"), row.names = c(NA, -15L), class = "data.frame")
gene1 <- "A2M"

# Wrap the gene name in backticks so that aes_string() can parse names
# containing dashes (present in most of them).
gene1.mut <- paste0("`", gene1, "`")

# Bar chart of the selected gene's value per cell line, then convert it to
# an interactive plotly figure.
p <- ggplot(
  dat.c,
  aes_string(x = "Cell_Line", y = gene1.mut, fill = "Cell_Line")
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = gene1)

ggplotly(p)
This generates a figure like this:
As you can see, the one bar with X label RPE1MYCN40HT is not shown completely. Also the X and Y axis titles are clipped. How do I adjust the X axis labels and title so that they are not clipped? I do want to stick with ggplotly() instead of plot_ly() if that is possible.
Try adjusting plot.margin in theme():
# Same bar chart, with a 3 cm margin on every side of the plot so that the
# rotated axis labels and the axis titles have room.
ggplot(dat.c, aes_string(x = 'Cell_Line', y = gene1.mut, fill = 'Cell_Line')) +
  geom_bar(stat = "identity") +
  labs(title = gene1) +
  theme(
    plot.margin = unit(c(3, 3, 3, 3), "cm"),
    axis.text.x = element_text(angle = 90)
  )
When the window is small, the x-axis title overlaps the x-axis tick labels, but when the window is large it does not.
I am making a plot in ggplot2 that contains a geom_pointrange and a geom_line. I see that when I change the order of the geoms, either the points are plotted on top of the line, or vice versa. The legend also changes which geom is plotted on top of the other based on the same ordering of the geoms. However, I would like for the line to plot first, then the pointrange on top, in the plot itself, with the opposite in the legend. Is this possible? I would greatly appreciate any input.
Here is the code I used to make the figure.
md.figd2 <- structure(list(date = c("2013-05-28", "2013-07-11", "2013-09-22",
"2013-05-28", "2013-07-11", "2013-09-22", "2013-05-28", "2013-07-11",
"2013-09-22"), trt = structure(c(3L, 3L, 3L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("- Fescue", "- Random", "Control"), class = "factor"),
means = c(1, 0.921865257043089, 0.793438250521971, 1, 0.878305313846414,
0.85698797555687, 1, 0.840679145697309, 0.798547331410388
), mins = c(1, 0.87709562979756, 0.72278951032918, 1, 0.816185624483356,
0.763720265496049, 1, 0.780804129401513, 0.717089626439849
), maxes = c(1, 0.966634884288619, 0.864086990714762, 1,
0.940425003209472, 0.950255685617691, 1, 0.900554161993105,
0.880005036380927)), .Names = c("date", "trt", "means", "mins",
"maxes"), row.names = c(NA, 9L), class = "data.frame")
library(ggplot2)
# scale_x_date() and `date + djitter` both need a Date column, while the
# dput() output stores `date` as character. as.Date() is a no-op if the
# column is already a Date.
md.figd2$date <- as.Date(md.figd2$date)

# Shared scales for both plot variants
dplot1.ysc <- scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .2), name = 'Proportion mass lost')
# One break per distinct sampling date, so that the three labels line up
# with three breaks.
dplot1.xsc <- scale_x_date(limits = as.Date(c('2013-05-23', '2013-10-03')), labels = c('May 28', 'July 11', 'Sep 22'), breaks = unique(md.figd2$date), name = 'Date')
dplot1.csc <- scale_color_manual(values = c('grey20', 'grey50', 'grey80'))
dplot1.lsc <- scale_linetype_manual(values = c('solid', 'dotted', 'dashed'))
# Horizontal offset in days, one entry per row of md.figd2
djitter <- rep(c(0, -1, 1), each = 3)
# This one produces the plot with the legend I want.
dplot1a <- ggplot(md.figd2, aes(x = date + djitter, y = means, group = trt)) + geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2) + geom_line(aes(linetype = trt), size = 1)
# This one produces the plot with the points on the main plot that I want.
dplot1b <- ggplot(md.figd2, aes(x = date + djitter, y = means, group = trt)) + geom_line(aes(linetype = trt), size = 1) + geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2)
dplot1b + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
You can use gtable::gtable_filter to extract the legend from the plot you want, and then gridExtra::grid.arrange to recreate the plot you want
# NOTE(review): scale_x_date() presumably requires md.figd2$date to be of
# class Date - confirm before running.

# the legend I want (pointrange added first, line second)
dplot1a <- ggplot(md.figd2, aes(x = date, y = means, group = trt)) +
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2,
                  position = position_dodge(width = 1)) +
  geom_line(aes(linetype = trt), size = 1)
# This one produces the plot with the points on the main plot that I want.
dplot1b <- ggplot(md.figd2, aes(x = date, y = means, group = trt)) +
  geom_line(aes(linetype = trt), size = 1) +
  geom_pointrange(aes(ymin = mins, ymax = maxes, color = trt), size = 2)
# w: the panel to keep
w <- dplot1b + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
# l: the plot whose legend is wanted
l <- dplot1a + dplot1.xsc + dplot1.ysc + dplot1.csc + dplot1.lsc
library(gtable)
library(gridExtra)
# extract legend ("guide-box" element)
leg <- gtable_filter(ggplot_gtable(ggplot_build(l)), 'guide-box')
# plot the two components, adjusting the widths as you see fit.
grid.arrange(w + theme(legend.position = 'none'), leg, ncol = 2, widths = c(3, 1))
An alternative is to simply replace the legend in the plot you want with the legend you want that you have extracted (using gtable_filter)
# create ggplotGrob of plot you want
wGrob <- ggplotGrob(w)
# replace the legend: overwrite the grob whose layout name is "guide-box"
# with the legend extracted earlier (`leg`)
wGrob$grobs[wGrob$layout$name == "guide-box"][[1]] <- leg
# grid.draw() lives in the grid package; qualify it so the call does not
# depend on grid being attached
grid::grid.draw(wGrob)
Quick and dirty. To get the correct plotting order in both figure and legend, add the layers like this: (1) geom_pointrange, (2) geom_line, and then (3) a second geom_pointrange without legend (show.legend = FALSE).
# Layer order: pointrange first, then the line, then the same pointrange
# again without a legend entry, so the points are drawn over the lines in
# the panel.
ggplot(md.figd2, aes(x = date, y = means, group = trt)) +
  geom_pointrange(
    aes(ymin = mins, ymax = maxes, color = trt),
    size = 2,
    position = position_dodge(width = 5)
  ) +
  geom_line(aes(linetype = trt), size = 1) +
  geom_pointrange(
    aes(ymin = mins, ymax = maxes, color = trt),
    size = 2,
    position = position_dodge(width = 5),
    show.legend = FALSE
  ) +
  scale_color_manual(values = c('grey20', 'grey50', 'grey80')) +
  scale_linetype_manual(values = c('solid', 'dotted', 'dashed')) +
  scale_x_date(
    limits = as.Date(c('2013-05-23', '2013-10-03')),
    name = 'Date'
  ) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, 0.2),
    name = 'Proportion mass lost'
  )