I am trying to shade the 0.025 and 0.975 quantiles on this graph that has three lines. I have tried geom_area, geom_ribbon, and I cannot highlight every quantile in every line.
Please note that "y" was ignored in this density graph.
# Toy data: ten numbered observations for each of three sources.
example <- data.frame(
  source = rep(c("Leaflitter", "Biofilm", "Algae"), each = 10),
  n = rep(as.numeric(1:10), times = 3),
  density = c(
    # Leaflitter
    0.554786934, 0.650578421, 0.039317168, 0.53537613, 0.435081982,
    0.904056941, 0.556284164, 0.855319434, 0.399169622, 0.570246304,
    # Biofilm
    0.076722032, 0.257427999, 0.172736928, 0.447424473, 0.520976948,
    0.011720494, 0.311348655, 0.120698996, 0.016336661, 0.331741377,
    # Algae
    0.368491034, 0.09199358, 0.787945904, 0.017199397, 0.04394107,
    0.084222564, 0.132367181, 0.023981569, 0.584493716, 0.098012319
  )
)
example
One subgroup and quantiles
# Subset one source and compute its 2.5% / 97.5% sample quantiles.
# filter() is namespaced so that stats::filter() is never picked up by mistake.
L <- dplyr::filter(QPA_G_Feb17, source == "Leaflitter")
L <- as.data.frame(L)
Lq025 <- quantile(L$density, .025)
Lq975 <- quantile(L$density, .975)

# Kernel density of the contribution for every source, one styled line each.
# Only x is mapped; geom_density() computes the y values itself.
ggplot(QPA_G_Feb17, aes(x = density, color = source)) +
  labs(y = "Density", x = "Source contribution") +
  geom_density(aes(linetype = source), size = 1.2) +
  scale_color_manual(values = c("#31a354", "#2c7fb8", "#d95f0e")) +
  scale_linetype_manual(values = c("solid", "dotted", "longdash")) +
  theme_classic() +
  ylim(0, 5) +
  theme(
    axis.text.y = element_text(angle = 0, size = 12, vjust = 0.5, color = "black"),
    axis.text.x = element_text(angle = 0, size = 12, vjust = 0.5, color = "black"),
    axis.title.x = element_text(color = "black", size = 14),
    axis.title.y = element_text(color = "black", size = 14)
  )
I would appreciate your help since I have looked in other forums, and there is information to highlight when there is only 1 line.
I think this data is a bit more representative of the data displayed in your plot:
# Reproducible stand-in data: 400 gamma draws per source, drawn in the order
# Algae, Biofilm, Leaflitter so the random stream is consumed source by source.
set.seed(50)
QPA_G_Feb17 <- data.frame(
  density = unlist(Map(
    function(shape, rate) rgamma(400, shape = shape, rate = rate),
    c(2, 2.25, 5),
    c(10, 9, 7)
  )),
  source = rep(c("Algae", "Biofilm", "Leaflitter"), each = 400)
)
I find that when you are trying to do something complex or non-standard in ggplot, the best thing to do is calculate the data you wish to plot ahead of time. In this case, we can calculate the density curves and the cumulative densities, including their 0.025 and 0.975 quantiles, and putting them all in a data frame like this:
# Kernel density estimate per source, each evaluated on the same grid over [0, 1].
dens <- lapply(
  split(QPA_G_Feb17, QPA_G_Feb17$source),
  function(x) density(x$density, from = 0, to = 1)
)

# Stack the per-source curves into one long data frame with columns x, y, source.
df <- do.call(rbind, Map(function(curve, src) {
  data.frame(x = curve$x, y = curve$y, source = src)
}, dens, names(dens)))

# Approximate each curve's cumulative density as a running sum of
# height * grid step, then flag the points lying in the lower / upper 2.5% tail.
# The result is ungrouped so later subsetting and plotting see a plain table.
df <- df %>%
  group_by(source) %>%
  mutate(
    cdf = cumsum(y * mean(diff(x))),
    lower = cdf < 0.025,
    upper = cdf > 0.975
  ) %>%
  ungroup()
Now it is easy to plot using geom_area:
# Density curves per source with both flagged tails filled underneath.
ggplot(df, aes(x = x, y = y, colour = source)) +
  # Tail areas first so the curves are drawn on top of them; "identity"
  # keeps the areas overlapping instead of stacking them.
  geom_area(
    aes(fill = source),
    data = df[df$lower, ],
    alpha = 0.5,
    position = "identity"
  ) +
  geom_area(
    aes(fill = source),
    data = df[df$upper, ],
    alpha = 0.5,
    position = "identity"
  ) +
  geom_line(aes(linetype = source), size = 1.2) +
  xlab("Source contribution") +
  ylab("Density") +
  scale_colour_manual(values = c("#31a354", "#2c7fb8", "#d95f0e")) +
  scale_fill_manual(values = c("#31a354", "#2c7fb8", "#d95f0e")) +
  scale_linetype_manual(values = c("solid", "dotted", "longdash")) +
  lims(x = c(0, 1), y = c(0, 5)) +
  theme_classic() +
  theme(
    axis.text.x = element_text(size = 12, vjust = 0.5),
    axis.text.y = element_text(size = 12, vjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  )
Here, the 2.5% and 97.5% extremities of each density curve are shaded below each line. The exception is the "Leaflitter" line, which clearly extends beyond the 0-1 range that has been plotted in your example.
Related
I am trying to adapt the approach from (ggplot2 multiple sub groups of a bar chart) but something is not as it should be.
The code is:
library(grid)
# Build a long table of 24 rows: 4 methods x 3 measures x train/test,
# with a random Value between 4000 and 7000 for each row (no seed is set).
MethodA= rep(c("ARIMA"), 6)
MethodB=rep(c("LSTM"), 6)
MethodC = rep(c("ARIMA-LSTM"),6)
MethodD=rep(c("SSA"),6)
Method=c(MethodA, MethodB, MethodC, MethodD)
Measure = rep(c("RMSE", "RMSE", "MAE", "MAE", "MAPE", "MAPE"), 4)
trtest=rep(c("train", "test"), 12)
Value=sample(x = 4000:7000, size = 24, replace = TRUE)
df2 <- data.frame(Method, Measure, trtest, Value)
# `dodge` is only referenced by the commented-out error-bar layer below.
dodge <- position_dodge(width = 0.9)
# NOTE(review): this plot reads `df` and the columns Variety, Trt, yield and geno,
# none of which exist in `df2` built above - presumably left over from the example
# being adapted; confirm the intended data and mapping.
g1 <- ggplot(data = df, aes(x = interaction(Variety, Trt), y = yield, fill = factor(geno))) +
geom_bar(stat = "identity", position = position_dodge()) +
#geom_errorbar(aes(ymax = yield + SE, ymin = yield - SE), position = dodge, width = 0.2) +
coord_cartesian(ylim = c(0, 7500)) +
# Hand-placed two-level axis labels at negative y, i.e. below the 0-7500 panel range.
annotate("text", x = 1:6, y = - 10,
label = rep(c("Variety 1", "Variety 2", "Variety 3"), 2)) +
annotate("text", c(1.5, 3.5), y = - 20, label = c("Irrigated", "Dry")) +
theme_classic() +
# Larger bottom margin for the manual labels; the built-in x title and text are blanked.
theme(plot.margin = unit(c(1, 1, 4, 1), "lines"),
axis.title.x = element_blank(),
axis.text.x = element_blank())
# remove clipping of x axis labels
g2 <- ggplot_gtable(ggplot_build(g1))
g2$layout$clip[g2$layout$name == "panel"] <- "off"
grid.draw(g2)
The problem is also in the sequence that the interaction function generates: the groups are not in the intended order - ARIMA - RMSE, MAE, MAPE, then LSTM - RMSE, MAE, MAPE ...
I would appreciate any help.
Best,
Nikola
Instead of using interaction, it might be a lot clearer if you use facets.
Note that your example is not reproducible (your sample data has different variable names from the ones you use in your plotting code, so I had to guess which you meant to substitute):
# Dodged train/test bars per measure, with one facet column per method whose
# strip is moved below the panel so it reads as a second-level x-axis label.
ggplot(df2, aes(x = Measure, y = Value, fill = trtest)) +
  geom_col(position = position_dodge()) +
  coord_cartesian(ylim = c(0, 7500)) +
  facet_grid(cols = vars(Method), switch = "x") +
  theme_classic() +
  theme(
    strip.placement = "outside",
    strip.background = element_blank(),
    strip.text = element_text(face = "bold", size = 16),
    panel.spacing.x = unit(0, "mm"),
    panel.border = element_rect(fill = NA, color = "gray")
  )
I have a bar graph (created using ggplot2 package) coming from one set of yearly data (with multiple y values)and I want to overlay on it data from another set of yearly data in the form of a line. Here is my code:
library (zoo)
require(ggplot2)
library(reshape)
library(Cairo)
library(reshape2)
# Yearly series: y1 and y2 feed the stacked bars, y3 is kept for the line overlay.
x <- c(2000, 2001, 2002, 2003, 2004)
y1 <- c(41, 62, 71, 316, 172)
y2 <- c(3018, 2632, 2643, 2848, 2738)
y3 <- c(3065, 2709, 2721, 3192, 2925)

# Long format: one row per Year/variable pair, as needed for a fill mapping.
dat1 <- data.frame(Year = x, y1, y2)
dat.m1 <- melt(dat1, id.vars = "Year")

# Stacked bar chart of the melted values with the legend placed on top.
a <- ggplot(dat.m1, aes(x = Year, y = value)) +
  geom_col(aes(fill = variable), width = 0.6) +
  labs(x = "Year", y = "Water Depth (mm)") +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 14),
    panel.background = element_rect(fill = "white", colour = "black"),
    axis.text = element_text(size = 13),
    axis.title = element_text(size = 14),
    plot.margin = unit(c(0.2, 0.7, 0.5, 0.2), "cm")
  ) +
  guides(
    fill = guide_legend(
      title = "",
      title.position = "top",
      direction = "horizontal"
    )
  )
a
At this stage, bar plot is running nicely but when I tried to add line plot from different data frame as follow:
# Second data set holding the y3 series, melted to long format like dat.m1.
dat2 <- data.frame(Year=x, y3)
dat.m2 <- melt(dat2, id.vars='Year')
# Same bar chart as before, plus an attempted red line layer drawn from dat.m2.
b<-ggplot(dat.m1, aes(Year, value)) +
geom_bar(width=0.6,aes(fill = variable),stat = "identity")+
# NOTE(review): dat.m2 is passed positionally, so geom_line() receives it as `mapping`
# rather than `data`; this is the call behind the validate_mapping() error quoted below.
geom_line(dat.m2, aes(x = x, y = y3), size = 1.5, color="red") +
xlab("Year") + ylab("Water Depth (mm)")+
theme(legend.position="top")+
theme(panel.background = element_rect(fill = 'white', colour = 'black'))+
theme(axis.text=element_text(size=13),axis.title=element_text(size=14))+
theme(legend.text=element_text(size=14))+
theme(plot.margin=unit(c(0.2,0.7,0.5,0.2),"cm"))+
guides(fill = guide_legend(title="", title.position="top", direction="horizontal"))
b
It did not work and I received this error message:
"Error in validate_mapping():
! mapping must be created by aes()
Run rlang::last_error() to see where the error occurred."
Anyone can help me to fix this issue? Also, any suggestion to add a line plot with each bar in the first data frame?
You need to add argument name data in geom_line(). Otherwise dat.m2 is received as mapping to the geom_line function.
# Second data set holding the y3 series; after melt() its columns are
# Year, variable and value.
dat2 <- data.frame(Year = x, y3)
dat.m2 <- melt(dat2, id.vars = "Year")

# Stacked bars from dat.m1 with the y3 series overlaid as a red line.
b <- ggplot(dat.m1, aes(Year, value)) +
  geom_bar(width = 0.6, aes(fill = variable), stat = "identity") +
  # `data` must be named, otherwise dat.m2 is taken as the `mapping` argument.
  # The aesthetics use dat.m2's own columns (Year, value) so the layer does not
  # depend on the global vectors x and y3 still being around.
  geom_line(
    data = dat.m2,
    aes(x = Year, y = value),
    size = 1.5,
    color = "red"
  ) +
  xlab("Year") + ylab("Water Depth (mm)") +
  theme(legend.position = "top") +
  theme(panel.background = element_rect(fill = "white", colour = "black")) +
  theme(axis.text = element_text(size = 13),
        axis.title = element_text(size = 14)) +
  theme(legend.text = element_text(size = 14)) +
  theme(plot.margin = unit(c(0.2, 0.7, 0.5, 0.2), "cm")) +
  guides(fill = guide_legend(
    title = "",
    title.position = "top",
    direction = "horizontal"
  ))
b
I'm trying to plot two normal distribution density curves for null and alternative hazard ratios of 1 and 0.65, respectively, to replicate an example (plot attached). Here's my code so far. It doesn't make sense to me to have negative values for hazard ratios, but when I leave the negative values out, the distributions are cut off, so I know I'm doing something wrong here. Thanks!
# Evaluation grid shared by both curves.
x <- seq(from = -2, to = 2, length.out = 100000)

# Long table with the null and alternative normal densities on the grid;
# both use the same standard deviation and differ only in their mean.
df <- rbind(
  data.frame(
    x = x,
    y = dnorm(x, mean = log(1), sd = sqrt(1/60 + 1/60)),
    id = "H0, HR = 1"
  ),
  data.frame(
    x = x,
    y = dnorm(x, mean = log(0.65), sd = sqrt(1/60 + 1/60)),
    id = "H1, HR = 0.65"
  )
)

# Position of the dashed reference line and of the shading boundary.
vline <- 0.65
# Two density curves with a dashed cut-off line; the area layer keeps only the
# H1 points right of the cut-off and the H0 points left of it.
p1 <- ggplot(df, aes(x = x, y = y, group = id, colour = id)) +
  geom_line() +
  geom_area(
    aes(fill = id),
    data = function(d) {
      subset(
        d,
        (id == "H1, HR = 0.65" & x > (vline)) | (id == "H0, HR = 1" & x < (vline))
      )
    },
    alpha = 0.3
  ) +
  geom_vline(xintercept = vline, linetype = "dashed") +
  xlab("log(Hazard Ratio)") +
  ylab("Density") +
  xlim(-2, 2) +
  scale_colour_manual(name = "", values = c("red", "blue")) +
  scale_fill_manual(values = c("red", "blue")) +
  # No fill legend; the colour legend keys get a white background.
  guides(
    fill = "none",
    colour = guide_legend(override.aes = list(fill = "white"))
  ) +
  theme_classic() +
  theme(
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10),
    legend.position = c(0.8, 0.4),
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
The plot I'm trying to replicate
This gets reasonably close to the image that you have posted.
You should not use the log() of the means, but rather the means as is. Moreover if you use the normal distribution, you assume that parameters can take any value between -Inf and Inf, albeit with very small densities far from the mean. Therefore, you cannot expect all values to be positive. If you would like your values to be bounded by 0, then you should use a gamma distribution instead.
# Evaluation grid shared by both curves.
x <- seq(-2, 2, length.out = 1000)

# Null (mean 1) and alternative (mean 0.65) normal densities on the raw scale,
# both with standard deviation sqrt(1/50), stacked into one long table.
df <- do.call(
  rbind,
  list(
    data.frame(x = x, y = dnorm(x, mean = 1, sd = sqrt(1 / 50)), id = "H0, HR = 1"),
    data.frame(x = x, y = dnorm(x, mean = 0.65, sd = sqrt(1 / 50)), id = "H1, HR = 0.65")
  )
)

# Position of the dashed reference line and of the shading boundary.
vline <- 0.65
# Two density curves with a dashed cut-off line; the area layer keeps only the
# H1 points right of the cut-off and the H0 points left of it.
ggplot(df, aes(x, y, group = id, color = id)) +
  geom_line() +
  geom_area(
    aes(fill = id),
    data = ~ subset(., (id == "H1, HR = 0.65" & x > (vline)) | (id == "H0, HR = 1" & x < (vline))),
    alpha = 0.3
  ) +
  geom_vline(xintercept = vline, linetype = "dashed") +
  # The curves are centred on the hazard ratios themselves (1 and 0.65), not on
  # their logarithms, so the axis is labelled on the raw scale.
  labs(x = "Hazard Ratio", y = "Density") +
  guides(fill = "none", color = guide_legend(override.aes = list(fill = "white"))) +
  theme_classic() +
  theme(
    legend.title = element_text(size = 10),
    legend.position = c(0.8, 0.4),
    legend.text = element_text(size = 10),
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  scale_color_manual(name = "", values = c("red", "blue")) +
  scale_fill_manual(values = c("red", "blue")) +
  # Single x scale: the earlier xlim(-2, 2) was replaced by this scale anyway
  # and only produced a "scale already present" message.
  scale_x_continuous(
    breaks = seq(-0.3, 2.1, 0.3),
    limits = c(-0.3, 2.1)
  )
I am trying to create a boxplot with a log y axis as I have some very small values and then some much higher values which do not work well in a boxplot with a continuous y axis. However, I have negative values which obviously do not work with a log scale. I was wondering if there was a way around this so that I can display my data on a boxplot which is still easy to interpret but has a more appropriate scale on the y axis.
# Monthly CH4 flux boxplots with whisker caps and a dashed zero line,
# on a linear y axis from -5000 to 40000.
p <- ggplot(Elstow.monthly.fluxes, aes(x = Month1, y = CH4.Flux)) +
  stat_boxplot(geom = "errorbar", linetype = 1, width = 0.5) +
  geom_boxplot() +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "black") +
  labs(
    x = expression("Month"),
    y = expression(~CH[4]~Flux~(µg~CH[4]~m^{-2}~d^{-1}))
  ) +
  scale_y_continuous(
    breaks = seq(-5000, 40000, 5000),
    limits = c(-5000, 40000)
  ) +
  theme(
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black"),
    panel.background = element_rect("white", "black"),
    panel.border = element_rect(colour = "black", fill = NA, size = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0))
  )
While you could indeed use the secondary axis to get the labels you want as Zhiqiang suggests, you could also use a transformation that fits your needs.
Consider the following skewed boxplots:
# Two groups of 500 log-normal draws shifted down by 2, so the sample is
# right-skewed and can go negative (bounded below by -2).
df <- data.frame(
  x = rep(c("a", "b"), each = 500),
  y = rlnorm(1000) - 2
)

# Plain boxplots on the untransformed scale.
ggplot(df, aes(x = x, y = y)) +
  geom_boxplot()
Instead, you could use the pseudo-log transformation to visualise your data:
# Same boxplots on a pseudo-log y scale with its default settings.
ggplot(df, aes(x = x, y = y)) +
  geom_boxplot() +
  scale_y_continuous(trans = "pseudo_log")
Alternatively, you could make any transformation you want. I personally like the inverse hyperbolic sine transformation, which is very much like the pseudo-log:
# Custom scale transformation: asinh() forwards, sinh() as its inverse.
asinh_trans <- scales::trans_new(
  "inverse_hyperbolic_sine",
  transform = asinh,
  inverse = sinh
)

# Same boxplots on the custom y scale.
ggplot(df, aes(x = x, y = y)) +
  geom_boxplot() +
  scale_y_continuous(trans = asinh_trans)
I have a silly solution: trick the secondary axis to re-scale y axis. I do not have your data, just made up some numbers for the purpose of demonstration.
First convert y values as logy = log(y + 5000). When generating the graph, transform the values back to the original scale. I borrow the second axis to display the values. I am pretty sure others may have more elegant ways to do this.
I was lazy for not trying to find the right way to remove the primary y axis tick labels, just used breaks = c(0).
# Made-up fluxes: 33 uniform draws between -5000 and 40000, cycling through
# three month labels. The labels are repeated 11 times so both columns have
# 33 entries and no value is silently recycled.
df <- data.frame(
  y = runif(33, min = -5000, max = 40000),
  x = rep(c("Aug", "Sep", "Oct"), times = 11)
)
library(tidyverse)
# Shift by 5000 before taking logs so that every value is positive.
df$logy <- log(df$y + 5000)

# Boxplots of the shifted log values. The secondary axis undoes the shift and
# the log to show original units; the primary axis keeps a single break at 0.
# The dashed line at log(5000) marks an original value of zero.
p <- ggplot(df, aes(x = x, y = logy)) +
  stat_boxplot(geom = "errorbar", linetype = 1, width = 0.5) +
  geom_boxplot() +
  geom_hline(yintercept = log(5000), linetype = "dashed", colour = "black") +
  labs(
    x = expression("Month"),
    y = expression(~CH[4]~Flux~(µg~CH[4]~m^{-2}~d^{-1}))
  ) +
  scale_y_continuous(
    breaks = 0,
    sec.axis = sec_axis(
      ~ exp(.) - 5000,
      breaks = c(-4000, 0, 5000, 10000, 20000, 40000)
    )
  ) +
  theme(
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black"),
    panel.background = element_rect("white", "black"),
    panel.border = element_rect(colour = "black", fill = NA, size = 0.5),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0))
  )
p
coord_trans() is applied after the statistics are calculated (unlike scale). This can be combined with the pseudo_log_trans to cope with negatives.
library(plotly)
# Two conditions on very different scales: A around 0, B around 500.
set.seed(1234)
dat <- data.frame(
  cond = factor(rep(c("A", "B"), each = 200)),
  rating = c(rnorm(200), rnorm(200, mean = 500))
)

# Base-10 pseudo-log transformation, applied through the coordinate system.
pseudoLog <- scales::pseudo_log_trans(base = 10)
p <- ggplot(dat, aes(x = cond, y = rating)) +
  geom_boxplot() +
  coord_trans(y = pseudoLog)
I have the following example plot:
# Six estimates with 95% bounds. Factor (3 values) and group (2 values) are
# recycled against the six-element numeric columns.
test <- data.frame(
  "Factor" = as.factor(c("O", "C", "A")),
  b = c(0.18, .34, .65, -.13, .38, .26),
  lower95 = c(-.1, .09, .34, -.52, .10, -.02),
  upper95 = c(.48, .58, .98, .26, .67, .56),
  group = factor(c("Experiment 1", "Experiment 2"))
)

# Order the levels by first appearance. unique() is required because the
# recycled columns repeat their values and factor() rejects duplicated levels.
test$Factor <- factor(test$Factor, levels = unique(as.character(test$Factor)))
test$group <- factor(test$group, levels = unique(as.character(test$group)))
# Dodged 95% interval bars per factor, coloured by experiment and flipped so
# that the factors run down the vertical axis.
ggplot(test, aes(x = Factor, y = b, colour = group)) +
  geom_errorbar(
    aes(ymin = lower95, ymax = upper95),
    position = "dodge",
    width = .5,
    size = 1
  ) +
  geom_hline(yintercept = 0) +
  ylim(-1.25, 1.25) +
  coord_flip() +
  labs(title = "Title") +
  theme_bw() +
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 20),
    axis.text.y = element_text(size = 12)
  )
As you'll see, the error bars appear in the reverse order (from top to bottom) as they do in the legend. I would like Experiment 1 error bars to appear above Experiment 2 error bars.
I have tried
ggplot(test, aes(Factor, b, colour = forcats::fct_rev(group)))  # followed by the same layers as in the plot above
But this reverses the order of the group labels in the legend, not the order of the colours in the legend–which is what would work. I have also tried reversing the order in which I enter them in the data frame and this does not solve the problem.
I would appreciate some help!
Re-factoring will change the order of the plot, but, as you saw, also changes the order of the legend. In addition to reversing the levels of group, you can reverse the order the legend is displayed with the reverse argument in guide_legend.
# The group levels are reversed for drawing, and the legend is reversed back
# so that it still lists the experiments in their original order.
ggplot(test, aes(x = Factor, y = b, colour = forcats::fct_rev(group))) +
  geom_errorbar(
    aes(ymin = lower95, ymax = upper95),
    position = "dodge",
    width = .5,
    size = 1
  ) +
  geom_hline(yintercept = 0) +
  ylim(-1.25, 1.25) +
  coord_flip() +
  labs(title = "Title") +
  guides(colour = guide_legend(reverse = TRUE)) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 20),
    axis.text.y = element_text(size = 12)
  )
If you are using scale_color_discrete or scale_color_manual to control other scale elements like the legend name, you can use guide_legend there instead of via guides.
# Fragment to append to an existing plot: names the colour legend "Experiment"
# and reverses its order through the scale's guide argument.
+
scale_color_discrete(name = "Experiment", guide = guide_legend(reverse = TRUE) )
Do you mean something like this?
# Reverse the level order of both factors in place; drop either statement to
# keep that factor's original order.
test[["Factor"]] <- factor(test[["Factor"]], levels = rev(levels(test[["Factor"]])))
test[["group"]] <- factor(test[["group"]], levels = rev(levels(test[["group"]])))

# Same interval plot as before, now drawn with the reversed levels.
ggplot(test, aes(x = Factor, y = b, colour = group)) +
  geom_errorbar(
    aes(ymin = lower95, ymax = upper95),
    position = "dodge",
    width = .5,
    size = 1
  ) +
  geom_hline(yintercept = 0) +
  ylim(-1.25, 1.25) +
  coord_flip() +
  labs(title = "Title") +
  theme_bw() +
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 20),
    axis.text.y = element_text(size = 12)
  )
I'm not entirely clear on whether you want to reverse the ordering of test$Factor as well; just (un)comment the corresponding line depending on what you're after.