R ggplot scatter plot and mean with error bar - r

I want to produce a scatter plot and mean value with error bars. My code is as follows. When I add the geom_errorbar(), there is an error message:
Error in FUN(X[[i]], ...) : object 'value' not found
Z <- c(.1,.5,1.)
T <- seq(1:10)
ZT <- expand.grid(Z,T)
colnames(ZT) <- c("Z","T")
n <- nrow(ZT)
nrep <- 100
rmat <- replicate(nrep, rnorm(n))
ave <- apply(rmat,1,mean)
var <- apply(rmat,1,var)
se <- sqrt(var)/sqrt(nrep)
rmat.summary <- as.data.frame(cbind(ZT,ave,se))
colnames(rmat.summary) <- c("Z","T","ave","se")
library(reshape)
library(ggplot2)
rmat <- as.data.frame(cbind(ZT,rmat))
rmat <- melt(as.data.frame(rmat),id=c(1,2))
ggplot(rmat, aes(x = T, y = value)) + geom_point() + geom_line(data =
rmat.summary, aes(x = T, y = ave)) + facet_wrap( ~ Z)
ggplot(rmat, aes(x = T, y = value)) + geom_point() + geom_line(data =
rmat.summary, aes(x = T, y = ave)) +
geom_errorbar(data = rmat.summary, aes(ymin = ave - se, ymax = ave + se))
+ facet_wrap( ~ Z)
So can anybody please help me to correct this error? Thanks in advance!

This should work:
ggplot() +
geom_point(data = rmat, aes(x = T, y = value)) +
geom_line(data = rmat.summary, aes(x = T, y = ave)) +
geom_errorbar(data = rmat.summary, aes(x = T, y = ave, ymin = ave - se, ymax = ave + se)) +
facet_wrap( ~ Z)
But I think you have to play around with ymin and ymax.

Related

log_2(x + 1) transformation in ggplot2

I'm trying to implement the log_2(x + 1) transformation in ggplot2 but am running into issues.
Here is an MWE
library(ggplot2)
x <- rexp(100)
y <- rexp(100)
df <- data.frame(x = x, y = y)
p <- ggplot(df, aes(x = x, y = y)) + geom_point(colour = "blue") +
scale_x_continuous(trans = "log2") +
scale_y_continuous(trans = "log2")
print(p)
However, I'm unsure how to best go about transforming the axes, as well as labelling the axes as log_2{x + 1) and log_2(y + 1).
You could use log2_trans from scales with a function to add 1 like this:
library(ggplot2)
library(scales)
x <- rexp(100)
y <- rexp(100)
df <- data.frame(x = x, y = y)
p <- ggplot(df, aes(x = x, y = y)) + geom_point(colour = "blue") +
scale_x_continuous(trans = log2_trans(),
breaks = trans_breaks("log2", function(x) x + 1),
labels = trans_format("log2", math_format(.x + 1))) +
scale_y_continuous(trans = log2_trans(),
breaks = trans_breaks("log2", function(x) x + 1),
labels = trans_format("log2", math_format(.x + 1)))
print(p)
Created on 2022-11-04 with reprex v2.0.2

How to smooth out a time-series geom_area with fill in ggplot?

I have the following graph and code:
Graph
ggplot(long2, aes(x = DATA, y = value, fill = variable)) + geom_area(position="fill", alpha=0.75) +
scale_y_continuous(labels = scales::comma,n.breaks = 5,breaks = waiver()) +
scale_fill_viridis_d() +
scale_x_date(date_labels = "%b/%Y",date_breaks = "6 months") +
ggtitle("Proporcions de les visites, només 9T i 9C") +
xlab("Data") + ylab("% visites") +
theme_minimal() + theme(legend.position="bottom") + guides(fill=guide_legend(title=NULL)) +
annotate("rect", fill = "white", alpha = 0.3,
xmin = as.Date.character("2020-03-16"), xmax = as.Date.character("2020-06-22"),
ymin = 0, ymax = 1)
But it has some sawtooth, how am I supposed to smooth it out?
I believe your situation is roughly analogous to the following, wherein we have missing x-positions for one group, but not the other at the same position. This causes spikes if you set position = "fill".
library(ggplot2)
x <- seq_len(100)
df <- data.frame(
x = c(x[-c(25, 75)], x[-50]),
y = c(cos(x[-c(25, 75)]), sin(x[-50])) + 5,
group = rep(c("A", "B"), c(98, 99))
)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
To smooth out these spikes, it has been suggested to linearly interpolate the data at the missing positions.
# Find all used x-positions
ux <- unique(df$x)
# Split data by group, interpolate data groupwise
df <- lapply(split(df, df$group), function(xy) {
approxed <- approx(xy$x, xy$y, xout = ux)
data.frame(x = ux, y = approxed$y, group = xy$group[1])
})
# Recombine data
df <- do.call(rbind, df)
# Now without spikes :)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
Created on 2022-06-17 by the reprex package (v2.0.1)
P.S. I would also have expected a red spike at x=50, but for some reason this didn't happen.

removing the intercept from regression line equation from ggplot using stat_reg_line() function

I am adding the regression line equation to my ggplot. However, I would like to remove the intercept from plot and keep only the slope and R^2.
Here is the code I am using to generate the plot and equation. Do you have any idea how can I remove the intercept?
library(ggpmisc)
df <- data.frame(x = c(1:100))
df$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
df$group <- factor(rep(c("A", "B"), 50))
df <- df %>% group_by(group) %>% mutate(ymax = max(y))
df %>%
group_by(group) %>%
do(tidy(lm(y ~ x, data = .)))
p <- ggplot(data = df, aes(x = x, y = y, colour = group)) +
geom_smooth(method = "lm", se=FALSE, formula = y ~ x) +
stat_regline_equation(
aes( x = x, y = y , label = paste(..eq.label..,..rr.label.., sep = "~~~~")),
formula=y~x, size=3,
)
p
Thanks,
You can use stat_fit_tidy from the ggpmisc package:
df <- data.frame(x = c(1:100))
df$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
df$group <- factor(rep(c("A", "B"), 50))
library(ggpmisc)
my_formula <- y ~ x
ggplot(df, aes(x = x, y = y, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = my_formula, se = FALSE) +
stat_fit_tidy(
method = "lm",
method.args = list(formula = my_formula),
mapping = aes(label = sprintf('slope~"="~%.3g',
after_stat(x_estimate))),
parse = TRUE)
EDIT
If you want the R squared as well:
ggplot(df, aes(x = x, y = y, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = my_formula, se = FALSE) +
stat_fit_tidy(
method = "lm",
method.args = list(formula = my_formula),
mapping = aes(label = sprintf('slope~"="~%.3g',
after_stat(x_estimate))),
parse = TRUE) +
stat_poly_eq(formula = my_formula,
aes(label = ..rr.label..),
parse = TRUE,
label.x = 0.6)
EDIT
Another way:
myformat <- "Slope: %s --- R²: %s"
ggplot(df, aes(x, y, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = my_formula, se = FALSE) +
stat_poly_eq(
formula = my_formula, output.type = "numeric",
mapping = aes(label =
sprintf(myformat,
formatC(stat(coef.ls)[[1]][[2, "Estimate"]]),
formatC(stat(r.squared)))),
vstep = 0.1
)

R ggplot2 : continuous x + colors

I'm trying to create a boxplot using ggplot2 with :
X as a continuous variable
Colors for different groups
Here is an example :
x <- sample(c(1,2,5),300,replace = TRUE)
y <- sapply(x,function(mu) rnorm(1,mean = mu))
color <- sample(c("color 1","color 2"),300,replace = TRUE)
data <- data.frame(x, y, color)
I can either have colors and x as a factor :
ggplot(data = data) + geom_boxplot(aes(x = factor(x),y = y,col = color))
or x as a continuous variable and no colors :
ggplot(data = data) + geom_boxplot(aes(x = x,y = y,group = x))
But not both.
Does somebody know how to do this ?
Thanks
I think you need one more column for group, which is the combination of color and x. For example, how about simply paste()ing them?
set.seed(1)
x <- sample(c(1,2,5),300,replace = TRUE)
y <- sapply(x,function(mu) rnorm(1,mean = mu))
color <- sample(c("color 1","color 2"),300,replace = TRUE)
data <- data.frame(x, y, color)
library(ggplot2)
ggplot(data = data) +
geom_boxplot(aes(x = x, y = y, col = color, group = paste(color, x)))
You can use scales to change the x-axis scale.
library(ggplot2)
library(scales)
x <- sample(c(1,2,5),300,replace = TRUE)
y <- sapply(x,function(mu) rnorm(1,mean = mu))
color <- sample(c("color 1","color 2"),300,replace = TRUE)
data <- data.frame(x, y, color)
ggplot(data = data) + geom_boxplot(aes(x = factor(x),y = y,col = color)) + scale_x_discrete(limit = c('1','2','3','4','5'))
Hack for dynamic limits:
min = min(data$x)
max = max(data$x)
limits <- as.character(seq(min:max))
ggplot(data = data) + geom_boxplot(aes(x = factor(x),y = y,col = color)) + scale_x_discrete(limit = limits)
You could misuse the fill aesthetic:
ggplot(data = data) +
geom_boxplot(aes(x = x, y = y, col = color, fill = factor(x))) +
scale_fill_manual(values = rep(NA, 3), guide = "none")

Add significance lines to facets in ggplot2

How to adjust the height of each geom_line depending on the facet group (y-lims differ depending on the group, see image below)?
I tried to build a custom data.frame which contains heights for each condition but this is not accepted by geom_line.
I have this little working example:
carData <- mtcars
carData$cyl <- factor(carData$cyl)
maxval <- max(carData$mpg)
maxval <- maxval * 1.1
lowval <- maxval - maxval * 0.02
txtval <- maxval * 1.04
llev <- "4"
rlev <- "6"
lpos <- which(levels(carData$cyl) == llev)
rpos <- which(levels(carData$cyl) == rlev)
mpos <- (lpos + rpos) / 2
df1 <- data.frame(a = c(lpos,lpos,rpos,rpos), b = c(lowval, maxval, maxval, lowval))
p <- ggplot(carData, aes(cyl, mpg))
p <- p + geom_boxplot()
p <- p + geom_line(data = df1, aes(x = a, y = b)) + annotate("text", x = mpos, y = txtval, label = "3.0")
p <- p + facet_wrap( ~ gear,ncol=2,scales="free")
You need to capture the variable you are using to facet with, in your summary data.frame. We could capture group wise maxima and use them for the y positions of the geom_segment() and geom_text:
library(tidyverse)
# get the max for each gear facet
df2 <- carData %>% group_by(gear) %>%
summarise(ypos = max(mpg)*1.1) %>%
mutate(x = lpos, xend = rpos) # use your factor level locators
p <- ggplot(carData, aes(cyl, mpg)) +
geom_boxplot() +
geom_segment(data = df2, aes(y = ypos, yend = ypos, x = x, xend = xend)) +
geom_text(data = df2, aes(y = ypos*1.02, x = mean(c(x, xend))), label = "3.0") +
facet_wrap( ~ gear,ncol=2, scales="free")
# if you want the end ticks
p + geom_segment(data = df2, aes(y = ypos, yend = ypos * .99, x = x, xend = x)) +
geom_segment(data = df2, aes(y = ypos, yend = ypos *.99, x = xend, xend = xend))

Resources