full text label on Boxplot, with added mean point - r

Am trying to get text label similar to what this https://stats.stackexchange.com/questions/8206/labeling-boxplots-in-r, but I cant get it to work. MWE similar to what I have is this:
data <- data.frame(replicate(5,sample(0:100,100,rep=TRUE)))
meanFunction <- function(x){
return(data.frame(y=round(mean(x),2),label=round(mean(x,na.rm=T),2)))}
ggplot(melt(data), aes(x=variable, y=value)) +
geom_boxplot(aes(fill=variable), width = 0.7) +
stat_summary(fun.y = mean, geom="point",colour="darkred", size=4) +
stat_summary(fun.data = meanFunction, geom="text", size = 4, vjust=1.3)
That produces something like "A" in the attached image, and I am trying to get something like "B" for each of the boxes. Thanks.

Here is my attempt. First, I reshaped your data. Then, I produced your boxplot. I changed the size and colour of text for mean. Then, I looked into the data that ggplot used, which you can access using ggplot_build(objectname)$data[[1]]. You can see the numbers you need. I selected necessary variables and reshaped the data, which is df. Using df, you can annotate the numbers you want.
library(dplyr)
library(tidyr)
library(ggplot2)
set.seed(123)
mydf <- data.frame(replicate(5,sample(0:100,100,rep=TRUE)))
mydf <- gather(mydf, variable, value)
meanFunction <- function(x){
return(data.frame(y=round(mean(x),2),label=round(mean(x,na.rm=T),2)))}
g <- ggplot(data = mydf, aes(x = variable, y = value, fill = variable)) +
geom_boxplot(width = 0.5) +
stat_summary(fun.y = mean, geom = "point",colour = "darkred", size=4) +
stat_summary(fun.data = meanFunction, geom ="text", color = "white", size = 3, vjust = 1.3)
df <- ggplot_build(g)$data[[1]] %>%
select(ymin:ymax, x) %>%
gather(type, value, - x) %>%
arrange(x)
g + annotate("text", x = df$x + 0.4, y = df$value, label = df$value, size = 3)

First, I would take your data and then calculate all the boxplot features yourself. Here's one way to do that
dd <- data.frame(replicate(5,sample(0:100,100,rep=TRUE)))
tt <- data.frame(t(sapply(dd, function(x) c(boxplot.stats(x)$stats, mean(x)))))
names(tt) <- c("ymin","lower","middle","upper","ymax", "mean")
tt$var <- factor(rownames(tt))
I'm sure there are prettier ways to do that with dplyr but this point is you'll need to calculate those values yourself so you know where to draw the labels. Then you can do
ggplot(tt) +
geom_boxplot(aes(x=var, ymin=ymin, lower=lower, middle=middle, upper=upper, ymax=ymax), stat="identity", width=.5) +
geom_text(aes(x=as.numeric(var)+.3, y=middle, label=formatC(middle,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= lower, label=formatC(lower,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= upper, label=formatC(upper,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= ymax, label=formatC(ymax,1, format="f")), hjust=0) +
geom_text(aes(x=as.numeric(var)+.3, y= ymin, label=formatC(ymin,1, format="f")), hjust=0) +
geom_point(aes(x=var, y=mean)) +
geom_text(aes(x=as.numeric(var), y= mean, label=formatC(mean,1, format="f")), hjust=.5, vjust=1.5)
to draw each of the labels

Related

How to smooth out a time-series geom_area with fill in ggplot?

I have the following graph and code:
Graph
ggplot(long2, aes(x = DATA, y = value, fill = variable)) + geom_area(position="fill", alpha=0.75) +
scale_y_continuous(labels = scales::comma,n.breaks = 5,breaks = waiver()) +
scale_fill_viridis_d() +
scale_x_date(date_labels = "%b/%Y",date_breaks = "6 months") +
ggtitle("Proporcions de les visites, només 9T i 9C") +
xlab("Data") + ylab("% visites") +
theme_minimal() + theme(legend.position="bottom") + guides(fill=guide_legend(title=NULL)) +
annotate("rect", fill = "white", alpha = 0.3,
xmin = as.Date.character("2020-03-16"), xmax = as.Date.character("2020-06-22"),
ymin = 0, ymax = 1)
But it has some sawtooth, how am I supposed to smooth it out?
I believe your situation is roughly analogous to the following, wherein we have missing x-positions for one group, but not the other at the same position. This causes spikes if you set position = "fill".
library(ggplot2)
x <- seq_len(100)
df <- data.frame(
x = c(x[-c(25, 75)], x[-50]),
y = c(cos(x[-c(25, 75)]), sin(x[-50])) + 5,
group = rep(c("A", "B"), c(98, 99))
)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
To smooth out these spikes, it has been suggested to linearly interpolate the data at the missing positions.
# Find all used x-positions
ux <- unique(df$x)
# Split data by group, interpolate data groupwise
df <- lapply(split(df, df$group), function(xy) {
approxed <- approx(xy$x, xy$y, xout = ux)
data.frame(x = ux, y = approxed$y, group = xy$group[1])
})
# Recombine data
df <- do.call(rbind, df)
# Now without spikes :)
ggplot(df, aes(x, y, fill = group)) +
geom_area(position = "fill")
Created on 2022-06-17 by the reprex package (v2.0.1)
P.S. I would also have expected a red spike at x=50, but for some reason this didn't happen.

Add in legend to ggplot

I know this question is similar to ones that has been asked before but the suggested solutions don't seem to apply.
I set up the problem as follows
mat1 <- NULL
mat2 <- NULL
mat1 <- data.frame(matrix(nrow =16, ncol =2, data = rnorm(32, 0, 1)))
mat2 <- data.frame(matrix(nrow =16, ncol =2, data = rnorm(32, 0, 1)))
mat1[,1] = mat2[,1] = 1:16
colnames(mat1) = c("Window", "CM")
colnames(mat2) = c("Window", "FM")
ggplot() +
geom_line(data = mat1, aes(x = mat1$Window, y= mat1$CM), linetype ="twodash", color ="steelblue") +
geom_line(data = mat2, aes(x = mat2$Window, y= mat2$FM), color = "black") +
theme_classic() + xlab("Quater after alpha assessment") + ylab("Estimated Coefficient") + labs(fill = "cohort model")
I want to add in a legend. Specifically i want the blue line to be labelled as CM and the black line to be labelled as FM
In these kind of scenarios I think it is often the easiest to bring your data into the appropriate format for ggplot. Then you can properly use all of the ggplot toolset.
library(tidyverse)
mat3 = bind_cols(mat1, mat2) %>%
select(-Window1) %>%
gather(type, value, -Window)
mat3 %>%
ggplot(aes(x = Window, y = value, group = type, color = type, linetype = type)) +
geom_line() +
scale_color_manual("cohort model",
values = c("CM" = "steelblue","FM" = "black"),
breaks = c("CM", "FM")) +
scale_linetype_manual("cohort model",
values = c("twodash", "solid"),
breaks = c("CM", "FM")) +
labs(x = "Quater after alpha assessment", y = "Estimated Coefficient") +
theme_classic()
I assume the simplest way to do this would be to use annote():
ggplot() +
geom_line(data = mat1, aes(x = mat1$Window, y= mat1$CM), linetype ="twodash", color ="steelblue") +
geom_line(data = mat2, aes(x = mat2$Window, y= mat2$FM), color = "black") +
theme_classic() + xlab("Quater after alpha assessment") + ylab("Estimated Coefficient") + labs(fill = "cohort model") +
xlim(NA,18) +
annotate(geom="text", x=16.5, y=1.51232841, label="CM", color="blue", size=3) +
annotate(geom="text", x=16.5, y=-0.487350382, label="FM", color="black", size=3)
You can easily change and adjust the position with x= and y=. I also slightly extended the upper limit of x-scale so that the text fits in.
Of course, I don't know if that's enough for you. Otherwise, you could also add a text field as legend. But this would be the easiest and fastest way.

Pass changed geom from object to other ggplot

I first make a plot
df <- data.frame(x = c(1:40, rep(1:20, 3), 15:40))
p <- ggplot(df, aes(x=x, y = x)) +
stat_density2d(aes(fill='red',alpha=..level..),geom='polygon', show.legend = F)
Then I want to change the geom_density values and use these in another plot.
# build plot
q <- ggplot_build(p)
# Change density
dens <- q$data[[1]]
dens$y <- dens$y - dens$x
Build the other plot using the changed densities, something like this:
# Built another plot
ggplot(df, aes(x=x, y =1)) +
geom_point(alpha = 0.3) +
geom_density2d(dens)
This does not work however is there a way of doing this?
EDIT: doing it when there are multiple groups:
df <- data.frame(x = c(1:40, rep(1:20, 3), 15:40), group = c(rep('A',40), rep('B',60), rep('C',26)))
p <- ggplot(df, aes(x=x, y = x)) +
stat_density2d(aes(fill=group,alpha=..level..),geom='polygon', show.legend = F)
q <- ggplot_build(p)
dens <- q$data[[1]]
dens$y <- dens$y - dens$x
ggplot(df, aes(x=x, y =1)) +
geom_point(aes(col = group), alpha = 0.3) +
geom_polygon(data = dens, aes(x, y, fill = fill, group = piece, alpha = alpha)) +
scale_alpha_identity() +
guides(fill = F, alpha = F)
Results when applied to my own dataset
Although this is exactly what I'm looking for the fill colors seem not to correspond to the initial colors (linked to A, B and C):
Like this? It is possible to plot a transformation of the shapes plotted by geom_density. But that's not quite the same as manipulating the underlying density...
ggplot(df, aes(x=x, y =1)) +
geom_point(alpha = 0.3) +
geom_polygon(data = dens, aes(x, y, fill = fill, group = piece, alpha = alpha)) +
scale_alpha_identity() +
guides(fill = F, alpha = F)
Edit - OP now has multiple groups. We can plot those with the code below, which produces an artistic plot of questionably utility. It does what you propose, but I would suggest it would be more fruitful to transform the underlying data and summarize that, if you are looking for representative output.
ggplot(df, aes(x=x, y =1)) +
geom_point(aes(col = group), alpha = 0.3) +
geom_polygon(data = dens, aes(x, y, fill = group, group = piece, alpha = alpha)) +
scale_alpha_identity() +
guides(fill = F, alpha = F) +
theme_minimal()

In ggplot2, generate error bars across Facets in a data with multiple independent variables

I am trying to put error bar on mean values from a data frame which has three independent variables and plotted as facet_grid. However, the plot below is putting error bars in wrong facets. Could anyone please help me?
Please see below the example data and associated code:
life <- rep(c("1d", "2d", "4d"), 2, each = 2)
trt <- rep(c("c1", "c2"), 6)
species <- rep(c("SP1", "SP2"), each = 6)
mean_v <- runif(12, 12, 45)
sem_v <- runif(12, 1, 4)
data1 <- data.frame(life, trt, species, mean_v, sem_v)
plot1 <- ggplot(data1, aes(x = trt, y = mean_v, group = species, fill = species))
plot1 + geom_bar(stat = "identity", position = "dodge") +
facet_grid(~life) +
geom_errorbar(aes(ymin = data1$mean_v - data1$sem_v,
ymax = data1$mean_v + data1$sem_v,
width = 0.2),
position = position_dodge(width = 0.90),
group = data1$trt)
Thanks very much in advance.
The solution seems to be to specify position=position_dodge(width=0.9) in both geom_bar and geom_errorbar.
library(ggplot2)
plot1 <- ggplot(data1, aes(x=trt, y=mean_v, group=species, fill=species)) +
geom_bar(stat="identity", position=position_dodge(width=0.9)) +
facet_grid(. ~ life) +
geom_errorbar(aes(ymin=mean_v - sem_v, ymax=mean_v + sem_v),
width=0.2, position=position_dodge(width=0.9))
ggsave("dodged_barplot.png", plot=plot1, height=4, width=6, dpi=150)

ggplot2 and facet_grid : add highest value for each plot

I am using facet_grid() to plot multiple plot divided per groups of data. For each plot, I want to add in the corner the highest value of the Y axis. I've tried several hacks but it never gives me the expected results. This answer partially helps me but the value I want to add will constantly be changing, therefore I don't see how I can apply it.
Here is a minimal example, I'd like to add the red numbers on the graph below:
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)
Thanks for your help!
library(dplyr)
data2 <- data %>% group_by(group) %>% summarise(Max = max(value))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(aes(label = Max), x = Inf, y = Inf, data2,
hjust = 2, vjust = 2, col = 'red') +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)
This does the trick. If you always have fixed ranges you can position the text manually.
library(ggplot2)
data <- data.frame('group'=rep(c('A','B'),each=4),'hour'=rep(c(1,2,3,4),2),'value'=c(5,4,2,3,6,7,4,5))
ggplot(data,aes(x = hour, y = value)) +
geom_line() +
geom_point() +
geom_text(
aes(x, y, label=lab),
data = data.frame(
x=Inf,
y=Inf,
lab=tapply(data$value, data$group, max),
group=unique(data$group)
),
vjust="inward",
hjust = "inward"
) +
theme(aspect.ratio=1) +
scale_x_continuous(name ="hours", limits=c(1,4)) +
scale_y_continuous(limits=c(1,10),breaks = seq(1, 10, by = 2))+
facet_grid( ~ group)

Resources