Add mean line to ggplot? - r

I currently have this plot:
current plot without mean line
I want to add a continuous line in the plot that shows the mean value of each x-axis point.
How can i do this? Here is my code:
# Keep only the rows with a fill size of 250.
data <- ndpdata[which(ndpdata$FC.Fill.Size == 250), ]

# Convert PS_DATE to Date (both conversions from the original are retained).
data$PS_DATE <- as.Date(data$PS_DATE, "%Y-%m-%d")
data$PS_DATE <- as.Date(data$PS_DATE, "%m-%d-%Y")

# x-axis key: PS_DATE and FC.Batch.Nbr joined by a single space.
data$final <- na.omit(paste(data$PS_DATE, data$FC.Batch.Nbr, sep = " "))

library(tidyr)

# Long format: every column except `final` becomes a (group, y) pair.
my_df_long <- gather(data, group, y, -final)
data <- na.omit(my_df_long[2075:2550, ])

# One point per observation, coloured by its x-axis key; legend hidden.
ggplot(data, aes(final, y, color = final), na.rm = TRUE) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  geom_point(na.rm = TRUE) +
  scale_y_discrete(breaks = c(251, 270, 290, 310, 325))

First, for future questions, please take note of MrFlick's comment.
We could use stat_summary. x should be a factor and in a meaningful order.
I can't test this because no data was provided:
# Same scatter plot, plus a red line through the mean of y at each x value.
# `group = 1` places all x values in one group so the means are connected.
ggplot(data, aes(x = factor(final), y, color = final), na.rm = TRUE) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "none"
  ) +
  geom_point(na.rm = TRUE) +
  scale_y_discrete(breaks = c(251, 270, 290, 310, 325)) +
  stat_summary(aes(group = 1), fun = mean, geom = "line", colour = "red")

Related

How to combine multiple ggplots into one plot with same x and y axis

I want to combine multiple ggplots into one plot with same x and y axis. This is my data. I have one Time column and 6 trend columns (A_Trnd, B_Trnd, C_Trnd etc). I have generated plot for Time vs A_Trnd.
library(ggplot2)
library(scales)
# Load the trend table and parse the time column as Date for scale_x_date().
result <- read.csv("Downloads/Questions Trend - Questions Trend.csv")
result$Time_Formatted <- as.Date(result$Time_Formatted)

# Hand-picked tick positions for the x axis.
date_breaks <- as.Date(
  c("9/1/08", "5/12/14", "7/1/17", "2/2/19", "6/3/20"),
  format = "%m/%d/%y"
)

# Columns are mapped by bare name: writing `result$col` inside aes() bypasses
# the `data` argument, makes ggplot2 warn, and breaks faceting or reuse of the
# plot with another data frame. Axis titles are set explicitly, so the change
# does not alter the labels.
p1 <- ggplot(result, aes(Time_Formatted, A_Trnd)) +
  geom_point(size = 0.1) +
  xlab("Month") +
  ylab("Temporal Trend") +
  scale_x_date(
    breaks = date_breaks,
    date_labels = "%Y-%m",
    limits = c(as.Date("2008-08-01"), as.Date("2021-08-01"))
  ) +
  theme(axis.text.x = element_text(angle = 70, vjust = 0.9, hjust = 1))

# Display the scatter plot with a red loess smoother on top.
p1 + geom_smooth(method = "loess", color = "red")
Now I want to plot the same for Time vs B_Trnd, Time vs C_Trnd, and so on, and have a combined plot like the one below.
How can I achieve this?
library(tidyverse)
library(scales)
# Read the table, parse Time as Date, then stack every other column into
# (group, value) pairs so that each trend can get its own facet.
result <- read.csv("Downloads/Questions Trend - Questions Trend.csv") %>%
  mutate(Time = as.Date(Time, format = "%m/%d/%y")) %>%
  pivot_longer(cols = -Time, names_to = "group", values_to = "value")

# Hand-picked tick positions for the x axis.
date_breaks <- as.Date(
  c("9/1/08", "5/12/14", "7/1/17", "2/2/19", "6/3/20"),
  "%m/%d/%y"
)

# One panel per trend column, laid out in a single row, each with its own
# coloured loess smoother; the colour legend is hidden.
p1 <- ggplot(result, aes(Time, value)) +
  geom_point(size = 0.1) +
  labs(x = "Month", y = "Temporal Trend") +
  scale_x_date(
    breaks = date_breaks,
    date_labels = "%Y-%m",
    limits = c(as.Date("2008-08-01"), as.Date("2021-08-01"))
  ) +
  theme(
    axis.text.x = element_text(angle = 70, vjust = 0.9, hjust = 1),
    legend.position = "none"
  ) +
  geom_smooth(aes(color = group), method = "loess") +
  facet_wrap(vars(group), nrow = 1)
p1

Exclude observations below a certain threshold in a stacked bar chart in ggplot2

I need to exclude some observations below a certain threshold in stacked bar chart done with ggplot2.
An example of my dataframe:
My code:
# Stacked bars of Nb_Interrogations per user, split by Folder; users are
# ordered by decreasing total (reorder() on the negated values with sum).
ggplot(
  df,
  aes(
    x = reorder(UserName, -Nb_Interrogations, sum),
    y = Nb_Interrogations,
    fill = Folder
  )
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(legend.key.size = unit(0.5, "line"), legend.position = c(0.8, 0.7)) +
  labs(x = "UserName") +
  # A plot has a single y scale: a separate ylim() call is replaced by the
  # later scale_y_continuous(), so the limits are set on that one scale.
  scale_y_continuous(limits = c(0, 95000), breaks = seq(0, 95000, 10000)) +
  scale_fill_brewer(palette = "Blues") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
The problem is that I have many observations (UserName) with low values on the Y axis (Nb_Interrogations). So I'd like to exclude from the bar plot every UserName that falls below a certain threshold, let's say 100.
I tried with the which function changing my code:
# Attempt that drops individual rows with Nb_Interrogations <= 100 before
# plotting. The comma between the data argument and aes() was missing, which
# made the call unparseable.
ggplot(
  df[which(df$Nb_Interrogations > 100), ],
  aes(
    x = reorder(UserName, -Nb_Interrogations, sum),
    y = Nb_Interrogations,
    fill = Folder
  )
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(legend.key.size = unit(0.5, "line"), legend.position = c(0.8, 0.7)) +
  labs(x = "UserName") +
  # A plot has a single y scale: a separate ylim() call is replaced by the
  # later scale_y_continuous(), so the limits are set on that one scale.
  scale_y_continuous(limits = c(0, 95000), breaks = seq(0, 95000, 10000)) +
  scale_fill_brewer(palette = "Blues") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
But this doesn't fit my case: it removes every individual observation below the threshold of 100 from the data frame before the totals are computed, which also changes the values on the Y axis. How can I solve this problem? Thanks.
It looks like the simplest solution for you will involve subsetting your data first, and then plotting. Without workable data to test, this is just a theoretical answer, so you may have to adapt for your needs. You can pipe the subsetting and plotting together for ease. Something like this might do the trick for you:
# Keep all rows of every user whose *total* Nb_Interrogations exceeds 100, so
# the remaining bars keep their full stacked height.
df %>%
  group_by(UserName) %>%
  # The comparison must sit outside sum(): sum(x > 100) would count rows above
  # 100 instead of testing the per-user total, and is not a logical condition.
  filter(sum(Nb_Interrogations) > 100) %>%
  # Drop the grouping so it does not leak into the plotting step.
  ungroup() %>%
  ggplot(
    aes(
      x = reorder(UserName, -Nb_Interrogations, sum),
      y = Nb_Interrogations,
      fill = Folder
    )
  ) +
  geom_bar(stat = "identity")
## append the rest of your plotting code (themes, scales, labels) with `+` ##

ggplot works in individual code but not work in loop

I am having an issue with ggplot inside a loop.
It works well individually.
as below:
# Bar chart of the mean Rate per Month, one facet column per Year.
# (The stray `*` markers that wrapped this snippet were markup residue and
# made the code unparseable; they have been removed.)
plotgg <- ggplot(renewalplot, aes(x = Month, y = Rate)) +
  stat_summary(fun = mean, na.rm = TRUE, geom = "bar") +
  labs(x = "Month") +
  ggtitle("Rate Change Distribution") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_grid(cols = vars(Year))
print(plotgg)
When I put it in a loop, it gives me an error:
# Candidate x-axis columns: here only the 13th column name is looped over.
vars <- colnames(detailinfo_renewal_1)
varslist1 <- vars[13]

for (i in varslist1) {
  # Restrict to the "FI" product rows.
  renewalplot <- detailinfo_renewal_1 %>%
    filter(Product == "FI")

  # Mean Rate per value of column `i`, one facet column per Year.
  plotgg <- ggplot(renewalplot, aes(x = renewalplot[, i], y = Rate)) +
    stat_summary(fun = mean, na.rm = TRUE, geom = "bar") +
    labs(x = i) +
    ggtitle("Mean of Rate Change Distribution") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    facet_grid(cols = vars(Year))

  print(plotgg)
}
Much appreciated!
LC
The bit that you've changed when putting into a loop tells you a lot about where the error might be:
aes(x = renewalplot[, i],y=Rate)
This method of mapping an aesthetic won't work. Normally, when you pick an aesthetic, you pass the name of the aesthetic to the value of x, such as in x = Month. Behind the scenes, ggplot() then figures out the appropriate values from your data source.
An alternative way of mapping aesthetics is to use aes_string(), which would probably be better suited to your use-case. Since i is already the name of the column as a string, this will fit right in
# The filtered data does not depend on `i`, so it is computed once up front
# instead of on every iteration.
renewalplot <- detailinfo_renewal_1 %>%
  filter(Product == "FI")

for (i in varslist1) {
  # `i` holds a column name as a string, so the mapping is built from strings
  # with aes_string(). NOTE: aes_string() is soft-deprecated in ggplot2 >= 3.0.0;
  # `aes(x = .data[[i]], y = Rate)` is the current equivalent.
  plotgg <- ggplot(renewalplot, aes_string(x = i, y = "Rate")) +
    stat_summary(fun = mean, na.rm = TRUE, geom = "bar") +
    labs(x = i) +
    ggtitle("Mean of Rate Change Distribution") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    facet_grid(cols = vars(Year))

  # Plots are not auto-printed inside a loop, so print explicitly.
  print(plotgg)
}
You can write a function :
library(dplyr)
library(ggplot2)
#' Plot the mean Rate for each value of one column, faceted by Year
#'
#' Filters `data` to the rows where `Product == "FI"` and draws a bar chart of
#' the mean `Rate` for every value of the column named by `col`.
#'
#' @param col Name of the x-axis column, as a single string.
#' @param data Data frame to plot. Defaults to `detailinfo_renewal_1`, so
#'   existing calls such as `lapply(varslist1, plot_fn)` behave as before.
#' @return A ggplot object (not printed).
plot_fn <- function(col, data = detailinfo_renewal_1) {
  stopifnot(is.character(col), length(col) == 1L)

  renewalplot <- data %>% filter(Product == "FI")

  # `.data[[col]]` looks the column up by its string name inside aes().
  ggplot(renewalplot, aes(x = .data[[col]], y = Rate)) +
    stat_summary(fun = mean, na.rm = TRUE, geom = "bar") +
    labs(x = col) +
    ggtitle("Mean of Rate Change Distribution") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    facet_grid(cols = vars(Year))
}
and use lapply to apply it for every value in varslist1.
# Call plot_fn once per column name in varslist1; lapply() collects the
# returned plots in a list (nothing is printed here).
list_plot <- lapply(varslist1, plot_fn)

How to add labels to multiple ggplot graphs (A, B, C)

I am trying to add the labels A, B, and C to the top left hand corner of each of these graphs. I have tried cowplot::draw_plot_label(), but nothing seems to work. Can anyone help?
These A, B and C labels are not the main title of each plot.
# Packages
library(ggplot2)
library(gridExtra)
library(cowplot)
# 1st plot
# Line plot of Proportion over Date for the Sharksucker_Remora rows only.
p1 <- ggplot(data = new_data %>% filter(Species == "Sharksucker_Remora")) +
  scale_colour_manual(
    values = c(Sharksucker_Remora = "black"),
    labels = c("Sharksucker Remora")
  ) +
  geom_line(
    mapping = aes(x = Date, y = Proportion, group = Species, colour = Species)
  ) +
  xlab("") +
  ylab("Proportion") +
  # Legend title supplied as a formula so `M.alfredi` is wrapped in italic().
  labs(colour = ~ italic(M.alfredi) ~ "Hitchhiker Species:") +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.key = element_blank()
  )
# 2nd plot
# Line plot of Proportion over Date for every species except Sharksucker_Remora.
p2 <- ggplot(data = new_data %>% filter(Species != "Sharksucker_Remora")) +
  geom_line(
    mapping = aes(x = Date, y = Proportion, group = Species, colour = Species)
  ) +
  scale_colour_manual(
    values = c(
      Golden_Trevally = "goldenrod2",
      Red_Snapper = "firebrick2",
      Juvenile_Remora = "darkolivegreen3"
    ),
    # Labels are named after the data values so each one is attached to the
    # right species. Unnamed labels are matched by position to the breaks,
    # whose order follows the Species levels rather than `values`, so the
    # original ordering could swap legend labels.
    labels = c(
      Golden_Trevally = "Golden Trevally",
      Red_Snapper = "Red Snapper",
      Juvenile_Remora = "Juvenile Remora"
    )
  ) +
  xlab("") +
  ylab("Proportion") +
  labs(colour = "") +
  theme(legend.position = "top") +
  theme(legend.key = element_blank()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# 3rd plot
# Bar chart of Count per Date.
p3 <- ggplot(data = new_data_counts) +
  geom_bar(mapping = aes(x = Date, y = Count), stat = "identity") +
  xlab("Date (2015-2019)") +
  ylab("Total" ~ italic(M.alfredi) ~ "Sightings") +
  # The closing parenthesis of draw_plot_label() was missing, which swallowed
  # the following theme() call and left the whole statement unparseable.
  draw_plot_label(label = c("C")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# The grid
grid.arrange(p1, p2, p3)
I suggest you use labs(..., tag = ...) and theme(plot.tag = element_text()).
The code shows how you can format the main title (here centered with hjust = 0.5) and the tag inside the theme() function. See the reproducible example below:
# Packages
library(ggplot2)
library(gridExtra)
# library(cowplot) # not necessary here
# Plots
# Two empty plots, each with a centred title and a tag ("A", "B") set via labs().
p1 <- ggplot() +
  labs(title = "plot 1", tag = "A") +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.tag = element_text()
  )

p2 <- ggplot() +
  labs(title = "plot 2", tag = "B") +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.tag = element_text()
  )

# Stack the two plots.
grid.arrange(p1, p2)
If you want the tag (A, B, C) to be inside the plotting area, this post suggests using plot.tag.position = c(x, y). See for example:
# Empty plot whose tag "C" is placed at explicit coordinates (0.1, 0.8)
# through plot.tag.position.
p3 <- ggplot() +
  labs(title = "plot 3", tag = "C") +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.tag = element_text(),
    plot.tag.position = c(0.1, 0.8)
  )
p3
Have you tried the package egg?
https://cran.r-project.org/web/packages/egg/vignettes/Overview.html
library(tidyverse)
library(magrittr)
# Six columns of 100 standard-normal draws each, bound together and named
# var1 ... var6.
data <- lapply(1:6, function(k) rnorm(100, 0, 1)) %>%
  bind_cols() %>%
  setNames(paste0("var", 1:6))

# Three scatter plots on consecutive pairs of columns.
p1 <- ggplot(data, aes(x = var1, y = var2)) +
  geom_point()
p2 <- ggplot(data, aes(x = var3, y = var4)) +
  geom_point()
p3 <- ggplot(data, aes(x = var5, y = var6)) +
  geom_point()

# Arrange them in one column, labelled A, B, C.
egg::ggarrange(
  p1, p2, p3,
  ncol = 1,
  labels = c("A", "B", "C")
)
Another option is using the patchwork package with plot_annotation which has the tag_levels argument which gives the possibility to add tags like letters or numbers. First a reproducible example with letters:
library(patchwork)
library(ggplot2)
# Scatter plot of disp against hp.
p1 <- ggplot(mtcars) +
  geom_point(aes(hp, disp)) +
  ggtitle("Plot 1")

# Box plot of mpg for each gear value.
p2 <- ggplot(mtcars) +
  geom_boxplot(aes(gear, mpg, group = gear)) +
  ggtitle("Plot 2")

# Combine the two plots and tag them with letters.
(p1 + p2) & plot_annotation(tag_levels = "A")
Created on 2022-08-21 with reprex v2.0.2
Another option with numbers where you change the tag_levels to "1" like this:
# Same combined plot, with tag_levels set to '1' instead of 'A'.
p1 + p2 & plot_annotation(tag_levels = '1')
Created on 2022-08-21 with reprex v2.0.2
As you can see, the tags have letters or numbers. Check the links above for more information and options.

ggplot2 and facet_grid : add highest value for each plot

I am using facet_grid() to draw multiple plots, one per group of data. For each plot, I want to add the highest Y value in the corner. I've tried several hacks, but none of them gives me the expected result. This answer partially helps me, but the value I want to add will constantly be changing, so I don't see how I can apply it.
Here is a minimal example, I'd like to add the red numbers on the graph below:
library(ggplot2)
# Minimal example: two groups (A, B) with four hourly values each.
data <- data.frame(
  "group" = rep(c("A", "B"), each = 4),
  "hour" = rep(c(1, 2, 3, 4), 2),
  "value" = c(5, 4, 2, 3, 6, 7, 4, 5)
)

# One square panel per group, value against hour as points joined by a line.
ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  theme(aspect.ratio = 1) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  facet_grid(~group)
Thanks for your help!
library(dplyr)
# One row per group holding that group's largest value.
data2 <- data %>%
  group_by(group) %>%
  summarise(Max = max(value))

# Same plot, with each group's maximum written in red near the top-right
# corner of its panel (x = y = Inf, pulled inwards by hjust/vjust).
ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  geom_text(
    aes(label = Max),
    data = data2,
    x = Inf,
    y = Inf,
    hjust = 2,
    vjust = 2,
    col = "red"
  ) +
  theme(aspect.ratio = 1) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  facet_grid(~group)
This does the trick. If you always have fixed ranges you can position the text manually.
library(ggplot2)
# Minimal example: two groups (A, B) with four hourly values each.
data <- data.frame(
  "group" = rep(c("A", "B"), each = 4),
  "hour" = rep(c(1, 2, 3, 4), 2),
  "value" = c(5, 4, 2, 3, 6, 7, 4, 5)
)

# Per-group maxima; tapply() returns them named by group, in sorted order.
group_max <- tapply(data$value, data$group, max)

ggplot(data, aes(x = hour, y = value)) +
  geom_line() +
  geom_point() +
  geom_text(
    aes(x, y, label = lab),
    data = data.frame(
      x = Inf,
      y = Inf,
      lab = as.vector(group_max),
      # Group labels come from the same tapply() result as the maxima.
      # unique(data$group) follows order of appearance instead, so on data
      # not sorted by group each maximum could land in the wrong facet.
      group = names(group_max)
    ),
    # "inward" keeps the label inside the panel at the Inf/Inf corner.
    vjust = "inward",
    hjust = "inward"
  ) +
  theme(aspect.ratio = 1) +
  scale_x_continuous(name = "hours", limits = c(1, 4)) +
  scale_y_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 2)) +
  facet_grid(~group)

Resources