Combine geom_bar, geom_segment and facet_grid on time series visualization - r

I'm trying to do a nice graph with ggplot but I'm still faces a barrier.
When I use facet_grid at the end of my code, somethings wrong happen. A helping hand would be great!
This is my code :
# Package
library(ggplot2)
# Function
firstup <- function(x) {
x <- tolower(x)
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
x
}
Create data
data_F = data.frame(DATE = seq(as.Date('2020-01-21'), as.Date('2020-03-06'), by = 'days'),
NB = sample(1:20, 46, replace=TRUE))
Manage the data
data_F = data.frame(DATE = data_F$DATE,
year = as.numeric(format(data_F$DATE, format = "%Y")),
month = as.factor(format(data_F$DATE, format = "%B")),
days = as.numeric(format(data_F$DATE, format = "%d")),
NB = data_F$NB)
Relevel month with the right order
data_F$month = as.factor(firstup(data_F$month))
data_F$month = factor(data_F$month,unique(data_F$month))
month = factor(data_F$month,unique(data_F$month))
month = unique(month)
month = as.factor(month)
The main plot
plot1 = ggplot(data_F,aes(x=DATE,y=NB)) +
geom_bar(stat = "identity", colour="black", fill = "dodgerblue3", width=0.5) +
scale_x_date(breaks = data_F$DATE, labels = data_F$days, minor_breaks = NULL,
expand = expansion(add = 0.3))+
scale_y_continuous(limits = c(0, 65), breaks = seq(0, 65, by = 5), minor_breaks = seq(0, 65, by = 1))
plot1
Creating the segment data
data.segm = data.frame(x=data_F$DATE,y=Inf, xend = data_F$DATE, yend=-Inf,
month=data_F$month)
Show two days, for example at the row 6 and 35
i = 6
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
i = 35
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
plot1
And know my problem with facet_grid
plot2 = plot1 + facet_grid(.~month, space="free_x", scales="free_x", switch="x")
plot2
Jonas

Related

Position stacked identity data sample size as geom_text directly over a bar using geom_bar from ggplot2

In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)

Add extra tick where vertical line crosses x-axis

I have below ggplot
library(ggplot2)
library(quantmod)
dat = data.frame(val = 1:20, qtr = as.yearqtr(seq(as.Date('2000-01-01'), length.out = 20, by = '3 months')))
ggplot(data = dat) +
geom_line(aes(x = qtr, y = val)) +
geom_vline(xintercept = as.yearqtr(as.Date('2003-04-01')))
I want to add one extra tick in x-axis where my vertical line crosses the x-axis. The corresponding tick label will also be manual i.e. 'Sep : 2003-04-01'. Is there any function available in ggplot to achieve this?
Thanks for your help.
You could define your bespoke date breaks as a separate vector.
And the labels as a matching text vector including the bespoke text for the x intercept tick.
Adjustments to the axis text for legibility.
library(ggplot2)
library(quantmod)
intercept_date <- as.yearqtr(as.Date('2003-04-01'))
x_breaks <- c(as.yearqtr(seq(as.Date('2000-01-01'), length.out = 5, by = '12 months')), intercept_date)
x_labels <- c(as.character(as.yearqtr(seq(as.Date('2000-01-01'), length.out = 5, by = '12 months'))), "Sep : 2003-04-01")
ggplot(data = dat) +
geom_line(aes(x = qtr, y = val)) +
geom_vline(xintercept = intercept_date)+
scale_x_yearqtr(breaks = x_breaks,
labels = x_labels)+
theme(axis.text.x = element_text(angle = 30, hjust = 1))
Created on 2022-10-16 with reprex v2.0.2
Option 2
In response to comment: intercept breaks and labels defined within scale_x_yearqtr:
ggplot(data = dat) +
geom_line(aes(x = qtr, y = val)) +
geom_vline(xintercept = as.yearqtr(as.Date('2003-04-01')))+
scale_x_yearqtr(breaks = c(as.yearqtr(seq(as.Date('2000-01-01'), length.out = 5, by = '12 months')), as.yearqtr(as.Date('2003-04-01'))),
labels = c(as.character(as.yearqtr(seq(as.Date('2000-01-01'), length.out = 5, by = '12 months'))), "Sep : 2003-04-01"))+
theme(axis.text.x = element_text(angle = 30, hjust = 1))

Plot the legend at the bottom in different way when we deal with discrete color filling in ggplot

Suppose we want to plot this data:
library(ggplot2)
library(sf)
library(raster)
library(colorRamps)
min_lon <- 10
max_lon <- 17
min_lat <- 8
max_lat <- 17
grid_size <- 0.5
lon_grids <- 1 + ((max_lon - min_lon)/grid_size)
lat_grids <- 1 + ((max_lat - min_lat)/grid_size)
points <- data.frame(lon = rep(seq(min_lon, max_lon, grid_size), lat_grids), lat = rep(seq(min_lat, max_lat, grid_size), each = lon_grids))
points$Var <- runif(min= 10, max = 48, 285)
points$value <-cut(points$Var, breaks= seq(10.08, 47.80, length.out = 13), dig.lab = 1)
ggplot() +
coord_sf(xlim = c(min_lon, max_lon), ylim = c(min_lat, max_lat)) +
theme_bw()+
geom_raster(data = points, aes(x = lon, y = lat, fill = value), interpolate = FALSE) +
labs(x="Longitude", y="Latitude")+
scale_fill_manual(values = matlab.like(n = 13), name = "[m]",
labels = sprintf("%.2f", seq(10.08, 47.80, length.out = 13)),
guide = guide_legend(reverse = TRUE))+theme(legend.position = "bottom")
This code produces the following graph:
Two problems I am facing here:
To make it discrete, I used the cut function. I chose the breaks= seq(10.08, 47.80, length.out = 13) arbitrary based on the minimum and maximum values with a random length of 13. Is there any criteria to decide the correct range?
Is there any way to make the legend look like this?
One option would be to use e.g. scale_fill_stepsn with guide_binswhich does not require to manually discretize the variable mapped on fill. Additionally I use a custom function to set the breaks of the legend instead of the default mechanism to set the number of breaks.
set.seed(123)
library(ggplot2)
library(colorRamps)
base <- ggplot() +
coord_sf(xlim = c(min_lon, max_lon), ylim = c(min_lat, max_lat)) +
theme_bw() +
geom_raster(data = points, aes(x = lon, y = lat), interpolate = FALSE) +
labs(x = "Longitude", y = "Latitude") +
theme(legend.position = "bottom")
base +
aes(fill = Var) +
scale_fill_stepsn(colors = matlab.like(n = 13), name = "[m]",
breaks = function(x) seq(x[[1]], x[[2]], length.out = 13),
labels = ~ sprintf("%.0f", .x),
guide = guide_bins(axis = FALSE,
show.limits = TRUE))

geom_text change factor order of facet plot

I am trying annotate individual plots of a facet plot. I have set the order to 2008, 1999 using factor levels.
But when I add the geom_text to the ggplot, the order of the plots change. See examples below. What am I doing wrong? How can I solve this?
library(tidyverse)
df <- mpg %>% mutate(year = factor(year, levels = c(2008,1999)))
anno <- data.frame(xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1","text2"),
year = c("2008","1999"))
df %>% ggplot(aes(class, displ)) +
geom_col(aes(fill=drv)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab))
Convert the year column in your annotation dataframe also to a factor with the same levels and order as in your main df:
library(ggplot2)
df <- mpg
df$year = factor(df$year, levels = c(2008, 1999))
anno <- data.frame(
xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1", "text2"),
year = factor(c("2008", "1999"), levels = c(2008, 1999))
)
ggplot(df, aes(class, displ)) +
geom_col(aes(fill = drv)) +
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

ggplot Multiple facets and combined x axis

I am trying to create a plot to track results over days for multiple factors. Ideally I would like my xaxis to be Day, with the day number centered in the middle of the reps for that particular day, the y axis to be result, and the facet will be the Lot (1-4). I am having difficulty making the day centered on the bottom using repeatable text, as the number of reps may vary.
I was using ideas shown in this post: Multi-row x-axis labels in ggplot line chart but have been unable to make any progress.
Here is some code I have been using and the plot that I have so far. The x axis is far too busy and I am trying to consolidate it.
data <- data.frame(System = rep(c("A", "B"), each = 120), Lot = rep(1:4, each = 30),
Day = rep(1:5, each = 6), Rep = rep(1:6, 40), Result = rnorm(240))
library(ggplot2)
ggplot(data, aes(x = interaction(Day, Rep, lex.order = TRUE), y = Result, color = System, group = System)) +
geom_point() +
geom_line() +
theme(legend.position = "bottom") +
facet_wrap(~Lot, ncol = 1) +
geom_vline(xintercept = (which(data$Rep == 1 & data$Day != 1)), color = "gray60")
I'm not 100% sure if this is exactly what you are after but this will center the day on the x-axis.
library(dplyr)
library(tidyr)
library(ggplot2)
df <- data.frame(System = rep(c("A", "B"), each = 120), Lot = rep(1:4, each = 30),
Day = rep(1:5, each = 6), Rep = rep(1:6, 40), Result = rnorm(240))
df <- df %>%
unite(Day_Rep, Day, Rep, sep = ".", remove = F) %>%
mutate(Day_Rep = as.numeric(Day_Rep))
ggplot(df, aes(x = Day_Rep, y = Result, color = System, group = System)) +
geom_point() +
geom_line() +
theme(legend.position = "bottom") +
facet_wrap(~Lot, ncol = 1) +
scale_x_continuous(labels = df$Day, breaks = df$Day + 0.5)+
geom_vline(xintercept = setdiff(unique(df$Day), 1))

Resources