How do I change x-axis from numeric to custom ggplot? - r

I have data that looks like:
require(ggplot2); require(lubridate); require(data.table); require(dplyr)
Date <- rep(seq.Date(as.Date("2014-01-01"), as.Date("2016-12-31"), by = "days"), 3)
Index <- rep(c(rep(1:365, times = 2), 1:366), 3)
Counts<- sample(1:25, size = length(Index), replace = TRUE)
City <- rep(c("Dallas", "San Antonio", "Houston"), each = length(Index)/3)
df <- data.frame(Date = Date, Index = Index, Counts = Counts, City = City)
df <- df %>% mutate(Year = year(as.Date(Date)))
I graphed the data using the following code:
ggplot(df, aes(x = Index, y = Counts, colour = City)) +
geom_line(aes(group = City))+
geom_vline(xintercept = c(31, 59.5, 90, 120, 151, 181, 212, 243, 273, 304, 334))+
facet_grid(Year~.) +
theme(legend.position = "bottom")
My goal is to actually label the x-axis as Jan Feb ... Dec. I have tried scale_x_continuous and scale_x_discrete to no avail. Any suggestions?
Thanks in advance

I think this is easier to achieve if you use Date as your x variable and make use of scale_x_date:
# Add the calendar year (used for faceting) and the first day of each row's
# month (used to draw the month separator lines).
# floor_date() is used rather than round_date(): round_date() rounds to the
# *nearest* month, so dates in the second half of a month would be assigned
# to the first day of the following month (late-December rows would even land
# on 1 January of the next year and stretch that facet's x-axis).
df <- df %>%
  mutate(
    Year = year(as.Date(Date)),
    MonthStart = floor_date(Date, unit = "month")
  )

# The 15th of every month: positions of the month-name axis labels.
mid_months <- seq.Date(as.Date("2014-01-15"), as.Date("2016-12-15"), by = "months")

ggplot(df, aes(x = Date, y = Counts, colour = City)) +
  geom_line(aes(group = City)) +
  facet_wrap(~ Year, ncol = 1, scales = "free") +
  geom_vline(aes(xintercept = MonthStart), colour = 'black') +
  # expand = c(0,0) to ensure scale doesn't extend to additional
  # months
  scale_x_date(breaks = mid_months, date_labels = "%b", expand = c(0, 0)) +
  theme(legend.position = "bottom")
EDIT: updated to add mid-month labels

A quick option to make use of what you've already got is to pass your vector of x intercepts to scale_x_continuous as breaks, and use month.abb to name them:
library(tidyverse)
# Build the example data: three years of daily counts for three cities.
# tibble() replaces the deprecated data_frame(); like data_frame(), it
# evaluates columns in order, so later columns (Counts, City, Year) can refer
# to the ones defined before them (Index, Date).
df <- tibble(
  Date = rep(seq.Date(as.Date("2014-01-01"), as.Date("2016-12-31"), by = "days"), 3),
  # Day-of-year index: two 365-day years followed by the 366-day year 2016.
  Index = rep(c(rep(1:365, times = 2), 1:366), 3),
  Counts = sample(1:25, size = length(Index), replace = TRUE),
  City = rep(c("Dallas", "San Antonio", "Houston"), each = length(Index) / 3),
  Year = lubridate::year(Date)
)
# Day-of-year positions separating consecutive months (11 boundaries for 12
# months). NOTE(review): 59.5 looks like a compromise between the end of
# February in common (59) and leap (60) years -- confirm.
month_breaks <- c(31, 59.5, 90, 120, 151, 181, 212, 243, 273, 304, 334)

# One panel per year; the same boundaries serve as vertical separators and as
# axis breaks, labelled with the month that starts there (Feb ... Dec).
ggplot(data = df, mapping = aes(x = Index, y = Counts, colour = City)) +
  geom_line() +
  geom_vline(xintercept = month_breaks) +
  facet_grid(Year ~ .) +
  scale_x_continuous(
    breaks = month_breaks,
    labels = month.abb[-1]
  ) +
  theme(legend.position = "bottom")

Related

gganimate - have geom_rect adjust each frame

I have the following data:
library(ggplot2)
library(gganimate)
library(tidyverse)
#' Create one simulated quarterly series tagged with a vintage and an id
#'
#' @param vintage Value stored unchanged in the `vintage` column of every row.
#' @param id Value stored unchanged in the `id` column of every row.
#' @return A data.frame with columns `Dates` (quarterly dates from 2010-01-01
#'   to 2025-12-31), `RLG` (a random walk of +/-1 steps), `vintage` and `id`.
createData <- function(vintage, id) {
  # Generate a sequence of dates from 2010-01-01 to 2025-12-31 with a quarterly frequency
  Dates <- seq(from = as.Date("2010-01-01"), to = as.Date("2025-12-31"), by = "quarter")
  # Draw one +/-1 step per date. The count is taken from the date sequence
  # instead of a hard-coded 64 so the two can never drift apart.
  RLG <- cumsum(sample(c(-1, 1), size = length(Dates), replace = TRUE))
  data.frame(Dates, RLG, vintage, id)
}
#createData
df<- createData("2018-01-01",1) %>%
rbind(createData("2019-01-01",2))%>%
rbind(createData("2020-01-01",3)) %>%
rbind(createData("2021-01-01",4))%>%
rbind(createData("2022-01-01",5))%>%
rbind(createData("2023-01-01",6))%>%
rbind(createData("2024-01-01",7))%>%
rbind(createData("2025-01-01",8))
Which I use to make the following chart:
options(gganimate.nframes = 8*length(unique(df$vintage)), gganimate.res = 30)
p<- ggplot(df) +
aes(x = Dates, y = RLG, group = as.Date(vintage), colour = "RLG") +
geom_line()+
scale_y_continuous(labels = \(x) paste0(x, "%"))+
theme(axis.title = element_blank(),legend.position="none")+
transition_time(id)+
exit_fade(alpha = 0.5)+
shadow_mark(alpha = 0.2)
animate(p, end_pause = 30)
I would like to add a geom_rect which goes from vintage to max(Dates). At each frame, vintage will increase, so the geom_rect will shrink slightly. How can I do this without interfering with the shadow_mark and exit_fades which I am applying to the lines?
If you mean something like a progress bar you could do it like so:
Create a data frame for the geom that is a subset of the original:
df_geom <- df |>
mutate(vintage = as.Date(vintage)) |>
group_by(id) |>
slice(n())
Use geom_segment with the DF from above.
If you want to leave shadow_mark in you can do shadow_mark(exclude_layer = 2).
p <- ggplot(df) +
aes(x = Dates, y = RLG, group = as.Date(vintage), colour = RLG) +
geom_line()+
scale_y_continuous(labels = \(x) paste0(x, "%"))+
theme(axis.title = element_blank(),legend.position="none") +
geom_segment(
data = df_geom,
mapping = aes(x=vintage, xend=Dates,
y = 18, yend = 18),
size = 10, alpha =.4, color ='lightblue'
) +
transition_time(id)+
exit_fade(alpha = 0.5)
# shadow_mark(alpha = 0.2)
animate(p)

unable to show axis tick label in ggplot in R

I'm trying to plot a stacked bar chart with ggplot. Below is my code:
df = data.frame(Y = c(0,0,1,1), X = c(0,1,0,1), N = c(200, 50, 300, 70))
ggplot(data=df, aes(y=N, x= X, fill=Y)) +
geom_bar(position="stack", stat="identity", width=0.7)+
scale_x_discrete(name ="", breaks = c(0, 1), labels=c("No",'Yes')) +
theme(legend.position="none")
I want to show 'No' and 'Yes' as tick labels on the x axis, but nothing shows up. Does anyone know why my tick labels do not show up? I do not understand what I did wrong.
You just need to insert factor(X) to make X discrete rather than continuous:
library(tidyverse)
df <- data.frame(Y = c(0, 0, 1, 1), X = c(0, 1, 0, 1), N = c(200, 50, 300, 70))
ggplot(data = df, aes(y = N, x = factor(X), fill = Y)) +
geom_bar(position = "stack", stat = "identity", width = 0.7) +
scale_x_discrete(name = "", breaks = c(0, 1), labels = c("No", "Yes")) +
theme(legend.position = "none")
Created on 2022-06-14 by the reprex package (v2.0.1)
An easier way to do it may be to do the data transformation outside the ggplot function:
library(ggplot2)
library(dplyr)
df = data.frame(Y = c(0,0,1,1), X = c(0,1,0,1), N = c(200, 50, 300, 70))
df %>%
mutate(X = if_else(X == 0, "No", "Yes")) %>%
ggplot(aes(y=N, x= X, fill=Y)) +
geom_bar(position="stack", stat="identity", width=0.7)+
theme(legend.position="none")

geom_text change factor order of facet plot

I am trying to annotate individual plots of a facet plot. I have set the order to 2008, 1999 using factor levels.
But when I add the geom_text to the ggplot, the order of the plots changes. See the example below. What am I doing wrong? How can I solve this?
library(tidyverse)
df <- mpg %>% mutate(year = factor(year, levels = c(2008,1999)))
anno <- data.frame(xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1","text2"),
year = c("2008","1999"))
df %>% ggplot(aes(class, displ)) +
geom_col(aes(fill=drv)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab))
Convert the year column in your annotation dataframe also to a factor with the same levels and order as in your main df:
library(ggplot2)
df <- mpg
df$year = factor(df$year, levels = c(2008, 1999))
anno <- data.frame(
xstar = c(5, 2), ystar = c(100, 70),
lab = c("text1", "text2"),
year = factor(c("2008", "1999"), levels = c(2008, 1999))
)
ggplot(df, aes(class, displ)) +
geom_col(aes(fill = drv)) +
geom_text(data = anno, aes(x = xstar, y = ystar, label = lab)) +
facet_grid(~year) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Perform manual annotations with ggsignif

I am hoping to add manual comparisons and annotations to a ggplot with facets. While the previous stack overflow questions have been helpful, transitioning to multiple pairwise comparison per facet has been tricky.
Fortunately, the authors give a great section on this at the bottom of https://github.com/const-ae/ggsignif titled "Advanced Example". It has been modified to show it's possible to add multiple pairwise comparison in one facet box:
library(ggplot2)
library(ggsignif)
annotation_df <- data.frame(
color = c("E", "E"),
start = c("Good", "Fair"),
end = c("Fair", "Very Good"),
y = c(3.6, 4.7),
label = c("Comp. 1", "Comp. 2")
)
ggplot(diamonds, aes(x = cut, y = carat)) +
geom_boxplot() +
geom_signif(
data = annotation_df,
aes(xmin = start, xmax = end, annotations = label, y_position = y),
textsize = 3, vjust = -0.2,
manual = TRUE
) +
facet_wrap(~color) +
ylim(NA, 5.3)
Great, but when I do something similar, I seem only able to get one number produced. Additionally, I can't get it to graph without fill being set. What might be causing this issue?
library(ggplot2)
library(ggsignif)
library(dplyr)
EnrichmentDF <- data.frame(
cancer = c(rep("BRCA", 3), rep("PRAD", 3), rep("PAAD", 3)),
occurance = c(166, 152, 90, 288, 512, 291, 58, 145, 101),
cluster = rep(1:3)
)
Cancer <- apply(EnrichmentDF, 1, function(x)rep(x[1], x[2])) %>% unlist()
Cluster <- apply(EnrichmentDF, 1, function(x)rep(x[3], x[2])) %>% unlist()
niceTable <- table(Cancer, Cluster)
EnrichmentDF <- as.data.frame(niceTable)
colnames(EnrichmentDF) <- c("Cancer", "Cluster", "NumberOfPatients")
EnrichmentDF$CancerTotalN <- rowSums(niceTable)
EnrichmentDF$Percentage <- round(100 * EnrichmentDF$NumberOfPatients / EnrichmentDF$CancerTotalN, 2)
SignificanceStorage <- runif(9, 0.001, 0.2)
SignificanceDF <- data.frame(Cancer = rep(unique(EnrichmentDF$Cancer), each = ncol(niceTable)),
start = c(1,1,2), end = c(2, 3, 3),
y = c(75, 95, 125),
label = signif(SignificanceStorage, digits=2)
)
EnrichmentDF %>% ggplot(aes(x = Cluster, y= Percentage, fill = Cancer)) + # example did not have fill but breaks without it
geom_bar(aes(fill = Cluster), stat = "identity",
position = "dodge", width = .5) +
geom_signif(
data = SignificanceDF,
aes(xmin = start, xmax = end, annotations = label, y_position = y),
textsize = 3, vjust = -0.2,
manual = TRUE) +
facet_wrap(~Cancer) + ylim(0, 130) +
theme_bw()
This question is similar to this question, but without deprecation fixes leading to consistent overlaps
Thanks to TarJae and the contributors to the post here: https://github.com/const-ae/ggsignif/issues/63, an issue has been found requiring a group = in the parameters of geom_signif when faceting. While the comment on github mentions it's due to duplicated data, I do not see such in the example provided here.
These are the changes:
SignificanceDF$group <- 1:nrow(SignificanceDF)
EnrichmentDF %>% ggplot(aes(x = Cluster, y= Percentage)) +
and have been marked by a double ##
library(ggplot2)
library(ggsignif)
library(dplyr)
EnrichmentDF <- data.frame(
cancer = c(rep("BRCA", 3), rep("PRAD", 3), rep("PAAD", 3)),
occurance = c(166, 152, 90, 288, 512, 291, 58, 145, 101),
cluster = rep(1:3)
)
Cancer <- apply(EnrichmentDF, 1, function(x)rep(x[1], x[2])) %>% unlist()
Cluster <- apply(EnrichmentDF, 1, function(x)rep(x[3], x[2])) %>% unlist()
niceTable <- table(Cancer, Cluster)
EnrichmentDF <- as.data.frame(niceTable)
colnames(EnrichmentDF) <- c("Cancer", "Cluster", "NumberOfPatients")
EnrichmentDF$CancerTotalN <- rowSums(niceTable)
EnrichmentDF$Percentage <- round(100 * EnrichmentDF$NumberOfPatients / EnrichmentDF$CancerTotalN, 2)
SignificanceStorage <- runif(9, 0.001, 0.2)
SignificanceDF <- data.frame(Cancer = rep(unique(EnrichmentDF$Cancer), each = ncol(niceTable)),
start = c(1,1,2), end = c(2, 3, 3),
y = c(75, 95, 125),
label = signif(SignificanceStorage, digits=2)
)
# Give every annotation row its own group id. The id is mapped to the `group`
# aesthetic of geom_signif below; creating the column alone is not enough,
# because ggplot2 only sees columns that are mapped inside aes().
SignificanceDF$group <- 1:nrow(SignificanceDF) ## NEW CODE ADDED
EnrichmentDF %>% ggplot(aes(x = Cluster, y = Percentage)) + ## REMOVED FILL
  geom_bar(aes(fill = Cluster), stat = "identity",
           position = "dodge", width = .5) +
  geom_signif(
    data = SignificanceDF,
    aes(xmin = start, xmax = end, annotations = label, y_position = y,
        group = group), ## one group per comparison so each bracket is drawn
    textsize = 3, vjust = -0.2,
    manual = TRUE) +
  facet_wrap(~Cancer) + ylim(0, 130) +
  theme_bw()

Combine geom_bar, geom_segment and facet_grid on time series visualization

I'm trying to make a nice graph with ggplot, but I'm still facing a barrier.
When I use facet_grid at the end of my code, something goes wrong. A helping hand would be great!
This is my code :
# Package
library(ggplot2)
# Function
#' Lower-case a character vector, then capitalise the first character
#'
#' @param x Vector of strings to convert.
#' @return `x` converted to lower case with the first character of each
#'   element upper-cased.
firstup <- function(x) {
  lowered <- tolower(x)
  initial <- substr(lowered, 1, 1)
  # Overwrite only the leading character; the rest stays lower case.
  substr(lowered, 1, 1) <- toupper(initial)
  lowered
}
Create data
data_F = data.frame(DATE = seq(as.Date('2020-01-21'), as.Date('2020-03-06'), by = 'days'),
NB = sample(1:20, 46, replace=TRUE))
Manage the data
data_F = data.frame(DATE = data_F$DATE,
year = as.numeric(format(data_F$DATE, format = "%Y")),
month = as.factor(format(data_F$DATE, format = "%B")),
days = as.numeric(format(data_F$DATE, format = "%d")),
NB = data_F$NB)
Relevel month with the right order
data_F$month = as.factor(firstup(data_F$month))
data_F$month = factor(data_F$month,unique(data_F$month))
month = factor(data_F$month,unique(data_F$month))
month = unique(month)
month = as.factor(month)
The main plot
plot1 = ggplot(data_F,aes(x=DATE,y=NB)) +
geom_bar(stat = "identity", colour="black", fill = "dodgerblue3", width=0.5) +
scale_x_date(breaks = data_F$DATE, labels = data_F$days, minor_breaks = NULL,
expand = expansion(add = 0.3))+
scale_y_continuous(limits = c(0, 65), breaks = seq(0, 65, by = 5), minor_breaks = seq(0, 65, by = 1))
plot1
Creating the segment data
data.segm = data.frame(x=data_F$DATE,y=Inf, xend = data_F$DATE, yend=-Inf,
month=data_F$month)
Show two days, for example at the row 6 and 35
i = 6
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
i = 35
plot1 = plot1 + geom_segment(data = data.segm, aes_string(x=data.segm$x[[i]],y=data.segm$y[[i]],
xend=data.segm$xend[[i]],yend=data.segm$yend[[i]]),
colour = alpha("gray90",0.5),size=8,inherit.aes = F)
plot1
And now my problem with facet_grid
plot2 = plot1 + facet_grid(.~month, space="free_x", scales="free_x", switch="x")
plot2
Jonas

Resources