ggplot: re-order categorical y-axis (Gantt chart) - r

I am trying to produce a Gantt chart out of a table with different tasks (each having a start date and an end date).
library(tidyverse)
# Sample data: one row per task with its start date, end date and group
df1 <- data.frame(
  from = c(
    "2020-01-01", "2020-02-02", "2020-05-04",
    "2020-02-01", "2020-01-20", "2020-02-10"
  ),
  to = c(
    "2020-03-30", "2020-03-15", "2020-05-20",
    "2020-04-05", "2020-03-05", "2020-04-13"
  ),
  task = c("Task 1", "Task 2", "Task 3", "Task 4", "Task 5", "Task 6"),
  group = c("Finance", "Finance", "Research", "Research", "Other", "Other")
)

# Plot gantt-chart: stack `from` and `to` into a single `date` column so
# every task contributes two points that geom_line() joins into one bar
df1 %>%
  mutate(
    from = as.Date(from),
    to = as.Date(to)
  ) %>%
  pivot_longer(cols = c(from, to), values_to = "date") %>%
  ggplot(aes(x = date, y = task, colour = group)) +
  geom_line(lwd = 3) +
  # colour is inherited from the plot-level mapping
  geom_point(alpha = .5, pch = 18, size = 5) +
  scale_x_date(
    position = "bottom",
    date_breaks = "1 week",
    date_labels = "%U"
  ) +
  labs(title = "Milestones", x = "", y = "", colour = "Responsible") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
So far so good, but now I have a major problem and a minor problem:
major problem:
How can I re-order the categories on the y-axis as they appear in the data (not alphabetically)? From top to bottom it should be: Finance, Research, Other. Additionally, within each category (finance, research, other) the lines should be ordered by starting date (i.e. the task starting first should be on top)
minor problem:
on the x-axis how can I plot a thicker line for each month and a thinner line for each week?
Thanks for help!

This could be achieved like so:
As #RuiBarrades mentioned in his comments to get the right order you have to convert to a factor and set the levels in the right order. First, set the levels for the groups. Second, to get the tasks in the desired order I rearrange the dataset by group and start date and make use of forcats::fct_inorder to set the levels of the tasks in the desired order.
If I got you right you want different grid lines for week and month? This could be achieved by setting date_minor_breaks = "1 month" and styling the grid lines via theme and panel.grid.minor/major.x. Here I opted for a "black" color but if you prefer different sizes you could do so via size.
library(tidyverse)
library(ggplot2)
# Sample data: one row per task with its start date, end date and group
df1 <- data.frame(
  from = c(
    "2020-01-01", "2020-02-02", "2020-05-04",
    "2020-02-01", "2020-01-20", "2020-02-10"
  ),
  to = c(
    "2020-03-30", "2020-03-15", "2020-05-20",
    "2020-04-05", "2020-03-05", "2020-04-13"
  ),
  task = c("Task 1", "Task 2", "Task 3", "Task 4", "Task 5", "Task 6"),
  group = c("Finance", "Finance", "Research", "Research", "Other", "Other")
)

# Plot gantt-chart
df1 %>%
  mutate(
    from = as.Date(from),
    to = as.Date(to),
    # explicit level order instead of the alphabetical default
    group = factor(group, levels = c("Finance", "Research", "Other"))
  ) %>%
  # Sort descending on both keys: the discrete y axis is drawn bottom-up,
  # so the last level (Finance, earliest start) ends up at the top
  arrange(desc(group), desc(from)) %>%
  # freeze the sorted row order as the factor level order of `task`
  mutate(task = factor(task, levels = unique(task))) %>%
  pivot_longer(cols = c(from, to), values_to = "date") %>%
  ggplot(aes(x = date, y = task, colour = group)) +
  geom_line(lwd = 3) +
  # colour is inherited from the plot-level mapping
  geom_point(alpha = .5, pch = 18, size = 5) +
  scale_x_date(
    position = "bottom",
    date_breaks = "1 week",
    date_minor_breaks = "1 month",
    date_labels = "%U"
  ) +
  labs(title = "Milestones", x = "", y = "", colour = "Responsible") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    # monthly (minor) grid lines are drawn in black
    panel.grid.minor.x = element_line(color = "black")
  )

Related

ggplot: labeling x axis in lineplot

since a long time I despair to straighten the label of the x-axis in my plot (ggplot2).
The challenge is that I have two geom_paths, each fetching the data from a different dataframe - I'm sure this will become a bit clearer in the code:
# Base plot maps dx$year / dx$en.x; each geom_path() layer brings its own
# data frame (ps, pg) and its own x/y mapping
ggplot(dx, aes(x = year, y = en.x)) +
  geom_path(
    aes(x, y, color = "Person 1", linetype = "Person 1"),
    data = ps,
    size = 0.5
  ) +
  geom_path(
    aes(x, y, color = "Person 2", linetype = "Person 2"),
    data = pg,
    size = 0.5
  ) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
  scale_color_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)
  ) +
  scale_linetype_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Format, "Person 2" = Nutzer2Format)
  )
The goal is, to Label the X-Axis with the years from the dataframe "dx", as shown in the aes-parameter. And it works! But only if you disable the geom_paths - shown below:
# Same plot with both geom_path() layers disabled
ggplot(dx, aes(x = year, y = en.x)) +
  # geom_path(data = ps, aes(x, y, color = "Person 1", linetype = "Person 1"), size = 0.5) +
  # geom_path(data = pg, aes(x, y, color = "Person 2", linetype = "Person 2"), size = 0.5) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
  scale_color_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)
  ) +
  scale_linetype_manual(
    "",
    labels = c(Nutzer1, Nutzer2),
    values = c("Person 1" = Nutzer1Format, "Person 2" = Nutzer2Format)
  )
I can't really understand why the paths destroy the labeling like this - it must be the aes parameters.
If someone has a solution for this, I would be extremely grateful!
This could be achieved like so:
Convert your original month variable to a date time before calling xspline. This way the interpolated date values could be easily converted back to datetime via e.g. lubridate::as_datetime.
besides that you could row bind your datasets which makes plotting a bit easier
library(ggplot2)
library(tidyr)
library(dplyr)
# Convert month to a date-time first, so the x values returned by xspline()
# can be turned back into date-times afterwards
datengesamt <- mutate(datengesamt, month = as.POSIXct(month))

# A plot is opened before calling xspline() (presumably because xspline()
# needs an active graphics device); draw = FALSE means only the computed
# coordinates are kept
plot(1, 1)
ps <- xspline(datengesamt[, 1], datengesamt[, 2], shape = 1, draw = FALSE)
pg <- xspline(datengesamt[, 1], datengesamt[, 3], shape = 1, draw = FALSE)

# Stack both curves into one data frame; `id` records which person a row
# belongs to
pp <- bind_rows(
  "Person 1" = data.frame(ps),
  "Person 2" = data.frame(pg),
  .id = "id"
)
pp$x <- lubridate::as_datetime(pp$x)

ggplot(pp, aes(x = x, y = y, color = id, linetype = id)) +
  geom_path(size = 0.5) +
  scale_x_datetime(date_labels = "%Y") +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 2))

geom_dumbell spacing, legends in different places, and multiple aesthetics (timelines)

I saw this interesting way of creating a publication timeline using geom_dumbbell, so I created my own by first loading the libraries:
library(tidyverse)
library(ggalt)
library(ggrepel)
Entering in some data:
# create data frame; the two date columns are converted to Date on creation
df <- data.frame(
  paper = c(
    "Paper 1", "Paper 1", "Paper 2", "Paper 2",
    "Paper 3", "Paper 3", "Paper 3", "Paper 3"
  ),
  round = c(
    "first", "revision", "first", "revision",
    "first", "first", "first", "first"
  ),
  submission_date = as.Date(c(
    "2019-05-23", "2020-12-11", "2020-08-12", "2020-10-28",
    "2020-12-10", "2020-12-11", "2021-01-20", "2021-01-22"
  )),
  journal_type = c(
    "physics", "physics", "physics", "physics",
    "chemistry", "chemistry", "chemistry", "chemistry"
  ),
  journal = c(
    "journal 1", "journal 1", "journal 2", "journal 2",
    "journal 3", "journal 4", "journal 5", "journal 6"
  ),
  status = c(
    "Revise and Resubmit", "Waiting for Decision",
    "Revise and Resubmit", "Accepted",
    "Desk Reject", "Desk Reject",
    "Desk Reject", "Waiting for Decision"
  ),
  decision_date = as.Date(c(
    "2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29",
    "2020-12-10", "2021-01-05", "2021-01-22", "2021-01-24"
  )),
  step_complete = c("yes", "no", "yes", "yes", "yes", "yes", "yes", "no"),
  duration_days = c(129, 44, 15, 1, 0, 25, 2, 2)
)
and, finally, creating my own basic timeline using this code:
# One dumbbell per submission, from submission date to decision date,
# coloured by status and labelled with the waiting time in days
ggplot(
  df,
  aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    color = status
  )
) +
  geom_dumbbell(size = 1, size_x = 1) +
  ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
  scale_color_manual(
    values = c("green", "red", "darkolivegreen4", "turquoise1")
  ) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  ) +
  # theme_ipsum_tw() +
  theme(legend.position = "top")
As you can see from the above image, I can't see the x-axis. Additionally, I'd like to put another aesthetic and legend on the right side for the journal, perhaps putting a different shape on each line. Any other bells and whistles using the above data would be fun, too. Thanks!
Ok, I finally found some time to figure this out with help from this terrific post. To start, let's load the revised list of packages:
library(tidyverse)
library(ggalt)
library(ggrepel)
library(gridExtra)
library(gtable)
library(grid)
For comprehensiveness, let's reload the data:
# create dataframe; the two date columns are converted to Date on creation
df <- data.frame(
  paper = c(
    "Paper 1", "Paper 1", "Paper 2", "Paper 2",
    "Paper 3", "Paper 3", "Paper 3", "Paper 3"
  ),
  round = c(
    "first", "revision", "first", "revision",
    "first", "first", "first", "first"
  ),
  submission_date = as.Date(c(
    "2019-05-23", "2020-12-11", "2020-08-12", "2020-10-28",
    "2020-12-10", "2020-12-11", "2021-01-20", "2021-01-22"
  )),
  journal_type = c(
    "physics", "physics", "physics", "physics",
    "chemistry", "chemistry", "chemistry", "chemistry"
  ),
  Journal = c(
    "journal 1", "journal 1", "journal 2", "journal 2",
    "journal 3", "journal 4", "journal 5", "journal 6"
  ),
  status = c(
    "Revise and Resubmit", "Waiting for Decision",
    "Revise and Resubmit", "Accepted",
    "Desk Reject", "Desk Reject",
    "Desk Reject", "Waiting for Decision"
  ),
  decision_date = as.Date(c(
    "2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29",
    "2020-12-10", "2021-01-05", "2021-01-22", "2021-01-24"
  )),
  step_complete = c("yes", "no", "yes", "yes", "yes", "yes", "yes", "no"),
  duration_days = c(129, 44, 15, 1, 0, 25, 2, 2)
)
First, let's create the plot with the color legend and extract it. Because I want that legend to be on top, I make sure to indicate that as my legend position. Note that I specify my preferred colors using the scale_color_manual function:
# make plot with color legend (placed on top)
p1 <- ggplot(
  df,
  aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    color = status
  )
) +
  geom_dumbbell(size = 1, size_x = 1) +
  ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
  scale_color_manual(
    values = c("green", "red", "darkolivegreen4", "turquoise1")
  ) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  ) +
  theme(legend.position = "top")

# Extract the color legend - leg1
# (render the plot to a gtable and keep only the "guide-box" grobs)
leg1 <- gtable_filter(ggplotGrob(p1), "guide-box")
Second, let's make the plot with the shape legend and extract it. Because I want this legend to be positioned on the right side, I don't need to even specify the legend position here. Note that I specify my preferred shapes using the scale_shape_manual argument:
# make plot with shape legend (legend position left at its default)
p2 <- ggplot(
  df,
  aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    shape = Journal
  )
) +
  geom_dumbbell(size = 1, size_x = 1) +
  ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19, 25)) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  )

# Extract the shape legend - leg2
# (render the plot to a gtable and keep only the "guide-box" grobs)
leg2 <- gtable_filter(ggplotGrob(p2), "guide-box")
Third, let's make the full plot with no legend, specifying both the scale_color_manual and scale_shape_manual arguments as well as theme(legend.position = 'none'):
# make plot without legend: both color and shape are mapped here, and the
# legends are suppressed via the theme
plot <- ggplot(
  df,
  aes(
    x = submission_date,
    xend = decision_date,
    y = paper,
    label = duration_days,
    color = status,
    shape = Journal
  )
) +
  geom_dumbbell(size = 1, size_x = 3) +
  ggrepel::geom_label_repel(
    nudge_y = -.25,
    nudge_x = -5.25,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c("green", "red", "darkolivegreen4", "turquoise1")
  ) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19, 25)) +
  labs(
    title = "Timeline of Journal Submissions",
    subtitle = "Start date, decision date, and wait time (in days) for my papers.",
    x = NULL,
    y = NULL,
    color = "Status:"
  ) +
  theme(legend.position = "none")
Fourth, let's arrange everything according to our liking:
# Arrange the three components (plot, leg1, leg2)
# The two legends are positioned outside the plot:
# one at the top and the other to the side.

# Use the exact gtable field names (`heights` / `widths`) instead of relying
# on `$` partial matching, and sum them so each legend contributes a single
# size even if its gtable has more than one row/column.
legend_height <- sum(leg1$heights)
legend_width <- sum(leg2$widths)

# Top legend stacked above the plot; the plot takes the remaining height
plotNew <- arrangeGrob(
  leg1, plot,
  heights = unit.c(legend_height, unit(1, "npc") - legend_height),
  ncol = 1
)
# Side legend to the right; the stacked plot takes the remaining width
plotNew <- arrangeGrob(
  plotNew, leg2,
  widths = unit.c(unit(1, "npc") - legend_width, legend_width),
  nrow = 1
)
Finally, plot and enjoy the final product:
# Start a fresh page, then draw the arranged grob
grid.newpage()
grid.draw(plotNew)
As everyone will no doubt recognize, I relied very heavily on this post. However, I did change a few things, I tried to be comprehensive with my explanation, and some others spent time trying to help, so I think it is still helpful to have this answer here.

draw geom_vlines with different colors relative to the modalities of a variable

I have several geom_vline() in my plot corresponding to the dates where the variable "cas" changes value, and I want them to have different colors depending on the variable "cas". Is there a way to do that?
Here's my codes:
# Example data: timestamps stored as character strings, a `cas` label,
# a `meteo` series identifier and a numeric `valeur`
df <- data.frame(date=c("2020-01-01 00:00:00","2020-01-01 03:00:00","2020-01-01 06:00:00","2020-01-01 09:00:00","2020-01-01 12:00:00","2020-01-01 15:00:00","2020-01-01 18:00:00",
"2020-01-01 21:00:00","2020-01-02 00:00:00","2020-01-02 03:00:00"), cas=c("cas 0", "cas 0", "cas 0", "cas 1", "cas 1", "cas 1", "cas 2", "cas 2", "cas 2", "cas 0"),
meteo=c("t", "t", "t_S1", "P_S1","t_S1","P_S1","t", "t_S1", "t_S1", "P_S1"), valeur = c(-2.364850,-2.274782,-2.229748,-2.034601,-1.704351,-1.644305,-1.584260,-1.554237,-1.479181,-1.509203))
# Timestamps of the rows whose `cas` differs from the previous row
date_cas <- df[which(df$cas != dplyr::lag(df$cas)),] %>% select(date) %>% unlist()
# Parse them and format them back into "YYYY-mm-dd HH:MM:SS" strings
date_cas <- date_cas %>% lubridate::parse_date_time2(., "YmdHMS") %>% format(.,"%Y-%m-%d %H:%M:%S" )
# NOTE(review): the brackets in the geom_vline() call below are misplaced as
# written: `colour = ...` sits inside the `[ ]` index of df$date, `size` and
# `linetype` end up inside aes(), and the opening `geom_vline(` is never closed.
ggplot(df, aes(x=lubridate::date(date), y =valeur,colour=meteo, group = meteo )) +
geom_line(size=0.8)+
geom_vline( aes ( xintercept = lubridate::date(df$date[which(df$date %in% date_cas), colour = df$cas[which((df$date) %in% date_cas)] ]) , size=1, linetype = "dashed")+
labs(y="", x = "Date")+
theme_minimal()
I get this error :
Erreur : Aesthetics must be either length 1 or the same as the data (10): xintercept and colour
can anyone help please ?
I think a couple of parentheses/brackets needed to be moved, and would recommend subsetting df in geom_vline with data = df[df$date %in% date_cas, ].... This would help simplify your code a bit. See if this gives you what you need.
library(lubridate)
library(ggplot2)
# Collapse the timestamps to calendar dates (the x axis is plotted per day)
df$date <- lubridate::date(df$date)

# Rows where `cas` differs from the previous row. These are derived from
# `cas` itself rather than by matching `df$date` against the formatted
# "YYYY-mm-dd HH:MM:SS" strings in `date_cas`: once `df$date` is a Date,
# that `%in%` comparison never matches and no vertical line would be drawn.
# The first row has no predecessor, so it is never flagged as a change.
cas_changed <- c(FALSE, df$cas[-1] != df$cas[-nrow(df)])
cas_changes <- df[cas_changed, ]

ggplot(df, aes(x = date, y = valeur, colour = meteo, group = meteo)) +
  geom_line(size = 0.8) +
  # one dashed vertical line per change point, coloured by the new `cas`
  geom_vline(
    data = cas_changes,
    aes(xintercept = date, colour = cas),
    size = 1,
    linetype = "dashed"
  ) +
  labs(y = "", x = "Date") +
  theme_minimal()

ggplot bar graph is showing multiple values as labels in one graph

I am getting multiple values in graph. As in following table, Machine 1 has 3 different Machine ID. So one bar is showing values of 3 Machine ID under one bar.
# Example data: one row per machine, with three alert counters each.
# (Straight quotes are used here; typographic quotes are not valid R.)
FDP_2019 <- data.frame(
  Machine_ID = c("M11", "M21", "M31", "M41", "M12", "M22", "M32", "M42", "M13", "M14", "M23", "M43"),
  Model_Name = c("Machine 1", "Machine 2", "Machine 3", "Machine 4", "Machine 1", "Machine 2", "Machine 3", "Machine 4", "Machine 1", "Machine 1", "Machine 2", "Machine 4"),
  Fuel_Count = c(12, 34, 23, 56, 78, 45, 67, 99, 12, 3, 0, 122),
  Pressure_Count = c(56, 748, 33, 787, 457, 345, 565, 445, 34, 23, 45, 799),
  Water_Count = c(768, 33, 0, 8, 45, 23, 56, 77, 85, 100, 150, 133)
)

# Combine the three counter columns into one "Alert" column (plus "Count")
# instead of 3 separate columns as in FDP_2019. The column names contain
# underscores, so they are referenced as Fuel_Count:Water_Count.
new_FDP <- FDP_2019 %>%
  gather(Alert, Count, Fuel_Count:Water_Count)

# One text label is drawn per row of new_FDP, i.e. one per Machine_ID, so a
# model with several machines gets several labels on the same bar.
graph <- new_FDP %>%
  ggplot(aes(Alert, Count, fill = Alert)) +
  geom_bar(stat = "identity", size = 3) +
  coord_flip() +
  geom_text(aes(label = Count)) +
  theme(legend.position = "none") +
  facet_wrap(~ Model_Name, nrow = 4)
I want sum of all values. How to do it?
Try like this:
# Example data: one row per machine, with three alert counters each
FDP_2019 <- data.frame(
  Machine_ID = c("M11", "M21", "M31", "M41", "M12", "M22", "M32", "M42", "M13", "M14", "M23", "M43"),
  Model_Name = c("Machine 1", "Machine 2", "Machine 3", "Machine 4", "Machine 1", "Machine 2", "Machine 3", "Machine 4", "Machine 1", "Machine 1", "Machine 2", "Machine 4"),
  Fuel_Count = c(12, 34, 23, 56, 78, 45, 67, 99, 12, 3, 0, 122),
  Pressure_Count = c(56, 748, 33, 787, 457, 345, 565, 445, 34, 23, 45, 799),
  Water_Count = c(768, 33, 0, 8, 45, 23, 56, 77, 85, 100, 150, 133)
)

# Reshape the three counter columns to long format, then add the counts up
# per alert type and model so that every bar carries exactly one value.
new_FDP <- FDP_2019 %>%
  pivot_longer(
    cols = Fuel_Count:Water_Count,
    names_to = "Alert",
    values_to = "Count"
  ) %>%
  group_by(Alert, Model_Name) %>%
  # .groups = "drop" returns an ungrouped result (and silences the
  # regrouping message) so later steps are not affected by leftover groups
  summarise(Sum = sum(Count), .groups = "drop")

new_FDP %>%
  ggplot(aes(Alert, Sum, fill = Alert)) +
  # geom_col() plots the values as given (same as stat = "identity")
  geom_col() +
  coord_flip() +
  geom_text(aes(label = Sum)) +
  theme(legend.position = "none") +
  facet_wrap(~ Model_Name, nrow = 4)

ggplot2: separate legend per layer

I'm trying to plot my data and got quite far with it (as I am a bloody beginner with R and ggplot). Now I'm stuck in making the legend:
I would like to have separate legend entries for line and point layers, which means something like
-- female
-- male
-- Plot A
-- Plot B
O Start
O End
O Year 1
Any suggestions how to solve that problem?
# Random example data: six ids with a start date drawn from 2015 and an end
# date drawn from Jan-Oct 2016
test <- data.frame(
  id = 1:6,
  sex = rep(c("female", "male"), times = 3),
  plot = rep(c("A", "B"), times = 3),
  start = sample(
    seq(as.Date('2015/01/01'), as.Date('2016/01/01'), by = "day"),
    6
  ),
  end = sample(
    seq(as.Date('2016/01/01'), as.Date('2016/10/01'), by = "day"),
    6
  )
)
test$duration <- difftime(test$end, test$start, units = "days")

# Year1 = start + 366 days; blanked out when it is not before today or
# when the duration is below 365 days
test$Year1 <- as.Date(test$start + 366)
test$Year1[test$Year1 >= Sys.Date() | test$duration < 365] <- NA

# x axis limits: from 2015-01-01 until today
startTime <- as.Date("2015-01-01")
endTime <- Sys.Date()
start.end <- c(startTime, endTime)

ggplot(test, aes(x = start, y = id, color = sex, linetype = plot)) +
  geom_segment(aes(x = start, xend = end, y = id, yend = id), size = .75) +
  geom_point(aes(x = Year1), na.rm = TRUE, shape = 16, size = 3) +
  geom_point(aes(x = start), shape = 1, size = 3) +
  geom_point(
    aes(x = end),
    data = subset(test, end != Sys.Date()),
    shape = 13,
    size = 3
  ) +
  guides(color = guide_legend(title = NULL)) +
  scale_x_date(
    name = "duration",
    date_breaks = "6 months",
    date_minor_breaks = "1 month",
    date_labels = "%m/%Y",
    limits = start.end
  ) +
  scale_color_discrete(
    name = "",
    breaks = c("female", "male"),
    labels = c("f", "m")
  ) +
  scale_linetype_manual(
    name = "",
    breaks = c("A", "B"),
    labels = c("Plot A", "Plot B"),
    values = c("dotdash", "solid")
  ) +
  scale_shape_manual(
    name = "",
    guide = 'legend',
    breaks = c("Year1", "start", "end"),
    labels = c("Year1", "start", "end"),
    values = c("16", "1", "13")
  )
enter image description here
You have to place the start, end and Year1 in a common variable and map the shape aesthetic to this variable. This should work :
library(ggplot2)
library(tidyr)
library(dplyr)
# Random example data: six ids with a start date drawn from 2015 and an end
# date drawn from Jan-Oct 2016
test <- data.frame(
  id = 1:6,
  sex = rep(c("female", "male"), times = 3),
  plot = rep(c("A", "B"), times = 3),
  start = sample(
    seq(as.Date("2015/01/01"), as.Date("2016/01/01"), by = "day"),
    6
  ),
  end = sample(
    seq(as.Date("2016/01/01"), as.Date("2016/10/01"), by = "day"),
    6
  )
)
test$duration <- difftime(test$end, test$start, units = "days")

# Year1 = start + 366 days; blanked out when it is not before today or
# when the duration is below 365 days
test$Year1 <- as.Date(test$start + 366)
test$Year1[test$Year1 >= Sys.Date() | test$duration < 365] <- NA

# x axis limits: from 2015-01-01 until today
startTime <- as.Date("2015-01-01")
endTime <- Sys.Date()
start.end <- c(startTime, endTime)

# Long format: one row per (id, event type) so that `shape` can be mapped to
# the event type and gets its own legend. Rows whose date is NA (blanked
# Year1 values) are dropped here so geom_point() has nothing to warn about.
test_melt <- test %>%
  select(id, sex, start, end, Year1) %>%
  pivot_longer(
    cols = c(start, end, Year1),
    names_to = "type",
    values_to = "value",
    values_drop_na = TRUE
  )

# Named vectors tie each shape / line type to its level explicitly, so the
# assignment does not depend on how unnamed values are matched to levels.
event_shapes <- c(Year1 = 16, start = 1, end = 13)
plot_linetypes <- c(A = "dotdash", B = "solid")

ggplot(test) +
  geom_segment(
    aes(x = start, xend = end, y = id, yend = id, color = sex, linetype = plot),
    size = .75
  ) +
  geom_point(
    aes(x = value, y = id, color = sex, shape = type),
    data = test_melt,
    size = 3
  ) +
  guides(color = guide_legend(title = NULL)) +
  scale_x_date(
    name = "duration",
    date_breaks = "6 months",
    date_minor_breaks = "1 month",
    date_labels = "%m/%Y",
    limits = start.end
  ) +
  scale_color_discrete(
    name = "",
    breaks = c("female", "male"),
    labels = c("f", "m")
  ) +
  scale_linetype_manual(
    name = "",
    breaks = names(plot_linetypes),
    labels = c("Plot A", "Plot B"),
    values = plot_linetypes
  ) +
  scale_shape_manual(
    name = "",
    guide = "legend",
    breaks = names(event_shapes),
    labels = names(event_shapes),
    values = event_shapes
  )

Resources