ggplot bar graph is showing multiple values as labels in one graph - r

I am getting multiple values in graph. As in following table, Machine 1 has 3 different Machine ID. So one bar is showing values of 3 Machine ID under one bar.
FDP_2019 <- data.frame(
Machine_ID = c(“M11”, “M21”, “M31”, “M41”, “M12”, “M22”, “M32”, “M42”, “M13”, “M14”, “M23”, “M43”),
Model_Name = c(“Machine 1”, “Machine 2”, “Machine 3”, “Machine 4”, “Machine 1”, “Machine 2”, “Machine 3”, “Machine 4”, “Machine 1”, “Machine 1”, “Machine 2”, “Machine 4” ),
Fuel_Count = c(12,34,23,56,78,45,67,99,12,3,0,122),
Pressure_Count = c(56,748,33,787,457,345,565,445,34,23,45,799),
Water_Count = c(768,33,0,8,45,23,56,77,85,100,150,133))
new_FDP <- FDP_2019 %>%
gather(Alert, Count, `Fuel Count`:`Water Count`) # I have combine all variables into one column i.e “Alert” instead of 3 separate column as in FDP_2019
graph <- new_FDP %>%
ggplot(aes(Alert, Count, fill = Alert)) + geom_bar(stat = "identity", size = 3) +
coord_flip() +
geom_text(aes(label = Count)) +
theme(legend.position = "none") +
facet_wrap(~ `Model Name`, nrow = 4 )
I want sum of all values. How to do it?

Try like this:
FDP_2019 <- data.frame(
Machine_ID = c("M11", "M21", "M31", "M41", "M12", "M22", "M32", "M42", "M13", "M14", "M23", "M43"),
Model_Name = c("Machine 1", "Machine 2", "Machine 3", "Machine 4", "Machine 1", "Machine 2", "Machine 3", "Machine 4", "Machine 1", "Machine 1", "Machine 2", "Machine 4" ),
Fuel_Count = c(12,34,23,56,78,45,67,99,12,3,0,122),
Pressure_Count = c(56,748,33,787,457,345,565,445,34,23,45,799),
Water_Count = c(768,33,0,8,45,23,56,77,85,100,150,133))
new_FDP <- FDP_2019 %>%
gather(Alert, Count, Fuel_Count:Water_Count) %>%
select(-Machine_ID) %>%
group_by(Alert,Model_Name) %>%
summarise(Sum=sum(Count))
new_FDP %>%
ggplot(aes(Alert, Sum, fill = Alert)) + geom_bar(stat = "identity", size = 3) +
coord_flip() +
geom_text(aes(label = Sum)) +
theme(legend.position = "none") +
facet_wrap(~ `Model_Name`, nrow = 4 )

Related

ggplot: labeling x axis in lineplot

since a long time I despair to straighten the label of the x-axis in my plot (ggplot2).
The challenge is that I have two geom_paths, each fetching the data from a different dataframe - I'm sure this will become a bit clearer in the code:
ggplot(data=dx, aes(x = year, y=en.x ))+
scale_y_continuous(breaks = scales::pretty_breaks(n = 2))+
geom_path(data=ps, aes(x, y, color = "Person 1", linetype="Person 1"), size=0.5)+
geom_path(data=pg, aes(x , y, color = "Person 2", linetype="Person 2"), size=0.5)+
scale_color_manual("",labels = c(Nutzer1, Nutzer2), values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)) +
scale_linetype_manual("",labels = c(Nutzer1, Nutzer2), values=c("Person 1"=Nutzer1Format, "Person 2"=Nutzer2Format))
The goal is, to Label the X-Axis with the years from the dataframe "dx", as shown in the aes-parameter. And it works! But only if you disable the geom_paths - shown below:
ggplot(data=dx, aes(x = year, y=en.x ))+
scale_y_continuous(breaks = scales::pretty_breaks(n = 2))+
#geom_path(data=ps, aes(x, y, color = "Person 1", linetype="Person 1"), size=0.5)+
#geom_path(data=pg, aes(x , y, color = "Person 2", linetype="Person 2"), size=0.5)+
scale_color_manual("",labels = c(Nutzer1, Nutzer2), values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)) +
scale_linetype_manual("",labels = c(Nutzer1, Nutzer2), values=c("Person 1"=Nutzer1Format, "Person 2"=Nutzer2Format))
I can't really understand why the paths destroy the labeling like this - it must be the aes parameters.
If someone has a solution for this, I would be extremely grateful!
This could be achieved like so:
Convert your original month variable to a date time before calling xspline. This way the interpolated date values could be easily converted back to datetime via e.g. lubridate::as_datetime.
besides that you could row bind your datasets which makes plotting a bit easier
library(ggplot2)
library(tidyr)
library(dplyr)
datengesamt <- datengesamt %>%
# Convert to datetime
mutate(month = as.POSIXct(month))
plot(1, 1)
ps <- xspline(datengesamt[,1], datengesamt[,2], 1, draw=FALSE)
pg <- xspline(datengesamt[,1], datengesamt[,3], 1, draw=FALSE)
pp <- list("Person 1" = data.frame(ps), "Person 2" = data.frame(pg)) %>%
bind_rows(.id = "id") %>%
mutate(x = lubridate::as_datetime(x))
ggplot(pp, aes(x, y, color = id, linetype = id)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
geom_path(size=0.5) +
scale_x_datetime(date_labels = "%Y")

geom_dumbell spacing, legends in different places, and multiple aesthetics (timelines)

I saw this interesting way of creating a publication timeline using geom_dumbell, so I created my own by first loading the libraries:
library(tidyverse)
library(ggalt)
library(ggrepel)
Entering in some data:
# create data frame
df <- data.frame(
paper = c("Paper 1", "Paper 1", "Paper 2", "Paper 2", "Paper 3", "Paper 3", "Paper 3", "Paper 3"),
round = c("first","revision","first","revision","first","first","first","first"),
submission_date = c("2019-05-23","2020-12-11", "2020-08-12","2020-10-28","2020-12-10","2020-12-11","2021-01-20","2021-01-22"),
journal_type = c("physics", "physics","physics","physics","chemistry","chemistry","chemistry","chemistry"),
journal = c("journal 1", "journal 1", "journal 2", "journal 2", "journal 3", "journal 4", "journal 5", "journal 6"),
status = c("Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit", "Accepted", "Desk Reject","Desk Reject", "Desk Reject","Waiting for Decision"),
decision_date = c("2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29", "2020-12-10","2021-01-05","2021-01-22","2021-01-24"),
step_complete = c("yes","no","yes","yes","yes","yes","yes", "no"),
duration_days = c(129,44,15,1,0,25,2,2))
# convert variables to dates
df$decision_date = as.Date(df$decision_date)
df$submission_date = as.Date(df$submission_date)
and, finally, creating my own basic timeline using this code:
ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color = status)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
#theme_ipsum_tw() +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
theme(legend.position = 'top')
As you can see from the above image, I can't see the x-axis. Additionally, I'd like to put another aesthetic and legend on the right side for the journal, perhaps putting a different shape on each line. Any other bells and whistles using the above data would be fun, too. Thanks!
Ok, I finally found some time to figure this out with help from this terrific post. To start, let's load the revised list of packages:
library(tidyverse)
library(ggalt)
library(ggrepel)
library(gridExtra)
library(gtable)
library(grid)
For comprehensiveness, let's reload the data:
# create dataframe
df <- data.frame(
paper = c("Paper 1", "Paper 1", "Paper 2", "Paper 2", "Paper 3", "Paper 3", "Paper 3", "Paper 3"),
round = c("first","revision","first","revision","first","first","first","first"),
submission_date = c("2019-05-23","2020-12-11", "2020-08-12","2020-10-28","2020-12-10","2020-12-11","2021-01-20","2021-01-22"),
journal_type = c("physics", "physics","physics","physics","chemistry","chemistry","chemistry","chemistry"),
Journal = c("journal 1", "journal 1", "journal 2", "journal 2", "journal 3", "journal 4", "journal 5", "journal 6"),
status = c("Revise and Resubmit", "Waiting for Decision", "Revise and Resubmit", "Accepted", "Desk Reject","Desk Reject", "Desk Reject","Waiting for Decision"),
decision_date = c("2019-09-29", "2021-01-24", "2020-08-27", "2020-10-29", "2020-12-10","2021-01-05","2021-01-22","2021-01-24"),
step_complete = c("yes","no","yes","yes","yes","yes","yes", "no"),
duration_days = c(129,44,15,1,0,25,2,2)
)
# convert variables to dates
df$decision_date = as.Date(df$decision_date)
df$submission_date = as.Date(df$submission_date)
First, let's create the plot with the color legend and extract it. Because I want that legend to be on top, I make sure indicate that as my legend position. Note that I specify my preferred colors using the scale_color_manual argument:
# make plot with color legend
p1 <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color = status)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE) +
theme(legend.position = 'top')
# Extract the color legend - leg1
leg1 <- gtable_filter(ggplot_gtable(ggplot_build(p1)), "guide-box")
Second, let's make the plot with the shape legend and extract it. Because I want this legend to be positioned on the right side, I don't need to even specify the legend position here. Note that I specify my preferred shapes using the scale_shape_manual argument:
# make plot with shape legend
p2 <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
shape = Journal)) +
geom_dumbbell(size = 1, size_x = 1) +
scale_shape_manual(values=c(15, 16, 17, 18, 19,25))+
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, show.legend = FALSE)
# Extract the shape legend - leg2
leg2 <- gtable_filter(ggplot_gtable(ggplot_build(p2)), "guide-box")
Third, let's make the full plot with no legend, specifying both the scale_color_manual and scale_shape_manual arguments as well as theme(legend.position = 'none'):
# make plot without legend
plot <- ggplot(df, aes(x = submission_date, xend = decision_date,
y = paper, label = duration_days,
color =status, shape = Journal)) +
geom_dumbbell(size = 1, size_x = 3) +
scale_color_manual(values=c("green", "red", "darkolivegreen4", "turquoise1")) +
scale_shape_manual(values=c(15, 16, 17, 18, 19,25))+
labs(x=NULL, color = 'Status:',
y=NULL,
title="Timeline of Journal Submissions",
subtitle="Start date, decision date, and wait time (in days) for my papers.") +
ggrepel::geom_label_repel(nudge_y = -.25, nudge_x = -5.25, show.legend = FALSE) +
theme(legend.position = 'none')
Fourth, let's arrange everything according to our liking:
# Arrange the three components (plot, leg1, leg2)
# The two legends are positioned outside the plot:
# one at the top and the other to the side.
plotNew <- arrangeGrob(leg1, plot,
heights = unit.c(leg1$height, unit(1, "npc") - leg1$height), ncol = 1)
plotNew <- arrangeGrob(plotNew, leg2,
widths = unit.c(unit(1, "npc") - leg2$width, leg2$width), nrow = 1)
Finally, plot and enjoy the final product:
grid.newpage()
grid.draw(plotNew)
As everyone will no doubt recognize, I relied very heavily on this post. However, I did change a few things, I tried be comprehensive with my explanation, and some others spent time trying to help, so I think it is still helpful to have this answer here.

ggplot: re-order categorical y-axis (Gantt chart)

I am trying to produce a Gantt chart out of a table with different task (each having a start date and end date).
library(tidyverse)
# Sample data
df1 <- data.frame(from = c("2020-01-01", "2020-02-02", "2020-05-04", "2020-02-01", "2020-01-20", "2020-02-10"),
to = c("2020-03-30", "2020-03-15", "2020-05-20", "2020-04-05", "2020-03-05", "2020-04-13"),
task= c("Task 1", "Task 2", "Task 3", "Task 4", "Task 5", "Task 6"),
group = c("Finance", "Finance", "Research", "Research", "Other", "Other")
)
# Plot gantt-chart
df1 %>% mutate(from = as.Date(from),
to = as.Date(to)) %>%
pivot_longer(cols = c(from, to), values_to = "date") %>%
ggplot(aes(x=date, y=task, colour = group)) +
geom_line(lwd=3) +
geom_point(aes(color=group), alpha=.5, pch=18, size=5) +
scale_x_date(position="bottom", date_breaks = "1 week", date_labels="%U") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))+
ggtitle("Milestones") +
labs(y="", x = "", colour="Responsible")
So far so good, but now I have a major problem and a minor problem:
major problem:
How can I re-order the categories on the y-axis as they appear in the data (not alphabetically)? From top to bottom it should be: Finance, Research, Other. Additionally, within each category (finance, research, other) the lines should be ordered by starting date (i.e. the task starting first should be on top)
minor problem:
on the x-axis how can I plot a thicker line for each month and a thinner line for each week?
Thanks for help!
This could be achieved like so:
As #RuiBarrades mentioned in his comments to get the right order you have to convert to a factor and set the levels in the right order. First, set the levels for the groups. Second, to get the tasks in the desired order I rearrange the dataset by group and start date and make use of forcats::fct_inorder to set the levels of the tasks in the desired order.
If I got you right you want different grid lines for week and month? This could be achieved by setting date_breaks_minor="month" and styling of the grid lines via theme and panel.grid.minor/major.x. Here I opted for a "black" color but if you prefer different sizes you could do so via size.
library(tidyverse)
library(ggplot2)
# Sample data
df1 <- data.frame(from = c("2020-01-01", "2020-02-02", "2020-05-04", "2020-02-01", "2020-01-20", "2020-02-10"),
to = c("2020-03-30", "2020-03-15", "2020-05-20", "2020-04-05", "2020-03-05", "2020-04-13"),
task= c("Task 1", "Task 2", "Task 3", "Task 4", "Task 5", "Task 6"),
group = c("Finance", "Finance", "Research", "Research", "Other", "Other")
)
# Plot gantt-chart
df1 %>% mutate(from = as.Date(from),
to = as.Date(to),
group = factor(group, levels = c("Finance", "Research", "Other"))) %>%
arrange(desc(group), desc(from)) %>%
mutate(task = forcats::fct_inorder(task)) %>%
pivot_longer(cols = c(from, to), values_to = "date") %>%
ggplot(aes(x=date, y=task, colour = group)) +
geom_line(lwd=3) +
geom_point(aes(color=group), alpha=.5, pch=18, size=5) +
scale_x_date(position="bottom", date_breaks = "1 week", date_minor_breaks = "1 month",
date_labels="%U") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
panel.grid.minor.x = element_line(color = "black"))+
ggtitle("Milestones") +
labs(y="", x = "", colour="Responsible")

How to add observation count (n) in ggplot2 scatter plot legend

Image of legend i would like to add to
I would like to know how can i add a simple observation number (n) in legend of this scatter plot in ggplot2
library(readr)
library(ggplot2)
library(dplyr)
All.mutations.no.inserts <- read_csv("All mutations no inserts.csv")
All.mutations.no.inserts$Fungicide <- factor(All.mutations.no.inserts$Fungicide, levels = c("SDHI 1",
"SDHI 2",
"SDHI 3",
"SDHI 4",
"SDHI 5",
"SDHI 6",
"SDHI 7",
"SDHI 8",
"SDHI 9",
"SDHI 10",
"SDHI 11",
"SDHI 12"))
All.mutations.no.inserts$SDH.mutation <- factor(All.mutations.no.inserts$`SDH.mutation`)
ggplot(All.mutations.no.inserts, aes(x = Fungicide, y = EC50, color = SDH.mutation)) +
geom_point(size = 4) +
scale_y_log10() +
theme_minimal() +
theme(axis.text.x=element_text(angle = -90, hjust = 0),
axis.title.x=element_blank())
How should i modify my code?
here's an example using dplyr. See the comments in the code.
library(dplyr)
library(ggplot2)
# sample data set
expand.grid(y = rnorm(20),
x = letters[1:5],
z = letters[6:10]) %>%
sample_frac(0.75) %>%
# add column n with counts for each group
add_count(z) %>%
# combine the group z and count n into one column
mutate(zn = paste0(z, ' (', n, ')')) %>%
# plot as you had
ggplot(aes(x, y, colour = zn)) +
geom_point() +
# rename the legend title
labs(colour = 'z (# obs)')
Created on 2019-02-06 by the reprex package (v0.2.1)

highlight points in ggplot2 stat_qq output

I am trying to highlight selected points based on their order statistics in a ggplot stat_qq output:
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
ydata <- ydata[order(ydata$sample, ydata$x),]
ydata$order <- 1:100
ggplot(ydata, aes(sample=x)) + stat_qq() + facet_wrap(~sample) + scale_x_continuous(breaks = -2:2, labels = function(x) paste0(x, " \n [",100 * signif(pnorm(-2:2, lower.tail=FALSE),2), "%]")) + theme_bw(base_size = 14, base_family = "sans") + labs(title = "Four Samples of 100 Observations From Normal Distribution",
caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
y = "Sample Value",
x = "Standard Deviation\n[%exceeding]") +
geom_text(data = ydata[ydata$order %in% c(2,16,50,84,98),], aes(x=qnorm(pnorm(x)), y=x, label = order), nudge_y = 1)
Which produced this:
Obviously my text notation is not highlighting the right points (the 2, 16, 50 84, 98th points). I wish I could also highlight the actual points in red. Would appreciate any suggestions.
You could calculate the qq values outside of ggplot and create a separate column to group the qq values into highlighted and not highlighted. Then you could plot them using geom_point with the grouping variable as a colour aesthetic. For example:
library(tidyverse)
# Generate data reproducibly
set.seed(2)
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
ydata <- ydata[order(ydata$sample, ydata$x),]
ydata$order <- 1:100
# Quantile indices to highlight
pts = c(2,16,50,84,98)
# Add qq values and grouping column to data frame and pipe into ggplot
# Use split and map to calculate the qq values separately for each Sample
split(ydata, ydata$sample) %>%
map_df(~ .x %>% mutate(xq = qqnorm(x, plot.it=FALSE)$x,
group = ifelse(order %in% pts, "A", "B"))) %>%
ggplot(aes(xq, x, colour=group)) +
geom_point(size=1) +
geom_text(aes(label=ifelse(group=="A", order, "")),
nudge_y=1, size=3) +
facet_wrap(~ sample) +
theme_bw(base_size = 14, base_family = "sans") +
scale_colour_manual(values=c("red", "black")) +
guides(colour=FALSE)
As an alternative, a quick hack would be to use ggplot_build to highlight specific points in your original plot (note though that something is not quite right with how you placed the labels relative to the highlighted points):
pts = rep(c(2,16,50,84,98), 4) + rep(seq(0,300,100), each=5)
# Assuming you've assigned your plot to the object p
pb = ggplot_build(p)
# Change point colors
pb$data[[1]][pts, "colour"] = "red"
# Change label colors
pb$data[[2]][["colour"]] = "red"
# Regenerate plot object
p = ggplot_gtable(pb)
plot(p)
You can apply stat="qq" to your geom_point and then use the colors assigned to new variable
ydata <- data.frame(sample = c(rep("Sample 1", 100),
rep("Sample 2", 100),
rep("Sample 3", 100),
rep("Sample 4", 100)),
x=rnorm(400))
ydata <- ydata[order(ydata$sample, ydata$x),]
ydata$order <- 1:100
ydata$highlight = ifelse(ydata$order %in% c(2,16,50,84,98), "#FF0000", "#000000")
ydata$order_txt = ifelse(ydata$order %in% c(2,16,50,84,98), ydata$order, "")
ggplot(ydata, aes(sample=x)) +
geom_point(color=ydata$highlight, stat="qq") +
geom_text(label=ydata$order_txt, stat="qq", nudge_y=1) +
facet_wrap(~sample) +
scale_x_continuous(breaks = -2:2, labels = function(x) paste0(x, " \n [",100 * signif(pnorm(-2:2, lower.tail=FALSE),2), "%]")) +
theme_bw(base_size = 14, base_family = "sans") +
labs(
title = "Four Samples of 100 Observations From Normal Distribution",
caption = "4 Samples of n = 100 from Normal Distribution \nNumbers indicate order of value",
y = "Sample Value",x = "Standard Deviation\n[%exceeding]")

Resources