I have the following data set:
# dput() output of the example data: yearly sales for 2005-2019 of conventional
# bicycles (Fahrrad), e-bikes (E.Bike) and their sum (gesamt).
# NOTE: the expression is not assigned here; the snippets below refer to it as `df`.
structure(list(Jahr = 2005:2019, Fahrrad = c(275208L, 296105L,
308336L, 313363L, 326017L, 311756L, 302193L, 295702L, 268773L,
268295L, 256726L, 248916L, 250242L, 233652L, 230464L), E.Bike = c(1792L,
3181L, 5825L, 12600L, 23886L, 39247L, 49615L, 52941L, 49362L,
57613L, 66332L, 75665L, 87987L, 111661L, 133032L), gesamt = c(277000L,
299286L, 314161L, 325963L, 349903L, 351003L, 351808L, 348643L,
318135L, 325908L, 323058L, 324581L, 338229L, 345313L, 363496L
)), class = "data.frame", row.names = c(NA, -15L))
My goal is to create a graph that shows on the left y-axis the absolute purchases of different bike types. On the right y-axis I'd like to show the ratio of "E-Bike" purchases to "Fahrrad" purchases to emphasize the trend (if both are sold equally, the desired value is 100; if fewer e-bikes than bicycles are sold, the value is below 100). Something like this:
Is that even possible? I know ggplot doesn't allow some second y-axis.
Here is the code to produce the plot below (without the MS paint edits)
# Reshape to long format: one row per year (Jahr) and bike type.
dfm <- melt(df, id = "Jahr")
# Fix the order of the series and give the e-bike level its display name.
dfm$variable <- factor(dfm$variable, levels = c("gesamt", "Fahrrad", "E.Bike"))
dfm$variable <- revalue(dfm$variable, c("E.Bike" = "E-Bike"))
# Plot sales in thousands, matching the plot title.
dfm$value <- dfm$value / 1000

ggplot(data = dfm) +
  # Map bare column names: `dfm$...` inside aes() bypasses the layer data and
  # breaks as soon as the data are filtered, facetted or replaced.
  geom_line(
    aes(x = Jahr, y = value, colour = variable),
    lineend = "round", lwd = 1.5
  ) +
  # Only 2013-2019 are shown; rows outside the limits are dropped.
  scale_x_continuous(limits = c(2013, 2019), breaks = 2013:2019) +
  scale_y_continuous(limits = c(0, 400)) +
  labs(title = "Verkäufe in Tausend") +
  theme_minimal() +
  scale_colour_manual(
    name = "Mode",
    labels = c("Total", "Fahrrad", "E-Bike"),
    values = c("#8c8c8c", "#256bc2", "#7ea9de"),
    guide = "none"
  ) +
  # Direct line labels replace the legend. annotate() draws each label once;
  # geom_text() with constant values would redraw it for every row of `dfm`.
  annotate("text", x = 2013.5, y = 350, label = "Total", hjust = "inward", size = 3) +
  annotate("text", x = 2013.5, y = 290, label = "Fahrrad", hjust = "inward", size = 3) +
  annotate("text", x = 2013.5, y = 80, label = "E-Bike", hjust = "inward", size = 3) +
  theme(
    legend.title = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.text = element_text(size = 8.5),
    plot.title = element_text(hjust = -0.06, size = 9),
    plot.caption = element_text(hjust = -0.08, size = 6, color = "#BFBFBF")
  )
Secondary axes in ggplot work as follows. In the position scale, add a sec.axis argument for a secondary axis that is a linear transformation of the first axis, specified in trans. Since both the primary and secondary axes can start at 0, a simple scaling factor will do. You need to manually transform the input data and specify the reverse transformation as the trans argument. Simplified example below (assuming df is your provided data):
library(ggplot2)
library(scales)
# Long format for the absolute sales lines: one row per year and series.
dfm <- reshape2::melt(df, id = "Jahr")

# Scale factor for utilising whole y-axis range: the largest e-bike share is
# mapped onto the largest total, so the share line spans the primary axis.
scalef <- with(df, max(gesamt) / max(E.Bike / gesamt))
# Scale factor for using 0-100%
# scalef <- max(df$gesamt)

ggplot(data = dfm, mapping = aes(x = Jahr, y = value)) +
  geom_line(mapping = aes(colour = variable)) +
  # E-bike share of total sales, stretched onto the primary axis by `scalef`.
  geom_line(
    data = df,
    mapping = aes(y = E.Bike / gesamt * scalef),
    linetype = 2
  ) +
  scale_y_continuous(
    labels = number_format(scale = 1e-3),
    # The secondary axis undoes the stretch, so its labels read as shares.
    sec.axis = sec_axis(
      trans = ~ .x / scalef,
      name = "Percentage E-bike",
      labels = percent_format()
    )
  )
Created on 2021-01-04 by the reprex package (v0.3.0)
Related
I have not been working with r for long, but have already found many answers to my questions in this community. But now I can't get any further and ask my first own question.
Objective: I want to display values from different years (here in the example 10 years) over time in a barplot. Each year should be represented by a column. The years 1 to 9 should get a uniform color, the 10th year another. For the 10th year the value should also be displayed. There should be only two entries in the legend: "Year 1 - 9" and "Year 10".
I have created the following dummy data set:
library(ggplot2)
# Texts to display in the plot
tit <- "Title"
subtit <- "Subtitle"
lab <- c("lab1", "lab2", "lab3", "lab4")

# Prepare dataset with random data: ten cohorts ("year01" ... "year10"),
# each repeated once per label.
n_label <- length(lab)
cohort <- rep(sprintf("year%02d", 1:10), each = n_label)
# One random share in [0, 0.5] per row; sized from `cohort` instead of a
# hard-coded 40 so it stays in sync when labels or cohorts change.
data_rel <- runif(length(cohort), min = 0, max = .5)
# `lab` is recycled across the cohorts.
df_data <- data.frame(lab, cohort, data_rel)

# Number of distinct cohorts as a one-row data frame with column `count`.
# Base R is used so this step does not depend on dplyr being attached.
n_cohort <- data.frame(count = length(unique(df_data$cohort)))
I was able to implement the plot as desired with the following code:
# plot data
df_data %>%
ggplot() +
geom_bar (aes(
x = factor(lab, levels = c("lab1", "lab2", "lab3", "lab4")),
y = data_rel,
fill = cohort
),
stat = "identity",
position = position_dodge()) +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
theme_classic() +
theme(
legend.position = "bottom",
plot.title = element_text(hjust = 0.5,
size = 14,
face = "bold"),
plot.subtitle = element_text(hjust = 0.5),
plot.caption = element_text(hjust = 0.5),
) +
geom_text(
data = subset(df_data, cohort == "year10"),
aes(
x = lab,
y = data_rel,
label = paste0(sprintf("%.1f", data_rel * 100), "%")
),
vjust = -1,
hjust = -1.5,
size = 3
) +
scale_fill_manual(
values = c("#7F7F7F", "#389DC3"),
limits = c("year01", "year10"),
labels = c("Year 1 - 9", "Year 10")
) +
labs(
subtitle = paste(subtit),
title = str_wrap(tit, 45),
x = "",
y = "",
fill = ""
)
Unfortunately, I cannot adjust the colors of the columns for years 1 - 9. Either not all columns get the correct color, or I get unwanted entries in the legend.
Does anyone have an idea what i am doing wrong? I am grateful for every hint!
In setting the fill attribute you can group all other levels of the factor together (here using forcats::fct_other to collapse Years 1-9 into one level) to give your two levels of fill colours. At the same time, using group = cohort will keep bars separate:
library(forcats)
# plot data
# Fill distinguishes only "year10" from all other cohorts, while `group`
# still produces one dodged bar per cohort.
df_data %>%
  ggplot() +
  geom_bar(
    mapping = aes(
      x = factor(lab, levels = c("lab1", "lab2", "lab3", "lab4")),
      y = data_rel,
      group = cohort,
      # Keep "year10"; collapse every other cohort into the "year01" level.
      fill = fct_other(cohort, keep = "year10", other_level = "year01")
    ),
    stat = "identity",
    position = position_dodge()
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5)
  ) +
  # Percentage labels for the "year10" bars only.
  geom_text(
    data = subset(df_data, cohort == "year10"),
    mapping = aes(
      x = lab,
      y = data_rel,
      label = paste0(sprintf("%.1f", data_rel * 100), "%")
    ),
    vjust = -1,
    hjust = -1.5,
    size = 3
  ) +
  # The two limits match the two levels produced by fct_other() above.
  scale_fill_manual(
    values = c("#7822CC", "#389DC3"),
    limits = c("year01", "year10"),
    labels = c("Year 1 - 9", "Year 10")
  ) +
  labs(
    subtitle = paste(subtit),
    title = str_wrap(tit, 45),
    x = "",
    y = "",
    fill = ""
  )
(Changed manual fill colour to distinguish from unfilled bars)
It's also possible to do by creating a new 2-level cohort_lumped variable before passing to ggplot(), but this way helps keep your data as consistent as possible up to the point of passing into graphing stages (and doesn't need extra columns storing essentially same information).
# NOTE(review): `age_limbset` is not defined in this snippet; the code below
# uses its columns `age`, `score` and `limbset`.
df <- age_limbset
df$limbset <- as.factor(df$limbset)

# Base plot: score against age.
limb_splot <- ggplot(data = df, mapping = aes(x = age, y = score))

limb_splot +
  geom_point(mapping = aes(color = limbset, shape = limbset)) +
  # One loess curve per limb set, without a confidence band.
  geom_smooth(
    mapping = aes(color = limbset),
    method = loess,
    se = FALSE,
    fullrange = TRUE
  ) +
  scale_color_manual(values = c("blue", "hotpink")) +
  scale_fill_manual(values = c("blue", "hotpink")) +
  # Spearman correlation label per limb set.
  ggpubr::stat_cor(
    mapping = aes(color = limbset),
    method = "spearman",
    label.x = 3
  ) +
  labs(x = "Age (years)", y = "Total proprioception score (0-4)") +
  scale_y_continuous(breaks = seq(0, 4, 0.5)) +
  scale_x_continuous(breaks = seq(2, 16, 2)) +
  theme_bw()
Sorry I do not know how to enter data here.
I have created this scatterplot showing relationship between age and proprioception of both forelimbs and hindlimbs. The plot is listening to my instruction for the x axis limits and breaks, but I can only get it to listen to either the limits OR the breaks for the y axis. What am I doing wrong?
How can I change the symbols for the data points? Ideally I would like them all to be dots.
I would also like to change the legend name and labels to start with a capital letter.
Here's an example of a reproducible example which also addresses your questions, e.g. with scale_shape_manual to get the shapes you want which you could choose here.
library(tidyverse)
# Stand-in data: years 2-16 for two alternating sets with random scores,
# drawn with a manually chosen point shape per set.
tibble(
  years = rep(seq(2, 16, 1), times = 2),
  score = abs(rnorm(n = 30, mean = 10, sd = 10)),
  set = rep(c("fore", "hind"), times = 15)
) |>
  ggplot(mapping = aes(x = years, y = score, shape = set, colour = set)) +
  geom_point() +
  scale_shape_manual(values = c(1, 10)) +
  # Limits and breaks are set together in a single scale call.
  scale_y_continuous(limits = c(0, 40), breaks = seq(0, 40, 5)) +
  # Capitalised axis titles and a shared legend title for shape and colour.
  labs(x = "Years", y = "Score", shape = "Set", colour = "Set")
Created on 2022-05-04 by the reprex package (v2.0.1)
I must code factor age with both color and shape. I know how to do that (see the plot and data/code below).
In addition, I also have to code factor day with shape.
Is it somehow possible to assign specified different shapes to two different factors?
Below is the legend I would love to achieve (I made an example in power point).
The plot is not quite right, as only factor age is coded with color and shape.
# Example data: 3 tests x 3 age groups x 2 days, one result (0-2) per row.
df <- data.frame(
  test = rep(c(1, 2, 3), times = 6),
  age = rep(rep(c(1, 2, 3), each = 3), times = 2),
  day = rep(c(1, 2), each = 9),
  result = c(1, 2, 2, 1, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0)
)
# All three design variables are treated as categorical.
df$test <- factor(df$test)
df$age <- factor(df$age)
df$day <- factor(df$day)

# Opens a graphics device of fixed size (Windows only).
windows(width = 4, height = 3)

df %>%
  ggplot(mapping = aes(x = test, y = result)) +
  # Age is encoded by both colour and shape; points are dodged by age and
  # jittered vertically only.
  geom_point(
    mapping = aes(color = age, shape = age, group = age),
    position = position_jitterdodge(
      dodge.width = 0.8,
      jitter.height = 0.2,
      jitter.width = 0
    ),
    size = 2,
    stroke = 0.8
  ) +
  scale_shape_manual(
    values = c(16, 15, 17),
    name = "",
    labels = c("young", "older", "the oldest")
  ) +
  scale_color_manual(
    name = "",
    labels = c("young", "older", "the oldest"),
    values = c("#009E73", "#56B4E9", "#D55E00")
  ) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    # c() evaluates to NULL here.
    legend.position = c(),
    legend.text = element_text(size = 10, face = "bold"),
    legend.title = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    plot.title = element_text(size = 10, face = "bold"),
    axis.title = element_text(size = 11),
    axis.text.y = element_text(size = 9, angle = 45),
    axis.text.x = element_text(size = 9, angle = 90),
    plot.margin = unit(c(0.5, 0.2, 0, 0), "cm")
  ) +
  labs(title = "", x = "", y = "Test result") +
  scale_y_continuous(
    breaks = c(0, 1, 2),
    labels = c("good", "better", "the best")
  ) +
  # Light grey separators between the tests and below the lowest result.
  geom_vline(xintercept = c(0.5, 1.5, 2.5), color = "grey90") +
  geom_hline(yintercept = -0.5, color = "grey90") +
  expand_limits(x = 3.9, y = c(0, 2.35)) +
  scale_x_discrete(
    limits = c("1", "2", "3"),
    labels = c("test a", "test b", "test c")
  ) +
  coord_cartesian(clip = "off")
You can map shape to an interaction between age and day, and map color to age only. Then remove the color legend and color the shape legend manually with override.aes.
This comes close to what you want - labels can be changed; I've defined them when creating the factors.
how to make fancy legends
However, you want a quite fancy legend, so the easiest would be to build the legend yourself as a separate plot and combine to the main panel. ("Fake legend"). This requires some semi-hardcoding, but you're not shy to do this anyways given the manual definition of your shapes. See Part Two how to do this.
Part one
library(ggplot2)
# Example data: 3 tests x 3 age groups x 2 days, one result (0-2) per row.
df <- data.frame(
  test = rep(c(1, 2, 3), times = 6),
  age = rep(rep(c(1, 2, 3), each = 3), times = 2),
  day = rep(c(1, 2), each = 9),
  result = c(1, 2, 2, 1, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0)
)
df$test <- factor(df$test)
## Note I'm changing this here already!! If you undergo the effort of changing
## to factor, define levels and labels here.
df$age <- factor(df$age, labels = c("young", "older", "the oldest"))
df$day <- factor(df$day, labels = paste("Day", 1:2))

ggplot(data = df, mapping = aes(x = test, y = result)) +
  # Shape encodes the day/age combination, colour encodes age only.
  geom_jitter(
    mapping = aes(color = age, shape = interaction(day, age)),
    width = .1,
    height = .1
  ) +
  ## you won't get around manually defining the shapes
  scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
  scale_color_manual(values = c("#009E73", "#56B4E9", "#D55E00")) +
  # Hide the colour legend and paint the shape legend keys with the age
  # colours instead (each colour covers two consecutive keys).
  guides(
    color = "none",
    shape = guide_legend(
      override.aes = list(
        color = rep(c("#009E73", "#56B4E9", "#D55E00"), each = 2)
      ),
      ncol = 3
    )
  )
Part two - the fake legend
library(ggplot2)
library(dplyr)
library(patchwork)
## df and factor creation as above !!!
## Main panel: shapes by day/age combination, colours by age. The built-in
## legend is removed entirely because a hand-built one is attached instead.
p_panel <-
  ggplot(data = df, mapping = aes(x = test, y = result)) +
  geom_jitter(
    mapping = aes(color = age, shape = interaction(day, age)),
    width = .1,
    height = .1
  ) +
  ## you won't get around manually defining the shapes
  scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
  scale_color_manual(values = c("#009E73", "#56B4E9", "#D55E00")) +
  theme(legend.position = "none")

## Data frame for the fake legend: one row per day/age combination.
## The y coordinates should be defined relative to the y values in your panel.
y_coord <- c(.9, 1.1)
df_legend <- df %>%
  distinct(day, age) %>%
  mutate(
    x = rep(1:3, times = 2),
    y = rep(y_coord, each = 3)
  )

## The legend plot is basically the same as the main plot, but without legend -
## because it IS the legend ... ;)
## NOTE(review): 5 / 14 presumably converts a 10 pt font size into the units
## used by text layers - confirm.
lab_size <- 10 * 5 / 14
p_leg <-
  ggplot(data = df_legend, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(color = age, shape = interaction(day, age))) +
  ## Annotating in separate layers keeps it clearer: first the rotated age
  ## labels above the keys ...
  annotate(
    geom = "text",
    x = unique(df_legend$x),
    y = max(y_coord) + .1,
    label = c("young", "older", "the oldest"),
    size = lab_size,
    angle = 45,
    hjust = 0
  ) +
  ## ... then the day labels to the right of the keys.
  annotate(
    geom = "text",
    x = max(df_legend$x) + .2,
    y = y_coord,
    label = paste("Day", 1:2),
    size = lab_size,
    hjust = 0
  ) +
  scale_shape_manual(values = c(0, 15, 1, 16, 2, 17)) +
  scale_color_manual(values = c("#009E73", "#56B4E9", "#D55E00")) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = margin(r = .3, unit = "in")
  ) +
  ## you need to turn clipping off and define the same y limits as your panel
  coord_cartesian(clip = "off", ylim = range(df$result))

## now combine them
p_panel + p_leg +
  plot_layout(widths = c(1, .2))
So again following: Filled and hollow shapes where the fill color = the line color the following code provides the goods without giving you the legend.
# Age is encoded by colour and shape; the fill is set to NA for day 1, so
# those points are drawn hollow while day 2 points are filled.
df %>%
  ggplot(mapping = aes(x = test, y = result)) +
  geom_point(
    mapping = aes(
      color = age,
      shape = age,
      group = age,
      fill = factor(ifelse(day == 1, NA, age)) # STEP 1
    ),
    position = position_jitterdodge(
      dodge.width = 0.8,
      jitter.height = 0.2,
      jitter.width = 0
    ),
    size = 2,
    stroke = 0.8
  ) +
  # Shapes 21-24 take separate outline colour and fill.
  scale_shape_manual(
    values = c(22, 21, 24),
    name = "",
    labels = c("young", "older", "the oldest")
  ) +
  scale_color_manual(
    name = "",
    labels = c("young", "older", "the oldest"),
    values = c("#009E73", "#56B4E9", "#D55E00")
  ) +
  scale_fill_manual(
    name = "",
    labels = c("young", "older", "the oldest"),
    values = c("#009E73", "#56B4E9", "#D55E00"),
    na.value = NA,
    guide = "none"
  ) # STEP 2
I was misleading in my comment: rather than "hollow" shapes, we want shapes 21 through 26. These apparently accept distinct fill and color.
This question is related to the one I have already asked here: Properly align country names and values for horizontal bar graph in ggplot
I would like to produce the following bar graph, but want to make sure that the distance from the beginning of the country name to the bar graph is always the same. So no matter if I read in the first or second df, it should always be the same distance as here:
#df1
loooooong country1 100% Bar
looooong country2 99% Bar
#df2
short country1 100% Bar
short country2 99% Bar
As it is for now, the distance between the end of the country name and the bar is always the same. I have found a workaround with filling up country names with spaces and use monospace font, but this looks pretty bad.. :)
library(ggplot2)
library(dplyr)
### first df
### first df
df <- data.frame(
  info_country = c(
    "country1", "country loooooooong name", "country2",
    "country middle name", "country3"
  ),
  indicator = c(50, 100, 50, 50, 5)
)
### second df
# df <- data.frame(
# info_country = c("country1", "country3", "country2", "country4", "country5"),
# indicator = c(50,100,50,50,5))

### change factor level for ggplot order: countries sorted by their indicator
df$info_country <- factor(
  df$info_country,
  levels = df$info_country[order(df$indicator)]
)
factor(df$info_country)

### create bar graph: horizontal bars with the percentage printed to the
### left of each bar
bar_graph <- df %>%
  ggplot(mapping = aes(x = info_country, y = indicator)) +
  geom_bar(stat = "identity", width = 0.8, fill = "#EE5859") +
  geom_text(
    mapping = aes(y = -2, label = paste(indicator, "%")),
    hjust = 1,
    size = 11 * 0.8 / ggplot2::.pt,
    color = "grey30"
  ) +
  xlab("") +
  ylab("") +
  scale_y_continuous(labels = NULL, limits = c(-2, 100)) +
  # Use clip = "off" to prevent that percentage labels are clipped off
  coord_flip(clip = "off") +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    # Set color of ticks to NA
    axis.ticks.x = element_line(color = NA),
    axis.ticks.y = element_line(color = NA),
    # Increase the margin
    axis.text.y = element_text(hjust = 0, margin = margin(r = 6, unit = "cm")),
    axis.text.x = element_text(hjust = 0)
  )

bar_graph
I would simply repeat the trick of plotting text as if it were axis labels. You can control the distance between the left edge of the labels and the start of the bars by setting the labels' hjust to 0 and using a large negative number for y in their aesthetic. A value of -100 is nicely symmetrical:
# Country names are drawn as a text layer instead of axis labels, so their
# left edge sits at a fixed position (y = -100) regardless of name length.
df %>%
  ggplot(mapping = aes(x = info_country, y = indicator)) +
  geom_bar(stat = "identity", width = 0.8, fill = "#EE5859") +
  # Percentage, right-aligned just left of the bar start.
  geom_text(
    mapping = aes(y = -2, label = paste(indicator, "%")),
    hjust = 1,
    size = 11 * 0.8 / .pt,
    color = "grey30"
  ) +
  # Country name, left-aligned at the far left of the panel.
  geom_text(
    mapping = aes(y = -100, label = info_country),
    hjust = 0,
    size = 11 * 0.8 / .pt,
    color = "grey30"
  ) +
  labs(x = "", y = "") +
  scale_y_continuous(labels = NULL, limits = c(-100, 100)) +
  coord_flip(clip = "off") +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    axis.ticks.x = element_line(color = NA),
    axis.ticks.y = element_line(color = NA),
    # The real axis labels are hidden; the text layer above replaces them.
    axis.text.y = element_blank()
  )
I have the following data
# dput() output of the example data: one row per station for 2019 with the
# columns id, date, station, days, mean3y (NA for one station) and environ.
# NOTE: the expression is not assigned here; the plotting code below refers
# to it as `data`.
structure(list(id = 1:7, date = c(2019L, 2019L, 2019L, 2019L,
2019L, 2019L, 2019L), station = structure(1:7, .Label = c("41B004",
"41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1"), class = "factor"),
days = c(6L, 21L, 5L, 9L, 13L, 14L, 3L), mean3y = c(8.33,
21.3, NA, 10, 11.3, 16.3, 3.67), environ = structure(c(3L,
4L, 2L, 1L, 3L, 4L, 3L), .Label = c("Industriel avec influence modérée du trafic",
"Urbain avec faible influence du trafic", "Urbain avec influence modérée du trafic",
"Urbain avec très faible influence du trafic"), class = "factor")), class = "data.frame", row.names = c(NA,
-7L))
which is plotted with the following ggplot code
# Bar chart of `days` per station, ordered from most to fewest days, with the
# value printed above each bar and a dashed red reference line at 25.
ggplot(
  data = data,
  mapping = aes(x = reorder(station, -days), y = days, fill = environ)
) +
  geom_col(width = 0.5, colour = "black", size = 0.5) +
  geom_text(
    mapping = aes(label = days),
    vjust = -0.3,
    color = "black",
    size = 3.5
  ) +
  geom_hline(
    mapping = aes(yintercept = 25),
    linetype = "dashed",
    colour = "red",
    size = 1
  ) +
  # Legend keys are laid out in two columns below the plot.
  guides(fill = guide_legend(ncol = 2)) +
  labs(
    x = "",
    y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.margin = margin(l = -2, unit = "line"),
    legend.text = element_text(size = 11),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    panel.grid.major.x = element_blank()
  ) +
  # Solid baseline at zero.
  geom_hline(yintercept = 0)
generating this figure.
I would like to also add in this figure the variable mean3y besides days for each x value using another geom_col, such as
# Same chart as before with a second bar layer for `mean3y`. Both bar layers
# use the same x positions and width.
p <- ggplot(
  data = data,
  mapping = aes(x = reorder(station, -days), y = days, fill = environ)
) +
  geom_col(width = 0.5, colour = "black", size = 0.5) +
  geom_text(
    mapping = aes(label = days),
    vjust = -0.3,
    color = "black",
    size = 3.5
  ) +
  # Second bar layer with its own complete mapping (nothing is inherited).
  geom_col(
    mapping = aes(x = reorder(station, -days), y = mean3y, fill = environ),
    inherit.aes = FALSE,
    width = 0.5,
    colour = "black",
    size = 0.5
  ) +
  geom_hline(
    mapping = aes(yintercept = 25),
    linetype = "dashed",
    colour = "red",
    size = 1
  ) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(
    x = "",
    y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.margin = margin(l = -2, unit = "line"),
    legend.text = element_text(size = 11),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    panel.grid.major.x = element_blank()
  ) +
  geom_hline(yintercept = 0)
However, I was not able to achieve the desired result, despite the use of position = "dodge", as illustrated by this figure where both variables are overlapping.
Is there a way to achieve this, please ?
Many thanks.
Position dodges only work in a single layer and not between multiple layers. You could either solve the problem by manually nudging them or by formatting the data in such a way that it can be dodged. Examples of both in code below.
Your data was hard to copy into my R session and your code was more elaborate than necessary to demonstrate the problem, so I've kept both to a minimum.
library(ggplot2)
# Toy data: two categories with two value columns (`y` and `z`) that should
# be drawn side by side.
df <- data.frame(
  x = c("A", "B"),
  y = c(10, 15),
  z = c(12, 9)
)

# Example of nudging
# Choose width and nudge values manually to fit your data: here each layer is
# shifted by half its own width to either side of the category position.
# The fill labels follow one naming scheme ("first_col" / "second_col").
ggplot(df, aes(x, y)) +
  geom_col(
    aes(fill = "first_col"),
    width = 0.45,
    position = position_nudge(x = -0.225)
  ) +
  geom_col(
    aes(y = z, fill = "second_col"),
    width = 0.45,
    position = position_nudge(x = 0.225)
  )
library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.6.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Example of dodging + data formatting
# Both value columns are stacked into a single `y` column with a series
# label `a`, so one layer holds both series and can dodge them.
ggplot(mapping = aes(x = x, y = y)) +
  geom_col(
    mapping = aes(fill = a),
    data = rbind(
      mutate(df, a = "first_col"),
      mutate(df, y = z, a = "second_col")
    ),
    position = "dodge"
  )
Created on 2020-04-16 by the reprex package (v0.3.0)
Consider this possible solution for your dataset - although you may want to play around with the aesthetics. I attempted to keep the aesthetics as similar as possible and set the bars to be the same color (based on df$environ), but make the difference between "days" and "mean3y" clear with text labels.
Data Preparation
First, we need to take the information from two columns and combine them: "days" and "mean3y". In your original data frame, these two columns can (and should) be combined to show type of value and the value itself. What we want to do is convert this type of data:
day.type.1 day.type.2
1 4 1
2 5 3
3 6 4
4 7 5
To this type of data:
day.type day.value
1 day.type.1 4
2 day.type.1 5
3 day.type.1 6
4 day.type.1 7
5 day.type.2 1
6 day.type.2 3
7 day.type.2 4
8 day.type.2 5
In the above example, you can use the gather() function from tidyr:
# Stack all columns of `t` into key/value pairs: the column name goes into
# `day.type`, the cell value into `day.value`.
gather(t, key = "day.type", value = "day.value")
If we apply that to your data frame, we have to specify to do that to the data frame, but ignore the other columns:
# Reshape only the two measurement columns into `variable` / `value`. Every
# other column (including `id`) stays as an identifier instead of being
# gathered as a third variable.
df1 <- df %>%
  pivot_longer(c(days, mean3y), names_to = "variable", values_to = "value")
This converts your "days" and "mean3y" columns into two new columns called "variable" (which is either "days" or "mean3y") and "value" (which is the actual number)
I also had to convert the new column "value" into numeric... but that could have been due to how I had to import your data, which was... difficult. Please note, it is recommended that you include your dataset in future questions via the output of dput(your.data.frame)... believe me it makes all the difference. ;)
Plotting the new Dataset
Here the idea is to keep your same x axis, but we are now setting "value" as the y aesthetic. In addition, you want to make sure to include a group= aesthetic of "variable" so that dodging works appropriately for text and columns. If you are not familiar, "dodging" is the term for when a geom is kind of "split" across an axis aesthetic: like "subsetting" of discrete axis values.
The geom_col call is set for position='dodge'... not much else changes there. You need this because the default position is set to "stack" (which is why your attempt resulted in columns "stacked" on top of one another).
The geom_text call has a few things going on:
The dodge is set here with position=position_dodge(), which allows you to specify how far apart the "dodge" will be. It allowed me to "push apart" the labels to be a bit wider so that the text looks okay and doesn't run into the adjacent column. A larger width= argument in position_dodge() results in "pushing" the labels further apart. A value of 0 would be putting the labels in the center of the x axis aesthetic... 0.5 is default.
The label aesthetic is actually using both "variable" and "value" columns as a way to differentiate your columns from one another. I used paste0 and stuck a '\n' in-between so that you had two lines and could fit them. Had to adjust the size a bit too.
By default, the labels would be positioned right at y (value), which would mean they would overlap with your columns. You need to "nudge" them up, but cannot use nudge_y to push them up because you cannot combine nudge_y with position. What to do? Well, we can just overwrite the default y aesthetic by setting it equal to y + "a number" to nudge them up. Much better to do it this way.
Here's the final code:
# Dodged bars for the two variables per station; grouping by `variable`
# makes both the bars and their labels dodge consistently.
ggplot(
  data = df1,
  mapping = aes(
    x = reorder(station, -value),
    y = value,
    fill = environ,
    group = variable
  )
) +
  geom_col(width = 0.5, colour = "black", size = 0.5, position = "dodge") +
  # Two-line label (variable name, value), lifted above the bar by adding a
  # constant to y because nudge_y cannot be combined with `position`.
  geom_text(
    mapping = aes(label = paste0(variable, "\n", value), y = value + 1.5),
    color = "black",
    size = 3,
    position = position_dodge(0.7)
  ) +
  geom_hline(
    mapping = aes(yintercept = 25),
    linetype = "dashed",
    colour = "red",
    size = 1
  ) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(
    x = "",
    y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.margin = margin(l = -2, unit = "line"),
    legend.text = element_text(size = 11),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    panel.grid.major.x = element_blank()
  ) +
  geom_hline(yintercept = 0)
One way to achieve this is to convert the data to long format via e.g. tidyr::pivot_longer, so that the variables we want to plot are categories of one variable. To get the order of the stations right I reorder station according to days before converting to long. To get the bars side-by-side I use position_dodge2 both in geom_col and geom_text. To show which bar corresponds to which var I put the names of the vars in the labels above the bars.
library(ggplot2)
library(dplyr)
library(tidyr)
# Order the stations by descending `days` first, then reshape `days` and
# `mean3y` into the name/value pair `var` / `value`.
data1 <- data %>%
  mutate(station = forcats::fct_reorder(.f = station, .x = -days)) %>%
  pivot_longer(
    cols = c(days, mean3y),
    names_to = "var",
    values_to = "value"
  )
# Label helper for the x axis: removes every occurrence of "days" or "mean3y"
# together with the one character that follows it.
#
# x: character vector of labels.
# Returns a character vector of the same length as `x`.
my_labels <- function(x) {
  gsub(pattern = "(days.|mean3y.)", replacement = "", x = x)
}
# Side-by-side bars per station for the two variables; position_dodge2() is
# used for both bars and labels so they line up.
p <- ggplot(
  data = data1,
  mapping = aes(x = station, y = value, fill = environ)
) +
  geom_col(
    position = position_dodge2(preserve = "single"),
    colour = "black"
  ) +
  guides(fill = guide_legend(ncol = 2)) +
  # The label names the variable on the first line and its value on the second.
  geom_text(
    mapping = aes(label = paste(var, "\n", value)),
    position = position_dodge2(width = .9, preserve = "single"),
    vjust = -0.3,
    color = "black",
    size = 3.5
  ) +
  scale_x_discrete(labels = my_labels) +
  geom_hline(
    mapping = aes(yintercept = 25),
    linetype = "dashed",
    colour = "red",
    size = 1
  ) +
  labs(
    x = "",
    y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.margin = margin(l = -2, unit = "line"),
    legend.text = element_text(size = 11),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    panel.grid.major.x = element_blank()
  ) +
  geom_hline(yintercept = 0)