I'm trying to show the values of each bar on the plot, so that there are twelve unique values in total across the plot, by or on each bar. Each bar is made up of several stacked variables, and I've made an additional data frame with the sum totals of each bar. But instead they duplicate, when using geom_text. Is there any way to fix this? I've never posted here before, so I don't know how much I should provide of my code (it is extremely messy because I've Frankensteined the code from too many google searches)
The following, Finx, is the basis for my plot.
Finx:
AROID Marktäcke Nutrient Area.km2 Konc.KgM3 Markan.
<chr> <chr> <chr> <dbl> <dbl> <chr>
1 646432-143677 ExtensivVall N 5.36 952. Jordbruk
2 645785-143513 ExtensivVall N 5.86 1040. Jordbruk
3 645157-143904 ExtensivVall N 3.09 549. Jordbruk
4 646432-143677 Grönträda N 2.87 3523. Jordbruk
5 645785-143513 Grönträda N 0.956 1174. Jordbruk
6 645157-143904 Grönträda N 0.100 123. Jordbruk
7 646432-143677 Havre N 1.07 1712. Jordbruk
8 645785-143513 Havre N 0.203 325. Jordbruk
9 645157-143904 Havre N 0.0173 27.7 Jordbruk
10 646432-143677 Höstraps N 1.16 2146. Jordbruk
# … with 69 more rows
Which I work with to present the data.
Finx1 <- Finx %>%
group_by(AROID, Nutrient) %>%
mutate(AROID = recode(AROID, "645157-143904" = "Dis. Utl. Bonderydssjön", "646432-143677" = "Inloppet Tåkern", "645785-143513" = "Nära S:t Åby" )) %>%
ungroup()
Fa <- Finx1 %>%
group_by(AROID, Nutrient, Markan.) %>%
summarize(Konc.KgM3) %>%
summarize_all(sum) %>%
mutate(Tot.kg = Konc.KgM3) %>%
select(-Konc.KgM3)
Fum <- left_join(Finx1, Fa, by = c("AROID", "Nutrient"))
Fum <- Fum[!duplicated(Fum[c('Tot.kg')]), ]
Fum[,'Tot.kg']=round(Fum[,'Tot.kg'], 0)
Fum <- Fum %>%
select(-Markan..x)
Then I make the ggplot,
ggplot(Finx1) +
aes(x = AROID, fill = Marktäcke, y = Konc.KgM3) +
geom_col(alpha = 0.8) +
scale_fill_manual(values = wes_palette("Darjeeling1", type = "continuous", n = 17)) +
theme_light() +
coord_flip() +
geom_text(aes(AROID, Tot.kg, label = Tot.kg, fill = NULL), size = 3, data = Fum2, hjust = 1) +
facet_grid(Markan. ~ Nutrient, scales = "free_x") +
ylab("Total mängd i Kg/år") +
xlab("Delavrinningsområde") +
labs(title='Utlakning av näringsämnen 2007') +
theme(plot.title = element_text(hjust = 0.5, color = "#5A5A5A"), axis.title.x = element_text(color = "#383838"), axis.title.y = element_text(color = "#383838"))
I probably make some mistake here, like showing too little data or something. Idk, but I have no idea what I'm supposed to do.
In principle your ggplot2 code was fine but you messed up the data wrangling, which could be simplifeid without the need of a join:
library(dplyr)
library(ggplot2)
library(wesanderson)
Finx1 <- Finx %>%
mutate(AROID = recode(AROID,
"645157-143904" = "Dis. Utl. Bonderydssjön",
"646432-143677" = "Inloppet Tåkern",
"645785-143513" = "Nära S:t Åby"
))
Fum <- Finx1 %>%
group_by(AROID, Nutrient, Markan.) %>%
summarize(Tot.kg = sum(Konc.KgM3))
ggplot(Finx1) +
aes(x = Konc.KgM3, y = AROID) +
geom_col(aes(fill = Marktäcke), alpha = 0.8) +
geom_text(aes(x = Tot.kg, label = Tot.kg), size = 3, data = Fum, hjust = -.1) +
scale_x_continuous(expand = c(0, 0, 0, 1500)) +
scale_fill_manual(values = wes_palette("Darjeeling1",
type = "continuous", n = 17
)) +
facet_grid(Markan. ~ Nutrient, scales = "free_x") +
theme_light() +
theme(
plot.title = element_text(hjust = 0.5, color = "#5A5A5A"),
axis.title = element_text(color = "#383838")
) +
labs(
x = "Total mängd i Kg/år",
y = "Delavrinningsområde",
title = "Utlakning av näringsämnen 2007"
)
DATA
Finx <- structure(list(AROID = c(
"646432-143677", "645785-143513", "645157-143904",
"646432-143677", "645785-143513", "645157-143904", "646432-143677",
"645785-143513", "645157-143904", "646432-143677"
), Marktäcke = c(
"ExtensivVall",
"ExtensivVall", "ExtensivVall", "Grönträda", "Grönträda",
"Grönträda", "Havre", "Havre", "Havre", "Höstraps"
), Nutrient = c(
"N",
"N", "N", "N", "N", "N", "N", "N", "N", "N"
), Area.km2 = c(
5.36,
5.86, 3.09, 2.87, 0.956, 0.1, 1.07, 0.203, 0.0173, 1.16
), Konc.KgM3 = c(
952,
1040, 549, 3523, 1174, 123, 1712, 325, 27.7, 2146
), Markan. = c(
"Jordbruk",
"Jordbruk", "Jordbruk", "Jordbruk", "Jordbruk", "Jordbruk", "Jordbruk",
"Jordbruk", "Jordbruk", "Jordbruk"
)), class = "data.frame", row.names = c(
"1",
"2", "3", "4", "5", "6", "7", "8", "9", "10"
))
Related
I registered here specifically for this purpose.
Basically I have two data frames that have the exact same information but from two different years.
Here the head() of one of the data frames:
species dbh_cm height_m f plot dbh_m ba
1 1 0.7 1.34 7.1627066 16 0.007 3.848451e-05
2 3 1.9 1.95 2.0018036 16 0.019 2.835287e-04
3 3 4.0 3.05 0.9120516 16 0.040 1.256637e-03
4 1 3.5 2.27 1.0072122 16 0.035 9.621128e-04
5 3 0.6 1.52 6.9312671 16 0.006 2.827433e-05
6 3 4.2 2.70 0.9406631 16 0.042 1.385442e-03
volume class Sp
1 0.0003693754 (0,5] Spruce
2 0.0011067593 (0,5] Larch
3 0.0034956596 (0,5] Larch
4 0.0021997474 (0,5] Spruce
5 0.0002978850 (0,5] Larch
6 0.0035187332 (0,5] Larch
For plotting the graphs for each of these I used:
ggplot(data=trees_b, aes(x=class, fill = Sp)) +
geom_bar(stat = "count") +
labs( x = "DBH classes [cm]", y = "Number of trees [n]", fill="Species") +
scale_x_discrete(labels=c("(0,5]" = "2.5","(5,10]" = "7.5", "(10,15]" = "12.5",
"(15,20]" = "17.5", "(20,25]" = "22.5", "(25,30]" = "27.5",
"(30,35]" = "32.5", "(35,40]" = "37.5", "(40,45]" = "42.5",
"(45,50]" = "47.5", "(50,55]" = "52.5", "(55,60]" = "57.5",
"(60,65]" = "62.5", "(65,70]" = "67.5","(70,75]" = "72.5",
"(75,80]" = "77.5", "(80,85]" = "82.5")) +
scale_fill_viridis(direction = -1, discrete = T) +
theme(axis.text.x = element_text( size = 15),
axis.text.y = element_text (size = 15),
axis.title = element_text(size = 15),
legend.text = element_text (size = 15),
legend.title = element_text (size = 16, face = "bold"))
I know the code is not the cleanest but it worked out perfectly for what I needed and that is this:
enter image description here
Now I want to basically combine the two graphs into one for comparison purposes, is there a way to do that?
One approach to combine your graphs would be to use faceting. To this end I use dplyr::bind_rows to bind your datasets by row and which makes it easy to add an identifier column to the data which could then be used as faceting variable:
Note: I also added a simple function to compute the class means.
trees_b <- trees_a
trees <- list(a = trees_a, b = trees_b) |>
dplyr::bind_rows(.id = "id")
library(ggplot2)
library(viridis)
#> Loading required package: viridisLite
class_mean <- function(x) {
sapply(stringr::str_extract_all(x, "\\d+"), function(x) mean(as.numeric(x)))
}
ggplot(data = trees, aes(x = class, fill = Sp)) +
geom_bar(stat = "count") +
labs(x = "DBH classes [cm]", y = "Number of trees [n]", fill = "Species") +
scale_x_discrete(labels = class_mean) +
scale_fill_viridis(direction = -1, discrete = T) +
theme(
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title = element_text(size = 15),
legend.text = element_text(size = 15),
legend.title = element_text(size = 16, face = "bold")
) +
facet_wrap(~id)
EDIT As you clarified in your comment you want a stacked and dodged bar chart. One approach to achieve that would be via the "facets that's don't look like facets" trick. The basic idea is to facet by the variable you mapped on x and instead map the faceting variable on x. Afterwards we use some styling via theme options to get rid of the faceting look. For more options have a look at ggplot2 - bar plot with both stack and dodge.
ggplot(data = trees, aes(x = id, fill = Sp)) +
geom_bar(stat = "count") +
labs(x = "DBH classes [cm]", y = "Number of trees [n]", fill = "Species") +
scale_fill_viridis(direction = -1, discrete = T) +
theme(
axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title = element_text(size = 15),
legend.text = element_text(size = 15),
legend.title = element_text(size = 16, face = "bold")
) +
facet_wrap(~class, labeller = labeller(class = class_mean), strip.position = "bottom", nrow = 1) +
theme(strip.placement = "outside", strip.background.x = element_blank(), panel.spacing.x = unit(0, "pt"))
DATA
trees_a <- structure(list(
species = c(1L, 3L, 3L, 1L, 3L, 3L), dbh_cm = c(
0.7,
1.9, 4, 3.5, 0.6, 4.2
), height_m = c(
1.34, 1.95, 3.05, 2.27,
1.52, 2.7
), f = c(
7.1627066, 2.0018036, 0.9120516, 1.0072122,
6.9312671, 0.9406631
), plot = c(16L, 16L, 16L, 16L, 16L, 16L),
dbh_m = c(0.007, 0.019, 0.04, 0.035, 0.006, 0.042), ba = c(
3.848451e-05,
0.0002835287, 0.001256637, 0.0009621128, 2.827433e-05, 0.001385442
), volume = c(
0.0003693754, 0.0011067593, 0.0034956596, 0.0021997474,
0.000297885, 0.0035187332
), class = c(
"(0,5]", "(0,5]", "(0,5]",
"(0,5]", "(0,5]", "(0,5]"
), Sp = c(
"Spruce", "Larch", "Larch",
"Spruce", "Larch", "Larch"
)
), class = "data.frame", row.names = c(
"1",
"2", "3", "4", "5", "6"
))
I borrowed the R code from the link and produced the following graph:
Using the same idea, I tried with my data as follows:
library(tidyverse)
library(tidytable)
library(ggforce)
library(ggtext)
library(camcorder)
library(bibliometrix)
library(bibliometrixData)
data(management)
M <- metaTagExtraction(management, "AU_CO")
CO <-
tidytable(
Country = unlist(strsplit(M$AU_CO,";"))
, year = rep(M$PY, lengths(strsplit(M$AU_CO,";")))
, nAuPerArt = rep(lengths(strsplit(M$AU_CO,";")),lengths(strsplit(M$AU_CO,";")))
)
df0 <-
CO %>%
summarise.(
frequency = length(Country)
, frequencyFractionalized = sum(1/nAuPerArt)
, .by = c(Country, year)
) %>%
arrange.(Country, year)
df1 <-
df0 %>%
mutate.(
min_year = min(year)
, n_total = sum(frequency)
, .by = Country
) %>%
mutate.(Country = fct_reorder(Country, min_year)) %>%
count(Country, n_total, min_year, year) %>%
mutate.(
a_deg = as.numeric(Country) * 2.7 + 8.5
, a = a_deg * pi/180
, x = -(year - min(year) + 10) * cos(a + pi/2.07)
, y = (year - min(year) + 10) * sin(a + pi/2.07)
, label_a = ifelse(a_deg > 180, 270 - a_deg, 90 - a_deg)
, h = ifelse(a_deg > 180, 1, 0)
, label = ifelse(h == 0,
paste0(Country, " <span style = 'color:darkorange;'>(", n_total, ")</span>"),
paste0(" <span style = 'color:darkorange;'>(", n_total, ")</span>", Country))
) %>%
arrange.(as.character(Country), year)
df1
# df1 %>% view()
Years <-
tidytable(
r = seq(
from = 10
, to = 280
, length.out = 12
)
, l = seq(from = min(df0$year), to = max(df0$year), by = 3)
) %>%
mutate.(
lt = ifelse(row_number.() %% 2 == 0, "dotted", "solid")
)
Years
f1 = "Porpora"
gg_record(dir = "temp", device = "png", width = 10, height = 11, units = "in", dpi = 320)
ggplot(data = df1) +
# Purple points
geom_point(data = df1, aes(x = x, y = y, size = n * 10), shape = 21, stroke = 0.15, fill = "purple") +
# Year circles
geom_circle(
data = Years
, aes(x0 = 0, y0 = 0, r = r, linetype = lt), size = 0.08, color = "grey50"
) +
# Year labels
geom_label(
data = Years
, aes(x = 0, y = r, label = l), size = 3, family = f1, label.padding = unit(0.25, "lines"), label.size = NA, fill = "grey95", color = "grey70") +
# Orange points (totals)
geom_point(aes(x = -290 * cos(a + pi/2.07), y = 290 * sin(a + pi/2.07), size = n_total), stat = "unique", shape = 21, stroke = 0.5, fill = "orange") +
# Family names and totals
geom_richtext(aes(x = -305 * cos(a + pi/2.07),
y = 305 * sin(a + pi/2.07),
label = label,
angle = label_a,
hjust = h), stat = "unique", Country = f1, size = 3.5,
fill = NA, label.color = NA, color = "#0b5029") +
# Annotations
annotate("text", 0, 293, label = "Total", Country = f1, color = "orange") +
scale_size_continuous(range = c(0, 8)) +
scale_color_viridis_c(option = "turbo") +
coord_fixed(clip = "off", xlim = c(-400, 400)) +
# labs(
# caption = "<span style = 'font-size:30px;'>Taxonomic publications, 1757-2021</span><br>
# Publications by family and year <span style = 'color:purple;'>(purple circles)</span>and total publications by family <span style = 'color:darkorange;'>(orange circles and text)</span><br>
# <span style = 'color:black;'>Source: World Spider Catalog - Graphic: Georgios Karamanis</span>"
# ) +
theme_void() +
theme(
legend.position = "none",
plot.background = element_rect(fill = "grey95", color = NA),
plot.margin = margin(0, 20, 20, 20),
plot.caption = element_markdown(family = f1, hjust = 0.5, margin = margin(100, 0, -100, 0), size = 14, lineheight = 1.4, color = "#0b5029")
)
which produces the following graph
However, I'm facing two issues with the produced graph.
How to get pink points aligned correctly?
How to use whole circle for country names?
Edited
Looking for more succinct and reproducible code which can be used for different data sets. Preferably looking for a generic function which can handle different data sets.
You can do calculations within a function for the x and y values to construct the ggplot which extends the circle all the way round and gives labels correct heights.
I've adapted a function to work with other datasets. This takes a dataset in a tidy format, with:
a 'year' column
one row per 'event'
a grouping variable (such as country)
I've used Nobel laurate data from here as an example dataset to show the function in practice. Data setup:
library(tidyverse)
library(ggforce)
library(ggtext)
nobel <- read_csv("archive.csv")
# Filtering in this example to create a plottable dataset
nobel_filt <- nobel %>%
mutate(country = fct_lump_n(factor(`Birth Country`), n = 50)) %>%
filter(country != "Other")
nobel_filt
#> # A tibble: 883 x 19
#> Year Category Prize Motivation `Prize Share` `Laureate ID` `Laureate Type`
#> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr>
#> 1 1901 Chemistry The ~ "\"in rec~ 1/1 160 Individual
#> 2 1901 Literature The ~ "\"in spe~ 1/1 569 Individual
#> 3 1901 Medicine The ~ "\"for hi~ 1/1 293 Individual
#> 4 1901 Peace The ~ <NA> 1/2 462 Individual
#> 5 1901 Peace The ~ <NA> 1/2 463 Individual
#> 6 1901 Physics The ~ "\"in rec~ 1/1 1 Individual
#> 7 1902 Chemistry The ~ "\"in rec~ 1/1 161 Individual
#> 8 1902 Literature The ~ "\"the gr~ 1/1 571 Individual
#> 9 1902 Medicine The ~ "\"for hi~ 1/1 294 Individual
#> 10 1902 Peace The ~ <NA> 1/2 464 Individual
#> # ... with 873 more rows, and 12 more variables: Full Name <chr>,
#> # Birth Date <date>, Birth City <chr>, Birth Country <chr>, Sex <chr>,
#> # Organization Name <chr>, Organization City <chr>,
#> # Organization Country <chr>, Death Date <date>, Death City <chr>,
#> # Death Country <chr>, country <fct>
This function will then take the dataframe as an argument, along with the names of the column to group by and the column to mark time periods by. It's not super-succinct, as there is a lot of data processing going on. But hopefully within a function it's tidier.
circle_plot <- function(data, group_var, time_var) {
df_full <-
data %>%
select(group = {{group_var}}, year = {{time_var}}) %>%
mutate(group = factor(group),
group = fct_reorder(group, year, .fun = min),
order = as.numeric(group))
year_vals <-
tibble(year = as.character(seq(min(df_full$year), max(df_full$year), 1)),
level = 1 + 1:length(year))
y_vals <- year_vals %>%
bind_rows(tribble(~ year, ~ level,
"total", max(year_vals$level) + 5,
"title", max(year_vals$level) + 10
))
year_labs <-
tibble(year = as.character(floor(seq(
min(df_full$year), max(df_full$year), length.out = 10
)))) %>%
left_join(y_vals, by = "year")
x_len <- max(df_full$order)
df_ang <- df_full %>%
mutate(year = as.character(year)) %>%
count(group, order, year) %>%
left_join(y_vals, by = "year") %>%
mutate(a_deg = order * 350/x_len + 5,
x = - level * cos(a_deg * pi/180 + pi/2.07),
y = level * sin(a_deg * pi/180 + pi/2.07))
df_lab <- df_ang %>%
group_by(group, a_deg) %>%
summarise(n_total = n()) %>%
mutate(
group_name = str_trunc(as.character(group), 30),
label_a = ifelse(a_deg > 180, 270 - a_deg, 90 - a_deg),
h = ifelse(a_deg > 180, 1, 0),
label = ifelse(
h == 0,
paste0(
group_name,
" <span style = 'color:darkorange;'>(",
n_total,
")</span>"
),
paste0(
"<span style = 'color:darkorange;'>(",
n_total,
")</span> ",
group_name
)
),
year = "title"
) %>%
left_join(y_vals, by = "year") %>%
mutate(
x = -level * cos(a_deg * pi / 180 + pi / 2.07),
y = level * sin(a_deg * pi / 180 + pi / 2.07),
total_x = -(level - 5) * cos(a_deg * pi / 180 + pi / 2.07),
total_y = (level - 5) * sin(a_deg * pi / 180 + pi / 2.07)
)
ggplot() +
geom_circle(
data = year_labs,
aes(
x0 = 0,
y0 = 0,
r = level
),
size = 0.08,
color = "grey50"
) +
geom_label(
data = year_labs,
aes(x = 0, y = level, label = year),
size = 3,
label.padding = unit(0.25, "lines"),
label.size = NA,
fill = "grey95",
color = "grey70"
) +
geom_point(
data = df_ang,
aes(x = x, y = y, size = n),
shape = 21,
stroke = 0.15,
fill = "purple"
) +
geom_point(
data = df_lab,
aes(total_x, total_y,
size = n_total
),
stat = "unique",
shape = 21,
stroke = 0.5,
fill = "orange"
) +
geom_richtext(
data = df_lab,
aes(x, y,
label = label,
angle = label_a,
hjust = h
),
stat = "unique",
size = 4,
fill = NA,
label.color = NA,
color = "#0b5029"
) +
annotate(
"text",
0,
y = y_vals[y_vals$year=="total",]$level,
label = "Total",
color = "orange",
size = 4,
vjust = 0
) +
scale_size_continuous(range = c(1, 9)) +
scale_color_viridis_c(option = "turbo") +
coord_fixed(clip = "off", xlim = c(-120, 120)) +
theme_void() +
theme(
legend.position = "none",
plot.background = element_rect(fill = "grey95", color = NA),
plot.margin = margin(100, 180, 150, 180),
)
}
circle_plot(nobel_filt, `Birth Country`, Year)
# ggsave("test.png", height = 10, width = 10)
This creates the following graph:
The biggest headache (as you can see here) will be changing margins to accommodate long labels and exporting plot sizes which fit the sizes of text/numbers of year circles neatly. This might have to be experimented with across each plot. You can adapt the margin call within the function to a sensible default, or add a further theme element to the function call like so:
circle_plot(nobel_filt, `Birth Country`, Year) +
theme(plot.margin = margin(80, 150, 120, 150))
Hope that helps!
Created on 2021-12-27 by the reprex package (v2.0.1)
I am trying to label the stack bar chart with percentage and I ended labeling with proportion. Here are my codes:
ggplot(data = GradeSTEM_data, aes(x = Grade, y = percent, fill = STEMFlag, label = sprintf("%.02f", percent))) +
geom_bar(position = "fill", stat = "identity") +
scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
geom_text(position = position_stack(vjust = 0.5), size = 2)
Here is a potential solution:
# Load libraries
library(tidyverse)
# Create 'fake' data (minimal reproducible example)
stem_data <- data.frame(Grade = rep(c("A", "B", "C", "P", "NP"), 2),
STEMflag = factor(x = c(rep("STEM", 5), rep("NONSTEM", 5)),
levels = c("STEM", "NONSTEM")),
percent = c(0.95, 0.93, 0.90, 0.67, 0.86,
0.05, 0.07, 0.10, 0.33, 0.14))
head(stem_data)
#> Grade STEMflag percent
#> 1 A STEM 0.95
#> 2 B STEM 0.93
#> 3 C STEM 0.90
#> 4 P STEM 0.67
#> 5 NP STEM 0.86
#> 6 A NONSTEM 0.05
# Plot the example data
ggplot(data = stem_data, aes(x = Grade, y = percent, fill = STEMflag,
label = paste(percent * 100, "%", sep = ""))) +
geom_bar(position = "fill", stat = "identity") +
scale_y_continuous(labels = scales::label_percent(accuracy = 1)) +
geom_text(position = position_stack(vjust = 0.5), size = 4)
jared's answer can be further improved:
scales::label_percent(accuracy = 1) can be used to format both kind of labels in geom_text() and scale_y_continuous() consistently and without repeating code
geom_bar(stat = "identity") can be abbreviated by geom_col()
# function to format percent labels
lp <- scales::label_percent(accuracy = 1)
library(ggplot2)
ggplot(data = GradeSTEM_data,
aes(x = Grade, y = percent, fill = STEMflag, label = lp(percent))) +
geom_col(position = "fill") +
scale_y_continuous(labels = lp) +
geom_text(position = position_stack(vjust = 0.5))
Data
GradeSTEM_data <- data.frame(
Grade = factor(rep(c("A", "B", "C", "P", "NP"), 2),
levels = c("A", "B", "C", "P", "NP")),
STEMflag = factor(x = c(rep("STEM", 5), rep("NONSTEM", 5)),
levels = c("STEM", "NONSTEM")),
percent = c(0.95, 0.93, 0.90, 0.67, 0.86,
0.05, 0.07, 0.10, 0.33, 0.14))
I wanted to visualize the bar graph in a additive way (waterfall plot, see below).
This is the data:
structure(list(Parameter = c("Driving", "Driver Behaviour", "Road Quality",
"Passenger load", "Speed", "Topography", "climate", "total"),
Values = c(0.8, 0.2, 0.2, 0.2, 0.24, 0.5, 0.8, 2.82)),
row.names = c(NA, -8L), class = "data.frame")
# Parameter Values
# 1 Driving 0.80
# 2 Driver Behaviour 0.20
# 3 Road Quality 0.20
# 4 Passenger load 0.20
# 5 Speed 0.24
# 6 Topography 0.50
# 7 climate 0.80
# 8 total 2.82
This is the output im trying to produce. Is there any way i can do it in R?
Still in need of some polishing but in principle your watefall chart can be achieved like so:
BTW: Because of rounding errors your data gives a total of 2.94 instead of 2.82.
d <- structure(list(Parameter = c("Driving", "Driver Behaviour", "Road Quality",
"Passenger load", "Speed", "Topography", "climate", "total"),
Values = c(0.8, 0.2, 0.2, 0.2, 0.24, 0.5, 0.8, 2.82)),
row.names = c(NA, -8L), class = "data.frame")
library(ggplot2)
library(dplyr)
# Prepare the dataset
d1 <- d %>%
mutate(ymax = cumsum(Values),
ymin = lag(ymax, default = 0),
xmax = as.numeric(factor(Parameter, levels = Parameter)),
xmin = lag(xmax, default = 0),
x = (xmin + xmax) / 2,
y = (ymin + ymax) / 2,
label = Values,
label_color = "white",
) %>%
# Get the total right
mutate(ymin = ifelse(Parameter == "total", 0, ymin),
ymax = ifelse(Parameter == "total", Values, ymax),
y = ifelse(Parameter %in% c("Driving", "total"), Values + .2, y),
label = case_when(
Parameter %in% c("Driving") ~ paste0("Best Case\n", Values),
Parameter %in% c("total") ~ paste0("Worst Case\n", Values),
TRUE ~ as.character(Values)),
label_color = ifelse(Parameter %in% c("Driving", "total"), "black", "white"))
ggplot(d1, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)) +
geom_rect(fill = "#E40019") +
geom_text(aes(x = x, y = y, label = label, color = label_color), show.legend = FALSE) +
scale_x_continuous(breaks = seq(.5, 7.5, 1), labels = d1$Parameter) +
scale_color_manual(values = c(white = "white", black = "black")) +
theme_bw()
Created on 2020-06-16 by the reprex package (v0.3.0)
I have one data frame with means of multiple ensembles from 5 different models, so 5 columns plus a date column, and a second data frame where i have the standard deviations.
I would like to plot all in one plot, where i have the mean and a shaded standard deviation on each mean for the different models. Any idea how i can do this in ggplot?
A sample data:
Means:
Means <- structure(list(M1 = c(0.146803, 0.1477525, 0.1465378, 0.1430386,
0.14315, 0.1407827, 0.1394645, 0.1389529, 0.1400275, 0.1375498
), M2 = c(0.09307112, 0.09162262, 0.09091183, 0.09075522, 0.09127082,
0.08992585, 0.08821484, 0.08810128, 0.08770718, 0.08705453),
M3 = c(0.1310087, 0.1255959, 0.1271953, 0.1270623, 0.1242448,
0.1249174, 0.1247585, 0.1224901, 0.1228224, 0.1207565), M4 = c(0.1328935,
0.133063, 0.1302629, 0.1291629, 0.1255703, 0.1244377, 0.1235587,
0.1236105, 0.1206313, 0.1192216), M5 = c(0.1312402, 0.1296496,
0.1288667, 0.1304318, 0.1291016, 0.1290919, 0.1286337, 0.1284389,
0.1270611, 0.1289673), date = c("2015-01-01", "2016-01-01",
"2017-01-01", "2018-01-01", "2019-01-01", "2020-01-01", "2021-01-01",
"2022-01-01", "2023-01-01", "2024-01-01")), row.names = c(NA,
-10L), class = "data.frame")
SD:
SD <- structure(list(M1 = c(NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN,
NaN, NaN), M2 = c(0.002352747, 0.002636449, 0.002584647, 0.002942681,
0.003041309, 0.00279849, 0.00282362, 0.002546572, 0.002362555,
0.003004829), M3 = c(0.003872364, 0.003809441, 0.003494403, 0.004341524,
0.00372956, 0.00382587, 0.00394011, 0.00428747, 0.0030507, 0.003493746
), M4 = c(0.002779382, 0.003130044, 0.003052774, 0.002544359,
0.0028259, 0.002732643, 0.001902435, 0.001727357, 0.002808552,
0.001431315), M5 = c(0.003038877, 0.004208446, 0.005034087, 0.003276497,
0.004041488, 0.004525613, 0.003653864, 0.00377299, 0.003307351,
0.00320737), date = c("2015-01-01", "2016-01-01", "2017-01-01",
"2018-01-01", "2019-01-01", "2020-01-01", "2021-01-01", "2022-01-01",
"2023-01-01", "2024-01-01")), row.names = c(NA, -10L), class = "data.frame")
df2 <- melt(Means, id = "date")
tit <- sprintf("%s %s Anuual Burden - %s", regnm, spcname, scenm)
filename <- sprintf("%s/TS_%s_%s_BurdenANN_%s.png",folderout, regnm, spcname, scenm)
png(filename,width = 8 * 360, height = 5 * 360, res = 360)
print(ggplot(data = df2, aes(x = date, y = value, color = variable)) +
geom_line(data=subset(df2, variable=="M1"), size=2) +
geom_line(data=subset(df2, variable=="M2"), size=2) +
geom_line(data=subset(df2, variable=="M3"), size=2) +
geom_line(data=subset(df2, variable=="M4"), size=2) +
geom_line(data=subset(df2, variable=="M5"), size=2) +
scale_colour_manual(name = spcname, breaks = c("M1","M2","M3","M4","M5"), values = clr2) +
xlab("Years") + ylab(sprintf("%s (Tg)", spcname)) + ggtitle(tit) + theme_bw() + theme(legend.key = element_blank()) +
guides(color = guide_legend(override.aes = list(linetype = c(1,1,1,1,1), shape = c(NA,NA,NA,NA,NA)))) + theme(plot.margin=unit(c(1,3,1,1),"cm"))+
theme(legend.position=c(1.1,.6), legend.direction = "vertical") +
theme(legend.title = element_blank())) # + expand_limits(y=0)
dev.off()
By converting the dataframes to long format and joining into one df this can be achieved via one geom_line and a geom_ribbon like so:
library(ggplot2)
library(dplyr)
library(tidyr)
means_long <- pivot_longer(Means, -date, values_to = "mean", names_to = "variable")
sd_long <- pivot_longer(SD, -date, values_to = "sd", names_to = "variable")
df_join <- means_long %>%
left_join(sd_long)
#> Joining, by = c("date", "variable")
ggplot(data = df_join, aes(x = date, group = variable)) +
geom_line(aes(y = mean, color = variable), size = 1) +
geom_ribbon(aes(y = mean, ymin = mean - sd, ymax = mean + sd, fill = variable), alpha = .2) +
xlab("Years") +
theme_bw() +
theme(legend.key = element_blank()) +
theme(plot.margin=unit(c(1,3,1,1),"cm"))+
theme(legend.position = c(1.1,.6), legend.direction = "vertical") +
theme(legend.title = element_blank())
#> Warning in max(ids, na.rm = TRUE): kein nicht-fehlendes Argument für max; gebe -
#> Inf zurück
Created on 2020-05-20 by the reprex package (v0.3.0)