Change `ggplot` label color by conditions - r

I have a question about changing color for geom_text & geom_label base on a condition.
Expected out put
Data label has color = blue for 2021, color = grey for 2020
Gap label is red when negative and green when positive
Data
data <- tibble(Factor = c("A", "B", "A", "B"),
Score = c(90, 85, 85, 90),
Year = c("2020", "2020", "2021", "2021"))
Data transform
df_2 = data %>%
pivot_wider(names_from = Year, names_prefix = "Year", values_from = Score) %>%
mutate(gap = Year2021 - Year2020) %>%
select(Factor, gap)
Plot
data %>%
left_join(df_2) %>%
ggplot(aes(x = Factor, y = Score, fill = Year)) +
geom_col(position='dodge') +
geom_text(aes(label=Score),
position=position_dodge(width = 0.9),
vjust=-0.40) +
geom_label(aes(y = 100, label = ifelse(Year == 2021, gap, NA_character_)), na.rm = TRUE) +
scale_y_continuous(limits = c(0,105))
theme_minimal()
Expected out put
85 & 90 = blue for 2021, 90 & 85 = grey for 2020
-5 = red and 5 = green

Second option would be to use a manual scale for the text and bar colors. For the labels we could use the ggnewscale package which allows for multiple scales for the same aesthetic. Doing so we could add a second fill scale:
library(ggplot2)
library(ggnewscale)
ggplot(dat, aes(x = Factor, y = Score, fill = Year)) +
geom_col(position = "dodge") +
geom_text(aes(label = Score, color = Year),
position = position_dodge(width = 0.9),
vjust = -0.40
) +
scale_color_manual(aesthetics = c("fill", "color"), values = c("2021" = "blue", "2020" = "grey")) +
ggnewscale::new_scale_fill() +
geom_label(aes(y = 100, label = ifelse(Year == 2021, gap, NA_character_), fill = gap > 0), na.rm = TRUE) +
scale_fill_manual(values = c("FALSE" = "red", "TRUE" = "green")) +
scale_y_continuous(limits = c(0, 105)) +
theme_minimal()
DATA
data <- data.frame(
Factor = c("A", "B", "A", "B"),
Score = c(90, 85, 85, 90),
Year = c("2020", "2020", "2021", "2021")
)
library(dplyr)
library(tidyr)
df_2 <- data %>%
pivot_wider(names_from = Year, names_prefix = "Year", values_from = Score) %>%
mutate(gap = Year2021 - Year2020) %>%
select(Factor, gap)
dat <- data %>%
left_join(df_2)

You can conditionally assign the colors to the aesthetics using case_when. You can use the following code:
library(tibble)
library(ggplot2)
library(dplyr)
library(tidyr)
data <- tibble(Factor = c("A", "B", "A", "B"),
Score = c(90, 85, 85, 90),
Year = c("2020", "2020", "2021", "2021"))
df_2 = data %>%
pivot_wider(names_from = Year, names_prefix = "Year", values_from = Score) %>%
mutate(gap = Year2021 - Year2020) %>%
select(Factor, gap)
df <- data %>% left_join(df_2)
#> Joining, by = "Factor"
ggplot(df, mapping = aes(x = Factor, y = Score, fill = Year)) +
geom_col(position='dodge') +
geom_text(df, mapping = aes(label=Score, color = Year),
color = case_when(
df$Score == 85 & df$Year == 2021 | df$Score == 90 & df$Year == 2021 ~ "blue",
df$Score == 85 & df$Year == 2020 | df$Score == 90 & df$Year == 2020 ~ "grey",
),
position=position_dodge(width = 0.9),
vjust=-0.40) +
geom_label(data = df, mapping = aes(y = 100, label = ifelse(Year == 2021, gap, NA_character_)),
fill = case_when(
df$gap == -5 ~ "red",
df$gap == 5 ~ "green"
), na.rm = TRUE) +
scale_y_continuous(limits = c(0,105)) +
theme_minimal()
Created on 2022-09-24 with reprex v2.0.2

Related

"Crossing off" tiles on a heatmap

For a heatmap made using ggplot and geom_tile, how would you "cross off" a tile based on a conditional value?
The heatmap shows counts of the number of times an animal performed a behavior between 1990-2020.
Rows are animal IDs, columns are years.
Years go from 1990-2020 but not all animals are alive throughout that time frame (ie, some born later than 1990 or die earlier than 2020)
So I want to cross off any tiles where an animal isn't alive, or before it was born.
Data look like this (shortened to 5 rows for brevity):
data <- data.frame(date = structure(c(8243, 8243, 8243, 8248, 8947), class = "Date"),
year = c("1992", "1992", "1992", "1992", "1994"),
event.id = c(8L, 8L, 8L, 10L, 11L),
id = c("L5", "L58", "L73", "L21", "L5"),
birth = c(1964L, 1980L, 1986L, 1950L, 1964L),
death = c(2012L, 2003L, NA, NA, 2012L))
NA means the animal is still alive and it wouldn't be crossed off since before it was born.
Any help to create this is greatly appreciated!
Code looks like this:
heatmap <- data %>%
mutate(x = case_when(year %in% 1990:1999 ~ "1990-1999",
TRUE ~ year)) %>%
mutate(y = paste(id)) %>%
group_by(x, y, .drop = FALSE) %>%
summarize(count = n()) %>%
arrange(y)
ggplot(data = heatmap, aes(x, y, fill = count)) +
geom_tile()
EDIT
Current heat map.
Here's how you could use color to indicate NA, like suggested by #Gregor Thomas.
Transforming your data to "complete":
library(dplyr)
library(tidyr)
library(ggplot2)
hm <- dat %>%
mutate(x = case_when(year < 1960 ~ "Pre-1960",
year %in% 1960:1969 ~ "1960-1969",
year %in% 1970:1979 ~ "1970-1979",
year %in% 1980:1989 ~ "1980-1989",
year %in% 1990:1999 ~ "1990-1999",
TRUE ~ year)) %>%
mutate(y = paste(matriline, id)) %>%
group_by(x, y, .drop = FALSE) %>%
summarize(count = n()) %>%
ungroup() %>%
tidyr::complete(x, y) %>%
arrange(y) %>%
tidyr::separate(y, into = c("ym", "yid"), sep = " ", remove = FALSE)
Then define a color for NA:
ggplot(data = hm, aes(x, yid, fill = count)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "red", na.value = "grey50") +
scale_x_discrete(position = "top", drop = FALSE) +
scale_y_discrete(limits=rev) +
labs(x = NULL, y = NULL) +
facet_wrap( ~ ym, strip.position = "left", dir = "v", ncol = 1) +
theme(panel.spacing = unit(0.2, "lines"),
strip.background = element_blank(),
strip.placement = "outside",
axis.text.x = element_text(angle = 45, hjust = -0.02))
Data:
ids <- c("J11", "J16", "J17", "J02", "J22", "J26", "J27", "J30")
matrilines <- c("J02","J04", "K11", "L20", "P90", "K100", "R22")
dat <- data.frame(year = as.character(sample(1960:2018, 1000, replace = TRUE)),
id = sample(ids, 1000, replace = TRUE),
matriline = sample(matrilines, 1000, replace = TRUE))

Stacked bar chart fill with multiple conditions ggplot2

I'm working on a stacked bar chart. I'm trying to have a column of full length, where one part will be equal to actual value, and filled with the specific (!) color, and the second part is a "filler" (100-value) which always should be beige.
How could I fill it under the nested condition (if it is filler = gray, if value = painted depending on another column(with manual color selection))?
my_data <- data.frame(group = c("group1", "group2", "group3", "group4"),
value = c(10, 20, 30, 10))
my_data <- my_data %>% mutate(filler = 100-value)
my_data <- my_data %>% gather(key = "obs", value = "value", -1)
my_data <- my_data %>% mutate(col = case_when(value <=10 ~ "yellow",
value >10 & value <=20 ~ "pink",
value >20 ~ "red")
ggplot(my_data)+
geom_bar(aes(x=group, y=value, fill=obs),
stat="identity", alpha=1, position = position_stack())+
scale_fill_manual(breaks = c("value", "filler"),
values = c("filler" = "beige", "value" = "black"))
You can use scale_fill_identity for this
library(tidyverse)
my_data <- data.frame(group = c("group1", "group2", "group3", "group4"),
value = c(10, 20, 30, 10))
my_data <- my_data %>% mutate(filler = 100-value)
my_data <- my_data %>% gather(key = "obs", value = "value", -1)
my_data <- my_data %>% mutate(col = case_when(value <=10 ~ "yellow",
value >10 & value <=20 ~ "pink",
value >20 ~ "red"))
ggplot(my_data)+
geom_bar(aes(x=group, y=value, fill=col), stat="identity", alpha=1, position = position_stack())+
scale_fill_identity()
One way you can avoid manually specifying colour for the background beige is by simple creating 2 layers of geoms one will be in background, which doesn't require any calculation and your actual values will be front of it.
my_data <- data.frame(group = c("group1", "group2", "group3", "group4"),
value = c(10, 20, 30, 10))
my_data %>%
mutate(col = case_when(value <=10 ~ "yellow",
value >10 & value <=20 ~ "pink",
value >20 ~ "red")) %>%
ggplot() +
geom_col(aes(x = group, y= 100), fill = "beige") +
geom_col(aes(x = group, y = value, fill = col)) +
scale_color_identity()
Created on 2021-02-10 by the reprex package (v0.3.0)

facet_wrap and assign colors to categorical variables in ggplot2

I am trying to reproduce this graphic below on the COVID19 (first plot) using facet_wrap() but I cannot make the other background series visible in gray (second plot).
Second plot
library(dplyr)
library(httr)
library(readxl)
library(ggplot2)
library(ggrepel)
library(scales)
library(forcats)
url <- paste("https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-",format(Sys.time(), "%Y-%m-%d"), ".xlsx", sep = "")
GET(url, authenticate(":", ":", type="ntlm"), write_disk(tf <- tempfile(fileext = ".xlsx")))
data <- read_excel(tf)
data$`Countries and territories` = fct_recode( data$`Countries and territories`, "Canada" ="CANADA")
days100 = data %>%
rename(country = `Countries and territories`) %>%
select(-Day, -Month, -Year) %>%
arrange(country, DateRep) %>%
group_by(country) %>%
mutate(test = if_else(Cases >= 1,
cumsum(Cases),0),
logtest = if_else(test > 0,
log10(test),0),
dummy100 = if_else(test >= 100,
1,0),
num100 = if_else(dummy100 == 1,
cumsum(dummy100),0),
selec_count = if_else(country == "Ecuador",
1,
if_else(country == "Italy",
2,
if_else(country == "US",
3,
if_else(country == "China",
4,
0))))) %>%
filter(country != 'Cases_on_an_international_conveyance_Japan',
test >=100)
days100 = days100 %>%
mutate(fil_count = if_else(GeoId == "CL" | GeoId == "IT" | GeoId == "CN" | GeoId == "FR", 1, 0))
ggplot(data = days100, aes(x = num100,
y = test,
color = selec_count,
group = country)) +
geom_line() +
guides(color = F) +
#scale_color_manual(values = c("1"="#5aae61", "2"="#7b3294", "3" = "red", "4" = "blue", "0"= "black")) +
facet_wrap(~ country) +
scale_x_continuous(expand = c(0, -1)) +
scale_y_continuous(trans="log10",
labels = scales::comma,
limits = c(100, NA),
expand = expand_scale(mult = c(0, 0.05))) +
theme_bw() +
ggrepel::geom_text_repel(data = days100 %>%
filter(fil_count==1 &
DateRep == last(DateRep)),
aes(label = country))
Also I want to add manual colors for selec_count category so that each series can be better visualized using scale_color_manual().
Without facet_wrap()
The only way I can think of is to duplicate (with crossing or similar) the data across all available countries.
library(dplyr)
library(tidyr)
library(ggplot2)
# helpful to find the most-impacted countries with over 1000 cases
topdat <- dat %>%
group_by(GeoId) %>%
summarize(n=max(Cases)) %>%
filter(n > 1000) %>%
arrange(desc(n))
plotdat <- dat %>%
mutate(
`Countries and territories` =
gsub("_", " ",
if_else(`Countries and territories` == "CANADA",
"Canada", `Countries and territories`))) %>%
inner_join(., topdat, by = "GeoId") %>%
arrange(DateRep) %>%
group_by(GeoId) %>%
filter(cumany(Cases > 100)) %>%
mutate(
ndays = as.numeric(difftime(DateRep, min(DateRep), units = "days")),
ncases = cumsum(Cases),
ndeaths = cumsum(Deaths),
ismax = ncases == max(ncases)
) %>%
crossing(., Country = unique(.$`Countries and territories`)) %>%
mutate(
col = case_when(
`Countries and territories` == Country ~ 1L,
GeoId %in% c("CN", "IT", "UK") ~ 2L,
TRUE ~ 3L
)
)
firstpane <- plotdat %>%
select(-Country) %>%
filter(GeoId %in% c("CN", "IT", "UK")) %>%
group_by(GeoId) %>%
slice(which.max(ncases)) %>%
crossing(., Country = unique(plotdat$`Countries and territories`))
ggplot(plotdat, mapping = aes(x = ndays, y = ncases, group = GeoId)) +
geom_line(aes(color = factor(col)), data = ~ subset(., col == 3L)) +
geom_line(aes(color = factor(col)), data = ~ subset(., col == 2L)) +
geom_line(aes(color = factor(col)), data = ~ subset(., col == 1L)) +
geom_text(aes(label = `Countries and territories`),
hjust = 0, vjust = 1.2,
data = subset(firstpane, Country == min(Country))) +
geom_point(data = firstpane) +
geom_point(color = "red", data = ~ subset(., ismax & col == 1L)) +
facet_wrap(~ Country) +
scale_y_continuous(trans = "log10", labels = scales::comma) +
scale_color_manual(values = c("red", "gray50", "#bbbbbb88"), guide = FALSE) +
labs(x = "Days since 100th case", y = NULL) +
lims(x = c(1, 100))
I did three geom_line to manually control the layering, so the red line is always on top. Otherwise, replace all three with geom_line(aes(color = factor(col))).

gganimate: Combining transition_layers and geom_smooth

How do I combine geom_smooth(method = "lm) function with gganimate()'s transition_layers, so that, as the individual bars drift/grow upwards, the linear line of geom_smooth appears, like so: Example of desired appearance of geom_smooth line The only difference is that in my case, instead of the points, the bars would drift upwards as the line appears.
The bars current work well, appearing by drifting upwards, made possible by using the transition_layers function of gganimate.
However, I can't figure out how to add the geom_smooth line, so it appears as the bars grow upwards. Right now, the line appears just at the end, as seen below.
See below for the current look of the animation.
Here is a simple reprex of my problem:
#Df for reprex
library(ggplot2)
library(tidyverse)
year <- as.numeric(c(1996:2002,
1996:2002,
1996:2002))
c <- c(39, 40, 67, 80, 30, 140, 90, 23, 100, 123,
140, 1, 2, 1, 13, 3, 3, 30, 1, 3, 3)
df <- data.frame(year, c) %>%
select(year, c) %>%
arrange(year)
#Static plot
(static_plot <- ggplot(data = df) +
geom_bar(data = df %>% filter(year == 1996), stat="identity", position ="stack",
aes(x = year, y = c)) +
geom_bar(data = df %>% filter(year == 1997), stat="identity", position ="stack",
aes(x = year, y = c)) +
geom_bar(data = df %>% filter(year == 1998), stat="identity", position ="stack",
aes(x = year, y = c)) +
geom_bar(data = df %>% filter(year == 1999), stat="identity", position ="stack",
aes(x = year, y = c)) +
geom_bar(data = df %>% filter(year == 2000), stat="identity", position ="stack",
aes(x = year, y = c)) +
geom_bar(data = df %>% filter(year == 2001), stat="identity", position ="stack",
aes(x = year, y = c)) +
geom_bar(data = df %>% filter(year == 2002), stat="identity", position ="stack",
aes(x = year, y = c)) +
labs(y = "year",
x = "c",
title = "Reprex") +
geom_smooth(df, mapping = aes(x = year, y = c), method = "lm",
colour = "black", se = F)
)
#Animation
library(gganimate)
anim <- static_plot +
transition_layers(layer_length = 1, transition_length = 1) +
enter_drift(x_mod = 0, y_mod = -max(df$c))
animate(anim, fps = 10, duration = 10,
width = 600, height = 500, renderer = gifski_renderer())
Here's an approach where the data is replicated and then filtered so each version shows progressively more years.
library(dplyr); library(tidyr)
animate(
df %>%
count(year, wt = c, name = "c") %>% # Aggregate for each year's total
uncount(7, .id = "year_disp") %>% # Make 7 copies, one for each year
arrange(year_disp, year) %>%
mutate(year_disp = year_disp + min(df$year) - 1) %>%
filter(year <= year_disp) %>% # Only keep years up to "year_disp"
ggplot(aes(year, c)) +
geom_col(aes(group = year)) + # "group" here connects each year to itself between frames
geom_smooth(method = "lm", se = F) +
transition_states(year_disp) +
enter_drift(y_mod = -max(df$c)),
fps = 10, duration = 10,
width = 600, height = 500, renderer = gifski_renderer())
The geom-line is calculated in the end and hence it appears only at the end. After each calculation of geom-bar, you have to calculate the geom-line as well, so that the line appears simultaneously with the Bars growing.
geom_bar(data = df %>% filter(year == 1997), stat="identity", position ="stack",
aes(x = year, y = c)) +
geom_line(filter(df, year %in% c(1996, 1997)), mapping = aes(x = year, y = lm),
colour = "black")
Do this for all the years and you should be getting the expected result!

scale_color_manual() for different geoms in ggplot

library(tidyverse)
delta <- tibble(
type = c("alpha", "beta", "gamma"),
a = rnorm(3, 5),
b = rnorm(3, 6)
) %>%
mutate(delta = abs(a - b)) %>%
gather(`a`, `b`, `delta`, key = "letter", value = "value")
ggplot(delta %>% filter(letter != "delta"), aes(type, value, fill = letter)) +
geom_col(position = "dodge") +
geom_col(data = delta %>% filter(letter == "delta"), width = 0.5) +
scale_color_manual("grey", "black", "blue")
I'd like the a and b bars to be grey and black. And the delta bar to be blue. How do I do this with scale_color_manual()? Seems my syntax above is off.
There are two things that need to be changed:
Since you've used fill = letter, you should use scale_fill_manual instead of scale_color_manual (which would have been appropriate if you had used color = letter).
The manual color values should be provided as a vector.
library(tidyverse)
delta <- tibble(
type = c("alpha", "beta", "gamma"),
a = rnorm(3, 5),
b = rnorm(3, 6)
) %>%
mutate(delta = abs(a - b)) %>%
gather(`a`, `b`, `delta`, key = "letter", value = "value")
ggplot(delta %>% filter(letter != "delta"), aes(type, value, fill = letter)) +
geom_col(position = "dodge") +
geom_col(data = delta %>% filter(letter == "delta"), width = 0.5) +
scale_fill_manual(values = c("grey", "black", "blue"))
Created on 2018-10-08 by the reprex package (v0.2.1)

Resources