How to use condition in geom_text / nudge_y - r

I want text labels were above or under of bar cap depending on where is more space for them. Now it's always down which is not always looks good:
Here is my code:
library(tidyr)
library(ggplot2)
library(dplyr)
library(stringr)
library(purrr)
numa.nodes <- tibble (
numa_name = c("numa_01","numa_01","numa_01","numa_01","numa_01","numa_01","numa_02","numa_02","numa_02","numa_02"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
numa_name = c("numa_03","numa_03","numa_03","numa_03","numa_03","numa_03","numa_04","numa_04","numa_04","numa_04"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","memory_used","memory_total","cpu01","cpu02","memory_used","memory_total"),
value = c(sample(0:100,4), sample(0:32,1), 32, sample(0:100,1), sample(0:100,1), sample(0:128,1), 128)
)
numa.nodes <- numa.nodes %>% add_row(
numa_name = c("numa_05","numa_05","numa_05","numa_05","numa_05","numa_05","numa_05"),
counter_name =c("cpu01","cpu02","cpu03","cpu04","cpu05","memory_used","memory_total"),
value = c(sample(1:100,5), sample(1:64,1), 64)
)
numa.nodes <- numa.nodes %>% mutate(counter_name=factor(counter_name,levels = unique(counter_name),ordered = T))
memory_columns <- numa.nodes %>% filter(counter_name=='memory_total')
memory_y_scale <- max(memory_columns$value, na.rm = TRUE) + 6
plot_numa = function(num){
df = numa.nodes %>% filter(str_detect(numa_name, num))
cpu_plot = df %>%
filter(str_detect(counter_name, "cpu")) %>%
ggplot(aes(x = counter_name)) +
geom_col(aes(y = 100), fill = "white", color = "black") +
geom_col(aes(y = value), fill = "#00AFBB", color = "black") +
geom_text(aes(y = value, label = paste0(value,"%")), nudge_y = 5, color = "black") +
theme_bw() +
labs(x = "CPU", y = "")
memory_plot = df %>%
filter(str_detect(counter_name, "memory")) %>%
pivot_wider(names_from = counter_name, values_from = value) %>%
ggplot(aes(x = "") ) +
geom_col(aes(y = memory_total), fill = "white", color = "black") +
geom_col(aes(y = memory_used), fill = "#FC4E07", color = "black") +
geom_text(aes(label = paste(memory_total, "GB"), y = memory_total), nudge_y = 5, color = "black") +
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used), nudge_y = -3, color = "black") +
theme_bw() +
ylim(0, memory_y_scale) +
labs(x = "Memory", y = "")
ggpubr::ggarrange(cpu_plot, memory_plot, ncol = 2) %>% ggpubr::annotate_figure(top = paste("NUMA",num))
}
numa_numbers <- unique(numa.nodes$numa_name) %>% str_remove ("numa_")
ggpubr::ggarrange(plotlist = map(.x = numa_numbers, .f = ~plot_numa(num = .x)))
I tried to change this line:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used), nudge_y = -3, color = "black")
to something like that:
geom_text(aes(label = paste(memory_used, "GB"), y = memory_used),nudge_y = ifelse( (memory_total-memory_used) > 10, 5, -3)
, color = "black")
But I've got an error:
Error in ifelse((memory_total - memory_used) > 10, 5, -3) :
object 'memory_total' not found
Is there a better way to print labels optimal way?
What am I doing wrong?
How to change color of label to more contrast ie black on white, white on red?

Think of it this way: The nudge value will be different (potentially) for every observation in your data frame. That means that this is something that should be handled within aes(), where stuff is designed to change with your data, rather than nudge_y, which is designed to be a constant (and complains if used otherwise).
So, the solution is to do away entirely with nudge_y and build your ifelse() statement directly into aes(y=...).
In this case, here's the replacement for that particular geom_text() line:
# to see the same plot posted here, put this at the top of your code
set.seed(7331)
...
# plot code...
... +
geom_text(aes(
label = paste(memory_used, "GB"),
y = ifelse((memory_total-memory_used > 10), memory_used + 5, memory_used - 3)),
color = "black") +

Related

Combining two heatmaps with the variables next to each other

I'm trying to combine two heatmaps. I want var_a and var_x on the y axis with for example: var_a first and then var_x. I don't know if I should do this by changing the dataframe or combining them, or if I can do this in ggplot.
Below I have some example code and a drawing of what I want (since I don't know if I explained it right).
I hope someone has ideas how I can do this either in the dataframe or in ggplot!
Example code:
df_one <- data.frame(
vars = c("var_a", "var_b", "var_c"),
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_vars = c(5, 10, 20),
expression_organ_2_vars = c(50, 2, 10),
expression_organ_3_vars = c(5, 10, 3)
)
df_one_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_one <- ggplot(df_one_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_one
df_two <- data.frame(
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_corresponding_vars = c(100, 320, 120),
expression_organ_2_corresponding_vars = c(23, 30, 150),
expression_organ_3_corresponding_vars = c(89, 7, 200)
)
df_two_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_two <- ggplot(df_two_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_two
Drawing:
You can bind your data frames together and pivot into a longer format so that vars and corresponding vars are in the same column, but retain a grouping variable to facet by:
df_two %>%
mutate(cor = corresponding_vars) %>%
rename_with(~sub('corresponding_', '', .x)) %>%
bind_rows(df_one %>% rename(cor = corresponding_vars)) %>%
pivot_longer(contains('expression'), names_to = 'organ') %>%
mutate(organ = gsub('expression_|_vars', '', organ)) %>%
group_by(cor) %>%
summarize(vars = vars, organ = organ, value = value,
cor = paste(sort(unique(vars)), collapse = ' cor ')) %>%
ggplot(aes(vars, organ, fill = value)) +
geom_tile(color = 'white', linewidth = 1) +
facet_grid(.~cor, scales = 'free_x', switch = 'x') +
scale_fill_viridis_c() +
coord_cartesian(clip = 'off') +
scale_x_discrete(expand = c(0, 0)) +
theme_minimal(base_size = 16) +
theme(strip.placement = 'outside',
axis.text.x = element_blank(),
axis.ticks.x.bottom = element_line(),
panel.spacing.x = unit(3, 'mm'))
Okay, so I solved the issue for my own project, which is to convert it to a scatter plot. I combined both datasets and then used a simple scatterplot.
df.combined <- dplyr::full_join(df_two_long, df_one_long,
by = c("vars", "corresponding_vars", "tissueType"))
ggplot(df.combined,
aes(x=vars, y=tissueType, colour=Expression.x, size = Expression.y)) +
geom_point()
It's not a solution with heatmaps, but I don't know how to do that at the moment.

Joining 2 bar columns in barcharts with curved line

I have below ggplot:
library(ggplot2)
data = rbind(data.frame('val' = c(10, 30, 15), 'name' = c('A', 'B', 'C'), group = 'gr1'), data.frame('val' = c(30, 40, 12), 'name' = c('A', 'B', 'C'), group = 'gr2'))
ggplot(data, # Draw barplot with grouping & stacking
aes(x = group,
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1)
With this, I am getting below plot
However, I want to connect these bars with a curved area where the area would be equal to the value of the corresponding bar-component. A close example could be like,
Is there any way to achieve this with ggplot?
Any pointer will be very helpful.
This is something like an alluvial plot. There are various extension packages that could help you create such a plot, but it is possible to do it in ggplot directly using a bit of data manipulation.
library(tidyverse)
alluvia <- data %>%
group_by(name) %>%
summarize(x = seq(1, 2, 0.01),
val = pnorm(x, 1.5, 0.15) * diff(val) + first(val))
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = 1:2, labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)
EDIT
A more generalized solution for more than 2 groups would be
library(tidyverse)
alluvia <- data %>%
mutate(group = as.numeric(factor(group)),
name = factor(name)) %>%
arrange(group) %>%
group_by(name) %>%
mutate(next_group = lead(group),
next_val = lead(val)) %>%
filter(!is.na(next_val)) %>%
group_by(name, group) %>%
summarise(x = seq(group + 0.01, next_group - 0.01, 0.01),
val = (next_val - val) * pnorm(x, group + 0.5, 0.15) + val)
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = seq(length(unique(data$group))),
labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)

How to plot average temperature and variation?

I have hourly temperature data from 1970 to 2021. I would like to summarize the dataset into a graph in ggplot showing the mean temperature of each month and year. And I also want to show the average temperature and temperature volatility increased. And I would like to show average temperature in the '2010s is higher than in the '1990s.
Here is the code of temperature density plot in 2010s and 1990s. But I don't know how to set the x axis.
data_re <- data
data_re$Year910 <- ifelse(data$Year %in% c(1990:1999), "1990s",
ifelse(data$Year %in% c(2010:2020), "2010s",NA))
data_re1 <- subset(data_re, Year910 %in% c("1990s","2010s"))
data_re1$Year910 <- factor(data_re1$Year910)
ggplot(data_re1, aes(x=, color=Year910)) +
xlab('Temperature')+
geom_density()
Here is the data file.
https://docs.google.com/spreadsheets/d/1HwPFJ1wKMMr0845Et60tV36WIWXM66Ig/edit?usp=sharing&ouid=111186166036061320361&rtpof=true&sd=true
Any advice on how to best go about this would be greatly appreciated.
I used lubridate library to convert the character to Date.
df = df %>%
pivot_longer(!c(Date,Month,Year), names_to = "HourNo", values_to = "Temp")
df$Date = as.Date(df$Date, format = "%Y-%m-%d")
df = na.omit(df)
df$Abbr = paste(month.abb[month(df$Date)], "-", year(df$Date), sep="")
df_Aggr = df %>%
group_by(Abbr) %>%
summarise(Avg = mean(Temp),
Begin = min(Date))
ggplot(df_Aggr, aes(x =Begin, y=Avg))+geom_line(color="blue", size=1)+
scale_x_date(date_breaks = "24 month", date_labels = "%b-%Y")+
theme(axis.text.x = element_text(vjust = 0.8,
angle = 35, hjust = 0.9))+ggtitle("Average Temperature")
I would first of all ensure your data is in the correct format after importing it. I am assuming you have already loaded your data frame from Excel and named it df:
df <- df[1:624,]
df[2:28] <- lapply(df[2:28], as.numeric)
df$Date <- as.POSIXct(df$Date)
Now we can load a couple of useful packages:
library(tidyverse)
library(geomtextpath)
Then we could summarize and plot with something like the following:
df %>%
rowwise() %>%
mutate(Temp = sum(across(starts_with('Hour')), na.rm = TRUE) / 24) %>%
select(-starts_with('Hour')) %>%
filter(Date > as.POSIXct('1989-12-31')) %>%
group_by(Year) %>%
summarize(Temp = mean(Temp), Date = median(Date)) %>% {
ggplot(., aes(Date, Temp)) +
geom_line(color = 'gray') +
geom_point(color = 'gray75') +
geom_textsegment(aes(x = as.POSIXct('1990-01-01'),
xend = as.POSIXct('1999-12-31'),
y = mean(Temp), yend = mean(Temp), color = '1990s',
label = '1990s'), vjust = -0.2, size = 6,
data = .[.$Date < as.POSIXct('2000-01-01'),], linetype = 2) +
geom_textsegment(aes(x = as.POSIXct('2000-01-01'),
xend = as.POSIXct('2009-12-31'),
y = mean(Temp), yend = mean(Temp), color = '2000s',
label = '2000s'), vjust = -0.2, size = 6,
data = .[.$Date < as.POSIXct('2010-01-01') &
.$Date > as.POSIXct('1999-12-31'),], linetype = 2) +
geom_textsegment(aes(x = as.POSIXct('2010-01-01'),
xend = as.POSIXct('2019-12-31'),
y = mean(Temp), yend = mean(Temp), color = '2010s',
label = '2010s'), vjust = -0.2, size = 6,
data = .[.$Date < as.POSIXct('2020-01-01') &
.$Date > as.POSIXct('2009-12-31'),], linetype = 2) +
theme_light(base_size = 16) +
scale_color_brewer(palette = 'Set1') +
theme(legend.position = 'none') +
labs(title = 'Annual mean temperature')
Note that I have used an annual summary of the temperature here. If you use monthly temperatures, the range of temperatures becomes much larger and the message of the plot becomes much weaker:
df %>%
rowwise() %>%
mutate(Temp = sum(across(starts_with('Hour')), na.rm = TRUE) / 24) %>%
select(-starts_with('Hour')) %>%
filter(Date > as.POSIXct('1989-12-31')) %>% {
ggplot(., aes(Date, Temp)) +
geom_line(color = 'gray') +
geom_point(color = 'gray75') +
geom_textsegment(aes(x = as.POSIXct('1990-01-01'),
xend = as.POSIXct('1999-12-31'),
y = mean(Temp), yend = mean(Temp), color = '1990s',
label = '1990s'), vjust = -0.2, size = 6,
data = .[.$Date < as.POSIXct('2000-01-01'),], linetype = 2) +
geom_textsegment(aes(x = as.POSIXct('2000-01-01'),
xend = as.POSIXct('2009-12-31'),
y = mean(Temp), yend = mean(Temp), color = '2000s',
label = '2000s'), vjust = -0.2, size = 6,
data = .[.$Date < as.POSIXct('2010-01-01') &
.$Date > as.POSIXct('1999-12-31'),], linetype = 2) +
geom_textsegment(aes(x = as.POSIXct('2010-01-01'),
xend = as.POSIXct('2019-12-31'),
y = mean(Temp), yend = mean(Temp), color = '2010s',
label = '2010s'), vjust = -0.2, size = 6,
data = .[.$Date < as.POSIXct('2020-01-01') &
.$Date > as.POSIXct('2009-12-31'),], linetype = 2) +
theme_light(base_size = 16) +
scale_color_brewer(palette = 'Set1') +
theme(legend.position = 'none') +
labs(title = 'Annual mean temperature')
}

How to adjust ggrepel label on pie chart?

I am trying to create a pie chart to visualize percent abundance of 9 genera. However, the labels are all clumping together. How do I remedy this? Code included below:
generaabundance2020 <- c(883, 464, 1948, 1177, 2607, 962, 2073, 620, 2670)
genera2020 <- c("Andrena", "Ceratina", "Halictus",
"Hesperapis", "Lasioglossum", "Melissodes",
"Osmia", "Panurginus", "Other")
generabreakdown2020 <- data.frame(group = genera2020, value = generaabundance2020)
gb2020label <- generabreakdown2020 %>%
group_by(value) %>% # Variable to be transformed
count() %>%
ungroup() %>%
mutate(perc = `value` / sum(`value`)) %>%
arrange(perc) %>%
mutate(labels = scales::percent(perc))
generabreakdown2020 %>%
ggplot(aes(x = "", y = value, fill = group)) +
geom_col() +
coord_polar("y", start = 0) +
theme_void() +
geom_label_repel(aes(label = gb2020label$labels), position = position_fill(vjust = 0.5),
size = 5, show.legend = F, max.overlaps = 50) +
guides(fill = guide_legend(title = "Genera")) +
scale_fill_manual(values = c("brown1", "chocolate1",
"darkgoldenrod1", "darkgreen",
"deepskyblue", "darkslateblue",
"darkorchid4", "hotpink1",
"lightpink"))
Which produces the following:
Thanks for adding your data.
There are a few errors in your code. The main one is that you didn't precalculate where to place the labels (done here in the text_y variable). That variable needs to be passed as the y aesthetic for geom_label_repel.
The second is that you no longer need
group_by(value) %>% count() %>% ungroup() because the data you provided is already aggregated.
library(tidyverse)
library(ggrepel)
generaabundance2020 <- c(883, 464, 1948, 1177, 2607, 962, 2073, 620, 2670)
genera2020 <- c("Andrena", "Ceratina", "Halictus", "Hesperapis", "Lasioglossum", "Melissodes", "Osmia", "Panurginus", "Other")
generabreakdown2020 <- data.frame(group = genera2020, value = generaabundance2020)
gb2020label <-
generabreakdown2020 %>%
mutate(perc = value/ sum(value)) %>%
mutate(labels = scales::percent(perc)) %>%
arrange(desc(group)) %>% ## arrange in the order of the legend
mutate(text_y = cumsum(value) - value/2) ### calculate where to place the text labels
gb2020label %>%
ggplot(aes(x = "", y = value, fill = group)) +
geom_col() +
coord_polar(theta = "y") +
geom_label_repel(aes(label = labels, y = text_y),
nudge_x = 0.6, nudge_y = 0.6,
size = 5, show.legend = F) +
guides(fill = guide_legend(title = "Genera")) +
scale_fill_manual(values = c("brown1", "chocolate1",
"darkgoldenrod1", "darkgreen",
"deepskyblue", "darkslateblue",
"darkorchid4", "hotpink1",
"lightpink"))
If you want to arrange in descending order of frequency, you should remember to also set the factor levels of the group variable to the same order.
gb2020label <-
generabreakdown2020 %>%
mutate(perc = value/ sum(value)) %>%
mutate(labels = scales::percent(perc)) %>%
arrange(desc(perc)) %>% ## arrange in descending order of frequency
mutate(group = fct_rev(fct_inorder(group))) %>% ## also arrange the groups in descending order of freq
mutate(text_y = cumsum(value) - value/2) ### calculate where to place the text labels
gb2020label %>%
ggplot(aes(x = "", y = value, fill = group)) +
geom_col() +
coord_polar(theta = "y") +
geom_label_repel(aes(label = labels, y = text_y),
nudge_x = 0.6, nudge_y = 0.6,
size = 5, show.legend = F) +
guides(fill = guide_legend(title = "Genera")) +
scale_fill_manual(values = c("brown1", "chocolate1",
"darkgoldenrod1", "darkgreen",
"deepskyblue", "darkslateblue",
"darkorchid4", "hotpink1",
"lightpink"))
Created on 2021-10-27 by the reprex package (v2.0.1)
You didn't provide us with your data to work with so I'm using ggplot2::mpg here.
library(tidyverse)
library(ggrepel)
mpg_2 <-
mpg %>%
slice_sample(n = 20) %>%
count(manufacturer) %>%
mutate(perc = n / sum(n)) %>%
mutate(labels = scales::percent(perc)) %>%
arrange(desc(manufacturer)) %>%
mutate(text_y = cumsum(n) - n/2)
Chart without polar coordinates
mpg_2 %>%
ggplot(aes(x = "", y = n, fill = manufacturer)) +
geom_col() +
geom_label(aes(label = labels, y = text_y))
Chart with polar coordinates and geom_label_repel
mpg_2 %>%
ggplot(aes(x = "", y = n, fill = manufacturer)) +
geom_col() +
geom_label_repel(aes(label = labels, y = text_y),
force = 0.5,nudge_x = 0.6, nudge_y = 0.6) +
coord_polar(theta = "y")
But maybe your data isn’t dense enough to need repelling?
mpg_2 %>%
ggplot(aes(x = "", y = n, fill = manufacturer)) +
geom_col() +
geom_label(aes(label = labels, y = text_y), nudge_x = 0.6) +
coord_polar(theta = "y")
Created on 2021-10-26 by the reprex package (v2.0.1)

R label with commas but no decimals

My goal is to produce labels with commas, but no decimals. Let's say I have a ggplot with the following section:
geom_text(aes(y = var,
label = scales::comma(round(var))), hjust = 0, nudge_y = 300 )
This is almost what I need. It gives me the commas, but has a decimal. I have seen here (axis labels with comma but no decimals ggplot) that comma_format() could be good, but I think the label in my case needs a data argument, which comma_format() does not take. What can I do?
Update:
As an example of when this problem occurs, see the following, which uses gganimate and has a lot more going on. Code derived from Jon Spring's answer at Animated sorted bar chart with bars overtaking each other
library(gapminder)
library(gganimate)
library(tidyverse)
gap_smoother <- gapminder %>%
filter(continent == "Asia") %>%
group_by(country) %>%
complete(year = full_seq(year, 1)) %>%
mutate(gdpPercap = spline(x = year, y = gdpPercap, xout = year)$y) %>%
group_by(year) %>%
mutate(rank = min_rank(-gdpPercap) * 1) %>%
ungroup() %>%
group_by(country) %>%
complete(year = full_seq(year, .5)) %>%
mutate(gdpPercap = spline(x = year, y = gdpPercap, xout = year)$y) %>%
mutate(rank = approx(x = year, y = rank, xout = year)$y) %>%
ungroup() %>%
arrange(country,year)
gap_smoother2 <- gap_smoother %>% filter(year<=2007 & year>=1999)
gap_smoother3 <- gap_smoother2 %<>% filter(rank<=8)
p <- ggplot(gap_smoother3, aes(rank, group = country,
fill = as.factor(country), color = as.factor(country))) +
geom_tile(aes(y = gdpPercap/2,
height = gdpPercap,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = 0, label = paste(country, " ")), vjust = 0.2, hjust = 1) +
geom_text(aes(y = gdpPercap,
label = scales::comma(round(gdpPercap))), hjust = 0, nudge_y = 300 ) +
coord_flip(clip = "off", expand = FALSE) +
scale_x_reverse() +
guides(color = FALSE, fill = FALSE) +
labs(title='{closest_state %>% as.numeric %>% floor}',
x = "", y = "GFP per capita") +
theme(plot.title = element_text(hjust = 0, size = 22),
axis.ticks.y = element_blank(), # These relate to the axes post-flip
axis.text.y = element_blank(), # These relate to the axes post-flip
plot.margin = margin(1,1,1,4, "cm")) +
transition_states(year, transition_length = 1, state_length = 0) +
enter_grow() +
exit_shrink() +
ease_aes('linear')
animate(p, fps = 2, duration = 5, width = 600, height = 500)
In addition to the solution provided by #drf, you need to add scale_y_continuous(scales::comma) to your ggplot commands. But put it before the coord_flip function.
p <- ggplot(gap_smoother3, aes(rank, group = country,
fill = as.factor(country), color = as.factor(country))) +
geom_tile(aes(y = gdpPercap/2,
height = gdpPercap,
width = 0.9), alpha = 0.8, color = NA) +
geom_text(aes(y = gdpPercap,
label = scales::comma(round(gdpPercap), accuracy=1)),
hjust = 0, nudge_y = 300 ) +
scale_y_continuous(labels = scales::comma) +
... etc.

Resources