Changing the axis labels in a group of stacked bar charts - r

I have the following code
library(tibble)
library(tidyr)
library(dplyr)
library(ggplot2)
test <- tibble(
cat1 = c(rep('foo',6),rep('bar',6)),
cat2 = rep(c('g1','g2','g3','g4','g5','g6'), 2),
zoom = rnorm(12, 0, 10),
zaps = rnorm(12, 5, 10),
buzz = rnorm(12, -5, 10)
) %>% pivot_longer(c(zoom,zaps,buzz),names_to = 'cat3', values_to='value')
test2 <- inner_join(test, summarise(group_by(test, cat1, cat2), agg = sum(value)))
ggplot(test2, aes(x = cat1, y = value, fill = cat3, label = as.integer(value))) +
geom_bar(stat = 'identity', position = 'stack') +
geom_text(position = position_stack(vjust = 0.5), size = 3, color = "#555555") +
geom_errorbar(aes(ymin = agg, ymax = agg)) +
facet_grid(~ cat2)
which produces the following chart:
I like this and I am mostly happy with it, but I would love to include a sum total label for each column (the same value as the horizontal black line) somewhere in the column, ideally either at the bottom above/below the x axis labels or above the top edge of the plot below the g1,g2...
Can I do this by changing the displayed labels soo foo in g1 would be 'foo\n8' ? or is there a generic way to tell ggplot to put a number above the bar/foo labels in the plot or above the top edge of the top column component?

You may try this way. Please let me know if I miss something or I'm wrong with your purpose.
df2 %>%
group_by(cat1, cat2) %>%
mutate(n = sum(as.integer(value))) %>%
rowwise %>%
mutate(cat1 = paste0(c(cat1, n), collapse = "\n")) %>%
ggplot(aes(x = cat1, y = value, fill = cat3, label = as.integer(value))) +
geom_bar(stat = 'identity', position = 'stack') +
geom_text(position = position_stack(vjust = 0.5), size = 3, color = "#555555") +
geom_errorbar(aes(ymin = agg, ymax = agg)) +
facet_wrap(~ cat2, scales = "free_x", ncol = 6)

Related

selectize widget of ggplotly highlight not always visible (depends on order of geoms?)

I want to do an interactive scatterplot where I can
highlight individual points
a tooltip shows me the id
search for specific id with a selectize widget
I tried for some time with plotly and ended up with this code
library(tidyverse)
library(plotly)
set.seed(1)
dat <- tibble(id = LETTERS[1:10],
trt = factor(rep(0:1, 5)),
x = rnorm(10),
y = x + rnorm(10, sd = 0.2)) %>%
highlight_key(~id)
dat %>%
{ggplot(., aes(x = x, y = y, group = id, color = trt)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "dashed")} %>%
ggplotly(tooltip = c("id")) %>%
highlight(on = "plotly_hover", selectize = TRUE)
It took my very long to understand that the order of geoms seems to be important
## no color, geom order reversed
## selectize.js widget is completely missing
dat %>%
{ggplot(., aes(x = x, y = y, group = id)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_point()} %>%
ggplotly(tooltip = c("id")) %>%
highlight(on = "plotly_hover", selectize = TRUE)
## color by trt, geom order reversed
## selectize.js widget only works for data where t = 0
dat %>%
{ggplot(., aes(x = x, y = y, group = id, color = trt)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_point()} %>%
ggplotly(tooltip = c("id")) %>%
highlight(on = "plotly_hover", selectize = TRUE)
Can somebody explain this strange behavior? What if I would like to reverse the order of geoms i.e. hline ploted behind points?

gganimate - have geom_rect adjust each frame

I have the following data:
library(ggplot2)
library(gganimate)
library(tidyverse)
createData<- function(vintage, id){#create data
# Generate a sequence of dates from 2010-01-01 to 2025-12-31 with a quarterly frequency
Dates <- seq(from = as.Date("2010-01-01"), to = as.Date("2025-12-31"), by = "quarter")
RLG<- cumsum(sample(c(-1, 1), 64, TRUE))
df<- data.frame( Dates,RLG, vintage,id)
return(df)
}
#createData
df<- createData("2018-01-01",1) %>%
rbind(createData("2019-01-01",2))%>%
rbind(createData("2020-01-01",3)) %>%
rbind(createData("2021-01-01",4))%>%
rbind(createData("2022-01-01",5))%>%
rbind(createData("2023-01-01",6))%>%
rbind(createData("2024-01-01",7))%>%
rbind(createData("2025-01-01",8))
Which I use to make the following chart:
options(gganimate.nframes = 8*length(unique(df$vintage)), gganimate.res = 30)
p<- ggplot(df) +
aes(x = Dates, y = RLG, group = as.Date(vintage), colour = "RLG") +
geom_line()+
scale_y_continuous(labels = \(x) paste0(x, "%"))+
theme(axis.title = element_blank(),legend.position="none")+
transition_time(id)+
exit_fade(alpha = 0.5)+
shadow_mark(alpha = 0.2)
animate(p, end_pause = 30)
I would like to add a geom_rect which goes from vintage to max(Dates). At each frame, vintage will increase, so the geom_rect will shrink slightly. How can I do this without interfering with the shadow_mark and exit_fades which I am applying to the lines?
If you mean something like a progress bar you could do it like so:
create an DF for the geom which is a subset of the original
df_geom <- df |>
mutate(vintage = as.Date(vintage)) |>
group_by(id) |>
slice(n())
Use geom_segment with the DF from above.
If you want to leave shadow_mark in you can do shadow_mark(exclude_layer = 2).
p <- ggplot(df) +
aes(x = Dates, y = RLG, group = as.Date(vintage), colour = RLG) +
geom_line()+
scale_y_continuous(labels = \(x) paste0(x, "%"))+
theme(axis.title = element_blank(),legend.position="none") +
geom_segment(
data = df_geom,
mapping = aes(x=vintage, xend=Dates,
y = 18, yend = 18),
size = 10, alpha =.4, color ='lightblue'
) +
transition_time(id)+
exit_fade(alpha = 0.5)
# shadow_mark(alpha = 0.2)
animate(p)

Joining 2 bar columns in barcharts with curved line

I have below ggplot:
library(ggplot2)
data = rbind(data.frame('val' = c(10, 30, 15), 'name' = c('A', 'B', 'C'), group = 'gr1'), data.frame('val' = c(30, 40, 12), 'name' = c('A', 'B', 'C'), group = 'gr2'))
ggplot(data, # Draw barplot with grouping & stacking
aes(x = group,
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1)
With this, I am getting below plot
However, I want to connect these bars with a curved area where the area would be equal to the value of the corresponding bar-component. A close example could be like,
Is there any way to achieve this with ggplot?
Any pointer will be very helpful.
This is something like an alluvial plot. There are various extension packages that could help you create such a plot, but it is possible to do it in ggplot directly using a bit of data manipulation.
library(tidyverse)
alluvia <- data %>%
group_by(name) %>%
summarize(x = seq(1, 2, 0.01),
val = pnorm(x, 1.5, 0.15) * diff(val) + first(val))
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = 1:2, labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)
EDIT
A more generalized solution for more than 2 groups would be
library(tidyverse)
alluvia <- data %>%
mutate(group = as.numeric(factor(group)),
name = factor(name)) %>%
arrange(group) %>%
group_by(name) %>%
mutate(next_group = lead(group),
next_val = lead(val)) %>%
filter(!is.na(next_val)) %>%
group_by(name, group) %>%
summarise(x = seq(group + 0.01, next_group - 0.01, 0.01),
val = (next_val - val) * pnorm(x, group + 0.5, 0.15) + val)
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = seq(length(unique(data$group))),
labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)

How to highlight points from and hist chart in R

I have some troubles with my code. I'm very very beginner in R, so I would like some help. I have a dataframe and I need to make an hist chart and then highlight some points. But I cannot understand how to find those points in my dataset. Here is and example of what I have.
x <- c("a","b","c","d","f","g","h","i","j","k")
y <- c(197421,77506,130474,18365,30470,22518,70183,15378,29747,11148)
z <- data.frame(x,y)
hist(z$y)
For example, how can I find in the hist where is the "a" and "h" value placed? and in a barplot? I tried the function points, but I cannot find the coordinates. Please let me know how could I make that . Thanks in advance.
Here is a way with dplyr and ggplot2. The approach is to cut the y variable into bins and then use summarise to create the counts and the labels.
library(dplyr)
library(ggplot2)
z %>%
mutate(bins = cut(y, seq(0, 200000, 50000))) %>%
group_by(bins) %>%
summarise(xes = paste0(x, collapse = ", "),
count = n()) %>%
ggplot() +
geom_bar(aes(x = bins, y = count), stat = "identity", color = "black", fill = "grey") +
geom_text(aes(x = bins, y = count + 0.5, label = xes)) +
xlab("y")
Here is a more complicated way that makes a plot that looks more like what hist() produces.
z2 <- z %>%
mutate(bins = cut(y, seq(0, 200000, 50000))) %>%
group_by(bins) %>%
summarise(xes = paste0(x, collapse = ", "),
count = n()) %>%
separate(bins, into = c("start", "end"), sep = ",") %>%
mutate(across(start:end, ~as.numeric(str_remove(., "\\(|\\]"))))
ggplot() +
geom_histogram(data = z, aes(x = y), breaks = seq(0, 200000, 50000),
color = "black", fill = "grey") +
geom_text(data = z2, aes(x = (start + end) / 2, y = count + 0.5, label = xes))

ggplot monthly date scale on x axis uses days as units

When plotting a bar chart with monthly data, ggplot shortens the distance between February and March, making the chart look inconsistent
require(dplyr)
require(ggplot2)
require(lubridate)
## simulating sample data
set.seed(.1073)
my_df <- data.frame(my_dates = sample(seq(as.Date('2010-01-01'), as.Date('2016-12-31'), 1), 1000, replace = TRUE))
### aggregating + visualizing counts per month
my_df %>%
mutate(my_dates = round_date(my_dates, 'month')) %>%
group_by(my_dates) %>%
summarise(n_row = n()) %>%
ggplot(aes(x = my_dates, y = n_row))+
geom_bar(stat = 'identity', color = 'black',fill = 'slateblue', alpha = .5)+
scale_x_date(date_breaks = 'months', date_labels = '%y-%b') +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
I would keep the dates as dates rather than factors. Yes, factors will keep the bars uniform in size but you'll have to remember to join in any months that are missing so that blank months aren't skipped and factors are easy to get out of order. I would recommend adjusting your aesthetics to reduce the effect that the black outline has on the gap between February and March.
Here are two examples:
Adjust the outline color to be white. This will reduce the contrast and makes the gap less noticible.
Set the width to 20 (days).
As an aside, you don't need to summarize the data, you can use floor_date() or round_date() in an earlier step and go straight into geom_bar().
dates <- seq(as.Date("2010-01-01"), as.Date("2016-12-31"), 1)
set.seed(.1073)
my_df <-
tibble(
my_dates = sample(dates, 1000, replace = TRUE),
floor_dates = floor_date(my_dates, "month")
)
ggplot(my_df, aes(x = floor_dates)) +
geom_bar(color = "white", fill = "slateblue", alpha = .5)
ggplot(my_df, aes(x = floor_dates)) +
geom_bar(color = "black", fill = "slateblue", alpha = .5, width = 20)
using some parts from IceCream's answer you can try this.
Of note, geom_col is now recommended to use in this case.
my_df %>%
mutate(my_dates = factor(round_date(my_dates, 'month'))) %>%
group_by(my_dates) %>%
summarise(n_row = n()) %>%
ungroup() %>%
mutate(my_dates_x = as.numeric(my_dates)) %>%
mutate(my_dates_label = paste(month(my_dates,label = T), year(my_dates))) %>%
{ggplot(.,aes(x = my_dates_x, y = n_row))+
geom_col(color = 'black',width = 0.8, fill = 'slateblue', alpha = .5) +
scale_x_continuous(breaks = .$my_dates_x, labels = .$my_dates_label) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))}
You can convert it to a factor variable to use as the axis, and fix the formatting with a label argument to scale_x_discrete.
library(dplyr)
library(ggplot2)
my_df %>%
mutate(my_dates = factor(round_date(my_dates, 'month'))) %>%
group_by(my_dates) %>%
summarise(n_row = n()) %>%
ggplot(aes(x = my_dates, y = n_row))+
geom_bar(stat = 'identity', color = 'black',fill = 'slateblue', alpha = .5)+
scale_x_discrete(labels = function(x) format(as.Date(x), '%Y-%b'))+
theme(axis.text.x = element_text(angle = 60, hjust = 1))
Edit: Alternate method to account for possibly missing months which should be represented as blank spaces in the plot.
library(dplyr)
library(ggplot2)
library(lubridate)
to_plot <-
my_df %>%
mutate(my_dates = round_date(my_dates, 'month'),
my_dates_ticks = interval(min(my_dates), my_dates) %/% months(1))
to_plot %>%
group_by(my_dates_ticks) %>%
summarise(n_row = n()) %>%
ggplot(aes(x = my_dates_ticks, y = n_row))+
geom_bar(stat = 'identity', color = 'black',fill = 'slateblue', alpha = .5)+
scale_x_continuous(
breaks = unique(to_plot$my_dates_ticks),
labels = function(x) format(min(to_plot$my_dates) + months(x), '%y-%b'))+
theme(axis.text.x = element_text(angle = 60, hjust = 1))

Resources