ggplot2 geom_segment by group - r

I am trying to draw separate line segments for each of the countries (A, B, C) in the plot.
I used the variable country for the group argument (as the docs suggest), but that does not work. The line is still a continuous line connecting all the text labels, but I need 3 separate lines to be drawn, one for each country, connecting the 3 text labels across the years.
library(dplyr)
library(ggplot2)
df_p <- data.frame(
year = rep(2019:2021, each = 3),
country = rep(LETTERS[1:3], 3),
var_a = c(1,6,10,2,5,7,3,7,9),
var_b = c(2,8,14,4,9,15,2,9,19)
)
# Sort so that each country's rows are contiguous and ordered by year, then
# draw the year labels plus one arrow per row pointing at the "next" row.
df_p %>% arrange(country, year) %>%
ggplot(aes(x = var_a, y = var_b, color = country)) +
geom_text(aes(label = year)) +
geom_segment(
aes(
# xend/yend are the following row's coordinates taken over the whole sorted
# column, not within each country: the last row of one country therefore
# gets the first row of the next country as its end point, and only the
# very last row gets NA.
xend = c(tail(var_a, n = -1), NA),
yend = c(tail(var_b, n = -1), NA),
# NOTE(review): `group` does not appear to influence how the end points
# above are computed, which would explain the single continuous line.
group = country
),
arrow = arrow(type = "open", length = unit(0.15, "inches"))
)

I think you just need geom_path instead of geom_segment.
Try this:
# Year labels joined by one arrowed path per country. The rows are sorted by
# country and year first so that each path runs through the years in order.
df_p %>%
  arrange(country, year) %>%
  ggplot(mapping = aes(x = var_a, y = var_b, color = country)) +
  geom_text(mapping = aes(label = year)) +
  geom_path(
    mapping = aes(group = country),
    arrow = arrow(length = unit(0.15, "inches"), type = "open")
  )

Another possible solution with geom_polygon() without showing the direction of the connections:
Sample data:
df_p <- data.frame(
year = rep(2019:2021, each = 3),
country = rep(LETTERS[1:3], 3),
var_a = c(1,6,10,2,5,7,3,7,9),
var_b = c(2,8,14,4,9,15,2,9,19)
)
Sample code:
library(dplyr)
library(ggplot2)
# Per-country view of var_b against var_a: points, a connecting line, year
# labels, and a translucent polygon spanned by each country's points.
df_p %>%
  arrange(country, year) %>%
  ggplot(aes(x = var_a, y = var_b, group = country)) +
  geom_point(aes(colour = country, shape = country), size = 4) +
  geom_line(aes(colour = country), size = 1) +
  geom_text(aes(label = year)) +
  geom_polygon(aes(fill = country), alpha = .4) +
  # x is mapped to var_a and y to var_b, so the axis titles follow that order.
  labs(x = "Variable A", y = "Variable B") +
  theme_bw()
Output:

Related

selectize widget of ggplotly highlight not always visible (depends on order of geoms?)

I want to do an interactive scatterplot where I can
highlight individual points
a tooltip shows me the id
search for specific id with a selectize widget
I tried for some time with plotly and ended up with this code
library(tidyverse)
library(plotly)
set.seed(1)
dat <- tibble(id = LETTERS[1:10],
trt = factor(rep(0:1, 5)),
x = rnorm(10),
y = x + rnorm(10, sd = 0.2)) %>%
highlight_key(~id)
dat %>%
{ggplot(., aes(x = x, y = y, group = id, color = trt)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "dashed")} %>%
ggplotly(tooltip = c("id")) %>%
highlight(on = "plotly_hover", selectize = TRUE)
It took me a very long time to understand that the order of geoms seems to be important
## no color, geom order reversed
## selectize.js widget is completely missing
dat %>%
{ggplot(., aes(x = x, y = y, group = id)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_point()} %>%
ggplotly(tooltip = c("id")) %>%
highlight(on = "plotly_hover", selectize = TRUE)
## color by trt, geom order reversed
## selectize.js widget only works for data where trt = 0
dat %>%
{ggplot(., aes(x = x, y = y, group = id, color = trt)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_point()} %>%
ggplotly(tooltip = c("id")) %>%
highlight(on = "plotly_hover", selectize = TRUE)
Can somebody explain this strange behavior? And what if I would like to reverse the order of the geoms, i.e. have the hline plotted behind the points?

Simple one about Alluvial plot in R

I would like to make a simple flow graph.
Here is my code:
## Data
x = tibble(qms = c("FLOW", "FLOW"),
move1 = c("Birth", "Birth"),
move2 = c("Direct", NA),
freq = c(100, 50))
## Graph
x %>%
mutate(id = qms) %>%
to_lodes_form(axis = 2:3, id = id) %>%
na.omit() %>%
ggplot(aes(x = x, stratum = stratum, alluvium = id,
y = freq, label = stratum)) +
scale_x_discrete(expand = c(.1, .1)) +
geom_flow(aes(fill = qms),stat = "alluvium") +
geom_stratum(aes(fill = stratum), show.legend=FALSE) +
geom_text(stat = "stratum", size = 3)
This is the outcome:
My desired outcome is that:
How can I express the decreasing pattern with the missing value?
By slightly reshaping your data you can get what you want. I think the key is to map the alluvium to something fixed like 1 so that it will be a single flow, and mapping stratum to the same variable as x.
library(tidyverse)
library(ggalluvial)
x <- tibble(x = c("Birth", "Direct"),
y = c(100, 50))
x %>%
ggplot(aes(x, y, alluvium = 1, stratum = x)) +
geom_alluvium() +
geom_stratum()
Created on 2022-11-15 with reprex v2.0.2

Joining 2 bar columns in barcharts with curved line

I have below ggplot:
library(ggplot2)
data = rbind(data.frame('val' = c(10, 30, 15), 'name' = c('A', 'B', 'C'), group = 'gr1'), data.frame('val' = c(30, 40, 12), 'name' = c('A', 'B', 'C'), group = 'gr2'))
ggplot(data, # Draw barplot with grouping & stacking
aes(x = group,
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1)
With this, I am getting below plot
However, I want to connect these bars with a curved area where the area would be equal to the value of the corresponding bar-component. A close example could be like,
Is there any way to achieve this with ggplot?
Any pointer will be very helpful.
This is something like an alluvial plot. There are various extension packages that could help you create such a plot, but it is possible to do it in ggplot directly using a bit of data manipulation.
library(tidyverse)
# Build, for every `name`, a smooth curve between its two bar heights so the
# stacked bars can be joined by an area layer.
# Assumes each name has exactly two rows in `data`, ordered by group: diff(val)
# must yield a single number and first(val) must be the left-hand bar.
alluvia <- data %>%
group_by(name) %>%
# x is a fine grid between bar positions 1 and 2. pnorm(x, 1.5, 0.15) rises
# from ~0 at x = 1 to ~1 at x = 2, so val moves along an S-shaped curve from
# first(val) to first(val) + diff(val).
# NOTE(review): this summarize() returns many rows per group; newer dplyr
# releases reportedly prefer reframe() for that — confirm against the
# installed version.
summarize(x = seq(1, 2, 0.01),
val = pnorm(x, 1.5, 0.15) * diff(val) + first(val))
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = 1:2, labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)
EDIT
A more generalized solution for more than 2 groups would be
library(tidyverse)
# Build the connecting curves between every pair of neighbouring bars, for any
# number of groups.
alluvia <- data %>%
# Turn the group labels into numeric positions (the index of the factor level).
mutate(group = as.numeric(factor(group)),
name = factor(name)) %>%
arrange(group) %>%
group_by(name) %>%
# Within each name, look one row ahead to get the next bar's position and value.
mutate(next_group = lead(group),
next_val = lead(val)) %>%
# The last bar of each name has no successor, so it starts no curve.
filter(!is.na(next_val)) %>%
group_by(name, group) %>%
# For each (name, group) pair, lay a fine x grid strictly between the two bars
# and move val from the current to the next value along a normal-CDF S-curve
# centred at group + 0.5.
# NOTE(review): the centre group + 0.5 presumes neighbouring bars are exactly
# one unit apart, i.e. every name occurs in every group — confirm for new data.
summarise(x = seq(group + 0.01, next_group - 0.01, 0.01),
val = (next_val - val) * pnorm(x, group + 0.5, 0.15) + val)
ggplot(data,
aes(x = as.numeric(factor(group)),
y = val,
fill = name)) +
geom_bar(stat = "identity",
position = "stack", width = .1) +
geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
scale_x_continuous(breaks = seq(length(unique(data$group))),
labels = levels(factor(data$group)),
name = "Group", expand = c(0.25, 0.25)) +
scale_fill_brewer(palette = "Set2") +
theme_light(base_size = 20)

Changing the axis labels in a group of stacked bar charts

I have the following code
library(tibble)
library(tidyr)
library(dplyr)
library(ggplot2)
test <- tibble(
cat1 = c(rep('foo',6),rep('bar',6)),
cat2 = rep(c('g1','g2','g3','g4','g5','g6'), 2),
zoom = rnorm(12, 0, 10),
zaps = rnorm(12, 5, 10),
buzz = rnorm(12, -5, 10)
) %>% pivot_longer(c(zoom,zaps,buzz),names_to = 'cat3', values_to='value')
test2 <- inner_join(test, summarise(group_by(test, cat1, cat2), agg = sum(value)))
ggplot(test2, aes(x = cat1, y = value, fill = cat3, label = as.integer(value))) +
geom_bar(stat = 'identity', position = 'stack') +
geom_text(position = position_stack(vjust = 0.5), size = 3, color = "#555555") +
geom_errorbar(aes(ymin = agg, ymax = agg)) +
facet_grid(~ cat2)
which produces the following chart:
I like this and I am mostly happy with it, but I would love to include a sum total label for each column (the same value as the horizontal black line) somewhere in the column, ideally either at the bottom above/below the x axis labels or above the top edge of the plot below the g1,g2...
Can I do this by changing the displayed labels, so that foo in g1 would be 'foo\n8'? Or is there a generic way to tell ggplot to put a number above the bar/foo labels in the plot, or above the top edge of the top column component?
You may try it this way. Please let me know if I missed something or misunderstood your goal.
# Append each column's total to its x-axis label (e.g. "foo\n8") and redraw
# the stacked, faceted bar chart. `test2` is the joined data frame built above;
# it already carries the `agg` column used by geom_errorbar().
test2 %>%
  group_by(cat1, cat2) %>%
  # n adds up the truncated per-segment values that are printed inside the
  # bars, so it can differ slightly from agg, which sums the raw values.
  mutate(n = sum(as.integer(value))) %>%
  # Drop the grouping before rewriting cat1, which is a grouping column.
  ungroup() %>%
  mutate(cat1 = paste(cat1, n, sep = "\n")) %>%
  ggplot(aes(x = cat1, y = value, fill = cat3, label = as.integer(value))) +
  geom_bar(stat = "identity", position = "stack") +
  geom_text(position = position_stack(vjust = 0.5), size = 3, color = "#555555") +
  geom_errorbar(aes(ymin = agg, ymax = agg)) +
  # The labels now differ between facets, so each facet needs its own x scale.
  facet_wrap(~ cat2, scales = "free_x", ncol = 6)

How to use crosstalk with bar + line plot in r?

I am new to crosstalk and am trying to make an R Markdown file more interactive by using it on a bar + line plot, but the line is not drawn on the plot, and the plot also gets weird when I change the country.
library(tidyverse)
library(plotly)
library(crosstalk)
library(glue)
library(scales)
library(tidytext)
load data:
file_url <- "https://raw.githubusercontent.com/johnsnow09/covid19-df_stack-code/main/test_crosswalk.csv"
test_df <- read.csv(url(file_url))
Country_selected = c("Brazil")
selected_case_type = c("Confirmed_daily")
# Daily counts of the selected case type for all countries, with a running
# mean per country, wrapped in a crosstalk SharedData object.
trend_sd <- test_df %>%
  # Only the case type is filtered here; the country filter stays disabled:
  # Country.Region %in% Country_selected
  filter(Daily_Cases_type %in% selected_case_type) %>%
  select(Country.Region, date, Cases_count) %>%
  arrange(date) %>%
  group_by(Country.Region) %>%
  # Running mean: cumulative sum divided by the number of rows seen so far.
  mutate(new_avg = cumsum(Cases_count) / seq_along(Cases_count)) %>%
  ungroup() %>%
  SharedData$new()
# Lay out a country selector together with the interactive bar + line chart,
# both driven by the shared `trend_sd` data.
bscols(widths = c(9, 3),
list(
filter_select(id = "country", label = "Country:", sharedData = trend_sd, group = ~ Country.Region),
# Bars show the daily counts; points and line show the running mean new_avg.
ggplotly(ggplot(data = trend_sd) +
geom_col(aes(x = date, y = Cases_count), fill = "turquoise", alpha = .3) +
geom_point(aes(x = date, y = new_avg), col = "tomato") +
geom_line(aes(x = date, y = new_avg), col = "tomato", size = .9, alpha = .3) +
scale_y_continuous(labels = comma) +
# expand_limits(y = 100000) +
# NOTE(review): `date_from` is not defined anywhere in the code shown here;
# glue() needs it to exist when the title is built — confirm it is set earlier.
labs(title = glue("{Country_selected}'s {selected_case_type} Cases {date_from} onwards"),
caption = "Data source: covid19.analytics")
))
)
This doesn't give the correct line plot, and when I change the country to a different one the bars get distorted.
Code & Plot Result below without crosstalk & plotly:
Country_selected = c("India") # can be selective
selected_case_type = c("Confirmed_daily")
test_df %>%
filter(Daily_Cases_type %in% selected_case_type,
Country.Region %in% Country_selected,
) %>%
select(Country.Region, date, Cases_count)%>%
arrange(date) %>%
group_by(Country.Region) %>%
mutate(new_avg = cumsum(Cases_count)/ seq_len(length(Cases_count))) %>%
ungroup() %>%
ggplot() +
geom_col(aes(x = date, y = Cases_count), fill = "turquoise", alpha = .3) +
geom_point(aes(x = date, y = new_avg), col = "tomato") +
geom_line(aes(x = date, y = new_avg), col = "tomato", size = .9, alpha = .3) +
scale_y_continuous(labels = comma) +
labs(title = glue("{Country_selected}'s {selected_case_type} Cases {date_from} onwards"),
subtitle = "With Average Daily Cases Trend line",
caption = "Data source: covid19.analytics")

Resources