Ribbon chart in R - r

I search in R implementation (may be html widget on java script) a stacked bar chart in ribbon style, which allows you to see the rating change for each category in the dynamics.
It's look like ribbon chart in power bi desktop
Search rseek.org gave no results.

First off: Not a fan of that ribbon-styled stacked bar chart at all; while colourful and stylish, it's difficult to synthesise the relevant information. But that's just my opinion.
You could try building a similar plot in ggplot2 using geom_ribbon. See below for a minimal example:
# Sample data
set.seed(2017);
one <- sample(5:15, 10);
two <- rev(one);
df <- cbind.data.frame(
x = rep(1:10, 2),
y = c(one, two),
l = c(one - 1, two - 1),
h = c(one + 1, two + 1),
id = rep(c("one", "two"), each = 10));
require(ggplot2);
ggplot(df, aes(x = x, y = y)) +
geom_ribbon(aes(ymin = l, ymax = h, fill = id), alpha = 0.4) +
scale_fill_manual(values = c("#E69F00", "#56B4E9"));
If you need interactivity, you could wrap it inside plotly::ggplotly.

Using ggsankey package.
In the following you can make use of smooth argument geom_sankey_bump to control the look/feel of the chart as in ribbon chart of Power BI.
df <- data.frame (model = c("A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J"),
Year = c(2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018),
sales = c(450,678,456,344,984,456,234,244,655,789,234,567,234,567,232,900,1005,1900,450,345,567,235,456,345,144,333,555,777,111,444,222,223,445,776,331,788,980,1003,456,434))
#install.packages("remotes")
#remotes::install_github("davidsjoberg/ggsankey")
library(ggsankey)
library(tidyverse)
ggplot(df, aes(x = Year,
node = model,
fill = model,
value = sales)) +
geom_sankey_bump(space = 0, type = "alluvial", color = "transparent", smooth = 15) +
scale_fill_viridis_d(option = "A", alpha = .8) +
theme_sankey_bump(base_size = 16) +
labs(x = NULL,
y = "Sales ($ ths)",
fill = "Model",
color = NULL) +
theme(legend.position = "bottom") +
labs(title = "Sales per model per year")
On suggestion in comments, I tried replicating some of the features of power BI chart.
# Prepare some data
set.seed(1)
df <- data.frame(
occupation = rep(c("Clerical", "Management", "Manual", "Professional", "Skilled"), 12),
Month = factor(rep(month.abb, 5), levels = month.abb, ordered = TRUE),
Sales = sample(200:1000, 60, replace = TRUE)
)
df %>%
group_by(Month) %>%
mutate(Max = sum(Sales)) %>%
ungroup() %>%
mutate(Max = max(Sales)) %>%
ggplot(aes(x = Month,
node = occupation,
fill = occupation,
value = Sales)) +
geom_col(aes(x = Month, y = Max/1.2),
alpha = 0.5,
fill = 'grey',
width = 0.4) +
geom_sankey_bump(space = 15,
type = "alluvial",
color = "transparent",
smooth = 8,
alpha = 0.8) +
scale_fill_brewer(palette = "Set3") +
theme_minimal() +
labs(x = NULL,
y = "Sales ($ ths)",
fill = "Occupation",
color = NULL) +
theme(legend.position = "top") +
labs(title = "Sales per occupation per month")
Created on 2022-07-07 by the reprex package (v2.0.1)

You may find your answers with ggalluvial package.
https://cran.r-project.org/web/packages/ggalluvial/vignettes/ggalluvial.html

Related

Position stacked identity data sample size as geom_text directly over a bar using geom_bar from ggplot2

In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)

ggplot line plot with one group`s lines on top

I am making a line plot of several groups and want to make a visualization where one of the groups lines are highlighted
ggplot(df) + geom_line(aes(x=timepoint ,y=var, group = participant_id, color=color)) +
scale_color_identity(labels = c(red = "g1",gray90 = "Other"),guide = "legend")
However, the group lines are partially obscured by the other groups lines
How can I make these lines always on top of other groups lines?
The simplest way to do this is to plot the gray and red groups on different layers.
First, let's try to replicate your problem with a dummy data set:
set.seed(1)
df <- data.frame(
participant_id = rep(1:50, each = 25),
timepoint = factor(rep(0:24, 50)),
var = c(replicate(50, runif(1, 50, 200) + runif(25, 0.3, 1.5) *
sin(0:24/(0.6*pi))^2/seq(0.002, 0.005, length = 25))),
color = rep(sample(c("red", "gray90"), 50, TRUE, prob = c(1, 9)), each = 100)
)
Now we apply your plotting code:
library(ggplot2)
ggplot(df) +
geom_line(aes(x=timepoint ,y=var, group = participant_id, color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend") +
theme_classic()
This looks broadly similar to your plot. If instead we plot in different layers, we get:
ggplot(df, aes(timepoint, var, group = participant_id)) +
geom_line(data = df[df$color == "gray90",], aes(color = "Other")) +
geom_line(data = df[df$color == "red",], aes(color = "gl")) +
scale_color_manual(values = c("red", "gray90")) +
theme_classic()
Created on 2022-06-20 by the reprex package (v2.0.1)
You can use factor releveling to bring the line (-s) of interest to front.
First, let's plot the data as is, with the red line partly hidden by others.
library(ggplot2)
library(dplyr)
set.seed(13)
df <-
data.frame(timepoint = rep(c(1:100), 20),
participant_id = paste0("p_", sort(rep(c(1:20), 100))),
var = abs(rnorm(2000, 200, 50) - 200),
color = c(rep("red", 100), rep("gray90", 1900)))
ggplot(df) +
geom_line(aes(x = timepoint ,
y = var,
group = participant_id, color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend")
Now let's bring p_1 to front by making it the last factor level.
df %>%
mutate(participant_id = factor(participant_id)) %>%
mutate(participant_id = relevel(participant_id, ref = "p_1")) %>%
mutate(participant_id = factor(participant_id, levels = rev(levels(participant_id)))) %>%
ggplot() +
geom_line(aes(x=timepoint,
y=var,
group = participant_id,
color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend")

Make 3 plots into the same graph with the facet_wrap function

Can someone explain to me how can I use the facet_wrap layer in ggplot to make 3 plots in the same graph so that each plot should be in a row and so that the scales of each plot changes freely.
Here are the 3 graphs that I have made because I wanted to see the relation between those three covariates: median_income , pct_immigrant, income_inequality and the percentage of votes for Marine Le Pen individually.
To be precise, those three covariates are not variables that is why I had to filter the data frame elections_2017_long_metrop_covariates_lepen_long first so that I can only keep each observation among the variable covariates
Also if you have any suggestion to better the visualisation of the graphs
graph1 = filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "pct_immigrant")       
ggplot(graph1,aes(x = value,y = pct_votes)) + geom_point(size = 3, alpha = 0.5,colour = "#d90502") + expand_limits(x = 0, y = 0:100) +labs(x = "share of immigrants",y = "percentage of votes for Marine Le Pen")
graph2 = filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "income_inequality")       
ggplot(graph2,aes(x = value,y = pct_votes)) + geom_point(size = 3, alpha = 0.5,colour = "#d90502") + expand_limits(x = 0, y = 0:100) +labs(x = "income inequality",y = "percentage of votes for Marine Le Pen")
graph3 = filter(elections_2017_long_metrop_covariates_lepen_long, covariates == "median_income")  
ggplot(graph2,aes(x = value,y = pct_votes)) + geom_point(size = 3, alpha = 0.5,colour = "#d90502") + expand_limits(x = 0, y = 0:100) +labs(x = "median income",y = "percentage of votes for Marine Le Pen")
You did not gave a complete reproducible example, but I think this should work for you.
To make facets you should pass the filter variable into the facet_wrap() function.
elections_2017_long_metrop_covariates_lepen_long %>%
filter(covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')) %>%
ggplot(aes(x = value,y = pct_votes)) +
geom_point(alpha = 0.5) +
facet_wrap(~covariates)
The full solution considering colors can be:
library(ggplot2)
#Plot
ggplot(subset(elections_2017_long_metrop_covariates_lepen_long
covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')),
aes(x = value,y = pct_votes,color=covariates))+
geom_point(size = 3, alpha = 0.5)+
expand_limits(x = 0, y = 0:100) +
labs(x = "share of immigrants",y = "percentage of votes for Marine Le Pen")+
facet_wrap(.~covariates,scales = 'free',ncol = 1)+
scale_color_manual(values=rep("#d90502",3))
Or:
library(dplyr)
library(ggplot2)
#Code 2
elections_2017_long_metrop_covariates_lepen_long %>%
filter(covariates %in% c('pct_immigrant', 'median_income', 'income_inequality')) %>%
ggplot(aes(x = value,y = pct_votes,color=covariates))+
geom_point(size = 3, alpha = 0.5)+
expand_limits(x = 0, y = 0:100) +
labs(x = "share of immigrants",y = "percentage of votes for Marine Le Pen")+
facet_wrap(.~covariates,scales = 'free',ncol = 1)+
scale_color_manual(values=rep("#d90502",3))
No output showed in lack of data.

R - How can I add a bivariate legend to my ggplot2 chart?

I'm trying to add a bivariate legend to my ggplot2 chart but I don't know whether (a) this is possible through some guides options and (b) how to achieve it.
The only way I've managed to produce something close to the desired outcome was by specifically creating a new chart which resembles a legend (named p.legend below) and inserting it, via the cowplot package, somewhere in the original chart (named p.chart below). But surely there must be a better way than this, given that this approach requires creating the legend in the first place and fiddling with its size/location to fit it in the original chart.
Here's code for a dummy example of my approach:
library(tidyverse)
# Create Dummy Data #
set.seed(876)
n <- 2
df <- expand.grid(Area = LETTERS[1:n],
Period = c("Summer", "Winter"),
stringsAsFactors = FALSE) %>%
mutate(Objective = runif(2 * n, min = 0, max = 2),
Performance = runif(2 * n) * Objective) %>%
gather(Type, Value, Objective:Performance)
# Original chart without legend #
p.chart <- df %>%
ggplot(., aes(x = Area)) +
geom_col(data = . %>% filter(Type == "Objective"),
aes(y = Value, fill = Period),
position = "dodge", width = 0.7, alpha = 0.6) +
geom_col(data = . %>% filter(Type == "Performance"),
aes(y = Value, fill = Period),
position = "dodge", width = 0.7) +
scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = FALSE) +
theme_minimal() +
theme(panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank())
# Create a chart resembling a legend #
p.legend <- expand.grid(Period = c("Summer", "Winter"),
Type = c("Objective", "Performance"),
stringsAsFactors = FALSE) %>%
ggplot(., aes(x = Period, y = factor(Type, levels = c("Performance", "Objective")),
fill = Period, alpha = Type)) +
geom_tile() +
scale_fill_manual(values = c("Summer" = "#ff7f00", "Winter" = "#1f78b4"), guide = FALSE) +
scale_alpha_manual(values = c("Objective" = 0.7, "Performance" = 1), guide = FALSE) +
ggtitle("Legend") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5),
rect = element_rect(fill = "transparent"),
axis.title = element_blank(),
panel.grid.major = element_blank())
# Add legend to original chart #
p.final <- cowplot::ggdraw() +
cowplot::draw_plot(plot = p.chart) +
cowplot::draw_plot(plot = p.legend, x = 0.5, y = 0.65, width = 0.4, height = 0.28, scale = 0.7)
# Save chart #
cowplot::ggsave("Bivariate Legend.png", p.final, width = 8, height = 6, dpi = 500)
... and the resulting chart:
Is there an easier way of doing this?
This might work at some point, but right now the colorbox seems to ignore all breaks, names and labels (#ClausWilke?). Probably because the multiscales package is in really early stages.
Posting since it might work when future readers are here.
library(multiscales)
df %>%
mutate(
period = as.numeric(factor(Period)),
type = as.numeric(factor(Type))
) %>%
ggplot(., aes(x = Area, y = Value, fill = zip(period, type), group = interaction(Area, Period))) +
geom_col(width = 0.7, position = 'dodge') +
bivariate_scale(
"fill",
pal_hue_sat(c(0.07, 0.6), c(0.4, 0.8)),
guide = guide_colorbox(
nbin = 2,
name = c("Period", "Type"), #ignored
breaks = list(1:2, 1:2), #ignored
labels = list(levels(.$Period), levels(.$Type)) #ignored
)

Adding legend in ggplot2

I have looked through similar questions and I have a feeling I have done everything. Still not getting the desire output. I am using ggplot2 and tidyquant packages to visualize data with 2 financial trends I am trying to display a legend that contains trends line coloron plot
data %>%
ggplot(aes(date, price)) +
geom_line() +
geom_ma(ma_fun = SMA, n = 50, size = 1 , col = "red" , show.legend = TRUE)+
geom_ma(ma_fun = SMA, n = 200, size = 1 , col = "blue", show.legend= TRUE)+
theme_tq()
Here you go:
library(tidyquant)
library(ggplot2)
data <- data.frame(date = 1:1000, price = cumsum(rnorm(1000)))
data %>%
ggplot(aes(date, price)) +
geom_line() +
geom_ma(aes(color = 'MA50'), ma_fun = SMA, n = 50, size = 1 ,show.legend = TRUE)+
geom_ma(aes(color = 'MA200'), ma_fun = SMA, n = 200, size = 1 , show.legend = TRUE) +
scale_colour_manual(name = 'Legend',
guide = 'legend',
values = c('MA50' = 'red',
'MA200' = 'blue'),
labels = c('SMA(50)',
'SMA(200)'))

Resources