Related
In this experiment, we tracked presence or absence of bacterial infection in our subject animals. We were able to isolate which type of bacteria was present in our animals and created a plot that has Week Since Experiment Start on the X axis, and Percentage of Animals Positive for bacterial infection on the Y axis. This is a stacked identity ggplot where each geom_bar contains the different identities of the bacteria that were in the infected animals each week. Here is a sample dataset with the corresponding ggplot code and result:
DummyData <- data.frame(matrix(ncol = 5, nrow = 78))
colnames(DummyData) <- c('WeeksSinceStart','BacteriaType','PositiveOccurences','SampleSize','NewSampleSize')
DummyData$WeeksSinceStart <- c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,9,9,9,9,9,10,10,10,10)
DummyData$BacteriaType <- c("BactA","BactB","BactD","BactB","BactE","BactA","BactS","BactF","BactE","BactH","BactJ","BactK","BactE","BactB","BactS","BactF","BactL","BactE","BactW","BactH","BactS","BactJ","BactQ","BactN","BactW","BactA","BactD","BactE","BactA","BactC","BactD","BactK","BactL","BactE","BactD","BactA","BactS","BactK","BactB","BactE","BactF","BactH","BactN","BactE","BactL","BactZ","BactE","BactC","BactR","BactD","BactJ","BactN","BactK","BactW","BactR","BactE","BactW","BactA","BactM","BactG","BactO","BactI","BactE","BactD","BactM","BactH","BactC","BactM","BactW","BactA","BactL","BactB","BactE","BactA","BactS","BactH","BactQ","BactF")
PosOcc <- seq(from = 1, to = 2, by = 1)
DummyData$PositiveOccurences <- rep(PosOcc, times = 13)
DummyData$SampleSize <- c(78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,78,29,29,29,29,29,10,10,10,10)
DummyData$NewSampleSize <- c(78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,NA,NA,78,NA,NA,NA,NA,NA,NA,29,NA,NA,NA,NA,10,NA,NA,NA)
numcolor <- 20
plotcolors <- colorRampPalette(brewer.pal(8, "Set3"))(numcolor)
#GGplot for Dummy Data
DummyDataPlot <- ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences/SampleSize, fill = BacteriaType)) + geom_bar(position = "stack", stat = "identity") +
geom_text(label = DummyData$NewSampleSize, nudge_y = 0.1) +
scale_y_continuous(limits = c(0,0.6), breaks = seq(0, 1, by = 0.1)) + scale_x_continuous(limits = c(0.5,11), breaks = seq(0,10, by =1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive") +
scale_fill_manual(values = plotcolors)
The problem: I cannot seem to find a way to position the labels from geom_text directly over each bar. I would also love to add the text "n = " to the sample size value directly over each bar. Thank you for your help!
I have tried different values for position_dodge statement and nudge_y statement with no success.
Sometimes the easiest approach is to do some data wrangling, i.e. one option would be to create a separate dataframe for your labels:
library(ggplot2)
library(dplyr)
dat_label <- DummyData |>
group_by(WeeksSinceStart) |>
summarise(y = sum(PositiveOccurences / SampleSize), SampleSize = unique(SampleSize))
ggplot(DummyData, aes(x = WeeksSinceStart, y = PositiveOccurences / SampleSize, fill = BacteriaType)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(data = dat_label, aes(x = WeeksSinceStart, y = y, label = SampleSize), inherit.aes = FALSE, nudge_y = .01) +
#scale_y_continuous(limits = c(0, 0.6), breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(limits = c(0.5, 11), breaks = seq(0, 10, by = 1)) +
labs(
x = "Weeks Since Start",
y = "Proportion Positive"
) +
scale_fill_manual(values = plotcolors)
I am making a line plot of several groups and want to make a visualization where one of the groups lines are highlighted
ggplot(df) + geom_line(aes(x=timepoint ,y=var, group = participant_id, color=color)) +
scale_color_identity(labels = c(red = "g1",gray90 = "Other"),guide = "legend")
However, the group lines are partially obscured by the other groups lines
How can I make these lines always on top of other groups lines?
The simplest way to do this is to plot the gray and red groups on different layers.
First, let's try to replicate your problem with a dummy data set:
set.seed(1)
df <- data.frame(
participant_id = rep(1:50, each = 25),
timepoint = factor(rep(0:24, 50)),
var = c(replicate(50, runif(1, 50, 200) + runif(25, 0.3, 1.5) *
sin(0:24/(0.6*pi))^2/seq(0.002, 0.005, length = 25))),
color = rep(sample(c("red", "gray90"), 50, TRUE, prob = c(1, 9)), each = 100)
)
Now we apply your plotting code:
library(ggplot2)
ggplot(df) +
geom_line(aes(x=timepoint ,y=var, group = participant_id, color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend") +
theme_classic()
This looks broadly similar to your plot. If instead we plot in different layers, we get:
ggplot(df, aes(timepoint, var, group = participant_id)) +
geom_line(data = df[df$color == "gray90",], aes(color = "Other")) +
geom_line(data = df[df$color == "red",], aes(color = "gl")) +
scale_color_manual(values = c("red", "gray90")) +
theme_classic()
Created on 2022-06-20 by the reprex package (v2.0.1)
You can use factor releveling to bring the line (-s) of interest to front.
First, let's plot the data as is, with the red line partly hidden by others.
library(ggplot2)
library(dplyr)
set.seed(13)
df <-
data.frame(timepoint = rep(c(1:100), 20),
participant_id = paste0("p_", sort(rep(c(1:20), 100))),
var = abs(rnorm(2000, 200, 50) - 200),
color = c(rep("red", 100), rep("gray90", 1900)))
ggplot(df) +
geom_line(aes(x = timepoint ,
y = var,
group = participant_id, color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend")
Now let's bring p_1 to front by making it the last factor level.
df %>%
mutate(participant_id = factor(participant_id)) %>%
mutate(participant_id = relevel(participant_id, ref = "p_1")) %>%
mutate(participant_id = factor(participant_id, levels = rev(levels(participant_id)))) %>%
ggplot() +
geom_line(aes(x=timepoint,
y=var,
group = participant_id,
color = color)) +
scale_color_identity(labels = c(red = "g1", gray90 = "Other"),
guide = "legend")
I would like to ask for some help with depicting the slopes generated by a lmer() model.
The data that I have is the mass volume of different rats across different days. Each rat has different time points where they took the measurement of that volume.
For rat 1 I have volume c(78,304,352,690,952,1250) at days c(89,110,117,124,131,138) that belong to country Chile
For rat 2 I have volume c(202,440,520,870,1380) at days c(75,89,96,103,110) that belong to country Chile.
For rat 3 I have volume c(186,370,620,850,1150) at days c(75,89,96,103,110) that belong to country Chile.
For rat 4 I have volume c(92,250,430,450,510,850,1000,1200) at days c(47,61,75,82,89,97,103,110) that belong to country England.
For rat 5 I have volume c(110,510,710,1200) at days c(47,61,75,82) that belong to country England.
For rat 6 I have volume c(115,380,480,540,560,850,1150,1350) at days c(47,61,75,82,89,97,103,110) that belong to country England.
The lmer model is:
m1 <- lmer(lVolume ~ Country*Day + (1|Rat))
I managed to plot the curves of my model by using:
m1%>%
augment() %>%
clean_names() %>%
ggplot(data = .,
mapping = aes(x = day,
y = exp(l_volume),
group = rat)) +
geom_point(alpha = 0.5) +
geom_line(alpha = 0.5) +
geom_point(aes(y = exp(fitted)),
color = "red") +
geom_line(aes(y = exp(fitted)),
color = "red") +
expand_limits(x = 0 , y = 0)
This model gave me predictions for new data points based on the model m1 for each of the rats across country.
From this lmer() I have one slope across the whole measurements, this is:
And by exp(predicted):
However, I would like to plot this in a different way. I would like to plot the slope generated by each of the levels of country that I have.
The red lines would be the exp(slopes) generating by Chile, and England, but also depict the exp(slope) of the whole model containing both levels.
So, initially I thought that creating three lmer() models:
m1 <- lmer(lVolume ~ Country*Day + (1|Rat))
m2 <- lmer(lVolume ~ Day + (1|Rat)) (Rats in Chile)
m3 <- lmer(lVolume ~ Day + (1|Rat)) (Rats in England)
But I noticed that m2 and m3 are quite different models because they do not have the interaction from Country that is something that I would like to check. So, I don't know what to do here.
Update
I tried this and kind of worked:
Final.Fixed<-effect(c("Country*Day"), m1,
xlevels=list(Day=seq(0,168,14)))
Final.Fixed<-as.data.frame(Final.Fixed)
Final.Fixed.Plot <-ggplot(data = Final.Fixed, aes(x = Day, y =exp(fit), group=Country))+
coord_cartesian(xlim=c(0,170),ylim = c(0,8000))+
geom_line(aes(color=Country), size=2)+
geom_ribbon(aes(ymin=exp(fit-se), ymax=exp(fit+se),fill=Country),alpha=.2)+
xlab("Day")+
ylab("Volume")+
scale_color_manual(values=c("blue", "red"))+
scale_fill_manual(values=c("blue", "red"))+
theme_bw()+
theme(text=element_text(face="bold", size=12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(fill = NA, colour = "NA"),
axis.line = element_line(size = 1, colour = "grey80"),
legend.title=element_blank(),
legend.position = c(.2, .92))
Final.Fixed.Plot
Is this ok ? I think that I am still cosnidering the m1 with the country*Day interaction. Correct me if I am worng, please! Also, I don't know how I can add the exp(fit) curve for the whole model and the raw data points in this plot.
Could I get some hint/help, please ?
Clean summary on top
The first code chunk contains a cleaned up version that addresses all points of the question, using some input from the comments. I've left the original answer below which step by step builds to the final plot.
library(tidyverse)
library(lme4)
library(broom.mixed)
library(ggeffects)
m1 <- lme4::lmer(lVolume ~ Country*Day + (1|Rat), data = df_rats %>%
dplyr::mutate(lVolume = log(Volume)))
# predictions for each country
syn_df <- tidyr::expand_grid(
Day = 1:170,
Country = c("Chile", "England")
) %>%
dplyr::mutate(lVolume = predict(m1, ., re.form = ~0))
# marginal effects for variable "Day"
df_day_marginal <- ggeffect(model = m1, terms = "Day", type = "fe") %>%
as.data.frame() %>%
dplyr::rename(Day = x, lVolume = predicted) %>%
dplyr::mutate(Country = "overall")
#combine prediction curves
df_preds <- bind_rows(syn_df, df_day_marginal)
# manually assemble formulas [units missing]
y0 <- round(fixef(m1)[["(Intercept)"]], 2)
beta_day <- round(fixef(m1)[["Day"]], 3)
beta_englday <- round(fixef(m1)[["CountryEngland:Day"]], 3)
beta_engl <- round(fixef(m1)[["CountryEngland"]], 2)
f_chile <- paste0("volume = exp(", y0, " + ", beta_day, " * days)")
f_england <- paste0("volume = exp(", y0 + beta_engl , " + ", beta_day + beta_englday, " * days)")
df_labels <- data.frame(
x = c(50, 50),
y = c(1300, 1400),
form = c(f_chile, f_england),
country = c("Chile", "England")
)
m1 %>%
broom.mixed::augment()%>%
ggplot(aes(x = Day, y = exp(lVolume), color = Country)) +
geom_ribbon(data = df_preds, aes(ymin = exp(conf.low), ymax = exp(conf.high), color = NULL, fill = Country), alpha = 0.3) +
geom_line(data = df_preds, size = 1.5) +
geom_line(aes(group = Rat)) +
geom_point() +
coord_cartesian(ylim = c(0, 1500), xlim = c(0, 150)) +
geom_text(data = df_labels, aes(x = x, y = y, label = form, color = country)) +
labs(x = "days", y = "volume")
original answer
I've tried to stay as close as possible to your initial code for the first part of the question.
The first chunk trains the model and makes population-level predictions for Chile and England over the specified days. (using the re.form = ~0 argument as explained e.g. here)
library(tidyverse)
library(lme4)
library(broom.mixed)
#helpful to specify in that `lVolume` is the log of the data you provid in the question
m1 <- lme4::lmer(lVolume ~ Country*Day + (1|Rat), data = df_rats %>%
dplyr::mutate(lVolume = log(Volume)))
days <- seq(0,168,14)
syn_df <- tidyr::expand_grid(
Day = 1:170,
Country = c("Chile", "England")
)
syn_df <- syn_df %>%
dplyr::mutate(l_volume = predict(m1, syn_df, re.form = ~0)) %>%
janitor::clean_names()
This can then be added to your original plot with minor modifications:
m1 %>%
broom.mixed::augment() %>%
janitor::clean_names() %>%
ggplot(data = .,
mapping = aes(x = day,
y = exp(l_volume),
color = country)) +
geom_point(alpha = 0.7) +
geom_line(aes(group = rat), alpha = 0.7) +
expand_limits(x = 0 , y = 0) +
geom_line(data = syn_df, alpha = 1, size = 1.5) +
coord_cartesian(ylim = c(NA, 1500), xlim = c(NA, 150))
Added
In addition, we can add marginal effect for days to the plot.
df_day_marginal <- ggeffect(model = m1, terms = "Day", type = "fe")
m1 %>%
broom.mixed::augment() %>%
janitor::clean_names() %>%
ggplot() +
geom_ribbon(data = df_day_marginal, aes(x = x, ymin = exp(conf.low), ymax = exp(conf.high)), alpha = 0.3) +
geom_line(data = syn_df, aes(x = day, y = exp(l_volume), color = country), size = 1.5) +
geom_line(data = df_day_marginal, aes(x = x, y = exp(predicted)), size = 1.5) +
geom_point(aes(x = day, y = exp(l_volume), color = country), alpha = 0.7) +
geom_line(aes(x = day, y = exp(l_volume), color = country, group = rat), alpha = 0.7) +
expand_limits(x = 0 , y = 0) +
coord_cartesian(ylim = c(NA, 1500), xlim = c(NA, 150)) +
labs(x = "days", y = "volume")
Im making a scatterplot which shows a value plotted against the date since symptom onset. These patients are categorised based on disease severity, and i wanted to show how the values change over time in each severity category. I have coloured the dots based on severity score, but i prefer to use shape =21 so i can have a border. I also draw a line to see the trend, and i want that coloured in the same way, however, this has added another legend and it looks complicated. This issue doesnt happen if use a different shape that isnt filled, because scale_colour_manual can be used for both the lines and the dots, but i dont think it looks as nice. Any idea how i can fix this?
IC50SymObySS <- ggplot(data = isaric) +
geom_point(mapping = aes(x = Days_since_onset, y = log2IC50, fill = Severity_score), size = 2, colour = "black", shape = 21)+
geom_smooth(mapping = aes(x = Days_since_onset, y = log2IC50, colour = Severity_score), se = FALSE)+
scale_fill_manual(breaks=c("1","2","3","4","5"),
values=c("1" = "lightblue1","2" = "lightblue3","3" = "lightblue4","4" = "lightcoral","5" = "firebrick2"),
labels=c("1","2","3","4","5"),
name = "Severity Score")+
scale_colour_manual(values=c("1" = "lightblue1","2" = "lightblue3","3" = "lightblue4","4" = "lightcoral","5" = "firebrick2"))+
theme_minimal()+
JTheme+
ylab("Serum Log2 IC50")+
xlab("Days Since Symptom Onset")+
guides(colour = guide_legend(title.position = "top", title.hjust = 0.5))
IC50SymObySS
As per this answer, you need to use identical name and labels values for both fill and colour scale.
library(ggplot2)
library(dplyr)
isaric <- transmute(iris,
Days_since_onset = (Sepal.Length - 4)^3,
log2IC50 = Sepal.Width * 3,
Severity_score = cut(Petal.Length, breaks = quantile(Petal.Length, prob = 0:5 / 5), labels = 1:5))
ggplot(data = isaric) +
geom_smooth(mapping = aes(x = Days_since_onset, y = log2IC50, colour = Severity_score), se = FALSE)+
geom_point(mapping = aes(x = Days_since_onset, y = log2IC50, fill = Severity_score), size = 2, colour = "black", shape = 21)+
scale_colour_manual(
name = "Severity Score",
values=c("1" = "lightblue1","2" = "lightblue3","3" = "lightblue4","4" = "lightcoral","5" = "firebrick2"),
labels=c("1","2","3","4","5"))+
scale_fill_manual(
name = "Severity Score",
breaks=c("1","2","3","4","5"),
values=c("1" = "lightblue1","2" = "lightblue3","3" = "lightblue4","4" = "lightcoral","5" = "firebrick2"),
labels=c("1","2","3","4","5"))+
theme_minimal()+
ylab("Serum Log2 IC50")+
xlab("Days Since Symptom Onset")+
guides(colour = guide_legend(title.position = "top", title.hjust = 0.5))
I search in R implementation (may be html widget on java script) a stacked bar chart in ribbon style, which allows you to see the rating change for each category in the dynamics.
It's look like ribbon chart in power bi desktop
Search rseek.org gave no results.
First off: Not a fan of that ribbon-styled stacked bar chart at all; while colourful and stylish, it's difficult to synthesise the relevant information. But that's just my opinion.
You could try building a similar plot in ggplot2 using geom_ribbon. See below for a minimal example:
# Sample data
set.seed(2017);
one <- sample(5:15, 10);
two <- rev(one);
df <- cbind.data.frame(
x = rep(1:10, 2),
y = c(one, two),
l = c(one - 1, two - 1),
h = c(one + 1, two + 1),
id = rep(c("one", "two"), each = 10));
require(ggplot2);
ggplot(df, aes(x = x, y = y)) +
geom_ribbon(aes(ymin = l, ymax = h, fill = id), alpha = 0.4) +
scale_fill_manual(values = c("#E69F00", "#56B4E9"));
If you need interactivity, you could wrap it inside plotly::ggplotly.
Using ggsankey package.
In the following you can make use of smooth argument geom_sankey_bump to control the look/feel of the chart as in ribbon chart of Power BI.
df <- data.frame (model = c("A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J"),
Year = c(2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018),
sales = c(450,678,456,344,984,456,234,244,655,789,234,567,234,567,232,900,1005,1900,450,345,567,235,456,345,144,333,555,777,111,444,222,223,445,776,331,788,980,1003,456,434))
#install.packages("remotes")
#remotes::install_github("davidsjoberg/ggsankey")
library(ggsankey)
library(tidyverse)
ggplot(df, aes(x = Year,
node = model,
fill = model,
value = sales)) +
geom_sankey_bump(space = 0, type = "alluvial", color = "transparent", smooth = 15) +
scale_fill_viridis_d(option = "A", alpha = .8) +
theme_sankey_bump(base_size = 16) +
labs(x = NULL,
y = "Sales ($ ths)",
fill = "Model",
color = NULL) +
theme(legend.position = "bottom") +
labs(title = "Sales per model per year")
On suggestion in comments, I tried replicating some of the features of power BI chart.
# Prepare some data
set.seed(1)
df <- data.frame(
occupation = rep(c("Clerical", "Management", "Manual", "Professional", "Skilled"), 12),
Month = factor(rep(month.abb, 5), levels = month.abb, ordered = TRUE),
Sales = sample(200:1000, 60, replace = TRUE)
)
df %>%
group_by(Month) %>%
mutate(Max = sum(Sales)) %>%
ungroup() %>%
mutate(Max = max(Sales)) %>%
ggplot(aes(x = Month,
node = occupation,
fill = occupation,
value = Sales)) +
geom_col(aes(x = Month, y = Max/1.2),
alpha = 0.5,
fill = 'grey',
width = 0.4) +
geom_sankey_bump(space = 15,
type = "alluvial",
color = "transparent",
smooth = 8,
alpha = 0.8) +
scale_fill_brewer(palette = "Set3") +
theme_minimal() +
labs(x = NULL,
y = "Sales ($ ths)",
fill = "Occupation",
color = NULL) +
theme(legend.position = "top") +
labs(title = "Sales per occupation per month")
Created on 2022-07-07 by the reprex package (v2.0.1)
You may find your answers with ggalluvial package.
https://cran.r-project.org/web/packages/ggalluvial/vignettes/ggalluvial.html