I have a data frame in which the first column holds different types of bacteria and the remaining columns are samples. Each sample belongs to a specific time point (T0, T1, ...), and the last 39 columns are the control group.
What I intend is to plot each bacterium in its own plot. Each plot should have the time points on the x-axis and the values on the y-axis (I was thinking of a bar plot or box plot with error bars).
Any idea how I can group the data by time point and by bacterium?
Here is a small example of the data:
thanks!
# Small sample of the data in dput() format: a data frame with three rows
# (one per bacterium, named in `Bacteria`) and one numeric column per sample.
# Most sample names carry a time token after an underscore (e.g. "_T0", "_T5").
# The expression is not assigned here; the snippets that follow refer to the
# data as `df` or `dat`.
structure(list(Bacteria = c("Methanobrevibacter", "Methanosphaera",
"Methanomassiliicoccus"), PIE2001_T0_TORUNDA = c(2.279974027,
0.670536115, -0.022611066), PIE2001_T1_TORUNDA = c(2.021643324,
-0.057798217, -0.057798217), PIE2001_T5_COMPL = c(2.788566988,
0.648500825, -0.044646356), PIE2006_T0_TORUNDA = c(0.07550014,
1.684938052, 0.07550014), PIE2007_T0_TORUNDA = c(2.072075243,
1.261145027, -0.125149334), PIE2007_T1_TORUNDA = c(2.601582257,
1.279826417, -0.106467944), PIE2007_T2 = c(2.81564899, 1.765826865,
-0.180083284), PIE2007_T3 = c(0.639040509, 3.081387545, -0.054106671
), PIE2013_T0_COMPLETA = c(2.683794403, -0.024255798, -0.024255798
), PIE2013_T1_COMPLETA = c(2.614756053, -0.024301277, -0.024301277
), PIE2013_T4_COMP = c(2.653056483, 0.013999154, 0.013999154),
PIE2013_T5_COMPL = c(1.861263144, -0.084647005, -0.084647005
), PIE2014_COMP = c(2.304771706, 1.005488722, -0.093123567
), PIE2016_T0_COMPLETA = c(-0.141271428, -0.141271428, -0.141271428
), PIE2016_T1_COMPLETA = c(-0.081696055, -0.081696055, -0.081696055
), PIE2016_T3 = c(-0.019385468, -0.019385468, -0.019385468
), PIE2016_T3_TOR = c(0.045856809, 0.045856809, 0.045856809
), PIE2017_T0_COMPLETA = c(4.493506636, 0.189441543, 0.189441543
), PIE2017_T1_COMPLETA = c(5.001671041, 0.71808448, 0.024937299
), PIE2017_T2_TOR = c(5.887191114, 0.672255357, -0.020891824
), PIE2017_T3 = c(3.306066839, 0.703377154, 0.010229973),
PIE2017_T4_COMP = c(5.560847286, 1.371192544, -0.015101817
), PIE2017_T5_COMPL = c(5.688626959, -0.025105846, -0.025105846
), PIE2018_T1 = c(0.158551089, 0.158551089, 0.158551089),
PIE2019_T1_COMPL = c(6.659430141, 0.833430034, 0.140282853
)), row.names = c(NA, 3L), class = "data.frame")
Script updated:
# Relabel "T0" as "T6" in the names of columns 363 to 401.
# NOTE(review): the 363:401 range is hard-coded for the full data set
# (presumably the 39 control columns, so they get their own time label) --
# confirm the positions before reusing this on other data.
colnames(df)[363:401] <- gsub("T0", "T6", colnames(df)[363:401])

df %>%
  # Long format: one row per bacterium/sample pair (columns `name`, `value`).
  pivot_longer(-Bacteria) %>%
  # `group` is the part of the sample name before the first underscore;
  # `time` is the "T<digits>" token that follows an underscore.
  mutate(group = gsub('_.*$', '', name),
         time = gsub('^.*_(T\\d+).*$', '\\1', name)) %>%
  # Sample names without a "T<digits>" token pass through gsub() unchanged;
  # drop them.
  filter(grepl('T\\d+', time)) %>%
  ggplot(aes(time, value, fill = Bacteria)) +
  # Bar height is the mean per time point; error bars show mean +/- SE.
  geom_bar(stat = 'summary', fun = 'mean', position = 'dodge') +
  stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2,
               position = position_dodge(0.9)) +
  theme_minimal() +
  # One panel per bacterium, each with its own y range. The argument is
  # spelled `scales` in full instead of relying on partial matching.
  facet_grid(Bacteria ~ ., scales = 'free_y') +
  scale_fill_brewer(palette = 'Set1') +
  theme(panel.border = element_rect(fill = NA, color = 'gray75'))
You need to reshape your data. You can then do a comparative boxplot:
library(tidyverse)
df %>%
  # Long format: one row per bacterium/sample pair (columns `name`, `value`).
  pivot_longer(-Bacteria) %>%
  # `group` is the part of the sample name before the first underscore;
  # `time` is the "T<digits>" token that follows an underscore.
  mutate(group = gsub('_.*$', '', name),
         time = gsub('^.*_(T\\d+).*$', '\\1', name)) %>%
  # Sample names without a "T<digits>" token pass through gsub() unchanged;
  # drop them.
  filter(grepl('T\\d+', time)) %>%
  ggplot(aes(time, value, fill = Bacteria)) +
  geom_boxplot() +
  theme_minimal() +
  # One panel per bacterium, each with its own y range. The argument is
  # spelled `scales` in full instead of relying on partial matching.
  facet_grid(Bacteria ~ ., scales = 'free_y') +
  scale_fill_brewer(palette = 'Set2') +
  theme(panel.border = element_rect(fill = NA, color = 'gray75'))
Pivot your data to long format, then facet by bacterium:
library(tidyr)
library(dplyr)
library(forcats)
library(ggplot2)
# Reshape to long format (one row per bacterium/sample pair) and turn the
# sample names into a factor ordered by first appearance, so the x-axis
# keeps the original column order.
dat_long <- mutate(
  pivot_longer(dat, cols = -Bacteria, names_to = "sample"),
  sample = fct_inorder(sample)
)

# One panel per bacterium, stacked in a single column.
ggplot(data = dat_long, mapping = aes(x = sample, y = value)) +
  geom_col() +
  facet_wrap(~ Bacteria, ncol = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Or make a line graph with bacteria mapped to color:
# One line per bacterium across the samples, distinguished by colour.
ggplot(
  data = dat_long,
  mapping = aes(x = sample, y = value, colour = Bacteria, group = Bacteria)
) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Related
I'm trying to combine two heatmaps. I want var_a and var_x on the y axis with for example: var_a first and then var_x. I don't know if I should do this by changing the dataframe or combining them, or if I can do this in ggplot.
Below I have some example code and a drawing of what I want (since I don't know if I explained it right).
I hope someone has ideas how I can do this either in the dataframe or in ggplot!
Example code:
# First table: each variable, its paired variable, and one expression value
# per organ.
df_one <- data.frame(
vars = c("var_a", "var_b", "var_c"),
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_vars = c(5, 10, 20),
expression_organ_2_vars = c(50, 2, 10),
expression_organ_3_vars = c(5, 10, 3)
)
# Long format: the three expression columns (positions 3 to 5) become
# tissueType/Expression pairs.
df_one_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
# Heatmap of df_one: variables on x, tissue types on y, fill = expression.
expression.df_one <- ggplot(df_one_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_one
# Second table: expression values of the paired variables.
df_two <- data.frame(
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_corresponding_vars = c(100, 320, 120),
expression_organ_2_corresponding_vars = c(23, 30, 150),
expression_organ_3_corresponding_vars = c(89, 7, 200)
)
# NOTE(review): this pivots df_one (not df_two) with the same arguments as
# above, so df_two_long has the same content as df_one_long and the heatmap
# below repeats expression.df_one. Presumably df_two was intended (its
# expression columns are at positions 2 to 4 and it has no `vars` column) --
# confirm before changing, since later code may rely on the current columns.
df_two_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_two <- ggplot(df_two_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_two
Drawing:
You can bind your data frames together and pivot into a longer format so that vars and corresponding vars are in the same column, but retain a grouping variable to facet by:
# Stack df_two and df_one into one long table and draw a faceted heatmap with
# one panel per variable pair.
df_two %>%
# Keep a copy of the pairing key before the columns are renamed.
mutate(cor = corresponding_vars) %>%
# Strip 'corresponding_' from every column name so df_two's columns line up
# with df_one's (`vars`, `expression_organ_*_vars`).
rename_with(~sub('corresponding_', '', .x)) %>%
# In df_one the pairing key is the `corresponding_vars` column.
bind_rows(df_one %>% rename(cor = corresponding_vars)) %>%
pivot_longer(contains('expression'), names_to = 'organ') %>%
# Reduce e.g. 'expression_organ_1_vars' to 'organ_1'.
mutate(organ = gsub('expression_|_vars', '', organ)) %>%
group_by(cor) %>%
# Replace the key with a readable facet label built from the variables in
# the group, keeping every row.
# NOTE(review): this summarize() returns several rows per group, which
# dplyr 1.1.0 deprecates in favour of reframe() -- verify on current dplyr.
summarize(vars = vars, organ = organ, value = value,
cor = paste(sort(unique(vars)), collapse = ' cor ')) %>%
ggplot(aes(vars, organ, fill = value)) +
geom_tile(color = 'white', linewidth = 1) +
# One panel per pair; free x so each panel only shows its own variables, and
# the strip is moved below the panel.
facet_grid(.~cor, scales = 'free_x', switch = 'x') +
scale_fill_viridis_c() +
coord_cartesian(clip = 'off') +
scale_x_discrete(expand = c(0, 0)) +
theme_minimal(base_size = 16) +
theme(strip.placement = 'outside',
axis.text.x = element_blank(),
axis.ticks.x.bottom = element_line(),
panel.spacing.x = unit(3, 'mm'))
Okay, so I solved the issue for my own project, which is to convert it to a scatter plot. I combined both datasets and then used a simple scatterplot.
# Join the two long tables on their shared key columns. The two `Expression`
# columns are not join keys, so they come back as Expression.x (from
# df_two_long) and Expression.y (from df_one_long).
df.combined <- dplyr::full_join(df_two_long, df_one_long,
by = c("vars", "corresponding_vars", "tissueType"))
# Scatter plot: one expression value is shown as colour, the other as size.
ggplot(df.combined,
aes(x=vars, y=tissueType, colour=Expression.x, size = Expression.y)) +
geom_point()
It's not a solution with heatmaps, but I don't know how to do that at the moment.
I have below ggplot:
library(ggplot2)
# Example data: three named components (A, B, C) measured in two groups.
data <- data.frame(
  val = c(10, 30, 15, 30, 40, 12),
  name = rep(c("A", "B", "C"), times = 2),
  group = rep(c("gr1", "gr2"), each = 3)
)
# Stacked bar plot: one narrow bar per group, split by `name`.
ggplot(data, aes(x = group, y = val, fill = name)) +
  geom_col(position = "stack", width = .1)
With this, I am getting below plot
However, I want to connect these bars with a curved area where the area would be equal to the value of the corresponding bar-component. A close example could be like,
Is there any way to achieve this with ggplot?
Any pointer will be very helpful.
This is something like an alluvial plot. There are various extension packages that could help you create such a plot, but it is possible to do it in ggplot directly using a bit of data manipulation.
library(tidyverse)
# Build the ribbons that join the two bars. For every `name`, move from its
# first value (at x = 1) to its second value (at x = 2) along a normal CDF
# centred at 1.5, which gives the S-shaped transition.
# reframe() is used because each group returns many rows; summarise() is
# deprecated for that since dplyr 1.1.0 (reframe() needs dplyr >= 1.1.0).
alluvia <- data %>%
  group_by(name) %>%
  reframe(x = seq(1, 2, 0.01),
          val = pnorm(x, 1.5, 0.15) * diff(val) + first(val))

# Bars sit at the integer positions of the group factor; the interpolated
# areas are stacked in the same order and drawn semi-transparently between
# them.
ggplot(data,
       aes(x = as.numeric(factor(group)),
           y = val,
           fill = name)) +
  geom_bar(stat = "identity",
           position = "stack", width = .1) +
  geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
  # Label the numeric positions with the original group names.
  scale_x_continuous(breaks = 1:2, labels = levels(factor(data$group)),
                     name = "Group", expand = c(0.25, 0.25)) +
  scale_fill_brewer(palette = "Set2") +
  theme_light(base_size = 20)
EDIT
A more generalized solution for more than 2 groups would be
library(tidyverse)
# Build one ribbon segment per `name` and pair of consecutive groups.
# reframe() is used because each group returns many rows; summarise() is
# deprecated for that since dplyr 1.1.0 (reframe() needs dplyr >= 1.1.0).
alluvia <- data %>%
  # Work with the integer position of each group on the x-axis.
  mutate(group = as.numeric(factor(group)),
         name = factor(name)) %>%
  arrange(group) %>%
  group_by(name) %>%
  # Pair every bar with the next group's position and value for the same
  # name; the last group has no successor and is dropped.
  mutate(next_group = lead(group),
         next_val = lead(val)) %>%
  filter(!is.na(next_val)) %>%
  group_by(name, group) %>%
  # Interpolate between the two values along a normal CDF centred halfway
  # between the two positions.
  reframe(x = seq(group + 0.01, next_group - 0.01, 0.01),
          val = (next_val - val) * pnorm(x, group + 0.5, 0.15) + val)

# Bars sit at the integer positions of the group factor; the interpolated
# areas are stacked in the same order and drawn semi-transparently between
# them.
ggplot(data,
       aes(x = as.numeric(factor(group)),
           y = val,
           fill = name)) +
  geom_bar(stat = "identity",
           position = "stack", width = .1) +
  geom_area(data = alluvia, aes(x = x), position = "stack", alpha = 0.5) +
  # One break per distinct group, labelled with the original group names.
  scale_x_continuous(breaks = seq(length(unique(data$group))),
                     labels = levels(factor(data$group)),
                     name = "Group", expand = c(0.25, 0.25)) +
  scale_fill_brewer(palette = "Set2") +
  theme_light(base_size = 20)
How can I show the month abbreviations ("xi") on the x-axis instead of the numbers?
I need to replace the numbers on the x-axis with the month abbreviations ("xi").
Reproducible example
library(ggplot2)
library(dplyr)
# Example data: year-month strings (`x`), month abbreviations (`xi`), observed
# and estimated values, and lower/upper bounds (`yii`, `yiii`) for the ribbon.
x<-c("2014-06","2014-07","2014-08","2014-09","2014-10","2014-11","2014-12")
xi<-c("Jun","Jul","Aug","Sep","Oct","Nov","Dez")
values.observed<-c(3.698,2.132,2.716,4.279,3.918,4.493,4.265)
values.estimated<-c(2.670,2.689,3.078,3.735,3.963,4.238,4.315)
yii<-c(0.629,1.394,1.957,2.677,2.913,3.190,3.299)
yiii<-c(4.567,3.982,4.185,4.785,4.996,5.279,5.349)
df<-data.frame(x,xi,values.observed,values.estimated,yii,yiii)
# NOTE(review): as.integer() cannot parse strings such as "2014-06" and
# returns NA (with a coercion warning), so `Year` and the integer `x` below
# are presumably not usable as written -- confirm what `x` was meant to hold.
Year <- seq(min(as.integer(df$x)), max(as.integer(df$x)), by = 1)
df %>%
mutate(x = as.integer(x)) %>%
# Stack the two `values.*` columns into group/values pairs.
tidyr::pivot_longer(
cols = starts_with('values'),
names_to = 'group',
values_to = 'values'
) %>%
# Shorten the group names for the legend.
mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
ggplot(aes(x = x, y = values)) +
geom_line(aes(color = group), size=1.3) +
# Shaded band between the lower and upper bounds.
geom_ribbon(aes(ymin = yii, ymax = yiii), alpha = 0.3, show.legend = FALSE) +
scale_color_manual(values = c(observed = 'green', estimated = 'red'))+
scale_x_continuous(breaks = Year, labels = Year) +
ylab("X") +
xlab("Months") +
theme(axis.text.x = element_text(angle = -15, vjust = 0))
You can map `group` to the group aesthetic in geom_line and make geom_ribbon use as.numeric(xi) for its x values:
# Turn `xi` into a factor whose levels follow the row order, so the months
# are plotted in that order rather than alphabetically.
df$xi <- factor(df$xi, levels = df$xi)

df %>%
  # Stack the two `values.*` columns into group/values pairs.
  tidyr::pivot_longer(
    cols = starts_with('values'),
    values_to = 'values',
    names_to = 'group'
  ) %>%
  # Shorten the group names for the legend.
  mutate(
    group = ifelse(group == "values.observed", "observed", "estimated")
  ) %>%
  ggplot() +
  # Lines use the discrete month factor; `group` keeps one line per series.
  geom_line(
    mapping = aes(x = xi, y = values, color = group, group = group),
    size = 1.3
  ) +
  # The ribbon uses the factor's integer codes for x.
  geom_ribbon(
    mapping = aes(x = as.numeric(xi), y = values, ymin = yii, ymax = yiii),
    alpha = 0.3,
    show.legend = FALSE
  ) +
  scale_color_manual(values = c(observed = 'green', estimated = 'red')) +
  xlab("Months") +
  ylab("X") +
  theme(axis.text.x = element_text(angle = -15, vjust = 0))
Or with what you have done, just provide the labels:
# Month abbreviations in row order; position i on the x-axis is labelled with
# month_labels[i]. as.character() makes this work whether `xi` is still a
# character column or has already been converted to a factor.
month_labels <- unique(as.character(df$xi))

df %>%
  # Use the month's position as the numeric x. (as.integer() on strings such
  # as "2014-06" only yields NA, which leaves nothing to plot.)
  mutate(x = match(as.character(xi), month_labels)) %>%
  # Stack the two `values.*` columns into group/values pairs.
  tidyr::pivot_longer(
    cols = starts_with('values'),
    names_to = 'group',
    values_to = 'values'
  ) %>%
  # Shorten the group names for the legend.
  mutate(group = ifelse(group == "values.observed", "observed", "estimated")) %>%
  ggplot(aes(x = x, y = values)) +
  geom_line(aes(color = group), size = 1.3) +
  # Shaded band between the lower and upper bounds.
  geom_ribbon(aes(ymin = yii, ymax = yiii), alpha = 0.3, show.legend = FALSE) +
  scale_color_manual(values = c(observed = 'green', estimated = 'red')) +
  # One break per month, labelled with its abbreviation.
  scale_x_continuous(breaks = seq_along(month_labels), labels = month_labels) +
  ylab("X") +
  xlab("Months") +
  theme(axis.text.x = element_text(angle = -15, vjust = 0))
I'm making a plot with two different geoms, both use fill. I'd like one geom to have a legend, but the other to not. However adding show.legend=F to the required geom doesn't switch off the legend for that geom.
Example:
library(tidyverse)
library(ggalluvial)
# Example data: three rows with a `qms` category, up to three movement steps
# (`move1` to `move3`) and a frequency.
x <- tibble(
  qms = c("grass", "cereal", "cereal"),
  move1 = "Birth",
  move2 = c("Direct", "Market", "Slaughter"),
  move3 = c("Slaughter", "Slaughter", NA),
  freq = c(10, 5, 7)
)

x %>%
  mutate(id = qms) %>%
  # Reshape to lodes form and drop the rows with a missing movement step.
  to_lodes_form(axis = 2:4, id = id) %>%
  na.omit() %>%
  ggplot(
    aes(x = x, y = freq, stratum = stratum, alluvium = id, label = stratum)
  ) +
  # Flows are filled by `qms`, strata by their own label; the legend is
  # switched off for the strata layer only.
  geom_flow(aes(fill = qms)) +
  geom_stratum(aes(fill = stratum), show.legend = FALSE) +
  geom_text(stat = "stratum", size = 3) +
  scale_x_discrete(expand = c(.1, .1)) +
  labs(fill = "") +
  theme_void()
Output:
Desired output:
Question:
How do I turn off the fill legend for one geom, but not the other? I can (if I have to) do this in inkscape/gimp, but would prefer a solution I can version control.
Have a look at the final line of code:
scale_fill_discrete(breaks = c("grass", "cereal"))
That defines the breaks for the fills to only include cereal and grass, as required.
library(tidyverse)
library(ggalluvial)
# Example data: three rows with a `qms` category, up to three movement steps
# (`move1` to `move3`) and a frequency.
x <- tibble(
  qms = c("grass", "cereal", "cereal"),
  move1 = "Birth",
  move2 = c("Direct", "Market", "Slaughter"),
  move3 = c("Slaughter", "Slaughter", NA),
  freq = c(10, 5, 7)
)

x %>%
  mutate(id = qms) %>%
  # Reshape to lodes form and drop the rows with a missing movement step.
  to_lodes_form(axis = 2:4, id = id) %>%
  na.omit() %>%
  ggplot(
    aes(x = x, y = freq, stratum = stratum, alluvium = id, label = stratum)
  ) +
  # Flows are filled by `qms`, strata by their own label.
  geom_flow(aes(fill = qms)) +
  geom_stratum(aes(fill = stratum), show.legend = FALSE) +
  geom_text(stat = "stratum", size = 3) +
  scale_x_discrete(expand = c(.1, .1)) +
  theme_void() +
  labs(fill = "") +
  # Only these two fill values are listed in the legend.
  scale_fill_discrete(breaks = c("grass", "cereal")) #<- This line!
Created on 2019-03-18 by the reprex package (v0.2.1)
I am trying to limit the x-axis of a plot with several facets to the current week. So if we are now in week 45, all weeks from 1 to 45 should be displayed, but not week 46 onwards. I cannot get any xlim command to work; e.g. xlim(1,45) returns the error:
Discrete value supplied to continuous scale.
Perhaps this has to do with the fact that the variable week is a factor, but this is necessary for correct plotting (no decimals). Any solutions?
# Reproducible example data: 100 random rows of object, week (1 to 52),
# year and count (0 to 10). The seed fixes the draws.
set.seed(1)
dat <- data.frame(
  object = sample(c("A", "B", "C", "D"), size = 100, replace = TRUE),
  week = sample(1:52, size = 100, replace = TRUE),
  year = sample(c(2016, 2017, 2018), size = 100, replace = TRUE),
  count = sample(0:10, size = 100, replace = TRUE)
)
# Bars per week, dodged by year, with one panel per object. Week is treated
# as a discrete axis.
ggplot(data = dat, mapping = aes(x = factor(week), y = count)) +
  geom_col(
    mapping = aes(fill = factor(year)),
    position = position_dodge2(width = 0.9, preserve = "single")
  ) +
  facet_wrap(vars(object), ncol = 2, scales = "free_y") +
  scale_fill_discrete(name = "Year") +
  labs(x = "Week", y = "Count")
You can restrict in data itself.
Try with the below code :
# Keep only the weeks up to and including the current ISO week.
# format(Sys.Date(), "%V") returns a string; comparing the numeric `week`
# column with a string is done alphabetically (e.g. "5" is not below "45"),
# so the week number is converted to an integer first. `<=` keeps the
# current week itself.
data <- dat %>%
  filter(week <= as.integer(format(Sys.Date(), "%V")))
Plotting :
# Same bar plot as for the full data, drawn from the filtered rows, with the
# week labels rotated.
ggplot(data = data, mapping = aes(x = factor(week), y = count)) +
  geom_col(
    mapping = aes(fill = factor(year)),
    position = position_dodge2(width = 0.9, preserve = "single")
  ) +
  facet_wrap(vars(object), ncol = 2, scales = "free_y") +
  scale_fill_discrete(name = "Year") +
  labs(x = "Week", y = "Count") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.4))
Why not add a filter before the plot call for week:
# Reproducible example data: 100 random rows of object, week (1 to 52),
# year and count (0 to 10). The seed fixes the draws.
set.seed(1)
dat <- data.frame(
  object = sample(c("A", "B", "C", "D"), size = 100, replace = TRUE),
  week = sample(1:52, size = 100, replace = TRUE),
  year = sample(c(2016, 2017, 2018), size = 100, replace = TRUE),
  count = sample(0:10, size = 100, replace = TRUE)
)

dat %>%
  # Drop the weeks after week 45 before the data reaches ggplot.
  filter(week <= 45) %>%
  ggplot(mapping = aes(x = factor(week), y = count)) +
  geom_col(
    mapping = aes(fill = factor(year)),
    position = position_dodge2(width = 0.9, preserve = "single")
  ) +
  facet_wrap(vars(object), ncol = 2, scales = "free_y") +
  scale_fill_discrete(name = "Year") +
  labs(x = "Week", y = "Count")