How to costumize bar chart in ggplot R? - r

I have a mirrored bar-chart, and I want avoid mirror bars and have the same graphs but 2 columns for each category:
Negative and positive (firstly positive values on right side and below negative values on left side)
Colours must be determined by 'Model' categorical variable and pos & neg values need to be different, like fully coloured bars = positive, bounded (with the same colour) but not filled bars = negative.
Also, USA based values must be on the top and Canada based values below
df <- data.frame (Origin = c("Canada", "Canada","Canada", "Canada","Canada", "Canada","USA","USA","USA","USA","USA","USA"),
Model = c("A","B","C","D","E","F","A","B","C","D","E","F"),
poschange = c(60, 45,34,56, 65, 44,40, 55, 35, 24,34,12),
negchange = c(-5,-2,-0.5,-2,-1,-0.05,-1,-3,-0.1,-3,-1.5,-0.9))
require(dplyr)
require(ggplot2)
require(tidyr)
df2 <- df %>% pivot_longer(., cols=c('poschange','negchange'),
names_to = 'value_category')
df2 <- df2 %>% mutate(Groups = paste(Origin, Model))
df2 <- df2 %>% mutate(label_position=ifelse(value>0, value-5,value-8)) # adjusting label position
df2 %>% arrange(value) %>% ggplot(aes(x=value, y=reorder(Groups,value),
fill=value_category,
group=value_category))+
geom_col(width=0.75)
coord_flip()
Output:
Desired output (something like this but colours must be corresponding to Model cat. variable):

Maybe something like this?
Use an ifelse statement to label the negative values as "white"
To have a fill of white, use scale_fill_manual with a my_color palette
To avoid "mirrored" bars, use position = "dodge"
To have negative and positive values side-by-side, you need to swap your x and y argument in ggplot
To avoid overlapping text on the x-axis, use theme(axis.text.x = element_text(angle = 90))
Use the breaks argument in both scale_xxx_manual function to remove the "white" label in the legend
library(tidyverse)
df <- data.frame (Origin = c("Canada", "Canada","Canada", "Canada","Canada", "Canada","USA","USA","USA","USA","USA","USA"),
Model = c("A","B","C","D","E","F","A","B","C","D","E","F"),
poschange = c(60, 45,34,56, 65, 44,40, 55, 35, 24,34,12),
negchange = c(-5,-2,-0.5,-2,-1,-0.05,-1,-3,-0.1,-3,-1.5,-0.9))
df2 <- df %>% pivot_longer(., cols=c('poschange','negchange'),
names_to = 'value_category') %>%
mutate(Groups = paste(Origin, Model),
value_category = factor(value_category, levels = c("negchange", "poschange")))
my_color = c("A" = '#7fc97f', "B" = '#beaed4', "C" = '#fdc086',
"D" = '#ffff99', "E" = '#386cb0', "F" = '#f0027f', "white" = "white")
ggplot(df2, aes(value, Model,
fill = ifelse(value_category == "negchange", "white", Model),
color = Model)) +
geom_col(position = "dodge") +
scale_fill_manual(values = my_color, breaks = df2$Model) +
scale_color_manual(values = my_color, breaks = df2$Model) +
labs(fill = "Model") +
facet_grid(Origin ~ ., switch = "y") +
theme(axis.text.x = element_text(angle = 90),
strip.background = element_rect(fill = "white"),
strip.placement = "outside",
strip.text.y.left = element_text(angle = 0),
panel.spacing = unit(0, "lines"))
Created on 2022-05-03 by the reprex package (v2.0.1)

Related

ggplot2: add two groups to one facet

I have a dataframe with of annual temperature time series from two locations (i.e., Site 1 & Site 2). The three temperature variables are:
Air temperature
Water temperature
Difference = Air - Water
I would like to produce a four-panel figure where the top and bottom rows are Site 1 and Site 2 respectively, the left column displays Air and Water and the right column shows Difference. Is there a way to do this using facet_wrap() or facet_grid()?
Example Data
library(data.table)
library(dplyr)
library(ggplot2)
set.seed(321)
# Create the example air and water temperature time series
df1 <- data.frame(matrix(ncol = 4, nrow = 365*4))
colnames(df1)[1:4] <- c("Location","Variable", "Date", "Temperature")
df1[1:730,1] <- "Site 1"
df1[731:NROW(df1),1] <- "Site 2"
df1[c(1:365,731:1095),2] <- "Air"
df1[c(366:730,1096:NROW(df1)),2] <- "Water"
df1$Date <- rep(seq.Date(as.Date("2021-01-01"),as.Date("2021-12-31"),"1 day"),4)
df1$noise <- rep(runif(365),4)
df1$t <- rep(seq(0,1*pi,,365),4)
for (i in 1:NROW(df1)) {
df1$Temperature[1:365] <- 20*sin(df1$t)+df1$noise*8
df1$Temperature[365:730] <- 17*sin(df1$t)+df1$noise*2
df1$Temperature[731:1095] <- 25*sin(df1$t)+df1$noise*6
df1$Temperature[1096:NROW(df1)] <- 18*sin(df1$t)+df1$noise*1.5
}
# Take the difference between air and water temperature
df1 <- df1[,1:4]
site1 <- df1[df1$Location == 'Site 1',]
site1 <- site1 %>%
tidyr::pivot_wider(names_from = Variable, values_from = Temperature) %>%
mutate(Difference = Air - Water) %>%
tidyr::pivot_longer(cols = c('Water','Air','Difference'),
names_to = 'Variable',
values_to = 'Temperature')
site2 <- df1[df1$Location == 'Site 2',]
site2 <- site2 %>%
tidyr::pivot_wider(names_from = Variable, values_from = Temperature) %>%
mutate(Difference = Air - Water) %>%
tidyr::pivot_longer(cols = c('Water','Air','Difference'),
names_to = 'Variable',
values_to = 'Temperature')
# Recombine data from site 1 and site 2 for final dataset
df1 <- rbind(site1,site2)
This is an example of what I am looking for, however instead of having a six-panel figure, I would like Air and Water displayed together, creating a four-panel figure.
df1 %>%
ggplot() +
geom_line(aes(x = Date, y = Temperature, group = Variable, color = Variable)) +
theme_bw() +
theme(panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14, color = "black", angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 14, color = "black")) +
facet_grid(Location~Variable)
Try creating a new variable that groups "Air" and "Water" observatiopns, and specifying it to the facet:
df1 %>%
mutate(var_air_water = ## Here is the new variable
if_else(Variable %in% c("Air", "Water"),
true = "Air & Water",
false = Variable)) %>%
ggplot() +
geom_line(aes(x = Date, y = Temperature, group = Variable, color = Variable)) +
theme_bw() +
theme(panel.grid = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14, color = "black", angle = 90, vjust = 0.5, hjust = 1),
axis.text.y = element_text(size = 14, color = "black")) +
facet_grid(Location~var_air_water)

Combining two heatmaps with the variables next to each other

I'm trying to combine two heatmaps. I want var_a and var_x on the y axis with for example: var_a first and then var_x. I don't know if I should do this by changing the dataframe or combining them, or if I can do this in ggplot.
Below I have some example code and a drawing of what I want (since I don't know if I explained it right).
I hope someone has ideas how I can do this either in the dataframe or in ggplot!
Example code:
df_one <- data.frame(
vars = c("var_a", "var_b", "var_c"),
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_vars = c(5, 10, 20),
expression_organ_2_vars = c(50, 2, 10),
expression_organ_3_vars = c(5, 10, 3)
)
df_one_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_one <- ggplot(df_one_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_one
df_two <- data.frame(
corresponding_vars = c("var_x", "var_y", "var_z"),
expression_organ_1_corresponding_vars = c(100, 320, 120),
expression_organ_2_corresponding_vars = c(23, 30, 150),
expression_organ_3_corresponding_vars = c(89, 7, 200)
)
df_two_long <- pivot_longer(df_one,
cols=3:5,
names_to = "tissueType",
values_to = "Expression")
expression.df_two <- ggplot(df_two_long,
mapping = aes(y=tissueType, x=vars, fill = Expression)) +
geom_tile()
expression.df_two
Drawing:
You can bind your data frames together and pivot into a longer format so that vars and corresponding vars are in the same column, but retain a grouping variable to facet by:
df_two %>%
mutate(cor = corresponding_vars) %>%
rename_with(~sub('corresponding_', '', .x)) %>%
bind_rows(df_one %>% rename(cor = corresponding_vars)) %>%
pivot_longer(contains('expression'), names_to = 'organ') %>%
mutate(organ = gsub('expression_|_vars', '', organ)) %>%
group_by(cor) %>%
summarize(vars = vars, organ = organ, value = value,
cor = paste(sort(unique(vars)), collapse = ' cor ')) %>%
ggplot(aes(vars, organ, fill = value)) +
geom_tile(color = 'white', linewidth = 1) +
facet_grid(.~cor, scales = 'free_x', switch = 'x') +
scale_fill_viridis_c() +
coord_cartesian(clip = 'off') +
scale_x_discrete(expand = c(0, 0)) +
theme_minimal(base_size = 16) +
theme(strip.placement = 'outside',
axis.text.x = element_blank(),
axis.ticks.x.bottom = element_line(),
panel.spacing.x = unit(3, 'mm'))
Okay, so I solved the issue for my own project, which is to convert it to a scatter plot. I combined both datasets and then used a simple scatterplot.
df.combined <- dplyr::full_join(df_two_long, df_one_long,
by = c("vars", "corresponding_vars", "tissueType"))
ggplot(df.combined,
aes(x=vars, y=tissueType, colour=Expression.x, size = Expression.y)) +
geom_point()
It's not a solution with heatmaps, but I don't know how to do that at the moment.

Making multi-group line plot with many observations more readable

I have created the following plot:
From a bigger version (5 rows, 58 columns) of this df:
df <- data.frame(row.names = c("ROBERT", "FRANK", "MICHELLE", "KATE"), `1` = c(31, 87, 22, 12), `2` = c(37, 74, 33, 20), `3` = c(35, 32, 44, 14))
colnames(df) <- c("1", "2", "3")
In the following manner:
df = df %>%
rownames_to_column("Name") %>%
as.data.frame()
df <- melt(df , id.vars = 'Name', variable.name = 'ep')
ggplot(df, aes(ep,value)) + geom_line(aes(colour = Name, group=Name))
The plot kind of shows what I'd like to, but it really is a mess. Does anyone have a suggestion that would help me increasing its readability?
Any help is very much appreciated!
Here are a few options for visualizing lots of datapoints across a smallish number of cases. These are illustrated with a subset of the txhousing data included with ggplot2.
Solution 1: Faceting
As #rdelrossi suggested, one solution is to facet by Name:
library(ggplot2)
ggplot(df, aes(ep,value)) +
geom_line(aes(colour = Name, group=Name), show.legend = FALSE) +
scale_x_continuous(expand = c(0,0)) +
facet_wrap(vars(Name), ncol = 1, scales = "free_x") +
theme_bw()
Solution 2: Smoothing
Use geom_smooth() to smooth out local fluctuations to see larger longer-term trends:
ggplot(df, aes(ep,value)) +
geom_smooth(
aes(colour = Name, group=Name),
se = FALSE,
span = 1, # higher number = smoother
size = 1.25
) +
scale_x_date(expand = c(0,0)) +
theme_bw()
Solution 3: Lasagna
Sometimes called a "lasagna plot," this is a heatmap with cases on the y axis, time (or whatever) on the x axis, and values mapped to color. It's a different way of comparing changes within (left to right) and between (up and down) individuals.
ggplot(df, aes(ep, Name, colour = value, fill = value)) +
geom_tile(size = .5) +
scale_fill_viridis_c(option = "B", aesthetics = c("colour", "fill")) +
coord_cartesian(expand = FALSE) +
theme(
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.y = element_blank()
)
(may want to click through to larger image)
Data prep:
library(dplyr)
library(lubridate)
df <- txhousing %>%
filter(
city %in% c("Beaumont", "Amarillo", "Arlington", "Corpus Christi", "El Paso"),
between(year, 2004, 2012)
) %>%
group_by(city) %>%
mutate(
Name = city,
value = scale(sales),
ep = ym(str_c(year, month))
) %>%
ungroup()
If your readability concern is just the x axis labels, then I think the main issue is that when you use reshape2::melt() the result is that the column ep is a factor which means that the x axis of your plot will show all the levels and get crowded. The solution is to convert it to numeric and then it will adjust the labels in a sensible way.
I replace your use of reshape2::melt() with tidyr::pivot_longer() which has superseded it within the {tidyverse} but your original code would still work.
library(tidyverse)
df <- structure(list(`1` = c(31, 87, 22, 12), `2` = c(37, 74, 33, 20), `3` = c(35, 32, 44, 14)), class = "data.frame", row.names = c("ROBERT", "FRANK", "MICHELLE", "KATE"))
df %>%
rownames_to_column("Name") %>%
pivot_longer(-Name, names_to = "ep") %>%
mutate(ep = as.numeric(ep)) %>%
ggplot(aes(ep, value, color = Name)) +
geom_line()
Created on 2022-03-07 by the reprex package (v2.0.1)
Another solution could be the use of a geom_bar()
Sample code:
ggplot(df, aes(fill=Name)) +
geom_bar(aes(x=ep, y=value, group=Name),stat="identity", position = position_dodge(width = 0.9)) +
labs(x="ep", y="count")+
scale_y_continuous(expand=c(0,0))+
theme_bw()
Plot:
Also you can add facet_grid(~Name)+
Also you can add
geom_text(aes(label=value), position = position_stack(vjust = .5))+

Reorder and split the ggplot heatmap based on the clusters in one of the columns

I generated a heatmap with ggplot, and order the samples by using hclust, However, I still need more reordering to get all the similar values corespondent with one of the samples in the ordered cluster. Here I generate a samples data to explain better.
set.seed(99)
M <- data.frame(names = paste0("g", seq(1,30)), S1 = runif(30, 0 , 8), S2 = runif(30, -4, 5), S3 = runif(30, -5, 5))
M.mat <- M %>%
tibble::column_to_rownames('names') %>%
as.matrix()
M.dendro <- as.dendrogram(hclust(d = dist(x = M.mat)))
dendro.plot <- ggdendrogram(data = M.dendro, rotate = TRUE) +
theme(axis.text.y = element_text(size = 6))
print(dendro.plot)
str(M.dendro)
dend.order <- order.dendrogram(M.dendro)
df <- melt(M, id.vars = "names")
df$names <- factor(x = df$names,
levels = M$names[dend.order],
ordered = TRUE)
ggplot(df, aes(x = names, y = variable, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient2(low = muted("steelblue"), mid = "white", high = muted("red3"),
midpoint = 0, space = "Lab", na.value = "grey50",
guide = "colourbar", aesthetics = "fill"
) +
theme(axis.text.x = element_text(angle = 90, hjust=1), legend.key.size = unit(0.4, "cm")) +
coord_fixed()
For the generated heatmap, I need reorder it such that all the dark blue be on the bottom, the middle color and then the red on the top based on samples S3. Thank you

ggplot monthly date scale on x axis uses days as units

When plotting a bar chart with monthly data, ggplot shortens the distance between February and March, making the chart look inconsistent
require(dplyr)
require(ggplot2)
require(lubridate)
## simulating sample data
set.seed(.1073)
my_df <- data.frame(my_dates = sample(seq(as.Date('2010-01-01'), as.Date('2016-12-31'), 1), 1000, replace = TRUE))
### aggregating + visualizing counts per month
my_df %>%
mutate(my_dates = round_date(my_dates, 'month')) %>%
group_by(my_dates) %>%
summarise(n_row = n()) %>%
ggplot(aes(x = my_dates, y = n_row))+
geom_bar(stat = 'identity', color = 'black',fill = 'slateblue', alpha = .5)+
scale_x_date(date_breaks = 'months', date_labels = '%y-%b') +
theme(axis.text.x = element_text(angle = 60, hjust = 1))
I would keep the dates as dates rather than factors. Yes, factors will keep the bars uniform in size but you'll have to remember to join in any months that are missing so that blank months aren't skipped and factors are easy to get out of order. I would recommend adjusting your aesthetics to reduce the effect that the black outline has on the gap between February and March.
Here are two examples:
Adjust the outline color to be white. This will reduce the contrast and makes the gap less noticible.
Set the width to 20 (days).
As an aside, you don't need to summarize the data, you can use floor_date() or round_date() in an earlier step and go straight into geom_bar().
dates <- seq(as.Date("2010-01-01"), as.Date("2016-12-31"), 1)
set.seed(.1073)
my_df <-
tibble(
my_dates = sample(dates, 1000, replace = TRUE),
floor_dates = floor_date(my_dates, "month")
)
ggplot(my_df, aes(x = floor_dates)) +
geom_bar(color = "white", fill = "slateblue", alpha = .5)
ggplot(my_df, aes(x = floor_dates)) +
geom_bar(color = "black", fill = "slateblue", alpha = .5, width = 20)
using some parts from IceCream's answer you can try this.
Of note, geom_col is now recommended to use in this case.
my_df %>%
mutate(my_dates = factor(round_date(my_dates, 'month'))) %>%
group_by(my_dates) %>%
summarise(n_row = n()) %>%
ungroup() %>%
mutate(my_dates_x = as.numeric(my_dates)) %>%
mutate(my_dates_label = paste(month(my_dates,label = T), year(my_dates))) %>%
{ggplot(.,aes(x = my_dates_x, y = n_row))+
geom_col(color = 'black',width = 0.8, fill = 'slateblue', alpha = .5) +
scale_x_continuous(breaks = .$my_dates_x, labels = .$my_dates_label) +
theme(axis.text.x = element_text(angle = 60, hjust = 1))}
You can convert it to a factor variable to use as the axis, and fix the formatting with a label argument to scale_x_discrete.
library(dplyr)
library(ggplot2)
my_df %>%
mutate(my_dates = factor(round_date(my_dates, 'month'))) %>%
group_by(my_dates) %>%
summarise(n_row = n()) %>%
ggplot(aes(x = my_dates, y = n_row))+
geom_bar(stat = 'identity', color = 'black',fill = 'slateblue', alpha = .5)+
scale_x_discrete(labels = function(x) format(as.Date(x), '%Y-%b'))+
theme(axis.text.x = element_text(angle = 60, hjust = 1))
Edit: Alternate method to account for possibly missing months which should be represented as blank spaces in the plot.
library(dplyr)
library(ggplot2)
library(lubridate)
to_plot <-
my_df %>%
mutate(my_dates = round_date(my_dates, 'month'),
my_dates_ticks = interval(min(my_dates), my_dates) %/% months(1))
to_plot %>%
group_by(my_dates_ticks) %>%
summarise(n_row = n()) %>%
ggplot(aes(x = my_dates_ticks, y = n_row))+
geom_bar(stat = 'identity', color = 'black',fill = 'slateblue', alpha = .5)+
scale_x_continuous(
breaks = unique(to_plot$my_dates_ticks),
labels = function(x) format(min(to_plot$my_dates) + months(x), '%y-%b'))+
theme(axis.text.x = element_text(angle = 60, hjust = 1))

Resources