library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
# Generate data
# Builds 100 rows, each with three start/end date pairs (x1, x2, x3) and
# per-row styling columns (linesize, fontsize, colour, location).
# No seed is set here, so each run produces different values. The random
# draws are kept in their original order.
# NOTE(review): end_x3 is offset from start_x1 rather than start_x3, so it can
# fall before start_x3 -- confirm this is intended.
df_durations <- data.frame(x = as.character(1:100)) %>%
  mutate(
    # Relative line widths, rescaled so that the smallest value equals 1.
    linesize = runif(n = n()),
    linesize = linesize / sum(linesize),
    linesize = linesize / min(linesize),
    # Random start day between 2018-01-01 and 2018-04-01.
    start_x1 = as.Date(
      sample(
        as.Date("2018-01-01"):as.Date("2018-04-01"),
        size = n(),
        replace = TRUE
      ),
      origin = "1970-01-01"
    ),
    end_x1 = start_x1 + 20 + sample(-5:5, size = n(), replace = TRUE),
    start_x2 = end_x1 + sample(-5:10, size = n(), replace = TRUE),
    end_x2 = start_x2 + 15 + sample(-5:5, size = n(), replace = TRUE),
    start_x3 = end_x2 + sample(-5:10, size = n(), replace = TRUE),
    end_x3 = start_x1 + 30 + sample(-10:10, size = n(), replace = TRUE)
  ) %>%
  arrange(start_x1) %>%
  mutate(
    # Freeze the sorted row order as the factor level order.
    x = factor(x, levels = x, ordered = TRUE),
    fontsize = round(runif(n(), 5, 12)),
    colour = sample(c("black", "red", "blue"), n(), replace = TRUE),
    # Axis position of each row: running total of the label font sizes.
    location = cumsum(fontsize)
  )
# Pivot data for plotting
# Reshape the six start_*/end_* columns into one row per (item, activity) with
# a `start` and an `end` column. The part of each column name before "_"
# becomes the output column (`.value`), the part after it becomes `activity`
# (x1, x2, x3). This replaces the superseded gather/separate/spread round trip
# with a single pivot_longer() call.
df_durations_long <- df_durations %>%
  pivot_longer(
    cols = c(contains("start"), contains("end")),
    names_to = c(".value", "activity"),
    names_sep = "_"
  )
# Plot
# One line range per (item, activity), drawn from start to end and flipped so
# that dates run horizontally. Line thickness is mapped to `linesize`.
ggplot(df_durations_long, aes(x = location)) +
  geom_linerange(
    aes(ymin = start, ymax = end, colour = activity, size = linesize),
    alpha = .5
  ) +
  # Place each item's tick at its `location` and label it with its id.
  scale_x_continuous(
    breaks = df_durations$location,
    labels = df_durations$x
  ) +
  scale_y_date(date_labels = "%b-%Y", date_breaks = "2 month") +
  coord_flip() +
  # Per-label colour and size are passed as vectors, one entry per item.
  # NOTE(review): ggplot2 does not officially support vectorised input to
  # element_text() -- verify it behaves as expected on your version.
  theme(
    axis.text.y = element_text(
      colour = df_durations$colour,
      size = df_durations$fontsize
    )
  )
The above code generates the following chart.
As you can see, we have both overlapping and non-overlapping horizontal bars (intended).
However there is also overlap between vertically stacked bars (not intended).
What I am looking for is a way to introduce an equal amount of spacing between each of the bars while keeping the relative widths of all the bars intact.
I tried working with scale_size_identity, however I haven't managed to get the formatting right.
Related
# Sample data
# 20 rows: 16 in group "1" and 4 in group "2", each with four scores sampled
# with replacement from 1..20. Seeded so the example is reproducible.
set.seed(42)
DB <- data.frame(
  Group = rep(c("1", "2"), times = c(16, 4)),
  Score1 = sample(1:20, 20, replace = TRUE),
  Score2 = sample(1:20, 20, replace = TRUE),
  Score3 = sample(1:20, 20, replace = TRUE),
  Score4 = sample(1:20, 20, replace = TRUE)
)
I want to plot two bar charts comparing the mean of each score in both groups.
So the right-hand bar chart should have the title "Group 1 mean scores" and the left-hand bar chart the title "Group 2 mean scores".
Thanks.
You can pivot to long format and use stat = "summary"
library(tidyverse)
# Dodged bars: one cluster per Group, one bar per Score column. Bar height is
# the mean of the raw values, computed by ggplot2 via stat = "summary".
DB %>%
  pivot_longer(cols = -Group, names_to = "Score") %>%
  ggplot(aes(x = Group, y = value, fill = Score)) +
  geom_bar(
    stat = "summary",
    fun = mean,
    width = 0.6,
    position = position_dodge(width = 0.8, preserve = "total")
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 20)
Or if you prefer facets, you can do:
library(tidyverse)
# Faceted variant: one panel per group, one bar per Score column. The fill
# legend is hidden because the x axis already names each score.
DB %>%
  pivot_longer(cols = -Group, names_to = "Score") %>%
  # Turn "1"/"2" into "Group 1"/"Group 2" so the facet strips read as titles.
  mutate(Group = paste0("Group ", Group)) %>%
  ggplot(aes(x = Score, y = value, fill = Score)) +
  geom_bar(stat = "summary", fun = mean, width = 0.6) +
  facet_grid(. ~ Group) +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  theme_bw(base_size = 20)
Created on 2022-11-13 with reprex v2.0.2
I'm trying to combine two heatmaps. I want var_a and var_x on the y axis with for example: var_a first and then var_x. I don't know if I should do this by changing the dataframe or combining them, or if I can do this in ggplot.
Below I have some example code and a drawing of what I want (since I don't know if I explained it right).
I hope someone has ideas how I can do this either in the dataframe or in ggplot!
Example code:
# First data set: three variables, their corresponding partner variables, and
# one expression value per organ.
df_one <- data.frame(
  vars                    = c("var_a", "var_b", "var_c"),
  corresponding_vars      = c("var_x", "var_y", "var_z"),
  expression_organ_1_vars = c(5, 10, 20),
  expression_organ_2_vars = c(50, 2, 10),
  expression_organ_3_vars = c(5, 10, 3)
)

# Long format: one row per (variable, organ column). The three expression
# columns are columns 3 to 5 of df_one.
df_one_long <- pivot_longer(
  df_one,
  cols = starts_with("expression_"),
  names_to = "tissueType",
  values_to = "Expression"
)

# Heatmap of expression by variable (x) and organ column (y).
expression.df_one <- ggplot(
  df_one_long,
  mapping = aes(x = vars, y = tissueType, fill = Expression)
) +
  geom_tile()

expression.df_one
# Second data set: expression values for the corresponding variables.
df_two <- data.frame(
  corresponding_vars                    = c("var_x", "var_y", "var_z"),
  expression_organ_1_corresponding_vars = c(100, 320, 120),
  expression_organ_2_corresponding_vars = c(23, 30, 150),
  expression_organ_3_corresponding_vars = c(89, 7, 200)
)

# NOTE(review): the input below is df_one, not df_two, so df_two is never used
# and this plot repeats the df_one heatmap. It looks like a copy-paste slip
# (df_two would need its own column range and x = corresponding_vars).
# It is left unchanged because code later in this file joins df_two_long on
# `vars`, a column that exists only because df_one is pivoted here.
df_two_long <- pivot_longer(
  df_one,
  cols = starts_with("expression_"),
  names_to = "tissueType",
  values_to = "Expression"
)

expression.df_two <- ggplot(
  df_two_long,
  mapping = aes(x = vars, y = tissueType, fill = Expression)
) +
  geom_tile()

expression.df_two
Drawing:
You can bind your data frames together and pivot into a longer format so that vars and corresponding vars are in the same column, but retain a grouping variable to facet by:
# Stack df_two and df_one into one long table and draw a heatmap with one
# facet per (variable, corresponding variable) pair.
df_two %>%
  # Keep the pairing key in `cor`, then strip "corresponding_" from the column
  # names so df_two's columns line up with df_one's.
  mutate(cor = corresponding_vars) %>%
  rename_with(~ sub("corresponding_", "", .x)) %>%
  bind_rows(df_one %>% rename(cor = corresponding_vars)) %>%
  pivot_longer(contains("expression"), names_to = "organ") %>%
  mutate(organ = gsub("expression_|_vars", "", organ)) %>%
  # Build the facet label (e.g. "var_a cor var_x") from the variables sharing
  # a key. A grouped mutate() into a new column is used instead of a
  # summarise() that returns many rows per group and overwrites the grouping
  # column; multi-row summarise() results are deprecated since dplyr 1.1.0.
  group_by(cor) %>%
  mutate(pair = paste(sort(unique(vars)), collapse = " cor ")) %>%
  ungroup() %>%
  ggplot(aes(vars, organ, fill = value)) +
  geom_tile(color = "white", linewidth = 1) +
  facet_grid(. ~ pair, scales = "free_x", switch = "x") +
  scale_fill_viridis_c() +
  coord_cartesian(clip = "off") +
  scale_x_discrete(expand = c(0, 0)) +
  theme_minimal(base_size = 16) +
  theme(
    strip.placement = "outside",
    axis.text.x = element_blank(),
    axis.ticks.x.bottom = element_line(),
    panel.spacing.x = unit(3, "mm")
  )
Okay, so I solved the issue for my own project, which is to convert it to a scatter plot. I combined both datasets and then used a simple scatterplot.
# Join the two long tables on their shared identifier columns. Both inputs
# carry an `Expression` column, so the join suffixes them: Expression.x comes
# from df_two_long and Expression.y from df_one_long.
df.combined <- dplyr::full_join(
  df_two_long,
  df_one_long,
  by = c("vars", "corresponding_vars", "tissueType")
)

# Scatter plot: colour encodes Expression.x, point size encodes Expression.y.
ggplot(
  df.combined,
  aes(
    x = vars,
    y = tissueType,
    colour = Expression.x,
    size = Expression.y
  )
) +
  geom_point()
It's not a solution with heatmaps, but I don't know how to do that at the moment.
library(tidyverse)
library(ggsci)
# Simulated survey: 500 respondents, a Positive/Negative decision and five
# categorical answers, each sampled with replacement. No seed is set here.
DF <- tibble(
  Decision = sample(c("Negative", "Positive"), size = 500, replace = TRUE),
  Category1 = sample(c("X", "Y", "Z"), size = 500, replace = TRUE),
  Category2 = sample(
    c("Yellow", "Blue", "Black", "White"),
    size = 500,
    replace = TRUE
  ),
  Category3 = sample(c("Xyz", "Yes", "Zos"), size = 500, replace = TRUE),
  Category4 = sample(c("O", "F"), size = 500, replace = TRUE),
  Category5 = sample(
    c("Xxx", "Yyy", "Zzz", "ooo", "Aha!"),
    size = 500,
    replace = TRUE
  )
)
I have a dataset with five different questions, each with a unique set of answers. Each person who answered these five questions ended with either a positive or negative decision (the 6th variable). I created a bar plot which shows the percent of positive decisions separately for each answer to each question using the code below:
# Share of "Positive" decisions for every answer of every question, drawn as
# horizontal bars coloured by question.
DF %>%
  pivot_longer(cols = -Decision, names_to = "Category", values_to = "Answer") %>%
  count(Category, Decision, Answer) %>%
  # Percent of each decision within one (question, answer) combination.
  group_by(Category, Answer) %>%
  mutate(percent = n / sum(n) * 100) %>%
  filter(Decision == "Positive") %>%
  ggplot(
    aes(
      x = fct_reorder2(Answer, percent, Category),
      y = percent,
      fill = Category
    )
  ) +
  geom_col(
    position = position_dodge(),
    width = 0.95,
    color = "black",
    alpha = 0.5
  ) +
  coord_flip() +
  scale_fill_uchicago() +
  scale_y_continuous(
    breaks = seq(0, 100, 20),
    labels = str_c(seq(0, 100, 20), "%")
  ) +
  labs(x = "", y = "", fill = "") +
  theme_classic() +
  theme(legend.position = "top")
This is the product:
My question is: is there any way to add some space between the sets of answers to each of the questions? I would like the columns of the same color to be next to each other, but at the same time I would like to add some space between columns of different colors, so as to make it visually clearer that those are 5 different variables.
Additionally, if it is possible, I would like to present the columns in a descending order separately for each category.
Unfortunately, adding width to position_dodge
position_dodge(0.5)
does not work, which I guess makes sense.
I will be grateful for any help. Thank you in advance!
This could be achieved like so:
To add some space between categories you could make use of facet_grid, get rid of the strip texts and set the panel spacing to zero. Additionally, I make use of space = "free" so that your bars still have the same width.
To reorder your bars in descending order you could make use of tidytext::reorder_within() and tidytext::scale_x_reordered().
library(tidyverse)
library(ggsci)
library(tidytext)
# Reproducible version of the simulated survey data.
set.seed(42)
DF <- tibble(
  Decision = sample(c("Negative", "Positive"), size = 500, replace = TRUE),
  Category1 = sample(c("X", "Y", "Z"), size = 500, replace = TRUE),
  Category2 = sample(
    c("Yellow", "Blue", "Black", "White"),
    size = 500,
    replace = TRUE
  ),
  Category3 = sample(c("Xyz", "Yes", "Zos"), size = 500, replace = TRUE),
  Category4 = sample(c("O", "F"), size = 500, replace = TRUE),
  Category5 = sample(
    c("Xxx", "Yyy", "Zzz", "ooo", "Aha!"),
    size = 500,
    replace = TRUE
  )
)

# Percent of positive decisions per answer, one facet row per question, with
# answers ordered by percent inside each question.
DF %>%
  pivot_longer(cols = -Decision, names_to = "Category", values_to = "Answer") %>%
  count(Category, Decision, Answer) %>%
  group_by(Category, Answer) %>%
  mutate(percent = n / sum(n) * 100) %>%
  filter(Decision == "Positive") %>%
  ungroup() %>%
  # reorder_within() orders answers by percent separately within each
  # Category; scale_x_reordered() below cleans up the resulting axis labels.
  mutate(
    Answer = tidytext::reorder_within(Answer, by = percent, within = Category)
  ) %>%
  ggplot(aes(x = Answer, y = percent, fill = Category)) +
  geom_col(
    position = position_dodge(),
    width = 0.9,
    color = "black",
    alpha = 0.5
  ) +
  # Free y scales and free space let each facet show only its own answers.
  facet_grid(Category ~ ., scales = "free_y", space = "free") +
  coord_flip() +
  tidytext::scale_x_reordered() +
  scale_y_continuous(
    breaks = seq(0, 100, 20),
    labels = str_c(seq(0, 100, 20), "%")
  ) +
  scale_fill_uchicago() +
  labs(x = "", y = "", fill = "") +
  theme_classic() +
  theme(
    legend.position = "top",
    strip.text = element_blank(),
    panel.spacing.y = unit(0, "pt")
  )
I have a point plot with two different points on each category and I want to create a line segment joining the two points on each row.
# Two points per category: the average buy value and the average sell value.
# Note that the columns are prefixed "med_" but are computed with mean().
items %>%
  group_by(category) %>%
  summarise(
    med_buy_price = mean(buy_value, na.rm = TRUE),
    med_sell_price = mean(sell_value, na.rm = TRUE)
  ) %>%
  # Long format so that `measure` can drive the point colour.
  pivot_longer(
    cols = c("med_buy_price", "med_sell_price"),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value, y = category)) +
  geom_point(aes(color = measure), size = 3)
For creating a line segment, you need to have start and endpoints for the segment. Thus, you can stay with the wide format, so no pivot_longer needed.
Then create individual geom_point for sell and buy value and a geom_segment combining both points.
This code will work:
library(ggplot2)
library(dplyr)
library(tibble)
library(tidyr)
# Small example data set: one buy and one sell value per category.
items <- tribble(
  ~category,   ~buy_value, ~sell_value,
  "Wallpaper",       2000,        5200,
  "Usables",          500,       12500,
  "Umbrellas",        200,        1800
)

# Keep the data wide so that each row holds both ends of its segment, then
# draw the buy point (red), the sell point (green) and the line joining them.
items %>%
  group_by(category) %>%
  summarise(
    med_buy_price = mean(buy_value, na.rm = TRUE),
    med_sell_price = mean(sell_value, na.rm = TRUE)
  ) %>%
  ggplot(aes(y = category)) +
  geom_point(aes(x = med_buy_price), size = 3, color = "red") +
  geom_point(aes(x = med_sell_price), size = 3, color = "green") +
  geom_segment(
    aes(x = med_buy_price, xend = med_sell_price, yend = category)
  )
If you do not insist on using geom_point, you could try geom_errorbar, which simplifies things a little bit:
# Alternative: a single horizontal error bar per category spanning from the
# average buy value to the average sell value.
items %>%
  group_by(category) %>%
  summarise(
    med_buy_price = mean(buy_value, na.rm = TRUE),
    med_sell_price = mean(sell_value, na.rm = TRUE)
  ) %>%
  ggplot() +
  geom_errorbar(
    aes(xmin = med_buy_price, xmax = med_sell_price, y = category),
    width = 0.1
  )
When plotting a bar chart with monthly data, ggplot shortens the distance between February and March, making the chart look inconsistent
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later with a less clear message.
library(dplyr)
library(ggplot2)
library(lubridate)

## simulating sample data
# NOTE(review): set.seed() expects an integer; a fractional seed such as .1073
# is presumably truncated (to 0) -- confirm, and prefer an integer seed.
set.seed(.1073)
my_df <- data.frame(
  my_dates = sample(
    seq(as.Date("2010-01-01"), as.Date("2016-12-31"), 1),
    1000,
    replace = TRUE
  )
)

### aggregating + visualizing counts per month
my_df %>%
  # floor_date() assigns every date to the month it falls in. round_date()
  # would push dates from the second half of a month into the next month and
  # so miscount the months.
  mutate(my_dates = floor_date(my_dates, "month")) %>%
  count(my_dates, name = "n_row") %>%
  ggplot(aes(x = my_dates, y = n_row)) +
  geom_col(color = "black", fill = "slateblue", alpha = .5) +
  scale_x_date(date_breaks = "months", date_labels = "%y-%b") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
I would keep the dates as dates rather than factors. Yes, factors will keep the bars uniform in size but you'll have to remember to join in any months that are missing so that blank months aren't skipped and factors are easy to get out of order. I would recommend adjusting your aesthetics to reduce the effect that the black outline has on the gap between February and March.
Here are two examples:
Adjust the outline color to be white. This will reduce the contrast and make the gap less noticeable.
Set the width to 20 (days).
As an aside, you don't need to summarize the data, you can use floor_date() or round_date() in an earlier step and go straight into geom_bar().
# Every calendar day from 2010-01-01 to 2016-12-31.
dates <- seq(from = as.Date("2010-01-01"), to = as.Date("2016-12-31"), by = 1)

# 1000 random days, plus the first day of the month each one falls in.
set.seed(.1073)
my_df <- tibble(my_dates = sample(dates, 1000, replace = TRUE)) %>%
  mutate(floor_dates = floor_date(my_dates, "month"))

# Option 1: white outline, so the uneven gaps between bars stand out less.
ggplot(my_df, aes(x = floor_dates)) +
  geom_bar(fill = "slateblue", color = "white", alpha = .5)

# Option 2: fixed bar width of 20 (days on the date axis) for every month.
ggplot(my_df, aes(x = floor_dates)) +
  geom_bar(fill = "slateblue", color = "black", alpha = .5, width = 20)
using some parts from IceCream's answer you can try this.
Of note, geom_col is now recommended to use in this case.
# Monthly counts on an evenly spaced numeric axis: each month becomes the
# integer rank of its date and is labelled "<month abbreviation> <year>".
my_df %>%
  # floor_date() keeps each date in its own month; round_date() would move
  # dates from the second half of a month into the next one.
  mutate(my_dates = floor_date(my_dates, "month")) %>%
  count(my_dates, name = "n_row") %>%
  mutate(
    # Factor codes follow the sorted dates, giving positions 1, 2, 3, ...
    my_dates_x = as.numeric(factor(my_dates)),
    # Labels are built from the Date column itself, so no factor has to be
    # coerced back to a date.
    my_dates_label = paste(month(my_dates, label = TRUE), year(my_dates))
  ) %>%
  {
    # Braces let the piped data (`.`) be reused inside scale_x_continuous().
    ggplot(., aes(x = my_dates_x, y = n_row)) +
      geom_col(color = "black", width = 0.8, fill = "slateblue", alpha = .5) +
      scale_x_continuous(breaks = .$my_dates_x, labels = .$my_dates_label) +
      theme(axis.text.x = element_text(angle = 60, hjust = 1))
  }
You can convert it to a factor variable to use as the axis, and fix the formatting with a label argument to scale_x_discrete.
library(dplyr)
library(ggplot2)
# Monthly counts on a discrete axis: months become factor levels, so every bar
# gets the same width, and the labels are reformatted from the level text.
my_df %>%
  # floor_date() keeps each date in its own month (round_date() would shift
  # dates from the second half of a month into the next one). The call is
  # namespace-qualified because this snippet does not attach lubridate.
  mutate(my_dates = factor(lubridate::floor_date(my_dates, "month"))) %>%
  count(my_dates, name = "n_row") %>%
  ggplot(aes(x = my_dates, y = n_row)) +
  geom_col(color = "black", fill = "slateblue", alpha = .5) +
  # Levels are "YYYY-MM-DD" strings; parse them back to dates for formatting.
  scale_x_discrete(labels = function(x) format(as.Date(x), "%Y-%b")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
Edit: Alternate method to account for possibly missing months which should be represented as blank spaces in the plot.
library(dplyr)
library(ggplot2)
library(lubridate)
# Monthly counts on a numeric "months since the first month" axis, so that a
# month with no observations shows up as a gap instead of being skipped.
to_plot <- my_df %>%
  mutate(
    # floor_date() keeps each date in its own month; round_date() would move
    # dates from the second half of a month into the next one.
    my_dates = floor_date(my_dates, "month"),
    # Whole months elapsed since the earliest month in the data.
    my_dates_ticks = interval(min(my_dates), my_dates) %/% months(1)
  )

to_plot %>%
  count(my_dates_ticks, name = "n_row") %>%
  ggplot(aes(x = my_dates_ticks, y = n_row)) +
  geom_col(color = "black", fill = "slateblue", alpha = .5) +
  scale_x_continuous(
    breaks = unique(to_plot$my_dates_ticks),
    # Turn a tick index back into a date label by adding that many months to
    # the earliest month.
    labels = function(x) format(min(to_plot$my_dates) + months(x), "%y-%b")
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))