add text values to ggplot on secondary axis - r

I have to create graph
Following is my sample data frame
data <- data.frame(
"Tissue" = c("Adrenal gland", "Appendix", "Appendix"),
"protein.expression" = c("No detect","No detect", "Medium"),
"cell.type" = c("Glandular cells" ,"Lymphoid tissu","Glandular cells")
)
Left y axis is unique tissue type. Left axis have comma separated cell types.
I am not sure how to get the celltypes corresponding to each tissue (on left y axis) to right axis (in comma separated form)
My code is
p1 <- ggplot(dat %>% filter(facet==1), aes(
x = tissue,
y = factor(protein.expression, levels = unique(protein.expression, decreasing = F), ordered = TRUE),
fill = protein.expression,
label = cell.type
)) +
geom_point(stat = 'identity', aes(col = protein.expression), size = 12) +
geom_text(size = 6, fontface = "bold", colour = "white") +
geom_label() +
# facet_grid(cell.type ~ ., scales = "free", space = "free") +
scale_fill_manual(values = myPalette, drop = FALSE) +
scale_color_manual(values = myPalette, drop = FALSE) +
theme_classic() +
labs(title = "Protein Atlas") +
guides(fill = guide_legend(title = "Protein expression")) +
ylab("Cell types measured per tissue") +
# ylim(1,4) +
coord_flip() +
theme(axis.text.x = element_text(size = 25, vjust = 0.5, hjust = .9),
axis.text.y = element_text(size = 25),
legend.position = "none",
axis.title.x = element_text(size = 30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))
the cell types are with in the dots. I want them to be on the right side, comma sepearated and if possible color coded by corresponding protein expression label.

So this is a bit of a hack but it might work for you.
I introduce a third column in the graph to hold the labels as per my original post.
I pre-process your data to try and spread out the labels in this third column around the Tissue variable to that they don't appear all on top of each other.
my pre-processing is pretty ugly but works ok. Note that I only catered for a max of 4 cell.types as per your comment.
It gives me this graph:
My code:
data = data.frame("Tissue"=c("Adrenal gland", "Appendix", "Appendix"), "protein.expression" = c("No detect","No detect", "Medium"), "cell.type" = c("Glandular cells" ,"Lymphoid tissu","Glandular cells"))
# Pre-processing section.
# Step 1: find out the n of cell.types per tissue type
counters <- data %>% group_by(Tissue) %>% summarise(count = n())
# Step 2: Join n back to original data. Transform protein.expression to ordered factor
data <- data %>%
inner_join(counters, by="Tissue") %>%
mutate(protein = factor(protein.expression, levels=unique(protein.expression, decreasing = F), ordered=TRUE),
positionTissue = as.numeric(Tissue))
results <- data.frame()
# Step 3: Spread the cell.type labels around the position of the Tissue. 4 scenarios catered for.
for(t in unique(data$Tissue)){
subData <- filter(data, Tissue == t)
subData$spreader <- as.numeric(subData$Tissue)
if(length(unique(subData$cell.type)) == 2){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.1,as.numeric(Tissue)+0.1)) %>%
select(-x)
results <- rbind(results, subData)
} else if(length(unique(subData$cell.type)) == 3){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.15,
ifelse(as.numeric(x)==3,as.numeric(Tissue)+0.15,as.numeric(Tissue)))) %>%
select(-x)
results <- rbind(results, subData)
} else if(length(unique(subData$cell.type)) == 4){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.2,
ifelse(as.numeric(x)==2,as.numeric(Tissue)-0.1,
ifelse(as.numeric(x)==3,as.numeric(Tissue)+0.1,
ifelse(as.numeric(x)==4,as.numeric(Tissue)+0.2,as.numeric(Tissue)))))) %>%
select(-x)
results <- rbind(results, subData)
} else{
results <- rbind(results, subData)
}
}
# Plot the data based on the new label position "spreader" variable
ggplot(results, aes(x = positionTissue, y = protein, label=cell.type)) +
geom_point(stat='identity', aes(col=protein.expression), size=12) +
geom_text(aes(y=0.5,label=Tissue), size=8, fontface="bold", angle=90)+
geom_label(aes(y="zzz", x=spreader, fill=protein), colour="white") +
theme_classic() +
scale_x_continuous(limits = c(min(as.numeric(data$Tissue))-0.5,max(as.numeric(data$Tissue))+0.5))+
scale_y_discrete(breaks=c("Medium","No detect")) +
labs(title="Protein Atlas") +
guides(fill=guide_legend(title="Protein expression"))+
ylab("Cell types measured per tissue") +
xlab("") +
#ylim(1,4) +
coord_flip()+
theme(axis.text.x = element_text(size = 25),
axis.text.y = element_text(colour = NA),
legend.position = "none",
axis.title.x = element_text(size=30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))
Edit #2:
Update to retain label colours by creating n positions where n is the number of cell.types:
data = data %>%
mutate(position = paste("z",cell.type))
Then you can use this new position variable instead of the static "zzz" I suggested in my original post. Your labels will have the correct colours, but your chart will look odd if there are a lot of cell.types.
geom_label(aes(y=position, label = cell.type)) +
EDIT #1: Update to avoid overlapping labels by grouping cell.types to a single label per tissue.
Creating a new label field that concatenates the individual labels for each tissue type:
data = data %>%
group_by(Tissue) %>%
mutate(label = paste(cell.type, collapse = "; "))
And amend the ggplot call to use this new field instead of the existing cell.type field:
geom_text(aes(y="zzz", label = label), size = 6, fontface = "bold", colour = "white")+
or:
geom_label(aes(y="zzz", label = label),) +
ORIGINAL POST:
You could plot your labels at a third position (e.g. "zzz") and then hide that position from the set of axis labels using scale_x_discrete(breaks=c()).
ggplot(data, aes(x = Tissue, y = factor(protein.expression,
levels=unique(protein.expression,
decreasing = F),
ordered=TRUE), fill = protein.expression,
label = cell.type))+
geom_point(stat='identity', aes(col=protein.expression), size=12) +
geom_text(aes(y="zzz"), size = 6, fontface = "bold", colour = "white")+
geom_label(aes(y="zzz"),) +
# facet_grid(cell.type ~ ., scales = "free", space = "free") +
# scale_fill_manual(values = myPalette, drop = FALSE) +
# scale_color_manual(values = myPalette, drop = FALSE) +
theme_classic() +
scale_y_discrete(breaks=c("Medium","No detect"))+
labs(title="Protein Atlas") +
guides(fill=guide_legend(title="Protein expression"))+
ylab("Cell types measured per tissue") +
#ylim(1,4) +
coord_flip()+
theme(axis.text.x = element_text(size = 25, vjust = 0.5, hjust = .9),
axis.text.y = element_text(size = 25),
legend.position = "none",
axis.title.x = element_text(size=30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))

Related

Using stat_summary to produce individualized boxplots with aes(group = ...)?

I tried to produce 12 boxplots per ggplots stat_summary() functions, as you can see below in the reproducible example. I used stat_summary() instead of geom_boxplot(), because I want to whiskers to end at the 1st and 99th percentile of the data or to be individualized so to speak. I coded two functions, one for the whiskers and one for the outliers and used them as arguments in stat_summary(). This is the result:
I see two problems with this plot:
Not all outliers are coloured in red.
Outliers cut the whiskers, which is not supposed to happen by definition of my functions.
The help file has not been helping me in solving this issue. Comments are welcome.
The code:
library(stats)
library(ggplot2)
library(dplyr)
# Example Data
{
set.seed(123)
indexnumber_of_entity = rep(c(1:30),
each = 12)
month = rep(c(1:12),
each = 1,
times = 30)
variable_of_interest = runif(n = 360,
min = 0,
max = 100)
Data = as.data.frame(cbind(indexnumber_of_entity,
month,
variable_of_interest)) %>% mutate_at(.vars = c(1,2,3),
as.numeric)
Data_Above_99th_Percentile = filter(Data,
variable_of_interest > stats::quantile(Data$variable_of_interest,
0.99))
Data_Below_1st_Percentile = filter(Data,
variable_of_interest < stats::quantile(Data$variable_of_interest,
0.01))
}
# Functions that enable individualizing boxplots
{
Individualized_Boxplot_Quantiles <- function(x){
d <- data.frame(ymin = stats::quantile(x,0.01),
lower = stats::quantile(x,0.25),
middle = stats::quantile(x,0.5),
upper = stats::quantile(x,0.75),
ymax = stats::quantile(x,0.99),
row.names = NULL)
d[1, ]
}
Definition_of_Outliers = function(x)
{
subset(x,
stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
}
# Producing the ggplot
ggplot(data = Data) +
aes(x = month,
y = variable_of_interest,
group = month) +
stat_summary(fun.data = Individualized_Boxplot_Quantiles,
geom="boxplot",
lwd = 0.5) +
stat_summary(fun.y = Definition_of_Outliers,
geom="point",
size = 1) +
labs(title = "Distributions of Variable of Interest based on months",
x = "Month",
y = "Variable of Interest") +
theme(plot.title = element_text(size = 20,
hjust = 0.5,
face = "bold"),
axis.ticks.x = element_blank(),
axis.text.x = element_text(size = 12,
face = "bold"),
axis.text.y = element_text(size = 12,
face = "bold"),
axis.title.x = element_text(size = 16,
face = "bold",
vjust = -3),
axis.title.y = element_text(size = 16,
face = "bold",
vjust = 3)) +
scale_x_continuous(breaks = c(seq(from = 1,
to = 12,
by = 1))) +
scale_y_continuous(breaks = c(seq(from = 0,
to = 100,
by = 10))) +
geom_point(data = Data_Above_99th_Percentile,
colour = "red",
size = 1) +
geom_point(data = Data_Below_1st_Percentile,
colour = "red",
size = 1)
You can simplify the functions a little bit like this:
boxplot_quantiles <- function(x) {
y <- as.data.frame(t(stats::quantile(x, c(0.01, 0.25, 0.5, 0.75, 0.99))))
setNames(y, c('ymin', 'lower', 'middle', 'upper', 'ymax'))
}
outliers <- function(x) {
subset(x, stats::quantile(x,0.99) < x | stats::quantile(x,0.01) > x)
}
You can rely on the summary functions, since the Data_above_99th_Percentile and Data_Below_1st_Percentile were not groupwise calculations in your own code.
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3))
Edit
As long as you perform groupwise operations on the filtered data frames, your alternative method of drawing the outliers will work too. Note that I have added these in colored layers above the existing plot so that the red points are overplotted with blue and green dots:
Data_Above_99th_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest > quantile(variable_of_interest,0.99))
Data_Below_1st_Percentile <- Data %>%
group_by(month) %>%
filter(variable_of_interest < quantile(variable_of_interest, 0.01))
ggplot(data = Data, aes(x = month, y = variable_of_interest, group = month)) +
stat_summary(fun = outliers, geom = "point", col = 'red', size = 1) +
stat_summary(fun.data = boxplot_quantiles, geom = "boxplot", lwd = 0.5) +
scale_x_continuous('Month', breaks = 1:12) +
scale_y_continuous('Variable of Interest' , breaks = 0:10 * 10) +
labs(title = "Distributions of Variable of Interest based on months") +
theme(text = element_text(face = 'bold', size = 12),
plot.title = element_text(size = 20, hjust = 0.5),
axis.ticks.x = element_blank(),
axis.title.x = element_text(size = 16, margin = margin(20, 0, 0, 0)),
axis.title.y = element_text(size = 16, vjust = 3)) +
geom_point(data = Data_Below_1st_Percentile, color = 'green') +
geom_point(data = Data_Above_99th_Percentile, color = 'blue')

order y axis by count of one particular value in column with geom_bar

wondering how I can order the clusters on y-axis by decreasing count of kiwi?
df = data.frame()
df = data.frame(matrix(df, nrow=200, ncol=2))
colnames(df) <- c("cluster", "name")
df$cluster <- sample(20, size = nrow(df), replace = TRUE)
df$fruit <- sample(c("banana", "apple", "orange", "kiwi", "plum"), size = nrow(df), replace = TRUE)
p = ggplot(df, aes(x = as.factor(cluster), fill = as.factor(fruit)))+
geom_bar(stat = 'count') +
theme_classic()+
coord_flip() +
theme(axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text=element_text(size=20)) +
theme(legend.text = element_text(size = 20)) +
xlab("Cluster")+
ylab("Fruit count") +
labs( fill = "")
p
I would probably do this as a data manipulation before plotting. Note I have moved kiwi to the first position in the stacking order so we can see the bars getting smaller as we move down the y axis.
library(tidyverse)
df %>%
mutate(cluster = factor(cluster,
names(sort(table(fruit == 'kiwi', cluster)[2,]))),
fruit = factor(fruit, c('kiwi', 'apple', 'banana',
'orange', 'plum'))) %>%
ggplot(aes(x = cluster, fill = fruit))+
geom_bar(position = position_stack(reverse = TRUE)) +
theme_classic()+
coord_flip() +
theme(axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text=element_text(size=20)) +
theme(legend.text = element_text(size = 20)) +
scale_fill_manual(values = c('olivedrab', 'yellowgreen', 'yellow2',
'orange2', 'plum4')) +
xlab("Cluster")+
ylab("Fruit count") +
labs( fill = "")
No need to modify the data, just use x = reorder(cluster, fruit=='kiwi', sum) in aes() (instead of as.factor(cluster)).
ggplot(df, aes(x = reorder(cluster, fruit=='kiwi', sum),
fill = as.factor(fruit))) +
geom_bar(stat = 'count') +
theme_classic() +
coord_flip() +
theme(axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text=element_text(size=20)) +
theme(legend.text = element_text(size = 20)) +
xlab('Cluster') +
ylab('Fruit count') +
labs(fill = '')
Maybe there would be a more efficient way to do this, but one possibility is to count the number of times kiwi occurs for each cluster, then arrange the cluster varaible by that. Note that in this example there can be NAs for the number of kiwis (so we set those instances to 0).
order <- df %>%
# count how many times kiwi occurs per cluster
count(fruit, cluster) %>% filter(fruit == 'kiwi')
df <- df %>%
# join the counts to the original df by cluster
left_join(order %>% select(cluster, n)) %>%
# if na make zero (otherwise NAs appear at the top of the plot)
mutate(n = ifelse(is.na(n), 0, n),
# arrange the clusters by n
cluster = fct_reorder(as.factor(cluster), n))
and then then your plotting function should give the desired output.
Compute total kiwis by group, then convert cluster to a factor ordered by this grouped kiwi total. Using dplyr and forcats::fct_reorder():
set.seed(13)
library(dplyr)
library(forcats)
df <- df %>%
group_by(cluster) %>%
mutate(n_kiwi = sum(fruit == "kiwi")) %>%
ungroup() %>%
mutate(cluster = fct_reorder(factor(cluster), n_kiwi))
p = ggplot(df, aes(x = cluster, fill = fruit))+
geom_bar(stat = 'count') +
theme_classic()+
coord_flip() +
theme(axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text=element_text(size=20)) +
theme(legend.text = element_text(size = 20)) +
xlab("Cluster")+
ylab("Fruit count") +
labs( fill = "")
p

How to display p-values above boxplots on exponential (log10) y-axis?

I have a data frame with three groups (group1, group2, group3). I would like to show the p-value of their mean comparisons in ggplot2 which I can do however, the values are stacked ontop of one another making it difficult to see what is being compared. When I try to adjust where the p-values are located using the y_position() function, the boxplots collapse (I think because the y-axis is log10) but the p-values are no longer stacked ontop of one another. How can I keep the boxplots from collapsing and keep the p-values displayed so that you can see what is being compared?
Example data
library(ggplot2)
library(dplyr)
library(ggsignif)
df <- data.frame(matrix(ncol = 2, nrow = 30))
colnames(df)[1:2] <- c("group", "value")
df$group <- rep(c("group1","group2","group3"), each = 10)
df[1:10,2] <- rexp(10, 1/10)
df[11:20,2] <- rexp(10, 1/100)
df[21:30,2] <- rexp(10, 1/900)
# Need to say what should be compared for p-value determination
my_comparisons <- list(c("group1", "group2"),
c("group1", "group3"),
c("group2", "group3"))
Boxplots showing the distribution of value for each group however the p-values are ontop of one another so you cannot compare among groups.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
Adjusting the y_position() of where the p-values should display but this collapses the y-axis. I have tried several values within y_position.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(y_position = c(2000,1800,1600),
comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
For some reason this parameter ignores the axis transformation. You therefore need to use the log10 values of the desired positions:
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
y_position = log10(c(5000, 10000, 25000)),
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white",
-outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black",
fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())

How to remove one facet category from facet_wrap after using tidyr to reshape data

I am trying to plot some data and see below the replicable example, starting from the relevant libraries
library(ggplot2)
library(tidyr)
library(scales)
library(dplyr)
and the creation of the random dataset see below:
data <- data.frame(replicate(3, sample(0:100, 100, rep=TRUE)))
data$Place <- sample(c("PlaceA", "PlaceB","PlaceC"), size = nrow(data), prob = c(0.76, 0.14, 0.10), replace = TRUE)
data$Preference <- sample(c("Strong", "Medium","Low"), size = nrow(data), replace = TRUE)
data$Risk <- sample(c("Yes","No"), size = nrow(data), replace = TRUE)
colnames(data) <- c("A","B","C","Place","Preference","Risk")
rownames(data) <- NULL
After this step I am trying to get the data along a different shape by using tidyr package
data_long <- tidyr::gather(data, key = type_col, value = categories, -c("A","B","C","Place","Preference"))
And then I wish to plot the proportions of respondents saying yes to risk by place- see below the code to achieve the visual output
data_long %>%
count(type_col, categories,Place) %>%
left_join(data_long %>% count(type_col, Place, name = "m"),by = c("type_col", "Place")) %>%
mutate(Prop = n/m) %>%
ggplot(aes(x = categories, y = Prop, fill = Place)) +
geom_col(position = position_dodge()) +
geom_text(aes(label = scales::percent(Prop)),
hjust = 0.1,
position = position_dodge(1)) +
facet_wrap(~ type_col, scales = "free_x", ncol = 3) +
scale_fill_brewer(palette = "Oranges") + #scale_x_discrete(limits = positions)+
scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
xlab("") +
ylab("") +
coord_flip() +
theme(panel.background = element_rect(fill = "white"),
legend.position = "bottom",
strip.text.x = element_text(size = 15, colour = "black"),
plot.title = element_text(size = 20, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 12))
See below the output which is correct. Yet, I do not want to show the yes and nos, but just the yes proportions. Is there an easy way to just plot the output below while retaining just one option of the facets (Yes in this case)? Thanks for the help
Maybe this:
library(tidyverse)
#Code
data_long %>%
count(type_col, categories,Place) %>%
left_join(data_long %>% count(type_col, Place, name = "m"),by = c("type_col", "Place")) %>%
mutate(Prop = n/m) %>%
filter(categories=='Yes') %>%
mutate(Place=factor(Place,levels = rev(unique(Place)),ordered = T)) %>%
ggplot(aes(x = categories, y = Prop, fill = Place)) +
geom_col(position = position_dodge()) +
geom_text(aes(label = scales::percent(Prop)),
hjust = 0.1,
position = position_dodge(1)) +
facet_wrap(~ type_col, scales = "free_x", ncol = 3) +
scale_fill_brewer(palette = "Oranges",guide = guide_legend(reverse = TRUE)) + #scale_x_discrete(limits = positions)+
scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
xlab("") +
ylab("") +
coord_flip() +
theme(panel.background = element_rect(fill = "white"),
legend.position = "bottom",
strip.text.x = element_text(size = 15, colour = "black"),
plot.title = element_text(size = 20, face = "bold"),
axis.text = element_text(size = 12),
axis.title = element_text(size = 12))
Output:

Dynamic midpoint in ggplot2's scale_fill_gradient2

I'm making a heatmap in R using ggplot2 and I want to dynamically change the value of midpoint for scale_fill_gradient2. I want the midpoint for every row to be the maximum of v1 and v2.
Here's the original plot and data:
library(ggplot2)
set.seed(1L)
s = sprintf("d%s", 1:9)
vars = sprintf("v%s", 1:6)
data = data.frame(s = rep(s, 6), stringsAsFactors = FALSE)
data$variable = rep(vars, rep.int(9, 6))
data$variable = as.factor(data$variable)
data$value = round(runif(54, min=-100, max=100), 1)
pdf(save)
heatmap = ggplot(data = data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black", aes(width = 1)) +
scale_fill_gradient2(low = cbbPalette$pink, high = cbbPalette$green, mid = cbbPalette$grey,
midpoint = 0, space = "Lab",
name = title) +
scale_color_discrete("exps", data$variable) +
theme_minimal() +
theme(axis.text.x = element_text(vjust = 1,
size = title.size), legend.title = element_blank(),
axis.text.y = element_text(size = title.size),
strip.text.x = element_text(size = title.size)) +
coord_fixed()
#add numbers to cells
heatmap = heatmap + geom_text(aes(x = variable, y = s, label = value), color = cbbPalette$black, size = 3) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(0.5, 0),
legend.direction = "horizontal",
legend.position = "top") +
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
# Print the heatmap
print(heatmap)
dev.off()
I tried to change midpoint by taking max of v1 and v2 but that affects all rows instead each row separately.
scale_fill_gradient2(low = cbbPalette$pink, high = cbbPalette$green, mid = cbbPalette$grey,
midpoint = data[data$variable == "v1", "value"], space = "Lab",
name = title)
Scales don't really work that way, as they map a range of values to a set of colours. Consequentially, a particular colour means a particular value for the whole plot. My best advice would be to pre-normalise the data by subtracting the max of v1/v2. See example in code below (there were a few variables in your example but not in the shared code which I've subsituted).
library(ggplot2)
library(tidyverse)
set.seed(1L)
s = sprintf("d%s", 1:9)
vars = sprintf("v%s", 1:6)
data = data.frame(s = rep(s, 6), stringsAsFactors = FALSE)
data$variable = rep(vars, rep.int(9, 6))
data$variable = as.factor(data$variable)
data$value = round(runif(54, min=-100, max=100), 1)
new_data <- data %>% group_by(s) %>%
mutate(value = value - max(value[variable %in% c("v1", "v2")]))
ggplot(data = new_data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black", aes(width = 1)) +
scale_fill_gradient2(low = "pink", high = "green", mid = "grey",
midpoint = 0, space = "Lab",
name = "title") +
scale_color_discrete("exps", data$variable) +
theme_minimal() +
coord_fixed()

Resources