Dynamic midpoint in ggplot2's scale_fill_gradient2 - r

I'm making a heatmap in R using ggplot2 and I want to dynamically change the value of midpoint for scale_fill_gradient2. I want the midpoint for every row to be the maximum of v1 and v2.
Here's the original plot and data:
library(ggplot2)
set.seed(1L)
s = sprintf("d%s", 1:9)
vars = sprintf("v%s", 1:6)
data = data.frame(s = rep(s, 6), stringsAsFactors = FALSE)
data$variable = rep(vars, rep.int(9, 6))
data$variable = as.factor(data$variable)
data$value = round(runif(54, min=-100, max=100), 1)
pdf(save)
heatmap = ggplot(data = data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black", aes(width = 1)) +
scale_fill_gradient2(low = cbbPalette$pink, high = cbbPalette$green, mid = cbbPalette$grey,
midpoint = 0, space = "Lab",
name = title) +
scale_color_discrete("exps", data$variable) +
theme_minimal() +
theme(axis.text.x = element_text(vjust = 1,
size = title.size), legend.title = element_blank(),
axis.text.y = element_text(size = title.size),
strip.text.x = element_text(size = title.size)) +
coord_fixed()
#add numbers to cells
heatmap = heatmap + geom_text(aes(x = variable, y = s, label = value), color = cbbPalette$black, size = 3) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(0.5, 0),
legend.direction = "horizontal",
legend.position = "top") +
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
# Print the heatmap
print(heatmap)
dev.off()
I tried to change midpoint by taking max of v1 and v2 but that affects all rows instead each row separately.
scale_fill_gradient2(low = cbbPalette$pink, high = cbbPalette$green, mid = cbbPalette$grey,
midpoint = data[data$variable == "v1", "value"], space = "Lab",
name = title)

Scales don't really work that way, as they map a range of values to a set of colours. Consequentially, a particular colour means a particular value for the whole plot. My best advice would be to pre-normalise the data by subtracting the max of v1/v2. See example in code below (there were a few variables in your example but not in the shared code which I've subsituted).
library(ggplot2)
library(tidyverse)
set.seed(1L)
s = sprintf("d%s", 1:9)
vars = sprintf("v%s", 1:6)
data = data.frame(s = rep(s, 6), stringsAsFactors = FALSE)
data$variable = rep(vars, rep.int(9, 6))
data$variable = as.factor(data$variable)
data$value = round(runif(54, min=-100, max=100), 1)
new_data <- data %>% group_by(s) %>%
mutate(value = value - max(value[variable %in% c("v1", "v2")]))
ggplot(data = new_data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black", aes(width = 1)) +
scale_fill_gradient2(low = "pink", high = "green", mid = "grey",
midpoint = 0, space = "Lab",
name = "title") +
scale_color_discrete("exps", data$variable) +
theme_minimal() +
coord_fixed()

Related

How to display p-values above boxplots on exponential (log10) y-axis?

I have a data frame with three groups (group1, group2, group3). I would like to show the p-value of their mean comparisons in ggplot2 which I can do however, the values are stacked ontop of one another making it difficult to see what is being compared. When I try to adjust where the p-values are located using the y_position() function, the boxplots collapse (I think because the y-axis is log10) but the p-values are no longer stacked ontop of one another. How can I keep the boxplots from collapsing and keep the p-values displayed so that you can see what is being compared?
Example data
library(ggplot2)
library(dplyr)
library(ggsignif)
df <- data.frame(matrix(ncol = 2, nrow = 30))
colnames(df)[1:2] <- c("group", "value")
df$group <- rep(c("group1","group2","group3"), each = 10)
df[1:10,2] <- rexp(10, 1/10)
df[11:20,2] <- rexp(10, 1/100)
df[21:30,2] <- rexp(10, 1/900)
# Need to say what should be compared for p-value determination
my_comparisons <- list(c("group1", "group2"),
c("group1", "group3"),
c("group2", "group3"))
Boxplots showing the distribution of value for each group however the p-values are ontop of one another so you cannot compare among groups.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
Adjusting the y_position() of where the p-values should display but this collapses the y-axis. I have tried several values within y_position.
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(y_position = c(2000,1800,1600),
comparisons = my_comparisons,
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white", outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black", fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())
For some reason this parameter ignores the axis transformation. You therefore need to use the log10 values of the desired positions:
df %>%
mutate(group = factor(group, levels = c("group3","group2","group1"))) %>%
ggplot(aes(x = group, y = value)) +
geom_signif(comparisons = my_comparisons,
y_position = log10(c(5000, 10000, 25000)),
map_signif_level = function(x) paste("p =", scales::pvalue(x))) +
scale_y_log10() +
geom_boxplot(outlier.colour="white", outlier.fill = "white",
-outlier.shape = 1, outlier.size = 0) +
geom_jitter(shape=1, position=position_jitter(0.2), color = "black",
fill = "white", size = 2) +
labs(x = "",
y = "value") +
theme_bw() +
theme(axis.text.x = element_text(size = 16, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title = element_text(size = 16, color = "black"),
axis.title.x = element_text(vjust = -0.5),
panel.grid = element_blank(),
panel.background = element_blank())

Reordering data in ascending order axis within 2 facets with ggplot in R

I have a dataframe of overnight stays per holiday location by origin-location, year and age group, which is plotted in ggplot, facetted by Year and age group:
As you can see, the lines don't sort correctly from smallest to largest. I use 'reorder_within' and 'scale_x_reordered' by Tyler Rinker, and it works like a charm when data is only faceted by one variable, e.g. Year or age_group, but not both, which is what I'd like.
Reproducible example including dummy data below:
library(tidyverse)
library(scales)
reorder_within <-
function(x,
by,
within,
fun = mean,
sep = "___",
...) {
new_x <- paste(x, within, sep = sep)
stats::reorder(new_x, by, FUN = fun)
}
scale_x_reordered <- function(..., sep = "___") {
reg <- paste0(sep, ".+$")
ggplot2::scale_x_discrete(
labels = function(x)
gsub(reg, "", x),
...
)
}
dummy_f <- data.frame(
Eco_T = rep("dest101", 65),
Eco_Code = rep(101, 65),
Year = c(2018,2018,2019,2020,2019,2020,2018,2020,2021,2019,2021,2018,2021,2021,
2020,2021,2019,2019,2018,2018,2019,2021,2020,2019,2019,2019,2019,
2019,2020,2018,2021,2020,2021,2021,2021,2019,2021,2021,2018,2019,2018,
2020,2020,2018,2020,2019,2018,2020,2021,2020,2021,2018,2019,2021,2019,
2020,2021,2020,2020,2018,2018,2021,2019,2021,2018),
age_groups = c(
"under35","over35","under35","under35","over35","under35","over35",
"under35","under35","under35","under35","under35","over35","over35",
"under35","under35","over35","over35","under35","over35","under35",
"over35","over35","under35","over35","under35","over35",
"over35","over35","under35","under35","over35","over35","over35","under35",
"under35","over35","over35","over35","over35","under35",
"over35","under35","over35","over35","over35","over35","over35",
"over35","under35","under35","over35","over35","under35", "over35",
"over35","over35","under35","over35","over35","over35","over35","under35",
"over35","over35"
),
origin_Code = c(
30301030,30303030,30305030,30301030,30301030,70701070,30303030,31314031,
30301030,30301030,30304030,31311031,30301030,30301030,30309030,30304030,
31314031,30301030,30301030,30301030,30301030,30303030,31316031,30301030,30301030,
30309030,30302030,31317031,30301030,30303030,30309030,
30309030,30309030,30303030,31310031,11110011,30309030,30305030,31314031,
30309030,30309030,30301030,30304030,30301030,30301030,30304030,30303030,
30309030,31314031,30305030,30301030,30302030,31313031,30305030,30302030,
31316031,30309030,30301030,31316031,30302030,31313031,31316031,30309030,
30301030,30301030
),
Origin_Name = c(
"Wyumwym","Candcar","Brababri","Wyumwym","Wyumwym","Lihflit","Matramt",
"Thithe","Calacap","Wyumwym","Shwoshe","Brnsbro","Clelecle","Calacap",
"Brdabro","Keorken","Thithe","Clelecle","Clelecle","Calacap","Clelecle","Candcar",
"Cauncal","Calacap","Calacap","Brdabro","Sagasan",
"Tooto","Clelecle","Candcar","Coancoo","Orauorm","Hoanhol","Fostfor",
"Inreive","Gocgol","Brababri","Thithe","Brababri","Orauorm","Mueemud",
"Calacap","Shwoshe","Wyumwym","Wyumwym","Keorken","Hoanhol","Gocgol",
"Thithe","Brababri","Wyumwym","Sagasan","Relired","Brababri","Chemsche",
"Suhisun","Brdabro","Calacap","Maochmar","Chemsche","Relired","Cauncal","Ronarob",
"Wyumwym","Clelecle"
),
overnight_stays = c(
266,132,158,143,964,78,134,47,45,130,94,42,500,105,95,590,5666,106,324,
412,99,433,511,54,506,46,471,48,254,5257,51,388,70,421,662,419,
130,539,60,69,435,135,2146,406,230,413,126,322,782,86,152,162,406,84,458,426,
406,46,426,127,109,626,44,1333,961
)
)
ggplot.object <- dummy_f %>%
ggplot() +
(aes(
x = reorder_within(Origin_Name, -overnight_stays, Year),
y = overnight_stays,
)) + geom_point(size = 4, color = "#374c92") +
geom_segment(
aes(
x = reorder_within(Origin_Name, -overnight_stays, Year),
xend = reorder_within(Origin_Name, -overnight_stays, Year),
y = 0,
yend = overnight_stays
),
color = "#374c92",
size = 2
) +
scale_x_reordered() +
scale_color_distiller(type = "seq", palette = "BuPu", direction = 1,
limits = c(-5, NA)) +
facet_wrap(
age_groups ~ Year,
dir = "v",
scales = "free",
ncol = 2
) +
scale_y_continuous(labels = comma) +
labs(y = "Unique Agents",
x = "") +
theme(
panel.spacing.y = unit(10, units = "mm"),
text = element_text(family = "sans-serif",
color = "#B6BAC3"),
axis.text = element_text(color = "#B6BAC3",
size = 8),
axis.title = element_text(color = "#B6BAC3",
size = 12),
axis.line = element_line(color = "#B6BAC3"),
strip.text = element_text(size = 15,
color = "#B6BAC3"),
legend.position = "none",
panel.background = element_rect(fill = "transparent",
color = NA),
plot.background = element_rect(fill = "transparent",
color = NA),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
coord_flip()
ggplot.object
We can modify reorder_within() to accept an arbitrary number of "within" variables by replacing the within argument with dots:
reorder_within2 <- function(x,
by,
...,
fun = mean,
sep = "___") {
new_x <- paste(x, ..., sep = sep)
stats::reorder(new_x, by, FUN = fun)
}
Then pass all facet levels to ...:
ggplot.object <- dummy_f %>%
ggplot() +
aes(
x = reorder_within2(Origin_Name, -overnight_stays, Year, age_groups),
y = overnight_stays,
) +
geom_point(size = 4, color = "#374c92") +
geom_segment(
aes(
xend = reorder_within2(Origin_Name, -overnight_stays, Year, age_groups),
y = 0,
yend = overnight_stays
),
color = "#374c92",
size = 2
) +
# rest of code unchanged from original:
scale_x_reordered() +
scale_color_distiller(type = "seq", palette = "BuPu", direction = 1,
limits = c(-5, NA)) +
facet_wrap(
age_groups ~ Year,
dir = "v",
scales = "free",
ncol = 2
) +
scale_y_continuous(labels = comma) +
labs(y = "Unique Agents",
x = "") +
theme(
panel.spacing.y = unit(10, units = "mm"),
text = element_text(family = "sans-serif",
color = "#B6BAC3"),
axis.text = element_text(color = "#B6BAC3",
size = 8),
axis.title = element_text(color = "#B6BAC3",
size = 12),
axis.line = element_line(color = "#B6BAC3"),
strip.text = element_text(size = 15,
color = "#B6BAC3"),
legend.position = "none",
panel.background = element_rect(fill = "transparent",
color = NA),
plot.background = element_rect(fill = "transparent",
color = NA),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
coord_flip()
ggplot.object
The y axis is now ordered smallest to largest within each facet:

ggplot: How to set different alignments on the geom_text position based on type of variable?

I have a 100% stacked bar chart that displays 3 types of variable. I've set a example db so I could create a graph more easily.
I've already adjust the chart with the colors and information I need. But I'm not being able to independently position the labels. Here's the current code and output.
Code:
(empilhado<-ggplot(dfm, aes(y = Year, x = abs(value), fill = variable)) +
scale_x_continuous(sec.axis = sec_axis(trans = ~.*1, name="Trab."), expand=expansion(mult=c(0,0.05)))+
geom_col(data = rotulo, aes(y = Year, x=abs(trabalho), fill=NULL), width = .7, colour="black", lwd=0.1, position = "fill", orientation = "y") +
geom_label(data = rotulo, aes(y= Year, x = abs(trabalho), fill=NULL, label=paste(format(round(trabalho, digits=0), nsmall=0, decimal.mark=",", big.mark="."),
format(round(aprovado, digits=0), nsmall=0, decimal.mark=",", big.mark="."))
), color="black", size=4, position = position_fill(vjust=1.06)) +
geom_col(width = .7, colour="black", lwd=0.1, position = "fill", orientation = "y") +
geom_text(aes(label=format(round(value, digits=0), nsmall=0, decimal.mark=",", big.mark=".")),
size=4, color="white", position = position_fill(vjust=0.5)) +
theme(panel.grid.major = element_line(colour = "gray90",size=0.75), panel.grid.minor = element_line(colour = "gray90",size=0.75),
legend.position="top", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
axis.title.x = element_blank(), panel.background = element_blank())+
scale_fill_manual(values = c("#000000","tomato","blue"))
Output:
How is it now? Position_fill(vjust=0.5), so all the labels are centered inside its respective bar.
What I want? To be able to set the position of the 'Marionete' label on the left(like a vjust=0 would do), keep the 'Pedido' label as is (in the center of the 'Pedido' stacked bar) and place the 'Fatura' label on the right (like a vjust=1 would do).
Thanks in advance!
One option to achieve your desired result would be to compute/set the positions for each label and the horizontal alignment manually instead of making use of position="fill":
Making use of some random mock data:
library(ggplot2)
library(dplyr)
dfm <- dfm %>%
group_by(Year) %>%
arrange(desc(variable)) %>%
mutate(
pct = value / sum(value),
x_label = case_when(
variable == "Marionete" ~ 0,
variable == "Pedido" ~ .5 * (cumsum(pct) + lag(cumsum(pct))),
TRUE ~ 1
),
hjust = case_when(
variable == "Marionete" ~ 0,
variable == "Pedido" ~ .5,
TRUE ~ 1
)
)
ggplot(dfm, aes(y = Year, x = abs(value), fill = variable)) +
scale_x_continuous(sec.axis = sec_axis(trans = ~ . * 1, name = "Trab."), expand = expansion(mult = c(0, 0.05))) +
geom_col(width = .7, colour = "black", lwd = 0.1, position = "fill", orientation = "y") +
geom_text(aes(x = x_label, label = format(round(value, digits = 0), nsmall = 0, decimal.mark = ",", big.mark = "."), hjust = hjust),
size = 4, color = "white"
) +
theme(
panel.grid.major = element_line(colour = "gray90", size = 0.75), panel.grid.minor = element_line(colour = "gray90", size = 0.75),
legend.position = "top", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
axis.title.x = element_blank(), panel.background = element_blank()
) +
scale_fill_manual(values = c("#000000", "tomato", "blue"))
DATA
set.seed(123)
dfm <- data.frame(
Year = rep(c(2006:2016), each = 3),
value = sample(1:100, 3 * 11, replace = TRUE),
variable = c("Fatura", "Pedido", "Marionete")
)
dfm$variable <- factor(dfm$variable, levels = c("Fatura", "Pedido", "Marionete"))
dfm$Year <- factor(dfm$Year)

Change tile size for ggplot2 heatmaps

I'm making a heatmap in R using ggplot2 and I want to change the size of my tiles because the text in my heatmap doesn't fit in the cells. Below is my data and attempted plots.
library("ggplot2")
s = sprintf("dataset number %s", 1:9)
vars = sprintf("many many words here, here and here %s", 1:6)
data = data.frame(s = rep(s, 6), stringsAsFactors = FALSE)
data$variable = rep(vars, rep.int(9, 6))
data$variable = as.factor(data$variable)
data$value = rep.int(-1000, 54)
data$font = "plain"
for(v in (unique(data$s))) {
ids = which(data$s %in% "dataset number 1")
vals = data[ids, ]
row = rownames(vals[vals$value %in% max(vals$value, na.rm = TRUE), ])
data[row, ]$font = "bold"
}
I'm making a heatmap like this
title = "Heatmap"
cbbPalette = list(grey = "#999999", black = "#000000", orange = "#E69F00", sky = "#56B4E9", green = "#009E73", yellow = "#F0E442", blue = "#0072B2", darko = "#D55E00",
pink = "#CC79A7")
pdf(sprintf("./heatmap.pdf"))
heatmap = ggplot(data = data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient2(low = cbbPalette$pink, high = cbbPalette$green, mid = cbbPalette$grey,
midpoint = 0, limit = c(-100,100), space = "Lab",
name=title) +
scale_x_discrete(limits = levels(data$variable)) +
geom_vline(xintercept = 6 - 0.5, color = "white", size = 1) +
geom_vline(xintercept = 2 + 0.5, color = "white", size = 1) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 16, hjust = 1), legend.title = element_text(size = 18),
axis.text.y = element_text(size = 16)) +
coord_fixed()
#add numbers to cells
heatmap = heatmap + geom_text(aes(x = variable, y = s, label = value, fontface = font), color = cbbPalette$black, size = 3) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(0.5, 0),
legend.direction = "horizontal",
legend.position = "top") +
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))
# Print the heatmap
print(heatmap)
dev.off()
And I get the following result -- heatmap
Other SO posts suggested to change width and height within geom_tile, but I get the following result:
heatmap = ggplot(data = data, aes(x = variable, y = s, fill = value)) +
geom_tile(color = "black", aes(width = 2L)) + ...
wrong_heatmap
I also tried changing height in geom_tile as well as coord_fixed(ratio = 1L) but without any success.
Also, I know that I can simply change the size of the text and numbers will fit, but I need to keep the font size the same. I can also shorten the labels but assume this is not an option.
I would appreciate any help with this.

add text values to ggplot on secondary axis

I have to create graph
Following is my sample data frame
data <- data.frame(
"Tissue" = c("Adrenal gland", "Appendix", "Appendix"),
"protein.expression" = c("No detect","No detect", "Medium"),
"cell.type" = c("Glandular cells" ,"Lymphoid tissu","Glandular cells")
)
Left y axis is unique tissue type. Left axis have comma separated cell types.
I am not sure how to get the celltypes corresponding to each tissue (on left y axis) to right axis (in comma separated form)
My code is
p1 <- ggplot(dat %>% filter(facet==1), aes(
x = tissue,
y = factor(protein.expression, levels = unique(protein.expression, decreasing = F), ordered = TRUE),
fill = protein.expression,
label = cell.type
)) +
geom_point(stat = 'identity', aes(col = protein.expression), size = 12) +
geom_text(size = 6, fontface = "bold", colour = "white") +
geom_label() +
# facet_grid(cell.type ~ ., scales = "free", space = "free") +
scale_fill_manual(values = myPalette, drop = FALSE) +
scale_color_manual(values = myPalette, drop = FALSE) +
theme_classic() +
labs(title = "Protein Atlas") +
guides(fill = guide_legend(title = "Protein expression")) +
ylab("Cell types measured per tissue") +
# ylim(1,4) +
coord_flip() +
theme(axis.text.x = element_text(size = 25, vjust = 0.5, hjust = .9),
axis.text.y = element_text(size = 25),
legend.position = "none",
axis.title.x = element_text(size = 30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))
the cell types are with in the dots. I want them to be on the right side, comma sepearated and if possible color coded by corresponding protein expression label.
So this is a bit of a hack but it might work for you.
I introduce a third column in the graph to hold the labels as per my original post.
I pre-process your data to try and spread out the labels in this third column around the Tissue variable to that they don't appear all on top of each other.
my pre-processing is pretty ugly but works ok. Note that I only catered for a max of 4 cell.types as per your comment.
It gives me this graph:
My code:
data = data.frame("Tissue"=c("Adrenal gland", "Appendix", "Appendix"), "protein.expression" = c("No detect","No detect", "Medium"), "cell.type" = c("Glandular cells" ,"Lymphoid tissu","Glandular cells"))
# Pre-processing section.
# Step 1: find out the n of cell.types per tissue type
counters <- data %>% group_by(Tissue) %>% summarise(count = n())
# Step 2: Join n back to original data. Transform protein.expression to ordered factor
data <- data %>%
inner_join(counters, by="Tissue") %>%
mutate(protein = factor(protein.expression, levels=unique(protein.expression, decreasing = F), ordered=TRUE),
positionTissue = as.numeric(Tissue))
results <- data.frame()
# Step 3: Spread the cell.type labels around the position of the Tissue. 4 scenarios catered for.
for(t in unique(data$Tissue)){
subData <- filter(data, Tissue == t)
subData$spreader <- as.numeric(subData$Tissue)
if(length(unique(subData$cell.type)) == 2){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.1,as.numeric(Tissue)+0.1)) %>%
select(-x)
results <- rbind(results, subData)
} else if(length(unique(subData$cell.type)) == 3){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.15,
ifelse(as.numeric(x)==3,as.numeric(Tissue)+0.15,as.numeric(Tissue)))) %>%
select(-x)
results <- rbind(results, subData)
} else if(length(unique(subData$cell.type)) == 4){
subData <- subData %>%
mutate(x=factor(cell.type, levels=unique(cell.type, decreasing = F),ordered=TRUE),
spreader = ifelse(as.numeric(x)==1,as.numeric(Tissue)-0.2,
ifelse(as.numeric(x)==2,as.numeric(Tissue)-0.1,
ifelse(as.numeric(x)==3,as.numeric(Tissue)+0.1,
ifelse(as.numeric(x)==4,as.numeric(Tissue)+0.2,as.numeric(Tissue)))))) %>%
select(-x)
results <- rbind(results, subData)
} else{
results <- rbind(results, subData)
}
}
# Plot the data based on the new label position "spreader" variable
ggplot(results, aes(x = positionTissue, y = protein, label=cell.type)) +
geom_point(stat='identity', aes(col=protein.expression), size=12) +
geom_text(aes(y=0.5,label=Tissue), size=8, fontface="bold", angle=90)+
geom_label(aes(y="zzz", x=spreader, fill=protein), colour="white") +
theme_classic() +
scale_x_continuous(limits = c(min(as.numeric(data$Tissue))-0.5,max(as.numeric(data$Tissue))+0.5))+
scale_y_discrete(breaks=c("Medium","No detect")) +
labs(title="Protein Atlas") +
guides(fill=guide_legend(title="Protein expression"))+
ylab("Cell types measured per tissue") +
xlab("") +
#ylim(1,4) +
coord_flip()+
theme(axis.text.x = element_text(size = 25),
axis.text.y = element_text(colour = NA),
legend.position = "none",
axis.title.x = element_text(size=30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))
Edit #2:
Update to retain label colours by creating n positions where n is the number of cell.types:
data = data %>%
mutate(position = paste("z",cell.type))
Then you can use this new position variable instead of the static "zzz" I suggested in my original post. Your labels will have the correct colours, but your chart will look odd if there are a lot of cell.types.
geom_label(aes(y=position, label = cell.type)) +
EDIT #1: Update to avoid overlapping labels by grouping cell.types to a single label per tissue.
Creating a new label field that concatenates the individual labels for each tissue type:
data = data %>%
group_by(Tissue) %>%
mutate(label = paste(cell.type, collapse = "; "))
And amend the ggplot call to use this new field instead of the existing cell.type field:
geom_text(aes(y="zzz", label = label), size = 6, fontface = "bold", colour = "white")+
or:
geom_label(aes(y="zzz", label = label),) +
ORIGINAL POST:
You could plot your labels at a third position (e.g. "zzz") and then hide that position from the set of axis labels using scale_x_discrete(breaks=c()).
ggplot(data, aes(x = Tissue, y = factor(protein.expression,
levels=unique(protein.expression,
decreasing = F),
ordered=TRUE), fill = protein.expression,
label = cell.type))+
geom_point(stat='identity', aes(col=protein.expression), size=12) +
geom_text(aes(y="zzz"), size = 6, fontface = "bold", colour = "white")+
geom_label(aes(y="zzz"),) +
# facet_grid(cell.type ~ ., scales = "free", space = "free") +
# scale_fill_manual(values = myPalette, drop = FALSE) +
# scale_color_manual(values = myPalette, drop = FALSE) +
theme_classic() +
scale_y_discrete(breaks=c("Medium","No detect"))+
labs(title="Protein Atlas") +
guides(fill=guide_legend(title="Protein expression"))+
ylab("Cell types measured per tissue") +
#ylim(1,4) +
coord_flip()+
theme(axis.text.x = element_text(size = 25, vjust = 0.5, hjust = .9),
axis.text.y = element_text(size = 25),
legend.position = "none",
axis.title.x = element_text(size=30),
axis.title.y = element_text(size = 30, margin = margin(t = 0, r = 20, b = 0, l = 0)),
legend.title = element_text(size = 30),
legend.text = element_text(size = 25),
legend.key.size = unit(2, 'cm'),
axis.ticks.length=unit(.01, "cm"),
strip.text.y = element_text(angle = 0))

Resources