Problems with geom_tile and scale_colour_distiller - r

I want to create a correlation plot using geom_tile(). This is a reproducible chunk of code:
library(ggplot2)
df.m <- structure(list(Trait = structure(c(6L, 5L, 1L, 3L, 2L, 9L, 4L,
10L, 11L, 7L, 8L, 6L, 5L, 1L, 3L, 2L, 9L, 4L, 10L, 11L, 7L, 8L,
6L, 5L, 1L, 3L, 2L, 9L, 4L, 10L, 11L, 7L, 8L), .Label = c("R1",
"R2", "R3", "R4", "R5",
"R6", "R7", "R8",
"R9", "R10",
"R11"), class = "factor"), Variable = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("C1", "C2", "C3"), class = "factor"),
value = c(0.967444360256195, 0.937379062175751, 0.647411823272705,
0.512605130672455, 0.50750744342804, 0.508640229701996, 0.508640229701996,
0.503542542457581, 0.442936152219772, 0.510905921459198,
0.504675328731537, NA, 0.834005177021027, 0.667642116546631,
0.579914391040802, 0.579344689846039, 0.536050498485565,
0.532062888145447, 0.52408766746521, 0.520099997520447, 0.504719197750092,
0.450031787157059, NA, NA, 0.571457028388977, 0.451690584421158,
0.354736804962158, 0.46138596534729, 0.477354824542999, 0.447128057479858,
0.287439465522766, 0.498456537723541, 0.508722245693207)), .Names = c("Trait",
"Variable", "value"), row.names = c(NA, -33L), class = "data.frame")
p <- ggplot(na.omit(df.m)) + aes(x = Variable, y = Trait, fill = value) +
geom_tile(colour = "white", size = 0.75) +
scale_colour_distiller(limits = c(-1, 1), direction = -1, palette = "RdBu", name = "Coefficient") +
scale_size(range = c(0,20), name = title, guide = "none") +
geom_text(data = df.m, aes(x = Variable, y = Trait, label = sprintf("%.2f", value)), size = 4, inherit.aes = FALSE) +
theme(plot.title = element_text(size = 20), axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 10, angle = 90, hjust = 1, vjust = 0.5),
legend.position = "bottom", legend.margin = unit(1.0, "cm"),
legend.text = element_text(size = 8), legend.title = element_text(size = 10),
legend.key.size = unit(1.0, "cm"),
panel.background = element_rect(fill = "white")) +
xlab("\nVariables 1") + ylab("Variables 2\n")
plot(p)
However, the tiles are filled with their own color gradient scale and not with the one defined bye scale_colour_distiller(). Moreover, both key legends appear in the plot:
I just want the tiles to be colored according to the gradient defined by the scale_colour_distiller(). How can I do that? From where geom_plot() is sucking the color scale?
Thanks in advance!

You need to use scale_fill_distiller since you are mapping value to the fill aesthetic and not to the color aesthetic.

Related

Plot a line and dot plot with a categorical variable on the x-axis, and two variables defining the groups

So I would like to get a plot with two categories on the x-axis, then the color of the lines and data points defined by one variable, and the shape of the points defined by the second variable
effectBaseline <- ggplot(
data = bdataPEV,
aes(x=variable, y=value, group=Electrode, color=subject)) +
geom_line()+
geom_jitter(aes(x=variable,
y = value,
shape = Electrode,
color=subject),
size=2,
show.legend=TRUE,
width = 0.1)+
labs(x = "",
y="PEV (dps)",
title = "Effect of baseline",
subtitle= "PEV values at UCL and T") +
theme_classic() +
theme(axis.title=element_text(size=8,face="bold"),
axis.text.x = element_text(face="bold", size=8, angle=0),
axis.text.y = element_text(face="bold",size=10, angle=90),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()) +
geom_hline(yintercept = 0)+
scale_x_discrete(labels=c("no Baseline","with Baseline"))
Example data:
structure(list(subject = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("1",
"2", "3", "6"), class = "factor"), Electrode = c("LAN", "SAN",
"PAN", "LAN", "SAN", "PAN", "LAN", "SAN", "PAN", "LAN", "SAN",
"PAN", "LAN", "SAN", "PAN", "LAN", "SAN", "PAN", "LAN", "SAN",
"PAN", "LAN", "SAN", "PAN", "LAN", "SAN", "PAN", "LAN", "SAN",
"PAN", "LAN", "SAN", "PAN", "LAN", "SAN", "PAN"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("PEVsum_block_noB", "PEVsum_block_B",
"PEVsum_block_B_norm", "PEVsum_block_B_inhibition", "PEVsum_blok_B_inhibition_norm"
), class = "factor"), value = c(26.5655048141819, 24.25, 4.30277563773199,
158.352853610442, 100.585046948683, 26.0372066865141, 147.516666666667,
156.7275, 65.9128571428571, 1.7378211394883, 1.9853826151828,
2.96232650874689, 4.24264068711928, 14, 1, 141.354165131417,
90.2108640907513, 5, 130.96, 137.72, 23.32, 2.4099999998795,
1.86214499960808, 2.96192610446308, -2.82842712474619, 0, -1,
-15.0332963783729, -9.4339811320566, -9.05538513813742, -31.13,
-28.57, -30.27, 0, 0, -0.679999999932)), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L), class = "data.frame")
So, I've changed two things in your plot:
I used interaction to group the lines by both the subject and electrode.
I set both lines and points to have the same position adjustment. For more see this question here: How to jitter both geom_line and geom_point by the same magnitude?
pos <- position_dodge(width = 0.1)
ggplot(data = bdataPEV,
aes(
x = variable,
y = value,
color = subject,
shape = Electrode,
group = interaction(subject, Electrode)
)) +
geom_line(position = pos) +
geom_point(
size = 2,
show.legend = TRUE,
position = pos
) +
labs(
x = "",
y = "PEV (dps)",
title = "Effect of baseline",
subtitle = "PEV values at UCL and T"
) +
theme_classic() +
theme(
axis.title = element_text(size = 8, face = "bold"),
axis.text.x = element_text(face = "bold", size = 8, angle = 0),
axis.text.y = element_text(face = "bold", size = 10, angle = 90),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
geom_hline(yintercept = 0) +
scale_x_discrete(labels = c("no Baseline", "with Baseline"))

ggplot sort descending points within group

I want to arrange the plot below so that 'group' is arranged in descending order by 'Distance' within Community (Out, In).
I've tried using dplyr::arrange and tidytext::reorder_within(group, -value, MPA_type), but neither of these work - ggplot continues to default to sorted by group name in descending order (see legend). If facetwrap is part of the solution, I'd prefer to have the labels at the bottom as in the example figure, rather than the typical facet header.
Here is my data:
sig_distance <- structure(list(Var1 = structure(c(2L, 2L, 5L, 5L, 2L, 2L, 5L,
5L, 2L, 2L, 5L, 5L, 2L, 2L, 5L, 5L, 2L, 2L, 5L, 5L), .Label = c("ref after",
"ref before", "ref during", "smr after", "smr before", "smr during"
), class = "factor"), Var2 = structure(c(1L, 3L, 4L, 6L, 1L,
3L, 4L, 6L, 1L, 3L, 4L, 6L, 1L, 3L, 4L, 6L, 1L, 3L, 4L, 6L), .Label = c("ref after",
"ref before", "ref during", "smr after", "smr before", "smr during"
), class = "factor"), value = c(0.0781171338765429, 0.070131485880327,
0.124219180798504, 0.0642584499973571, 0.16882716299913, 0.123057288279708,
0.185404402405965, 0.113660097900038, 0.14628853013894, 0.106462687516074,
0.179579889492142, 0.146317072898829, 0.163284273893779, 0.130083096905712,
0.0991349070859965, 0.106610448830353, 0.0499622399107518, 0.0563330614755333,
0.0391975833642552, 0.0435817314833789), MPA = structure(c(3L,
2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L,
2L, 4L, 1L), .Label = c("MPA___before-to-during", "Reference___before-to-during",
"Reference___before-to-after", "MPA___before-to-after"), scores = structure(c(`MPA___before-to-after` = 0.125507192629372,
`MPA___before-to-during` = 0.0948855602219911, `Reference___before-to-after` = 0.121295868163829,
`Reference___before-to-during` = 0.0972135240114708), .Dim = 4L, .Dimnames = list(
c("MPA___before-to-after", "MPA___before-to-during", "Reference___before-to-after",
"Reference___before-to-during"))), class = "factor"), name = c("sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during"), sd_pooled = c(0.0640133632403095,
0.059224209496302, 0.0418411590759088, 0.0420366263878186, 0.0697371748889264,
0.0713939572229526, 0.0662209469675998, 0.0673861920919254, 0.0952259175162696,
0.0973881903133112, 0.104688903793631, 0.104123442035945, 0.0831395386888112,
0.0736773344066338, 0.0870890086043125, 0.082205340195828, 0.0622386704700814,
0.0506360166386964, 0.0776340589400514, 0.057490214920967), group = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 2L,
2L, 2L, 2L), .Label = c("Group 5", "Group 4", "Group 3", "Group 2",
"Group 1"), class = "factor"), period = structure(c(1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("Before-to-after", "Before-to-during"), class = "factor"),
MPA_type = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Out",
"In"), class = "factor"), Df = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), SumOfSqs = c(0.644667474362213,
0.163718859184403, 0.531883138574452, 0.172377817596512,
1.36062525175964, 0.561260198791761, 1.26106114133322, 0.478287656340437,
1.12522630961807, 0.502450621711331, 0.779126478533436, 0.349888339609262,
0.64822862542131, 0.198815304465667, 0.329502905266225, 0.124876688005652,
0.197752601704789, 0.10862759418973, 0.142471900603864, 0.0840130152284379
), R2 = c(0.190110542714351, 0.0631029576320754, 0.217228522650464,
0.089364183731598, 0.076782933708657, 0.0349456770960147,
0.0732327885619086, 0.0303186954655169, 0.0788241854776924,
0.0376889492950336, 0.0656167575864088, 0.0312644101580068,
0.288412156204619, 0.123882033933067, 0.158350315762715,
0.0838360162000489, 0.0226863909348303, 0.0137219062151816,
0.0222248102598127, 0.0150399647150649), F = c(9.62419276009816,
2.4247130389196, 11.3779943270621, 3.53281801227694, 10.4792794679964,
4.27290961706825, 9.00823614929462, 3.37679925864692, 10.610568411745,
4.50397942095095, 7.93538804068383, 3.45325589560756, 6.07961811151075,
1.83818447230295, 2.44585620771756, 1.18959949296437, 3.29624747150235,
1.85040460507006, 2.25026799494279, 1.42007459023087), `p-val` = c(0.001,
0.05, 0.001, 0.006, 0.001, 0.001, 0.001, 0.002, 0.001, 0.001,
0.001, 0.005, 0.001, 0.076, 0.017, 0.31, 0.005, 0.069, 0.037,
0.195), sig = c("*", "", "*", "*", "*", "*", "*", "*", "*",
"*", "*", "*", "*", "", "*", "", "*", "", "*", "")), row.names = c(NA,
-20L), class = "data.frame")
Plotting code:
p1 <-
sig_distance %>%
rename("Period"=period)%>%
filter(Period == "Before-to-during")%>%
mutate(group = factor(group))%>%
arrange(MPA_type, -value, group)%>%
ggplot(aes(x=reorder(MPA_type, -value), y=value, color=group,
#shape=group,
#fill=MPA_type
))+
geom_point(position = position_dodge(width=0.8),
size=3)+
geom_errorbar(aes(ymin=value-sd_pooled,
ymax = value+sd_pooled), stat="identity",
position = position_dodge(width=0.8), size=0.3, width=.3)+
#add significance level
geom_text(aes(label=sig), size=5, vjust=-0.01,
position = position_dodge(width=0.8),
show.legend = FALSE)+
ylab("Distance (Bray-Curtis)")+
xlab("Community")+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
legend.key=element_blank())
One option would be to add a helper column to you dataset as the interaction of MPA_type and group for which you set the order using forcats::fct_inorder after you arranged your dataset according to your desired order. Thus helper column could then be mapped on the group aes:
library(ggplot2)
library(dplyr)
library(forcats)
sig_distance %>%
rename("Period" = period) %>%
filter(Period == "Before-to-during") %>%
mutate(group = factor(group)) %>%
arrange(MPA_type, -value, group) %>%
mutate(mpa_ordered = fct_inorder(paste(MPA_type, group, sep = "."))) |>
ggplot(aes(x = MPA_type, y = value, color = group, group = mpa_ordered)) +
geom_point(
position = position_dodge(width = 0.8),
size = 3
) +
geom_errorbar(
aes(
ymin = value - sd_pooled,
ymax = value + sd_pooled
),
stat = "identity",
position = position_dodge(width = 0.8), size = 0.3, width = .3
) +
geom_text(aes(label = sig),
size = 5, vjust = -0.01,
position = position_dodge(width = 0.8),
show.legend = FALSE
) +
ylab("Distance (Bray-Curtis)") +
xlab("Community") +
theme(
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
legend.key = element_blank()
)

Labels along x axis disappear when using expand=c(0, 0)

I've a question regarding to the labels of the x-axis. Assume I've the following plot:
p <- ggplot(long_form_q, aes(reihe, variable)) + geom_tile(aes(fill = value), colour = "white")
pneu <- p + scale_fill_gradient(low = "white",high = "steelblue", limits= c(1,3), breaks=c(1,2,3)) +
geom_text(aes(label=long_form_textq$value)) +
theme(axis.title.x = element_blank(),axis.title.y =element_blank()) +
theme(axis.text.y = element_text(size=18, color = "black"), axis.text.x = element_text(size=14)) +
scale_y_discrete(labels=c(h_3x3.1="3x3", h_3x5.1="3x5", h_3x9.1 ="3x9"), expand=c(0,0))
of the following form:
How can I change the labels of the x-axis to (1,2,3,4,5,6,7,8,9,10) while using expand=c(0,0) for x ? If I'm using
scale_x_discrete(expand=c(0,0))
the labels are vanish
My dput is:
dput(long_form_q)
structure(list(reihe = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L), variable = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("h_3x3.1",
"h_3x5.1", "h_3x9.1"), class = "factor"), value = c(1, 1, 1,
2, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 1,
3, 3, 3, 3, 3, 3)), row.names = c(NA, -30L), .Names = c("reihe",
"variable", "value"), class = "data.frame")
You're plotting continuous data along the x axis, so the correct scale is scale_x_continuous(). The reason the labels are disappearing is because you're erroneously using scale_x_discrete().
pneu <- p + scale_fill_gradient(low = "white",high = "steelblue", limits= c(1,3), breaks=c(1,2,3)) +
geom_text(aes(label=value)) +
theme(axis.title.x = element_blank(),axis.title.y =element_blank()) +
theme(axis.text.y = element_text(size=18, color = "black"), axis.text.x = element_text(size=14)) +
scale_y_discrete(labels=c(h_3x3.1="3x3", h_3x5.1="3x5", h_3x9.1 ="3x9"),
expand=c(0, 0)) +
scale_x_continuous(expand = c(0, 0), breaks = 1:10)
pneu
I didn't have your variable long_form_textq$value, so I used long_form_q$value instead. Note that it is almost always a bad idea to feed data into ggplot via the aes() function. Data should be provided via the data = argument.

Text color based on contrast against background

I have the following plot:
m <- structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("FE", "AG",
"NO", "SPH", "SEP", "H/I", "CMP"), class = "factor"), Var2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L
), .Label = c("FE", "AG", "NO", "SPH", "SEP", "H/I", "CMP"), class = "factor"),
value = c(0, 0.0419753086419753, 0.172839506172839, 0.0740740740740741,
0.0123456790123457, 0.111111111111111, 0.0617283950617284,
0.0419753086419753, 0, 0.0765432098765432, 0.0246913580246914,
0.00493827160493827, 0.0567901234567901, 0.0320987654320988,
0.172839506172839, 0.0765432098765432, 0, 0.175308641975309,
0.0197530864197531, 0.177777777777778, 0.120987654320988,
0.0740740740740741, 0.0246913580246914, 0.175308641975309,
0, 0.00740740740740741, 0.0814814814814815, 0.0395061728395062,
0.0123456790123457, 0.00493827160493827, 0.0197530864197531,
0.00740740740740741, 0, 0.0197530864197531, 0.00987654320987654,
0.111111111111111, 0.0567901234567901, 0.177777777777778,
0.0814814814814815, 0.0197530864197531, 0, 0.0716049382716049,
0.0617283950617284, 0.0320987654320988, 0.120987654320988,
0.0395061728395062, 0.00987654320987654, 0.0716049382716049,
0), vtext = c("0.0%", "4.2%", "17.3%", "7.4%", "1.2%", "11.1%",
"6.2%", "4.2%", "0.0%", "7.7%", "2.5%", "0.5%", "5.7%", "3.2%",
"17.3%", "7.7%", "0.0%", "17.5%", "2.0%", "17.8%", "12.1%",
"7.4%", "2.5%", "17.5%", "0.0%", "0.7%", "8.1%", "4.0%",
"1.2%", "0.5%", "2.0%", "0.7%", "0.0%", "2.0%", "1.0%", "11.1%",
"5.7%", "17.8%", "8.1%", "2.0%", "0.0%", "7.2%", "6.2%",
"3.2%", "12.1%", "4.0%", "1.0%", "7.2%", "0.0%")), .Names = c("Var1",
"Var2", "value", "vtext"), row.names = c(NA, -49L), class = "data.frame")
library(ggplot2)
ggplot(data = m, aes(x = Var2, y = Var1, fill = value, label = vtext)) +
xlab("") + ylab("") +
geom_tile() +
geom_text() +
scale_fill_gradient(low="white", high="darkmagenta") +
# Sample code for subtitles: ggtitle(bquote(atop("Age distribution", atop(italic(.(subtitle)), ""))))
ggtitle(bquote(atop(.(title), atop(italic(.(subtitle)), "")))) +
theme(axis.text.y = element_text(size = 12), axis.text.x = element_text(size = 12),
axis.title = element_text(size = 16, face = "bold"),
plot.title = element_text(size = 20),
panel.background = element_rect(fill = "white"),
legend.key.size = unit(0.02, "npc"),
legend.text = element_text(size = 14),
legend.title = element_text(size = 16))
Which results in:
The problem, is that the text in the darker squares is difficult to read. Is it possible to change the text color based on the background color, so the text in the clear boxes is black and in the darker boxes is white?
Add these two code lines:
geom_text(aes(color = value > 0.1)) +
scale_color_manual(guide = FALSE, values = c("black", "white"))
Here text color depends on value (value > 0.1) and colors are specified with scale_color_manual.
For the output like this:

ggplot heatmap gradient colours for different categories

I am trying to adapt a worked example from ggplot2 heatmaps: using different gradients for categories
However the values that I want to plot are discrete (I think). I have already standardised my values (in a stored procedure) into percent values between 0 and 1. If the percent_value is 0 then I was to show white. If the percent value is 1 then I want to show the full colour. The colour gradates from white to full. Each category has it own colour.
Here is my code...
library(RColorBrewer)
rm(list=ls())
yval <- c("51140/1234.5985/16:25:17" ,"51140/1234.5985/16:25:17" ,"51140/1234.5985/16:25:17" ,"51141/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51149/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17")
cat <- c("cat1" ,"cat1" ,"cat1" ,"cat2" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat2" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1")
xval <- c("cat1.ant" ,"cat1.output3" ,"cat1.input5" ,"cat2.cat2_active_state" ,"cat1.input5" ,"cat1.output3" ,"cat1.ant" ,"cat1.ant" ,"cat1.output3" ,"cat1.input5" ,"cat2.cat2_active_state" ,"cat1.input5" ,"cat1.ant" ,"cat1.output3" ,"cat1.output3" ,"cat1.ant" ,"cat1.input5")
value <- c(0.75 ,1 ,1 ,0.1 ,1 ,1 ,0.75 ,0 ,1 ,1 ,1 ,1 ,0.75 ,1 ,1 ,0.75 ,1)
dat <- data.frame(xval, yval, cat, value)
n <- length(unique(dat$cat))
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sample_colours <- sample(col_vector, n)
# 2 categories. I've hard-coded the gradient ends in this example.
# I've tried translating the value up the number line to separate the categories into different colour bands.
gradientends <- c(0, 1, 2, 3)
interleave <- function(v1,v2)
{
ord1 <- 2*(1:length(v1))-1
ord2 <- 2*(1:length(v2))
c(v1,v2)[order(c(ord1,ord2))]
}
colorends <- interleave(rep("white",n),sample_colours)
ggplot(dat, aes(x = xval, y = factor(yval))) +
geom_tile(aes(fill = value), colour = "grey80") +
geom_text(aes(label = value)) +
scale_fill_gradientn(colours = colorends) + #, values = gradientends) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))
I have tried various approaches and it seems to me that scale_fill_gradient is probably not a good way to approach this. It appears that the scale function is "adjusting" values on the fly because depending on the values I am plotting then I get my heatmap looking correct or not.
Is there a way around it with this approach or perhaps there is a better approach?
Liam
I have figured out how to get my example working. It turns out I was getting the gradientends wrong and I should have been doing a rescale in scale_fill_gradientn(colours = colorends, values = rescale(gradientends)). To be honest, I'm not quite sure what is happening here! Presumeably the gradientends are getting rescaled in the same manner as scale_fill_ is filling in scaled rescaleoffset values, so everything lines up correctly with no overspill into neighbouring colour blocks.
Here is the working code. I have put the data in dput() ofrmat as suggested in SO guidelines. I've included the value and rescaloffset values in the geom_text (which helps debugging). I also added another category to complicate it a bit.
rm(list=ls())
library(RColorBrewer)
dat <- structure(list(xval = structure(c(5L, 3L, 2L, 4L, 2L, 3L, 1L,
1L, 3L, 2L, 4L, 2L, 1L, 3L, 3L, 1L, 2L), .Label = c("cat1.ant",
"cat1.input5", "cat1.output3", "cat2.cat2_active_state", "cat3.ant"
), class = "factor"), yval = structure(c(1L, 1L, 1L, 2L, 3L,
3L, 3L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 7L, 7L, 7L), .Label = c("51140/1234.5985/16:25:17",
"51141/1234.5985/16:25:17", "51146/1234.5985/16:25:17", "51147/1234.5985/16:25:17",
"51149/1234.5985/16:25:17", "51150/1234.5985/16:25:17", "51153/1234.5985/16:25:17"
), class = "factor"), cat = structure(c(3L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("cat1",
"cat2", "cat3"), class = "factor"), value = c(0.75, 1, 1, 0.1,
1, 1, 0.75, 0, 1, 1, 1, 1, 0.75, 1, 1, 0.75, 1), rescaleoffset = c(200.75,
1, 1, 100.1, 1, 1, 0.75, 0, 1, 1, 101, 1, 0.75, 1, 1, 0.75, 1
)), .Names = c("xval", "yval", "cat", "value", "rescaleoffset"
), row.names = c(NA, -17L), class = "data.frame")
n <- length(unique(dat$cat))
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sample_colours <- sample(col_vector, n)
dat$rescaleoffset <- dat$value + 100*(as.numeric(dat$cat)-1)
scalerange <- range(dat$value)
gradientends <- scalerange + rep(c(0,100,200), each=2)
colorends <- c("white", "red", "white", "green", "white", "blue")
ggplot(dat, aes(xval, yval)) +
geom_tile(aes(fill = rescaleoffset), colour = "white") +
geom_text(aes(label = paste(format(round(value, 5), nsmall = 5), format(round(rescaleoffset, 5), nsmall = 5), sep='\n'))) +
scale_fill_gradientn(colours = colorends, values = rescale(gradientends)) +
scale_x_discrete("", expand = c(0, 0)) +
scale_y_discrete("", expand = c(0, 0)) +
theme_grey(base_size = 9) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))+
theme(legend.background = element_rect(fill="gray90", size=30, linetype="dotted"))
Although the values are numeric and would appear to be continuous, they actually represent discrete categoric values. Overall, I'm happy with this and is exactly what I am looking for, although some work required on the formatting and parametrisation.
EDIT: Now I am really baffled. Here is a similar set of data but it is not plotting as I expect. I expect the BCU1 catgeory to be darkviolet (not white) because it has a value of 1.0. There is something that I am not understanding with the scaling. Could anyone help?
dat <- structure(list(heatmap_row_display = structure(c(2L, 6L, 5L,
8L, 4L, 3L, 7L, 9L, 1L, 3L, 7L, 9L, 4L, 1L, 4L, 1L, 3L, 7L, 9L
), .Label = c("051140/1084.8158/16:25:17", "051141/1084.8466/16:25:17",
"051146/1084.8803/16:25:17", "051147/1084.8876/16:25:17", "051148/1084.8965/16:25:17",
"051149/1084.9465/16:25:17", "051150/1084.9525/16:25:17", "051152/1084.9965/16:25:17",
"051153/1085.0193/16:25:17"), class = "factor"), msg_no = c(51141L,
51149L, 51148L, 51152L, 51147L, 51146L, 51150L, 51153L, 51140L,
51146L, 51150L, 51153L, 51147L, 51140L, 51147L, 51140L, 51146L,
51150L, 51153L), relative_time_ms = c(1084.8466, 1084.9465, 1084.8965,
1084.9965, 1084.8876, 1084.8803, 1084.9525, 1085.0193, 1084.8158,
1084.8803, 1084.9525, 1085.0193, 1084.8876, 1084.8158, 1084.8876,
1084.8158, 1084.8803, 1084.9525, 1085.0193), pcan_rx_datetime_adjusted = structure(c(1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517
), class = c("POSIXct", "POSIXt"), tzone = ""), block_name = structure(c(1L,
1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("BCU1", "BCU2", "IDC1_status"), class = "factor"),
pcan_attribute = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L), .Label = c("BCU1.BCU1_active_state",
"BCU2.BCU2_active_state", "IDC1_status.IDC1_ant", "IDC1_status.IDC1_input5",
"IDC1_status.IDC1_output3"), class = "factor"), data_value_as_string = c(1L,
1L, 1L, 1L, 0L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), data_value = c(1L, 1L, 1L, 1L, 0L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), value = c(1,
1, 1, 1, 0, 0.75, 0.75, 0.75, 0.75, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), colour = structure(c(2L, 2L, 1L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("cyan",
"darkviolet", "deeppink"), class = "factor"), rescaleoffset = c(1,
1, 101, 101, 200, 200.75, 200.75, 200.75, 200.75, 201, 201,
201, 201, 201, 201, 201, 201, 201, 201)), .Names = c("heatmap_row_display",
"msg_no", "relative_time_ms", "pcan_rx_datetime_adjusted", "block_name",
"pcan_attribute", "data_value_as_string", "data_value", "value",
"colour", "rescaleoffset"), row.names = c(NA, 19L), class = "data.frame")
n <- length(unique(dat$block_name))
# Do it this way to avoid reordering the colours in the data frame
sample_colours <- levels(factor(dat$colour, levels=unique(dat$colour)))
# Rescale all the values into categories of 100
dat$rescaleoffset <- dat$value + 100*(as.numeric(dat$block_name)-1)
scalerange <- range(dat$value)
# Mark the end of each gradient for each category block.
gradientends <- scalerange + rep(seq(0, (n - 1) * 100, by = 100), each=2)
# Interleave two vectors, used to interleave "white" with each of the category colours.
# "white" is used to colour the values on lowest end of each category's gradient range.
interleave <- function(v1,v2)
{
ord1 <- 2*(1:length(v1))-1
ord2 <- 2*(1:length(v2))
c(v1,v2)[order(c(ord1,ord2))]
}
colorends <- interleave(rep("white",n),sample_colours)
p <- ggplot(dat, aes(pcan_attribute, heatmap_row_display)) +
geom_tile(aes(fill = rescaleoffset), colour = "white") +
geom_text(aes(label = paste(format(round(value, 1), nsmall = 1), sep='\n')), size=rel(2.0)) +
scale_fill_gradientn(colours = colorends, values = rescale(gradientends)) +
scale_x_discrete("", expand = c(0, 0)) +
scale_y_discrete("", expand = c(0, 0)) +
theme_grey(base_size = 9) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))+
theme(legend.background = element_rect(fill="gray90", size=30, linetype="dotted"))
print(p)

Resources