ggplot sort descending points within group - r

I want to arrange the plot below so that 'group' is arranged in descending order by 'Distance' within Community (Out, In).
I've tried using dplyr::arrange and tidytext::reorder_within(group, -value, MPA_type), but neither of these work - ggplot continues to default to sorted by group name in descending order (see legend). If facetwrap is part of the solution, I'd prefer to have the labels at the bottom as in the example figure, rather than the typical facet header.
Here is my data:
sig_distance <- structure(list(Var1 = structure(c(2L, 2L, 5L, 5L, 2L, 2L, 5L,
5L, 2L, 2L, 5L, 5L, 2L, 2L, 5L, 5L, 2L, 2L, 5L, 5L), .Label = c("ref after",
"ref before", "ref during", "smr after", "smr before", "smr during"
), class = "factor"), Var2 = structure(c(1L, 3L, 4L, 6L, 1L,
3L, 4L, 6L, 1L, 3L, 4L, 6L, 1L, 3L, 4L, 6L, 1L, 3L, 4L, 6L), .Label = c("ref after",
"ref before", "ref during", "smr after", "smr before", "smr during"
), class = "factor"), value = c(0.0781171338765429, 0.070131485880327,
0.124219180798504, 0.0642584499973571, 0.16882716299913, 0.123057288279708,
0.185404402405965, 0.113660097900038, 0.14628853013894, 0.106462687516074,
0.179579889492142, 0.146317072898829, 0.163284273893779, 0.130083096905712,
0.0991349070859965, 0.106610448830353, 0.0499622399107518, 0.0563330614755333,
0.0391975833642552, 0.0435817314833789), MPA = structure(c(3L,
2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L, 2L, 4L, 1L, 3L,
2L, 4L, 1L), .Label = c("MPA___before-to-during", "Reference___before-to-during",
"Reference___before-to-after", "MPA___before-to-after"), scores = structure(c(`MPA___before-to-after` = 0.125507192629372,
`MPA___before-to-during` = 0.0948855602219911, `Reference___before-to-after` = 0.121295868163829,
`Reference___before-to-during` = 0.0972135240114708), .Dim = 4L, .Dimnames = list(
c("MPA___before-to-after", "MPA___before-to-during", "Reference___before-to-after",
"Reference___before-to-during"))), class = "factor"), name = c("sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during", "sd_ref_pooled_before_after",
"sd_ref_pooled_before_during", "sd_smr_pooled_before_after",
"sd_smr_pooled_before_during"), sd_pooled = c(0.0640133632403095,
0.059224209496302, 0.0418411590759088, 0.0420366263878186, 0.0697371748889264,
0.0713939572229526, 0.0662209469675998, 0.0673861920919254, 0.0952259175162696,
0.0973881903133112, 0.104688903793631, 0.104123442035945, 0.0831395386888112,
0.0736773344066338, 0.0870890086043125, 0.082205340195828, 0.0622386704700814,
0.0506360166386964, 0.0776340589400514, 0.057490214920967), group = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 2L,
2L, 2L, 2L), .Label = c("Group 5", "Group 4", "Group 3", "Group 2",
"Group 1"), class = "factor"), period = structure(c(1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L), .Label = c("Before-to-after", "Before-to-during"), class = "factor"),
MPA_type = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Out",
"In"), class = "factor"), Df = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), SumOfSqs = c(0.644667474362213,
0.163718859184403, 0.531883138574452, 0.172377817596512,
1.36062525175964, 0.561260198791761, 1.26106114133322, 0.478287656340437,
1.12522630961807, 0.502450621711331, 0.779126478533436, 0.349888339609262,
0.64822862542131, 0.198815304465667, 0.329502905266225, 0.124876688005652,
0.197752601704789, 0.10862759418973, 0.142471900603864, 0.0840130152284379
), R2 = c(0.190110542714351, 0.0631029576320754, 0.217228522650464,
0.089364183731598, 0.076782933708657, 0.0349456770960147,
0.0732327885619086, 0.0303186954655169, 0.0788241854776924,
0.0376889492950336, 0.0656167575864088, 0.0312644101580068,
0.288412156204619, 0.123882033933067, 0.158350315762715,
0.0838360162000489, 0.0226863909348303, 0.0137219062151816,
0.0222248102598127, 0.0150399647150649), F = c(9.62419276009816,
2.4247130389196, 11.3779943270621, 3.53281801227694, 10.4792794679964,
4.27290961706825, 9.00823614929462, 3.37679925864692, 10.610568411745,
4.50397942095095, 7.93538804068383, 3.45325589560756, 6.07961811151075,
1.83818447230295, 2.44585620771756, 1.18959949296437, 3.29624747150235,
1.85040460507006, 2.25026799494279, 1.42007459023087), `p-val` = c(0.001,
0.05, 0.001, 0.006, 0.001, 0.001, 0.001, 0.002, 0.001, 0.001,
0.001, 0.005, 0.001, 0.076, 0.017, 0.31, 0.005, 0.069, 0.037,
0.195), sig = c("*", "", "*", "*", "*", "*", "*", "*", "*",
"*", "*", "*", "*", "", "*", "", "*", "", "*", "")), row.names = c(NA,
-20L), class = "data.frame")
Plotting code:
p1 <-
sig_distance %>%
rename("Period"=period)%>%
filter(Period == "Before-to-during")%>%
mutate(group = factor(group))%>%
arrange(MPA_type, -value, group)%>%
ggplot(aes(x=reorder(MPA_type, -value), y=value, color=group,
#shape=group,
#fill=MPA_type
))+
geom_point(position = position_dodge(width=0.8),
size=3)+
geom_errorbar(aes(ymin=value-sd_pooled,
ymax = value+sd_pooled), stat="identity",
position = position_dodge(width=0.8), size=0.3, width=.3)+
#add significance level
geom_text(aes(label=sig), size=5, vjust=-0.01,
position = position_dodge(width=0.8),
show.legend = FALSE)+
ylab("Distance (Bray-Curtis)")+
xlab("Community")+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
legend.key=element_blank())

One option would be to add a helper column to you dataset as the interaction of MPA_type and group for which you set the order using forcats::fct_inorder after you arranged your dataset according to your desired order. Thus helper column could then be mapped on the group aes:
library(ggplot2)
library(dplyr)
library(forcats)
sig_distance %>%
rename("Period" = period) %>%
filter(Period == "Before-to-during") %>%
mutate(group = factor(group)) %>%
arrange(MPA_type, -value, group) %>%
mutate(mpa_ordered = fct_inorder(paste(MPA_type, group, sep = "."))) |>
ggplot(aes(x = MPA_type, y = value, color = group, group = mpa_ordered)) +
geom_point(
position = position_dodge(width = 0.8),
size = 3
) +
geom_errorbar(
aes(
ymin = value - sd_pooled,
ymax = value + sd_pooled
),
stat = "identity",
position = position_dodge(width = 0.8), size = 0.3, width = .3
) +
geom_text(aes(label = sig),
size = 5, vjust = -0.01,
position = position_dodge(width = 0.8),
show.legend = FALSE
) +
ylab("Distance (Bray-Curtis)") +
xlab("Community") +
theme(
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
legend.key = element_blank()
)

Related

Free scales for y-axis not working for facet_nested (ggh4x)

I am trying to make scales for the y-axis free. But it looks like it is not working. I want adaptation to have its own scale and post adaptation also having its own scales. I am trying to make scales for the y-axis free. But it looks like it is not working. I want adaptation to have its own scale and post adaptation also having its own scales.
tgc <- structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Visible", "Remembered"), class = "factor"),
Condition = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L
), .Label = c("CEN", "IPS", "CTL"), class = "factor"), test = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Pre-test", "Post-test"
), class = "factor"), Session = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("Adaptation", "Post-adaptation"
), class = "factor"), N = c(12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12), EE = c(2.134379625, 0.333942625, 1.742841125, 0.317361916666667,
2.84197270833333, 0.307057416666667, 2.403112375, 0.281202,
3.49590529166667, 0.305657666666667, 2.85211466666667, 0.3131155,
1.44857545833333, 0.269328166666667, 1.740270875, 0.243361833333333,
2.10702266666667, 0.286209125, 2.145855125, 0.305474083333333,
1.60016616666667, 0.281528625, 1.94182179166667, 0.294655916666667
), sd = c(0.727246182828044, 0.0816168443914292, 0.549168068103643,
0.0894916121701392, 1.14554677132408, 0.0958562360654162,
1.06827971273128, 0.0953131237162305, 1.18204258551111, 0.0896670491921828,
1.32864473484909, 0.109865886496798, 0.605344957514288, 0.0815454655757737,
0.833908172662699, 0.0798994165789182, 1.11582277105041,
0.0976064300150272, 0.667812406644538, 0.142929179817685,
0.686043669971901, 0.109794818975944, 1.39509308576833, 0.161854932615856
), se = c(0.209937889711449, 0.0235607535398997, 0.158531165974993,
0.0258340031883217, 0.330690868396632, 0.0276713118479362,
0.308385789857611, 0.0275145288174349, 0.341226302469221,
0.0258846474942731, 0.383546697661249, 0.0317155495718416,
0.174748037086728, 0.0235401482506832, 0.240728553983119,
0.0230649748349663, 0.322110288616933, 0.0281765493219072,
0.192780836372198, 0.0412601002213964, 0.198043748767058,
0.0316950341456936, 0.402728684306467, 0.0467234944577166
), ci = c(0.462070179795855, 0.0518568689018959, 0.348924743722983,
0.0568602576432562, 0.727845693918804, 0.0609041467375754,
0.678752547059741, 0.0605590696140879, 0.751034027967696,
0.0569717250090983, 0.844180589754564, 0.069805453951774,
0.384617836383033, 0.0518115169661108, 0.529839974927164,
0.0507656673296478, 0.708959965158704, 0.0620161669201078,
0.424307760005262, 0.0908128682911871, 0.435891352085212,
0.0697602998032695, 0.886399857701764, 0.102837717929058)), row.names = c(NA,
-24L), class = "data.frame")
library(ggh4x)
p <- ggplot(tgc, aes(x = Condition, y = EE), fill = test) +
geom_errorbar(aes(ymin=EE-se, ymax=EE+se, group = test), position = position_dodge(0.5), width=.1) +
geom_bar(aes(fill = test), stat = "identity", width = 0.5, color = "black", position='dodge') + ylim(0,4) + theme_bw() + theme(
axis.text.x = element_text(size = 12,face="bold"),#, angle = 10, hjust = .5, vjust = .5),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.y = element_text(vjust= 1.8, size = 16),
axis.title.x = element_text(vjust= -0.5, size = 16),
axis.title = element_text(face = "bold")) + xlab("Workspace") + ylab("EE (cm)") + theme(legend.position="top") +
scale_fill_manual(values = c("grey80", "grey20")) + facet_nested(. ~ Session + Group, scales = "free_y") + theme(aspect.ratio = 6/4)
p + guides(fill=guide_legend(title="Test:")) + theme(legend.text=element_text(size=14),legend.title=element_text(size=14) ) +
theme(strip.text = element_text(face="bold", size=12))
The facet_nested() function is based on facet_grid(), in which you can have y scales that vary between rows in the grid, but not within a row.
Let's make the base of the plot.
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.1.1
library(ggh4x)
tgc <- structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Visible", "Remembered"), class = "factor"),
Condition = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L
), .Label = c("CEN", "IPS", "CTL"), class = "factor"), test = structure(c(1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Pre-test", "Post-test"
), class = "factor"), Session = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L), .Label = c("Adaptation", "Post-adaptation"
), class = "factor"), N = c(12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12), EE = c(2.134379625, 0.333942625, 1.742841125, 0.317361916666667,
2.84197270833333, 0.307057416666667, 2.403112375, 0.281202,
3.49590529166667, 0.305657666666667, 2.85211466666667, 0.3131155,
1.44857545833333, 0.269328166666667, 1.740270875, 0.243361833333333,
2.10702266666667, 0.286209125, 2.145855125, 0.305474083333333,
1.60016616666667, 0.281528625, 1.94182179166667, 0.294655916666667
), sd = c(0.727246182828044, 0.0816168443914292, 0.549168068103643,
0.0894916121701392, 1.14554677132408, 0.0958562360654162,
1.06827971273128, 0.0953131237162305, 1.18204258551111, 0.0896670491921828,
1.32864473484909, 0.109865886496798, 0.605344957514288, 0.0815454655757737,
0.833908172662699, 0.0798994165789182, 1.11582277105041,
0.0976064300150272, 0.667812406644538, 0.142929179817685,
0.686043669971901, 0.109794818975944, 1.39509308576833, 0.161854932615856
), se = c(0.209937889711449, 0.0235607535398997, 0.158531165974993,
0.0258340031883217, 0.330690868396632, 0.0276713118479362,
0.308385789857611, 0.0275145288174349, 0.341226302469221,
0.0258846474942731, 0.383546697661249, 0.0317155495718416,
0.174748037086728, 0.0235401482506832, 0.240728553983119,
0.0230649748349663, 0.322110288616933, 0.0281765493219072,
0.192780836372198, 0.0412601002213964, 0.198043748767058,
0.0316950341456936, 0.402728684306467, 0.0467234944577166
), ci = c(0.462070179795855, 0.0518568689018959, 0.348924743722983,
0.0568602576432562, 0.727845693918804, 0.0609041467375754,
0.678752547059741, 0.0605590696140879, 0.751034027967696,
0.0569717250090983, 0.844180589754564, 0.069805453951774,
0.384617836383033, 0.0518115169661108, 0.529839974927164,
0.0507656673296478, 0.708959965158704, 0.0620161669201078,
0.424307760005262, 0.0908128682911871, 0.435891352085212,
0.0697602998032695, 0.886399857701764, 0.102837717929058)), row.names = c(NA,
-24L), class = "data.frame")
p <- ggplot(tgc, aes(x = Condition, y = EE), fill = test) +
geom_errorbar(aes(ymin=EE-se, ymax=EE+se, group = test),
position = position_dodge(0.5), width=.1) +
geom_bar(aes(fill = test),
stat = "identity", width = 0.5, color = "black", position='dodge') +
theme_bw() + theme(
axis.text.x = element_text(size = 12,face="bold"),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.y = element_text(vjust= 1.8, size = 16),
axis.title.x = element_text(vjust= -0.5, size = 16),
axis.title = element_text(face = "bold")) +
xlab("Workspace") + ylab("EE (cm)") +
theme(legend.position="top") +
scale_fill_manual(values = c("grey80", "grey20")) +
theme(aspect.ratio = 6/4) +
guides(fill=guide_legend(title="Test:")) +
theme(legend.text=element_text(size=14),legend.title=element_text(size=14) ) +
theme(strip.text = element_text(face="bold", size=12))
If you wanted to have a non-grid layout, it is probably easier to use facet_nested_wrap() instead.
p + facet_nested_wrap(~Session + Group, scales = "free_y", nrow = 1)
Alternatively, if you must retain the grid layout and want the y-axes to be independent within a row, you can use independent = "y".
p + facet_nested(~Session + Group, scales = "free_y", independent = "y")
Created on 2021-11-09 by the reprex package (v2.0.1)
It is not directly an answer to that question, but an alternative would be to create the panels separately. You can do this semi-programmatically by creating a list first. The biggest disadvantage that I can see in this way is that patchwork still cannot merge / combine x-/y- axis titles like it does with legends, so you would need to create each plot separately which would in turn defeat the point of this approach...
library(tidyverse)
library(ggh4x)
library(patchwork)
ls_p <-
tgc %>%
split(., .$Session) %>%
map(function(x){
## I've tried to de-clutter your code.
ggplot(x, aes(x = Condition, y = EE, fill = test)) +
geom_errorbar(aes(ymin = EE - se, ymax = EE + se, group = test), position = position_dodge(0.5), width = .1) +
geom_col(width = 0.5, color = "black", position = "dodge") +
labs(x= "Workspace", y = "EE (cm)") +
scale_fill_manual("Test:", values = c("grey80", "grey20")) +
facet_nested(. ~ Session + Group, scales = "free_y") +
## I am using cowplot::theme_minimal because I think it's a well designed theme
cowplot::theme_minimal_hgrid()
})
wrap_plots(ls_p) + plot_layout(guides = "collect") &
theme(legend.position = "top")

problems in draw a bar chart with ggplot2 by using facet_grid function

my code about drawing bar chart is here
library(ggplot2)
fig_num <- ggplot(data=branch.fig, aes(y=branch.fig$num, x=branch.fig$film.type, fill = branch.fig$film.type)) +
geom_bar(position=position_dodge(), stat="identity", width = 0.3, colour = "gray35") +
scale_fill_manual(values = colors) +
facet_grid(branch.type~.) +
geom_errorbar(aes(ymin=branch.fig$num, ymax=branch.fig$num +branch.fig$SE), position="dodge", width=0.1, col = "gray40") +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
labs(x = "Film Type",y = "Branch Number") +
ggtitle("DAS = 54") + coord_flip()
fig_num
The result I want to have should be like this,
but what I get is like this, the R combine part of the data, I don't know why would appear this, it's really a big problem for me to solve, sincerely looking for your help,thanks a lot!
following is dataset
dput(subset(branch.fig))
structure(list(film.type = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L, 6L, 6L), .Label = c("black", "ck", "dark-gray",
"green", "red", "white"), class = "factor"), branch.type = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("F",
"V"), class = "factor"), num = c(14.6666666666667, 3.33333333333333,
14, 2.66666666666667, 15.3333333333333, 2, 14.6666666666667,
2.66666666666667, 16.6666666666667, 3.33333333333333, 20.3333333333333,
1.66666666666667), SE = c(0.333333333333333, 0.333333333333333,
1.15470053837925, 0.333333333333333, 0.881917103688197, 0, 0.881917103688197,
0.666666666666667, 0.333333333333333, 0.666666666666667, 2.33333333333333,
0.666666666666667), n = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L)), .Names = c("film.type", "branch.type", "num", "SE",
"n"), row.names = c(NA, -12L), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), vars = list(film.type), drop = TRUE, indices = list(
0:1, 2:3, 4:5, 6:7, 8:9, 10:11), group_sizes = c(2L, 2L,
2L, 2L, 2L, 2L), biggest_group_size = 2L, labels = structure(list(
film.type = structure(1:6, .Label = c("black", "ck", "dark-gray",
"green", "red", "white"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame", vars = list(film.type), drop = TRUE, .Names =
"film.type"))
There are a number of problems with your code.
1. Your dput generated an error. The command below generates the data.
2. The data frame is named in data=branch.fig. No need to name it again as part of aes().
3. There is no dodging in this chart, so drop the dodge commands in both geom_bar and geom_errorbar.
4. As far as I can see there is no colors object. I've put in some colours for the manual colour scale.
# Data
branch.fig = structure(list(film.type = structure(c(1L, 1L, 2L, 2L, 3L, 3L,
4L, 4L, 5L, 5L, 6L, 6L), .Label = c("black", "ck", "dark-gray",
"green", "red", "white"), class = "factor"), branch.type = structure(c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("F",
"V"), class = "factor"), num = c(14.6666666666667, 3.33333333333333,
14, 2.66666666666667, 15.3333333333333, 2, 14.6666666666667,
2.66666666666667, 16.6666666666667, 3.33333333333333, 20.3333333333333,
1.66666666666667), SE = c(0.333333333333333, 0.333333333333333,
1.15470053837925, 0.333333333333333, 0.881917103688197, 0, 0.881917103688197,
0.666666666666667, 0.333333333333333, 0.666666666666667, 2.33333333333333,
0.666666666666667), n = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L)), .Names = c("film.type", "branch.type", "num", "SE",
"n"), row.names = c(NA, -12L), class = "data.frame")
library(ggplot2)
# Plot
fig_num = ggplot(data = branch.fig, aes(y = num, x = film.type, fill = film.type)) +
geom_bar(stat = "identity", width = 0.3, colour = "gray35") +
scale_fill_manual(values = c("black", "orange", "grey", "green", "red", "white")) +
facet_grid(branch.type ~ .) +
geom_errorbar(aes(ymin = num, ymax = num + SE), width = 0.1, col = "gray40") +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
labs(x = "Film Type", y = "Branch Number") +
ggtitle("DAS = 54") + coord_flip()
fig_num

ggplot heatmap gradient colours for different categories

I am trying to adapt a worked example from ggplot2 heatmaps: using different gradients for categories
However the values that I want to plot are discrete (I think). I have already standardised my values (in a stored procedure) into percent values between 0 and 1. If the percent_value is 0 then I was to show white. If the percent value is 1 then I want to show the full colour. The colour gradates from white to full. Each category has it own colour.
Here is my code...
library(RColorBrewer)
rm(list=ls())
yval <- c("51140/1234.5985/16:25:17" ,"51140/1234.5985/16:25:17" ,"51140/1234.5985/16:25:17" ,"51141/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51146/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51147/1234.5985/16:25:17" ,"51149/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51150/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17" ,"51153/1234.5985/16:25:17")
cat <- c("cat1" ,"cat1" ,"cat1" ,"cat2" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat2" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1" ,"cat1")
xval <- c("cat1.ant" ,"cat1.output3" ,"cat1.input5" ,"cat2.cat2_active_state" ,"cat1.input5" ,"cat1.output3" ,"cat1.ant" ,"cat1.ant" ,"cat1.output3" ,"cat1.input5" ,"cat2.cat2_active_state" ,"cat1.input5" ,"cat1.ant" ,"cat1.output3" ,"cat1.output3" ,"cat1.ant" ,"cat1.input5")
value <- c(0.75 ,1 ,1 ,0.1 ,1 ,1 ,0.75 ,0 ,1 ,1 ,1 ,1 ,0.75 ,1 ,1 ,0.75 ,1)
dat <- data.frame(xval, yval, cat, value)
n <- length(unique(dat$cat))
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sample_colours <- sample(col_vector, n)
# 2 categories. I've hard-coded the gradient ends in this example.
# I've tried translating the value up the number line to separate the categories into different colour bands.
gradientends <- c(0, 1, 2, 3)
interleave <- function(v1,v2)
{
ord1 <- 2*(1:length(v1))-1
ord2 <- 2*(1:length(v2))
c(v1,v2)[order(c(ord1,ord2))]
}
colorends <- interleave(rep("white",n),sample_colours)
ggplot(dat, aes(x = xval, y = factor(yval))) +
geom_tile(aes(fill = value), colour = "grey80") +
geom_text(aes(label = value)) +
scale_fill_gradientn(colours = colorends) + #, values = gradientends) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))
I have tried various approaches and it seems to me that scale_fill_gradient is probably not a good way to approach this. It appears that the scale function is "adjusting" values on the fly because depending on the values I am plotting then I get my heatmap looking correct or not.
Is there a way around it with this approach or perhaps there is a better approach?
Liam
I have figured out how to get my example working. It turns out I was getting the gradientends wrong and I should have been doing a rescale in scale_fill_gradientn(colours = colorends, values = rescale(gradientends)). To be honest, I'm not quite sure what is happening here! Presumeably the gradientends are getting rescaled in the same manner as scale_fill_ is filling in scaled rescaleoffset values, so everything lines up correctly with no overspill into neighbouring colour blocks.
Here is the working code. I have put the data in dput() ofrmat as suggested in SO guidelines. I've included the value and rescaloffset values in the geom_text (which helps debugging). I also added another category to complicate it a bit.
rm(list=ls())
library(RColorBrewer)
dat <- structure(list(xval = structure(c(5L, 3L, 2L, 4L, 2L, 3L, 1L,
1L, 3L, 2L, 4L, 2L, 1L, 3L, 3L, 1L, 2L), .Label = c("cat1.ant",
"cat1.input5", "cat1.output3", "cat2.cat2_active_state", "cat3.ant"
), class = "factor"), yval = structure(c(1L, 1L, 1L, 2L, 3L,
3L, 3L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 7L, 7L, 7L), .Label = c("51140/1234.5985/16:25:17",
"51141/1234.5985/16:25:17", "51146/1234.5985/16:25:17", "51147/1234.5985/16:25:17",
"51149/1234.5985/16:25:17", "51150/1234.5985/16:25:17", "51153/1234.5985/16:25:17"
), class = "factor"), cat = structure(c(3L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("cat1",
"cat2", "cat3"), class = "factor"), value = c(0.75, 1, 1, 0.1,
1, 1, 0.75, 0, 1, 1, 1, 1, 0.75, 1, 1, 0.75, 1), rescaleoffset = c(200.75,
1, 1, 100.1, 1, 1, 0.75, 0, 1, 1, 101, 1, 0.75, 1, 1, 0.75, 1
)), .Names = c("xval", "yval", "cat", "value", "rescaleoffset"
), row.names = c(NA, -17L), class = "data.frame")
n <- length(unique(dat$cat))
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
sample_colours <- sample(col_vector, n)
dat$rescaleoffset <- dat$value + 100*(as.numeric(dat$cat)-1)
scalerange <- range(dat$value)
gradientends <- scalerange + rep(c(0,100,200), each=2)
colorends <- c("white", "red", "white", "green", "white", "blue")
ggplot(dat, aes(xval, yval)) +
geom_tile(aes(fill = rescaleoffset), colour = "white") +
geom_text(aes(label = paste(format(round(value, 5), nsmall = 5), format(round(rescaleoffset, 5), nsmall = 5), sep='\n'))) +
scale_fill_gradientn(colours = colorends, values = rescale(gradientends)) +
scale_x_discrete("", expand = c(0, 0)) +
scale_y_discrete("", expand = c(0, 0)) +
theme_grey(base_size = 9) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))+
theme(legend.background = element_rect(fill="gray90", size=30, linetype="dotted"))
Although the values are numeric and would appear to be continuous, they actually represent discrete categoric values. Overall, I'm happy with this and is exactly what I am looking for, although some work required on the formatting and parametrisation.
EDIT: Now I am really baffled. Here is a similar set of data but it is not plotting as I expect. I expect the BCU1 catgeory to be darkviolet (not white) because it has a value of 1.0. There is something that I am not understanding with the scaling. Could anyone help?
dat <- structure(list(heatmap_row_display = structure(c(2L, 6L, 5L,
8L, 4L, 3L, 7L, 9L, 1L, 3L, 7L, 9L, 4L, 1L, 4L, 1L, 3L, 7L, 9L
), .Label = c("051140/1084.8158/16:25:17", "051141/1084.8466/16:25:17",
"051146/1084.8803/16:25:17", "051147/1084.8876/16:25:17", "051148/1084.8965/16:25:17",
"051149/1084.9465/16:25:17", "051150/1084.9525/16:25:17", "051152/1084.9965/16:25:17",
"051153/1085.0193/16:25:17"), class = "factor"), msg_no = c(51141L,
51149L, 51148L, 51152L, 51147L, 51146L, 51150L, 51153L, 51140L,
51146L, 51150L, 51153L, 51147L, 51140L, 51147L, 51140L, 51146L,
51150L, 51153L), relative_time_ms = c(1084.8466, 1084.9465, 1084.8965,
1084.9965, 1084.8876, 1084.8803, 1084.9525, 1085.0193, 1084.8158,
1084.8803, 1084.9525, 1085.0193, 1084.8876, 1084.8158, 1084.8876,
1084.8158, 1084.8803, 1084.9525, 1085.0193), pcan_rx_datetime_adjusted = structure(c(1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517,
1487089517, 1487089517, 1487089517, 1487089517, 1487089517, 1487089517
), class = c("POSIXct", "POSIXt"), tzone = ""), block_name = structure(c(1L,
1L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("BCU1", "BCU2", "IDC1_status"), class = "factor"),
pcan_attribute = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L), .Label = c("BCU1.BCU1_active_state",
"BCU2.BCU2_active_state", "IDC1_status.IDC1_ant", "IDC1_status.IDC1_input5",
"IDC1_status.IDC1_output3"), class = "factor"), data_value_as_string = c(1L,
1L, 1L, 1L, 0L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), data_value = c(1L, 1L, 1L, 1L, 0L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), value = c(1,
1, 1, 1, 0, 0.75, 0.75, 0.75, 0.75, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), colour = structure(c(2L, 2L, 1L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("cyan",
"darkviolet", "deeppink"), class = "factor"), rescaleoffset = c(1,
1, 101, 101, 200, 200.75, 200.75, 200.75, 200.75, 201, 201,
201, 201, 201, 201, 201, 201, 201, 201)), .Names = c("heatmap_row_display",
"msg_no", "relative_time_ms", "pcan_rx_datetime_adjusted", "block_name",
"pcan_attribute", "data_value_as_string", "data_value", "value",
"colour", "rescaleoffset"), row.names = c(NA, 19L), class = "data.frame")
n <- length(unique(dat$block_name))
# Do it this way to avoid reordering the colours in the data frame
sample_colours <- levels(factor(dat$colour, levels=unique(dat$colour)))
# Rescale all the values into categories of 100
dat$rescaleoffset <- dat$value + 100*(as.numeric(dat$block_name)-1)
scalerange <- range(dat$value)
# Mark the end of each gradient for each category block.
gradientends <- scalerange + rep(seq(0, (n - 1) * 100, by = 100), each=2)
# Interleave two vectors, used to interleave "white" with each of the category colours.
# "white" is used to colour the values on lowest end of each category's gradient range.
interleave <- function(v1,v2)
{
ord1 <- 2*(1:length(v1))-1
ord2 <- 2*(1:length(v2))
c(v1,v2)[order(c(ord1,ord2))]
}
colorends <- interleave(rep("white",n),sample_colours)
p <- ggplot(dat, aes(pcan_attribute, heatmap_row_display)) +
geom_tile(aes(fill = rescaleoffset), colour = "white") +
geom_text(aes(label = paste(format(round(value, 1), nsmall = 1), sep='\n')), size=rel(2.0)) +
scale_fill_gradientn(colours = colorends, values = rescale(gradientends)) +
scale_x_discrete("", expand = c(0, 0)) +
scale_y_discrete("", expand = c(0, 0)) +
theme_grey(base_size = 9) +
theme(axis.ticks = element_blank(),
axis.text.x = element_text(angle = 330, hjust = 0))+
theme(legend.background = element_rect(fill="gray90", size=30, linetype="dotted"))
print(p)

Problems with geom_tile and scale_colour_distiller

I want to create a correlation plot using geom_tile(). This is a reproducible chunk of code:
library(ggplot2)
df.m <- structure(list(Trait = structure(c(6L, 5L, 1L, 3L, 2L, 9L, 4L,
10L, 11L, 7L, 8L, 6L, 5L, 1L, 3L, 2L, 9L, 4L, 10L, 11L, 7L, 8L,
6L, 5L, 1L, 3L, 2L, 9L, 4L, 10L, 11L, 7L, 8L), .Label = c("R1",
"R2", "R3", "R4", "R5",
"R6", "R7", "R8",
"R9", "R10",
"R11"), class = "factor"), Variable = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("C1", "C2", "C3"), class = "factor"),
value = c(0.967444360256195, 0.937379062175751, 0.647411823272705,
0.512605130672455, 0.50750744342804, 0.508640229701996, 0.508640229701996,
0.503542542457581, 0.442936152219772, 0.510905921459198,
0.504675328731537, NA, 0.834005177021027, 0.667642116546631,
0.579914391040802, 0.579344689846039, 0.536050498485565,
0.532062888145447, 0.52408766746521, 0.520099997520447, 0.504719197750092,
0.450031787157059, NA, NA, 0.571457028388977, 0.451690584421158,
0.354736804962158, 0.46138596534729, 0.477354824542999, 0.447128057479858,
0.287439465522766, 0.498456537723541, 0.508722245693207)), .Names = c("Trait",
"Variable", "value"), row.names = c(NA, -33L), class = "data.frame")
p <- ggplot(na.omit(df.m)) + aes(x = Variable, y = Trait, fill = value) +
geom_tile(colour = "white", size = 0.75) +
scale_colour_distiller(limits = c(-1, 1), direction = -1, palette = "RdBu", name = "Coefficient") +
scale_size(range = c(0,20), name = title, guide = "none") +
geom_text(data = df.m, aes(x = Variable, y = Trait, label = sprintf("%.2f", value)), size = 4, inherit.aes = FALSE) +
theme(plot.title = element_text(size = 20), axis.text.y = element_text(size = 10),
axis.text.x = element_text(size = 10, angle = 90, hjust = 1, vjust = 0.5),
legend.position = "bottom", legend.margin = unit(1.0, "cm"),
legend.text = element_text(size = 8), legend.title = element_text(size = 10),
legend.key.size = unit(1.0, "cm"),
panel.background = element_rect(fill = "white")) +
xlab("\nVariables 1") + ylab("Variables 2\n")
plot(p)
However, the tiles are filled with their own color gradient scale and not with the one defined bye scale_colour_distiller(). Moreover, both key legends appear in the plot:
I just want the tiles to be colored according to the gradient defined by the scale_colour_distiller(). How can I do that? From where geom_plot() is sucking the color scale?
Thanks in advance!
You need to use scale_fill_distiller since you are mapping value to the fill aesthetic and not to the color aesthetic.

Set factor order in MultiBarChart with NVD3 rCharts

I want to change the order of the bars in the following graph:
library(rCharts)
effectiveness <- structure(list(Ranking = structure(c(5L, 4L, 3L, 2L, 1L, 5L,
4L, 3L, 2L, 1L, 5L, 4L, 3L, 2L, 1L, 1L, 2L, 3L, 4L, 5L), .Label = c("Bottom",
"Bellow Average", "Average", "Above Average", "Top"), class = c("ordered",
"factor")), Probability = c(0.4, 0.4, 0.1, 0.08, 0.02, 0.1, 0.2,
0.5, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.01, 0.04, 0.15, 0.7,
0.1), data = structure(c(4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L), .Label = c("none",
"little", "some", "lots"), class = c("ordered", "factor"))), row.names = c(NA,
-20L), .Names = c("Ranking", "Probability", "data"), class = "data.frame")
library(rCharts)
n1 <- nPlot(Probability ~ data, group = 'Ranking', data = effectiveness[order(effectiveness$Ranking),], type = 'multiBarChart')
n1$chart(stacked = TRUE)
n1$yAxis( tickFormat = "#!d3.format('%')!#" )
n1$set(width = 1000)
n1$xAxis(axisLabel = "Data available")
n1
The order should be 'none' 'little' 'some' 'lots'.
This solution is not working for me:
effectiveness$data <- as.numeric(effectiveness$data)
n1 <- nPlot(Probability ~ data, group = 'Ranking', data = effectiveness[order(effectiveness$Ranking),], type = 'multiBarChart')
n1$chart(stacked = TRUE)
n1$yAxis( tickFormat = "#!d3.format('%')!#" )
n1$set(width = 1000)
n1$xAxis(axisLabel = "Data available")
n1
The order remains 4 2 1 3
Thanks!

Resources