I am using facet_grid() to display a 2x2 of different combinations of model types for racial groups and levels of participation in a program.
By using scales = "free" I am able to separate out the y axes for each row and only display the relevant coefficients. But, how can I then specify the model/variable order within each panel row? Typically, I would do something like:
# Desired display order of the race categories (the vector that would normally be handed to scale_x_discrete()).
model_order <- c("White", "Black", "Hispanic")
And then pass that through to scale_x_discrete(). (And would have High, then Medium, then Low in that order).
But that does not seem to work in this case because of using scales = "free". Is there a workaround for controlling the order?
Code:
# Facet strip labels, keyed by the numeric codes stored in `model` (1, 2)
# and `effect` (3, 4).
mylabels <- c(
  "1" = "Linear",
  "2" = "Logit",
  "3" = "Race",
  "4" = "Level"
)

# Coefficient plot: point estimates with confidence-interval bars, one panel
# per effect (rows) and model (columns). Free scales let each panel row show
# only its own categories.
ggplot(
  data = dx,
  mapping = aes(x = var, y = coef, ymin = ci_lower, ymax = ci_upper)
) +
  geom_point(size = 2) +
  geom_errorbar(width = 0.1, size = 1) +
  facet_grid(
    effect ~ model,
    scales = "free",
    labeller = as_labeller(mylabels)
  ) +
  coord_flip() +
  scale_y_continuous(breaks = seq(from = -3, to = 3, by = 1)) +
  theme_bw(base_size = 15) +
  theme(legend.position = "none")
Data:
# dput() output of the example data: 12 rows with the columns var, coef,
# ci_lower, ci_upper, model and effect, plus a readr column spec attribute.
# NOTE(review): the expression is not assigned to a name; presumably it is the
# `dx` object used by the plotting code -- confirm before running.
structure(list(var = c("White", "Black", "Hispanic", "White",
"Black", "Hispanic", "High", "Medium", "Low", "High", "Medium",
"Low"), coef = c(1.64, 1.2, 0.4, 1.45, 0.17, 0.6, 1.04, 0.05,
-0.74, -0.99, -0.45, -0.3045), ci_lower = c(1.3, 0.86, 0.06,
1.11, -0.17, 0.26, 0.7, -0.29, -1.08, -1.33, -0.79, -0.6445),
ci_upper = c(1.98, 1.54, 0.74, 1.79, 0.51, 0.94, 1.38, 0.39,
-0.4, -0.65, -0.11, 0.0355), model = c(1, 1, 1, 2, 2, 2,
1, 1, 1, 2, 2, 2), effect = c(3, 3, 3, 3, 3, 3, 4, 4, 4,
4, 4, 4)), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L), spec = structure(list(cols = list(
var = structure(list(), class = c("collector_character",
"collector")), coef = structure(list(), class = c("collector_double",
"collector")), ci_lower = structure(list(), class = c("collector_double",
"collector")), ci_upper = structure(list(), class = c("collector_double",
"collector")), model = structure(list(), class = c("collector_double",
"collector")), effect = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
You can define your variable as a factor and then reorder its levels:
library(dplyr)
library(ggplot2)

# Facet strip labels, keyed by the numeric codes stored in `model` (1, 2)
# and `effect` (3, 4).
mylabels <- c(
  "1" = "Linear",
  "2" = "Logit",
  "3" = "Race",
  "4" = "Level"
)

# Turn `var` into a factor whose levels list every category in the wanted
# order. Naming only "High" and "Medium" would leave the race categories in
# alphabetical order (Black, Hispanic, White). With scales = "free" each panel
# row keeps only the levels that occur in it, so one shared level order is
# enough to control both rows.
# coord_flip() draws the first level at the bottom of a panel; wrap the
# factor in forcats::fct_rev() if the order should read top-to-bottom instead.
dx %>%
  mutate(
    var = forcats::fct_relevel(
      var,
      "White", "Black", "Hispanic",
      "High", "Medium", "Low"
    )
  ) %>%
  ggplot(aes(x = var, y = coef, ymin = ci_lower, ymax = ci_upper)) +
  geom_point(size = 2) +
  geom_errorbar(width = 0.1, size = 1) +
  facet_grid(
    effect ~ model,
    scales = "free",
    labeller = as_labeller(mylabels)
  ) +
  scale_y_continuous(breaks = seq(-3, 3, by = 1)) +
  coord_flip() +
  theme_bw(base_size = 15) +
  theme(legend.position = "none")
Related
I want to customize my legend and the pattern shapes in ggpattern, but when I run the code two legends appear on the side. I would also like the pattern color/stripes to be black; right now they are grey even with pattern_color = "black".
# Example data: three therapy segments that all belong to study id 3.
df <- data.frame(
  study_id = rep(3, 3),
  primary_therapy = rep("Si", 3),
  additional_therapy = c("NA", "S", "V+S"),
  end_yr = c(0.08, 0.39, 3.03)
)

# Swimmer plot with a pattern layer drawn on top of it. Both the pattern type
# and the pattern angle are mapped to additional_therapy.
swimmer_plot(
  df = df,
  id = "study_id",
  end = "end_yr",
  name_fill = "primary_therapy",
  width = 0.85,
  color = NA
) +
  geom_col_pattern(
    aes(
      x = study_id,
      y = end_yr,
      pattern = additional_therapy,
      pattern_angle = additional_therapy
    ),
    width = 0.85,
    fill = NA,
    na.rm = FALSE,
    show.legend = NA,
    pattern_color = "black",
    pattern_spacing = 0.01,
    pattern_size = 0.5,
    pattern_linetype = 0.5,
    pattern_orientation = "vertical"
  ) +
  scale_pattern_manual(
    name = "Additional Therapy",
    values = c("S" = "stripe", "NA" = "none", "V+S" = "circle")
  )
First, with geom_col_pattern, show.legend = can only be TRUE or FALSE; NA isn't a valid option. Second, remove the pattern_angle argument from geom_col_pattern, since I think that's what's causing the second legend. Third, change pattern_color to pattern_fill to make the lines black.
library(swimplot)
library(ggpattern)
library(tidyverse)

# Example data: three therapy segments that all belong to study id 3.
df <- data.frame(
  study_id = rep(3, 3),
  primary_therapy = rep("Si", 3),
  additional_therapy = c("NA", "S", "V+S"),
  end_yr = c(0.08, 0.39, 3.03)
)

# Swimmer plot with a single pattern legend: only `pattern` is mapped to
# additional_therapy, and pattern_fill is set to black.
swimmer_plot(
  df = df,
  id = "study_id",
  end = "end_yr",
  name_fill = "primary_therapy",
  width = 0.85,
  color = NA
) +
  geom_col_pattern(
    aes(x = study_id, y = end_yr, pattern = additional_therapy),
    width = 0.85,
    fill = NA,
    na.rm = FALSE,
    show.legend = TRUE,
    pattern_fill = "black",
    pattern_spacing = 0.01,
    pattern_size = 0.5,
    pattern_linetype = 0.5,
    pattern_orientation = "vertical"
  ) +
  scale_pattern_manual(
    name = "Additional Therapy",
    values = c("S" = "stripe", "NA" = "none", "V+S" = "circle")
  )
I have a variable (-coef-) that is just regression coefficient values. I am hoping to arrange all the -var- (variables of different models) in my plot based on the size of their respective -coef- values. I have tried various solutions using reorder() (such as https://www.rpubs.com/dvdunne/reorder_ggplot_barchart_axis) but they all seem geared towards geom_bar or geom_col and haven't worked with geom_point/geom_errorbar.
# Coefficient plot: one coloured point with a confidence-interval bar per
# variable, flipped so that the variables run along the vertical axis.
ggplot(
  data = d,
  mapping = aes(
    x = var,
    y = coef,
    ymin = ci_lower,
    ymax = ci_upper,
    colour = var
  )
) +
  geom_point(size = 2) +
  geom_errorbar(width = 0.5, size = 1) +
  coord_flip() +
  theme_minimal()
EDIT: Some example data
# Example data `d`: 10 rows (variables "a" to "j") with the columns var, coef,
# ci_lower and ci_upper, stored as dput() output including a readr column spec.
d <- structure(list(var = c("a", "b", "c", "d", "e", "f", "g", "h",
"i", "j"), coef = c(0.1, 0.2, 0.3, 0.35, 0.46, 0.64, 0.54, 0.13,
0.87, 0.41), ci_lower = c(0.05, 0.15, 0.25, 0.3, 0.41, 0.59,
0.49, 0.08, 0.82, 0.36), ci_upper = c(0.15, 0.25, 0.35, 0.4,
0.51, 0.69, 0.59, 0.18, 0.92, 0.46)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L), spec = structure(list(
cols = list(var = structure(list(), class = c("collector_character",
"collector")), coef = structure(list(), class = c("collector_double",
"collector")), ci_lower = structure(list(), class = c("collector_double",
"collector")), ci_upper = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
You could arrange your data by "coef" and then recode "var" with factor(ordered = T). Note that ggplot will switch to a different color scale for ordered factors, so I also specify scale_color_hue() in the plot:
# Sort the rows by coefficient size, then freeze that order in the levels of
# `var` so the axis follows it. The factor is ordered, so scale_color_hue()
# is added to keep the default discrete hue palette.
# `TRUE` is spelled out (`T` is an ordinary variable that can be reassigned),
# and the redundant `.` placeholder in the ggplot() call is dropped.
d %>%
  arrange(coef) %>%
  mutate(var = factor(var, levels = unique(var), ordered = TRUE)) %>%
  ggplot(
    aes(
      x = var, y = coef,
      ymin = ci_lower, ymax = ci_upper,
      color = var
    )
  ) +
  geom_point(size = 2) +
  geom_errorbar(width = .5, size = 1) +
  scale_color_hue() +
  coord_flip() +
  theme_minimal()
I'm trying to do almost exactly this. I have this graph:
From this data frame:
# Example data: 8 rows with the columns Op, Drug, Follow, n and Percent.
# The dput() output originally carried a `problems = <pointer: ...>`
# attribute, which is not valid R syntax and made the snippet impossible to
# run. That attribute is dropped, together with the readr column spec and the
# matching "spec_tbl_df" class, which the plotting code does not need.
Data <- structure(
  list(
    Op = c("No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes"),
    Drug = c("No", "No", "Yes", "Yes", "No", "No", "Yes", "Yes"),
    Follow = c("No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes"),
    n = c(46, 101, 25, 27, 2, 65, 2, 22),
    Percent = c(
      31.29251701, 68.70748299, 48.07692308, 51.92307692,
      2.985074627, 97.01492537, 8.333333333, 91.66666667
    )
  ),
  row.names = c(NA, -8L),
  class = c("tbl_df", "tbl", "data.frame")
)
And the graph was made with this code:
# Dodged bar chart of Percent by Drug, filled by Follow and faceted by Op,
# with "n=<count>" printed above each bar.
# The same geom_text() layer used to be added twice, drawing every label on
# top of itself; one layer is kept.
Data %>%
  ggplot(aes(x = Drug, y = Percent, fill = Follow)) +
  geom_col(position = "dodge2") +
  labs(
    x = "",
    fill = "Follow",
    title = "Drug vs Follow"
  ) +
  geom_text(
    aes(label = paste0("n=", n)),
    vjust = -0.2,
    position = position_dodge(.9)
  ) +
  facet_grid(. ~ Op)
I'd love to manually draw in significance bars that look like this:
But I am having trouble following the code in that linked answer. When I add:
# Manually positioned significance brackets; the layer gets its own data
# frame with one row per Op facet ("No", "Yes").
# NOTE: the layer still inherits the plot-level aesthetics (fill = Follow),
# and that data frame has no Follow column -- this is the reported error.
geom_signif(data = data.frame(Op = c("No","Yes")),
aes(y_position=c(5.3, 8.3),
xmin=c(0.8, 0.8),
xmax=c(1.2, 1.2),
annotations=c("**", "NS")),
tip_length=0,
manual = T)
to the end of my code, I get the error ' "Follow" not found '. (P.s. I'm sure the numbers were wrong and in wrong position, I was just hoping to get some bars and then I'd move them around)
Any help would be appreciated! Thank you!
The issue is that geom_signif inherits the global aesthetics you set inside ggplot(). In particular, as you set fill=Follow ggplot is expecting a variable with this name in the dataframe you passed via the data argument of geom_signif.
One option to solve this issue is to make fill = Follow a local aesthetic of geom_col. This also requires mapping Follow to the group aesthetic inside geom_text, so that the labels are still dodged alongside the bars.
library(ggplot2)
library(ggsignif)

# Option 1: map fill only inside geom_col(), so geom_signif() no longer
# inherits an aesthetic (Follow) that is missing from its own data frame.
# geom_text() then needs group = Follow to keep its labels dodged like the
# bars. `manual = TRUE` is spelled out because `T` can be reassigned.
ggplot(Data, aes(x = Drug, y = Percent)) +
  geom_col(aes(fill = Follow), position = "dodge2") +
  labs(x = "", fill = "Follow", title = "Drug vs Follow") +
  geom_text(
    aes(label = paste0("n=", n), group = Follow),
    vjust = -0.2,
    position = position_dodge(.9)
  ) +
  facet_grid(. ~ Op) +
  geom_signif(
    data = data.frame(Op = c("No", "Yes")),
    aes(
      y_position = c(5.3, 8.3),
      xmin = c(0.8, 0.8),
      xmax = c(1.2, 1.2),
      annotations = c("**", "NS")
    ),
    tip_length = 0,
    manual = TRUE
  )
#> Warning: Ignoring unknown aesthetics: y_position, xmin, xmax, annotations
A second option would be to set inherit.aes=FALSE in geom_signif.
# Option 2: keep fill = Follow as a plot-level aesthetic and stop
# geom_signif() from inheriting it with inherit.aes = FALSE.
# `manual = TRUE` is spelled out because `T` can be reassigned.
ggplot(Data, aes(x = Drug, y = Percent, fill = Follow)) +
  geom_col(position = "dodge2") +
  labs(x = "", fill = "Follow", title = "Drug vs Follow") +
  geom_text(
    aes(label = paste0("n=", n)),
    vjust = -0.2,
    position = position_dodge(.9)
  ) +
  facet_grid(. ~ Op) +
  geom_signif(
    data = data.frame(Op = c("No", "Yes")),
    aes(
      y_position = c(5.3, 8.3),
      xmin = c(0.8, 0.8),
      xmax = c(1.2, 1.2),
      annotations = c("**", "NS")
    ),
    tip_length = 0,
    manual = TRUE,
    inherit.aes = FALSE
  )
#> Warning: Ignoring unknown aesthetics: y_position, xmin, xmax, annotations
When I run the code, the order of the y-axis labels is changed to alphabetical, but I want to keep the order in which they appear in the coef column of my new.table dataset. Here is my code:
library(ggplot2)
library(dplyr)
library(data.table)

# Row labels of the plot, in the order they should be displayed.
coef <- c(
  "<=40 years", "41-55 years", "56+ years",
  "Underweight", "Normal", "Overweight", "Obese",
  "Uncontrolled", "Control",
  "Uncontrolled", "Control",
  "< 5 years", "5-10 years", ">= 10 years",
  "Adherence", "Non-adherence"
)

# Point estimates with the lower and upper bounds of their intervals.
or <- c(
  1, 0.98, 1.16, 1.68, 1, 0.59, 0.71, 2.57,
  1, 1.1, 1, 1, 2.03, 9.51, 1, 1.82
)
ci_lb <- c(
  1, 0.41, 0.47, 0.25, 1, 0.33, 0.34, 1.3,
  1, 0.63, 1, 1, 0.81, 3.85, 1, 1.07
)
ci_ub <- c(
  1, 2.35, 2.87, 11.22, 1, 1.03, 1.48, 5.08,
  1, 1.92, 1, 1, 5.09, 23.46, 1, 3.1
)

# Grouping variable: each label above belongs to one term.
term <- rep(
  c("Age", "BMI", "FBS", "SBP", "Duration", "Drug"),
  times = c(3, 4, 2, 2, 3, 2)
)
is.reference <- rep(TRUE, times = 16)

new.table <- data.frame(coef, or, ci_lb, ci_ub, term, is.reference)

# Forest-style plot: one panel row per term, with a dashed reference line
# at 1 and the x axis zoomed to 0-25.
p <- ggplot(
  data = new.table,
  mapping = aes(
    y = coef,
    x = or, xmin = ci_lb, xmax = ci_ub,
    color = term
  )
) +
  geom_vline(xintercept = 1, linetype = "longdash") +
  geom_errorbarh(height = 0.2) +
  geom_point(size = 2, shape = 18) +
  facet_grid(term ~ ., scales = "free_y", space = "free_y") +
  scale_alpha_identity() +
  coord_cartesian(xlim = c(0, 25))
To extend my comment a bit, we need to set factor order on coef and on term, then reverse y axis:
# Freeze the display order: convert coef and term to factors whose levels
# follow their order of first appearance in new.table.
new.table[c("coef", "term")] <- lapply(
  new.table[c("coef", "term")],
  function(column) factor(column, levels = unique(column))
)

ggplot(
  data = new.table,
  mapping = aes(
    y = coef,
    x = or, xmin = ci_lb, xmax = ci_ub,
    color = term
  )
) +
  geom_vline(xintercept = 1, linetype = "longdash") +
  geom_errorbarh(height = 0.2) +
  geom_point(size = 2, shape = 18) +
  facet_grid(term ~ ., scales = "free_y", space = "free_y") +
  scale_alpha_identity() +
  # reverse the y axis so the first level is drawn at the top
  scale_y_discrete(limits = rev) +
  coord_cartesian(xlim = c(0, 25))
I have a line graph with several lines, in which I want to highlight certain values with a different geom_point shape.
The dput of a similar data to mine is:
# dput() output of the example data: 9 rows with the factor Iso (levels "a"
# to "i") and the numeric columns z1 and z2, plus a readr column spec.
# NOTE(review): the expression is not assigned to a name; the plotting code
# reads its data from "dummy.csv" into `y` instead.
structure(list(Iso = structure(1:9, .Label = c("a", "b", "c",
"d", "e", "f", "g", "h", "i"), class = "factor"), z1 = c(342.6,
8.94, 6.91, 3.96, 1.89, 4.38, 1.43, 5.18, 189.1), z2 = c(187.34,
2.8, 8.42, 8.24, 2.36, 2.34, 7.6, 0.5, 136.01)), row.names = c(NA,
-9L), spec = structure(list(cols = list(Iso = structure(list(), class = c("collector_character",
"collector")), z1 = structure(list(), class = c("collector_double",
"collector")), z2 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
This is the code:
library(readr)
library(ggplot2)
library(RColorBrewer)

# Read the data and keep Iso in file order on the x axis.
y <- read_csv("dummy.csv")
y$Iso <- factor(y$Iso, levels = y$Iso)

## Base plot: z1 and z2 drawn as points plus lines on a log10 y axis.
plot <- ggplot(y, aes(x = Iso, group = 1)) +
  theme_bw() +
  labs(title = "") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_point(aes(y = z1, colour = "z1"), size = 3, shape = 16, na.rm = FALSE) +
  geom_point(aes(y = z2, colour = "z2"), size = 3, shape = 16, na.rm = FALSE) +
  geom_line(aes(y = z1, colour = "z1"), size = 1, linetype = 1, na.rm = FALSE) +
  geom_line(aes(y = z2, colour = "z2"), size = 2, linetype = 3, na.rm = FALSE) +
  labs(x = "", y = "ZZ/CI") +
  scale_y_log10(
    limits = c(0.1, 2000),
    breaks = c(1e-1, 1, 1e1, 1e2, 1e3)
  )

## Cosmetics: axis text, manual colours, and a legend whose keys repeat the
## line types and point shapes used above.
plot +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 19, face = "bold"),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 15)
  ) +
  scale_colour_manual(
    name = "",
    breaks = c("z1", "z2"),
    values = c("brown", "wheat"),
    labels = c("z1", "z2")
  ) +
  guides(
    colour = guide_legend(
      override.aes = list(
        size = c(.75, .75),
        linetype = c(1, 3),
        shape = c(16, 16)
      )
    )
  )
I use the guide for every cosmetic element in order to make the legend more legible.
The data
# A tibble: 9 x 3
Iso z1 z2
<fct> <dbl> <dbl>
1 a 342.6 187.34
2 b 8.94 2.8
3 c 6.91 8.42
4 d 3.96 8.24
5 e 1.89 2.36
6 f 4.38 2.34
7 g 1.43 7.6
8 h 5.18 0.5
9 i 189.1 136.01
For example, in the case of these data, I want to change the shape from 16 to 13, in z1 for '342.6', '1.43', and '5.18', and in z2 for '187.34', '2.36', '7.6' and '0.5'.
How can I proceed?
Thank you for your time.
I think you should consider reformatting your data to long format, as it saves a lot of duplication. Secondly, you need to specify a new column in your data, which you map to shape. If there is no pattern like any value larger than 100 you need to cherry-pick the values you want to highlight by hand.
The following code should do what you want:
library(tidyverse)

# Example data: factor Iso (levels "a" to "i") with the measurements z1, z2.
y <- data.frame(
  Iso = factor(c("a", "b", "c", "d", "e", "f", "g", "h", "i")),
  z1 = c(342.6, 8.94, 6.91, 3.96, 1.89, 4.38, 1.43, 5.18, 189.1),
  z2 = c(187.34, 2.8, 8.42, 8.24, 2.36, 2.34, 7.6, 0.5, 136.01)
)

## transform to long format
## pivot_longer() replaces the superseded gather(); arrange() then puts all z1
## rows (1-9) before all z2 rows (10-18), the order the next step relies on
y.long <- y %>%
  pivot_longer(cols = -Iso, names_to = "type", values_to = "value") %>%
  arrange(type, Iso)

## add a new column which 'marks' the special rows
## NOTE: since we moved to long format rows corresponding to z2 are starting now at row 10,
## so rows 1, 7, 8 pick z1 values and rows 10, 14, 16, 17 pick z2 values
special_rows <- c(1, 7, 8, 10, 14, 16, 17)
y.long <- y.long %>%
  mutate(
    highlight = ifelse(row_number() %in% special_rows, "special", "normal")
  )

## in your ggplot you can now map the columns to the graphical elements like so:
ggplot(
  y.long,
  aes(
    Iso, value,
    color = type, linetype = type, shape = highlight,
    group = type
  )
) +
  geom_point(size = 3) +
  geom_line(aes(size = type)) +
  scale_y_log10(limits = c(0.1, 2000), breaks = c(1e-1, 1, 1e1, 1e2, 1e3)) +
  scale_color_manual("", values = c(z1 = "brown", z2 = "wheat")) +
  scale_size_manual("", values = c(z1 = 1, z2 = 2), guide = "none") +
  scale_shape_manual("", values = c(normal = 16, special = 13), guide = "none") +
  scale_linetype_manual("", values = c(z1 = "solid", z2 = "dotted"), guide = "none") +
  labs(x = "", y = "ZZ/CI") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 19, face = "bold"),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 15)
  )
This results in the following plot:
You can adapt the mutate statement to include/exclude other rows and the scale_* functions to show/hide legends.
Theoretically, you could use something like this in your mutate
# Pipeline step that marks rows by value instead of by position.
# The first value is 342.6, the first z1 entry of the example data; the
# snippet previously listed 343.6, which matches no row.
mutate(highlight = ifelse(value %in% c(342.6, 1.43, 5.18, 187.34,
                                        2.36, 7.6, 0.5),
                          "special", "normal"))
but due to floating point issues (cf. for instance this article), I would not do this and would rather select by row number (as I did) or by any other suitable criterion, or use all.equal to make proper floating point comparisons.