Related
I am using facet_grid() to display a 2x2 of different combinations of model types for racial groups and levels of participation in a program.
By using scales = "free" I am able to separate out the y axes for each row and only display the relevant coefficients. But, how can I then specify the model/variable order within each panel row? Typically, I would do something like:
# Desired display order of the race categories.
model_order <- c("White", "Black", "Hispanic")
And then pass that through to scale_x_discrete(). (And would have High, then Medium, then Low in that order).
But that does not seem to work in this case because of using scales = "free". Is there a workaround for controlling the order?
Code:
# Strip labels for the facets, keyed by the codes used in `model` (1, 2)
# and `effect` (3, 4).
mylabels <- setNames(
  c("Linear", "Logit", "Race", "Level"),
  c("1", "2", "3", "4")
)

# Point estimates with confidence-interval bars, one panel per
# effect (rows) x model (columns) combination.
ggplot(
  dx,
  aes(x = var, y = coef, ymin = ci_lower, ymax = ci_upper)
) +
  geom_point(size = 2) +
  geom_errorbar(size = 1, width = 0.1) +
  facet_grid(
    effect ~ model,
    labeller = as_labeller(mylabels),
    scales = "free"
  ) +
  scale_y_continuous(breaks = -3:3) +
  # Flip so the categories in `var` run along the vertical axis.
  coord_flip() +
  theme_bw(base_size = 15) +
  theme(legend.position = "none")
Data:
structure(list(var = c("White", "Black", "Hispanic", "White",
"Black", "Hispanic", "High", "Medium", "Low", "High", "Medium",
"Low"), coef = c(1.64, 1.2, 0.4, 1.45, 0.17, 0.6, 1.04, 0.05,
-0.74, -0.99, -0.45, -0.3045), ci_lower = c(1.3, 0.86, 0.06,
1.11, -0.17, 0.26, 0.7, -0.29, -1.08, -1.33, -0.79, -0.6445),
ci_upper = c(1.98, 1.54, 0.74, 1.79, 0.51, 0.94, 1.38, 0.39,
-0.4, -0.65, -0.11, 0.0355), model = c(1, 1, 1, 2, 2, 2,
1, 1, 1, 2, 2, 2), effect = c(3, 3, 3, 3, 3, 3, 4, 4, 4,
4, 4, 4)), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L), spec = structure(list(cols = list(
var = structure(list(), class = c("collector_character",
"collector")), coef = structure(list(), class = c("collector_double",
"collector")), ci_lower = structure(list(), class = c("collector_double",
"collector")), ci_upper = structure(list(), class = c("collector_double",
"collector")), model = structure(list(), class = c("collector_double",
"collector")), effect = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
You can define your variable as a factor, and then reorder their levels:
library(dplyr)
library(ggplot2)
# Strip labels for the facets, keyed by the codes used in `model` (1, 2)
# and `effect` (3, 4).
mylabels <- c(
  "1" = "Linear",
  "2" = "Logit",
  "3" = "Race",
  "4" = "Level"
)

dx %>%
  # Fix the order of *every* category explicitly. Relevelling only "High" and
  # "Medium" would leave the race categories in alphabetical order
  # (Black, Hispanic, White) rather than the requested White, Black, Hispanic.
  # With scales = "free" each facet row keeps only the levels it uses, in
  # this order.
  # NOTE(review): after coord_flip() the first level is drawn at the bottom
  # of the axis; wrap the vector in rev() if the order should read
  # top-to-bottom instead.
  mutate(
    var = factor(
      var,
      levels = c("White", "Black", "Hispanic", "High", "Medium", "Low")
    )
  ) %>%
  ggplot(aes(x = var, y = coef, ymin = ci_lower, ymax = ci_upper)) +
  geom_point(size = 2) +
  geom_errorbar(width = 0.1, size = 1) +
  facet_grid(
    effect ~ model,
    scales = "free",
    labeller = as_labeller(mylabels)
  ) +
  scale_y_continuous(breaks = seq(-3, 3, by = 1)) +
  coord_flip() +
  theme_bw(base_size = 15) +
  theme(legend.position = "none")
I'm trying to do almost exactly this. I have this graph:
From this data frame:
# Example data: one row per Op x Drug x Follow combination, with the count
# (n) and the corresponding percentage (Percent).
# The original dput() output also carried a `problems` attribute holding an
# external pointer (`<pointer: 0x...>`). That is not valid R syntax and made
# the whole literal unparseable, so the attribute is dropped here.
Data <- structure(
  list(
    Op = c("No", "No", "No", "No", "Yes", "Yes", "Yes", "Yes"),
    Drug = c("No", "No", "Yes", "Yes", "No", "No", "Yes", "Yes"),
    Follow = c("No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes"),
    n = c(46, 101, 25, 27, 2, 65, 2, 22),
    Percent = c(
      31.29251701, 68.70748299, 48.07692308, 51.92307692,
      2.985074627, 97.01492537, 8.333333333, 91.66666667
    )
  ),
  spec = structure(
    list(
      cols = list(
        Op = structure(list(), class = c("collector_character", "collector")),
        Drug = structure(list(), class = c("collector_character", "collector")),
        Follow = structure(list(), class = c("collector_character", "collector")),
        n = structure(list(), class = c("collector_double", "collector")),
        Percent = structure(list(), class = c("collector_double", "collector"))
      ),
      default = structure(list(), class = c("collector_guess", "collector")),
      delim = ","
    ),
    class = "col_spec"
  ),
  row.names = c(NA, -8L),
  class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame")
)
And the graph was made with this code:
# Dodged bar chart of Percent by Drug, filled by Follow, with one facet per
# value of Op and an "n=..." label above each bar.
# `fill = Follow` is deliberately kept as a global aesthetic: this is the
# set-up the question is about.
Data %>%
  ggplot(aes(x = Drug, y = Percent, fill = Follow)) +
  geom_col(position = "dodge2") +
  labs(
    x = "",
    fill = "Follow",
    title = "Drug vs Follow"
  ) +
  # The label layer is added once; the original added this identical layer a
  # second time after facet_grid(), drawing every label twice.
  geom_text(
    aes(label = paste0("n=", n)),
    vjust = -0.2,
    position = position_dodge(.9)
  ) +
  facet_grid(. ~ Op)
I'd love to manually draw in significance bars that look like this:
But I am having trouble following the code in that linked answer. When I add:
# Manually positioned significance brackets, supplied through a small data
# frame with one row per value of Op ("No", "Yes").
geom_signif(
  data = data.frame(Op = c("No", "Yes")),
  aes(
    y_position = c(5.3, 8.3),
    xmin = c(0.8, 0.8),
    xmax = c(1.2, 1.2),
    annotations = c("**", "NS")
  ),
  tip_length = 0,
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  manual = TRUE
)
to the end of my code, I get the error `"Follow" not found`. (P.S. I'm sure the numbers are wrong and the bars are in the wrong position; I was just hoping to get some bars drawn first and then move them around.)
Any help would be appreciated! Thank you!
The issue is that geom_signif inherits the global aesthetics you set inside ggplot(). In particular, as you set fill=Follow ggplot is expecting a variable with this name in the dataframe you passed via the data argument of geom_signif.
One option to solve this issue would be to make fill=Follow a local aesthetic of geom_col, which also requires mapping Follow to the group aesthetic inside geom_text so that the labels are still dodged.
library(ggplot2)
library(ggsignif)
# Option 1: map `fill` only inside geom_col(), so geom_signif() does not
# inherit it and does not look for `Follow` in its own data frame.
ggplot(Data, aes(x = Drug, y = Percent)) +
  geom_col(aes(fill = Follow), position = "dodge2") +
  labs(x = "", fill = "Follow", title = "Drug vs Follow") +
  # Without a global fill, geom_text() needs an explicit group to dodge by.
  geom_text(
    aes(label = paste0("n=", n), group = Follow),
    vjust = -0.2,
    position = position_dodge(.9)
  ) +
  facet_grid(. ~ Op) +
  geom_signif(
    data = data.frame(Op = c("No", "Yes")),
    aes(
      y_position = c(5.3, 8.3),
      xmin = c(0.8, 0.8),
      xmax = c(1.2, 1.2),
      annotations = c("**", "NS")
    ),
    tip_length = 0,
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    manual = TRUE
  )
#> Warning: Ignoring unknown aesthetics: y_position, xmin, xmax, annotations
A second option would be to set inherit.aes=FALSE in geom_signif.
# Option 2: keep `fill = Follow` global, but stop geom_signif() from
# inheriting the plot-level aesthetics.
ggplot(Data, aes(x = Drug, y = Percent, fill = Follow)) +
  geom_col(position = "dodge2") +
  labs(x = "", fill = "Follow", title = "Drug vs Follow") +
  geom_text(
    aes(label = paste0("n=", n)),
    vjust = -0.2,
    position = position_dodge(.9)
  ) +
  facet_grid(. ~ Op) +
  geom_signif(
    data = data.frame(Op = c("No", "Yes")),
    aes(
      y_position = c(5.3, 8.3),
      xmin = c(0.8, 0.8),
      xmax = c(1.2, 1.2),
      annotations = c("**", "NS")
    ),
    tip_length = 0,
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    manual = TRUE,
    inherit.aes = FALSE
  )
#> Warning: Ignoring unknown aesthetics: y_position, xmin, xmax, annotations
I am visualizing a time-series plot using ggplot2 and trying to combine the legend. I have tried many options but have not yet gotten my desired output. In one plot the lines are missing the color coding, and in the other the chart is missing the legend. My desired output is a chart that has both the legend and a consistent color scheme.
Here is the script where the lines are missing the color-coding;
library(tidyverse)
deviation <- read_csv("C:/Users/JohnWaweru/Documents/Thesis/Data/yearly_CSVs/Turkana_new/2018_new.csv")
# Question's attempt at the NDVI deviation plot (the stray markdown code fence
# that followed ggtitle() has been removed so the snippet parses).
# NOTE: the colours are written as literal strings *inside* aes(), so they
# are mapped as data values rather than used as colours, and they do not
# match the names given to scale_color_manual(). This is the behaviour the
# question asks about and is intentionally left unchanged.
deviation %>% ggplot() +
  geom_line(aes(x = as.Date(Month), y = Upper_curve, col = 'red'), linetype = 2) +
  geom_line(aes(x = as.Date(Month), y = Lower_curve, col = 'red'), linetype = 2) +
  geom_line(aes(x = as.Date(Month), y = Mean_NDVI, col = 'red'), linetype = 1) +
  geom_line(aes(x = as.Date(Month), y = NDVI_2018, col = 'green'), linetype = 1) +
  scale_color_manual(name = 'Legend',
                     values = c('Mean_NDVI'= 'red', 'NDVI_2018' = 'green', 'Upper_curve' = 'red', 'Lower_curve' = 'red'),
                     labels = c('Mean_NDVI', 'NDVI_2018', 'Upper_curve','Lower_curve')) +
  ylim(0.2, 0.6) +
  scale_x_date(date_labels = "%b", date_breaks = "1 month") +
  ylab(label = "NDVI") +
  xlab(label = "Month") +
  ggtitle("NDVI Deviation 2018")
Here is the Sample data I am working with;
structure(list(Month = structure(c(18262, 18293, 18322, 18353, 18383, 18414), class = "Date"),
Mean_NDVI = c(0.26, 0.23, 0.25, 0.34, 0.36, 0.32),
NDVI_2018 = c(0.22, 0.23, 0.23, 0.41, 0.46, 0.32),
Mean_Std = c(0.01, 0.01, 0.01, 0.02, 0.02, 0.02),
Std_2018 = c(0.01, 0.01, 0.03, 0.03, 0.04, 0.03),
Upper_curve = c(0.27, 0.24, 0.26, 0.36, 0.38, 0.34),
Lower_curve = c(0.25, 0.22, 0.24, 0.32, 0.34, 0.3)),
row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
Setting literal colours only works outside the aes() function or when you use scale_colour_identity(). Most of the time when you want to label individual line layers, you can set aes(..., colour = "My legend label").
library(ggplot2)
deviation <- structure(list(
Month = structure(c(18262, 18293, 18322, 18353, 18383, 18414), class = "Date"),
Mean_NDVI = c(0.26, 0.23, 0.25, 0.34, 0.36, 0.32),
NDVI_2018 = c(0.22, 0.23, 0.23, 0.41, 0.46, 0.32),
Mean_Std = c(0.01, 0.01, 0.01, 0.02, 0.02, 0.02),
Std_2018 = c(0.01, 0.01, 0.03, 0.03, 0.04, 0.03),
Upper_curve = c(0.27, 0.24, 0.26, 0.36, 0.38, 0.34),
Lower_curve = c(0.25, 0.22, 0.24, 0.32, 0.34, 0.3)),
row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame")
)
# NDVI curves for 2018 against the mean and its upper/lower curves.
# Each layer maps `colour` to a constant label; the manual scale then turns
# those labels into actual colours and legend entries.
ggplot(deviation, aes(x = Month)) +
  geom_line(aes(y = Upper_curve, colour = "Upper_curve"), linetype = "dashed") +
  geom_line(aes(y = Lower_curve, colour = "Lower_curve"), linetype = "dashed") +
  geom_line(aes(y = Mean_NDVI, colour = "Mean_NDVI"), linetype = "solid") +
  geom_line(aes(y = NDVI_2018, colour = "NDVI_2018"), linetype = "solid") +
  scale_colour_manual(
    name = "Legend",
    values = c(
      "Mean_NDVI" = "red",
      "NDVI_2018" = "green",
      "Upper_curve" = "red",
      "Lower_curve" = "red"
    ),
    # Legend keys are drawn with the line type of the matching layer; the
    # four values are given in the legend's key order.
    guide = guide_legend(
      override.aes = list(
        linetype = c("dashed", "solid", "solid", "dashed")
      )
    )
  ) +
  scale_y_continuous(limits = c(0.2, 0.6)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  labs(
    title = "NDVI Deviation 2018",
    x = "Month",
    y = "NDVI"
  )
Created on 2021-08-05 by the reprex package (v1.0.0)
I have created the following bar chart using:
structure(list(variable = structure(1:3, .Label = c("count_B",
"count_M", "count_T"), class = "factor"), value = c(10.7894136128261,
5.99274994891311, 4.10457180326646)), row.names = c(NA, -3L), class = "data.frame")
# Horizontal bar chart: one bar per `variable`, bar length taken directly
# from `value`.
ggplot(meltedMUSICC, aes(x = variable, y = value, width = 0.95)) +
  geom_col() +
  coord_flip()
I have another data set that specifies the fill by way of a percentage or relative abundance (out of 1) of each of the variables, it looks like this:
structure(list(phylum = structure(1:4, .Label = c("Acidobacteria",
"Actinobacteria", "Alphaproteobacteria", "Amoebozoa"), class = "factor"),
count_T = c(0.2, 0.1, 0.5, 0.2), count_M = c(0.1, 0.1, 0.1,
0.7), count_B = c(0.4, 0.3, 0.2, 0.1)), class = "data.frame", row.names = c(NA,
-4L))
Is there a function I could use to fill the bar chart so it becomes a stacked bar chart with the second data set, colour it and get a legend?
I think your problem is in how you aggregate the data. I assume that the count in your first dataframe is the sum of all relative counts in your second data frame.
EDIT
Thanks for posting the data
library(ggplot2)
library(tidyverse)
df <- structure(list(variable = structure(1:3, .Label = c("count_B",
"count_M", "count_T"), class = "factor"), value = c(10.7894136128261,
5.99274994891311, 4.10457180326646)), row.names = c(NA, -3L), class = "data.frame")
df2 <- structure(list(phylum = structure(1:4, .Label = c("Acidobacteria",
"Actinobacteria", "Alphaproteobacteria", "Amoebozoa"), class = "factor"),
count_T = c(0.2, 0.1, 0.5, 0.2), count_M = c(0.1, 0.1, 0.1,
0.7), count_B = c(0.4, 0.3, 0.2, 0.1)), class = "data.frame", row.names = c(NA,
-4L))
# Reshape the relative abundances to long format (one row per phylum and
# count_* column), attach the matching total from `df`, and scale each
# relative abundance by that total.
df3 <- df2 %>%
  pivot_longer(cols = -phylum) %>%
  left_join(
    # `variable` is a factor while `name` is character; convert explicitly so
    # the join does not rely on implicit coercion (which emitted a warning).
    mutate(df, variable = as.character(variable)),
    by = c("name" = "variable")
  ) %>%
  # value.x: relative abundance from df2; value.y: total from df.
  mutate(new_count = value.x * value.y)

# Stacked bars: one bar per count_* column, segments coloured by phylum.
ggplot(df3, aes(x = name, y = new_count)) +
  geom_col(aes(fill = phylum))
Created on 2020-02-27 by the reprex package (v0.3.0)
I have a line graph with several lines, in which I want to highlight certain values with a different geom_point shape.
The dput of a similar data to mine is:
structure(list(Iso = structure(1:9, .Label = c("a", "b", "c",
"d", "e", "f", "g", "h", "i"), class = "factor"), z1 = c(342.6,
8.94, 6.91, 3.96, 1.89, 4.38, 1.43, 5.18, 189.1), z2 = c(187.34,
2.8, 8.42, 8.24, 2.36, 2.34, 7.6, 0.5, 136.01)), row.names = c(NA,
-9L), spec = structure(list(cols = list(Iso = structure(list(), class = c("collector_character",
"collector")), z1 = structure(list(), class = c("collector_double",
"collector")), z2 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
This is the code:
library(readr)
library(ggplot2)
library(RColorBrewer)
# Read the data and keep Iso in file order on the x axis.
y <- read_csv("dummy.csv")
y[["Iso"]] <- factor(y[["Iso"]], levels = y[["Iso"]])

## Base plot: z1 and z2 drawn as points plus connecting lines on a log10
## y axis; the colour of each series is mapped to a constant label.
plot <- ggplot(y, aes(x = Iso, group = 1)) +
  geom_point(aes(y = z1, colour = "z1"), size = 3, shape = 16) +
  geom_point(aes(y = z2, colour = "z2"), size = 3, shape = 16) +
  geom_line(aes(y = z1, colour = "z1"), linetype = "solid", size = 1) +
  geom_line(aes(y = z2, colour = "z2"), linetype = "dotted", size = 2) +
  scale_y_log10(limits = c(0.1, 2000), breaks = 10^(-1:3)) +
  labs(title = "", x = "", y = "ZZ/CI") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

## Final plot: axis text styling, manual colours, and a legend whose keys
## repeat the size, line type and shape settings of each series.
plot +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 19, face = "bold"),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 15)
  ) +
  scale_colour_manual(
    name = "",
    breaks = c("z1", "z2"),
    values = c("brown", "wheat"),
    labels = c("z1", "z2")
  ) +
  guides(
    colour = guide_legend(
      override.aes = list(
        size = c(.75, .75),
        linetype = c("solid", "dotted"),
        shape = c(16, 16)
      )
    )
  )
I use the guide for every cosmetic element in order to make the legend more legible.
The data
# A tibble: 9 x 3
Iso z1 z2
<fct> <dbl> <dbl>
1 a 342.6 187.34
2 b 8.94 2.8
3 c 6.91 8.42
4 d 3.96 8.24
5 e 1.89 2.36
6 f 4.38 2.34
7 g 1.43 7.6
8 h 5.18 0.5
9 i 189.1 136.01
For example, in the case of these data, I want to change the shape from 16 to 13 in z1 for '342.6', '1.43', and '5.18', and in z2 for '187.34', '2.36', '7.6', and '0.5'.
How can I proceed?
Thank you for your time.
I think you should consider reformatting your data to long format, as it saves a lot of duplication. Secondly, you need to specify a new column in your data, which you map to shape. If there is no pattern like any value larger than 100 you need to cherry-pick the values you want to highlight by hand.
The following code should do what you want:
library(tidyverse)
y <- structure(list(Iso = structure(1:9, .Label = c("a", "b", "c",
"d", "e", "f", "g", "h", "i"),
class = "factor"),
z1 = c(342.6, 8.94, 6.91, 3.96, 1.89, 4.38, 1.43, 5.18, 189.1),
z2 = c(187.34, 2.8, 8.42, 8.24, 2.36, 2.34, 7.6, 0.5, 136.01)),
row.names = c(NA, -9L),
class = c("data.frame"))
## transform to long format
## reshape to long format: columns Iso, type ("z1"/"z2") and value
y.long <- gather(y, type, value, -Iso)

## flag the rows to highlight in a new `highlight` column
## NOTE: in long format the z1 rows come first, so the rows belonging to z2
## start at row 10
y.long <- mutate(
  y.long,
  highlight = ifelse(
    ifelse(
      type == "z1",
      row_number() %in% c(1, 7, 8),
      row_number() %in% c(10, 14, 16, 17)
    ),
    "special",
    "normal"
  )
)

## map the columns to the graphical elements: colour, line type and line
## size follow `type`, point shape follows `highlight`
ggplot(
  y.long,
  aes(
    x = Iso, y = value,
    colour = type, linetype = type, shape = highlight,
    group = type
  )
) +
  geom_point(size = 3) +
  geom_line(aes(size = type)) +
  scale_y_log10(limits = c(0.1, 2000), breaks = 10^(-1:3)) +
  scale_colour_manual(name = "", values = c(z1 = "brown", z2 = "wheat")) +
  ## only the colour legend is shown; the other guides are switched off
  scale_size_manual(name = "", values = c(z1 = 1, z2 = 2), guide = "none") +
  scale_shape_manual(
    name = "",
    values = c(normal = 16, special = 13),
    guide = "none"
  ) +
  scale_linetype_manual(
    name = "",
    values = c(z1 = "solid", z2 = "dotted"),
    guide = "none"
  ) +
  xlab("") +
  ylab("ZZ/CI") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 19, face = "bold"),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 15)
  )
This results in the following plot:
You can adapt the mutate statement to include/exclude other rows and the scale_* functions to show/hide legends.
Theoretically, you could use something like this in your mutate
## Mark rows by value instead of by row number.
## The first value must be 342.6 (z1 for Iso "a"); 343.6 does not occur in
## the data, so that point would never have been highlighted.
mutate(highlight = ifelse(value %in% c(342.6, 1.43, 5.18, 187.34,
                                       2.36, 7.6, 0.5),
                          "special", "normal"))
but due to floating point issues (cf. for instance this article), I would not do this and would rather select by row number (as I did) or by any other suitable criterion, or use all.equal to make proper floating point comparisons.