Related
I have the following dataframe:
# Reproducible toy data with columns Method (A/B/C), Pattern (factor X/Y/Z),
# X1 (uniform random draws), Complexity (High/Low) and nsim (1-10).
set.seed(20210714)
dd <- data.frame(
  Method = rep(c("A", "B", "C"), each = 60),
  Pattern = as.factor(rep(c("X", "Y", "Z"), times = 30)),
  X1 = runif(180),
  Complexity = rep(c("High", "Low"), times = 90),
  nsim = rep(rep(1:10, times = 9), each = 2)
)
I want to get boxplots of X1 for each method and across the three patterns and within each complexity. I use the following plot:
# Boxplots of X1 with Pattern on the x axis and one box per Method,
# split into one panel per Complexity level; legend on a single row below.
ggplot(data = dd, mapping = aes(x = Pattern, y = X1, fill = Method)) +
  geom_boxplot() +
  facet_grid(~Complexity) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  guides(fill = guide_legend(nrow = 1))
which gives me the attached figure:
Fabulous. However, the observations for methods A, B and C come from the same dataset (identified by nsim) within 'X' (and likewise within 'Y' and within 'Z'), and I would like to link the observations (values of X1) across the three methods within each of the three patterns (but not link across the three patterns, because that would be meaningless).
Specifically, I want a plot as follows (with hand-drawn lines imagined to connect the different simulation IDs here):
So, I tried the following; however, the boxplots are no longer bunched together for each x (and the x-axis is now also messed up).
library(ggplot2)
# Attempt from the question: every Method x Pattern combination becomes its own
# discrete x level, so the boxes are no longer dodged within a Pattern.
ggplot(dd, aes(x = interaction(Method,Pattern), y = X1, fill = Method)) +
# position = "identity" places each box exactly at its own x level.
geom_boxplot(aes(fill = Method), position = "identity") +
# Thin lines grouped by Pattern x nsim, i.e. connecting the methods' values.
geom_line(aes(x = interaction(Method,Pattern), y = X1,
group=interaction(Pattern,nsim)),
size = 0.15, alpha = 0.5, colour = I("#525252")) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
# Second line layer with the same grouping, drawn thicker (size = 0.35).
geom_line(aes(x = interaction(Method,Pattern),
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
# Points marking the individual observations on the lines.
geom_point(aes(x = interaction(Method,Pattern),
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
# Manual axis labels: only the middle level of each Pattern triple is labelled.
scale_x_discrete(labels = c("","X", "", "", "Y", "", "", "Z","")) + xlab("Pattern")
Which gives the following:
but the boxplots for each setting of Pattern get separated (I would like them bunched together) and the x-axis also gets messed up (for which I have a somewhat inelegant fix). So, the most important thing I still need resolved is the spacing: I would like the space between the boxplots inside each Pattern to be smaller than the space between boxplots of different Patterns.
How do I fix this? Many thanks for your suggestions.
Is this what you're looking for?
# Boxplots of X1 by Method within each Pattern (one panel per Complexity),
# with thin grey lines drawn underneath the boxes.
ggplot(dd, aes(x = Pattern, y = X1, fill = Method)) +
  # Lines are grouped by Method x nsim and dodged with width 0.8; this layer is
  # added before geom_boxplot() so the boxes are drawn on top of it.
  geom_line(aes(group = interaction(Method, nsim)),
            position = position_dodge(width = 0.8),
            size = 0.1, alpha = 0.5, colour = I("#525252")) +
  facet_grid(~Complexity) +
  geom_boxplot() +
  # theme_light() is a complete theme: adding it AFTER theme(...) would replace
  # the legend position and axis-text angle set there, so it must come first.
  theme_light() +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = guide_legend(nrow = 1))
I am not sure if there are better methods of doing this, but I did the following:
# Same toy data as in the question, except Pattern is left as a plain vector
# (no as.factor()).
set.seed(20210714)
dd <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2))
library(ggplot2)
# create dummy dataframe.
dummy.df <- dd
# Append two placeholder rows (first two unique Patterns, Method "ZZZ", one per
# Complexity level). Only these three columns are filled in for the new rows.
# The placeholder Method adds extra x levels that act as spacers between Patterns.
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
# Combined Method x Pattern factor used as the discrete x axis.
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
geom_boxplot(aes(fill = Method)) +
# Thin lines grouped by Pattern x nsim, connecting the methods' values.
geom_line(aes(x = dummy, y = X1,
group=interaction(Pattern,nsim)),
size = 0.15, alpha = 0.5, colour = I("#525252")) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
# Second line layer with the same grouping, drawn thicker (size = 0.35).
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.5, colour = I("#525252")) +
# Manual axis labels: one Pattern name per group of four x levels, rest blank.
scale_x_discrete(labels = c("","X", "", "", "", "Y", "", "", "", "Z","","")) +
xlab("Pattern") +
# breaks lists only the real methods, so "ZZZ" is not among the legend breaks.
scale_fill_brewer(breaks=c("A", "B", "C"), type="qual", palette="Dark2")
which yields the following:
I would like the boxplots to be closer to each other, and would like some advice on how to do this, if anyone has any ideas. Perhaps the next step will be to write this up as a general function.
I want to add significance stars for mean difference comparisons to a plot. Without the lines for the stars, the plot works:
# Toy data: four groups named "<condition>_<level>" with one value each.
da<-data.frame(group=c("condition1_high","condition1_low","condition2_high","condition2_low"),numb=c(30,25,26,20))
# separate() splits `group` into columns A and B (presumably at the underscore,
# via the default separator); remove = F keeps the original `group` column.
da %>% separate(group, c("A", "B"), remove = F) %>%
ggplot(aes(x=A, y=numb, fill = B)) +
geom_bar(position=position_dodge(), stat="identity") +
# Two grey shades repeated and truncated to one entry per row of `da`.
scale_fill_manual(values=rep(c("grey20","grey80"), ceiling(length(da$group)/2))[1:length(da$group)]) +
geom_text(aes(label=numb),
position = position_dodge(width = 0.9), vjust = -0.25) +
# NOTE(review): the surrounding text describes this version as the one WITHOUT
# the significance layer, yet a geom_signif() call is present here (with four
# annotations against two x/xend/y values) - confirm which version was intended.
geom_signif(stat="identity",
data=data.frame(x=c(0.5,1.5), xend=c(1,2),
y=c(30, 30), annotation=c("**", "*","***","+")),
aes(x=x,xend=xend, y=y, yend=y, annotation=annotation))
Now I add a bit of code for the stars I found here on this platform:
# Version from the question that fails with "object B is missing".
da %>% separate(group, c("A", "B"), remove = F) %>%
ggplot(aes(x=A, y=numb, fill = B)) +
geom_bar(position=position_dodge(), stat="identity") +
scale_fill_manual(values=rep(c("grey20","grey80"), ceiling(length(da$group)/2))[1:length(da$group)]) +
geom_text(aes(label=numb),
position = position_dodge(width = 0.9), vjust = -0.25) +
# This layer supplies its own data (columns x, xend, y, annotation) but does not
# set inherit.aes = FALSE, so it still inherits fill = B from ggplot(); the new
# data frame has no column B.
geom_signif(stat="identity",
data=data.frame(x=c(0.5,1.5), xend=c(1,2),
y=c(30, 30), annotation=c("**", "*")),
aes(x=x,xend=xend, y=y, yend=y, annotation=annotation))
Now it says that object B is missing. What can I do?
You need to add inherit.aes = FALSE to the geom_signif call, otherwise it will try to find a column called B in the new data frame you defined. This is because you put an aes call inside your initial call to ggplot. When you do this, by default all subsequent geoms will inherit the aesthetics and data from this call. If you pass new data to a geom, it needs to include a value for all those aesthetics or override the aesthetics or you need to switch off inheritance with inherit.aes = FALSE
# Dodged bar chart of `numb` by A, filled by B, with value labels above the
# bars and significance brackets drawn from a separate data frame.
da %>%
  separate(group, c("A", "B"), remove = FALSE) %>%
  ggplot(aes(x = A, y = numb, fill = B)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  scale_fill_manual(
    values = rep(c("grey20", "grey80"), ceiling(length(da$group) / 2))[1:length(da$group)]
  ) +
  geom_text(
    aes(label = numb),
    vjust = -0.25,
    position = position_dodge(width = 0.9)
  ) +
  # The bracket layer brings its own data, so inheritance of the plot-level
  # aesthetics (notably fill = B) is switched off.
  geom_signif(
    data = data.frame(
      x = c(0.5, 1.5),
      xend = c(1, 2),
      y = c(30, 30),
      annotation = c("**", "*")
    ),
    mapping = aes(x = x, xend = xend, y = y, yend = y, annotation = annotation),
    stat = "identity",
    inherit.aes = FALSE
  )
I'm trying to consistently plot histograms for zonal statistics from a thematic map. The data within a single zone often looks something like this:
# Simulated class labels for one zone: 250 draws from A-F with unequal
# weights; F has weight 0, so it never occurs in the sample.
dat <- data.frame(
  "CLASS" = sample(LETTERS[1:6], 250, replace = TRUE,
                   prob = c(.15, .06, .35, .4, .02, 0))
)
# Declare all six levels so classes absent from the sample remain known levels.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
dat$CLASS <- factor(dat$CLASS, levels = LETTERS[1:6], ordered = TRUE)
wherein not all possible classes may have been present in the zone.
I can pre-compute the data summary and use geom_bar and a manual colour scale to get consistent bar colours regardless of missing data:
library(dplyr)
library(ggplot2)
library(viridis)
# Percentage of rows per class; .drop = FALSE keeps classes with no rows.
dat_summ <- dat %>%
  group_by(CLASS, .drop = FALSE) %>%
  summarise(percentage = n() / nrow(.) * 100)

# One viridis colour per class, named by class so the mapping is by name.
mancols <- setNames(viridis_pal()(6), LETTERS[1:6])

# Bar chart of the pre-computed percentages; both the x and fill scales keep
# unused classes (drop = FALSE).
ggplot(dat_summ) +
  geom_bar(
    aes(x = CLASS, y = percentage, fill = CLASS),
    stat = "identity",
    show.legend = FALSE
  ) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = mancols, drop = FALSE) +
  labs(x = "Class", y = "Percent") +
  theme_minimal() +
  theme(panel.grid.minor = element_blank())
But I can't keep the colours consistent across plots when I try to use geom_histogram:
# Version from the question whose colours change with the data: fill is mapped
# to the computed variable ..x.. and coloured with a continuous viridis scale.
ggplot(dat) +
geom_histogram(aes(x = CLASS,
# Per-class count expressed as a percentage of the total count.
y = (..count../sum(..count..)) * 100,
fill = ..x..), stat = 'count', show.legend = FALSE) +
scale_x_discrete(drop = FALSE) +
scale_fill_viridis_c() +
labs(x = 'Class', y = 'Percent') +
theme_minimal() +
theme(panel.grid.minor = element_blank())
If any of the outside-edge columns (A, F) are count = 0, the colours rescale to where data is present. This doesn't happen if there's a gap in one of the middle classes. Using scale_fill_viridis_b() doesn't solve the problem - it always rescales the palette against the number of non-0 columns.
Is it possible to prevent this behaviour and output consistent colours no matter which columns are count = 0, or am I stuck with my geom_bar approach?
Maybe scale_fill_discrete/scale_fill_viridis_d(drop = F) is what you want (with fill = CLASS).
# Percentage bar chart computed by the count stat; fill is mapped to the CLASS
# factor itself and the discrete viridis scale keeps unused levels.
ggplot(dat) +
  geom_histogram(
    aes(
      x = CLASS,
      y = (..count.. / sum(..count..)) * 100,
      fill = CLASS
    ),
    stat = "count",
    show.legend = FALSE
  ) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_viridis_d(drop = FALSE) +
  labs(x = "Class", y = "Percent") +
  theme_minimal() +
  theme(panel.grid.minor = element_blank())
I think that the problem is that you pass the calculated variable ..x.. to fill in the aesthetics. It appears the length of this variable changes with your data set. You could replace it with scale_fill_manual and you will get the same plot colours regardless of how many levels there are in your CLASS variable:
# Percentage bar chart computed by the count stat, with one fixed colour per
# CLASS level.
ggplot(dat) +
  geom_histogram(
    aes(x = CLASS, y = stat(count / sum(count) * 100), fill = CLASS),
    stat = "count", show.legend = FALSE
  ) +
  scale_x_discrete(drop = FALSE) +
  # The colours are unnamed, so they are matched to levels by position;
  # drop = FALSE keeps unused levels in the scale so the positions do not shift
  # when a class is absent from the data.
  scale_fill_manual(
    values = c("#FF0000FF", "#CCFF00FF", "#00FF66FF",
               "#0066FFFF", "#CC00FFFF", "#FF99FFFF"),
    drop = FALSE
  ) +
  # The trailing `+` above is required: without it the labels and theme below
  # are evaluated as a separate expression and never added to the plot.
  labs(x = "Class", y = "Percent") +
  theme_minimal() +
  theme(panel.grid.minor = element_blank())
I can't fix the colors of my heat-maps according to their values. Same values should have same colors. The goal is to keep all values below a certain threshold (0.05) in (constant) gray. For values greater than this threshold, the colors should gradually change from "firebrick1" to "firebrick4".
For example, "Plant 5"/"202004" = 70.6 is red if I use variable utilization2 and gray if I use variable utilization. How can I fix that?
library(tidyverse)
library(rlang)
# Axis categories: twelve month codes starting with "2020" and five plant names.
MONTHS <- str_c("2020", sprintf("%02d", 1:12))
PLANTS <- str_c("Plant ", 1:5)

# One row per month x plant with a random utilisation between 70 and 100;
# utilization2 is the same value, scaled by 0.67 for "Plant 2" only.
d <- crossing(month = MONTHS, plant = PLANTS) %>%
  mutate(
    utilization = runif(nrow(.), 70, 100),
    utilization2 = if_else(plant == "Plant 2", utilization * 0.67, utilization)
  )
# Draw a month x plant heat-map of `d`, filled by the column named in `fill`
# (passed as a bare name). This is the version from the question, in which the
# same value can receive different colours depending on the column plotted.
draw_plot <- function(fill) {
# Capture the bare column name as a symbol so it can be injected with !!.
fill <- ensym(fill)
d %>%
ggplot(mapping = aes(x = month, y = plant, fill = !!fill)) +
geom_tile(aes(width = 0.85, height = 0.85)) +
# Print the rounded value inside each tile.
geom_text(aes(label = round(!!fill, 1)), color = "white") +
scale_x_discrete(expand=c(0,0)) +
scale_y_discrete(expand=c(0,0)) +
# No limits are given, so the gradient is fitted to the plotted column; `values`
# are positions on the rescaled 0-1 range, not raw data values.
scale_fill_gradientn(colours = c("darkgray", "firebrick1", "firebrick4"),
values = c(0, 0.05, 1)) +
# NOTE(review): `fill` is a symbol here; str_c() on a symbol relies on implicit
# coercion - confirm it works with the installed stringr version.
labs(x = "Month", y = "Production plant", title = str_c("fill = ", fill), color = "Utilization") +
theme_light() +
theme(legend.position = "none")
}
draw_plot(utilization)
draw_plot(utilization2)
library(tidyverse)
library(rlang)
# Axis categories: twelve month codes starting with "2020" and five plant names.
MONTHS <- str_c("2020", sprintf("%02d", 1:12))
PLANTS <- str_c("Plant ", 1:5)

# One row per month x plant with a random utilisation between 70 and 100;
# utilization2 is the same value, scaled by 0.67 for "Plant 2" only.
d <- crossing(month = MONTHS, plant = PLANTS) %>%
  mutate(
    utilization = runif(nrow(.), 70, 100),
    utilization2 = if_else(plant == "Plant 2", utilization * 0.67, utilization)
  )
# Draw a month x plant heat-map of `d`, filled by the column named in `fill`.
#
# fill: bare (unquoted) name of a column of `d`, e.g. utilization.
# Returns the ggplot object. The fill limits span both utilisation columns, so
# a given value gets the same colour whichever column is plotted.
draw_plot <- function(fill) {
  # Capture the bare column name as a symbol so it can be injected with !!.
  fill <- ensym(fill)
  # Common limits: overall minimum and maximum across both columns.
  fill_limits <- range(d$utilization, d$utilization2)
  d %>%
    ggplot(mapping = aes(x = month, y = plant, fill = !!fill)) +
    geom_tile(aes(width = 0.85, height = 0.85)) +
    # Print the rounded value inside each tile.
    geom_text(aes(label = round(!!fill, 1)), color = "white") +
    scale_x_discrete(expand = c(0, 0)) +
    scale_y_discrete(expand = c(0, 0)) +
    # `values` are positions on the 0-1 range obtained by rescaling to `limits`.
    scale_fill_gradientn(colours = c("darkgray", "firebrick1", "firebrick4"),
                         values = c(0, 0.05, 1),
                         limits = fill_limits) +
    # Convert the symbol to a string explicitly instead of relying on str_c()
    # to coerce a non-vector argument.
    labs(x = "Month", y = "Production plant",
         title = str_c("fill = ", as_string(fill)), color = "Utilization") +
    scale_color_identity() +
    theme_light() +
    theme(legend.position = "none")
}
draw_plot(utilization)
draw_plot(utilization2)
The point is that scale_fill_gradientn() sets the limits of the scale to max and min of the vector of interest. You have to set them manually. In this case I chose both the max and min of both columns (limits = c(min(d$utilization, d$utilization2), max(d$utilization, d$utilization2))).
The colours are interpolated between the values, so a trick you could do is to set both 0 and 0.05 as gray, and begin the next colour at a very small increment to 0.05.
# Draw a month x plant heat-map of `d`, filled by the column named in `fill`.
#
# fill: bare (unquoted) name of a column of `d`, e.g. utilization.
# Returns the ggplot object. The bottom 5% of the fill range is drawn in a
# constant grey; the rest runs from "firebrick1" to "firebrick4".
draw_plot <- function(fill) {
  # Capture the bare column name as a symbol so it can be injected with !!.
  fill <- ensym(fill)
  d %>%
    ggplot(mapping = aes(x = month, y = plant, fill = !!fill)) +
    geom_tile(aes(width = 0.85, height = 0.85)) +
    # Print the rounded value inside each tile.
    geom_text(aes(label = round(!!fill, 1)), color = "white") +
    scale_x_discrete(expand = c(0, 0)) +
    scale_y_discrete(expand = c(0, 0)) +
    # Grey at both 0 and 0.05 gives a flat band; the red ramp starts one
    # machine epsilon above 0.05, so there is no visible blend between them.
    # No limits are set here, so the range is that of the plotted column.
    scale_fill_gradientn(
      colours = c("darkgray", "darkgray", "firebrick1", "firebrick4"),
      values = c(0, 0.05, 0.05 + .Machine$double.eps, 1)
    ) +
    # Convert the symbol to a string explicitly instead of relying on str_c()
    # to coerce a non-vector argument.
    labs(x = "Month", y = "Production plant",
         title = str_c("fill = ", as_string(fill)), color = "Utilization") +
    theme_light() +
    theme(legend.position = "none")
}
draw_plot(utilization)
draw_plot(utilization2)
Maybe this is not necessary to mention, but the fill scale rescales all fill values to a range between 0-1 depending on the limits (see ?scales::rescale), so the 0.05 you put in the values argument is the bottom 5% of the range value and not unscaled data values in utilization that are below 0.05. If you want to have consistent fill scales over multiple plots, you'd have to set the limits argument manually.
I'm plotting a stacked bar graph and use geom_text to insert the value and name of each stack. The problem is that some stacks are very small/narrow, so the text of two stacks overlaps and hence is not very readable. How can I modify the code to solve this issue?
# Example data: 15 (Type, Category) pairs with their frequencies.
Type <- c(
  "ddddddddddd", "ddddddddddd", "bbbbbbbbbbbbb",
  "ddddddddddd", "eeeeeeeeeeeeee", "bbbbbbbbbbbbb",
  "ddddddddddd", "bbbbbbbbbbbbb", "ddddddddddd",
  "eeeeeeeeeeeeee", "mmmmmmmmmmmmmmmmmmm", "bbbbbbbbbbbbb",
  "ddddddddddd", "bbbbbbbbbbbbb", "eeeeeeeeeeeeee"
)
Category <- c(
  "mmmmm", "mmmmm", "gggggggggggggggggg",
  "ffffffffffff", "ffffffffffff", "ffffffffffff",
  "sanddddddddd", "sanddddddddd", "yyyyyyyyyyy",
  "yyyyyyyyyyy", "yyyyyyyyyyy", "sssssssssssssss",
  "sssssssssssssss", "sssssssssssssss", "ttttttttttttt"
)
Frequency <- c(4, 1, 30, 7, 127, 11, 1, 1, 6, 9, 1, 200, 3, 4, 5)
# Columns take their names from the three vectors.
Data <- data.frame(Type, Category, Frequency)
# Stacked bar chart from the question: one bar per Type, segments filled by
# Category (legend hidden), plus two text layers.
p <- ggplot(Data, aes(x = Type, y = Frequency)) +
geom_bar(aes(fill = Category), stat="identity", show.legend = FALSE) +
# Both text layers use the same x/y mapping, so the Frequency and Category
# labels of a row are drawn at the same position.
geom_text(aes(label = Frequency), size = 3) +
geom_text(aes(label = Category), size = 3)
Considering your data, a facetted plot might be a better approach:
# summarise your data
library(dplyr)
# Summarise: shorten Type and Category to their first two characters, total the
# frequencies per Type/Category pair, and build a "<Category> <Freq>" label.
# Plain mutate() replaces the deprecated mutate_each(funs(...)) idiom.
d1 <- Data %>%
  mutate(Type = substr(Type, 1, 2),
         Category = substr(Category, 1, 2)) %>%
  group_by(Type, Category) %>%
  summarise(Freq = sum(Frequency)) %>%
  mutate(lbl = paste(Category, Freq)) # label = 'Category' and 'Freq' pasted together
# plot: one facet per Type, one bar per Category, rotated label above each bar
ggplot(d1, aes(x = Category, y = Freq, fill = Category)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(0.8)) +
  geom_text(aes(label = lbl), angle = 90, size = 5, hjust = -0.1,
            position = position_dodge(0.8)) +
  # Upper limit of 240 leaves headroom above the tallest bar for its label.
  scale_y_continuous(limits = c(0, 240)) +
  # "none" is the supported way to hide a legend; FALSE is deprecated.
  guides(fill = "none") +
  facet_grid(. ~ Type, scales = "free", space = "free") +
  theme_bw(base_size = 14)
which gives:
In the above plot I shortened the labels on purpose. If you don't want to do that, you could consider this:
# Same summary as the facetted variant but with the full-length names: total
# frequency per Type/Category pair plus a "<Category> <Freq>" label.
d2 <- Data %>%
  group_by(Type, Category) %>%
  summarise(Freq = sum(Frequency)) %>%
  mutate(lbl = paste(Category, Freq))
# One facet per Type; labels are anchored at y = 5 and run upwards along the
# bars, so the x axis text and ticks are removed as redundant.
ggplot(d2, aes(x = Category, y = Freq, fill = Category)) +
  geom_bar(stat = "identity", width = 0.7, position = position_dodge(0.8)) +
  geom_text(aes(y = 5, label = lbl), alpha = 0.6, angle = 90, size = 5,
            hjust = 0, position = position_dodge(0.8)) +
  scale_y_continuous(limits = c(0, 240)) +
  # "none" is the supported way to hide a legend; FALSE is deprecated.
  guides(fill = "none") +
  facet_grid(. ~ Type, scales = "free", space = "free") +
  theme_bw(base_size = 14) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())
which gives: