Related
With ggplot2 I want to plot two vectors (vec1_num, vec2_num) in two dimensions and colour the points by a group variable (vec3_char). Some data points are overlapping.
library(ggplot2)
vec1_num = c(1,2,3,4,1,3,4,5,5,5)
vec2_num = c(1,2,3,4,1,3,4,5,5,5)
vec3_char = c("A", "B", "C", "A", "B", "C", "C", "A", "B", "C")
# plot 1
ggplot(data = NULL) +
geom_point(aes(x=vec1_num, y=vec2_num, colour=vec3_char), alpha=0.4, size=4) +
scale_colour_manual(values=c("A"="darkblue", "B"="darkred", "C"="orange")) +
theme(panel.grid = element_blank())
I know I can attenuate the overlap by reducing alpha or working with geom_jitter adding a bit of noise. Like this:
# plot 2
ggplot(data = NULL) +
geom_jitter(aes(x=vec1_num, y=vec2_num, colour=vec3_char), alpha=0.4, size=4, width = 0.1) +
scale_colour_manual(values=c("A"="darkblue", "B"="darkred", "C"="orange")) +
theme(panel.grid = element_blank())
However, is it possible to make use of plot 1 but colour the overlapping points differently? So that, for example, "A" = "darkblue, "AB" = "black", "ABC" = "grey", "B" = "darkred", "BC" = "pink", "C"="orange"? And can I additionally add a small Venn Diagram (legend) that visualises the color choice for the point overlap?
Thanks!
My way of doing this would be to convert the letters into numbers, sum them and covert back into letters.
NB The one complication is that the letters need to be A, B, D, H, ... so there is only one way of making each number combination. Though there is probably a way to start with A, B, C, ... and encode for unique values
library(tidyverse)
vec1_num = c(1,2,3,4,1,3,4,5,5,5)
vec2_num = c(1,2,3,4,1,3,4,5,5,5)
vec3_char = c("A", "B", "D", "A", "B", "D", "D", "A", "B", "D")
removeDup <- function(str) paste(rle(strsplit(str, "")[[1]])$values, collapse="") # Function to remove duplicated values in a string
data <- data.frame(x = vec1_num, y = vec2_num, col = match(vec3_char, LETTERS))
data <- data %>%
group_by(x) %>%
mutate(colour = glue::glue_collapse(col, sep = "")) %>%
select(-col) %>%
distinct(x, y, .keep_all = TRUE) %>%
mutate(colour = removeDup(colour)) %>%
mutate(colour = sapply(str_extract_all(colour, '\\d'), function(x) sum(as.integer(x)))) %>%
mutate(colour = case_when(
colour == 1 ~ "A",
colour == 2 ~ "B",
colour == 3 ~ "AB",
colour == 4 ~ "D",
colour == 5 ~ "AD",
colour == 6 ~ "BD",
colour == 7 ~ "ABD"
))
# plot 1
ggplot(data) +
geom_point(aes(x=x, y=y, colour = as_factor(colour)), alpha=0.4, size=4) +
geom_text(aes(x = x, y = y, label = colour), vjust = 2) +
scale_colour_manual(values=c("A"="darkblue", "B"="darkred", "AB"="orange", "D" = "green", "AD" = "black", "BD" = "orange", "ABD" = "purple"), name = "Colour") +
theme(panel.grid = element_blank())
.
I would firstly create a dataframe. Then I would extract for every x y combination (list(df$vec1_num, df$vec2_num)) what characters are present (...unique(xy_i$vec3_char)...). Like this:
df <- data.frame(vec1_num, vec2_num, vec3_char)
df_new <- do.call("rbind.data.frame", by(df, list(df$vec1_num, df$vec2_num), function(xy_i){
chars_i <- paste0(sort(unique(xy_i$vec3_char)),collapse= "")
xy_i$chars_comb <- factor(chars_i, levels= c("A", "AB", "AC", "ABC", "B", "BC", "C"))
xy_i
}))
If you now make the plot it shows you what characters overlap at which point.
ggplot(data = df_new) +
geom_point(aes(x=vec1_num, y=vec2_num, colour=chars_comb), alpha=0.4, size=4) +
scale_colour_manual(values=c("AB" = "black", "ABC" = "grey", "B" = "darkred", "C"="orange", "AC"= "red")) +
theme(panel.grid = element_blank())
This question builds off of enter link description here but is in the context of faceted boxplots.
So, I have the following code:
set.seed(20210714)
dd <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2), n = 10)
dd1 <- data.frame(Method = rep(c("A", "B", "C"), each = 60), Pattern = rep(c("X", "Y", "Z"), times = 30), X1 = runif(180), Complexity = rep(c("High", "Low"), times = 90), nsim = rep(rep(1:10, times = 9), each = 2), n = 5)
dd <- rbind(dd, dd1)
library(ggplot2)
# create dummy dataframe.
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
ggplot(dummy.df, aes(x = dummy, y = X1, fill = Method)) +
geom_boxplot(aes(fill = Method)) +
facet_grid(~Complexity) +
theme_light() +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
scale_x_discrete(labels = c("","X", "", "", "", "Y", "", "", "", "Z","","")) +
xlab("Pattern") +
scale_fill_brewer(breaks=c("A", "B", "C"), type="qual", palette="Paired")
dummy.df <- dd
dummy.df[nrow(dd) + 1:2,"Pattern"] <- unique(dd$Pattern)[-3]
dummy.df[nrow(dd) + 1:2,"Method"] <- "ZZZ"
dummy.df[nrow(dd) + 1:2,"Complexity"] <- c("High","Low")
dummy.df$dummy <- interaction(dummy.df$Method,dummy.df$Pattern)
dummy.df$fill <- interaction(dummy.df$Method, dummy.df$n)
dummy.df$dummy <- interaction(dummy.df$fill, dummy.df$Pattern)
dummy.df$dummy <- factor(dummy.df$dummy, levels = levels(dummy.df$dummy)[-c(4, 12, 20, 24)])
dummy.df$dummy[361:362] <- "A.10.Z" ## dummy variables to get rid of NAs
theme_set(theme_bw(base_size = 14))
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
geom_boxplot(aes(fill = fill),lwd=0.1,outlier.size = 0.01) +
facet_grid(~Complexity) +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
scale_x_discrete(labels = c("X", "Y", "Z"), breaks = paste("A.10.", c("X", "Y", "Z"), sep = ""),drop=FALSE) +
xlab("Pattern") +
scale_fill_brewer(breaks= levels(dummy.df$fill)[-c(4,8)], type="qual", palette="Paired")
This yields the following plot.
All is well, except with the legend. I would like the following: the dark colors to be in the First group titled "n=5" on the left, with "A", "B", "C" for the three dark colors, and the light colors to be to the right, in a Second group titled "n=10" on the right, with "A", "B", "C" for the three light colors. Sort of like in the link enter link description here above.
What I can not figure out is how to call the boxplot twice to mimic the solution there.
Is there a way to do this? Please feel free to let me know if the question is not clear.
Thanks again, in advance, for any help!
Adapting my answer on your former question this could be achieved like so:
library(ggplot2)
fill <- levels(dummy.df$fill)[-c(4,8)]
fill <- sort(fill)
labels <- gsub("\\.\\d+", "", fill)
labels <- setNames(labels, fill)
colors <- scales::brewer_pal(type="qual", palette="Paired")(6)
colors <- setNames(colors, fill)
library(ggnewscale)
ggplot(dummy.df, aes(x = dummy, y = X1, fill = fill)) +
geom_boxplot(aes(fill = fill), lwd=0.1,outlier.size = 0.01) +
scale_fill_manual(name = "n = 5", breaks= fill[grepl("5$", fill)], labels = labels[grepl("5$", fill)], values = colors,
guide = guide_legend(title.position = "left", order = 1)) +
new_scale_fill() +
geom_boxplot(aes(fill = fill), lwd=0.1,outlier.size = 0.01) +
scale_fill_manual(name = "n = 10", breaks = fill[grepl("10$", fill)], labels = labels[grepl("10$", fill)], values = colors,
guide = guide_legend(title.position = "left", order = 2)) +
facet_grid(~Complexity) +
theme(legend.position = 'bottom') +
guides(fill = guide_legend(nrow=1)) +
geom_line(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.35, colour = I("#525252")) +
geom_point(aes(x = dummy,
group=interaction(Pattern,nsim,n)),
size = 0.35, alpha = 0.25, colour = I("#525252")) +
scale_x_discrete(labels = c("X", "Y", "Z"), breaks = paste("A.10.", c("X", "Y", "Z"), sep = ""),drop=FALSE) +
xlab("Pattern")
#> Warning: Removed 2 rows containing non-finite values (new_stat_boxplot).
I want to connect datapoints from two datasets with a vertical line. The points that should be connected vertically have the same identifier (V), but I was hoping to keep the datasets separate.
Here is my figure so far:
d1 <- data.frame (V = c("A", "B", "C", "D", "E", "F", "G", "H"),
O = c(9,2.5,7,8,7,6,7,7.5),
S = c(6,5,3,5,3,4,5,6))
d2 <- data.frame (V = c("A", "B", "C", "D"),
O = c(10,3,7.5,8.2),
S = c(6,5,3,5))
scaleFUN <- function(x) sprintf("%.0f", x)
p<-ggplot(data=d1, aes(x=S, y=O), group=factor(V), shape=V) +
geom_point(size = 5, aes(fill=V),pch=21, alpha = 0.35)+
theme_bw()+
geom_point(data = d2, size=5, aes(fill=V), pch=22,colour="black")+
theme(legend.title=element_blank())+
xlab(expression(italic("S"))) + theme(text = element_text(size=25))+
ylab(expression(italic("O")))+ theme(text = element_text(size=25))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
theme(axis.text.y=element_text(angle=90, hjust=1))+
theme(legend.position="none") # remove legend
print(p)
So the final figure would look something like this:
Can I do this with geom_line() without combining datasets (so the other formatting can be separate for each dataset)?
As bouncyball pointed out, you can use a separate data set (merged from d1 and d2) with geom_segment.
See the following:
ggplot(data = d1, aes(x = S, y = O), group = factor(V), shape = V) +
geom_point(size = 5, aes(fill = V), pch = 21, alpha = 0.35) +
geom_point(data = d2, size = 5, aes(fill = V), pch = 22, colour = "black") +
geom_segment(data = merge(d1, d2, by = 'V'),
aes(x = S.x, xend = S.y, y = O.x, yend = O.y)) +
guides(fill = FALSE)
Which yields:
You can add your themes also.
In the chart below, I'm looking to have each triangle in the label for subgroup A match the color of the subgroup they are referring to (green, blue, purple). Is this possible at all?
library(tibble)
library(dplyr)
library(ggplot2)
library(scales)
example_tibble <- tibble(Subgroup = c("A", "B", "C", "D"),
Result = c(0.288, 0.204, 0.206, 0.182),
A_vs_B = rep(1, 4),
A_vs_C = rep(1, 4),
A_vs_D = rep(1, 4))
ggplot(example_tibble, aes(x = Subgroup, y = Result, fill = Subgroup)) +
geom_bar(stat = "identity") + geom_text(aes(label =
paste0(percent(Result),
if_else(A_vs_B == 1 & Subgroup == "A", sprintf("\u25b2"), ""),
if_else(A_vs_C == 1 & Subgroup == "A", sprintf("\u25b2"), ""),
if_else(A_vs_D == 1 & Subgroup == "A", sprintf("\u25b2"), "")),
colour = Subgroup), hjust = -.25) +
coord_flip() + scale_y_continuous(limits = c(0,.5), labels = percent)
I'm trying to graph two plots together, where one I a main plot and the other is a sub plot that I'd like to be located in the legend area of the main plot.
This code produces this plot:
gLegend <- function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)
}
set.seed(1)
main.df <- data.frame(group=rep(LETTERS[1:4],3),
y=rnorm(12),x=c(rep(1,4),rep(2,4),rep(3,4)),col=rep(c("gray","blue","red","magenta"),3))
main.df$group <- factor(main.df$group,levels=LETTERS[1:4])
sub.df <- data.frame(group=c("B","C","D"),x=1:3,effect=runif(3,0,1),col=c("blue","red","magenta"))
sub.df$group <- factor(sub.df$group,levels=LETTERS[2:4])
main.plot <- ggplot(main.df,aes(x=x,y=y,color=factor(group)))+geom_point(size=3)+facet_wrap(~group,ncol=4)+scale_fill_manual(values=c("gray","blue","red","magenta"),labels=c("A","B","C","D"),name="group")+scale_colour_manual(values=c("gray","blue","red","magenta"),labels=c("A","B","C","D"),name="group")+scale_x_continuous(breaks=unique(main.df$x))
sub.plot <- ggplot(sub.df,aes(x=x,y=effect,color=factor(group)))+geom_point(size=2)+scale_fill_manual(values=c("blue","red","magenta"),labels=c("B","C","D"),name="group",guide=FALSE)+scale_colour_manual(values=c("blue","red","magenta"),labels=c("B","C","D"),name="group",guide=FALSE)+labs(x="group",y="effect")+ggtitle("effect summary")+scale_x_continuous(breaks=unique(sub.df$x),labels=c("B","C","D"))
sub.plot.grob <- ggplotGrob(sub.plot)
combined.plot <- arrangeGrob(main.plot+theme(legend.position="none"),widths=c(0.75,0.25),arrangeGrob(20,sub.plot.grob),ncol=2)
What I'd like to do is get rid of the legend of the main plot.
If I add guide=FALSE to scale_fill_manual scale_colour_manual gLegend throws the error:
Error in tmp$grobs[[leg]] :
attempt to select less than one element in get1index
Obviously because it cannot fine "guide-box"
Any idea how to solve this?
Here is an alternative solution using cowplot
library(ggplot2)
library(cowplot)
theme_set(theme_bw()) #cowplot automatically sets theme_classic
set.seed(1)
main.df <- data.frame(group=rep(LETTERS[1:4],3),
y=rnorm(12),x=c(rep(1,4),rep(2,4),rep(3,4)),col=rep(c("gray","blue","red","magenta"),3))
main.df$group <- factor(main.df$group,levels=LETTERS[1:4])
sub.df <- data.frame(group=c("B","C","D"),x=1:3,effect=runif(3,0,1),col=c("blue","red","magenta"))
sub.df$group <- factor(sub.df$group,levels=LETTERS[2:4])
sub.plot <- ggplot(sub.df,aes(x=x,y=effect,color=factor(group)))+
geom_point(size=2)+
scale_fill_manual(values=c("blue","red","magenta"),labels=c("B","C","D"),name="group",guide=FALSE)+
scale_colour_manual(values=c("blue","red","magenta"),labels=c("B","C","D"),name="group",guide=FALSE)+
labs(x="group",y="effect")+
ggtitle("effect summary")+
scale_x_continuous(breaks=unique(sub.df$x),labels=c("B","C","D"))
main.plot <- ggplot(main.df,aes(x=x,y=y,color=factor(group)))+
geom_point(size=3)+facet_wrap(~group,ncol=4)+
scale_fill_manual(values=c("gray","blue","red","magenta"),labels=c("A","B","C","D"),name="group")+
scale_colour_manual(values=c("gray","blue","red","magenta"),labels=c("A","B","C","D"),name="group")+
scale_x_continuous(breaks=unique(main.df$x)) +
theme(legend.position = "none")
ggdraw() +
draw_plot(main.plot, x = 0, y = 0, width = 0.75, height = 1) +
draw_plot(sub.plot, 0.75, 0, .25, .5)
try this,
grid.arrange(ggplot(), ggplot(),
layout_matrix=matrix(c(1,1,NA,2), 2), widths=c(1.5,1))
Here is solution using grid package, if you want a blank space where is the current legend:
library(grid)
set.seed(1)
main.df <- data.frame(group=rep(LETTERS[1:4],3),
y = rnorm(12),
x = c(rep(1,4), rep(2,4), rep(3,4)),
col = rep(c("gray", "blue", "red", "magenta"), 3))
main.df$group <- factor(main.df$group,levels=LETTERS[1:4])
sub.df <- data.frame(group=c("B", "C", "D"),
x = 1:3,
effect = runif(3, 0, 1),
col = c("blue", "red", "magenta"))
sub.df$group <- factor(sub.df$group, levels=LETTERS[2:4])
main.plot <- ggplot(main.df, aes(x = x,y = y, color = factor(group))) +
geom_point(size = 3) + facet_wrap(~group, ncol = 4) +
scale_fill_manual(values = c("gray", "blue", "red", "magenta"),
labels = c("A", "B", "C", "D"),name = "group") +
scale_colour_manual(values = c("gray", "blue", "red", "magenta"),
labels = c("A", "B", "C", "D"), name = "group") +
scale_x_continuous(breaks = unique(main.df$x)) + guides(color = F)
sub.plot <- ggplot(sub.df, aes(x = x, y = effect, color = factor(group))) +
geom_point(size = 2) +
scale_fill_manual(values = c("blue", "red", "magenta"),
labels = c("B", "C", "D"), name= "group" ,guide = FALSE) +
scale_colour_manual(values = c("blue", "red", "magenta"),
labels = c("B", "C", "D"), name = "group", guide=FALSE) +
labs(x = "group", y = "effect") +
ggtitle("effect summary") +
scale_x_continuous(breaks = unique(sub.df$x), labels = c("B", "C", "D"))
blank = grid.rect(gp = gpar(col = "white"))
sub.plot.grob <- arrangeGrob(blank, sub.plot, ncol = 1)
combined.plot <- arrangeGrob(main.plot, sub.plot.grob, ncol=2, widths=c(0.75,0.25))
grid.arrange(combined.plot)