Related
I've searched and tried a bunch of suggestions to be able to display a custom legend instead of the default one in a grouped scatter ggplot. I've tried this and this and following this among others.
For instance, let's say I have a df like this one:
df = data.frame(id = c("A", "A", "B", "C", "C", "C"),
value = c(1,2,1,2,3,4),
ref = c(1.5, 1.5, 1, 2,2,2),
min = c(0.5, 0.5, 1,2,2,2))
and I want to display the values of each id as round dots, but also put the reference values and minimum values for each id as a differently shaped dot, as follows:
p = ggplot(data = df) +
geom_point(aes(x = id, y = value, color = factor(id)), shape = 19, size = 6) +
geom_point(aes(x = id, y = ref, color = factor(id)), shape = 0, size = 8) +
geom_point(aes(x = id, y = min, color = factor(id)), shape = 2, size = 8) +
xlab("") +
ylab("Value")
#print(p)
Now all is fine, but my legend doesn't add anything to the interpretation of the plot, as the X axis and colors are enough to understand it. I know I can remove the legend via theme(legend.position = "none").
Instead, I would like to have a legend of what the actual shapes of each dot represent (e.g., filled round dot = value, triangle = min, square = ref).
Among trying to manually set the scale values via scale_fill_manual and something along those lines
override.shape = shapes$shape
override.linetype = shapes$pch
guides(colour = guide_legend(override.aes = list(shape = override.shape, linetype = override.linetype)))...
....
I've also tried making a secondary plot, but not display it, using something suggested in one of the links pasted above:
shapes = data.frame(shape = c("value", "reference", "minimum"), pch = c(19,0,2), col = c("gray", "gray", "gray"))
p2 = ggplot(shapes, aes(shape, pch)) + geom_point()
#print(p2)
g_legend <- function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)
}
legend <- g_legend(p2)
library(gridExtra)
pp <- arrangeGrob(p1 ,legend,
widths=c(5/4, 1/4),
ncol = 2)
but then I get the error:
> legend <- g_legend(p2)
Error in tmp$grobs[[leg]] :
attempt to select less than one element in get1index
for which I did not find a working solution.. so yeah.. any suggestion on how I could only show a legend related to the different dot shapes would be welcome.
Thank you
You can manually build a shape legend using scale_shape_manual:
library(ggplot2)
ggplot(data = df) +
geom_point(aes(x = id, y = value, color = factor(id), shape = 'value'), size = 6) +
geom_point(aes(x = id, y = ref, color = factor(id), shape = 'ref'), size = 8) +
geom_point(aes(x = id, y = min, color = factor(id), shape = 'min'), size = 8) +
scale_shape_manual(values = c('value' = 19, 'ref' = 0, 'min' = 2)) +
xlab("") +
ylab("Value")
Created on 2020-04-15 by the reprex package (v0.3.0)
But a better way to do this would be to reshape the df to a long format, and map each aes to a variable:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(-id) %>%
ggplot() +
geom_point(aes(x = id, y = value, color = factor(id), shape = name, size = name)) +
scale_shape_manual(values = c('value' = 19, 'ref' = 0, 'min' = 2)) +
scale_size_manual(values = c('value' = 6, 'ref' = 8, 'min' = 8)) +
xlab("") +
ylab("Value")
Created on 2020-04-15 by the reprex package (v0.3.0)
To remove the legend for the color use guide_none():
library(tidyr)
library(ggplot2)
df %>%
pivot_longer(-id) %>%
ggplot() +
geom_point(aes(x = id, y = value, color = factor(id), shape = name, size = name)) +
scale_shape_manual(values = c('value' = 19, 'ref' = 0, 'min' = 2)) +
scale_size_manual(values = c('value' = 6, 'ref' = 8, 'min' = 8)) +
guides(color = guide_none()) +
xlab("") +
ylab("Value")
Created on 2020-04-16 by the reprex package (v0.3.0)
Data:
df = data.frame(id = c("A", "A", "B", "C", "C", "C"),
value = c(1,2,1,2,3,4),
ref = c(1.5, 1.5, 1, 2,2,2),
min = c(0.5, 0.5, 1,2,2,2))
You can tidy your data first using tidyr, and then map the aes shape to the new variable
library(tidyr)
df2 <- pivot_longer(df, -id)
ggplot(data = df2) +
geom_point(aes(x = id, y = value, shape = name), size = 6) +
xlab("") +
ylab("Value")
My plot has two problems:
(1) the group bars are not ordered as I want them to be - I will like them to appear in the order entered and (2) for the legend, the order appears as V, E, B whereas in the groups, it appears as B, E, V. I can reverse the legend, however, what I will really like to get is change the order of the subplots to V, E, B.
library(ggplot2)
df2 <- data.frame(supp = rep(c("V","E","B"), each=5),
s = rep(c("C3","C1", "C2","C5","C6"), 3),
len = c(1,2,3,4,5,6,8,4,4,3,9,7,6,8,5))
p <- ggplot(data = df2, aes(x = s, y = len, fill = supp)) +
geom_bar(stat = "identity", color = "black", position = position_dodge())
p + scale_fill_brewer(palette = "Blues", guide = guide_legend(reverse = TRUE)) +
scale_x_discrete(limits = rev(levels(df2$s)))
You need to change df2$supp from character to factor and specify the levels as you want them to appear.
See modified code below. Also, check out this link for even more detail about how to control the colour of your variables so they are consistent.
library(ggplot2)
df2 <- data.frame(supp = rep(c("V","E","B"), each=5),
s = rep(c("C3","C1", "C2","C5","C6"), 3),
len = c(1,2,3,4,5,6,8,4,4,3,9,7,6,8,5))
df2$supp <- factor(df2$supp,
levels = c("V", "E", "B"))
p <- ggplot(data=df2, aes(x=(df2$s), y=len, fill=supp)) +
geom_bar(stat="identity", color="black", position=position_dodge())
p + scale_fill_brewer(palette="Blues", guide = guide_legend(reverse=TRUE)) +
scale_x_discrete(limits = rev(levels(df2$s)))
Data
df2 <- data.frame(supp = rep(c("V", "E", "B"), each = 5),
s = rep(c("C3", "C1", "C2", "C5", "C6"), 3),
len = c(1, 2, 3, 4, 5, 6, 8, 4, 4, 3, 9, 7, 6, 8, 5))
Adjustment
Because you use data.frame() to create data, R will set strings as factors by default. So you need to revise the types of variables to what you want.
df2$s <- as.character(df2$s)
df2$supp <- factor(df2$supp, levels = c("V", "E", "B"))
Plot
ggplot(data = df2, aes(x = s, y = len, fill = supp)) +
geom_bar(stat = "identity", color = "black", position = position_dodge()) +
scale_fill_brewer(palette = "Blues", direction = -1)
Here you don't need to use additional guide_legend() and scale_x_discrete() to change order. It will be more concise.
I used geom_count to visualise overlaying points as sized groups, but I also want to add the actual count as a label to the plotted points, like this:
However, to achieve this, I had to create a new data frame containing the counts and use these data in geom_text as shown here:
#Creating two data frames
data <- data.frame(x = c(2, 2, 2, 2, 3, 3, 3, 3, 3, 4),
y = c(1, 2, 2, 2, 2, 2, 3, 3, 3, 3),
id = c("a", "b", "b", "b", "c",
"c", "d", "d", "d", "e"))
data2 <- data %>%
group_by(id) %>%
summarise(x = mean(x), y = mean(y), count = n())
# Creating the plot
ggplot(data = data, aes(x = x, y = y)) +
geom_count() +
scale_size_continuous(range = c(10, 15)) +
geom_text(data = data2,
aes(x = x, y = y, label = count),
color = "#ffffff")
Is there any way to achieve this in a more elegant way (i.e. without the need for the second data frame)? I know that you can access the count in geom_count using ..n.., yet if I try to access this in geom_text, this is not working.
Are you expecting this:
ggplot(data %>%
group_by(id) %>%
summarise(x = mean(x), y = mean(y), count = n()),
aes(x = x, y = y)) + geom_point(aes(size = count)) +
scale_size_continuous(range = c(10, 15)) +
geom_text(aes(label = count),
color = "#ffffff")
update:
If the usage of geom_count is must, then the expected output can be achieved using:
p <- ggplot(data = data, aes(x = x, y = y)) +
geom_count() + scale_size_continuous(range = c(10, 15))
p + geom_text(data = ggplot_build(p)$data[[1]],
aes(x, y, label = n), color = "#ffffff")
here would be a solution for a code with discrete values
f<-ggplot(data = STest, aes(x = x, y = y)) + geom_count()+scale_x_discrete(labels = c("strong decrease","decrease","no change","increase","strong increase","no opinion"))+scale_y_discrete(labels = c("strong decrease","decrease","no change","increase","strong increase","no opinion"))
f + geom_text(data = ggplot_build(p)$data[[1]],aes(x, y, label = n,vjust= -2))
Thank you so much!
A much easier way to change this is to use the labs() function so in this case it would be ...labs(size = "Count") + ....
That should be all you need.
I recognize that this has been an issue that's been asked in many other instances, but none of the solutions provided worked for my particular problem.
Here, I have the following data:
library(tidyverse)
library(scales)
mydata <- tibble(Category = c("A", "B", "C", "D"),
Result = c(0.442, 0.537, 0.426, 0.387),
A = c(NA, "A", NA, NA),
B = rep(NA, 4),
C = c(NA, "C", NA, NA),
D = c("D", "D", NA, NA))
mydata$Category <- factor(mydata$Category)
And I have the following vector for the colors:
colors_vct <- c(A = "#0079c0", B = "#cc9900", C = "#252525", D = "#c5120e")
With this information, I can create the following plot:
p <- ggplot(data = mydata , aes(x = Category, y = Result, fill = Category)) +
geom_bar(stat = "identity") + geom_text(aes(label = percent(Result), color = Category), hjust = -.25) +
coord_flip() + scale_y_continuous(limits = c(0,1), labels = percent) +
scale_colour_manual(values = colors_vct) + scale_fill_manual(values = colors_vct)
p
And I'd like to have little triangles appear after the labels based on whether a certain category is mentioned in the last 4 columns of mydata, colored by that category's color, as so:
p <- p + geom_text(data = filter(mydata, mydata[,3] == "A"), aes(label = sprintf("\u25b2")), colour = colors_vct["A"], hjust = -4)
#p <- p + geom_text(data = filter(mydata, mydata[,4] == "B"), aes(label = sprintf("\u25b2")), colour = colors_vct["B"], hjust = -5) #This is commented out because there are no instances where the layer ends up being applied.
p <- p + geom_text(data = filter(mydata, mydata[,5] == "C"), aes(label = sprintf("\u25b2")), colour = colors_vct["C"], hjust = -6)
p <- p + geom_text(data = filter(mydata, mydata[,6] == "D"), aes(label = sprintf("\u25b2")), colour = colors_vct["D"], hjust = -7)
p
This is what I want the final chart to look like (more or less, see bonus question below). Now, I'd like to iterate the last bit of code using a for loop. And this is where I'm running into trouble. It just ends up adding one layer only. How do I make this work? Here is my attempt:
#Set the colors into another table for matching:
colors_tbl <- tibble(Category = levels(mydata$Category),
colors = c("#0079c0", "#cc9900", "#252525", "#c5120e"))
for (i in seq_along(mydata$Category)) {
if (is_character(mydata[[i]])) { #This makes the loop skip if there is nothing to be applied, as with category B.
#Filters to just the specific categories I need to have the triangles shown.
triangles <- filter(mydata, mydata[,(i+2)] == levels(mydata$Category)[i])
#Matches up with the colors_tbl to determine which color to use for that triangle.
triangles <- mutate(triangles, colors = colors_tbl$colors[match(levels(triangles$Category)[i], colors_tbl$Category)])
#Sets a particular position for that triangle for the hjust argument below.
pos <- -(i+3)
#Adding the layer to the plot object
p <- p + geom_text(data = triangles, aes(label = sprintf("\u25b2")), color = triangles$colors, hjust = pos)
}
}
p
:(
Bonus question: Is there a way I can avoid gaps in between the triangles, as per the 2nd chart?
EDIT: As per #baptiste 's suggestion, I re-processed the data as such:
mydata2 <- mydata %>% gather(key = comp, value = Present, -Result, -Category)
mydata2 <- mydata2 %>% mutate(colors = colors_tbl$colors[match(mydata2$Present, colors_tbl$Category)]) %>%
filter(!is.na(mydata2$Present)) %>% select(-comp)
mydata2 <- mydata2 %>% mutate(pos = if_else(Present == "A", -4, if_else(Present == "B", -5, if_else(Present == "C", -6, -7))))
p <- p + geom_text(data = mydata2, aes(x = Category, label = sprintf("\u25b2")), colour = mydata2$colors, hjust = mydata2$pos)
p
Ok, I got it to work. my bonus question still stands.
With data
value <- c(9, 4, 10, 7, 10,
10, 10, 4, 10,
4, 10, 2, 5, 5, 4)
names <- c("a","b",
"c","d","e",
"f", "g","h",
"i","j","k","l",
"m","n","p")
df <- data.frame(value, names)
df$names <- as.character(df$names)
p <- ggplot(data = df, aes(y = value,x= names,group=1))+
geom_point(color = I("red"),shape=23, lwd=3,fill="red")+
geom_line(group = I(1),color = I("red"))+
theme_bw()+
coord_flip()
p + xlab("") +ylab("")
I produce this
But now I would like to create plot similar picture below, where "a", "b", "c" and "D" would be x aes labels and belong to PART 1 and names "p", "n", "m", "i", "k" would belong in PART 2 (and so on). the key part here is how to add circles inside plot.
I've also looked here
How can I add freehand red circles to a ggplot2 graph?
but no luck.
If this in upper pocture is not possible, than I would like my output to be like below picture
In order to achieve the facetting by part as in your last plot you can create a new column that groups your values, e.g.:
df$part <- rep(c("part3", "part2", "part1"), each = 5)
In order to plot the open circles you can add another geom_point() layer. I created a new data frame that consists of all combinations of names and value for each part:
library(dplyr)
library(tidyr)
df2 <- df %>%
group_by(part, names) %>%
expand(value = min(df$value):max(df$value))
Then you plot a facetted plot with circles:
ggplot() +
geom_point(data = df2, aes(x = value, y = names),
shape = 1) +
geom_point(data = df, aes(y = names, x = value, group = 1), colour = I("red"), shape = 23, lwd = 3, fill = "red") +
geom_line(data = df, aes(y = names, x = value, group = 1), group = I(1),color = I("red")) +
theme_bw() +
facet_wrap(~part, ncol = 1, scales = "free_y")
Note that I swapped x and y values as coord_flip() cannot be used with scales = "free_y" which is however necessary if you want only those names which have values in the respective facet.