I recognize that this has been an issue that's been asked in many other instances, but none of the solutions provided worked for my particular problem.
Here, I have the following data:
library(tidyverse)
library(scales)
mydata <- tibble(Category = c("A", "B", "C", "D"),
Result = c(0.442, 0.537, 0.426, 0.387),
A = c(NA, "A", NA, NA),
B = rep(NA, 4),
C = c(NA, "C", NA, NA),
D = c("D", "D", NA, NA))
mydata$Category <- factor(mydata$Category)
And I have the following vector for the colors:
colors_vct <- c(A = "#0079c0", B = "#cc9900", C = "#252525", D = "#c5120e")
With this information, I can create the following plot:
p <- ggplot(data = mydata , aes(x = Category, y = Result, fill = Category)) +
geom_bar(stat = "identity") + geom_text(aes(label = percent(Result), color = Category), hjust = -.25) +
coord_flip() + scale_y_continuous(limits = c(0,1), labels = percent) +
scale_colour_manual(values = colors_vct) + scale_fill_manual(values = colors_vct)
p
And I'd like to have little triangles appear after the labels based on whether a certain category is mentioned in the last 4 columns of mydata, colored by that category's color, as so:
p <- p + geom_text(data = filter(mydata, mydata[,3] == "A"), aes(label = sprintf("\u25b2")), colour = colors_vct["A"], hjust = -4)
#p <- p + geom_text(data = filter(mydata, mydata[,4] == "B"), aes(label = sprintf("\u25b2")), colour = colors_vct["B"], hjust = -5) #This is commented out because there are no instances where the layer ends up being applied.
p <- p + geom_text(data = filter(mydata, mydata[,5] == "C"), aes(label = sprintf("\u25b2")), colour = colors_vct["C"], hjust = -6)
p <- p + geom_text(data = filter(mydata, mydata[,6] == "D"), aes(label = sprintf("\u25b2")), colour = colors_vct["D"], hjust = -7)
p
This is what I want the final chart to look like (more or less, see bonus question below). Now, I'd like to iterate the last bit of code using a for loop. And this is where I'm running into trouble. It just ends up adding one layer only. How do I make this work? Here is my attempt:
#Set the colors into another table for matching:
colors_tbl <- tibble(Category = levels(mydata$Category),
colors = c("#0079c0", "#cc9900", "#252525", "#c5120e"))
for (i in seq_along(mydata$Category)) {
if (is_character(mydata[[i]])) { #This makes the loop skip if there is nothing to be applied, as with category B.
#Filters to just the specific categories I need to have the triangles shown.
triangles <- filter(mydata, mydata[,(i+2)] == levels(mydata$Category)[i])
#Matches up with the colors_tbl to determine which color to use for that triangle.
triangles <- mutate(triangles, colors = colors_tbl$colors[match(levels(triangles$Category)[i], colors_tbl$Category)])
#Sets a particular position for that triangle for the hjust argument below.
pos <- -(i+3)
#Adding the layer to the plot object
p <- p + geom_text(data = triangles, aes(label = sprintf("\u25b2")), color = triangles$colors, hjust = pos)
}
}
p
:(
Bonus question: Is there a way I can avoid gaps in between the triangles, as per the 2nd chart?
EDIT: As per #baptiste 's suggestion, I re-processed the data as such:
mydata2 <- mydata %>% gather(key = comp, value = Present, -Result, -Category)
mydata2 <- mydata2 %>% mutate(colors = colors_tbl$colors[match(mydata2$Present, colors_tbl$Category)]) %>%
filter(!is.na(mydata2$Present)) %>% select(-comp)
mydata2 <- mydata2 %>% mutate(pos = if_else(Present == "A", -4, if_else(Present == "B", -5, if_else(Present == "C", -6, -7))))
p <- p + geom_text(data = mydata2, aes(x = Category, label = sprintf("\u25b2")), colour = mydata2$colors, hjust = mydata2$pos)
p
Ok, I got it to work. my bonus question still stands.
Related
Below is a simple example. I wish to create an outline of the bar graph plot.
Below is an example how the desire plot looks like.
Outlines bars in bar graph ggplot
library(tidyverse)
level <- c("a", "b", "c", "d", "e", "f", "g")
value <- c(8.1, 5.6, 3.2, 4.4, 3.5, 2.5, 1.8)
tbl <- tibble(level = level,
value = value)
# create plot using geom_step()
ggplot(data = tbl,
aes(x = level,
y = value)) +
geom_step(col = "black") +
theme_bw()
Modifying the linked answer to apply to your data frame, we get:
ggplot(tbl,
aes(x = level,
y = value)) +
geom_col(width = 1, fill = "#e0a0e8", alpha = 0.5) +
geom_step(data = tbl %>%
mutate(level = as.numeric(factor(level)) - 0.5) %>%
summarise(level = c(level[1], level, rep(last(level) + 1, 2)),
value = c(0, value, last(value), 0)),
aes(group = 1), col = "black") +
theme_bw(base_size = 20)
I've created a dodged bar chart in ggplot2 with geom_col(). The code looks like this:
cat <- c("A", "A", "A", "A","B", "B", "B", "B")
var <- c("X", "Y", "Z", "T", "X", "Y", "Z", "T")
val <- c(35, 25, 20, 20, 40, 10, 15, 35)
df <- data.frame(var, cat, val)
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = cat), position = "dodge")
This produces the following plot:
I would like each variable to have a different filling colour, for example T = Green, X = Blue etc. and still keep a colour separation between the categories, for example T-A = darkgreen, T-B = lightgreen, X-A = darkblue, X-B = lightblue etc.
Is there an easy way to add this feature?
Thanks!
I think the easiest way to do what you're asking is to use the alpha scale:
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = var, alpha = cat),
position = "dodge") +
scale_alpha_discrete(range = c(0.5, 1), guide = guide_none()) +
theme_classic()
If you really want to use a grid in the background and don't want to see lines through the pale bars, make sure you plot some white bars of the same dimension underneath:
ggplot(data = df) +
geom_col(aes(x = var, y = val, group = cat),
position = "dodge", fill = "white", alpha = 1) +
geom_col(aes(x = var, y = val, fill = var, alpha = cat),
position = "dodge") +
scale_alpha_discrete(range = c(0.5, 1), guide = guide_none())
Maybe this can be useful:
library(ggplot2)
#Data
cat <- c("A", "A", "A", "A","B", "B", "B", "B")
var <- c("X", "Y", "Z", "T", "X", "Y", "Z", "T")
val <- c(35, 25, 20, 20, 40, 10, 15, 35)
df <- data.frame(var, cat, val)
#Plot
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = interaction(var,cat)), position = "dodge")+
labs(fill='Var')
Output:
You can customize colors with scale_fill_*(). Here an example using a fill scale from ggsci package:
#Plot 2
ggplot(data = df) +
geom_col(aes(x = var, y = val, fill = interaction(var,cat)), position = "dodge")+
labs(fill='Var')+
ggsci::scale_fill_futurama()
Output:
I've searched and tried a bunch of suggestions to be able to display a custom legend instead of the default one in a grouped scatter ggplot. I've tried this and this and following this among others.
For instance, let's say I have a df like this one:
df = data.frame(id = c("A", "A", "B", "C", "C", "C"),
value = c(1,2,1,2,3,4),
ref = c(1.5, 1.5, 1, 2,2,2),
min = c(0.5, 0.5, 1,2,2,2))
and I want to display the values of each id as round dots, but also put the reference values and minimum values for each id as a differently shaped dot, as follows:
p = ggplot(data = df) +
geom_point(aes(x = id, y = value, color = factor(id)), shape = 19, size = 6) +
geom_point(aes(x = id, y = ref, color = factor(id)), shape = 0, size = 8) +
geom_point(aes(x = id, y = min, color = factor(id)), shape = 2, size = 8) +
xlab("") +
ylab("Value")
#print(p)
Now all is fine, but my legend doesn't add anything to the interpretation of the plot, as the X axis and colors are enough to understand it. I know I can remove the legend via theme(legend.position = "none").
Instead, I would like to have a legend of what the actual shapes of each dot represent (e.g., filled round dot = value, triangle = min, square = ref).
Among trying to manually set the scale values via scale_fill_manual and something along those lines
override.shape = shapes$shape
override.linetype = shapes$pch
guides(colour = guide_legend(override.aes = list(shape = override.shape, linetype = override.linetype)))...
....
I've also tried making a secondary plot, but not display it, using something suggested in one of the links pasted above:
shapes = data.frame(shape = c("value", "reference", "minimum"), pch = c(19,0,2), col = c("gray", "gray", "gray"))
p2 = ggplot(shapes, aes(shape, pch)) + geom_point()
#print(p2)
g_legend <- function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)
}
legend <- g_legend(p2)
library(gridExtra)
pp <- arrangeGrob(p1 ,legend,
widths=c(5/4, 1/4),
ncol = 2)
but then I get the error:
> legend <- g_legend(p2)
Error in tmp$grobs[[leg]] :
attempt to select less than one element in get1index
for which I did not find a working solution.. so yeah.. any suggestion on how I could only show a legend related to the different dot shapes would be welcome.
Thank you
You can manually build a shape legend using scale_shape_manual:
library(ggplot2)
ggplot(data = df) +
geom_point(aes(x = id, y = value, color = factor(id), shape = 'value'), size = 6) +
geom_point(aes(x = id, y = ref, color = factor(id), shape = 'ref'), size = 8) +
geom_point(aes(x = id, y = min, color = factor(id), shape = 'min'), size = 8) +
scale_shape_manual(values = c('value' = 19, 'ref' = 0, 'min' = 2)) +
xlab("") +
ylab("Value")
Created on 2020-04-15 by the reprex package (v0.3.0)
But a better way to do this would be to reshape the df to a long format, and map each aes to a variable:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(-id) %>%
ggplot() +
geom_point(aes(x = id, y = value, color = factor(id), shape = name, size = name)) +
scale_shape_manual(values = c('value' = 19, 'ref' = 0, 'min' = 2)) +
scale_size_manual(values = c('value' = 6, 'ref' = 8, 'min' = 8)) +
xlab("") +
ylab("Value")
Created on 2020-04-15 by the reprex package (v0.3.0)
To remove the legend for the color use guide_none():
library(tidyr)
library(ggplot2)
df %>%
pivot_longer(-id) %>%
ggplot() +
geom_point(aes(x = id, y = value, color = factor(id), shape = name, size = name)) +
scale_shape_manual(values = c('value' = 19, 'ref' = 0, 'min' = 2)) +
scale_size_manual(values = c('value' = 6, 'ref' = 8, 'min' = 8)) +
guides(color = guide_none()) +
xlab("") +
ylab("Value")
Created on 2020-04-16 by the reprex package (v0.3.0)
Data:
df = data.frame(id = c("A", "A", "B", "C", "C", "C"),
value = c(1,2,1,2,3,4),
ref = c(1.5, 1.5, 1, 2,2,2),
min = c(0.5, 0.5, 1,2,2,2))
You can tidy your data first using tidyr, and then map the aes shape to the new variable
library(tidyr)
df2 <- pivot_longer(df, -id)
ggplot(data = df2) +
geom_point(aes(x = id, y = value, shape = name), size = 6) +
xlab("") +
ylab("Value")
This is my df :
df <- data.frame(annee = rep(c(2003,2004), times = 1, each = 3), sps = c("a", "b", "c"), nb = 1:3)
I create a column containing my labels :
df$labels <- paste("nb", df$sps, "=", df$nb)
Then I do my plot :
ggplot(df, aes(nb)) +
geom_density(aes(fill = sps, colour = sps), alpha = 0.1) +
facet_wrap(~ annee) +
geom_text(data=df, aes(x=8, y=2.5, label= labels), colour="black", inherit.aes=FALSE, parse=FALSE)
But I have a problem with my text in each facet : I would like to have 3 lines (one for each sps).
I tried with the symbol "\n" but I failed in trying to obtain :
"nb a = 1 \n nb b = 2 \n nb c = 3" for each year
Thanks for help
You will have to concatenate what you want broken into several lines into one single string.
newdf <- aggregate(labels ~ annee, data = df, FUN = paste, collapse = "\n")
ggplot(df, aes(nb)) +
geom_density(aes(fill = sps, colour = sps), alpha = 0.1) +
facet_wrap(~ annee) +
geom_text(data = newdf, aes(x = 8, y = 2, label = labels), color = "black") +
scale_x_continuous(limits = c(0, 11)) +
scale_y_continuous(limits = c(0, 2.25))
You can achieve what you want by creating a separate data.frame for your labels:
library(tidyverse)
df <- data.frame(annee = rep(c(2003,2004),
times = 1, each = 3),
sps = c("a", "b", "c"),
nb = 1:3)
# create labels in separate data.frame
label_df <- df %>%
mutate(labels = paste("nb", sps, "=", nb)) %>%
group_by(annee) %>%
summarise(labels = paste(labels, collapse = "\n")) %>%
mutate(x = 6.5,
y = 2.2)
ggplot(df, aes(nb)) +
geom_density(aes(fill = sps, colour = sps), alpha = 0.1) +
facet_wrap(~annee) +
geom_text(data = label_df, aes(x = x, y = y, label = labels)) +
coord_cartesian(ylim = c(0, 2.4), xlim = c(1, 8))
I'm trying to draw a simple (scree)-plot with some extra geom_hline and geom_vlines thrown in.
Problem is: whenever I so much as add show_guide=TRUE or add some aes() to the geom_xline, I screw up the original legend.
Here's some (ugly) fake data:
exdf <- data.frame(rep(x=1:12, times = 3), rep(x = c("A", "B", "C"), times = 6), rnorm(36), stringsAsFactors = FALSE)
colnames(exdf) <- c("PC", "variable", "eigenvalue")
And here's my plot:
g <- ggplot(data = exdf, mapping = aes(x = factor(PC), y = eigenvalue))
g <- g + geom_line(mapping = aes(group = factor(variable), linetype = variable))
g <- g + geom_vline(xintercept = 7, colour = "green", show_guide = TRUE)
How do I add a separate legend for the geom_vline without polluting the other legend?
Can't wrap my head around why one layer's color would change that of another legend.
This partly solves the problem:
g <- ggplot(data = exdf, mapping = aes(x = factor(PC), y = eigenvalue))
g <- g + geom_line(mapping = aes(group = factor(variable), linetype = variable))
g <- g + geom_vline(aes(xintercept = x, colour = Threshold), data.frame(x = 7, Threshold = "A"), show_guide = TRUE) + scale_colour_manual(values = c(A = "green")
But the legend will still have crosses for the variable section, albeit not green ones.
Alternatively you could use a geom_line with a new data frame with two rows, both with the same x and y equal to the lower and upper bounds of your data. This will give you a legend that has a horizontal green line for your threshold and no vertical lines.
Based on #Nick K's suggestion in the above, here's a way to do this with clean legends via different data = for the different layers.
exdf <- data.frame(rep(x=1:12, times = 3), rep(x = c("A", "B", "C"), times = 6), rnorm(36), stringsAsFactors = FALSE)
colnames(exdf) <- c("PC", "variable", "eigenvalue")
g <- ggplot()
g <- g + geom_line(data = exdf, mapping = aes(x = factor(PC), y = eigenvalue, group = factor(variable), linetype = variable))
g
thresholds <- data.frame(threshold = "Threshold-A", PC = 7, ymin = min(exdf$eigenvalue), ymax = max(exdf$eigenvalue))
g <- g + geom_linerange(data = thresholds, mapping = aes(x = PC, ymin = ymin, ymax = ymax, color = threshold))
g
yields:
Notice:
I know, the original data exdf are dumb and make an ugly plot; that's not the point here.
Notice that you have to set the data = argument for both layers, and keep the first g <- ggplot() blank, otherwise ggplot2 gets confused about the dataframes.
yeah, it's a hack job (see below), and it also doesn't fill the y-height of the plot, as a geom_vline should.
As an add-on, (not a solution!), here's how it should work with geom_vline:
exdf <- data.frame(rep(x=1:12, times = 3), rep(x = c("A", "B", "C"), times = 6), rnorm(36), stringsAsFactors = FALSE)
colnames(exdf) <- c("PC", "variable", "eigenvalue")
g <- ggplot()
g <- g + geom_line(data = exdf, mapping = aes(x = factor(PC), y = eigenvalue, group = factor(variable), linetype = variable))
g
g + geom_vline(data = thresholds, mapping = aes(xintercept = PC, color = threshold), show_guide = TRUE)
yields:
That fills the yheight, as you would expect from geom_vline, but somehow messes up the legend of variable (notice the vertical lines).
Not sure why this is so, feels like a bug to me.
Here reported: https://github.com/hadley/ggplot2/issues/1267