So I create a boxplot of data and then add a set point over that data. I want my legend to capture what the data type of the geom_points represents. Thanks!
# Boxplots of `discuss` per state (states ordered by their median), with the
# points from `by_state` drawn on top. The point colour is a constant, not a
# mapped aesthetic.
ggplot() +
  geom_boxplot(
    mapping = aes(x = reorder(State, discuss, FUN = median), y = discuss),
    data = discuss_impact_by_county,
    outlier.shape = NA
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(x = "States") +
  geom_point(
    mapping = aes(x = State, y = discuss_happen_difference),
    data = by_state,
    colour = "red",
    size = 3,
    show.legend = TRUE
  )
If you want a legend you have to map something to an aesthetic. In your case map something on the color aes, i.e. move col="red" into aes() and use scale_color_manual to set the color value and the legend label to be assigned to the color label "red".
As you have only one "category" of points you can simply do scale_color_manual(values = "red", labels = "We are red points") to set the color and label. In case you have multiple points with different colors it's best to make use of a named vector to assign the colors and legend labels to the right "color label"s, i.e. use scale_color_manual(values = c(red = "red"), labels = c(red = "We are red points")).
Using some random example data try this:
library(ggplot2)
library(dplyr)

set.seed(42)

# Random example data: 100 observations spread over four states.
discuss_impact_by_county <- data.frame(
  State = sample(LETTERS[1:4], 100, replace = TRUE),
  discuss = runif(100, 1, 5)
)

# One value per state (the mean of `discuss`). `.groups = "drop"` makes the
# ungrouped result explicit.
by_state <- discuss_impact_by_county %>%
  group_by(State) %>%
  summarise(discuss_happen_difference = mean(discuss), .groups = "drop")

ggplot(data = NULL) +
  geom_boxplot(
    data = discuss_impact_by_county,
    aes(x = reorder(State, discuss, FUN = median), y = discuss),
    outlier.shape = NA
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(x = "States") +
  geom_point(
    data = by_state,
    # Mapping a constant to the colour aesthetic is what creates the legend.
    aes(x = State, y = discuss_happen_difference, col = "red_points"),
    size = 3,
    show.legend = TRUE
  ) +
  # `labels` is spelled out in full rather than relying on partial argument
  # matching of `label`.
  scale_color_manual(values = "red", labels = "We are red points")
Related
I am trying to get my ggplot2 legends to sit together well.
I have a fill legend and a colour legend and I want them to be over multiple rows at the base of the plot but with the colour legend continuing directly after the fill legend, rather than starting a new column.
I've made a quick example and desired output (just made in paint) below to illustrate
library(ggplot2)

set.seed(1)

# Example data: 84 random values, 7 classes with 12 month labels each.
testdf <- data.frame(
  mon = factor(month.abb, levels = month.abb),
  y = rnorm(84, mean = 20, sd = 10),
  cat = rep(paste0("class ", letters[1:7]), each = 12)
)

# Horizontal reference lines to draw on top of the bars.
thresholds <- data.frame(
  ThresholdNm = c("low", "high"),
  ThresholdVal = c(110, 150)
)

ggplot(testdf, aes(x = mon, y = y, fill = cat)) +
  geom_bar(stat = "identity") +
  geom_hline(
    mapping = aes(yintercept = ThresholdVal, colour = ThresholdNm),
    data = thresholds
  ) +
  scale_colour_manual(values = c("red", "black")) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  guides(
    fill = guide_legend(nrow = 3, byrow = FALSE, order = 1),
    colour = guide_legend(nrow = 2, byrow = FALSE, order = 2)
  )
This is what I get:
But what I am hoping for is this:
Created on 2022-11-10 by the reprex package (v0.3.0)
Adapting my answer on this post to your case you could achieve your desired result using a custom key glyph like so:
Basically this involves mapping ThresholdVal on the fill aes in geom_hline. Doing so will add the items to the fill legend too.
Create a color palette which could be used for both the fill and the color scale and which takes care of the right order of the items.
Write custom key glyph function which conditional on the color value switches between the key glyph used for bars and the one used for geom_hline
Remove the color legend.
Use theme options to get a border around all legend keys including the ones for the hlines.
library(ggplot2)

# One palette shared by the fill and colour scales: default hue colours for
# the classes, followed by the two threshold colours.
nclass <- nlevels(factor(testdf$cat))
pal <- setNames(
  c(scales::hue_pal()(nclass), "red", "black"),
  c(levels(factor(testdf$cat)), "high", "low")
)

# Custom legend key: keys whose fill is one of the threshold colours are drawn
# as a line in that colour; every other key uses the GeomCol key.
draw_key_cust <- function(data, params, size) {
  is_threshold_key <- data$fill %in% c("red", "black")
  if (!is_threshold_key) {
    return(GeomCol$draw_key(data, params, size))
  }
  data$colour <- data$fill
  data$fill <- NA
  draw_key_path(data, params, size)
}

ggplot(testdf, aes(x = mon, y = y, fill = cat)) +
  geom_bar(stat = "identity", key_glyph = "cust") +
  geom_hline(
    mapping = aes(
      yintercept = ThresholdVal,
      colour = ThresholdNm,
      fill = ThresholdNm
    ),
    data = thresholds
  ) +
  scale_fill_manual(values = pal, aesthetics = c("fill", "color")) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.key = element_rect(linewidth = .25 * .pt, color = "white")
  ) +
  guides(
    fill = guide_legend(nrow = 3, byrow = FALSE, order = 1),
    colour = "none"
  )
#> Warning in geom_hline(data = thresholds, aes(yintercept = ThresholdVal, :
#> Ignoring unknown aesthetics: fill
I would like the geom_text inside the geom_point to follow the re-positioning when applying position_dodge. That is, I would like to go from the code below:
# Two series ("diax", "diay") with one value per year.
Q <- data.frame(
  series = rep(c("diax", "diay"), 3),
  value = c(3.25, 3.30, 3.31, 3.36, 3.38, 3.42),
  year = c(2018, 2018, 2019, 2019, 2020, 2020)
) %>%
  as_tibble() %>%
  select(year, series, value)

# Large points with the formatted value printed in white inside each point.
ggplot(
  data = Q,
  mapping = aes(
    x = year,
    y = value,
    color = series,
    label = sprintf("%.2f", value)
  )
) +
  geom_point(size = 13) +
  geom_text(
    color = "white",
    fontface = "bold",
    size = 4,
    vjust = 0.4,
    show.legend = FALSE
  )
which produces the following chart:
to the following change:
# Same plot with both layers dodged. The text layer sets a constant colour, so
# only the point layer keeps the colour mapping.
ggplot(
  data = Q,
  mapping = aes(
    x = year,
    y = value,
    color = series,
    label = sprintf("%.2f", value)
  )
) +
  geom_point(position = position_dodge(width = 1), size = 13) +
  geom_text(
    color = "white",
    fontface = "bold",
    size = 4,
    vjust = 0.4,
    position = position_dodge(width = 1),
    show.legend = FALSE
  )
which produces the following chart:
The curious thing about this is that exactly the same change works just fine if I change from geom_point to geom_bar:
# Bar version: the grouping comes from the fill mapping, which the text layer
# does not override.
ggplot(
  Q,
  aes(
    x = year,
    y = value,
    fill = factor(series),
    label = sprintf("%.2f", value)
  )
) +
  geom_bar(position = position_dodge(width = 1), stat = "identity") +
  geom_text(
    position = position_dodge(width = 1),
    color = "black",
    fontface = "bold",
    size = 4,
    vjust = 0.4,
    show.legend = FALSE
  )
This happens because the dodging is based on the group aesthetic, which in this case is automatically set to series because of the mapping to color. The issue is that the text layer has its own color ("white"), so the grouping is dropped for that layer. Manually set the grouping, and all is good:
# An explicit `group = series` keeps the text layer grouped even though its
# colour is set to a constant.
ggplot(
  Q,
  aes(
    x = year,
    y = value,
    color = series,
    group = series,
    label = sprintf("%.2f", value)
  )
) +
  geom_point(position = position_dodge(width = 1), size = 13) +
  geom_text(
    position = position_dodge(width = 1),
    color = "white",
    fontface = "bold",
    size = 4,
    vjust = 0.4,
    show.legend = FALSE
  )
One workaround would be the following. Since you cannot place the labels on top of the dodged data points using geom_text() right away, you can take a small detour. I first created a temporary graphic with geom_point(). Then I accessed the data frame that is used for drawing the graphic, where you can find the x and y positions. Using them, I created a new data frame called temp which includes the position information and the label information. Once I had this data frame, I could draw the expected outcome using temp. Make sure that you use inherit.aes = FALSE in geom_text() since you are using another data frame.
library(dplyr)
library(ggplot2)

# Temporary plot: only the dodged points, so the computed positions can be
# read back from the built plot.
g <- ggplot(data = Q, aes(x = year, y = value, color = series)) +
  geom_point(size = 13, position = position_dodge(width = 1))

# ggplot_build(g)$data holds one data frame per layer; the single point layer
# carries the dodged x and the y of every point. The label is formatted from
# that same y, so it cannot get out of step with the row order of Q.
temp <- ggplot_build(g)$data[[1]] %>%
  select(x, y) %>%
  mutate(label = sprintf("%.2f", y))

# Add the labels at the dodged positions. inherit.aes = FALSE because `temp`
# has none of the columns used in the plot-level mapping.
g +
  geom_text(
    data = temp,
    aes(x = x, y = y, label = label),
    color = "white",
    inherit.aes = FALSE
  )
I've made a barplot in ggplot, and added a couple of lines. What happens is that the color and description of the lines don't correspond:
The yellow line should have the description 'Median Member', but is displayed as 'avg Member'. What happens here? The code I used:
library(ggplot2)
library(dplyr)

MemberID <- c(1, 1, 1, 2, 2, 2)
ClientCode <- c(10, 100, 1000, 20, 200, 2000)
Duration <- c(2356, 1560, 9000, 4569, 3123, 8000)
df <- data.frame(MemberID, ClientCode, Duration)

# `dr` holds the rows of member 1 only; `dr_avg` is the complete data.
dr <- filter(df, MemberID == 1)
dr_avg <- df

ggplot(
  dr,
  aes(
    x = reorder(as.character(ClientCode), -Duration),
    y = Duration,
    fill = -Duration
  )
) +
  # the height of the bar will represent the value in a column of the data frame
  geom_bar(stat = "identity") +
  labs(x = "ClientCode", y = "Duration (Minutes)") +
  # One geom_hline per statistic; the colour is a constant per layer and only
  # the linetype is mapped (to a label string).
  geom_hline(
    data = dr,
    mapping = aes(yintercept = mean(Duration), linetype = "Avg Member"),
    color = "red",
    show.legend = TRUE
  ) +
  geom_hline(
    data = dr,
    mapping = aes(yintercept = median(Duration), linetype = "Median Member"),
    color = "orange",
    show.legend = TRUE
  ) +
  geom_hline(
    data = dr_avg,
    mapping = aes(yintercept = mean(Duration), linetype = "Avg all data"),
    color = "blue",
    show.legend = TRUE
  ) +
  scale_linetype_manual(
    name = "Line",
    values = c(2, 2, 2),
    guide = guide_legend(
      override.aes = list(color = c("red", "orange", "blue"))
    )
  ) +
  coord_flip()
Don't create geom_hline for every line you want to insert. What if you have hundred of them? Create a separate object d and specify different linetypes and colors there geom_hline(data = d, aes(yintercept = value, linetype = name, color = name)). When you want to specify colors use: scale_colour_manual(values = c("red", "orange", "blue")).
# Member statistics come from the member subset `dr`. Computing them from `df`
# would make 'Avg Member' identical to 'Avg all data', because dr_avg is df.
d1 <- summarize(dr, mean(Duration), median(Duration))
d2 <- summarize(dr_avg, mean(Duration))

# One row per reference line: its position and its legend label.
d <- data.frame(
  value = as.numeric(c(d1, d2)),
  name = c("Avg Member", "Median Member", "Avg all data")
)

# A named vector ties each colour to its label. An unnamed vector is assigned
# in the order of the sorted labels, which is not the order used in `d`.
line_colours <- c(
  "Avg Member" = "red",
  "Median Member" = "orange",
  "Avg all data" = "blue"
)

ggplot(
  dr,
  aes(
    reorder(as.character(ClientCode), -Duration),
    Duration,
    fill = factor(-Duration)
  )
) +
  geom_bar(stat = "identity") +
  labs(
    x = "ClientCode",
    y = "Duration (Minutes)"
  ) +
  # A single layer draws all reference lines; linetype and colour are both
  # mapped to `name`.
  geom_hline(
    data = d,
    aes(yintercept = value, linetype = name, color = name)
  ) +
  scale_fill_brewer(palette = "Dark2") +
  scale_colour_manual(values = line_colours) +
  coord_flip() +
  theme_bw()
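PS.: If two of the lines overlap, check which data frame each summary is computed from: an 'Avg Member' value computed from the full data (df, which is identical to dr_avg) coincides with 'Avg all data'.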
I have created a line chart (plot) in R with labels on each data point. Due to the large number of data points, the plot becomes very crowded with labels. I would like to apply the labels only to the last N (say 4) data points. I have tried subset and tail in the geom_label_repel function but was not able to figure them out, or got an error message. My data set consists of 99 values, spread over 3 groups (KPI).
I have the following code in R:
# The package is called ggplot2; library(ggplot) fails with "no package called".
library(ggplot2)
library(ggrepel)

# `....` is a placeholder for the path of the CSV file.
data.trend <- read.csv(file = ....)

# Line chart per KPI with a repelled label on every data point. The stray
# closing `);` after the last layer was removed; it left the parentheses
# unbalanced.
plot.line <- ggplot(
  data = data.trend,
  aes(x = Version, y = Value, group = KPI, color = KPI)
) +
  geom_line(aes(group = KPI), size = 1) +
  geom_point(size = 2.5) +
  # Labels defined here
  geom_label_repel(
    aes(Version, Value, fill = factor(KPI), label = sprintf('%0.1f%%', Value)),
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.4, "lines"),
    segment.color = 'grey50',
    show.legend = FALSE
  )
In all fairness, I am quite new to R. Maybe I am missing something basic.
Thanks in advance.
The simplest approach is to set the data = parameter in geom_label_repel to only include the points you want labeled.
Here's a reproducible example:
library(ggplot2)
library(ggrepel)

set.seed(1235)

# Random example data. `replace = TRUE` is spelled out: `T` is an ordinary
# variable that can be reassigned.
data.trend <- data.frame(
  Version = rnorm(25),
  Value = rnorm(25),
  group = sample(1:2, 25, replace = TRUE),
  KPI = sample(1:2, 25, replace = TRUE)
)

ggplot(data = data.trend, aes(x = Version, y = Value, group = KPI, color = KPI)) +
  geom_line(aes(group = KPI), size = 1) +
  geom_point(size = 2.5) +
  geom_label_repel(
    aes(Version, Value, fill = factor(KPI), label = sprintf('%0.1f%%', Value)),
    # Only the last four rows of the data are handed to the label layer.
    data = tail(data.trend, 4),
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.4, "lines"),
    segment.color = 'grey50',
    show.legend = FALSE
  )
Unfortunately, this messes slightly with the repel algorithm, making the label placement suboptimal with respect to the other points which are not labelled (you can see in the above figure that some points get covered by labels).
So, a better approach is to use color and fill to simply make the unwanted labels invisible (by setting both color and fill to NA for labels you want to hide):
# Number of labels to show (the last rows of the data) and number to hide,
# derived from the data instead of hard-coding 21 + 4 for a 25-row data set.
n_labelled <- min(4, nrow(data.trend))
n_hidden <- nrow(data.trend) - n_labelled

ggplot(data = data.trend, aes(x = Version, y = Value, group = KPI, color = KPI)) +
  geom_line(aes(group = KPI), size = 1) +
  geom_point(size = 2.5) +
  geom_label_repel(
    aes(Version, Value, fill = factor(KPI), label = sprintf('%0.1f%%', Value)),
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.4, "lines"),
    show.legend = FALSE,
    # Every row gets a label, but the first n_hidden ones have colour and fill
    # set to NA, which makes them invisible.
    color = rep(c(NA, 'grey50'), times = c(n_hidden, n_labelled)),
    fill = rep(c(NA, 'lightblue'), times = c(n_hidden, n_labelled))
  )
If you want to show just the last label, using group_by and filter may work:
# Keeps, within each KPI group, only the row(s) whose Version equals the
# group's maximum Version; meant to be passed as the `data` argument of
# geom_label_repel().
data = data.trend %>% group_by(KPI) %>% filter(Version == max(Version))
Full example:
suppressPackageStartupMessages(library(dplyr))
library(ggplot2)
library(ggrepel)

set.seed(1235)

# Random example data. `replace = TRUE` is spelled out: `T` is an ordinary
# variable that can be reassigned.
data.trend <- data.frame(
  Version = rnorm(25),
  Value = rnorm(25),
  group = sample(1:2, 25, replace = TRUE),
  KPI = sample(1:2, 25, replace = TRUE)
)

# Per KPI, the row(s) with the largest Version. Ungrouped afterwards so the
# label layer receives a plain data frame.
last_points <- data.trend %>%
  group_by(KPI) %>%
  filter(Version == max(Version)) %>%
  ungroup()

ggplot(data = data.trend, aes(x = Version, y = Value, group = KPI, color = KPI)) +
  geom_line(aes(group = KPI), size = 1) +
  geom_point(size = 2.5) +
  # Labels defined here
  geom_label_repel(
    data = last_points,
    # No fill mapping here: the constant fill = "white" below overrides it.
    aes(Version, Value, label = sprintf('%0.1f%%', Value)),
    color = "black",
    fill = "white"
  )
Or if you want to show 4 random labels per KPI, data.trend %>% group_by(KPI) %>% sample_n(4):
suppressPackageStartupMessages(library(dplyr))
library(ggplot2)
library(ggrepel)

set.seed(1235)

# Random example data; KPI is a factor here. `replace = TRUE` is spelled out:
# `T` is an ordinary variable that can be reassigned.
data.trend <- data.frame(
  Version = rnorm(25),
  Value = rnorm(25),
  group = sample(1:2, 25, replace = TRUE),
  KPI = as.factor(sample(1:2, 25, replace = TRUE))
)

ggplot(data = data.trend, aes(x = Version, y = Value, group = KPI, color = KPI)) +
  geom_line(aes(group = KPI), size = 1) +
  geom_point(size = 2.5) +
  # Labels defined here
  geom_label_repel(
    # Four randomly chosen rows per KPI.
    data = data.trend %>% group_by(KPI) %>% sample_n(4),
    # `fill` is mapped once. Mapping it twice triggers the warning
    # "Duplicated aesthetics after name standardisation: fill".
    aes(Version, Value, fill = KPI, label = sprintf('%0.1f%%', Value)),
    color = "black",
    show.legend = FALSE
  )
Created on 2021-08-27 by the reprex package (v2.0.1)
I have a problem with plot. I want to show only dot points in group A, not in each name. Here is an example:
name <- c("a", "b", "c", "d")

# Three series (A, B, C) with five observations each.
df <- data.frame(
  id = rep(1:5, times = 3),
  value = c(seq(50, 58, 2), seq(60, 68, 2), seq(70, 78, 2)),
  name = rep(c("A", "B", "C"), each = 5),
  type = rep(c("a", "b", "c", "d", "r"), times = 3)
)
df$name <- factor(df$name, levels = c("C", "B", "A"), ordered = TRUE)

# Overlapping areas per `name`, with a point for every row of the data.
ggplot(df, aes(x = id, y = value, fill = name, color = type)) +
  geom_area(position = "identity", colour = "black", linetype = 1, size = 1) +
  geom_point(size = 8) +
  guides(
    fill = guide_legend(override.aes = list(colour = NULL, shape = NA))
  )
If I am reading the question correctly, it seems that you want dots for the blue area only. In that case, you could subset the data and use it for geom_point.
# Same plot, but the point layer only receives the rows where name is "A".
ggplot(df, aes(x = id, y = value, fill = name, color = type)) +
  geom_area(position = "identity", colour = "black", linetype = 1, size = 1) +
  geom_point(size = 8, data = subset(df, name == "A")) +
  guides(
    fill = guide_legend(override.aes = list(colour = NULL, shape = NA))
  )