ggplot : Adding mean/error bars for dotplot with different groups - r

I'm new to ggplot and starting from this graph :
library(ggplot2)
library(reshape2)
data <- read.delim(textConnection("
Sample Day_0 Day_1 Day_4 Day_5 Day_7
NM 1000 221000 6620000 17200000 43700000
OG 1000 351000 1750000 6880000 18300000
OD 1000 961000 1090000 6380000 4400000
ODD 1000 1060000 3550000 12000000 13100000"), sep = " ")
data_melt <- melt(data, id.var = "Sample")
data_melt$value <- as.numeric(data_melt$value)
ggplot(data=data_melt, aes(x=variable, y=value, color = Sample)) + geom_point(size = 2.5) + scale_y_continuous(trans=log2_trans(), breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) +
ggtitle("My_Title") + xlab("My_X") + ylab("My_Axis") + theme(plot.title = element_text(hjust = 0.5)) + expand_limits(y = c(10^3, 10^8))
see the graph result
What I would like to do is to add mean and error bars of the 4 points each of the "Days" (in this kind of way for example, picture from http://www.sthda.com/).
Any method/advice would be helpful !

You could do this using geom_errorbarand adding the relevant statistics when defining your data set. For the length of the error bars the code below simply uses the the 0.25/0.75 empirical quantiles. If you want to change that, just change lower and upper to the ranges you are interested in.
library(dplyr)
data_melt <- data_melt %>% group_by(variable) %>% mutate(upper = quantile(value, 0.75),
lower = quantile(value, 0.25),
mean = mean(value))
# How the first 9 values of your data set should look like now:
#A tibble: 20 x 6
## Groups: variable [5]
#Sample variable value upper lower mean
#<fctr> <fctr> <dbl> <dbl> <dbl> <dbl>
#1 NM Day_0 1000 1000 1000 1000
#2 OG Day_0 1000 1000 1000 1000
#3 OD Day_0 1000 1000 1000 1000
#4 ODD Day_0 1000 1000 1000 1000
#5 NM Day_1 221000 985750 318500 648250
#6 OG Day_1 351000 985750 318500 648250
#7 OD Day_1 961000 985750 318500 648250
#8 ODD Day_1 1060000 985750 318500 648250
#9 NM Day_4 6620000 4317500 1585000 3252500
ggplot(data=data_melt, aes(x=variable, y=value, color = Sample)) +
geom_point(size = 2.5) + scale_y_continuous(trans=log2_trans(),
breaks = trans_breaks("log10",
function(x) 10^x),
labels = trans_format("log10",
math_format(10^.x))) +
ggtitle("My_Title") +
xlab("My_X") + ylab("My_Axis") +
theme(plot.title = element_text(hjust = 0.5)) +
expand_limits(y = c(10^3, 10^8)) +
geom_errorbar(aes(ymin = lower, ymax = upper),col = "red",
width = 0.25) +
geom_point(aes(x = variable, y = mean), size = 3, col = "red")

Related

label mean lines in ggplot that are mapped in a group

I have density plots for each shift and year. The means are plotted by grouping in a df called mu. I also add vertical reference lines which I can label without issue but I cannot seem to get the labels on the grouped vertical lines. You will see my latest attempt which throws an error "Aesthetics must be either length 1 or the same as the data (134): x"
My code
library(ggplot2)
library(dplyr)
df <- read.csv("f4_bna_no_cup.csv")
head(df)
ï..n yr s ys x
1 1 2021 1 2021-1 116.83
2 2 2021 1 2021-1 114.83
3 3 2021 1 2021-1 115.50
4 4 2021 1 2021-1 115.42
5 5 2021 1 2021-1 115.58
6 6 2021 1 2021-1 115.58
#summarize means by ys (year-shift)
mu <- df %>%
group_by(ys,s) %>%
summarise(grp.mean = mean(x))
mu
ys s grp.mean
<chr> <int> <dbl>
1 2021-1 1 116.
2 2021-2 2 117.
3 2022-1 1 114.
4 2022-2 2 115.
llab<-mu
shift <- c("Shift 1", "Shift 2")
#density charts on df
ggplot(data=df, aes(x=x,group =ys, fill = yr, color = yr)) +
geom_density(alpha = 0.4) +
scale_x_continuous(limits=c(112,120))+
geom_vline(aes(xintercept = grp.mean), data = mu, linetype = "dashed", size = 0.5) +
geom_text(aes(x=llab$grp.mean, y=.6), label = llab$ys) + #this throws the error
geom_vline(aes(xintercept=114.8), linetype="dashed", size=0.5, color = 'green3') +
geom_text(aes(x=114.8, y=.6), label = "Target", angle = 90, color="black",size=3) +
geom_vline(aes(xintercept=114.1), linetype="solid", size=0.5, color = 'limegreen') +
geom_text(aes(x=114.1, y=.55), label = "Potential", angle = 90, color="black",size=3 ) +
geom_vline(aes(xintercept=113.4), linetype="solid", size=0.5, color = 'firebrick3') +
geom_text(aes(x=113.4, y=.62), label = "Label wt", angle = 90,
color="black",size=3, family = "Times New Roman", vjust=0) +
facet_grid(
.~s,
labeller = labeller(
s = c(`1` = "Shift 1", `2` = "Shift 2")
))+
theme_light()+
theme(legend.position = "none")
Output so far...I'm so close.
Persistence pays off. I figured it out and thought I would share it in case someone else has a similar problem:
All code remains the same as in my question except a slight change to grouping for the mu df, AND replace the line that I noted as throwing the error as follows:
#small change to group_by, retaining yr
mu <- df %>%
group_by(yr,s,ys) %>%
summarise(grp.mean = mean(x))
Replace: geom_text(aes(x=llab$grp.mean, y=.6), label = llab$ys), with
geom_text(data = mu, aes(label = yr), x = mu$grp.mean, y = .60, color = "black", angle = 90, vjust = 0)

Label of last value in geom_interval

I'm projecting a variable for the next 120 months. I'm having trouble with the following when using ggplot:
In the intervals I'm creating I want to display the last value of each one. Ideally, I want some label that says -for example- for the interval 0.8: "80%:(here would go the last value of that interval)". If this is too difficult, then just the value would be perfect.
Here is a reproducible example
#libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggfan)
library(gridExtra)
library(stringr)
library(scales)
#Create a dataframe
month <- 1:120
price_a <- 5000
demand <- 10
data <- data.frame(month, price_a, demand)
#Create 100 simulations to project price_a and demand for the future
simulations <- 100
intervalo <- seq_len(120)
set.seed(96)
lista_meses <- lapply(setNames(intervalo, paste0("data", intervalo)), function(i) {
cbind(
data[rep(i, simulations),],
growth_pricea = as.numeric(runif(simulations, min = -0.02, max = 0.05)),
growth_demand = as.numeric(runif(simulations, min = -0.03, max = 0.03)),
revenue = demand*price_a
)
})
#Calculate the growth of each variable and revenue
for (i in 2:length(lista_meses)){
lista_meses[[i]][["price_a"]] <- lista_meses[[i-1]][["price_a"]]*(1+lista_meses[[i]][["growth_pricea"]])
lista_meses[[i]][["demand"]] <- lista_meses[[i-1]][["demand"]]*(1+lista_meses[[i]][["growth_demand"]])
lista_meses[[i]][["revenue"]] <- lista_meses[[i]][["price_a"]]*lista_meses[[i]][["demand"]]
}
#Extract revenue columns from all dataframes in list
time <- 1:120 #10 years.
extract_column <- lapply(lista_meses, function(x) x["revenue"])
fandataq <- do.call("cbind", extract_column)
mandataq <- as.matrix.data.frame(fandataq)
pdataq <- data.frame(x=time, t(fandataq)) %>% gather(key=sim, value=y, -x)
#Graph: I WANT TO SHOW THE LAST VALUES OF EACH INTERVAL IN GEOM_INTERVAL
ggplot(pdataq, aes(x=x, y= y)) + geom_fan(intervals =c(80)/100, show.legend = FALSE) +
scale_fill_gradient(low="steelblue1", high="steelblue")+scale_y_continuous(labels = scales::comma)+
geom_interval(intervals = c(0.80,1), show.legend = FALSE) + scale_linetype_manual(values=c("dotted", "dotted")) +
theme_bw()
Does anybody knows how to achieve this? Thanks in advance!
This could be accomplished by pre-calculating the labels and feeding those in as text:
probs = c(0, 0.1, 0.9, 1) # 80% interval from 0.1 to 0.9
label_table <- tibble(x = max(pdataq$x),
probs,
y = quantile(pdataq[pdataq$x == max(pdataq$x), "y"],
probs = probs),
y_label = scales::comma(y))
# OR, using ggfan::calc_quantiles:
#label_table <- calc_quantiles(pdataq, intervals = c(0.8, 1), x_var = "x", y_var = "y") %>%
# ungroup() %>%
# filter(x == max(x)) %>%
# mutate(y_label = scales::comma(y))
## A tibble: 4 x 4
# x probs y y_label
# <int> <dbl> <dbl> <chr>
#1 120 0 124311. 124,311
#2 120 0.1 198339. 198,339
#3 120 0.9 434814. 434,814
#4 120 1 520464. 520,464
ggplot(pdataq, aes(x=x, y= y)) +
geom_fan(intervals =c(80)/100, show.legend = FALSE) +
scale_fill_gradient(low="steelblue1", high="steelblue")+
scale_y_continuous(labels = scales::comma)+
geom_interval(intervals = c(0.80,1), show.legend = FALSE) +
geom_text(data = label_table,
aes(label = y_label), hjust = -0.1, size = 3) +
coord_cartesian(clip = "off") +
scale_x_continuous(expand = expansion(add = c(5, 20))) +
scale_linetype_manual(values=c("dotted", "dotted")) +
theme_bw()

How to show ticks label in x-axis for ggplot cumsum plot

I have the following data frame. It consists of two columns and ninety-four rows.
library(tidyverse)
ndat <- structure(list(sample_name = c("scFOOBAR_96_S98", "scFOOBAR_20_S22",
"scFOOBAR_83_S85", "scFOOBAR_24_S26", "scFOOBAR_76_S78", "scFOOBAR_72_S74",
"scFOOBAR_19_S21", "scFOOBAR_60_S62", "scFOOBAR_18_S20", "scFOOBAR_23_S25",
"scFOOBAR_92_S94", "scFOOBAR_67_S69", "scFOOBAR_08_S10", "scFOOBAR_77_S79",
"scFOOBAR_27_S29", "scFOOBAR_71_S73", "scFOOBAR_63_S65", "scFOOBAR_80_S82",
"scFOOBAR_36_S38", "scFOOBAR_31_S33", "scFOOBAR_86_S88", "scFOOBAR_82_S84",
"scFOOBAR_22_S24", "scFOOBAR_14_S16", "scFOOBAR_04_S6", "scFOOBAR_30_S32",
"scFOOBAR_10_S12", "scFOOBAR_88_S90", "scFOOBAR_91_S93", "scFOOBAR_46_S48",
"scFOOBAR_25_S27", "scFOOBAR_29_S31", "scFOOBAR_38_S40", "scFOOBAR_34_S36",
"scFOOBAR_51_S53", "scFOOBAR_85_S87", "scFOOBAR_35_S37", "scFOOBAR_79_S81",
"scFOOBAR_95_S97", "scFOOBAR_56_S58", "scFOOBAR_48_S50", "scFOOBAR_52_S54",
"scFOOBAR_03_S5", "scFOOBAR_47_S49", "scFOOBAR_73_S75", "scFOOBAR_87_S89",
"scFOOBAR_40_S42", "scFOOBAR_55_S57", "scFOOBAR_65_S67", "scFOOBAR_43_S45",
"scFOOBAR_41_S43", "scFOOBAR_09_S11", "scFOOBAR_05_S7", "scFOOBAR_33_S35",
"scFOOBAR_90_S92", "scFOOBAR_57_S59", "scFOOBAR_01_S3", "scFOOBAR_94_S96",
"scFOOBAR_70_S72", "scFOOBAR_49_S51", "scFOOBAR_81_S83", "scFOOBAR_75_S77",
"scFOOBAR_68_S70", "scFOOBAR_21_S23", "scFOOBAR_74_S76", "scFOOBAR_64_S66",
"scFOOBAR_17_S19", "scFOOBAR_53_S55", "scFOOBAR_26_S28", "scFOOBAR_78_S80",
"scFOOBAR_06_S8", "scFOOBAR_84_S86", "scFOOBAR_15_S17", "scFOOBAR_66_S68",
"scFOOBAR_28_S30", "scFOOBAR_44_S46", "scFOOBAR_32_S34", "scFOOBAR_50_S52",
"scFOOBAR_54_S56", "scFOOBAR_02_S4", "scFOOBAR_62_S64", "scFOOBAR_69_S71",
"scFOOBAR_07_S9", "scFOOBAR_59_S61", "scFOOBAR_13_S15", "scFOOBAR_45_S47",
"scFOOBAR_37_S39", "scFOOBAR_61_S63", "scFOOBAR_42_S44", "scFOOBAR_11_S13",
"scFOOBAR_58_S60", "scFOOBAR_16_S18", "scFOOBAR_12_S14", "scFOOBAR_39_S41"
), readcount = c(7.5e-05, 0.208259, 0.317617, 0.217022, 0.24163,
0.178144, 0.203187, 0.326574, 0.46154, 0.241296, 3.8e-05, 0.180657,
0.296669, 0.2436, 0.372329, 0.154357, 0.332183, 0.100498, 0.110694,
0.304405, 0.150185, 0.20115, 0.28345, 0.411268, 0.249103, 0.389757,
0.348236, 0.071293, 5.3e-05, 0.383666, 0.221019, 0.368074, 0.164428,
0.121094, 0.056566, 0.12801, 0.045516, 0.054762, 2.3e-05, 0.037221,
0.053614, 0.0308, 0.060173, 0.061752, 0.019005, 0.011073, 0.004948,
0.00827, 0.011163, 0.010636, 0.017856, 0.019902, 0.021611, 0.010224,
2.9e-05, 0.015984, 0.011805, 3.1e-05, 0.017305, 0.00265, 0.018211,
0.010304, 0.011447, 0.033347, 0.011484, 0.015949, 0.042047, 0.005027,
0.033604, 0.019413, 0.032072, 0.010956, 0.012573, 0.014042, 0.021858,
0.01491, 0.017772, 0.008882, 0.016791, 0.022836, 0.023896, 0.012391,
0.026814, 0.011281, 0.015943, 0.01875, 0.010579, 0.017783, 0.019474,
0.016439, 0.015619, 0.009522, 0.009722, 0.011995)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -94L))
ndat
#> # A tibble: 94 x 2
#> sample_name readcount
#> <chr> <dbl>
#> 1 scFOOBAR_96_S98 0.000075
#> 2 scFOOBAR_20_S22 0.208
#> 3 scFOOBAR_83_S85 0.318
#> 4 scFOOBAR_24_S26 0.217
#> 5 scFOOBAR_76_S78 0.242
#> 6 scFOOBAR_72_S74 0.178
#> 7 scFOOBAR_19_S21 0.203
#> 8 scFOOBAR_60_S62 0.327
#> 9 scFOOBAR_18_S20 0.462
#> 10 scFOOBAR_23_S25 0.241
#> # ... with 84 more rows
What I want to do is to make a cumulative plot.
This is what I use:
ggplot(data = ndat, aes(x = 1:dim(ndat)[1], y = cumsum(readcount))) +
geom_line() +
geom_point() +
theme(axis.text.x = element_text(angle=90, hjust = 1)) +
scale_x_discrete(labels = ndat$sample_name) +
ylab("Cumulative read counts (million)") +
xlab("barcode")
This is the result I get:
Notice that the x-axis tick labels are gone, despite of I have this line in my code: scale_x_discrete(labels = ndat$sample_name).
The text like scFOOBAR_96_S98 should appear as the tick label in x-axis.
What's the right way to make the plot?
Here's an approach where I made sample_name into an ordered factor so that it plots in the order of the table row instead of alphabetically.
ndat %>%
mutate(cuml_read = cumsum(readcount),
sample_name = fct_reorder(sample_name, row_number())) %>%
ggplot(aes(x = sample_name, y = cuml_read, group = 1)) +
geom_line() +
geom_point() +
theme(axis.text.x = element_text(angle=90, hjust = 1, size = 6)) +
ylab("Cumulative read counts (million)") +
xlab("barcode")
Edit: OP noted a problem with running the plot in plotly::ggplotly. Here's an alternative to try, which switches from using a factor for the x axis to a continuous numeric scale with labels taken from the sample_name column.
sample_names <- ndat$sample_name
ndat %>%
mutate(cuml_read = cumsum(readcount),
row = row_number(),
sample_name = fct_reorder(sample_name, row_number())) %>%
ggplot(aes(x = row, y = cuml_read, group = 1)) +
geom_line() +
geom_point() +
theme(axis.text.x = element_text(angle=90, hjust = 1, size = 6)) +
scale_x_continuous(breaks = 1:nrow(ndat),
labels = sample_names) +
ylab("Cumulative read counts (million)") +
xlab("barcode")

Small ggplots on a ggmap - a purrr map version

Based on Small ggplot2 plots placed on coordinates on a ggmap
I would like to have the same solution, but with ggplot function outside the pipeline, applied with purrr::map().
The data for small bar subplots indicating 2 values, may contain
lon, lat, id, valueA, valueB,
After tidyr::gather operation it may look like:
Town, Potential_Sum, lon, lat, component , sales
Aaa, 9.00, 20.80, 54.25, A, 5.000
Aaa, 9.00, 20.80, 54.25, B, 4.000
Bbb, 5.00, 19.60, 50.50, A, 3.000
Bbb, 5.00, 19.60, 50.50, B, 2.000
Current working solution is to use do() to generate sublopts and then ggplotGrob to generate a column with objects "grobs" to be placed at lon,lat locations on a ggmap.
maxSales <- max(df$sales)
df.grobs <- df %>%
do(subplots = ggplot(., aes(1, sales, fill = component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, maxSales)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(sales>0,round(sales), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
df.grobs %>%
{p + geom_label(aes(x = 15, y = 49.8, label = "A"), colour = c("black"),fill = "green", size=3)+
geom_label(aes(x = 15, y = 5.01, label = "B"), colour = c("black"),fill = "red", size=3)+
.$subgrobs +
geom_text(data=df, aes(label = Miasto), vjust = 3.5,nudge_x = 0.05, size=2.5) +
geom_col(data = df,
aes(0,0, fill = component),
colour = "white")}
p is a ggmap object, map of Poland, on which I would like to place small plots:
# p <-
# get_googlemap(
# "Poland",
# maptype = "roadmap",
# zoom = 6,
# color = "bw",
# crop = T,
# style = "feature:all|element:labels|visibility:off" # 'feature:administrative.country|element:labels|visibility:off'
# ) %>% # or 'feature:all|element:labels|visibility:off'
# ggmap() + coord_cartesian() +
# scale_x_continuous(limits = c(14, 24.3), expand = c(0, 0)) +
# scale_y_continuous(limits = c(48.8, 55.5), expand = c(0, 0))
#
How to translate this solution to the syntax nest - apply -unnest so that the ggplot part should be outside of the piped expression as a function.
In other words. How to replace do() with map(parameters, GGPlot_function) and then plot grobs on a ggmap .
What I did so far was I tried to write a ggplot function
#----barplots----
maxSales <- max(df$sales)
fn_ggplot <- function (df, x, component, maxX) {
x <- enquo(x)
component <-enquo(component)
maxX <-enquo(maxX)
p <- ggplot(df, aes(1, !!x, fill = !!component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, !!maxX)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(x>0,round(!!x), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)
return(p)
}
And got totaly confused trying to apply it like this (I am a constant beginner unfortunately)... this is not working, showing
df.grobs <- df %>%
mutate(subplots = pmap(list(.,sales,component,Potential_Sum),fn_ggplot)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
I get errors indicating I do not know what I am doing, ie lengths of arguments are incorrect and something else is expected.
message: Element 2 of `.l` must have length 1 or 7, not 2
class: `purrr_error_bad_element_length`
backtrace:
1. dplyr::mutate(...)
12. purrr:::stop_bad_length(...)
13. dplyr::mutate(...)
Call `rlang::last_trace()` to see the full backtrace
> rlang::last_trace()
x
1. +-`%>%`(...)
2. | +-base::withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
3. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
4. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
5. | \-global::`_fseq`(`_lhs`)
6. | \-magrittr::freduce(value, `_function_list`)
7. | \-function_list[[i]](value)
8. | +-dplyr::mutate(...)
9. | \-dplyr:::mutate.tbl_df(...)
10. | \-dplyr:::mutate_impl(.data, dots, caller_env())
11. +-purrr::pmap(list(., sales, component, Potential_Sum), fn_ggplot)
12. \-purrr:::stop_bad_element_length(...)
13. \-purrr:::stop_bad_length(...)
data
First let's build some sample data close to yours but reproducible without the need for an api key.
As a starting point we have a plot of a country map stored in p, and some data in long form to build the charts stored in plot_data.
library(maps)
library(tidyverse)
p <- ggplot(map_data("france"), aes(long,lat,group=group)) +
geom_polygon(fill = "lightgrey") +
theme_void()
set.seed(1)
plot_data <- tibble(lon = c(0,2,5), lat = c(44,48,46)) %>%
group_by(lon, lat) %>%
do(tibble(component = LETTERS[1:3], value = runif(3,min=1,max=5))) %>%
mutate(total = sum(value)) %>%
ungroup()
plot_data
# # A tibble: 9 x 5
# lon lat component value total
# <dbl> <dbl> <chr> <dbl> <dbl>
# 1 0 44 A 2.06 7.84
# 2 0 44 B 2.49 7.84
# 3 0 44 C 3.29 7.84
# 4 2 48 A 4.63 11.0
# 5 2 48 B 1.81 11.0
# 6 2 48 C 4.59 11.0
# 7 5 46 A 4.78 11.9
# 8 5 46 B 3.64 11.9
# 9 5 46 C 3.52 11.9
define a plotting function
we isolate the plotting code in a separate function
my_plot_fun <- function(data){
ggplot(data, aes(1, value, fill = component)) +
geom_col(position = position_dodge(width = 1),
alpha = 0.75, colour = "white") +
geom_text(aes(label = round(value, 1), group = component),
position = position_dodge(width = 1),
size = 3) +
theme_void()+ guides(fill = F)
}
build a wrapper
This function takes a data set, some coordinates and the plotting function as parameters, to annotate at the right spot.
annotation_fun <- function(data, lat,lon, plot_fun) {
subplot = plot_fun(data)
sub_grob <- annotation_custom(ggplotGrob(subplot),
x = lon-0.5, y = lat-0.5,
xmax = lon+0.5, ymax = lat+0.5)
}
The final code
The the code becomes simple, using nest and pmap
subgrobs <- plot_data %>%
nest(-lon,-lat) %>%
pmap(annotation_fun,plot_fun = my_plot_fun)
p + subgrobs

Enforce same color palette for `color` and `fill` of a subset of data

Having the following sample dataset:
set.seed(20)
N <- 20
df1 <- data.frame(x = rnorm(N),
y = rnorm(N),
grp = paste0('grp_', sample(1:500, N, T)),
lab = sample(letters, N, T))
# x y grp lab
# 1 1.163 0.237 grp_104 w
# 2 -0.586 -0.144 grp_448 y
# 3 1.785 0.722 grp_31 m
# 4 -1.333 0.370 grp_471 z
# 5 -0.447 -0.242 grp_356 o
I want to plot all points but label only subset of them (say, those df1$x>0). It works fine when I use the same color=grp aesthetics for both geom_point and geom_text:
ggplot(df1, aes(x=x,y=y,color=grp))+
geom_point(size=4) +
geom_text(aes(label=lab),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none")
But if I want to change points design to fill=grp, colors of labels do not match anymore:
ggplot(df1, aes(x=x,y=y))+
geom_point(aes(fill=grp),size=4,shape=21) +
geom_text(aes(label=lab,color=grp),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none")
I understand palette is different because levels of the subset are not the same as levels of the whole dataset. But what would be the simplest solution to enforce using the same palette?
The issue arises from different factor levels for the text and fill colours. We can avoid dropping unused factor levels by using drop = FALSE inside scale_*_discrete:
ggplot(df1, aes(x=x,y=y))+
geom_point(aes(fill=grp),size=4,shape=21) +
geom_text(aes(label=lab,color=grp),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none") +
scale_fill_discrete(drop = F) +
scale_colour_discrete(drop = F)
Update
With your real data we need to make sure that grp is in fact a factor.
# Load sample data
load("df1.Rdat")
# Make sure `grp` is a factor
library(tidyverse)
df1 <- df1 %>% mutate(grp = factor(grp))
# Or in base R
# df1$grp = factor(df1$grp)
# Same as before
ggplot(df1, aes(x=x,y=y))+
geom_point(aes(fill=grp),size=4,shape=21) +
geom_text(aes(label=lab,color=grp),data=df1[df1$x>1,],size=5,hjust=1,vjust=1)+
theme(legend.position="none") +
scale_fill_discrete(drop = F) +
scale_colour_discrete(drop = F)
One way is to leave the colour / fill palettes alone, & set all unwanted labels to be transparent instead:
ggplot(df1, aes(x = x, y = y)) +
geom_point(aes(fill = grp), size = 4, shape = 21) +
geom_text(aes(label = lab, color = grp,
alpha = x > 1),
size = 5, hjust = 1, vjust = 1) +
scale_alpha_manual(values = c("TRUE" = 1, "FALSE" = 0)) +
theme(legend.position = "none")

Resources