How to get rid of annotations on faceted graph? - r

Problem
I am trying to label the left facet side of my graph while leaving out the annotations on the right side.
Data
Here are my libraries and data:
#### Libraries ####
library(tidyverse)
library(ggpubr)
library(plotly)
#### Dput ####
emlit <- structure(list(X = 1:20, Ethnicity = c("Asian (other than Chinese)",
"Filipino", "Indonesian", "Thai", "Japanese", "Korean", "South Asian",
"Indian", "Nepalese", "Pakistani", "Other South Asian", "Other Asian",
"White", "Mixed", "With Chinese parent", "Other mixed", "Others",
"All ethnic minorities", "All ethnic minorities, excluding\n foreign domestic helpers",
"Whole population"), Age_5.14 = c(65.8, 72.2, 69.4, 83.1, 26.6,
52.4, 67.4, 60.4, 69.5, 71.5, 92.5, 92, 34.8, 76.6, 84.2, 45.3,
51.3, 64.3, 64.3, 94.8), Age_15.24 = c(28.1, 29.2, 4.4, 72.9,
34.8, 50.3, 38.7, 41.4, 22.2, 54.3, 41.9, 64.7, 24.4, 82.9, 90.7,
37.4, 53.2, 40.6, 52.9, 96.9), Age_25.34 = c(4.5, 1.8, 4.6, 20,
17.2, 26.8, 6.6, 4.2, 6.4, 11.9, 12, 33.9, 15, 60.5, 82, 6.7,
11.2, 7.8, 21.8, 84.9), Age_35.44 = c(6.3, 2, 6.1, 35.7, 36.5,
25.5, 9.4, 6.2, 10.5, 10.1, 22.4, 35.7, 8.6, 63, 83.2, 4.5, 12.2,
9.5, 23.4, 84.6), Age_45.54 = c(8.1, 2.3, 8, 23.2, 43.4, 59.6,
7.5, 6.3, 3.9, 13.5, 28.3, 47.5, 13.1, 72.1, 84, 4.4, 22.4, 14.2,
27.7, 92.5), Age_55.64 = c(15.9, 4.4, 44, 27, 41.7, 52.8, 11.8,
7.4, 9.5, 2, 54.2, 39.6, 12.7, 75.3, 80.1, 2.6, 20.6, 25, 32.4,
94.8), Age_65. = c(31.1, 11.9, 82.6, 39, 46.4, 57, 9.5, 3.9,
NA, 11.4, 66.5, 74.5, 14.5, 80.5, 81, 57.5, 13.6, 42.7, 44, 82.3
), Age_Overall = c(10.1, 3.5, 6.4, 31.4, 35.1, 39.8, 20.4, 15.3,
16.4, 33.8, 30.4, 46.3, 15.4, 72.7, 83.9, 19.4, 19.8, 16.9, 35.2,
89.4)), class = "data.frame", row.names = c(NA, -20L))
I have also pivoted the data for my graph:
#### Pivot Data ####
emlitpivot <- emlit %>%
pivot_longer(cols = contains("Age"),
names_to = "Age_Range",
values_to = "Percent")
Plot
Here is my plot so far, a faceted graph that breaks down literacy by age with some notes on some important points on the left:
#### EM vs all ####
# Order
order <- c("5-14", "15-24", "25-34", "35-44", "45-54", "55-64", "65+", "Overall",
"5-14", "15-24", "25-34", "35-44", "45-54", "55-64", "65+", "Overall")
# Plot
plot <- emlitpivot %>%
filter(Ethnicity %in% c("All ethnic minorities",
"Whole population")) %>%
ggbarplot(x="Age_Range",
y="Percent",
fill = "Ethnicity",
label = T,
palette = "jco",
facet.by = "Ethnicity",
title = "EM x Native Chinese Literacy by Age",
xlab = "Age Range",
ylab = "Literacy in Chinese (By Percent)",
caption = "*Data obtained from Census and Statistics Department Hong Kong SAR, 2016.")+
theme_cleveland()+
theme(axis.text.x = element_text(angle = 45,
hjust = .5,
vjust = .5),
legend.position = "none",
plot.caption = element_text(face = "italic"))+
scale_x_discrete(labels=order)+
geom_segment(aes(x = 3, y = 15, xend = 3, yend = 48))+
geom_segment(aes(x = 1, y = 71, xend = 1, yend = 80))+
geom_segment(aes(x = 7, y = 50, xend = 7, yend = 65))+
annotate("text",
x=4,
y=53,
label = "Post-college workers can't read.")+
annotate("text",
x=3.5,
y=85,
label = "School age supports seem to boost initial literacy.")+
annotate("text",
x=6,
y=70,
label = "Increase due to generational literacy?")
# Print plot:
plot
However, you can probably guess what the problem is:
How do I get rid of the annotations on the right? I'm not sure if there is a simple way of getting rid of them, but it would be helpful to only have text on the left side.

In this case, I'll use geom_text instead of annotate, since it allows you to have subset of your data.
library(tidyverse)
library(ggpubr)
emlitpivot %>%
filter(Ethnicity %in% c(
"All ethnic minorities",
"Whole population"
)) %>%
ggbarplot(
x = "Age_Range",
y = "Percent",
fill = "Ethnicity",
label = T,
palette = "jco",
facet.by = "Ethnicity",
title = "EM x Native Chinese Literacy by Age",
xlab = "Age Range",
ylab = "Literacy in Chinese (By Percent)",
caption = "*Data obtained from Census and Statistics Department Hong Kong SAR, 2016."
) +
theme_cleveland() +
theme(
axis.text.x = element_text(
angle = 45,
hjust = .5,
vjust = .5
),
legend.position = "none",
plot.caption = element_text(face = "italic")
) +
scale_x_discrete(labels = order) +
geom_segment(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(x = 3, y = 15, xend = 3, yend = 48)) +
geom_segment(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(x = 1, y = 71, xend = 1, yend = 80)) +
geom_segment(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(x = 7, y = 50, xend = 7, yend = 65)) +
geom_text(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(4, 53), label = "Post-college workers can't read.", check_overlap = T) +
geom_text(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(3.5, 85), label = "School age supports seem to boost initial literacy.", check_overlap = T) +
geom_text(data = subset(emlitpivot, Ethnicity == "All ethnic minorities"), aes(6, 70), label = "Increase due to generational literacy?", check_overlap = T)

Update remove lines in second facet:
Create a dataframe with your text labels and position and add it to the plot,
to remove the lines do the same procedure:
df for text = ann_text
df for lines = segm
ann_text <- data.frame(x = c(4, 3.5, 6),
y = c(53, 85, 70),
lab = c("Post-college workers can't read.", "School age supports seem to boost initial literacy.",
"Increase due to generational literacy?"),
Ethnicity = rep("All ethnic minorities", 3))
segm <- data.frame(x = c(3,1,7),
y = c(15, 71, 50),
xend = c(3,1,7),
yend = c(48,80,65),
Ethnicity = rep("All ethnic minorities", 3))
plot1 <- plot +
geom_text(
data = ann_text,
mapping = aes(x = x, y = y, label = lab)
)
plot1 + geom_segment(
data = segm,
mapping = aes(x = x, y = y, xend = xend, yend = yend)
)
remove the following from your code:
annotate("text",
x=4,
y=53,
label = "Post-college workers can't read.")+
annotate("text",
x=3.5,
y=85,
label = "School age supports seem to boost initial literacy.")+
annotate("text",
x=6,
y=70,
label = "Increase due to generational literacy?")

Related

How to connect points according to grouping instead of connecting all points in ggplot?

As the title stated, I want to connect points in every group instead of all points.
Here is the original date:
df<-structure(list(TN = c(13.6, 18, 18.5, 17, 16.9, 13.6, 17.6, 14.8,
14, 11, 12.6, 18.6, 18.8, 18.3, 19.4, 18.5, 18.9, 22, 22.3),
TX = c(29.9, 26.9, 30.5, 26.6, 25.4, 29.7, 24.1, 21.1, 23.8,
29.3, 34.4, 31.1, 32, 35.9, 36.7, 37.5, 39.2, 34.8, 33.6),
TM = c(22.5, 21.4, 23.3, 21.4, 20.2, 21.4, 19.9, 17.8, 18.9,
20.9, 24.5, 24.5, 25.1, 27.3, 28.2, 28.5, 29.2, 28.2, 26.8
), Date = c("01/06/2022", "02/06/2022", "03/06/2022", "04/06/2022",
"05/06/2022", "06/06/2022", "07/06/2022", "08/06/2022", "09/06/2022",
"10/06/2022", "11/06/2022", "12/06/2022", "13/06/2022", "14/06/2022",
"15/06/2022", "16/06/2022", "17/06/2022", "18/06/2022", "19/06/2022"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-19L))
Here is my code:
library(ggplot2)
library(tidyr)
library(dplyr)
df %>% select(Date, TN, TX) %>%
pivot_longer(cols = c(TN,TX), names_to = "Tcombine", values_to = "Value") %>%
ggplot(aes(Date, Value,group = 1,shape=Tcombine,color=Tcombine)) +
geom_point()+
geom_line()+
theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.title.x=element_blank())
I want the points of the two groups (two colors) to be connected separately as the date changes, but I don't know why all the points are connected?
Here is final graph I got:
Any suggestions are welcome! Thank you in adavance!
Add group=Tcombine.
df %>% select(Date, TN, TX) %>%
pivot_longer(cols = c(TN,TX), names_to = "Tcombine", values_to = "Value") %>%
ggplot(aes(Date, Value,group = 1,shape=Tcombine,color=Tcombine)) +
geom_point()+
geom_line(aes(group = Tcombine))+
theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.title.x=element_blank())
Incidentally, while the ordering of your x-axis works here, the moment you get another month it will break. I suggest you convert your Date column to a proper Date-class and add scale_x_date.
df %>%
mutate(Date = as.Date(Date, format = "%d/%m/%Y")) %>%
select(Date, TN, TX) %>%
pivot_longer(cols = c(TN,TX), names_to = "Tcombine", values_to = "Value") %>%
ggplot(aes(Date, Value,group = 1,shape=Tcombine,color=Tcombine)) +
geom_point()+
geom_line(aes(group = Tcombine)) +
scale_x_date(date_breaks = "1 day") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), axis.title.x=element_blank())
While this looks very similar, you have much better control over breaks (e.g., date_breaks = "3 days") and formatting (e.g., date_labels ="%d/%m/%Y" if you really want that formatting of the dates).

Manually draw boxplot using ggplot

I think my question is very similar to this one, the only difference being that I'd love to use ggplot (and the answer with ggplot was missing a tiny bit of detail). I have data like this:
show<-structure(list(Median = c(20, 39, 21, 52, 45.5, 24, 36, 20, 134,
27, 44, 43), IQR = c(4, 74, 28, 51.5, 73.5, 18, 47.5, 26.5, 189.5,
46, 54, 61), FirstQuartile = c(`25%` = 19, `25%` = 24, `25%` = 12,
`25%` = 30.5, `25%` = 36.5, `25%` = 18, `25%` = 16.5, `25%` = 13,
`25%` = 53.5, `25%` = 15, `25%` = 24.5, `25%` = 27), ThirdQuartile = c(`75%` = 23,
`75%` = 98, `75%` = 40, `75%` = 82, `75%` = 110, `75%` = 36,
`75%` = 64, `75%` = 39.5, `75%` = 243, `75%` = 61, `75%` = 78.5,
`75%` = 88), Group = c("Program Director", "Editor", "Everyone",
"Board Director", "Board Director", "Program Director", "Editor",
"Everyone", "Board Director", "Everyone", "Editor", "Program Director"
), Decade = c("1980's", "1980's", "1980's", "1980's", "1990's",
"1990's", "1990's", "1990's", "2000's", "2000's", "2000's", "2000's"
)), row.names = c(NA, -12L), class = c("tbl_df", "tbl", "data.frame"
))
And I would like to draw a graph like this:
With "group" as the color, instead of "fellowship". The problem is, that graph was drawn from "complete" data (with 800ish rows), and I clearly only have summary data above. I realize it won't be able to draw outliers but that is ok. Any help would be appreciated! I'm specifically struggling with how I would draw the ymin/max and the edges of the notch. Thank you
You can use geom_boxplot() with stat = "identity" and fill in the five boxplot numbers as aesthetics.
library(ggplot2)
# show <- structure(...) # omitted for previty
ggplot(show, aes(Decade, fill = Group)) +
geom_boxplot(
stat = "identity",
aes(lower = FirstQuartile,
upper = ThirdQuartile,
middle = Median,
ymin = FirstQuartile - 1.5 * IQR, # optional
ymax = ThirdQuartile + 1.5 * IQR) # optional
)
As pointed out by jpsmith in the comments below, the 1.5 * IQR rule becomes hairy if you don't have the range of the data. However, if you have information about the data extrema or the data domain, you can limit the whiskers as follows:
# Dummy values assuming data is >= 0 up to infinity
show$min <- 0
show$max <- Inf
ggplot(show, aes(Decade, fill = Group)) +
geom_boxplot(
stat = "identity",
aes(lower = FirstQuartile,
upper = ThirdQuartile,
middle = Median,
ymin = pmax(FirstQuartile - 1.5 * IQR, min),
ymax = pmin(ThirdQuartile + 1.5 * IQR, max))
)

Create a heatmaps with average values on the very right column and bottom row

I want to create a heat map, where average values are depicted on the very right column and in the bottom row. I found a question that is very similar to mine -
heatmap with values and some additional features in R
Everything works well, however, I do not need highlighted cells for average values. Could you help me to unhighlight cells with average values?
library(ggplot2)
q1<-structure(list(hour = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), E = c(-15.6,
-17.2, -13, -11.6, -9.8, -13.2, -16.8, -15.8, -13.8, -12.8, -26.4,
-42.2, -40.8, -38, -41.2, -48.2, -47.2, -42.4, -18.4, -35.4,
-47.2, -42, -26.6, -24.8), K = c(-15.8, -23, -22.4, -15.8, -15.6,
-18.5, -21.4, -24.4, -18.8, -15.4, -46.8, -53.4, -43.6, -42.4,
-48.6, -48, -57, -19.2, -14.8, -23.2, -39.2, -43.8, -28, -13),
L = c(-20.6, -19.8, -12.6, -7.4, -11, -23.8, -25.2, -21.4,
-20.4, -20.2, -17.6, -18, -20.2, -30.4, -22.6, -25.8, -31.8,
-35.6, -43, -37, -36.8, -44, -47.4, -33.2), N = c(-15.4,
-28.6, -16.6, -15.6, -18.4, -20.2, -25, -22.8, -14.2, -10.8,
-31.4, -37.6, -37.2, -32, -37.4, -40.4, -40.4, -23, -8.6,
-11, -23.2, -33, -26.2, -8.2), P = c(-17, -16.8, -22.6, -15.8,
-15, -23, -23.8, -21.8, -17.4, -17.4, -21.2, -18.2, -11,
-14.6, -26.6, -31, -27.4, -29.4, -27.4, -29.6, -33, -34.8,
-16, -17.4), R = c(-7, -22.4, -20, -12, -15.4, -18.8, -22.6,
-20.4, -10.2, -4.4, -21.2, -28, -30.6, -29.4, -26, -22.8,
-31.4, -20.4, -4.8, -18.6, -27.2, -35, -48.8, -32.6), T = c(-19.4,
-21.2, -16.2, -16.6, -16.8, -16.2, -21.8, -23.4, -13, -13,
-36.2, -42.2, -41, -38.4, -39.6, -47, -43.2, -42.4, -21.8,
-23.8, -40.2, -34.6, -23.8, -15), Average = c(-15.8285714285714,
-21.2857142857143, -17.6285714285714, -13.5428571428571,
-14.5714285714286, -19.1, -22.3714285714286, -21.4285714285714,
-15.4, -13.4285714285714, -28.6857142857143, -34.2285714285714,
-32.0571428571429, -32.1714285714286, -34.5714285714286,
-37.6, -39.7714285714286, -30.3428571428571, -19.8285714285714,
-25.5142857142857, -35.2571428571429, -38.1714285714286,
-30.9714285714286, -20.6)), row.names = c(NA, -24L), class = "data.frame")
q1$Average<-rowMeans(q1[,2:8])
dat2 <- stack(q1[-1])
dat2$hour <- q1$hour
dat2$ind <- factor(dat2$ind, levels=c("E","T","K","N","R","L","P", "Average"))
ggplot(mapping = aes(ind, hour)) +
geom_tile(aes(fill = values), subset(dat2, hour != "Average" & ind != "Sum")) +
geom_text(aes(label = round(values, 1)), dat2) +
scale_y_discrete(limits = c("Average", 24:1)) +
scale_x_discrete(limits = c("E","T","K","N","R","L","P", "Average"), position = "top") +
viridis::scale_fill_viridis() +
theme_minimal() + theme(axis.title = element_blank())
You could make use of an ifelse to replace the values mapped on fill to NA for your average column and row like so. The value to be used for the NA value could then be set via the na.value argument of scale_fill_xxx where I chose NA or transparent:
library(ggplot2)
ggplot(mapping = aes(ind, hour)) +
geom_tile(aes(fill = ifelse(!(ind == "Average" | hour == 1), values, NA)), subset(dat2, hour != "Average" & ind != "Sum")) +
geom_text(aes(label = round(values, 1)), dat2) +
scale_y_discrete(limits = c("Average", 24:1)) +
scale_x_discrete(limits = c("E","T","K","N","R","L","P", "Average"), position = "top") +
viridis::scale_fill_viridis(na.value = NA) +
theme_minimal() + theme(axis.title = element_blank()) +
labs(fill = "values")
You can try barplots instead of numbers.
library(ComplexHeatmap)
row_ha = rowAnnotation(Average = anno_barplot(q1$Average,axis_param = list(direction = "reverse")))
column_ha = HeatmapAnnotation(Average = anno_barplot(colMeans(q1[,2:8])))
ComplexHeatmap::Heatmap(as.matrix(q1[,-c(1, ncol(q1))]), right_annotation = row_ha, top_annotation = column_ha, col = viridis::viridis(10))
Or points with regression line and boxplots showing medians instead of means
row_ha = rowAnnotation(Average = anno_lines(q1$Average,axis_param = list(direction = "reverse"), smooth =T))
column_ha = HeatmapAnnotation(summary = anno_boxplot(as.matrix(q1[,-c(1, ncol(q1))])))
Without clustering on row and columns
Heatmap(name = "value", as.matrix(q1[,-c(1, ncol(q1))]), right_annotation = row_ha, top_annotation = column_ha,
col = viridis::viridis(10),cluster_rows = F, cluster_columns = F)
There are a couple of R-packages availabel for heatmaps. Thus, you can try the superheat package as well.
library(superheat)
superheat(as.matrix(q1[,-c(1, ncol(q1))]),
yr = q1$Average,
yr.plot.type = "scattersmooth",
yr.axis.name = "Average",
yt = colMeans(q1[,2:8]),
yt.plot.type = "bar",
yt.axis.name = "Average")

Add labels to scatter plot

I cannot seem to add labels to my scatter plot.
I want to label the scatter with the variable states. It is currently not working either due to conflict with annotate or some other geom function.
Here is my data:
datatrials <- structure(list(states = c(
"AP", "AR", "AS", "BR", "CH", "GJ",
"HR", "HP", "JK", "JH", "KA", "KL", "MP", "NL", "OR", "PY", "PB",
"RJ", "SK", "TG", "TR", "UP", "UT"
), cured = c(
60.44117647, 2.631578947,
24.70095694, 47.31207289, 71.09634551, 67.46961326, 36.86526743,
41.78272981, 35.24676234, 42.68617021, 37.26310608, 43.57429719,
63.40242198, 0, 59.29648241, 30.48780488, 85.3956229, 69.87152922,
0, 51.52317881, 36.96581197, 25.99078341, 59.29659755
), Total = c(
4080L,
38L, 1672L, 4390L, 301L, 18100L, 2954L, 359L, 2857L, 752L, 4063L,
1494L, 8588L, 58L, 2388L, 82L, 2376L, 9652L, 2L, 3020L, 468L,
1085L, 8729L
), deaths = c(
1.666666667, 0, 0.23923445, 0.569476082,
1.661129568, 6.198895028, 0.778605281, 1.39275766, 1.190059503,
0.664893617, 1.304454836, 0.736278447, 4.319981369, 0, 0.293132328,
0, 1.978114478, 2.165354331, 0, 3.278145695, 0, 0.737327189,
2.623439111
), SLT_preval = c(
7.1, 39.3, 41.7, 23.5, 6.1, 19.2,
6.3, 3.1, 4.3, 35.4, 16.3, 5.4, 28.1, 39, 42.9, 4.7, 8, 14.1,
9.7, 10.1, 48.5, 29.4, 12.4
), smoking_preval = c(
14.2, 22.7,
13.3, 5.1, 9.4, 7.7, 19.7, 14.2, 20.8, 11.1, 8.8, 9.3, 10.2,
13.2, 7, 7.2, 7.3, 13.2, 10.9, 8.3, 27.7, 13.5, 18.1
), density_Population = c(
330.7605972,
18.75330475, 453.9513884, 1325.360556, 10162.04386, 325.839688,
637.9420067, 133.8522264, 108.3866651, 484.1552049, 352.2724528,
918.5972004, 276.9192201, 135.6954581, 297.7151573, 2951.02714,
598.4943608, 236.7722235, 97.27325254, 351.2115064, 397.6534427,
987.360228, 210.3632556
), avg_tobacco_use = c(
10.65, 31, 27.5,
14.3, 7.75, 13.45, 13, 8.65, 12.55, 23.25, 12.55, 7.35, 19.15,
26.1, 24.95, 5.95, 7.65, 13.65, 10.3, 9.2, 38.1, 21.45, 15.25
), urbanization = c(
29.47, 22.94, 14.1, 11.29, 97.25, 42.6, 34.88,
10.03, 26.11, 24.05, 38.67, 47.7, 27.63, 28.86, 16.69, 68.33,
37.48, 24.87, 25.15, 38.88, 26.17, 22.27, 30.23
), gats2_tobacco_india = c(
20,
45.5, 48.2, 25.9, 13.7, 25.1, 23.6, 16.1, 23.7, 38.9, 22.8, 12.7,
34.2, 43.3, 45.6, 11.2, 13.4, 24.7, 17.9, 17.8, 64.5, 35.5, 26.5
), Cases_per_pop = c(
7.56909681, 2.419676298, 4.695700757, 3.517630291,
25.98247866, 28.33774883, 10.4734347, 4.817527749, 20.99759524,
1.948492028, 6.013674471, 4.184939244, 10.06104045, 2.578127257,
5.151399591, 5.80103032, 7.882852583, 11.91124239, 0.289749671,
7.672231694, 11.22357603, 0.456107101, 77.58519395
)), class = "data.frame", row.names = c(
NA,
-23L
))
This is my code:
library(ggplot2)
library(ggExtra)
library(gridExtra)
#working plot
plot1 <- ggplot(datatrials, aes(SLT_preval,urbanization)) + geom_point(color = '#CC9933') +
geom_smooth(fullrange=TRUE,method = "lm", level=0.95) +
ylab("Urbanization %") +
xlab("Smokeless Tobacco Use %") +
theme(axis.text=element_text(size=14),
axis.title=element_text(size=14)) +
scale_x_continuous(expand=c(0,0), limits=c(0,100)) +
scale_y_continuous(expand=c(0,0), limits=c(-50,100)) +
coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
theme(axis.title.y = element_text(margin=margin (t=0, r=5, b=0, l=0))) +
geom_label(x = 0.95*max(SLT_preval), y = 0.92*max(urbanization), size = 4.3, label = "n = 32; p-value = 0.015; \n CI = -0.799:-0.050; rho = -0.426")
plot2 <- ggplot(datatrials, aes(smoking_preval,urbanization)) + geom_point(color = '#615513') +
geom_smooth(fullrange=TRUE,method = "lm", se=FALSE) +
ylab("Urbanization %") +
xlab("Smoking %") +
theme(axis.text=element_text(size=14),
axis.title=element_text(size=14)) +
scale_x_continuous(expand=c(0,0), limits=c(0,100)) +
scale_y_continuous(expand=c(0,0), limits=c(-50,100)) +
coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
geom_label(x = 1.35*max(smoking_preval), y = 0.92*max(urbanization), size = 4.3, label = "n = 32; p-value = 0.186; \n CI = -0.641:0.165; rho = -0.239")
p1 <- ggMarginal(plot1, type="histogram", colour = '#FF0000', fill = '#FAC95F')
p2 <- ggMarginal(plot2, type="histogram", colour = '#FF0000', fill = '#615513')
grid.arrange(p1, p2, ncol=2)
Try this. I added the geom_text. Next. There was an error in your use of geom_label which lacked the datatrials$. Also I switched to annotate which works fine if you add geom = "label".
library(ggplot2)
library(ggExtra)
# working plot
plot1 <- ggplot(datatrials, aes(SLT_preval, urbanization)) +
geom_point(color = "#CC9933") +
geom_smooth(fullrange = TRUE, method = "lm", level = 0.95) +
# Add geom_text
geom_text(aes(label = states)) +
ylab("Urbanization %") +
xlab("Smokeless Tobacco Use %") +
theme(
axis.text = element_text(size = 14),
axis.title = element_text(size = 14)
) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 100)) +
scale_y_continuous(expand = c(0, 0), limits = c(-50, 100)) +
coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
theme(axis.title.y = element_text(margin = margin(t = 0, r = 5, b = 0, l = 0))) +
# Switch to annotate. Add geom. Add datatrials$
annotate(
geom = "label", x = 0.95 * max(datatrials$SLT_preval),
y = 0.92 * max(datatrials$urbanization), size = 4.3, label = "n = 32; p-value = 0.015; \n CI = -0.799:-0.050; rho = -0.426"
)
plot2 <- ggplot(datatrials, aes(smoking_preval, urbanization)) +
geom_point(color = "#615513") +
geom_smooth(fullrange = TRUE, method = "lm", se = FALSE) +
# Add geom_text
geom_text(aes(label = states)) +
ylab("Urbanization %") +
xlab("Smoking %") +
theme(
axis.text = element_text(size = 14),
axis.title = element_text(size = 14)
) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 100)) +
scale_y_continuous(expand = c(0, 0), limits = c(-50, 100)) +
coord_cartesian(xlim = c(0, 70), ylim = c(0, 100)) +
# Switch to annotate. Add geom. Add datatrials$
annotate(geom = "label", x = 1.35 * max(datatrials$smoking_preval), y = 0.92 * max(datatrials$urbanization), size = 4.3, label = "n = 32; p-value = 0.186; \n CI = -0.641:0.165; rho = -0.239")
p1 <- ggMarginal(plot1, type = "histogram", colour = "#FF0000", fill = "#FAC95F")
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
p2 <- ggMarginal(plot2, type = "histogram", colour = "#FF0000", fill = "#615513")
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#par(mfrow = c(1, 2))
gridExtra::grid.arrange(p1, p2, ncol = 2)
Created on 2020-06-23 by the reprex package (v0.3.0)

Data labels for mean and percentiles in a distribution chart

I'm creating a custom chart to visualize a variable's distribution using geom_density. I added 3 vertical lines for a custom value, the 5th percentile and the 95th percentile.
How do I add labels for those lines?
I tried using geom_text but i don't know how to parameter the x and y variables
library(ggplot2)
ggplot(dataset, aes(x = dataset$`Estimated percent body fat`)) +
geom_density() +
geom_vline(aes(xintercept = dataset$`Estimated percent body fat`[12]),
color = "red", size = 1) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.05, na.rm = TRUE)),
color = "grey", size = 0.5) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.95, na.rm = TRUE)),
color="grey", size=0.5) +
geom_text(aes(x = dataset$`Estimated percent body fat`[12],
label = "Custom", y = 0),
colour = "red", angle = 0)
I'd like to obtain the following:
for the custom value, I'd like to add the label at the top of the chart, just to the right of the line
for the percentiles label, I'd like to add them in the middle of the chart; at the left of the line for the 5th percentile and right of the line for 95th percentile
Here is what I was able to obtain https://i.imgur.com/thSQwyg.png
And these are the first 50 lines of my dataset:
structure(list(`Respondent sequence number` = c(21029L, 21034L,
21043L, 21056L, 21067L, 21085L, 21087L, 21105L, 21107L, 21109L,
21110L, 21125L, 21129L, 21138L, 21141L, 21154L, 21193L, 21195L,
21206L, 21215L, 21219L, 21221L, 21232L, 21239L, 21242L, 21247L,
21256L, 21258L, 21287L, 21310L, 21325L, 21367L, 21380L, 21385L,
21413L, 21418L, 21420L, 21423L, 21427L, 21432L, 21437L, 21441L,
21444L, 21453L, 21466L, 21467L, 21477L, 21491L, 21494L, 21495L
), `Estimated percent body fat` = c(NA, 7.2, NA, NA, 24.1, 25.1,
30.2, 23.6, 24.3, 31.4, NA, 14.1, 20.5, NA, 23.1, 30.6, 21, 20.9,
NA, 24, 26.7, 16.6, NA, 26.9, 16.9, 21.3, 15.9, 27.4, 13.9, NA,
20, NA, 12.8, NA, 33.8, 18.1, NA, NA, 28.4, 10.9, 38.1, 33, 39.3,
15.9, 32.7, NA, 20.4, 16.8, NA, 29)), row.names = c(NA, 50L), class =
"data.frame")
First I recommend clean column names.
dat <- dataset
names(dat) <- tolower(gsub("\\s", "\\.", names(dat)))
Whith base R plots you could do the following. The clou is, that you can store the quantiles and custom positions to use them as coordinates later which gives you a dynamic positioning. I'm not sure if/how this is possible with ggplot.
plot(density(dat$estimated.percent.body.fat, na.rm=TRUE), ylim=c(0, .05),
main="Density curve")
abline(v=c1 <- dat$estimated.percent.body.fat[12], col="red")
abline(v=q1 <- quantile(dat$estimated.percent.body.fat, .05, na.rm=TRUE), col="grey")
abline(v=q2 <- quantile(dat$estimated.percent.body.fat, .95, na.rm=TRUE), col="grey")
text(c1 + 4, .05, c(expression("" %<-% "custom")), cex=.8)
text(q1 - 5.5, .025, c(expression("5% percentile" %->% "")), cex=.8)
text(q2 + 5.5, .025, c(expression("" %<-% "95% percentile")), cex=.8)
Note: Case you don't like the arrows just do e.g. "5% percentile" instead of c(expression("5% percentile" %->% "")).
Or in ggplot you could use annotate.
library(ggplot2)
ggplot(dataset, aes(x = dataset$`Estimated percent body fat`)) +
geom_density() +
geom_vline(aes(xintercept = dataset$`Estimated percent body fat`[12]),
color = "red", size = 1) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.05, na.rm = TRUE)),
color = "grey", size = 0.5) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.95, na.rm = TRUE)),
color="grey", size=0.5) +
annotate("text", x=16, y=.05, label="custom") +
annotate("text", x=9.5, y=.025, label="5% percentile") +
annotate("text", x=38, y=.025, label="95% percentile")
Note, that in either solution the result (i.e. exact label positions) depends on your export size. To learn how to control this, take e.g. a look into How to save a plot as image on the disk?.
Data
dataset <- structure(list(`Respondent sequence number` = c(21029L, 21034L,
21043L, 21056L, 21067L, 21085L, 21087L, 21105L, 21107L, 21109L,
21110L, 21125L, 21129L, 21138L, 21141L, 21154L, 21193L, 21195L,
21206L, 21215L, 21219L, 21221L, 21232L, 21239L, 21242L, 21247L,
21256L, 21258L, 21287L, 21310L, 21325L, 21367L, 21380L, 21385L,
21413L, 21418L, 21420L, 21423L, 21427L, 21432L, 21437L, 21441L,
21444L, 21453L, 21466L, 21467L, 21477L, 21491L, 21494L, 21495L
), `Estimated percent body fat` = c(NA, 7.2, NA, NA, 24.1, 25.1,
30.2, 23.6, 24.3, 31.4, NA, 14.1, 20.5, NA, 23.1, 30.6, 21, 20.9,
NA, 24, 26.7, 16.6, NA, 26.9, 16.9, 21.3, 15.9, 27.4, 13.9, NA,
20, NA, 12.8, NA, 33.8, 18.1, NA, NA, 28.4, 10.9, 38.1, 33, 39.3,
15.9, 32.7, NA, 20.4, 16.8, NA, 29)), row.names = c(NA, 50L), class =
"data.frame")

Resources