ggplot2 in R: annotate outside of plot and underline text

ggplot2 in R: annotate outside of plot and underline text - r

I've been scratching my head for hours on this. What I have up to now:
library(ggplot2)
library(grid)
all_data = data.frame(country=rep(c("A","B","C","D"),times=1,each=20),
value=rep(c(10,20,30,40),times=1,each=20),
year = rep(seq(1991,2010),4))
# PLOT GRAPH
p1 <- ggplot() + theme_bw() + geom_line(aes(y = value, x = year,
colour=country), size=2,
data = all_data, stat="identity") +
theme(plot.title = element_text(size=18,hjust = -0.037), legend.position="bottom",
legend.direction="horizontal", legend.background = element_rect(size=0.5, linetype="solid", colour ="black"),
legend.text = element_text(size=16,face = "plain"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(),axis.line = element_line(colour = "black"),legend.title = element_blank(),
axis.text=element_text(size=18,face = "plain"),axis.title.x=element_text(size=18,face = "plain", hjust = 1,
margin = margin(t = 10, r = 0, b = 0, l = 0)),
axis.title.y=element_blank())
p1 <- p1 + ggtitle("Index")
p1 <- p1 + xlab("Year")
p1 <- p1 + scale_x_continuous(expand=c(0,0),breaks=seq(1991,2010,4))
p1 <- p1 + theme(plot.margin=unit(c(5.5, 300, 5.5, 5.5), "points"))
p1 <- p1 + geom_text(aes(label = "Country", x = 2011, y =
max(all_data$value)+10), hjust = 0, vjust = -2.5, size = 6)
p1 <- p1 + geom_text(aes(label = "Average", x = Inf, y =
max(all_data$value)+10), hjust = -1.5, vjust = -2, size = 6)
p1 <- p1 + geom_text(aes(label = all_data$country, x = 2011, y =
all_data$value), hjust = 0, size = 6)
p1 <- p1 + geom_text(aes(label = as.character(all_data$value), x = Inf,
y = all_data$value), hjust = -5, size = 6)
p1 <- p1 +
annotate("segment",x=2011,xend=2014,y=Inf,yend=Inf,color="black",lwd=1)
# Override clipping
gg2 <- ggplot_gtable(ggplot_build(p1))
gg2$layout$clip[gg2$layout$name == "panel"] <- "off"
grid.draw(gg2)
What I am struggling with is the following:
1) how to annotate outside of the plot, underline both "Country" and "Average" without extending the x-axis.
2) Isn't there more systematic approach to the whole annotation process. Adjusting hjust and vjust by visual inspection seems very troublesome.
Any help is appreciated!

See if this works for you:
# define some offset parameters
x.offset.country = 2
x.offset.average = 5
x.range = range(all_data$year) + c(0, x.offset.average + 2)
y.range = range(all_data$value) + c(-5, 10)
y.label.height = max(all_data$value) + 8
# subset of data for annotation
all_data_annotation <- dplyr::filter(all_data, year == max(year))
p <- ggplot(all_data,
aes(x = year, y = value, group = country, colour = country)) +
geom_line(size = 2) +
# fake axes (x-axis stops at year 2009, y-axis stops at value 45)
annotate("segment", x = 1991, y = 5, xend = 2009, yend = 5) +
annotate("segment", x = 1991, y = 5, xend = 1991, yend = 45) +
# country annotation
geom_text(data = all_data_annotation, inherit.aes = FALSE,
aes(x = year + x.offset.country, y = value, label = country)) +
annotate("text", x = max(all_data$year) + x.offset.country, y = y.label.height,
label = "~underline('Country')", parse = TRUE) +
# average annotation
geom_text(data = all_data_annotation, inherit.aes = FALSE,
aes(x = year + x.offset.average, y = value, label = value)) +
annotate("text", x = max(all_data$year) + x.offset.average, y = y.label.height,
label = "~underline('Average')", parse = TRUE) +
# index (fake y-axis label)
annotate("text", x = 1991, y = y.label.height,
label = "Index") +
scale_x_continuous(name = "Year", breaks = seq(1991, 2009, by = 4), expand = c(0, 0)) +
scale_y_continuous(name = "", breaks = seq(10, 40, by = 10), expand = c(0, 0)) +
scale_colour_discrete(name = "") +
coord_cartesian(xlim = x.range, ylim = y.range) +
theme_classic() +
theme(axis.line = element_blank(),
legend.position = "bottom",
legend.background = element_rect(size=0.5, linetype="solid", colour ="black"))
# Override clipping (this part is unchanged)
gg2 <- ggplot_gtable(ggplot_build(p))
gg2$layout$clip[gg2$layout$name == "panel"] <- "off"
grid.draw(gg2)

Related

Adding a black outline for each x category in heat map in R

Here is my data which produces a heat map. What I am hoping to do is produce multiple difference heatmaps with an outline around each of x categories.
data <- data.frame(id=c("john","john","john","kate","kate","kate","chris","chris","chris"),
group=c("geo","his","math","geo","his","math","geo","his","math"),
grade=c(65,76,87,67,89,98,99,97,96),
class=c("A","A","A","A","A","A","B","B","B"))
data
mine.heatmap <- ggplot(data = data, mapping = aes(x = id, y = group, fill = grade)) +
geom_tile() +
xlab(label = "id") +
ylab(label="group") +
labs(fill="grade")+
scale_fill_gradient2(low = "#800080",
high = "#FF8C00",mid = "white")
x <- mine.heatmap + facet_grid(
cols = vars(class), scales = "free", space = "free"
)
x + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 18, margin = margin(b=2)))+
theme(axis.text.y= element_text(angle = 0, vjust = 0.5, hjust=1, size = 18)) +
theme(legend.text = element_text(size=14))+
theme(legend.title = element_text(size=14))+
theme(strip.text = element_text(size=14))+
theme(axis.title.x = element_text(size=18)) +theme(axis.title.y = element_text(size=18))
Original Heat map:
What I am hoping to get are the following heatmaps:

One option to achieve your desired result would be to
put your plotting code in a function which takes as one argument the id for which you want to draw a outline.
Use some data wrangling to convert the categories to be plotted on the x and y aes to numerics per facet variable.
Add a geom_rect to your plotting code to draw the outline which uses the numerics computed in step 2.
library(ggplot2)
library(dplyr)
mine_heatmap <- function(x) {
p <- ggplot(data = data, mapping = aes(x = id, y = group, fill = grade)) +
geom_tile() +
# Add outline via a geom_rect
geom_rect(
data = subset(data, id == x),
aes(
xmin = id_num - .5, xmax = id_num + .5,
ymin = min(group_num) - .5, ymax = max(group_num) + .5
), fill = NA, color = "black", size = 1
) +
labs(x = "id", y = "group", fill = "grade") +
scale_fill_gradient2(
low = "#800080",
high = "#FF8C00", mid = "white"
)
p <- p + facet_grid(
cols = vars(class), scales = "free", space = "free"
)
p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 18, margin = margin(b = 2))) +
theme(axis.text.y = element_text(angle = 0, vjust = 0.5, hjust = 1, size = 18)) +
theme(legend.text = element_text(size = 14)) +
theme(legend.title = element_text(size = 14)) +
theme(strip.text = element_text(size = 14)) +
theme(axis.title.x = element_text(size = 18)) + theme(axis.title.y = element_text(size = 18))
}
# Convert id and group to numerics per facet variable
data <- data |>
group_by(class) |>
mutate(
id_num = as.numeric(factor(id)),
group_num = as.numeric(factor(group))
) |>
ungroup()
mine_heatmap("john")
mine_heatmap("kate")
mine_heatmap("chris")

Adapting Forest Plot R Script

I am a novice coder and have been trying to understand the code posted here: Forest plot with table ggplot coding
I am hoping to use the script to display my own univariate analysis results for a project. I want the script to read the data from a csv file with the columns: "Predictor", "N", "rr", "rrlow", "rrhigh", and "arr". There are in total 19 variables ("Predictors") that I need to display. I have altered the script to read in the values into a single dataframe (rather than having a separate forestdf and fplottable like in the linked thread). However, I am getting multiple "replacement has x rows, data has y".
Here is the code in question:
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
levels(forestdf$Predictor)
forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3) +
xlab("Variable") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = rev(forestdf$Predictor)) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
forestdf$colour <- rep(c("white", "gray95"), length.out=19)
data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), hjust = 0) +
geom_text(aes(x = 5, label = N)) +
geom_text(aes(x = 7, label = arr), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
And the errors I have been receiving:
> ###dataframe
> library(ggplot2)
> library(tidyr)
> library(grid)
> library(gridExtra)
> library(forcats)
>
> forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
> forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) :
replacement has 0 rows, data has 19
> levels(forestdf$Predictor)
NULL
> forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
> p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
+ geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
+ geom_pointrange(shape = 22, fill = "black") +
+ geom_vline(xintercept = 1, linetype = 3) +
+ xlab("Variable") +
+ ylab("Hazard Ratio with 95% Confidence Interval") +
+ theme_classic() +
+ scale_colour_identity() +
+ scale_y_discrete(limits = rev(forestdf$Predictor)) +
+ scale_x_log10(limits = c(0.25, 4),
+ breaks = c(0.25, 0.5, 1, 2, 4),
+ labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
+ theme(axis.text.y = element_blank(), axis.title.y = element_blank())
>
> forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) :
replacement has 0 rows, data has 19
> forestdf$colour <- rep(c("white", "gray95"), length.out=19)
>
> data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
+ geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
+ geom_text(aes(x = 0, label = Predictor), hjust = 0) +
+ geom_text(aes(x = 5, label = N)) +
+ geom_text(aes(x = 7, label = arr), hjust = 1) +
+ scale_colour_identity() +
+ theme_void() +
+ theme(plot.margin = margin(5, 0, 35, 0))
>
> grid.arrange(data_table,p, ncol = 2)
Error in FUN(X[[i]], ...) : object 'Predictor' not found
I greatly appreciate any help or suggestions you may provide.
Thanks!
EDIT:
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=TRUE)
names(forestdf)[1]<-"Predictor"
forestdf$Predictor <- factor(forestdf$Predictor)
forestdf$colour <- rep(c("white", "gray95"), length.out = length(unique(unlist(forestdf[c("Predictor")]))))
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3, colour = "red") +
xlab("Hazard Ratio") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = rev(forestdf$Predictor)) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), hjust = 0) +
geom_text(aes(x = 3, label = N)) +
geom_text(aes(x = 7, label = arr), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
I have made some changes as per IRTFM (thank you!) and it now produces a plot and table. I'm not sure why but it wasn't reading the csv correctly. My main issues now are the following:
The alternating grey and white bars do not alternate correctly on the table side
The header for the columns does not show up on the table
The table is not aligned with the forestplot (ie. top row's forest plot is not the correct forest plot for Albumin) Example Plot
EDIT2:
I was able to fix the alternating colours and alignment with the forestplot. My issue now is that the column titles I've made are now cut off: New Plot. Also, how would I go about only bolding the values with an asterisk?
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=TRUE)
names(forestdf)[1]<-"Predictor"
forestdf$Predictor <- rev(factor(forestdf$Predictor))
forestdf$colour <- rep(c("white", "gray95"), length.out = length(unique(unlist(forestdf[c("Predictor")]))))
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3, colour = "red") +
xlab("Hazard Ratio") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = forestdf$Predictor) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
#forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
data_table <- ggplot(data = forestdf, aes(y = rev(factor(Predictor)))) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), show.legend=TRUE, hjust = 0) +
geom_text(aes(x = 3, label = N)) +
geom_text(aes(x = 5.5, label = arr), hjust = 1) +
geom_text(aes(x = 7, label = PVALUE), hjust = 1) +
geom_text(aes(x = 0, y = 20, label = "Predictor"), hjust = 0) +
geom_text(aes(x = 3, y= 20, label = "N")) +
geom_text(aes(x = 5, y= 20, label = "95% CI"), hjust = 1) +
geom_text(aes(x = 7, y= 20, label = "P Value"), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
Thanks!

How do I combined a line graph and a bar graph in R with two different y axis

I have my code with that created two different graphs. I want to combine the graphs into one graph with "Elevation on the primary y axis and "Precipitation" on the secondary y axis. Is this possible or would it be best to stick with having the two graphs on top of each other?
pe1.plot <- combine.df %>% filter(site== "VWP 1") %>%
ggplot(aes(x = datetime, y = elevation)) +
geom_line(color = "blue")+
theme(plot.subtitle = element_text(vjust = 1),
plot.caption = element_text(vjust = 1),
plot.background = element_rect(linetype = "solid")) +labs(title = "VWP 1", x = "Date", y = "Elevation (MSL)")
precip.plot <- ggplot(precip.df, aes(x = datetime, y = precipitation)) +
geom_bar(stat = "identity")+
theme(plot.subtitle = element_text(vjust = 1),
plot.caption = element_text(vjust = 1),
plot.background = element_rect(linetype = "solid")) +labs(x = "Date", y = "Pecipitation (in.)")
pe1.plot+precip.plot + plot_layout(ncol = 1)

Possible but kind of a pain:
library(tidyverse)
set.seed(42)
my_data = tibble(date = seq.Date(as.Date("2020-01-01"),
as.Date("2020-06-30"), by = "day"),
line_y = seq(698, 700, length.out = 182) + rnorm(182, sd = 0.1),
bar_y = rpois(182, c(0,0,5))/10)
ggplot(my_data, aes(date)) +
geom_line(aes(y = line_y)) +
geom_col(aes(y = (2*bar_y) + 695)) +
scale_y_continuous(breaks = seq(695, 699.5, by = 0.5),
labels = c(rep("", 6), seq(698, 699.5, by = 0.5)),
sec.axis = sec_axis(~ (. - 695) / 2,
breaks = seq(0, 1, by = 0.25))) +
coord_cartesian(ylim = c(695, NA), expand = 0)

How to add a box around the legend in ggplot2?

Using the following script, how can I add a bottom legend box like the image bellow?
start <- c('2002 Q1', '2008 Q4')
end <- c('2003 Q3', '2011 Q2')
dates <- as.yearqtr(seq(as.Date('2002-01-01'), as.Date('2019-06-01'), by='quarter'))
cod <- tibble(start = as.yearqtr(start), end = as.yearqtr(end)) %>%
filter(start %in% dates) %>%
mutate(start = as.Date(start)) %>%
mutate(end = as.Date(end))
dates <- as.Date(dates)
tbl_fz <- tibble(x = dates, fz = 0.5)
plot <- ggplot(data = tbl_fz) +
geom_rect(data = cod, aes(xmin = start, xmax = end,
ymin = 0, ymax = 1, fill = "b"), alpha = 0.9) +
geom_line(aes(x = x, y = fz), size = 0.5) +
ggtitle('') +
theme_classic() +
theme(title = element_text(size = 8),
plot.title = element_text(hjust = 0.5),
legend.position = c(0.5, -0.5)) +
ylab('') +
xlab('') +
scale_x_date(date_breaks = '2 year', date_labels = '%Y',
expand = c(0, 0)) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(name = 'kkkk',
values = c('grey'),
labels = c('kkkk'))
plot

This could be achieved like so. To put the legend on the bottom I would recommend to
use legend.position = "bottom". To add the line to the legend map something on the color aes and use scale_color_manual as you have done for the fill aes. To get a box around the legend use legend.box.background = element_rect(color = "black"). I also added some margin on the top and to the left as the box was partially overlayed. Finally, to get the order of the legends right and e.g. to get a thicker line you can make use of guide_legend to style the legend.
# Packages ----------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(zoo)
start <- c('2002 Q1', '2008 Q4')
end <- c('2003 Q3', '2011 Q2')
dates <- as.yearqtr(seq(as.Date('2002-01-01'), as.Date('2019-06-01'), by='quarter'))
cod <- tibble(start = as.yearqtr(start), end = as.yearqtr(end)) %>%
filter(start %in% dates) %>%
mutate(start = as.Date(start)) %>%
mutate(end = as.Date(end))
dates <- as.Date(dates)
tbl_fz <- tibble(x = dates, fz = 0.5)
plot <- ggplot(data = tbl_fz) +
geom_rect(data = cod, aes(xmin = start, xmax = end,
ymin = 0, ymax = 1, fill = "b"), alpha = 0.9) +
geom_line(aes(x = x, y = fz, color = "c"), size = 0.5) +
ggtitle('') +
theme_classic() +
theme(title = element_text(size = 8),
plot.title = element_text(hjust = 0.5),
legend.position = "bottom",
legend.box.background = element_rect(color = "black"),
legend.box.margin = margin(t = 1, l = 1)) +
scale_x_date(date_breaks = '2 year', date_labels = '%Y',
expand = c(0, 0)) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = c('grey'), labels = c('kkkk'))+
scale_color_manual(values = c('black'), labels = c('llll')) +
labs(x = NULL, y = NULL, fill = NULL, color = NULL) +
guides(fill = guide_legend(order = 1), color = guide_legend(order = 2, override.aes = list(size = 2)))
plot

Combine 2 plots (ggplot) into one plot and differenciate them

I have a problem similar to the following example. I want to differentiate the lines from different group; for example, I want distinguish the "m" sexe cdf from group 1 and the "m" sexe cdf from group 2.
library(ggplot2)
sexe <- rep(c("m", "w", "x"), 50)
weight1 <- runif(150, 30, 90)
weight2 <- runif(150, 30, 90)
visual1 = data.frame(sexe = sexe, weight = weight1)
visual2 = data.frame(sexe = sexe, weight = weight2)
visual1$group <- 1
visual2$group <- 2
visual12 <- rbind(visual1, visual2)
p <- ggplot(dat = visual12, aes(x = as.numeric(weight), group = interaction(group, sexe), col = sexe)) +
# geom_point(dat = dat2, aes(x = as.numeric(dura), col = TYPE_DE_TERMINAL)) +
stat_ecdf(geom = "step") +
# scale_colour_discrete(guide = guide_legend(override.aes = list(alpha = 1))) +
scale_colour_brewer(name = "sexe", palette = "Set1") +
theme(axis.text = element_text(size = 15), legend.justification = c(1, 0),
legend.position = c(1, 0), axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ylab("CDF") + xlab("...") + theme_bw() +
# scale_y_continuous(limits=c(0,1), labels= percent) +
ggtitle("Cumulative distribution function of ...")
# scale_x_log10(limits = c(1,1e3), breaks = c(10 , 100))
p

What if you change the linetype by group?
p <- ggplot(dat = visual12, aes(x = as.numeric(weight),
group = interaction(group, sexe),
linetype=factor(group), col = sexe)) +
stat_ecdf(geom = "step") +
scale_colour_brewer(name = "sexe", palette = "Set1") +
theme(axis.text = element_text(size = 15),
legend.justification = c(1, 0),
legend.position = c(1, 0),
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ylab("CDF") + xlab("...") + theme_bw() +
ggtitle("Cumulative distribution function of ...")
p

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

ggplot2 in R: annotate outside of plot and underline text - r

Related

Adding a black outline for each x category in heat map in R

Adapting Forest Plot R Script

How do I combined a line graph and a bar graph in R with two different y axis

How to add a box around the legend in ggplot2?

Combine 2 plots (ggplot) into one plot and differenciate them

Categories

Resources