I am a novice coder and have been trying to understand the code posted here: Forest plot with table ggplot coding
I am hoping to use the script to display my own univariate analysis results for a project. I want the script to read the data from a csv file with the columns: "Predictor", "N", "rr", "rrlow", "rrhigh", and "arr". There are in total 19 variables ("Predictors") that I need to display. I have altered the script to read in the values into a single dataframe (rather than having a separate forestdf and fplottable like in the linked thread). However, I am getting multiple "replacement has x rows, data has y".
Here is the code in question:
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
levels(forestdf$Predictor)
forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3) +
xlab("Variable") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = rev(forestdf$Predictor)) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
forestdf$colour <- rep(c("white", "gray95"), length.out=19)
data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), hjust = 0) +
geom_text(aes(x = 5, label = N)) +
geom_text(aes(x = 7, label = arr), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
And the errors I have been receiving:
> ###dataframe
> library(ggplot2)
> library(tidyr)
> library(grid)
> library(gridExtra)
> library(forcats)
>
> forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
> forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) :
replacement has 0 rows, data has 19
> levels(forestdf$Predictor)
NULL
> forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
> p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
+ geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
+ geom_pointrange(shape = 22, fill = "black") +
+ geom_vline(xintercept = 1, linetype = 3) +
+ xlab("Variable") +
+ ylab("Hazard Ratio with 95% Confidence Interval") +
+ theme_classic() +
+ scale_colour_identity() +
+ scale_y_discrete(limits = rev(forestdf$Predictor)) +
+ scale_x_log10(limits = c(0.25, 4),
+ breaks = c(0.25, 0.5, 1, 2, 4),
+ labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
+ theme(axis.text.y = element_blank(), axis.title.y = element_blank())
>
> forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) :
replacement has 0 rows, data has 19
> forestdf$colour <- rep(c("white", "gray95"), length.out=19)
>
> data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
+ geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
+ geom_text(aes(x = 0, label = Predictor), hjust = 0) +
+ geom_text(aes(x = 5, label = N)) +
+ geom_text(aes(x = 7, label = arr), hjust = 1) +
+ scale_colour_identity() +
+ theme_void() +
+ theme(plot.margin = margin(5, 0, 35, 0))
>
> grid.arrange(data_table,p, ncol = 2)
Error in FUN(X[[i]], ...) : object 'Predictor' not found
I greatly appreciate any help or suggestions you may provide.
Thanks!
EDIT:
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=TRUE)
names(forestdf)[1]<-"Predictor"
forestdf$Predictor <- factor(forestdf$Predictor)
forestdf$colour <- rep(c("white", "gray95"), length.out = length(unique(unlist(forestdf[c("Predictor")]))))
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3, colour = "red") +
xlab("Hazard Ratio") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = rev(forestdf$Predictor)) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), hjust = 0) +
geom_text(aes(x = 3, label = N)) +
geom_text(aes(x = 7, label = arr), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
I have made some changes as per IRTFM (thank you!) and it now produces a plot and table. I'm not sure why but it wasn't reading the csv correctly. My main issues now are the following:
The alternating grey and white bars do not alternate correctly on the table side
The header for the columns does not show up on the table
The table is not aligned with the forestplot (ie. top row's forest plot is not the correct forest plot for Albumin) Example Plot
EDIT2:
I was able to fix the alternating colours and alignment with the forestplot. My issue now is that the column titles I've made are now cut off: New Plot. Also, how would I go about only bolding the values with an asterisk?
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=TRUE)
names(forestdf)[1]<-"Predictor"
forestdf$Predictor <- rev(factor(forestdf$Predictor))
forestdf$colour <- rep(c("white", "gray95"), length.out = length(unique(unlist(forestdf[c("Predictor")]))))
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3, colour = "red") +
xlab("Hazard Ratio") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = forestdf$Predictor) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
#forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
data_table <- ggplot(data = forestdf, aes(y = rev(factor(Predictor)))) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), show.legend=TRUE, hjust = 0) +
geom_text(aes(x = 3, label = N)) +
geom_text(aes(x = 5.5, label = arr), hjust = 1) +
geom_text(aes(x = 7, label = PVALUE), hjust = 1) +
geom_text(aes(x = 0, y = 20, label = "Predictor"), hjust = 0) +
geom_text(aes(x = 3, y= 20, label = "N")) +
geom_text(aes(x = 5, y= 20, label = "95% CI"), hjust = 1) +
geom_text(aes(x = 7, y= 20, label = "P Value"), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
Thanks!
Related
What is this type of data visualization plot called and how do I recreate it in R?
Image Source: https://www.pewresearch.org/global/2020/04/30/worldwide-optimism-about-future-of-gender-equality-even-as-many-see-advantages-for-men/pg_2020-04-30_global-gender-equality_0-02/
My Google search only resulted in regular bubble plots like this: https://r-graph-gallery.com/320-the-basis-of-bubble-plot.html
Something like this?
df <- data.frame(Question = rep(c("Getting\nhigh-paying jobs",
"Being leaders in\ntheir community",
"Expressing their\npolitical views",
"Getting a good\neducation"), 3),
Answer = rep(c("Men have more\nopportunities",
"Women have more\nopportunities",
"Both about\nthe same"), each = 4),
Value = c(54, 44, 31, 11, 3, 4, 3, 6, 38, 49, 63, 81))
library(ggplot2)
ggplot(df, aes(y = factor(Question, rev(unique(Question))),
x = factor(Answer, unique(Answer)),
fill = factor(Answer, unique(Answer)))) +
geom_point(shape = 21, aes(size = Value, color = after_scale(fill))) +
geom_text(aes(label = Value, color = Answer)) +
annotate("segment", x = rep(-Inf, 3), xend = rep(Inf, 3),
y = 1:3 + 0.5, yend = 1:3 + 0.5, linetype = 2, alpha = 0.5) +
scale_y_discrete() +
scale_x_discrete(position = "top") +
scale_size_continuous(range = c(5, 30)) +
scale_fill_manual(values = c("#959e4a", "#0f6599", "#dddac8")) +
scale_color_manual(values = c("black", "white", "white")) +
ggtitle(paste("Many think men have more opportunities than women",
"when it comes to getting high-paid jobs", sep = "\n")) +
theme_void() +
theme(legend.position = "none",
axis.text.x = element_text(face = 2),
axis.text.y = element_text(hjust = 1, face = 2),
plot.margin = margin(30, 30, 30, 30),
plot.title = element_text(size = 16, face = 2, family = "serif",
margin = margin(20, 0, 50, 0)))
Here's an example. Lots more formatting tweaks could be done, but I'd think of this fundamentally as a geom_point and a geom_text layer, the rest is tidying up.
library(ggplot2)
fake_data <- data.frame(x = rep(LETTERS[1:3], each = 4),
y = letters[1:4],
val = (1:12) / 12)
ggplot(fake_data, aes(x=1, y = 1, label = scales::percent(val))) +
geom_point(aes(size = val, color = x), alpha = 0.3) +
geom_text() +
scale_size_area(max_size = 20) +
guides(size = "none", color = "none") +
facet_grid(y ~ x, switch = "y") +
theme_void() +
theme(strip.text = element_text())
What is the function for generating data for plotting an exponential curve between two points? Here's a logarithmically spaced sequence. I want to create more of a hockey stick between the start and end point, and the real end goal is the vector of values not the plot.
My use case is that I have a parameter for a plotting function that needs to ramp up slowly between the given values as I try to plot more data. This log sequence is better than a linear sequence, but it still rises too rapidly. I need to keep the values lower and then increase exponentially.
library(emdbook)
plot(lseq(.08, .25, 10000))
Update
Here is the full challenge for context. I'm plotting every 400th index value of s. The geom_dotplot in the final plot, p_diff, is wacky and needs certain binwidth values to correctly size the plot. I tried creating a log sequence called binsize and passing it to the parameter. It looks fine at low values of s, but increases to 0.25 too quickly (0.25 works for the final version with 10000 dots).
library(tidyverse)
library(ggtext)
library(patchwork)
library(truncnorm)
library(ggtext)
library(emdbook)
# simulate hypothetical population at control group mean/sd
set.seed(1)
pop <- data.frame(bdi3 = rtruncnorm(10000, a=0, b=63, mean=24.5, sd=10.7),
id = seq(1:10000))
# create plots
diff <- data.frame(NULL)
binsize = lseq(0.08695510, .25, 10000)
for (s in 1:10000) {
set.seed(s)
samp <-
pop %>%
sample_n(332, replace = FALSE)
ctr <-
samp %>%
sample_n(166, replace = FALSE) %>%
mutate(trt = 0)
trt <-
samp %>%
left_join(dplyr::select(ctr, id, trt), by="id") %>%
mutate(trt = ifelse(is.na(trt), 1, trt)) %>%
filter(trt==1)
diff[s,1] <- s
diff[s,2] <- (mean(trt$bdi3)-mean(ctr$bdi3))
names(diff) <- c("id", "diff")
dat <-
ctr %>%
bind_rows(trt)
if (s %in% seq(1, 10000, by=400)) {
# population
p_pop <-
pop %>%
left_join(dplyr::select(dat, id, trt), by="id") %>%
# mutate(trt = ifelse(is.na(trt), 3, trt),
# trt = factor(trt)) %>%
mutate(selected = ifelse(!is.na(trt), 1, 0),
selected = factor(selected)) %>%
ggplot(., aes(x=bdi3, fill=selected, group=id, alpha=selected)) +
geom_dotplot(method = 'dotdensity', binwidth = 0.25, dotsize = 1,
color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_fill_manual(values=c("grey", "#e69138")) +
scale_alpha_discrete(range = c(0.5, 1)) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_text(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(pop$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(pop$bdi3)+1, y = 25,
label = paste0("Population mean = ",
format(round(mean(pop$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = 20,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("Imaginary population of 10,000 patients who meet study criteria",
subtitle="<span style='color:#e69138'>**Orange**</span> dots represent 332 selected patients")
p_samp <-
ggplot(dat, aes(x=bdi3)) + # group=id, fill=factor(trt)
geom_dotplot(method = 'dotdensity', binwidth = 1.2,
fill="#e69138", alpha=.8, color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up", stroke=1) +
#scale_fill_manual(values=c("#f7f265", "#1f9ac9")) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(dat$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(dat$bdi3)+2, y = 1,
label = paste0("Sample mean = ",
format(round(mean(dat$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("One possible sample of these patients (N=332)",
subtitle="Each dot is a patient sampled from the population who gets randomly assigned to a study arm") +
annotate("text", x = 50, y = .3,
label = "randomize to study arms",
size = 10, color="#696865") +
geom_curve(aes(x = 35, y = .6, xend = 50, yend = .35),
color = "#696865", arrow = arrow(type = "open",
length = unit(0.15, "inches")),
curvature = -.5, angle = 100, ncp =15)
p_ctr <-
ggplot(ctr, aes(x=bdi3)) +
geom_dotplot(method = 'dotdensity', binwidth = 1.6,
color="white", fill="#f7f265", alpha=1,
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(pop$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(ctr$bdi3)+2, y = 1,
label = paste0("Control mean = ",
format(round(mean(ctr$bdi3), 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("50% patients randomly assigned<br>to the <span style='color:#f7f265'>**control**</span> group",
subtitle="166 of the <span style='color:#e69138'>**orange**</span> dots turn <span style='color:#f7f265'>**yellow**</span>")
p_trt <-
ggplot(trt, aes(x=bdi3)) +
geom_dotplot(method = 'dotdensity', binwidth = 1.6,
color="white", fill="#1f9ac9", alpha=1,
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(limits=c(-0, 63)) +
xlab("\nDepression Severity as measured by BDI-II") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_markdown(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27),
plot.margin = margin(0, 0, 1.5, 0, "cm")) +
geom_vline(xintercept = mean(trt$bdi3), linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = mean(trt$bdi3)+2, y = 1,
label = paste0("Treatment mean = ",
format(round(trt$bdi3, 1), nsmall = 1)),
hjust = 0, color = "#696865", size=10) +
annotate("text", x = 0, y = .75,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10) +
ggtitle("50% patients randomly assigned<br>to the <span style='color:#1f9ac9'>**treatment**</span> group",
subtitle="166 of the <span style='color:#e69138'>**orange**</span> dots turn <span style='color:#1f9ac9'>**blue**</span>")
p_diff <-
diff %>%
mutate(color=ifelse(diff < -2.3 | diff > 2.3, 1, 0)) %>%
mutate(color=factor(color)) %>%
ggplot(., aes(x=diff, fill=color, group=id)) +
geom_dotplot(method = 'dotdensity', binwidth = binsize[s], dotsize = 1,
color="white",
binpositions="all", stackgroups=TRUE,
stackdir = "up") +
scale_fill_manual(values=c("grey", "red")) +
scale_y_continuous(NULL, breaks = NULL) +
theme_minimal() +
scale_x_continuous(breaks=c(-5:5), limits=c(-5, 5)) +
xlab("\nAverage Treatment Effect (Treatment Mean - Control Mean)") +
theme(legend.position = "none",
axis.title = element_text(size=30, color = "#696865"),
axis.text = element_text(size=24, color = "#696865"),
plot.title = element_text(size=36, color = "#696865",
face="bold"),
plot.subtitle = element_markdown(size=27)) +
geom_vline(xintercept = 0, linetype="dashed",
color = "#696865", size=1) +
annotate("text", x = 0.2, y = 25, label = "No effect",
hjust = 0, color = "#696865", size=10) +
ggtitle("Simulation based null distribution",
subtitle="Plausible estimates of the treatment effect if the hypothesis of no effect is true") +
geom_vline(xintercept = 2.3, linetype="dotted",
color = "red", size=1) +
geom_vline(xintercept = -2.3, linetype="dotted",
color = "red", size=1) +
annotate("text", x = 2.5, y = 25, label = "Reject null",
hjust = 0, color = "red", size=10) +
annotate("text", x = -2.5, y = 25, label = "Reject null",
hjust = 1, color = "red", size=10) +
annotate("text", x = -5, y = 20,
label = paste0("Sample #", s),
hjust = 0, color = "#e69138", size=10)
p_all <- p_pop / p_samp / (p_trt + p_ctr) / p_diff +
plot_layout(heights = c(2, 2, 1, 2))
ggsave(paste0("animate/", s, ".png"),
height = 40, width = 18.5, units = "in",
dpi = 300)
}
}
The second plot to generate, s==401, looks fine. binsize[401] works for this many dots. But by the 5th plot, s==1601, the dots to not fit. binsize[1601] is too high.
I'm thinking that if I could create a better vector of values for binsize that rises more slowly to 0.25 this will work.
This is more of a maths question rather than a programming question, but there's a fairly simple programming solution.
Here's a simple function you can try. It allows you to produce a sequence of numbers between a starting and ending number just like the lseq function, but includes a shape parameter that controls how "exponential" the numbers appear.
seq_exp <- function(start, stop, n, shape)
{
(stop - start) * exp(seq(0, shape, length.out = n))/exp(shape) + start
}
So you're probably looking for something like this:
plot(seq_exp(0.08, 0.25, 10000, shape = 10))
If you set the shape parameter to 1 it is just a normal exponential curve like in lseq:
plot(seq_exp(0.08, 0.25, 10000, shape = 1))
And of course you can play around with different values:
plot(seq_exp(0.08, 0.25, 10000, shape = 5))
plot(seq_exp(0.08, 0.25, 10000, shape = 30))
Created on 2020-04-01 by the reprex package (v0.3.0)
I've been scratching my head for hours on this. What I have up to now:
library(ggplot2)
library(grid)
all_data = data.frame(country=rep(c("A","B","C","D"),times=1,each=20),
value=rep(c(10,20,30,40),times=1,each=20),
year = rep(seq(1991,2010),4))
# PLOT GRAPH
p1 <- ggplot() + theme_bw() + geom_line(aes(y = value, x = year,
colour=country), size=2,
data = all_data, stat="identity") +
theme(plot.title = element_text(size=18,hjust = -0.037), legend.position="bottom",
legend.direction="horizontal", legend.background = element_rect(size=0.5, linetype="solid", colour ="black"),
legend.text = element_text(size=16,face = "plain"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(),axis.line = element_line(colour = "black"),legend.title = element_blank(),
axis.text=element_text(size=18,face = "plain"),axis.title.x=element_text(size=18,face = "plain", hjust = 1,
margin = margin(t = 10, r = 0, b = 0, l = 0)),
axis.title.y=element_blank())
p1 <- p1 + ggtitle("Index")
p1 <- p1 + xlab("Year")
p1 <- p1 + scale_x_continuous(expand=c(0,0),breaks=seq(1991,2010,4))
p1 <- p1 + theme(plot.margin=unit(c(5.5, 300, 5.5, 5.5), "points"))
p1 <- p1 + geom_text(aes(label = "Country", x = 2011, y =
max(all_data$value)+10), hjust = 0, vjust = -2.5, size = 6)
p1 <- p1 + geom_text(aes(label = "Average", x = Inf, y =
max(all_data$value)+10), hjust = -1.5, vjust = -2, size = 6)
p1 <- p1 + geom_text(aes(label = all_data$country, x = 2011, y =
all_data$value), hjust = 0, size = 6)
p1 <- p1 + geom_text(aes(label = as.character(all_data$value), x = Inf,
y = all_data$value), hjust = -5, size = 6)
p1 <- p1 +
annotate("segment",x=2011,xend=2014,y=Inf,yend=Inf,color="black",lwd=1)
# Override clipping
gg2 <- ggplot_gtable(ggplot_build(p1))
gg2$layout$clip[gg2$layout$name == "panel"] <- "off"
grid.draw(gg2)
What I am struggling with is the following:
1) how to annotate outside of the plot, underline both "Country" and "Average" without extending the x-axis.
2) Isn't there more systematic approach to the whole annotation process. Adjusting hjust and vjust by visual inspection seems very troublesome.
Any help is appreciated!
See if this works for you:
# define some offset parameters
x.offset.country = 2
x.offset.average = 5
x.range = range(all_data$year) + c(0, x.offset.average + 2)
y.range = range(all_data$value) + c(-5, 10)
y.label.height = max(all_data$value) + 8
# subset of data for annotation
all_data_annotation <- dplyr::filter(all_data, year == max(year))
p <- ggplot(all_data,
aes(x = year, y = value, group = country, colour = country)) +
geom_line(size = 2) +
# fake axes (x-axis stops at year 2009, y-axis stops at value 45)
annotate("segment", x = 1991, y = 5, xend = 2009, yend = 5) +
annotate("segment", x = 1991, y = 5, xend = 1991, yend = 45) +
# country annotation
geom_text(data = all_data_annotation, inherit.aes = FALSE,
aes(x = year + x.offset.country, y = value, label = country)) +
annotate("text", x = max(all_data$year) + x.offset.country, y = y.label.height,
label = "~underline('Country')", parse = TRUE) +
# average annotation
geom_text(data = all_data_annotation, inherit.aes = FALSE,
aes(x = year + x.offset.average, y = value, label = value)) +
annotate("text", x = max(all_data$year) + x.offset.average, y = y.label.height,
label = "~underline('Average')", parse = TRUE) +
# index (fake y-axis label)
annotate("text", x = 1991, y = y.label.height,
label = "Index") +
scale_x_continuous(name = "Year", breaks = seq(1991, 2009, by = 4), expand = c(0, 0)) +
scale_y_continuous(name = "", breaks = seq(10, 40, by = 10), expand = c(0, 0)) +
scale_colour_discrete(name = "") +
coord_cartesian(xlim = x.range, ylim = y.range) +
theme_classic() +
theme(axis.line = element_blank(),
legend.position = "bottom",
legend.background = element_rect(size=0.5, linetype="solid", colour ="black"))
# Override clipping (this part is unchanged)
gg2 <- ggplot_gtable(ggplot_build(p))
gg2$layout$clip[gg2$layout$name == "panel"] <- "off"
grid.draw(gg2)
I have a problem similar to the following example. I want to differentiate the lines from different group; for example, I want distinguish the "m" sexe cdf from group 1 and the "m" sexe cdf from group 2.
library(ggplot2)
sexe <- rep(c("m", "w", "x"), 50)
weight1 <- runif(150, 30, 90)
weight2 <- runif(150, 30, 90)
visual1 = data.frame(sexe = sexe, weight = weight1)
visual2 = data.frame(sexe = sexe, weight = weight2)
visual1$group <- 1
visual2$group <- 2
visual12 <- rbind(visual1, visual2)
p <- ggplot(dat = visual12, aes(x = as.numeric(weight), group = interaction(group, sexe), col = sexe)) +
# geom_point(dat = dat2, aes(x = as.numeric(dura), col = TYPE_DE_TERMINAL)) +
stat_ecdf(geom = "step") +
# scale_colour_discrete(guide = guide_legend(override.aes = list(alpha = 1))) +
scale_colour_brewer(name = "sexe", palette = "Set1") +
theme(axis.text = element_text(size = 15), legend.justification = c(1, 0),
legend.position = c(1, 0), axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ylab("CDF") + xlab("...") + theme_bw() +
# scale_y_continuous(limits=c(0,1), labels= percent) +
ggtitle("Cumulative distribution function of ...")
# scale_x_log10(limits = c(1,1e3), breaks = c(10 , 100))
p
What if you change the linetype by group?
p <- ggplot(dat = visual12, aes(x = as.numeric(weight),
group = interaction(group, sexe),
linetype=factor(group), col = sexe)) +
stat_ecdf(geom = "step") +
scale_colour_brewer(name = "sexe", palette = "Set1") +
theme(axis.text = element_text(size = 15),
legend.justification = c(1, 0),
legend.position = c(1, 0),
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ylab("CDF") + xlab("...") + theme_bw() +
ggtitle("Cumulative distribution function of ...")
p
Here is my plot
dat <- data.frame(
pos = c(1, 3, 5, 8, 10, 12),
start = c(1,3, 6, 7, 10, 11),
end = c(5, 6, 9, 9, 13, 12)
)
library(ggplot2)
p <- ggplot(dat) + geom_segment(aes(x=start, y=pos, xend=end, yend=pos),
color="blue", size=2) + ylab("Fragments)") + xlab("Position")
scale_y_reverse() + theme_bw()
p1 <- p + opts(legend.position="left",
panel.background=theme_blank(),panel.border=theme_blank(),
panel.grid.major=theme_blank(),
panel.grid.minor=theme_blank(),plot.background=theme_blank())
p1
Bitmapped desired version is, with axis line and labels near the segments. [ just exta-note: Note that bitmap changed the line to round ended (it would be interesting to see if we can do in the ggplot2)]
Interestingly, I actually think this is easier in base graphics:
plot(c(0,13),c(1,12),type = "n",axes = FALSE,xlab = "Position",ylab = "")
segments(x0 = dat$start,
y0 = dat$pos,
x1 = dat$end,
y1 = dat$pos,
col = "blue",
lwd = 6,
lend = 2)
text(x = dat$start - 0.5,y = dat$pos,labels = dat$pos,font = 2)
axis(1)
axis(1,at = c(0,12),labels = FALSE,tcl = 0.5)
Edit Added additional axis call to get the outer most tick in both directions.
EDIT: Updating code for ggplot2 version 0.9.3.1.
The task is much easier with recent versions of ggplot2. The following code does it all:
# Load required packages
library(ggplot2)
# Your data
dat <- data.frame(
pos = c(1, 3, 5, 8, 10, 12),
start = c(1,3, 6, 7, 10, 11),
end = c(5, 6, 9, 9, 13, 12) )
# Get the plot
p <- ggplot(dat) +
geom_segment(aes(x=start, y=pos, xend=end, yend=pos),
color="blue", size=2, lineend = "round") +
ylab("Fragments") + xlab("Position") +
theme_bw() +
geom_text(aes(label = pos, x = start, y = pos), hjust = 1.7) +
scale_x_continuous(breaks = seq(0,14,2), labels = seq(0,14,2), expand = c(0,0)) +
scale_y_continuous(limits = c(-1, 14), expand = c(0,0)) +
geom_hline(yintercept = -1) +
geom_segment(aes(x = 0, y = -1, xend = 0, yend = -0.9)) +
geom_segment(aes(x = 14, y = -1, xend = 14, yend = -0.9)) +
theme(panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
panel.border=element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank())
p
Original answer:
It can be done in ggplot2, with a bit of fiddling. Functions from the grid() package are needed to remove the y-axis tick marks.
# Load required packages
library(ggplot2)
library(grid)
# Your data
dat <- data.frame(
pos = c(1, 3, 5, 8, 10, 12),
start = c(1,3, 6, 7, 10, 11),
end = c(5, 6, 9, 9, 13, 12) )
# Get the base plot
p <- ggplot(dat) +
geom_segment(aes(x=start, y=pos, xend=end, yend=pos),
color="blue", size=2) + ylab("Fragments") + xlab("Position") + theme_bw() +
geom_text(aes(label = pos, x = start, y = pos), hjust = 1.7) +
scale_x_continuous(breaks = seq(0,14,2), labels = seq(0,14,2), expand = c(0,0)) +
scale_y_continuous(limits = c(-1, 14), expand = c(0,0)) +
geom_hline(yintercept = -1) +
geom_segment(aes(x = 0, y = -1, xend = 0, yend = -0.9)) +
geom_segment(aes(x = 14, y = -1, xend = 14, yend = -0.9)) +
opts(panel.grid.major=theme_blank(),
panel.grid.minor=theme_blank(),
panel.border=theme_blank(),
axis.title.y = theme_blank(),
axis.text.y = theme_blank())
p
# Remove the y-axis tick marks
g <- ggplotGrob(p)# Save plot as a grob
#grid.ls(g)
grid.remove(grid.get("axis.ticks", grep=T, global = TRUE)[[1]]$name)
The result:
With some more fiddling, you can get round ends on the line segments. It needs the proto package installed. Then to run some code obtained from here to enable a new geom geom_segment2 to be used which takes a "line end" argument.
# To create the new `geom_segment2`
library(proto)
GeomSegment2 <- proto(ggplot2:::GeomSegment, {
objname <- "geom_segment2"
draw <- function(., data, scales, coordinates, arrow=NULL, ...) {
if (is.linear(coordinates)) {
return(with(coord_transform(coordinates, data, scales),
segmentsGrob(x, y, xend, yend, default.units="native",
gp = gpar(col=alpha(colour, alpha), lwd=size * .pt,
lty=linetype, lineend = "round"),
arrow = arrow)
))
}
}})
geom_segment2 <- function(mapping = NULL, data = NULL, stat =
"identity", position = "identity", arrow = NULL, ...) {
GeomSegment2$new(mapping = mapping, data = data, stat = stat,
position = position, arrow = arrow, ...)
}
# The base plot
p <- ggplot(dat) +
geom_segment2(aes(x=start, y=pos, xend=end, yend=pos),
color="blue", size=2, lineend = "round") + ylab("Fragments") + xlab("Position") + theme_bw() +
geom_text(aes(label = pos, x = start, y = pos), hjust = 1.7) +
scale_x_continuous(breaks = seq(0,14,2), labels = seq(0,14,2), expand = c(0,0)) +
scale_y_continuous(limits = c(-1, 14), expand = c(0,0)) +
geom_hline(yintercept = -1) +
geom_segment(aes(x = 0, y = -1, xend = 0, yend = -0.9)) +
geom_segment(aes(x = 14, y = -1, xend = 14, yend = -0.9)) +
opts(panel.grid.major=theme_blank(),
panel.grid.minor=theme_blank(),
panel.border=theme_blank(),
axis.title.y = theme_blank(),
axis.text.y = theme_blank())
p
## Remove the y-axis tick marks
g <- ggplotGrob(p)
#grid.ls(g)
grid.remove(grid.get("axis.ticks", grep=T, global = TRUE)[[1]]$name)
The result: