I have the following data frame:
df.test <- data.frame(
id = c("EIF3H", "USP9X", "USP44", "USP51", "USP15",
"USP48", "USP47", "USP43", "USPL1", "UCHL5", "USP50", "USP7",
"UCHL1", "USP11", "USP26", "PAN2", "VCPIP1", "USP46", "USP29",
"USP22", "USP49", "ZRANB1", "OTUD4", "OTUD7B", "USP54", "PSMD14",
"USP20", "USP6", "OTUD3", "USP39", "UCHL3", "USP19", "USP21",
"USP30", "TNFAIP3", "USP17L2", "USP32", "JOSD2", "PSMD7", "ATXN3L",
"SENP2", "STAMBPL1", "USP37", "USP35", "USP3", "ALG13", "USP45",
"Control", "USP9Y", "ATXN3", "OTUD6A", "USP42", "USP12", "MPND",
"USP40", "OTUD1", "USP31", "USP8", "USP13", "USP53", "USP34",
"USP17L5", "MYSM1", "USP36", "OTUD7A", "USP10", "USP2", "USP18",
"OTUB1", "EIF3F", "USP1", "USP14", "COPS5", "USP24", "USP4",
"CYLD", "COPS6", "STAMBP", "USP5", "OTUD6B", "BAP1", "USP25",
"YOD1", "USP28", "USP38", "USP41", "JOSD1", "UCK2", "USP16",
"USP27X", "BRCC3", "USP33", "OTUD5", "OTUB2"),
log.score = c(4.22265293851218, 3.03983376346562,
2.4139305569695, 2.32586482009754, 2.30391458369018, 2.19017103893211,
2.10803347738743, 2.10011933499842, 1.82596928196197, 1.79890343496053,
1.78330640083025, 1.58384231036782, 1.4480988629484, 1.4331502122056,
1.41965675282741, 1.37552194849409, 1.37548070593268, 1.3126672736385,
1.27123241483349, 1.25213781606166, 1.1643918571801, 1.14738583497561,
1.0423927129399, 1.03157776352028, 1.0279685056071, 0.953426802337995,
0.94104282122269, 0.929925173732472, 0.886424283199432, 0.886123467368948,
0.815961921373111, 0.811437095842094, 0.767054687254773,
0.754314635766764, 0.750654863646671, 0.728646377897516,
0.707899061519581, 0.703532261199885, 0.692546751828376,
0.684554481775416, 0.652104306506768, 0.642046105413661,
0.630116510664521, 0.62908000782908, 0.619354680809075, 0.614876544107784,
0.61293067306798, 0.606898831140113, 0.603504247802433, 0.578642901486857,
0.576246380387172, 0.549612309171809, 0.53101794103743, 0.513442014568548,
0.506304999011214, 0.492144128304169, 0.462596515841992,
0.454185884038717, 0.450163300207299, 0.434529992991809,
0.429725658566606, 0.42864060724616, 0.419896514762075, 0.409715596281838,
0.365946146577929, 0.363963683646553, 0.357614629472314,
0.352851847129221, 0.343470593766502, 0.313051079788499,
0.304614649499993, 0.291604597354374, 0.287030586811975,
0.272263598289704, 0.27175988000523, 0.265200170411153, 0.264528852761016,
0.244704590019742, 0.179680291853473, 0.154102353851514,
0.147800680553723, 0.127575655021633, 0.126051956011554,
0.1207205737776, 0.118712371231544, 0.11046860245595, 0.0939775902962627,
0.0673791277640148, 0.066320409857141, 0.0582650179118847,
0.0548860857591892, 0.0374554663486737, 0.0147532091971383,
0.0134163514896924),
neg.rank = 1:94)
From this data frame I made this plot:
library(ggplot2)
x <- "neg.rank"
p <- ggplot(df.test, aes_string(x = x, y = df.test$log.score)) +
geom_point()
I want to add labels to the top10 ids and I tried the following:
library(ggrepel)
library(dplyr)
p + geom_label_repel(data = df.test[df.test[[x]] %in% 1:10, ], aes_string(x = x, y = df$log.score, label = df.test$id))
But this gives me a More than one expression parsed error:
More than one expression parsed
Backtrace:
█
1. ├─ggrepel::geom_label_repel(...)
2. │ └─ggplot2::layer(...)
3. └─ggplot2::aes_string(x = x, y = df$log.score, label = df.test$id)
4. └─base::lapply(...)
5. └─ggplot2:::FUN(X[[i]], ...)
6. └─rlang::parse_expr(x)
I have no clue what is wrong with the code.
It is not working as you are inserting the vectors directly into your aes_string.
If you want yours to be working you need to be strict with your aes_string and really should only use strings:
p +
geom_label_repel(
data = df.test[df.test[[x]] %in% 1:10, ],
aes_string(x = x, y = "log.score", label = "id"),
)
I also added a "cleaner" solution. I changed your subsetting logic to use dplyr, as you are already loading the package anyway and changed all your aes_string() to aes().
library(ggplot2)
library(ggrepel)
library(dplyr)
ggplot(df.test, aes(x = neg.rank, y = log.score)) +
geom_point() +
geom_label_repel(
data = df.test %>% slice_min(neg.rank, n = 10),
aes(label = id),
max.overlaps = 10,
xlim = c(10, NA),
ylim = c(3, NA),
direction = "x"
)
Cheers
Hannes
Related
I want to create a new stat which calculates interval-censored survival with survival::survfit.formula. But I seem to get a wrong data frame in the compute_group function, and I struggle to find the reason for it.
Creating a data frame with exactly the same code "outside" and using geom_path (which I want to use for the stat), results in a fine result (see expected result). - it seems as if survfit.formula() is creating NAs within compute_group(), but I don't understand why.
setting /adding na.rm = TRUE/FALSE does not change anything.
Also using Inf instead of NA for time2 does not help.
library(ggplot2)
library(survival)
set.seed(42)
testdf <- data.frame(time = sample(30, replace = TRUE), time2 = c(20, 10, 10, 30, rep(NA, 26)))
fit_icens <-
survival::survfit.formula(
survival::Surv(time = time, time2 = time2, type = "interval2") ~ 1,
data = testdf
)
Expected result
path <- data.frame(time = fit_icens$time, time2= fit_icens$surv)
ggplot(path, aes(x = time, y = time2)) +
geom_path() +
coord_cartesian(ylim = c(0, 1))
Failing
StatIcen <- ggplot2::ggproto("StatIcen", Stat,
required_aes = c("time", "time2"),
compute_group = function(data, scales) {
fit_icens <-
survival::survfit.formula(
survival::Surv(time = data$time, time2 = data$time2, type = "interval2") ~ 1,
data = data
)
path <- data.frame(x = fit_icens$time, y = fit_icens$surv)
path
}
)
stat_icen <- function(mapping = NULL, data = NULL, geom = "path",
position = "identity", show.legend = NA,
inherit.aes = TRUE, ...) {
layer(
stat = StatIcen, data = data, mapping = mapping, geom = geom,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(...)
)
}
ggplot(testdf, aes(time = time, time2 = time2)) +
stat_icen()
#> Warning: Removed 26 rows containing non-finite values (stat_icen).
Created on 2020-05-04 by the reprex package (v0.3.0)
Great question Tjebo, thanks for posting.
As you have already figured out, the problem is that the NA values are being stripped out of your data before it is passed to compute_group. The Extending ggplot vignette doesn't mention this, but your data is first passed through the compute_layer member function of your ggproto object. Since you haven't defined a compute_layer method, your StatIcen class inherits the method from the class ggplot2::Stat.
If you look at the source code for this method in ggplot2::Stat$compute_layer, you will see this is where your NA values are stripped out, using the remove_missing function, which removes rows in your data frame with missing values in any of the named columns. Presumably, you still want NA values removed if they appear in your time column, but not if they appear in time2.
So all I have done here is to copy the source code from Stat$compute_layer and adjust the remove_missing call slightly, then make it a member of StatIcen:
StatIcen <- ggplot2::ggproto("StatIcen", Stat,
required_aes = c("time", "time2"),
compute_group = function(data, scales){
fit_icens <- survival::survfit.formula(
survival::Surv(time = data$time, time2 = data$time2,
type = "interval2") ~ 1, data = data)
data.frame(x = fit_icens$time, y = fit_icens$surv)
},
compute_layer = function (self, data, params, layout)
{
ggplot2:::check_required_aesthetics(self$required_aes, c(names(data),
names(params)), snake_class(self))
data <- remove_missing(data, params$na.rm, "time",
ggplot2:::snake_class(self), finite = TRUE)
params <- params[intersect(names(params), self$parameters())]
args <- c(list(data = quote(data), scales = quote(scales)), params)
ggplot2:::dapply(data, "PANEL", function(data) {
scales <- layout$get_scales(data$PANEL[1])
tryCatch(do.call(self$compute_panel, args),
error = function(e) {
warning("Computation failed in `",
ggplot2:::snake_class(self),
"()`:\n", e$message, call. = FALSE)
ggplot2:::new_data_frame()
})
})
}
)
So now we get:
ggplot(testdf, aes(time = time, time2 = time2)) + stat_icen()
I'd like to implement a cross-talk functionality between a table and plot in both directions:
select the row in the table which will be reflected in the plot
select a dot in the plot which will be reflected in the table. Same idea as here.
I've managed to implement a script, which works beautifully if I make scatter plot with ggplot() and table (both objects cross-talk!). However, when used EnhancedVolcano() and table I got the following error:
Error in EnhancedVolcano(toptable = data_shared, lab = "disp", x = "qsec", :
qsec is not numeric!
If I replace data_shared variable with df_orig, no error is raised, but there is no cross-talking between objects :(
Does this mean that SharedData$new() doesn't recognize numeric values as numeric? How to fix this error?
Any help is highly appreciated.
Thank you
Toy example:
library(plotly) # '4.9.1'
library(DT) # '0.11'
library(crosstalk) # ‘1.0.0’
library(EnhancedVolcano) # ‘1.4.0’
# Input
data1 = mtcars #dim(data1) # 32 11
data_shared = SharedData$new(data1) #, key = c("qsec", "hp"))
# df_orig = data_shared$origData()
# V-Plot
vp =EnhancedVolcano( toptable = data_shared,
lab = 'disp',
x = 'qsec',
y = 'hp',
xlab ='testX',
ylab = 'testY')
bscols(
ggplotly(vp + aes(x= qsec, y= -log10(hp/1000))),
datatable(data_shared, style="bootstrap", class="compact", width="100%",
options=list(deferRender=FALSE, dom='t')))
Same script, which works with ggplot():
data1 = mtcars #dim(data1) # 32 11
data_shared = SharedData$new(data1)
vp = ggplot(data = data_shared, mapping = aes(qsec, hp)) +
geom_point()
bscols(
ggplotly(vp) ,
datatable(data_shared, style="bootstrap", class="compact", width="100%",
options=list(deferRender=FALSE, dom='t')))
Note: Related (same) question was posted at BioStars, and the package author posted an answer, with author's permission copying an answer here:
Hi,
Thanks - that's very useful code and I may add it to the main package vignette, eventually.
I tried it here on my computer and I was able to get it working in my browser, but some components of the original plot seem to have been lost. I think that you just need to convert your column, 'qsec', to numerical values.
Re-using an example from my Vignette, here is a perfectly reproducible example:
library("pasilla")
pasCts <- system.file("extdata", "pasilla_gene_counts.tsv",
package="pasilla", mustWork=TRUE)
pasAnno <- system.file("extdata", "pasilla_sample_annotation.csv",
package="pasilla", mustWork=TRUE)
cts <- as.matrix(read.csv(pasCts,sep="\t",row.names="gene_id"))
coldata <- read.csv(pasAnno, row.names=1)
coldata <- coldata[,c("condition","type")]
rownames(coldata) <- sub("fb", "", rownames(coldata))
cts <- cts[, rownames(coldata)]
library("DESeq2")
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design = ~ condition)
featureData <- data.frame(gene=rownames(cts))
mcols(dds) <- DataFrame(mcols(dds), featureData)
dds <- DESeq(dds)
res <- results(dds)
library(EnhancedVolcano)
p1 <- EnhancedVolcano(res,
lab = rownames(res),
x = "log2FoldChange",
y = "pvalue",
pCutoff = 10e-4,
FCcutoff = 2,
xlim = c(-5.5, 5.5),
ylim = c(0, -log10(10e-12)),
pointSize = c(ifelse(res$log2FoldChange>2, 8, 1)),
labSize = 4.0,
shape = c(6, 6, 19, 16),
title = "DESeq2 results",
subtitle = "Differential expression",
caption = "FC cutoff, 1.333; p-value cutoff, 10e-4",
legendPosition = "right",
legendLabSize = 14,
col = c("grey30", "forestgreen", "royalblue", "red2"),
colAlpha = 0.9,
drawConnectors = TRUE,
hline = c(10e-8),
widthConnectors = 0.5)
p1 <- p1 +
ggplot2::coord_cartesian(xlim=c(-6, 6)) +
ggplot2::scale_x_continuous(
breaks=seq(-6,6, 1))
library(plotly)
library(DT)
library(crosstalk)
bscols(
ggplotly(p1 + aes(x= log2FoldChange, y= -log10(pvalue))),
datatable(
data.frame(res),
style="bootstrap",
class="compact", width="100%",
options=list(deferRender=FALSE, dom='t')))
Unfortunately, plotly and/or bscols don't like the use of bquote(), so, one cannot have the fancy axes names that I use in EnhancedVolcano:
... + xlab(bquote(~Log[2] ~ "fold change")) + ylab(bquote(~-Log[10] ~ italic(P)))
When i try to add these, it throws an error.
Kevin
tried to modify few things in volcano function, got following error:
Error in as.data.frame.default(toptable) :
cannot coerce class ‘c("SharedData", "R6")’ to a data.frame
not sure yet, how to fix it.
I have the following data
dput(head(new_data))
structure(list(series = c("serie1", "serie2", "serie3",
"serie4"), Chr1_Coverage = c(0.99593043561, 0.995148711122,
0.996666194154, 1.00012127128), Chr2_Coverage = c(0.998909597935,
0.999350808049, 0.999696737431, 0.999091916132), Chr3_Coverage = c(1.0016871729,
1.00161108919, 0.997719609642, 0.999887319775), Chr4_Coverage = c(1.00238874787,
1.00024296426, 1.0032143002, 1.00118558895), Chr5_Coverage = c(1.00361001984,
1.00233184803, 1.00250793369, 1.00019989912), Chr6_Coverage = c(1.00145962318,
1.00085036645, 0.999767433622, 1.00018523387), Chr7_Coverage = c(1.00089620637,
1.00201715802, 1.00430458519, 1.00027257509), Chr8_Coverage = c(1.00130277775,
1.00332841536, 1.0027493578, 0.998107829176), Chr9_Coverage = c(0.998473062701,
0.999400379593, 1.00130178863, 0.9992796405), Chr10_Coverage = c(0.996508132358,
0.999973856701, 1.00180072957, 1.00172163916), Chr11_Coverage = c(1.00044015107,
0.998982489577, 1.00072330837, 0.998947935281), Chr12_Coverage = c(0.999707836898,
0.996654676531, 0.995380321719, 1.00116773966), Chr13_Coverage = c(1.00199118466,
0.99941499519, 0.999850500793, 0.999717689167), Chr14_Coverage = c(1.00133747054,
1.00232593477, 1.00059139379, 1.00233368187), Chr15_Coverage = c(0.997036875653,
1.0023727983, 1.00020943048, 1.00089130742), Chr16_Coverage = c(1.00527426537,
1.00318861724, 1.0004269482, 1.00471256502), Chr17_Coverage = c(0.995530811404,
0.995103514254, 0.995135851149, 0.99992196636), Chr18_Coverage = c(0.99893371568,
1.00452723685, 1.00006262572, 1.00418478844), Chr19_Coverage = c(1.00510422346,
1.00711968194, 1.00552123413, 1.00527171097), Chr20_Coverage = c(1.00113612137,
1.00130658886, 0.999390191542, 1.00178637085), Chr21_Coverage = c(1.00368753618,
1.00162782873, 1.00056883447, 0.999797571642), Chr22_Coverage = c(0.99677846234,
1.00168287612, 0.997645576841, 0.999297594524), ChrX_Coverage = c(1.04015901555,
0.934772492047, 0.98981339011, 0.999960536561), ChrY_Coverage = c(9.61374227868e-09,
2.50609172398e-07, 8.30448295172e-08, 1.23741398572e-08)), .Names = c("series",
"Chr1_Coverage", "Chr2_Coverage", "Chr3_Coverage", "Chr4_Coverage",
"Chr5_Coverage", "Chr6_Coverage", "Chr7_Coverage", "Chr8_Coverage",
"Chr9_Coverage", "Chr10_Coverage", "Chr11_Coverage", "Chr12_Coverage",
"Chr13_Coverage", "Chr14_Coverage", "Chr15_Coverage", "Chr16_Coverage",
"Chr17_Coverage", "Chr18_Coverage", "Chr19_Coverage", "Chr20_Coverage",
"Chr21_Coverage", "Chr22_Coverage", "ChrX_Coverage", "ChrY_Coverage"
), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame"
))
and I would like to plot it as this
I thought of transposing the data starting from the second column and name the new transposed data by the first column in the initial data with the following code:
output$Plot_1 <- renderPlotly({
Plot_1_new_data[,2:24] <- lapply(Plot_1_new_data[,2:24], as.numeric)
# first remember the names
n <- as.data.frame(Plot_1_new_data[0:nrow(Plot_1_new_data),1])
# transpose all but the first column (name)
Plot_1_new_data_T <- as.data.frame(t(Plot_1_new_data[,-1]))
colnames(Plot_1_new_data_T) <- n
#plot data
library(reshape)
melt_Transposed_Plot_1_new_data <- melt(Plot_1_new_data_T,id="series")
ggplotly(melt_Transposed_Plot_1_new_data,aes(x=series,y=value,colour=variable,group=variable)) + geom_line()
})
However, when I check the "Plot_1_new_data_T" it seems that the first column is named as c("serie1","serie2",..."serie14") and the rest is named as NA.
Any idea how to proceed because I am new to both R and shiny.
Something like this?
xm = melt(x)
ggplot(xm[xm$variable != 'ChrY_Coverage' & xm$variable != 'ChrX_Coverage', ],
aes(as.integer(variable), value, color=series)) +
geom_line() +
scale_x_continuous(breaks = as.integer(xm$variable),
labels = as.character(xm$variable)) +
theme(axis.text.x = element_text( angle=45, hjust = 1))
ggplotly()
Note that the last two columns were removed from this plot, because they are of such a different scale that including them masks any variation in the other columns. If you want to include all the columns, you could use this instead:
ggplot(xm, aes(as.integer(variable), value, color=series)) +
geom_line() +
...
I've run a spline through some points from a regression and I would like to plot them with ggplot2 where the x scale are years, but I'm not sure how to do this.
How would I override the x-scale to go from 1920-1950?
Data:
df <- structure(list(x = 1:200, y = c(0.00122973667762024, 6.62098801946071e-05,
-0.000959979058174531, -0.00185343528846307, -0.00261876396164689,
-0.0032605702287019, -0.00378345924060399, -0.00419203614832906,
-0.00449090610285299, -0.00468467425515169, -0.00477794575620104,
-0.00477532575697695, -0.0046814194084553, -0.00450083186161199,
-0.00423816826742291, -0.00389803377686397, -0.00348503354091104,
-0.00300377271054004, -0.00245885643672684, -0.00185488987044735,
-0.00119647878632586, -0.000489056540407894, 0.000259484465636334,
0.00104079719045607, 0.00184653459270055, 0.00266834963101903,
0.00349789526406075, 0.00432682445047494, 0.00514679014891087,
0.00594944531801776, 0.00672644291644486, 0.00746943590284142,
0.00817007723585667, 0.00882001987413988, 0.00941091677634026,
0.00993442090110708, 0.0103821852070896, 0.010745862652937, 0.0110171061972986,
0.0111875687988235, 0.011248928362097, 0.0111981513301005, 0.0110440035734643,
0.0107968475027119, 0.010467045528367, 0.010064960060953, 0.00960095351099359,
0.00908538828901222, 0.00852862680553249, 0.00794103147107794,
0.00733296469617213, 0.00671478889133861, 0.00609686646710094,
0.00548955983398266, 0.00490323140250733, 0.00434824358319851,
0.00383495878657975, 0.00337373942317461, 0.00297494790350662,
0.00264894663809936, 0.00240601257949406, 0.00224981076448868,
0.00217279857375538, 0.00216634775507979, 0.00222183005624753,
0.00233061722504423, 0.00248408100925552, 0.00267359315666704,
0.00289052541506439, 0.00312624953223322, 0.00337213725595915,
0.00361956033402782, 0.00385989051422484, 0.00408449954433585,
0.00428475917214646, 0.00445204114544233, 0.00457771721200906,
0.00465315911963229, 0.00466973861609765, 0.00461882744919076,
0.00449196986691963, 0.0042874160634374, 0.00401212749412751,
0.00367364780262395, 0.0032795206325607, 0.00283728962757174,
0.00235449843129108, 0.00183869068735268, 0.00129741003939055,
0.000738200131038661, 0.000168604605931003, -0.000403832892298435,
-0.000971568720015669, -0.00152705923358671, -0.00206276078937758,
-0.00257112974375428, -0.00304462245308283, -0.00347569527372924,
-0.00385680456205953, -0.00418040667443971, -0.00443922939659004,
-0.00463224338937841, -0.00476466218882034, -0.00484197076028562,
-0.00486965406914401, -0.00485319708076528, -0.00479808476051921,
-0.00470980207377557, -0.00459383398590413, -0.00445566546227465,
-0.00430078146825693, -0.00413466696922072, -0.00396280693053579,
-0.00379068631757193, -0.00362379009569889, -0.00346760323028646,
-0.00332761068670441, -0.0032092974303225, -0.00311814842651051,
-0.00305964864063822, -0.00303897089201381, -0.00305661736998701,
-0.00310949480371679, -0.00319441743464015, -0.00330819950419407,
-0.00344765525381556, -0.00360959892494162, -0.00379084475900925,
-0.00398820699745545, -0.00419849988171722, -0.00441853765323156,
-0.00464513455343546, -0.00487510482376593, -0.00510526270565997,
-0.00533242244055458, -0.00555339826988675, -0.00576500443509349,
-0.00596405517761179, -0.00614736473887866, -0.00631174736033109,
-0.00645423379806727, -0.00657409002222406, -0.00667190065928587,
-0.00674826737916529, -0.00680379185177487, -0.00683907574702718,
-0.0068547207348348, -0.0068513284851103, -0.00682950066776623,
-0.00678983895271517, -0.00673294500986969, -0.00665942050914235,
-0.00656986712044573, -0.00646488651369238, -0.00634508035879489,
-0.00621105032566582, -0.00606339808421773, -0.0059027253043632,
-0.00572963365601479, -0.00554472480908507, -0.0053486844807736,
-0.00514281955026036, -0.00492871530336347, -0.00470795833913992,
-0.0044821352566467, -0.0042528326549408, -0.0040216371330792,
-0.0037901352901189, -0.00355991372511687, -0.00333255903713012,
-0.00310965782521562, -0.00289279668843038, -0.00268356222583137,
-0.00248354103647558, -0.00229431971942, -0.00211748487372163,
-0.00195462309843743, -0.00180732099262442, -0.00167716515533957,
-0.00156574218563988, -0.00147461474880411, -0.00140521605759051,
-0.00135893575805815, -0.00133716346343512, -0.00134128878694955,
-0.00137270134182952, -0.00143279074130315, -0.00152294659859854,
-0.0016445585269438, -0.00179901613956701, -0.00198770904969631,
-0.00221202687055977, -0.00247335921538553, -0.00277309569740165,
-0.00311262592983628, -0.00349333952591749, -0.00391662609887341,
-0.00438387526193212, -0.00489647662832175, -0.00545581981127037
)), .Names = c("x", "y"), row.names = c(NA, -200L), class = "data.frame")
I've tried this, but no luck:
ggplot(df, aes(x = x, y = y)) + geom_line() + scale_x_discrete(breaks = 1:31, labels = seq(1920,1950))
You can use tidyr::seq_range() for this
library(ggplot2)
library(tidyr)
df$x <- seq_range(1920:1950,dim(df)[1])
ggplot(df, aes(x = x, y = y) ) + geom_line()
Let's say I have a saved plot named my_plot, produced with ggplot. Also, let's say that the column in my_plot[[1]] data frame used for horizontal axis is named my_dates
Now, I want to add some vertical lines to the plot, which, of course, can be done by something like that:
my_plot +
geom_vline(aes(xintercept = my_dates[c(3, 8)]))
Since I perform this task quite on a regular basis, I want to write a function for that -- something like that:
ggplot.add_lines <- function(given_plot, given_points) {
finale <- given_plot +
geom_vline(aes(xintercept = given_plot[[1]]$my_dates[given_points]))
return(finale)
}
Which, as it's probably obvious to everyone, doesn't work:
> ggplot.add_lines(my_plot, c(3, 5))
Error in eval(expr, envir, enclos) : object 'given_plot' not found
So, my question would be what am I doing wrong, and how can it be fixed? Below is some data for a reproducible example:
> dput(my_plot)
structure(list(data = structure(list(my_dates = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10), my_points = c(-2.20176409422924, -1.12872396340683,
-0.259703895194354, 0.634233385649338, -0.678983982973015, -1.83157126614836,
1.33360095418957, -0.120455389285709, -0.969431974863616, -1.20451262626184
)), .Names = c("my_dates", "my_points"), row.names = c(NA, -10L
), class = "data.frame"), layers = list(<environment>), scales = <S4 object of class structure("Scales", package = "ggplot2")>,
mapping = structure(list(x = my_dates, y = my_points), .Names = c("x",
"y"), class = "uneval"), theme = list(), coordinates = structure(list(
limits = structure(list(x = NULL, y = NULL), .Names = c("x",
"y"))), .Names = "limits", class = c("cartesian", "coord"
)), facet = structure(list(shrink = TRUE), .Names = "shrink", class = c("null",
"facet")), plot_env = <environment>, labels = structure(list(
x = "my_dates", y = "my_points"), .Names = c("x", "y"
))), .Names = c("data", "layers", "scales", "mapping", "theme",
"coordinates", "facet", "plot_env", "labels"), class = c("gg",
"ggplot"))
According to this post, below is my solution to this problem. The environment issue in the **ply and ggplot is annoying.
ggplot.add_lines <- function(given_plot, given_points) {
finale <- eval(substitute( expr = {given_plot +
geom_vline(aes(xintercept = my_dates[given_points]))}, env = list(given_points = given_points)))
return(finale)
}
The following code runs well on my machine. (I cannot make your reproducible work on my machine...)
df <- data.frame(my_dates = 1:10, val = 1:10)
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
my_plot <- ggplot.add_lines(my_plot, c(3, 5))
print(my_plot)
Update: The above solution fails when more than two points are used.
It seems that we can easily solve this problem by not including the aes (subsetting together with aescauses problems):
ggplot.add_lines <- function(given_plot, given_points) {
finale <- given_plot + geom_vline(xintercept = given_plot[[1]]$my_dates[given_points])
return(finale)
}
I would take the following approach: extract the data.frame of interest, and pass it to the new layer,
df <- data.frame(my_dates = 1:10, val = rnorm(10))
my_plot <- ggplot(df, aes(x = my_dates, y = val)) + geom_line()
add_lines <- function(p, given_points=c(3,5), ...){
d <- p[["data"]][given_points,]
p + geom_vline(data = d, aes_string(xintercept="my_dates"), ...)
}
add_lines(my_plot, c(3,5), lty=2)