Related
Sorry if this is a duplicate question, but I cannot find the answer anywhere. I have two plots and I would like to overlay plot two on plot one so that they form a single plot. Is this possible? I will attach how both plots look separately. Both are faceted by the same variable (location) and use the same x- and y-axis scales, so in theory it should be possible.
Thank you.
## Plot one
# Plot one: RSL per site drawn as a line with a LOWER-UPPER ribbon,
# one facet row per SITE.
Proxy <- read.csv("ALLRSL.csv", header = TRUE)

# The same four colours are used for the ribbon fill and the line colour.
site_colours <- c("#4E193D", "#342955", "#4E617E", "#97B4CB")

p1 <- ggplot(Proxy, aes(x = YEAR)) +
  geom_ribbon(aes(ymin = LOWER, ymax = UPPER, fill = SITE), alpha = .5) +
  geom_line(aes(y = RSL, colour = SITE)) +
  facet_wrap(~SITE, ncol = 1) +
  scale_fill_manual(values = site_colours) +
  scale_color_manual(values = site_colours) +
  # xlim() drops observations outside 1900-2020 before drawing.
  xlim(1900, 2020) +
  theme_classic() +
  # One theme() call: no grid lines, no x-axis decoration, no facet strips,
  # no legend.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none"
  )
p1
## Plot two
# Plot two: tide-gauge values drawn as points, one facet row per Site.
tgsm <- read.csv("tgsm.csv", header = TRUE)
tgsm <- na.omit(tgsm)

# Long format: every column except Year becomes a Site/value pair.
tglonger <- pivot_longer(
  tgsm,
  cols = -Year,
  names_to = "Site",
  values_to = "value"
)

site_colours <- c("#4E193D", "#342955", "#4E617E", "#97B4CB")

p2 <- ggplot(tglonger, aes(x = Year, y = value, colour = Site)) +
  geom_point(alpha = .7, size = 1) +
  facet_wrap(~Site, ncol = 1) +
  scale_color_manual(values = site_colours) +
  # xlim() drops observations outside 1900-2020 before drawing.
  xlim(1900, 2020) +
  theme_classic() +
  # One theme() call: no grid lines, no x-axis decoration, no facet strips,
  # no legend.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none"
  )
p2
Data
# Sample of `Proxy`: six yearly rows (1820-1825). Every row belongs to the
# first SITE level, although the factor keeps all four levels.
Proxy <- structure(
  list(
    RSL = c(
      -0.305251214, -0.306414006, -0.307194187,
      -0.308202139, -0.309150572, -0.309679123
    ),
    UPPER = c(
      -0.182716456, -0.186724068, -0.189331305,
      -0.193118273, -0.197069799, -0.20118809
    ),
    LOWER = c(
      -0.416725663, -0.413606073, -0.411131729,
      -0.408930899, -0.406531588, -0.404478981
    ),
    YEAR = 1820:1825,
    SITE = structure(
      rep(1L, 6L),
      levels = c("Little Swanport", "Lutregala", "Tarra", "Wapengo"),
      class = "factor"
    )
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)
# Sample of `tgsm` as left by na.omit(): six rows (row names 158-163) and a
# record of the dropped rows (1-157 and 183) in the "na.action" attribute.
tgsm <- structure(
  list(
    Year = 1993:1998,
    Lg2002 = c(
      -0.001164223, -0.002229453, -0.002734792,
      -0.002977787, -0.002786098, -0.002026994
    ),
    Wap2002 = c(
      -0.002531348, -0.002051993, -0.001468704,
      -0.001182162, -0.001027132, -0.00020881
    ),
    Tar2002 = c(
      -0.029020612, -0.024330561, -0.019927593,
      -0.015682528, -0.012907219, -0.009784772
    ),
    LSP2002 = c(
      -0.034514531, -0.030171621, -0.026095138,
      -0.021952898, -0.018480702, -0.014531318
    )
  ),
  # Named integer vector: each omitted row index is named by its own number.
  na.action = structure(
    stats::setNames(c(1:157, 183L), as.character(c(1:157, 183L))),
    class = "omit"
  ),
  row.names = 158:163,
  class = "data.frame"
)
See "Plot one" below for how you can do that with patchwork.
However, conceptually, I am guessing that you want to add some sort of prediction to historic values. I personally would put everything in one data frame and plot that. If the gap between the two time periods is too large, you can facet by time period (as in my suggestion).
The plots look a bit different from yours because you only provided data for one site in Proxy (so I filtered the other data set for what I thought was the equivalent site; it will work nonetheless, because the faceting remains), and I removed all theme elements that are not relevant to the problem.
Plot one - combining plots.
library(tidyverse)
library(patchwork)
# Reshape the tide-gauge table to long form with the same column names as
# `Proxy` (YEAR, SITE, RSL), keeping only the "LSP2002" series.
tgsm <- na.omit(tgsm)
tglonger <- tgsm %>%
  rename(YEAR = Year) %>%
  pivot_longer(cols = -YEAR, names_to = "SITE", values_to = "RSL") %>%
  filter(SITE == "LSP2002")
# Left-hand plot: Proxy RSL line with its LOWER-UPPER ribbon, zoomed to
# 1800-1830 on x and -1..0 on y. The x-axis decoration, facet strip and
# legend are hidden.
p1 <- ggplot(Proxy, aes(x = YEAR)) +
  geom_ribbon(aes(ymin = LOWER, ymax = UPPER, fill = SITE), alpha = .5) +
  geom_line(aes(y = RSL, colour = SITE)) +
  coord_cartesian(xlim = c(1800, 1830), ylim = c(-1, 0)) +
  facet_wrap(vars(SITE)) +
  theme_classic() +
  theme(
    legend.position = "none",
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Right-hand plot: tide-gauge points zoomed to 1990-2000 on x, with the same
# y window (-1..0) as p1 so the two panels line up when placed side by side.
p2 <- ggplot() +
  geom_point(
    data = tglonger,
    aes(x = YEAR, y = RSL, col = SITE),
    alpha = .7, size = 1
  ) +
  facet_wrap(~SITE) +
  coord_cartesian(xlim = c(1990, 2000), ylim = c(-1, 0)) +
  theme_classic() +
  ## only one call to theme!!
  theme(
    ## this is where the theme call is different to above: both axes are
    ## blanked (titles, text, ticks) and the y-axis line is removed
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line.y = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    # No trailing comma after the last argument: it would pass an empty
    # argument to theme().
    legend.position = "none"
  )

# patchwork's `+` combines the two plots into one figure.
p1 + p2
Suggestion for an alternative visualisation
# Stack both data sets into one frame; `timevar` records the source
# ("time1" = Proxy, "time2" = tglonger) and SITE is overwritten with a single
# label so both sources share one facet row.
df_new <- list(time1 = Proxy, time2 = tglonger) %>%
  bind_rows(.id = "timevar") %>%
  mutate(SITE = "LSP2002")

# One row per SITE, one column per source, each column with its own x range.
ggplot(df_new, aes(x = YEAR, y = RSL)) +
  geom_point() +
  facet_grid(rows = vars(SITE), cols = vars(timevar), scales = "free_x") +
  theme(
    legend.position = "none",
    panel.spacing = unit(.5, "lines")
  )
You can also use this data frame to create a list of plots and then stitch them together with patchwork. This approach doesn't allow changing individual plots, though.
library(patchwork)

# Build one plot per `timevar` value, all with the same y window, and collect
# them in a named list.
plot_one_period <- function(period_df) {
  ggplot(period_df, aes(x = YEAR, y = RSL)) +
    geom_point() +
    coord_cartesian(ylim = c(-0.4, 0)) +
    facet_grid(cols = vars(SITE), scales = "free_x") +
    theme(
      legend.position = "none",
      panel.spacing = unit(.5, "lines")
    )
}
ls_p <- map(split(df_new, df_new$timevar), plot_one_period)

# Assemble the list of plots into a single figure.
wrap_plots(ls_p)
I wanted to create a visualisation for some data I had collected using ggplot2. Everything works fine, except that I cannot add error bars for some reason. The code I used is the following:
# Scatter plot of eocv against saturated, one colour per oil.
graph2 <- ggplot(
  data = enth_comb,
  mapping = aes(x = saturated, y = eocv, colour = oil)
) +
  geom_point()
This worked fine and resulted in the graph I expected. Then I added the following
# Add vertical error bars from v_lowlim to v_highlim; x and colour are
# inherited from the aes() given to ggplot() when graph2 was created.
graph2 <- graph2 + geom_errorbar(aes(ymin = v_lowlim, ymax = v_highlim))
This gives me the error "Error: geom_errorbar requires the following missing aesthetics: ymin, ymax" despite having provided ymin and ymax. I also tried adding an x value and removing 'aes' but it resulted in the same error.
The data is the following
I appreciate any help or suggestions.
Edit: Added output of dput(enth_comb)
# Output of dput(enth_comb): six rows, one per oil. v_lowlim / v_highlim equal
# eocv -/+ eocv_error and m_lowlim / m_highlim equal eocm -/+ eocm_error, i.e.
# the error-bar limits are already stored as columns.
structure(list(oil = structure(c(4L, 6L, 3L, 5L, 2L, 1L), .Label = c("coconut",
"palm", "peanut", "rapeseed", "rice", "sunflower"), class = "factor"),
saturated = c(8L, 11L, 17L, 25L, 82L, 88L), sonounsaturated = c(64L,
20L, 46L, 38L, 7L, 12L), Polyunsaturated = c(28L, 69L, 32L,
37L, 11L, 0L), eocv = c(26991L, 26746L, 28817L, 30056L, 20635L,
29497L), eocm = c(31204L, 30892L, 32964L, 34436L, 22979L,
33233L), eocv_error = c(2073L, 602L, 1932L, 5578L, 2128L,
1267L), eocm_error = c(2396L, 695L, 2210L, 6391L, 2369L,
1427L), v_highlim = c(29064L, 27348L, 30749L, 35634L, 22763L,
30764L), v_lowlim = c(24918L, 26144L, 26885L, 24478L, 18507L,
28230L), m_highlim = c(33600L, 31587L, 35174L, 40827L, 25348L,
34660L), m_lowlim = c(28808L, 30197L, 30754L, 28045L, 20610L,
31806L)), class = "data.frame", row.names = c(NA, -6L))
The full solution would be to concatenate all the elements:
# Complete plot in one expression: points first, then error bars spanning
# v_lowlim..v_highlim, both coloured by oil.
ggplot(
  data = enth_comb,
  mapping = aes(x = saturated, y = eocv, colour = oil)
) +
  geom_point() +
  geom_errorbar(mapping = aes(ymin = v_lowlim, ymax = v_highlim))
I am trying to create a regression plot for a multiple regression equation at the points that carb= 20, 30, 40 and 50. My end product will be a graph that has four plots. I don't know what I am doing wrong here. Could anyone help?
This is my regression model:
# Linear model of Calories on carb, fat and their interaction.
# `carb * fat` already expands to carb + fat + carb:fat, so the main effects
# do not need to be listed separately; the fitted model is unchanged.
lm1 <- lm(Calories ~ carb * fat, data = diet)
Here I am trying to create models for carb = 20, 30, 40, 50 values:
# Prediction grid: each carb value of interest crossed with a sequence of fat
# values spanning the observed range. The original data.frame() call combined
# 11 fitted values with 4 carb values, which fails because the column lengths
# differ; predict() needs `newdata` to evaluate the model at chosen carb values.
pred <- expand.grid(
  carb = c(20, 30, 40, 50),
  fat = seq(min(diet$fat), max(diet$fat), length.out = 50)
)
pred$cal_pred <- predict(lm1, newdata = pred)

# One panel per carb value (four in total). The observed points are passed
# without the carb column so that they are repeated in every panel and do not
# add panels for the other carb values present in `diet`.
ggplot(mapping = aes(y = fat)) +
  geom_point(
    data = diet[c("Calories", "fat")],
    aes(x = Calories),
    color = "blue"
  ) +
  geom_line(data = pred, aes(x = cal_pred), color = "red") +
  facet_wrap(~carb, labeller = label_both)
dput(diet)
# Output of dput(diet): 11 observations with columns Calories (numeric),
# carb (integer) and fat (integer).
structure(list(Calories = c(202.149, 275.272, 294.67, 451.524,
261.315, 365.13, 296.69, 353.464, 383.429, 420.748, 296.021),
carb = c(20L, 25L, 30L, 20L, 30L, 20L, 35L, 50L, 40L, 24L,
32L), fat = c(8L, 12L, 13L, 40L, 12L, 30L, 10L, 15L, 32L,
45L, 20L)), class = "data.frame", row.names = c(NA, -11L))
I am trying to use NanoStringDiff to identify differentially expressed microRNAs between treatment and control samples, but I am obtaining extraordinarily large values for the fold change (log2FC is 27 for certain genes, but expected to be around 1 by inspection). I think there is an error with my code. I am using the following guide http://www.bioconductor.org/packages/release/bioc/vignettes/NanoStringDiff/inst/doc/NanoStringDiff.pdf
I've included my code below and the data I am using is publicly available on https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE84971 (GSE84971_non-normalized.txt.gz)
I took this data and converted it into an Excel file in the format as specified in the guide which I called "data".
Any help is much appreciated.
# Differential-expression workflow with NanoStringDiff for two groups
# (Control vs Treatment, three samples each), read from a CSV file.

# NOTE(review): biocLite() is the legacy Bioconductor installer; current
# Bioconductor releases document BiocManager::install() instead. Confirm which
# one matches the R version in use. Installation only needs to run once.
source("https://bioconductor.org/biocLite.R")
biocLite()
biocLite("NanoStringDiff")
biocLite("Biobase")

# Package names are case-sensitive: the package installed above is "Biobase",
# so library("BioBase") fails with "there is no package called 'BioBase'".
library("Biobase")
library("NanoStringDiff")

# One row per sample, in the same order as the sample columns of the CSV.
designs <- data.frame(
  group = c(
    "Control", "Control", "Control",
    "Treatment", "Treatment", "Treatment"
  )
)
designs

directory <- "/Users/admin/Desktop"
path <- file.path(directory, "data.csv")
NanoStringData <- createNanoStringSetFromCsv(path, header = TRUE, designs)

pheno <- pData(NanoStringData)
group <- pheno$group
design.full <- model.matrix(~ 0 + group) # create a design (or model) matrix
design.full

# One weight per column of design.full. With the Control and Treatment levels
# in that order, c(-1, 1) is Treatment minus Control - check the column order
# printed above.
contrast <- c(-1, 1)

NanoStringData <- estNormalizationFactors(NanoStringData)
positiveFactor(NanoStringData)
negativeFactor(NanoStringData)
housekeepingFactor(NanoStringData)

result <- glm.LRT(
  NanoStringData,
  design.full,
  Beta = ncol(design.full),
  contrast = contrast
)
result$table
I attempted to add the data using dput(NanoString), hopefully this makes my code more self-contained. Below is shown the output.
new("NanoStringSet"
, positiveFactor = numeric(0)
, negativeFactor = numeric(0)
, housekeepingFactor = numeric(0)
, positiveControl = structure(c(51117L, 9153L, 2357L, 749L, 133L, 88L, 30283L, 6423L,
1178L, 444L, 83L, 35L, 46143L, 8040L, 2014L, 554L, 114L, 82L,
48365L, 9338L, 2158L, 603L, 135L, 91L, 52744L, 10177L, 2391L,
786L, 143L, 72L, 70189L, 12069L, 3186L, 693L, 176L, 110L), .Dim = c(6L,
6L), .Dimnames = list(c("POS_A(128)", "POS_B(32)", "POS_C(8)",
"POS_D(2)", "POS_E(0.5)", "POS_F(0.125)"), c("Control.1", "Control.2",
"Control.3", "Treatment.1", "Treatment.2", "Treatment.3")))
, negativeControl = structure(c(52L, 32L, 40L, 14L, 104L, 74L, 28L, 25L, 29L, 11L,
74L, 45L, 31L, 32L, 29L, 13L, 80L, 60L, 61L, 44L, 32L, 5L, 103L,
74L, 56L, 42L, 44L, 15L, 135L, 62L, 55L, 54L, 36L, 12L, 108L,
61L), .Dim = c(6L, 6L), .Dimnames = list(c("NEG_B(0)", "NEG_C(0)",
"NEG_A(0)", "NEG_F(0)", "NEG_E(0)", "NEG_D(0)"), c("Control.1",
"Control.2", "Control.3", "Treatment.1", "Treatment.2", "Treatment.3"
)))
, housekeepingControl = structure(c(825L, 1892L, 1293L, 1496L, 2157L, 1254L, 1081L, 1121L,
914L, 1223L, 2123L, 3912L, 1876L, 2217L, 3363L, 1392L, 1750L,
1626L, 1196L, 1917L, 1378L, 1446L, 1300L, 1077L, 1875L, 2098L,
8006L, 2989L, 4447L, 4930L), .Dim = 5:6, .Dimnames = list(c("Actb|0",
"B2m|0", "Gapdh|0", "Rpl19|0", "Rplp0|0"), c("Control.1", "Control.2",
"Control.3", "Treatment.1", "Treatment.2", "Treatment.3")))
, experimentData = new("MIAME"
, name = ""
, lab = ""
, contact = ""
, title = ""
, abstract = ""
, url = ""
, pubMedIds = ""
, samples = list()
, hybridizations = list()
, normControls = list()
, preprocessing = list()
, other = list()
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 0L, 0L), c(1L, 1L, 0L))
)
)
, assayData = <environment>
, phenoData = new("AnnotatedDataFrame"
, varMetadata = structure(list(labelDescription = NA_character_), .Names = "labelDescription", row.names = "group", class = "data.frame")
, data = structure(list(group = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Control",
"Treatment"), class = "factor")), .Names = "group", row.names = c("Control.1",
"Control.2", "Control.3", "Treatment.1", "Treatment.2", "Treatment.3"
), class = "data.frame")
, dimLabels = c("sampleNames", "sampleColumns")
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 1L, 0L))
)
)
, featureData = new("AnnotatedDataFrame"
, varMetadata = structure(list(labelDescription = character(0)), .Names = "labelDescription", row.names = character(0), class = "data.frame")
, data = structure(list(), .Names = character(0), class = "data.frame", row.names = c("hsa-miR-10a-5p|0",
"hsa-miR-1234|0", "hsa-miR-185-5p|0", "hsa-miR-27a-3p", "hsa-miR-34c-3p",
"hsa-miR-1181|0", "hsa-miR-601", "hsa-miR-4454"))
, dimLabels = c("featureNames", "featureColumns")
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 1L, 0L))
)
)
, annotation = character(0)
, protocolData = new("AnnotatedDataFrame"
, varMetadata = structure(list(labelDescription = character(0)), .Names = "labelDescription", row.names = character(0), class = "data.frame")
, data = structure(list(), .Names = character(0), class = "data.frame", row.names = c("Control.1",
"Control.2", "Control.3", "Treatment.1", "Treatment.2", "Treatment.3"
))
, dimLabels = c("sampleNames", "sampleColumns")
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 1L, 0L))
)
)
, .__classVersion__ = new("Versions"
, .Data = list(c(3L, 3L, 1L), c(2L, 34L, 0L), c(1L, 3L, 0L), c(1L, 0L,
0L))
)
)
Thanks!
I have the following data set
# Example data set (26 rows): 13 angles from 0 to 180 in steps of 15 for each
# Collimator level ("n", then "y"). X1-X3 are three count columns and
# `average` is their row mean. Presumably this is the `df` used in the code
# that follows - confirm.
structure(list(Collimator = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("n", "y"), class = "factor"), angle = c(0L,
15L, 30L, 45L, 60L, 75L, 90L, 105L, 120L, 135L, 150L, 165L, 180L,
0L, 15L, 30L, 45L, 60L, 75L, 90L, 105L, 120L, 135L, 150L, 165L,
180L), X1 = c(2099L, 11070L, 17273L, 21374L, 23555L, 23952L,
23811L, 21908L, 19747L, 17561L, 12668L, 6008L, 362L, 53L, 21L,
36L, 1418L, 6506L, 10922L, 12239L, 8727L, 4424L, 314L, 38L, 21L,
50L), X2 = c(2126L, 10934L, 17361L, 21301L, 23101L, 23968L, 23923L,
21940L, 19777L, 17458L, 12881L, 6051L, 323L, 40L, 34L, 46L, 1352L,
6569L, 10880L, 12534L, 8956L, 4418L, 344L, 58L, 24L, 68L), X3 = c(2074L,
11109L, 17377L, 21399L, 23159L, 23861L, 23739L, 21910L, 20088L,
17445L, 12733L, 6046L, 317L, 45L, 26L, 46L, 1432L, 6495L, 10862L,
12300L, 8720L, 4343L, 343L, 38L, 34L, 60L), average = c(2099.6666666667,
11037.6666666667, 17337, 21358, 23271.6666666667, 23927, 23824.3333333333,
21919.3333333333, 19870.6666666667, 17488, 12760.6666666667,
6035, 334, 46, 27, 42.6666666667, 1400.6666666667, 6523.3333333333,
10888, 12357.6666666667, 8801, 4395, 333.6666666667, 44.6666666667,
26.3333333333, 59.3333333333)), .Names = c("Collimator", "angle",
"X1", "X2", "X3", "average"), row.names = c(NA, -26L), class = "data.frame")
I first scale the average counts for both collimator y and n so that the highest count in each is 1
# Within each Collimator group, divide `average` by the group maximum so the
# highest value in every group becomes 1.
df <- ddply(
  .data = df,
  .variables = .(Collimator),
  .fun = transform,
  norm.average = average / max(average)
)
and plot the curves:
# Normalised average against angle: points joined by lines, one colour per
# Collimator level.
ggplot(
  data = df,
  mapping = aes(x = angle, y = norm.average, colour = Collimator)
) +
  geom_point() +
  geom_line()
Using geom_line is not very pleasing to the eye, and I would rather fit a curve to the data using stat_smooth. Each data set should be symmetric about the mean, so I think a Gaussian fit should be ideal. How can I fit a Gaussian to the data sets collimator="y" and collimator="n" in ggplot2 or using base R? I would also like to output the mean and standard deviation. Can this be done?
Strictly speaking, your data is not Gaussian, only Gaussian-like in shape; here is an example of fitting such a curve and visualizing the fit:
# For each Collimator group, fit exp(-(angle - m)^2 / (2 * s^2)) to
# norm.average with nls() and wrap the fitted curve in a stat_function layer.
fit <- dlply(df, .(Collimator), function(grp) {
  est <- coef(nls(
    norm.average ~ exp(-(angle - m)^2 / (2 * s^2)),
    data = grp,
    start = list(s = 50, m = 80)
  ))
  # Fitted curve evaluated at arbitrary angles `a`.
  fitted_curve <- function(a) exp(-(a - est["m"])^2 / (2 * est["s"]^2))
  stat_function(fun = fitted_curve, data = grp)
})

# Adding the list of layers to the plot draws one fitted curve per group.
ggplot(df, aes(x = angle, y = norm.average, col = Collimator)) +
  geom_point() +
  fit
Updated
To obtain the parameters:
# Same per-group fit as before, but each returned layer also carries the
# fitted coefficients (s, m) in its ".coef" attribute for later extraction.
fit <- dlply(df, .(Collimator), function(grp) {
  est <- coef(nls(
    norm.average ~ exp(-(angle - m)^2 / (2 * s^2)),
    data = grp,
    start = list(s = 50, m = 80)
  ))
  fitted_curve <- function(a) exp(-(a - est["m"])^2 / (2 * est["s"]^2))
  curve_layer <- stat_function(fun = fitted_curve, data = grp)
  attr(curve_layer, ".coef") <- est
  curve_layer
})
then,
> ldply(fit, attr, ".co")
Collimator s m
1 n 52.99117 82.60820
2 y 21.99518 86.61268