Plot multiple regression lines on one plot in ggplot2 - r

Sorry if this is a repeat question but I haven't managed to find an answer yet since my data frame has to be split. I am trying to plot two regression lines on one plot, with a regression line for data in period 1 (1815-1899) and a regression line for data in period 2 (1900-2013). I have used dplyr to split the data to run the two separate regressions but can't work out how to get them on the same graph, as you seem to need the data frame in the ggplot() command for it to plot the line. Can anyone help?
Thanks.
library(tidyverse)

## Read the CSV and drop every row that contains an NA
brest <- na.omit(read.csv("brest.csv", header = TRUE))

## One data frame per period, keeping only the columns used below
brestp1 <- brest %>%
  filter(period == 1) %>%
  select(year, slr, period)
brestp2 <- brest %>%
  filter(period == 2) %>%
  select(year, slr, period)

## Separate linear model of slr on year for each period
fit1 <- lm(slr ~ year, data = brestp1)
summary(fit1)
fit2 <- lm(slr ~ year, data = brestp2)
summary(fit2)

## plot graph
ggplot(brestp1, aes(x = year, y = slr)) + ### Need not only brestp1 but also brestp2
  geom_point() +
  stat_smooth(method = "lm", se = FALSE) +
  theme_classic()
## Data
## Brest period 1
structure(list(year = 1815:1820, slr = c(6926L, 6959L, 6945L,
6965L, 6941L, 6909L), period = c(1L, 1L, 1L, 1L, 1L, 1L)), na.action = structure(c(`30` = 30L,
`31` = 31L, `32` = 32L, `33` = 33L, `34` = 34L, `35` = 35L, `36` = 36L,
`37` = 37L, `38` = 38L, `39` = 39L, `51` = 51L, `52` = 52L, `53` = 53L,
`54` = 54L, `138` = 138L, `139` = 139L, `140` = 140L, `141` = 141L,
`142` = 142L, `143` = 143L, `144` = 144L, `145` = 145L, `146` = 146L
), class = "omit"), row.names = c(NA, 6L), class = "data.frame")
##Brest period 2
structure(list(year = 1900:1905, slr = c(6936L, 6916L, 6923L,
6976L, 6931L, 6913L), period = c(2L, 2L, 2L, 2L, 2L, 2L)), na.action = structure(c(`30` = 30L,
`31` = 31L, `32` = 32L, `33` = 33L, `34` = 34L, `35` = 35L, `36` = 36L,
`37` = 37L, `38` = 38L, `39` = 39L, `51` = 51L, `52` = 52L, `53` = 53L,
`54` = 54L, `138` = 138L, `139` = 139L, `140` = 140L, `141` = 141L,
`142` = 142L, `143` = 143L, `144` = 144L, `145` = 145L, `146` = 146L
), class = "omit"), row.names = c(NA, 6L), class = "data.frame")

Use geom_smooth with separate data:
# Draw each period as its own pair of layers. Every layer is given its own
# `data` argument, so the top-level ggplot() call needs no data frame.
# brestp1 / brestp2 are the per-period subsets created in the question.
ggplot() +
  geom_smooth(aes(x = year, y = slr), data = brestp1,
              method = "lm", se = FALSE, color = "red") +
  geom_smooth(aes(x = year, y = slr), data = brestp2,
              method = "lm", se = FALSE, color = "blue") +
  geom_point(aes(x = year, y = slr), data = brestp1, color = "red") +
  geom_point(aes(x = year, y = slr), data = brestp2, color = "blue")

Related

Combine two faceted plots on one plot

Sorry if this is a duplicate question but I cannot seem to find the answer to my question anywhere. I have two plots and I would like to overlay plot two on plot one so that they form one plot. Is this possible? I will attach how both plots look separately. They are both facetted by the same variable which is by location and are on the same x and y-axis scale so theoretically should be possible.
Thank you.
## Plot one
# Read the data for plot one. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
Proxy <- read.csv("ALLRSL.csv", header = TRUE)

# Ribbon (LOWER..UPPER) plus line (RSL) per site, one facet row per SITE.
p1 <- ggplot() +
  geom_ribbon(data = Proxy, aes(x = YEAR, ymin = LOWER, ymax = UPPER, fill = SITE), alpha = .5) +
  geom_line(data = Proxy, aes(x = YEAR, y = RSL, col = SITE)) +
  facet_wrap(~SITE, ncol = 1) +
  scale_fill_manual(values = c("#4E193D", "#342955", "#4E617E", "#97B4CB")) +
  scale_color_manual(values = c("#4E193D", "#342955", "#4E617E", "#97B4CB")) +
  theme_classic() +
  xlim(1900, 2020) +
  # All theme tweaks in a single call: no grid lines, no x axis decoration,
  # no facet strips, no legend.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none"
  )
p1
## Plot two
# Read the data for plot two and drop rows containing NA. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
tgsm <- read.csv("tgsm.csv", header = TRUE)
tgsm <- na.omit(tgsm)

# Long format: one row per (Year, Site) with the measurement in `value`.
tglonger <- pivot_longer(tgsm, cols = c(-Year), names_to = "Site", values_to = "value")

# Points per site, one facet row per Site.
p2 <- ggplot() +
  geom_point(data = tglonger, aes(x = Year, y = value, col = Site), alpha = .7, size = 1) +
  facet_wrap(~Site, ncol = 1) +
  theme_classic() +
  xlim(1900, 2020) +
  scale_color_manual(values = c("#4E193D", "#342955", "#4E617E", "#97B4CB")) +
  # All theme tweaks in a single call: no grid lines, no x axis decoration,
  # no facet strips, no legend.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none"
  )
p2
Data
Proxy <- structure(list(RSL = c(-0.305251214, -0.306414006, -0.307194187,
-0.308202139, -0.309150572, -0.309679123), UPPER = c(-0.182716456,
-0.186724068, -0.189331305, -0.193118273, -0.197069799, -0.20118809
), LOWER = c(-0.416725663, -0.413606073, -0.411131729, -0.408930899,
-0.406531588, -0.404478981), YEAR = 1820:1825, SITE = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Little Swanport", "Lutregala",
"Tarra", "Wapengo"), class = "factor")), row.names = c(NA, 6L
), class = "data.frame")
tgsm <- structure(list(Year = 1993:1998, Lg2002 = c(-0.001164223, -0.002229453,
-0.002734792, -0.002977787, -0.002786098, -0.002026994), Wap2002 = c(-0.002531348,
-0.002051993, -0.001468704, -0.001182162, -0.001027132, -0.00020881
), Tar2002 = c(-0.029020612, -0.024330561, -0.019927593, -0.015682528,
-0.012907219, -0.009784772), LSP2002 = c(-0.034514531, -0.030171621,
-0.026095138, -0.021952898, -0.018480702, -0.014531318)), na.action = structure(c(`1` = 1L,
`2` = 2L, `3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L,
`15` = 15L, `16` = 16L, `17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L,
`21` = 21L, `22` = 22L, `23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L,
`27` = 27L, `28` = 28L, `29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L,
`33` = 33L, `34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L,
`39` = 39L, `40` = 40L, `41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L,
`45` = 45L, `46` = 46L, `47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L,
`51` = 51L, `52` = 52L, `53` = 53L, `54` = 54L, `55` = 55L, `56` = 56L,
`57` = 57L, `58` = 58L, `59` = 59L, `60` = 60L, `61` = 61L, `62` = 62L,
`63` = 63L, `64` = 64L, `65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L,
`69` = 69L, `70` = 70L, `71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L,
`75` = 75L, `76` = 76L, `77` = 77L, `78` = 78L, `79` = 79L, `80` = 80L,
`81` = 81L, `82` = 82L, `83` = 83L, `84` = 84L, `85` = 85L, `86` = 86L,
`87` = 87L, `88` = 88L, `89` = 89L, `90` = 90L, `91` = 91L, `92` = 92L,
`93` = 93L, `94` = 94L, `95` = 95L, `96` = 96L, `97` = 97L, `98` = 98L,
`99` = 99L, `100` = 100L, `101` = 101L, `102` = 102L, `103` = 103L,
`104` = 104L, `105` = 105L, `106` = 106L, `107` = 107L, `108` = 108L,
`109` = 109L, `110` = 110L, `111` = 111L, `112` = 112L, `113` = 113L,
`114` = 114L, `115` = 115L, `116` = 116L, `117` = 117L, `118` = 118L,
`119` = 119L, `120` = 120L, `121` = 121L, `122` = 122L, `123` = 123L,
`124` = 124L, `125` = 125L, `126` = 126L, `127` = 127L, `128` = 128L,
`129` = 129L, `130` = 130L, `131` = 131L, `132` = 132L, `133` = 133L,
`134` = 134L, `135` = 135L, `136` = 136L, `137` = 137L, `138` = 138L,
`139` = 139L, `140` = 140L, `141` = 141L, `142` = 142L, `143` = 143L,
`144` = 144L, `145` = 145L, `146` = 146L, `147` = 147L, `148` = 148L,
`149` = 149L, `150` = 150L, `151` = 151L, `152` = 152L, `153` = 153L,
`154` = 154L, `155` = 155L, `156` = 156L, `157` = 157L, `183` = 183L
), class = "omit"), row.names = 158:163, class = "data.frame")
See "Plot one" below for how you can do that with patchwork.
However, conceptually, I am guessing you want to add a sort of prediction to some historic values or so. I personally would put everything in one data frame and plot this. If there is too large a gap between the two time points, you can facet by time point (as in my suggestion).
The plots look a bit different than your plot because you only provided data for one Site in Proxy (so I filtered the other for what I thought is the equivalent, it will work nonetheless, because the faceting remains) - and I removed all those theme elements that are not relevant to the problem.
Plot one - combining plots.
library(tidyverse)
library(patchwork)

tgsm <- na.omit(tgsm)

# Long format using the same column names as Proxy (YEAR, SITE, RSL); only the
# LSP2002 series is kept.
tglonger <-
  pivot_longer(tgsm, cols = c(-Year), names_to = "SITE", values_to = "RSL") %>%
  filter(SITE == "LSP2002") %>%
  rename(YEAR = Year)

# Left panel: ribbon (LOWER..UPPER) and line (RSL) from Proxy.
p1 <- ggplot() +
  geom_ribbon(data = Proxy, aes(x = YEAR, ymin = LOWER, ymax = UPPER, fill = SITE), alpha = .5) +
  geom_line(data = Proxy, aes(x = YEAR, y = RSL, col = SITE)) +
  facet_wrap(~SITE) +
  coord_cartesian(xlim = c(1800, 1830), ylim = c(-1, 0)) +
  theme_classic() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none"
  )

# Right panel: points from tglonger, drawn on the same y limits as p1.
p2 <- ggplot() +
  geom_point(data = tglonger, aes(x = YEAR, y = RSL, col = SITE), alpha = .7, size = 1) +
  facet_wrap(~SITE) +
  coord_cartesian(xlim = c(1990, 2000), ylim = c(-1, 0)) +
  theme_classic() +
  ## only one call to theme!!
  theme(
    ## this is where the theme call is different to above:
    ## both axes are stripped here, and the y axis line is removed as well
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line.y = element_blank(),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    # no trailing comma after the last argument: a dangling comma passes an
    # empty argument, which can fail with "argument is empty"
    legend.position = "none"
  )

# patchwork: `+` places the two plots next to each other.
p1 + p2
Suggestion for an alternative visualisation
# Stack both data sets; `timevar` records which one each row came from
# ("time1" = Proxy, "time2" = tglonger). SITE is overwritten with one label.
df_new <- bind_rows(time1 = Proxy, time2 = tglonger, .id = "timevar") %>%
  mutate(SITE = "LSP2002")

# One facet column per data set, each with its own x range.
ggplot(df_new, aes(x = YEAR, y = RSL)) +
  geom_point() +
  facet_grid(SITE ~ timevar, scales = "free_x") +
  theme(
    legend.position = "none",
    panel.spacing = unit(.5, "lines")
  )
You can also use this data frame to create a list of plots, and then stitch them together with patchwork. This approach doesn't allow you to change individual plots, though.
# Split by timevar and build the same plot for every piece; the result is a
# list of ggplot objects named after the timevar values.
ls_p <- map(
  split(df_new, df_new$timevar),
  function(piece) {
    ggplot(piece) +
      geom_point(aes(x = YEAR, y = RSL)) +
      coord_cartesian(ylim = c(-0.4, 0)) +
      facet_grid(~SITE, scales = "free_x") +
      theme(legend.position = "none") +
      theme(panel.spacing = unit(.5, "lines"))
  }
)
library(patchwork)
# Arrange all plots of the list in one figure.
wrap_plots(ls_p)

Using geom_errorbar in ggplot2 results in "Error: geom_errorbar requires the following missing aesthetics: ymin, ymax"

I wanted to create a visualisation for some data I had collected using ggplot2. Everything works fine except I cannot add error bars for some reasons. The code I used is the following
graph2 <- ggplot(enth_comb, aes(saturated, eocv, color=oil))
graph2 <- graph2 + geom_point()
This worked fine and resulted in the graph I expected. Then I added the following
graph2 <- graph2 + geom_errorbar(aes(ymin = v_lowlim, ymax = v_highlim))
This gives me the error "Error: geom_errorbar requires the following missing aesthetics: ymin, ymax" despite having provided ymin and ymax. I also tried adding an x value and removing 'aes' but it resulted in the same error.
The data is the following
I appreciate any help or suggestions.
Edit: Added output of dput(enth_comb)
structure(list(oil = structure(c(4L, 6L, 3L, 5L, 2L, 1L), .Label = c("coconut",
"palm", "peanut", "rapeseed", "rice", "sunflower"), class = "factor"),
saturated = c(8L, 11L, 17L, 25L, 82L, 88L), sonounsaturated = c(64L,
20L, 46L, 38L, 7L, 12L), Polyunsaturated = c(28L, 69L, 32L,
37L, 11L, 0L), eocv = c(26991L, 26746L, 28817L, 30056L, 20635L,
29497L), eocm = c(31204L, 30892L, 32964L, 34436L, 22979L,
33233L), eocv_error = c(2073L, 602L, 1932L, 5578L, 2128L,
1267L), eocm_error = c(2396L, 695L, 2210L, 6391L, 2369L,
1427L), v_highlim = c(29064L, 27348L, 30749L, 35634L, 22763L,
30764L), v_lowlim = c(24918L, 26144L, 26885L, 24478L, 18507L,
28230L), m_highlim = c(33600L, 31587L, 35174L, 40827L, 25348L,
34660L), m_lowlim = c(28808L, 30197L, 30754L, 28045L, 20610L,
31806L)), class = "data.frame", row.names = c(NA, -6L))
The full solution would be concatenating all elements:
ggplot(enth_comb, aes(saturated, eocv, color=oil))+
geom_point()+
geom_errorbar(aes(ymin = v_lowlim, ymax = v_highlim))

How can I add a regression plot for a multiple regression for certain x values?

I am trying to create a regression plot for a multiple regression equation at the points that carb= 20, 30, 40 and 50. My end product will be a graph that has four plots. I don't know what I am doing wrong here. Could anyone help?
This is my regression model:
lm1 = lm(data = diet, Calories~ carb + fat+ carb*fat)
Here I am trying to create models for carb = 20, 30, 40, 50 values:
# predict() returns one value per row of `newdata`. The original call predicted
# on `diet` (11 rows) and paired the result with four carb values, which
# data.frame() rejects because the lengths differ. Instead, build a grid that
# crosses the four carb levels with the observed range of fat and predict on
# that grid.
pred <- expand.grid(
  carb = c(20, 30, 40, 50),
  fat = seq(min(diet$fat), max(diet$fat), length.out = 50)
)
pred$cal_pred <- predict(lm1, newdata = pred)

# Same axes as before (Calories on x, fat on y); `group = carb` draws one
# fitted line per carb level.
ggplot(data = diet, aes(x = Calories, y = fat)) +
  geom_point(color = "blue") +
  geom_line(
    data = pred,
    aes(x = cal_pred, y = fat, group = carb),
    color = "red"
  )
dput(diet)
structure(list(Calories = c(202.149, 275.272, 294.67, 451.524,
261.315, 365.13, 296.69, 353.464, 383.429, 420.748, 296.021),
carb = c(20L, 25L, 30L, 20L, 30L, 20L, 35L, 50L, 40L, 24L,
32L), fat = c(8L, 12L, 13L, 40L, 12L, 30L, 10L, 15L, 32L,
45L, 20L)), class = "data.frame", row.names = c(NA, -11L))

NanoStringDiff produces very large logFC values

I am trying to use NanoStringDiff to identify differentially expressed microRNAs between treatment and control samples, but I am obtaining extraordinarily large values for the fold change (log2FC is 27 for certain genes, but expected to be around 1 by inspection). I think there is an error with my code. I am using the following guide http://www.bioconductor.org/packages/release/bioc/vignettes/NanoStringDiff/inst/doc/NanoStringDiff.pdf
I've included my code below and the data I am using is publicly available on https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE84971 (GSE84971_non-normalized.txt.gz)
I took this data and converted it into an Excel file in the format as specified in the guide which I called "data".
Any help is much appreciated.
# Install the Bioconductor packages used below.
source("https://bioconductor.org/biocLite.R")
biocLite()
biocLite("NanoStringDiff")
biocLite("Biobase")

# Package names are case-sensitive: the package is "Biobase", not "BioBase".
library("Biobase")
library("NanoStringDiff")

# One row per sample, in the same order as the sample columns of the CSV.
designs <- data.frame(
  group = c("Control", "Control", "Control", "Treatment", "Treatment", "Treatment")
)
designs

directory <- "/Users/admin/Desktop"
# file.path() joins path components with "/"
path <- file.path(directory, "data.csv")
NanoStringData <- createNanoStringSetFromCsv(path, header = TRUE, designs)

pheno <- pData(NanoStringData)
group <- pheno$group
design.full <- model.matrix(~ 0 + group) # create a design (or model) matrix
design.full
# Treatment minus Control, given the column order of design.full
contrast <- c(-1, 1)

# Estimate the normalization factors and print each of them for inspection.
NanoStringData <- estNormalizationFactors(NanoStringData)
positiveFactor(NanoStringData)
negativeFactor(NanoStringData)
housekeepingFactor(NanoStringData)

result <- glm.LRT(NanoStringData, design.full, Beta = ncol(design.full), contrast = contrast)
result$table
I attempted to add the data using dput(NanoString), hopefully this makes my code more self-contained. Below is shown the output.
new("NanoStringSet"
, positiveFactor = numeric(0)
, negativeFactor = numeric(0)
, housekeepingFactor = numeric(0)
, positiveControl = structure(c(51117L, 9153L, 2357L, 749L, 133L, 88L, 30283L, 6423L,
1178L, 444L, 83L, 35L, 46143L, 8040L, 2014L, 554L, 114L, 82L,
48365L, 9338L, 2158L, 603L, 135L, 91L, 52744L, 10177L, 2391L,
786L, 143L, 72L, 70189L, 12069L, 3186L, 693L, 176L, 110L), .Dim = c(6L,
6L), .Dimnames = list(c("POS_A(128)", "POS_B(32)", "POS_C(8)",
"POS_D(2)", "POS_E(0.5)", "POS_F(0.125)"), c("Control.1", "Control.2",
"Control.3", "Treatment.1", "Treatment.2", "Treatment.3")))
, negativeControl = structure(c(52L, 32L, 40L, 14L, 104L, 74L, 28L, 25L, 29L, 11L,
74L, 45L, 31L, 32L, 29L, 13L, 80L, 60L, 61L, 44L, 32L, 5L, 103L,
74L, 56L, 42L, 44L, 15L, 135L, 62L, 55L, 54L, 36L, 12L, 108L,
61L), .Dim = c(6L, 6L), .Dimnames = list(c("NEG_B(0)", "NEG_C(0)",
"NEG_A(0)", "NEG_F(0)", "NEG_E(0)", "NEG_D(0)"), c("Control.1",
"Control.2", "Control.3", "Treatment.1", "Treatment.2", "Treatment.3"
)))
, housekeepingControl = structure(c(825L, 1892L, 1293L, 1496L, 2157L, 1254L, 1081L, 1121L,
914L, 1223L, 2123L, 3912L, 1876L, 2217L, 3363L, 1392L, 1750L,
1626L, 1196L, 1917L, 1378L, 1446L, 1300L, 1077L, 1875L, 2098L,
8006L, 2989L, 4447L, 4930L), .Dim = 5:6, .Dimnames = list(c("Actb|0",
"B2m|0", "Gapdh|0", "Rpl19|0", "Rplp0|0"), c("Control.1", "Control.2",
"Control.3", "Treatment.1", "Treatment.2", "Treatment.3")))
, experimentData = new("MIAME"
, name = ""
, lab = ""
, contact = ""
, title = ""
, abstract = ""
, url = ""
, pubMedIds = ""
, samples = list()
, hybridizations = list()
, normControls = list()
, preprocessing = list()
, other = list()
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 0L, 0L), c(1L, 1L, 0L))
)
)
, assayData = <environment>
, phenoData = new("AnnotatedDataFrame"
, varMetadata = structure(list(labelDescription = NA_character_), .Names = "labelDescription", row.names = "group", class = "data.frame")
, data = structure(list(group = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Control",
"Treatment"), class = "factor")), .Names = "group", row.names = c("Control.1",
"Control.2", "Control.3", "Treatment.1", "Treatment.2", "Treatment.3"
), class = "data.frame")
, dimLabels = c("sampleNames", "sampleColumns")
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 1L, 0L))
)
)
, featureData = new("AnnotatedDataFrame"
, varMetadata = structure(list(labelDescription = character(0)), .Names = "labelDescription", row.names = character(0), class = "data.frame")
, data = structure(list(), .Names = character(0), class = "data.frame", row.names = c("hsa-miR-10a-5p|0",
"hsa-miR-1234|0", "hsa-miR-185-5p|0", "hsa-miR-27a-3p", "hsa-miR-34c-3p",
"hsa-miR-1181|0", "hsa-miR-601", "hsa-miR-4454"))
, dimLabels = c("featureNames", "featureColumns")
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 1L, 0L))
)
)
, annotation = character(0)
, protocolData = new("AnnotatedDataFrame"
, varMetadata = structure(list(labelDescription = character(0)), .Names = "labelDescription", row.names = character(0), class = "data.frame")
, data = structure(list(), .Names = character(0), class = "data.frame", row.names = c("Control.1",
"Control.2", "Control.3", "Treatment.1", "Treatment.2", "Treatment.3"
))
, dimLabels = c("sampleNames", "sampleColumns")
, .__classVersion__ = new("Versions"
, .Data = list(c(1L, 1L, 0L))
)
)
, .__classVersion__ = new("Versions"
, .Data = list(c(3L, 3L, 1L), c(2L, 34L, 0L), c(1L, 3L, 0L), c(1L, 0L,
0L))
)
)
Thanks!

Fitting gaussian to data geom_point in ggplot2

I have the following data set
structure(list(Collimator = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), .Label = c("n", "y"), class = "factor"), angle = c(0L,
15L, 30L, 45L, 60L, 75L, 90L, 105L, 120L, 135L, 150L, 165L, 180L,
0L, 15L, 30L, 45L, 60L, 75L, 90L, 105L, 120L, 135L, 150L, 165L,
180L), X1 = c(2099L, 11070L, 17273L, 21374L, 23555L, 23952L,
23811L, 21908L, 19747L, 17561L, 12668L, 6008L, 362L, 53L, 21L,
36L, 1418L, 6506L, 10922L, 12239L, 8727L, 4424L, 314L, 38L, 21L,
50L), X2 = c(2126L, 10934L, 17361L, 21301L, 23101L, 23968L, 23923L,
21940L, 19777L, 17458L, 12881L, 6051L, 323L, 40L, 34L, 46L, 1352L,
6569L, 10880L, 12534L, 8956L, 4418L, 344L, 58L, 24L, 68L), X3 = c(2074L,
11109L, 17377L, 21399L, 23159L, 23861L, 23739L, 21910L, 20088L,
17445L, 12733L, 6046L, 317L, 45L, 26L, 46L, 1432L, 6495L, 10862L,
12300L, 8720L, 4343L, 343L, 38L, 34L, 60L), average = c(2099.6666666667,
11037.6666666667, 17337, 21358, 23271.6666666667, 23927, 23824.3333333333,
21919.3333333333, 19870.6666666667, 17488, 12760.6666666667,
6035, 334, 46, 27, 42.6666666667, 1400.6666666667, 6523.3333333333,
10888, 12357.6666666667, 8801, 4395, 333.6666666667, 44.6666666667,
26.3333333333, 59.3333333333)), .Names = c("Collimator", "angle",
"X1", "X2", "X3", "average"), row.names = c(NA, -26L), class = "data.frame")
I first scale the average counts for both collimator y and n to make the highest count 1
df <- ddply(df, .(Collimator), transform,
norm.average = average / max(average))
and plot the curves:
ggplot(df, aes(x=angle,y=norm.average,col=Collimator)) +
geom_point() + geom_line()
Using geom_line is quite unpleasing on the eye and I would rather fit to the data using stat_smooth. Each data set should be symmetric about the mean so I think a Gaussian fit should be ideal. How can I fit a Gaussian to the dataset collimator="y" and collimator="n" in ggplot2 or using base R. Also I would like to output the mean and standard deviation. Can this be done?
By definition your data is not Gaussian but a kind of Gaussian-like shape, and here is the example of the visualization of fitting:
# For each Collimator group, fit an unnormalised Gaussian curve by non-linear
# least squares and return a stat_function layer that draws the fitted curve.
fit <- dlply(df, .(Collimator), function(d) {
  # Estimates of the centre (m) and width (s) for this group
  est <- coef(nls(
    norm.average ~ exp(-(angle - m)^2/(2 * s^2)),
    data = d,
    start = list(s = 50, m = 80)
  ))
  gauss <- function(a) exp(-(a - est["m"])^2/(2 * est["s"]^2))
  stat_function(fun = gauss, data = d)
})
# Adding the list appends one curve layer per group to the scatter plot.
ggplot(df, aes(x = angle, y = norm.average, col = Collimator)) +
  geom_point() +
  fit
Updated
To obtain the parameters:
# Same per-group Gaussian fit, but each returned layer also carries its
# estimated coefficients in the ".coef" attribute so they can be read later.
fit <- dlply(df, .(Collimator), function(d) {
  est <- coef(nls(
    norm.average ~ exp(-(angle - m)^2/(2 * s^2)),
    data = d,
    start = list(s = 50, m = 80)
  ))
  gauss <- function(a) exp(-(a - est["m"])^2/(2 * est["s"]^2))
  curve_layer <- stat_function(fun = gauss, data = d)
  attr(curve_layer, ".coef") <- est
  curve_layer
})
then,
> # Full attribute name; the abbreviation ".co" only resolved through attr()'s partial matching
> ldply(fit, attr, ".coef")
Collimator s m
1 n 52.99117 82.60820
2 y 21.99518 86.61268

Resources