Add an offset in a nls fit in R - r

I have a dataframe df
df<-structure(list(ID = structure(c(1L, 3L, 5L, 6L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L,
24L, 28L, 29L, 30L, 33L, 34L, 37L, 38L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 48L, 49L, 50L, 52L, 53L, 54L, 56L, 57L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 69L, 71L, 74L, 75L, 86L, 88L, 90L, 92L,
94L, 95L, 100L, 101L, 107L, 108L, 109L, 110L, 111L, 112L), .Label = c("AU-Tum",
"AU-Wac", "BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax", "BR-Sa3", "CA-Ca1",
"CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1", "CA-NS2", "CA-NS3",
"CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas", "CA-Obs", "CA-Ojp",
"CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3", "CA-SJ1", "CA-SJ2",
"CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CN-Cha", "CN-Ku1", "CZ-Bk1",
"De-Bay", "DE-Hai", "DE-Har", "DE-Tha", "DE-Wet", "DK-Sor", "ES-Es1",
"FI-Hyy", "FI-Sod", "FR-Fon", "FR-Hes", "FR-Lbr", "FR-Pue", "GF-Guy",
"ID-Pag", "IL-Yat", "IT-Col", "IT-Cpz", "IT-Lav", "IT-Non", "IT-Pt1",
"IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef", "JP-Tom", "NL-Loo",
"PT-Esp", "RU-Fyo", "RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1",
"SE-Sk2", "SE-St1", "UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1",
"US-Bn2", "Us-Bn3", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2",
"US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1",
"US-Oho", "US-So2", "US-So3", "US-Sp1", "US-Sp2", "US-Sp3", "US-Syv",
"US-Umb", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8",
"VU-Coc", "CA-Cbo", "RU-Ab", "RU-Be", "RU-Mix", "TH-Mae"), class = "factor"),
VarA = c(98.5, 77, 63.2222222222222, 97, 52.5, 3.5, 15.5,
71, 161.833333333333, 153.5, 73, 39, 40, 23, 14, 5.5, 78,
129.5, 73.5, 4, 100, 10, 3, 30, 65.5, 198, 45.5, 20, 111.5,
44, 68.5, 102.5, 39.1111111111111, 83.8, 136, 31.5, 56.5,
101, 39.25, 108.5, 52.1666666666667, 9.5, 13, 52.1428571428571,
66.5, 1, 44.25, 106, 19, 202.571428571429, 200, 36.6, 2,
21.2, 69, 135, 46.5, 17.5, 96, 80.6666666666667, 10.6666666666667,
86.5, 70.5, 19.5, 85, 200, 50, 250, 30.5), Y = c(436.783385497984,
55.1825021383702, 526.4133417369, 560, 391.49284084118, -519.814235572849,
11.5525291214872, 162.441016515717, 39.0395567645998, -70.4910326673707,
17.1155716306239, -106.326129257097, -94.9308303585276, -66.4285516217351,
-144.929052323413, -220.613145695315, 157.129576861289, 44.1257786633602,
46.8326830295943, -146.719591499443, 30.8043649939355, -4.10548956954153,
-108.258462657337, 90.3369144331664, 126.866108251153, 42.9489971246803,
-45.4886732113082, 483.932040393885, 590.754048774834, 82.1480000555981,
76.8863707484328, 404.007940533033, 202.629066249886, -46.9675149230141,
557.939170770813, 300.979565786038, 224.256197650044, 148.719307398695,
201.195892312115, 466.727302447427, 552.762670615377, 481.359543363331,
467.379381521489, 444.812610935212, 308.198167469197, -638.973101716489,
321.395064735785, 181.896345832773, 629.214319321327, -176.181996958815,
214, 59.1716887350485, -186.42650026083, 515.533437888983,
595.091753601562, 255.499246957091, 368.347069109092, 141.97570288631,
39.5917358684237, 105.039591642989, 77.9087587283187, 153.700042322307,
198.276033313996, 358.242634316906, 156.666666666667, 270,
247, 100, -10)), .Names = c("ID", "VarA", "Y"), row.names = c(NA,
-69L), class = "data.frame")
I am performing a non-linear regression analysis to fit the parameters of a model.
library (minpack.lm)
fit1<- nlsLM(Y~A*(1-exp(k*VarA)), data = NEP_Mean_Site,
start = list(A=192.93829, k=-0.08976), control = list(maxiter = 500))
When I plot the output of the non-linear model against my data, I get this output.
coeff(fit1)
f<- function (x) {211.00044*(1-exp(-0.07224*x))}
plot(df$VarA, df$Y)
curve(f, add=T)
However, I would like my model to start below zero. I guess I have to tune-up my model and include an offset in my nls fit but could not find a way on how to do it.
Anyone knows how to do it?

Just add the offset to the formula:
fit1<- nlsLM(Y~offset + A*(1-exp(k*VarA)),
data = df,
start = list(offset = -300, A=192.93829, k=-0.08976),
control = list(maxiter = 500))
To plot the function, you can use slightly more generic code. Then you don't have to manually construct a function every time you have a different fit:
plot(Y~ VarA, data = df)
id <- order(df$VarA)
lines(predict(fit1)[id] ~ df$VarA[id])
gives you:

Related

Show all datapoints while specifying axis labels in ggplot in R

I am creating a scatterplot using ggplot. I am able to create a scatterplot using the following code.
ggplot(df2, aes(x = date, y = mean, color = NULL)) +
geom_point(position = "jitter") +
labs(title = "ShotSpotter incidents around July 4th",
x = "Day of year", y = "Mean daily gunshots") +
labs(fill = "Treatment Status") +
geom_segment(aes(x = "07-01", xend = "07-01", y = 0, yend = 50), colour = "red")
I would like to change the labels on the x-axis so that they are easier to read. When I try to do so using scale_x_discrete(), most of the datapoints disappear except for those corresponding to the values now labeled on the x-axis.
ggplot(df2, aes(x = date, y = mean, color = NULL)) +
geom_point(position = "jitter") +
labs(title = "ShotSpotter incidents around July 4th",
x = "Day of year", y = "Mean daily gunshots") +
labs(fill = "Treatment Status") +
geom_segment(aes(x = "07-01", xend = "07-01", y = 0, yend = 50), colour = "red") +
scale_x_discrete(limits = c("05-01", "06-01", "07-01", "08-01", "09-01"),
labels = c("May 1", "June 1", "July 1", "Aug 1", "Sept 1"))
How can I keep the labels from the 2nd graph and include all the datapoints shown in the 1st?
Data using dput():
structure(list(date = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L,
76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L,
89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L,
112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L,
123L, 124L), .Label = c("05-01", "05-02", "05-03", "05-04", "05-05",
"05-06", "05-07", "05-08", "05-09", "05-10", "05-11", "05-12",
"05-13", "05-14", "05-15", "05-16", "05-17", "05-18", "05-19",
"05-20", "05-21", "05-22", "05-23", "05-24", "05-25", "05-26",
"05-27", "05-28", "05-29", "05-30", "05-31", "06-01", "06-02",
"06-03", "06-04", "06-05", "06-06", "06-07", "06-08", "06-09",
"06-10", "06-11", "06-12", "06-13", "06-14", "06-15", "06-16",
"06-17", "06-18", "06-19", "06-20", "06-21", "06-22", "06-23",
"06-24", "06-25", "06-26", "06-27", "06-28", "06-29", "06-30",
"07-01", "07-02", "07-03", "07-04", "07-05", "07-06", "07-07",
"07-08", "07-09", "07-10", "07-11", "07-12", "07-13", "07-14",
"07-15", "07-16", "07-17", "07-18", "07-19", "07-20", "07-21",
"07-22", "07-23", "07-24", "07-25", "07-26", "07-27", "07-28",
"07-29", "07-30", "07-31", "08-01", "08-02", "08-03", "08-04",
"08-05", "08-06", "08-07", "08-08", "08-09", "08-10", "08-11",
"08-12", "08-13", "08-14", "08-15", "08-16", "08-17", "08-18",
"08-19", "08-20", "08-21", "08-22", "08-23", "08-24", "08-25",
"08-26", "08-27", "08-28", "08-29", "08-30", "08-31", "09-01"
), class = "factor"), mean = c(13, 15, 16.5, 17.6666666666667,
14.5, 13.3333333333333, 11.8333333333333, 13, 13, 14.3333333333333,
13.8333333333333, 15.5, 11.1666666666667, 15, 12.5, 15.6666666666667,
14.5, 10.5, 11.6666666666667, 17.5, 14.5, 13, 14.6666666666667,
15.6666666666667, 21.3333333333333, 30.6666666666667, 18.5, 17.5,
13.5, 18.5, 13.3333333333333, 14.5, 14.8333333333333, 9.66666666666667,
15.8333333333333, 13.5, 20.5, 16.1666666666667, 15.1666666666667,
14.8333333333333, 15.3333333333333, 14.1666666666667, 14.5, 13.6666666666667,
20.1666666666667, 17.8333333333333, 22.3333333333333, 15.8333333333333,
15.5, 16.1666666666667, 15, 20, 20.8333333333333, 20.8333333333333,
25, 21.1666666666667, 18.1666666666667, 27, 19.5, 19.5, 19.6666666666667,
25.6666666666667, 36.8333333333333, 46.6666666666667, 40.5, 21.3333333333333,
16.3333333333333, 18, 20.1666666666667, 22.6666666666667, 16.8333333333333,
13.8333333333333, 14.5, 14.1666666666667, 16.5, 15.1666666666667,
15.1666666666667, 13.3333333333333, 13.3333333333333, 12.6666666666667,
12.8333333333333, 12.3333333333333, 16.5, 19.6666666666667, 16.3333333333333,
10.6666666666667, 13.1666666666667, 17.5, 10.3333333333333, 15.5,
12.1666666666667, 14.3333333333333, 13.8333333333333, 11.6666666666667,
13, 10.6666666666667, 17.5, 19.3333333333333, 12.6666666666667,
12.5, 12.5, 13.5, 15.8333333333333, 13.5, 15.6666666666667, 16.3333333333333,
14.5, 13.8333333333333, 14.3333333333333, 11, 13.3333333333333,
23.8333333333333, 14.1666666666667, 13.5, 13.3333333333333, 13.3333333333333,
14, 10.6666666666667, 14.3333333333333, 13.3333333333333, 13.1666666666667
)), class = "data.frame", row.names = c(NA, -121L))
df2$date <- as.Date(paste0("2000-", as.character(df2$date)))
ggplot(df2, aes(x = date, y = mean, color = NULL)) +
geom_point(position = "jitter") +
labs(title = "ShotSpotter incidents around July 4th",
x = "Day of year", y = "Mean daily gunshots") +
labs(fill = "Treatment Status") +
geom_segment(aes(x = as.Date("2000-07-01"), xend = as.Date("2000-07-01"), y = 0, yend = 50), colour = "red") +
scale_x_date(labels = scales::date_format("%b-%d"))
This required the two changes: as.Date (I'm supposing 2000 year just for something), and changing geom_segment so that x and xend are Date objects.
We can use
library(dplyr)
library(lubridate)
library(ggplot2)
df2 %>%
mutate(date = mdy(date, truncated = 2)) %>%
ggplot(aes(x = date, y = mean, color = NULL)) +
geom_point(position = "jitter") +
labs(title = "ShotSpotter incidents around July 4th",
x = "Day of year", y = "Mean daily gunshots") +
labs(fill = "Treatment Status") +
geom_segment(aes(x = mdy('07-01', truncated = 2),
xend = mdy('07-01', truncated = 2), y=0, yend = 50), colour = 'red') +
scale_x_date(labels = scales::date_format("%b-%d"))
-output

Plotting a multiple linear regression in R using scatter3D() (package plot3D)

I have the following data in a csv file.
y,x1,x2,x3,x4,x5,x6,x7,x8,x9
10,2113,1985,38.9,64.7,4,868,59.7,2205,1917
11,2003,2855,38.8,61.3,3,615,55,2096,1575
11,2957,1737,40.1,60,14,914,65.6,1847,2175
13,2285,2905,41.6,45.3,-4,957,61.4,1903,2476
10,2971,1666,39.2,53.8,15,836,66.1,1457,1866
11,2309,2927,39.7,74.1,8,786,61,1848,2339
10,2528,2341,38.1,65.4,12,754,66.1,1564,2092
11,2147,2737,37,78.3,-1,761,58,1821,1909
4,1689,1414,42.1,47.6,-3,714,57,2577,2001
2,2566,1838,42.3,54.2,-1,797,58.9,2476,2254
7,2363,1480,37.3,48,19,984,67.5,1984,2217
example = data.frame(x1,x2,x3,x4,y)
How can I graph the variables x1, x2, x3 using scatter3D(x,y,z)?
I have tried:
library("plot3D")
with(example,scatter3D(y ~ x1 + x2 + x3))
But I get error:
Error in min(x,na.rm) : invalid 'type' (list) of argument
Looks like you want to plot a regression plane. The scatter3d function in package car will do that. You need to install car and rgl. First let's make your data more accessible:
dput(example)
structure(list(y = c(10L, 11L, 11L, 13L, 10L, 11L, 10L, 11L,
4L, 2L, 7L), x1 = c(2113L, 2003L, 2957L, 2285L, 2971L, 2309L,
2528L, 2147L, 1689L, 2566L, 2363L), x2 = c(1985L, 2855L, 1737L,
2905L, 1666L, 2927L, 2341L, 2737L, 1414L, 1838L, 1480L), x3 = c(38.9,
38.8, 40.1, 41.6, 39.2, 39.7, 38.1, 37, 42.1, 42.3, 37.3), x4 = c(64.7,
61.3, 60, 45.3, 53.8, 74.1, 65.4, 78.3, 47.6, 54.2, 48), x5 = c(4L,
3L, 14L, -4L, 15L, 8L, 12L, -1L, -3L, -1L, 19L), x6 = c(868L,
615L, 914L, 957L, 836L, 786L, 754L, 761L, 714L, 797L, 984L),
x7 = c(59.7, 55, 65.6, 61.4, 66.1, 61, 66.1, 58, 57, 58.9,
67.5), x8 = c(2205L, 2096L, 1847L, 1903L, 1457L, 1848L, 1564L,
1821L, 2577L, 2476L, 1984L), x9 = c(1917L, 1575L, 2175L,
2476L, 1866L, 2339L, 2092L, 1909L, 2001L, 2254L, 2217L)),
class = "data.frame", row.names = c(NA, -11L))
install.packages("car")
install.packages("rgl")
library(car)
library(rgl)
scatter3d(y~x1+x2, example)
The plot window will be small. Use the mouse to drag the lower right corner to make it bigger. You can drag within the plot to rotate it.

Getting an Error Message while making a Bar Plot with Error Bars in R

My code looks like this right now:
library(ggplot2)
ggplot (y=mean, data=a)+geom_bar(stat="identity")+geom_errorbar(aes(ymin=mean - sd, ymax=mean + sd,width=0.15))
I'm getting the following error:
Error in mean - sd : non-numeric argument to binary operator
These are the libraries that I have loaded:
if(!require(psych)){install.packages("psych")}
if(!require(FSA)){install.packages("FSA")}
if(!require(Rmisc)){install.packages("Rmisc")}
if(!require(ggplot2)){install.packages("ggplot2")}
if(!require(car)){install.packages("car")}
if(!require(multcompView)){install.packages("multcompView")}
if(!require(lsmeans)){install.packages("lsmeans")}
if(!require(rcompanion)){install.packages("rcompanion")}
EDIT:
structure(list(morning.time = c(39.28, 42.32, 45.56, 43.47, 45.1,
42.44, 49.2, 45.99, 52.48, 49.16, 49.63, 47.4, 48.14, 47.89,
52.91, 51.56, 49.28, 53.62), morning.pushups = c(32L, 34L, 37L,
38L, 42L, 45L, 51L, 51L, 52L, 54L, 53L, 52L, 53L, 54L, 57L, 56L,
54L, 59L), evening.time = c(37.75, 39.58, 42.88, 38.45, 40.72,
37.12, 39.89, 45.31, 39.73, 42.69, 42.47, 45.47, 43.65, 47.78,
46.97, 47.75, 46.72, 42.12), evening.pushups = c(30L, 32L, 38L,
34L, 39L, 37L, 35L, 42L, 39L, 47L, 48L, 48L, 48L, 54L, 52L, 47L,
49L, 47L)), .Names = c("morning.time", "morning.pushups", "evening.time",
"evening.pushups"), class = "data.frame", row.names = c(NA, -18L
))
You didn't specify where your data is coming from it has no clue what "mean" you are referring to as well as putting aes() aroudn x and y and most likely you will haev to refer to your data as data$mean or data$sd and I recommend having a column called "UCL" and "LCL" so you don't do calculations in the code.
`ggplot (data=YOURDATAFRAMEGOESHERE,aes(y=mean,x=XVARGOESHERE))+
geom_bar(stat="identity")+
geom_errorbar(aes(ymin=mean - sd,
ymax=mean + sd,width=0.15))`
So any time you see "something rather to binary operator" it simply means something is off with the program recognizing your variables or you forgot a "+" between your geoms

Constrain nlsLM() with a cost function

I have a dataframe df
df<- structure(list(ID = structure(c(1L, 3L, 5L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
31L, 32L, 33L, 36L, 37L, 40L, 41L, 44L, 45L, 46L, 47L, 48L, 49L,
50L, 52L, 53L, 54L, 56L, 57L, 58L, 60L, 62L, 63L, 66L, 67L, 68L,
69L, 70L, 71L, 72L, 75L, 77L, 80L, 81L, 82L, 88L, 93L, 95L, 97L,
99L, 101L, 102L, 107L, 108L, 114L), .Label = c("AU-Tum", "AU-Wac",
"BE-Bra", "BE-Jal", "BE-Vie", "BR-Cax", "BR-Ma2", "BR-Sa1", "BR-Sa3",
"BW-Ma1", "CA-Ca1", "CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man", "CA-NS1",
"CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7", "CA-Oas",
"CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2", "CA-SF3",
"CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4", "CN-Cha",
"CN-Ku1", "CZ-Bk1", "De-Bay", "DE-Hai", "DE-Har", "DE-Meh", "DE-Tha",
"DE-Wet", "DK-Sor", "ES-Es1", "FI-Hyy", "FI-Sod", "FR-Fon", "FR-Hes",
"FR-Lbr", "FR-Pue", "GF-Guy", "ID-Pag", "IL-Yat", "IT-Col", "IT-Cpz",
"IT-Lav", "IT-Lma", "IT-Noe", "IT-Non", "IT-Pt1", "IT-Ro1", "IT-Ro2",
"IT-Sro", "JP-Tak", "JP-Tef", "JP-Tom", "NL-Loo", "PT-Esp", "RU-Fyo",
"RU-Zot", "SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1",
"UK-Gri", "UK-Ham", "US-Bar", "US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3",
"US-Dk2", "US-Dk3", "US-Fmf", "US-Fwf", "US-Ha1", "US-Ha2", "US-Ho1",
"US-Ho2", "US-Lph", "US-Me1", "US-Me3", "US-Nc2", "US-NR1", "US-Oho",
"US-So2", "US-So3", "US-Sp1", "US-Sp2", "US-Sp3", "US-Syv", "US-Umb",
"US-Wbw", "US-Wcr", "US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8",
"VU-Coc", "CA-Cbo", "CN-Lao", "ID-Buk", "IS-Gun", "JP-Fuj", "MY-Pso",
"RU-Ab", "RU-Be", "RU-Mix", "TH-Mae"), class = "factor"), Y = c(436.783385497984,
55.1825021383702, 526.4133417369, 391.49284084118, -519.814235572849,
11.5525291214872, 162.441016515717, 39.0395567645998, -70.4910326673707,
17.1155716306239, -106.326129257097, -94.9308303585276, -66.4285516217351,
-144.929052323413, -220.613145695315, 157.129576861289, 44.1257786633602,
46.8326830295943, -146.719591499443, 30.8043649939355, -4.10548956954153,
-108.258462657337, 90.3369144331664, 126.866108251153, 42.9489971246803,
-45.4886732113082, 483.932040393885, 590.754048774834, 82.1480000555981,
76.8863707484328, 404.007940533033, 202.629066249886, -46.9675149230141,
557.939170770813, 300.979565786038, 224.256197650044, 148.719307398695,
201.195892312115, 466.727302447427, 552.762670615377, 595.145436977735,
481.359543363331, 467.379381521489, 444.812610935212, 308.198167469197,
-638.973101716489, 321.395064735785, 181.896345832773, 629.214319321327,
-176.181996958815, 59.1716887350485, -186.42650026083, 515.533437888983,
595.091753601562, 367.15020653978, 934.415348643437, 255.499246957091,
368.347069109092, 141.97570288631, 39.5917358684237, 105.039591642989,
77.9087587283187, 153.700042322307, 157.436949779134, 358.242634316906
), Unc = c(-2.87896446519996, -0.30731156873436, -1.3811336535939,
-3.60168125065523, 1.35359565655672, -0.58525692609091, -0.463995294634932,
-0.112770209421705, -0.178508318809592, -0.44506337354913, 0.285085608169751,
0.241425707960461, -0.616179720920167, -0.00579570274186878,
0.385699289486463, -0.43071884834486, -1.32799753416588, -0.138248737701239,
0.026437443324628, -0.0981101016865843, -0.125326368431498, 0.289668409902704,
-0.224679559714174, -0.376257445433255, -0.0904535633017475,
-1.27942478849042, -2.78944896222686, -1.57015451106923, -3.02435991211342,
-0.188885650489005, -2.77697810019308, -0.683634153351544, 0.148164853468482,
-1.520102142822, -0.855422614115418, -0.580609573477037, -2.12306082165876,
-1.2334420909422, -2.00323122411995, -1.45967340674881, -1.60448511158608,
-2.52530298868671, -1.28908559855364, -1.16270411420386, -1.5186009244046,
0.24330408272554, -1.72852090322909, -0.497423296440042, -2.79905035399537,
0.453520174531953, -0.38557736709315, 0.513504024431323, -1.58608831551316,
-1.56046815861851, -3.32259575879769, -7.99135003959363, -0.913109035398266,
-3.48447862397436, -0.518022500487711, -0.352263975401941, -0.331662926968978,
-0.236234610041281, -2.31039763656225, -0.987148209221828, -3.37441047823435
), X = c(98.5, 77, 63.2222222222222, 52.5, 3.5, 15.5, 71, 161.833333333333,
153.5, 73, 39, 40, 23, 14, 5.5, 78, 129.5, 73.5, 4, 100, 10,
3, 30, 65.5, 198, 45.5, 20, 111.5, 44, 68.5, 102.5, 39.1111111111111,
83.8, 136, 31.5, 56.5, 101, 39.25, 108.5, 52.1666666666667, 54.5,
9.5, 13, 52.1428571428571, 66.5, 1, 44.25, 106, 19, 202.571428571429,
36.6, 2, 21.2, 69, 67.5, 21, 135, 46.5, 17.5, 96, 80.6666666666667,
10.6666666666667, 86.5, 66.2, 2.5)), .Names = c("ID", "Y", "Unc",
"X"), row.names = c(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L,
51L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L,
65L, 66L, 67L), class = "data.frame", na.action = structure(c(4L,
52L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L), .Names = c("4",
"52", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77"
), class = "omit"))
I am currently implementing a non-linear regression analysis with a leave one out ID as follows:
library (minpack.lm)
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500)), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
Basically, for each iteration, I remove one ID and perform a non-linear regression model using the other IDs which is then used to predict the response of the left out ID.
However, each observation used to create the non-linear models have different uncertainty values (see Unc column in df). Therefore, I wanted to include a cost function in the nls based on the uncertainty of my observations. I made some research and the following cost function should do the work : cost= sum(((obs-pred)/ Unc)^2) which is basically the sum of squared error (SEE) weighted by the uncertainty.
I have troubles finding a way to include this cost function in the nlsLM() object. I have seen the weights parameters which could be included in the nlsLM() object but the possibilities of this parameter look quite limited.
This is what I have tried so far by including the weights parameters in the nlsLM() function, but it does not work so far:
id<-unique(df$ID)
nlm1_pred<- c()
for (i in id){
nlm1<- try(nlsLM(Y~offset + A*(1-exp(k*X)), data = df[df$ID != i,],
start = list(A=192.93829, k=-0.08976, offset=-700), control = list(maxiter = 500), weights = wfct(sum(((fitted-Y)/Unc)^2))), silent=TRUE);
nlm1_pred[[i]]<- if (inherits(nlm1, "nls")) sim = predict(nlm1, newdata=df[df$ID == i,]) else NA;
}
You can use 1/Unc^2 as the weights in nlsLM to get the objective function referred to as cost in the question and obj.w below.
To illustrate, the two unweighted examples using optim and nlsLM below give the same result and the two weighted examples (also using optim and nlsLM) give the same result. Since the objective here is to show that the weights are the same we used starting values fairly close to the final values in each case so that differences in the algorithms do not come into play.
rhs <- function(p, df) with(df, p[[3]] + p[[1]]*(1-exp(p[[2]]*X)))
# unweighted
st.u <- list(A = 800, k = -0.2, offset = -700)
obj.u <- function(p, df) with(df, sum((Y - rhs(p, df))^2))
fit.optim.u <- optim(st.u, obj.u, df = df)
fit.optim.u$par; fit.optim.u$value # 1
fit.nlsLM.u <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, start = st.u)
coef(fit.nlsLM.u); deviance(fit.nlsLM.u) # 2 - same as 1
# weighted
st.w <- list(A = 2704, k = -1.7, offset = -2845)
obj.w <- function(p, df) with(df, sum((Y - rhs(p, df))^2/Unc^2))
fit.optim.w <- optim(st.w, obj.w, df = df)
fit.optim.w$par; fit.optim.w$value # 3
fit.nlsLM.w <- nlsLM(Y ~ rhs(c(A, k, offset), df), data = df, weights = 1/Unc^2,
start = st.w)
coef(fit.nlsLM.w); deviance(fit.nlsLM.w) # 4 = same as 3
It seems that precisely what objective function/weights you want it is still not clear so you might want to play with the examples above using optim until you find the objective function that you want (since with it the objective is explicit) and then implement it in nlsLM checking that they give the same answers. Note that optim and nlsLM can find different answers even if you are using identical objectives/weights due to the different algorithms and stopping criteria so if you run both and find that then try starting them both at the best coefficients found to eliminate differences.
The point of all this is that since with optim we specify the objective explicitly and not with nlsLM it gives a way of double checking what nlsLM is doing.
Update: reworked focusing on comparing optim and nlsLM.

Compute a function from a loop to apply it in a list of dataframe

I have a dataframe df
structure(list(ID = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 12L, 12L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 20L, 20L, 20L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 57L, 57L, 62L, 62L, 62L, 70L, 70L, 70L,
70L, 70L, 2L, 2L, 2L, 43L, 43L, 43L, 43L, 43L, 43L, 43L, 46L,
46L, 46L, 46L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L), .Label = c("AU-Tum",
"BE-Bra", "BR-Sa3", "CA-Ca1", "CA-Ca2", "CA-Ca3", "CA-Gro", "Ca-Man",
"CA-NS1", "CA-NS2", "CA-NS3", "CA-NS4", "CA-NS5", "CA-NS6", "CA-NS7",
"CA-Oas", "CA-Obs", "CA-Ojp", "CA-Qcu", "CA-Qfo", "CA-SF1", "CA-SF2",
"CA-SF3", "CA-SJ1", "CA-SJ2", "CA-SJ3", "CA-TP1", "CA-TP2", "CA-TP4",
"CZ-Bk1", "DE-Har", "DE-Wet", "DK-Sor", "FI-Hyy", "FR-Hes", "FR-Pue",
"ID-Pag", "IT-Ro1", "IT-Ro2", "IT-Sro", "JP-Tak", "JP-Tef", "NL-Loo",
"SE-Abi", "SE-Fla", "SE-Nor", "SE-Sk1", "SE-Sk2", "SE-St1", "UK-Gri",
"US-Blo", "US-Bn1", "US-Bn2", "Us-Bn3", "US-Dk3", "US-Fmf", "US-Fwf",
"US-Ha1", "US-Ha2", "US-Ho1", "US-Ho2", "US-Lph", "US-Me1", "US-Me3",
"US-Nc2", "US-NR1", "US-Sp1", "US-Sp2", "US-Sp3", "US-Umb", "US-Wcr",
"US-Wi0", "US-Wi1", "US-Wi2", "US-Wi4", "US-Wi8"), class = "factor"),
x = c(156, 157, 160, 162, 163, 164, 165, 153, 154, 155, 71,
72, 73, 74, 37, 38, 39, 40, 41, 39, 40, 22, 23, 24, 12, 13,
14, 15, 16, 4, 5, 6, 7, 74, 75, 76, 77, 78, 79, 80, 81, 82,
126, 127, 128, 129, 130, 131, 132, 71, 72, 73, 74, 75, 76,
99, 100, 101, 49, 50, 51, 52, 53, 54, 56, 9, 10, 46, 47,
48, 84, 85, 86, 87, 88, 77, 78, 79, 101, 105, 106, 107, 108,
109, 110, 81, 82, 84, 88, 131, 132, 133, 134, 135, 136, 137,
138, 139), y = c(50.0472381226718, 706.825824817992, 729.621982051409,
593.225827791495, 685.154353165934, 574.088067465695, 650.30821636616,
494.185166497016, 436.312162090908, 631.891738044098, 280.949480787385,
641.231373456365, 412.116433330579, 416.824746264203, 415.905685925856,
494.374217984441, 201.745910386788, 486.030122926459, 647.782697262242,
389.839577941515, 256.552344558528, 605.790549736819, 483.045965372879,
668.017897433514, 35.2706101682852, 265.693628564011, 285.116345260642,
291.023782284086, 357.428790589795, 205.920375034591, 229.606221692753,
230.952761338012, 241.641164634028, 1089.06303295676, 1255.88808925333,
1087.75402177912, 1068.248897182, 1212.17254891642, 884.222588171535,
938.887718005513, 863.582247020735, 1065.91969416523, 902.338968377328,
790.570635510725, 834.500908313203, 710.755041345197, 814.002362551197,
726.814950022846, 828.559687148314, 611.564698476112, 603.238720579422,
524.322001078981, 565.296378873638, 532.431853589369, 597.174114277044,
606.075737104722, 686.408686154056, 705.914347674276, 1858.98340779543,
1893.38468471169, 1819.83262739703, 1827.31409981102, 1640.5816780664,
1689.0365549922, 2112.67917439342, 479.374777290737, 326.663507855032,
1184.81825619942, 1281.2920902365, 1269.12480160726, 1265.48484068702,
1193.29000986667, 1156.81486114406, 1199.7373066445, 1116.24029749935,
1100.47051284742, 1072.57190890331, 1228.25697739795, 1576.32775748242,
1631.14609672129, 1796.87265141308, 1712.90461264737, 1844.87409764528,
1938.56225809082, 1663.52108450048, 1626.12740687071, 1333.52924151719,
1349.01338642137, 1376.41668179166, 1362.32371946308, 1317.75608457439,
1519.12511487596, 1558.26111694807, 1588.8933303128, 1624.50100837374,
1433.10019567201, 1371.01498340943, 1439.94849821774)), .Names = c("ID",
"x", "y"), row.names = c(290L, 291L, 292L, 293L, 294L, 295L,
296L, 297L, 298L, 299L, 300L, 301L, 302L, 303L, 304L, 305L, 306L,
307L, 308L, 309L, 310L, 311L, 312L, 313L, 314L, 315L, 316L, 317L,
318L, 319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L, 327L, 328L,
329L, 330L, 331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L,
340L, 341L, 342L, 343L, 344L, 351L, 352L, 353L, 424L, 425L, 426L,
427L, 428L, 429L, 430L, 471L, 472L, 493L, 494L, 495L, 512L, 513L,
514L, 515L, 516L, 266L, 267L, 268L, 438L, 439L, 440L, 441L, 442L,
443L, 444L, 451L, 452L, 453L, 454L, 484L, 485L, 486L, 487L, 488L,
489L, 490L, 491L, 492L), class = "data.frame")
I wanted to compute a the model efficiency in a cross validation leave one subject out mode on this dataframe by using one non-linear function, which I have implemented as such:
library(stats)
library (hydroGOF)
Out <- c()
id <- unique(df$ID)
for (i in id){
fit1 <- try(nls(y~A*x^3+B*x^2+C*x+D, data = df[df$ID != i,], start = list(A=0.02, B=-0.6, C= 50, D=200)), silent=TRUE)
Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=df[df$ID==i,])
}
MEF<- NSE(Out, df$y)
However, I would like to create a function out of it in order to apply it in n dataframe with the same structure but by also including two non-linear functions in the loop. I have started to implement this line of codes but without success.
stat <- function(dat) {
id <- unique(dat$ID)
Out<-c()
Out2<-c()
for (i in id){
fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
c(Out, Out2)
}}
df.list<-list(df) # Here I put only one dataframe but it will be more than one.
res<-lapply(df.list, stat)
Anyone could solve my issue?
I didn't get if you want it to output the NSE result or just the Out and Out2 so I included two variations.
First of all I think your function needs to list Out and Out2 instead of using c which will concatenate the two.
Then you can do:
stat <- function(dat) {
id <- unique(dat$ID)
Out<-c()
Out2<-c()
for (i in id){
fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
}
list(Out, Out2)
#list(NSE(Out, dat$y), NSE(Out2, dat$y))
}
Which will output just Out and Out2 on which then you could use NSE:
df.list<-list(df) # Here I put only one dataframe but it will be more than one.
res<-lapply(df.list, stat)
> res
[[1]]
[[1]][[1]]
[1] 1293.19052 1296.35786 1291.80408 1290.99605 1298.77649 1288.68723 1280.85556 1274.29969 73.09179 137.39803 199.57234 253.22724 306.64911 767.04455
[15] 805.64571 830.52502 852.82856 392.31343 392.76582 381.99857 471.69255 468.30934 1213.32652 1318.90282 1336.20432 1331.59450 1093.61066 776.19050
[29] 816.66237 855.65923 347.74230 411.60219 1187.15420 1175.01320 1169.16425 1160.33162 1148.21352 1145.15265 1134.55927 1126.44445 1112.65676 667.97854
[43] 656.19839 654.13085 639.01529 635.08903 620.08683 1222.46503 1214.07093 1206.90274 1197.26398 1188.93353 1178.60658 142.68868 209.94101 278.51757
[57] 339.27000 401.06092 952.40556 939.72216 928.89347 627.67858 671.42009 217.45400 1229.16628 1303.97471 1305.00665 1303.10220 1306.25091 1302.15214
[71] 1284.16542 1268.00061 1252.26307 1241.32474 1236.48697 1234.85012 1273.66465 1283.02331 1305.82044 1316.90994 137.40424 1326.03716 1323.69234 896.51028
[85] 936.55168 945.38984 937.21832 919.84053 909.75825 1088.71144 1080.18786 1070.87042 1059.73379 1051.40485 1239.73625 1215.11176 1193.85910
[[1]][[2]]
[1] 1293.19052 1296.35786 1291.80408 1290.99605 1298.77649 1288.68723 1280.85556 1274.29969 73.09179 137.39803 199.57234 253.22724 306.64911 767.04455
[15] 805.64571 830.52502 852.82856 392.31343 392.76582 381.99857 471.69255 468.30934 1213.32652 1318.90282 1336.20432 1331.59450 1093.61066 776.19050
[29] 816.66237 855.65923 347.74230 411.60219 1187.15420 1175.01320 1169.16425 1160.33162 1148.21352 1145.15265 1134.55927 1126.44445 1112.65676 667.97854
[43] 656.19839 654.13085 639.01529 635.08903 620.08683 1222.46503 1214.07093 1206.90274 1197.26398 1188.93353 1178.60658 142.68868 209.94101 278.51757
[57] 339.27000 401.06092 952.40556 939.72216 928.89347 627.67858 671.42009 217.45400 1229.16628 1303.97471 1305.00665 1303.10220 1306.25091 1302.15214
[71] 1284.16542 1268.00061 1252.26307 1241.32474 1236.48697 1234.85012 1273.66465 1283.02331 1305.82044 1316.90994 137.40424 1326.03716 1323.69234 896.51028
[85] 936.55168 945.38984 937.21832 919.84053 909.75825 1088.71144 1080.18786 1070.87042 1059.73379 1051.40485 1239.73625 1215.11176 1193.85910
Or use:
stat <- function(dat) {
id <- unique(dat$ID)
Out<-c()
Out2<-c()
for (i in id){
fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
}
#list(Out, Out2)
c(NSE(Out, dat$y), NSE(Out2, dat$y))
}
If you want it to output the NSEs instead:
df.list<-list(df) # Here I put only one dataframe but it will be more than one.
res<-lapply(df.list, stat)
> res
[[1]]
[1] 0.3125795 0.3125795
Edit for answering the comment:
stat <- function(dat) {
id <- nrow(dat)
Out<-c()
Out2<-c()
for (i in 1:id){
fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[-i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
Out[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[i,]) else NA;
fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[-i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
Out2[i] <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[i,]) else NA;
}
#list(Out, Out2)
c(NSE(Out, dat$y), NSE(Out2, dat$y))
}
> stat(df)
[1] 0.2571609 0.2571609
Edit2:
stat <- function(dat) {
id <- unique(dat$ID)
NSEs<-list()
for (i in id){
fit1 <- try(nls(y~A*(x^B)*(exp(k*x)), data = dat[dat$ID != i,], start = list(A = 1000, B = 0.170, k = -0.00295)), silent=TRUE);
Out <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
fit2 <- try(nls(y~A*x^2+B*x+C, data = dat[dat$ID != i,], start = list(A=-0.4, B=50, C= 300)), silent=TRUE);
Out2 <- if (inherits(fit1, "nls")) sim = predict(fit1, newdata=dat[dat$ID==i,]) else NA;
NSEs[[length(NSEs)+1]] <- c(NSE(Out, dat$y[dat$ID == i]), NSE(Out2, dat$y[dat$ID == i]))
}
NSEs
}
> stat(df)
[[1]]
[1] -4.218322 -4.218322
[[2]]
[1] -27.30966 -27.30966
[[3]]
[1] -35.98506 -35.98506
[[4]]
[1] -10.89336 -10.89336
[[5]]
[1] -73.49176 -73.49176
#and so on...
output <- sapply(
seq_len(nrow(df)),
function(i){
model <- list(
try(
nls(
y ~ A * (x ^ B) * (exp(k * x)),
data = df[-i,],
start = list(A = 1000, B = 0.170, k = -0.00295)
),
silent=TRUE
),
try(
nls(
y ~ A * x ^ 2 + B * x + C,
data = df[-i,],
start = list(A = -0.4, B = 50, C= 300)
),
silent=TRUE
)
)
sapply(model, function(x){
ifelse(
class(x) == "try-error",
NA,
predict(x, newdata = df[i, ])
)
})
}
)

Resources