R nls() Initial Parameter Problem, nonlinear Regression - r

I get an error message:
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
when using the nls() function like
# Model formula: flux ~ constant * a * exp(b * soil temperature) * (quadratic in soil moisture).
# Note that the right-hand side contains an inline assignment (rRef <- 5.5354).
form_Q10_parabolic_SM <- as.formula(Lin_Flux..mymol.m.2.s.1. ~ (rRef<- 5.5354)*a*exp(b*Mean_Soil_Temp_V2..C.)*((-c*Soil_Moist_V3**2)+(d*Soil_Moist_V3)+e))
# Fit with nls() using starting values for a-e; this is the call that reports
# "singular gradient matrix at initial parameter estimates".
Q10_parabolic_SM <- nls(form_Q10_parabolic_SM, data = conB1_2015, start = list(a = 1, b = 0.11, c = 0.0001, d = 0.01, e = 0.1))
I got my initial parameters by using the preview() function of the nlstools library like this (with the same formula definition as above):
# Check the same starting values with preview() before fitting.
preview(form_Q10_parabolic_SM, data = conB1_2015, start = c(a = 1, b = 0.11, c = 0.0001, d = 0.01, e = 0.1), variable = 1)
Which gives me this output with the parameters a-e above:
This looks quite good to my eyes, and I really don't know what to do at this point, since preview() works just fine.
Is my model too complex or overparameterized? Or did I just do something wrong with the nls function?
Any tips would be really appreciated!
> dput(head(conB1_2015, 30))
structure(list(X = c(13L, 68L, 69L, 70L, 71L, 72L, 73L, 74L,
75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L,
88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L), IV_Date = c("2015-01-14",
"2015-03-11", "2015-03-12", "2015-03-13", "2015-03-14", "2015-03-15",
"2015-03-16", "2015-03-17", "2015-03-18", "2015-03-19", "2015-03-20",
"2015-03-21", "2015-03-22", "2015-03-23", "2015-03-24", "2015-03-25",
"2015-03-26", "2015-03-27", "2015-03-28", "2015-03-29", "2015-03-30",
"2015-03-31", "2015-04-01", "2015-04-02", "2015-04-03", "2015-04-04",
"2015-04-05", "2015-04-06", "2015-04-07", "2015-04-08"), SMmean010.... = c(24.5341666666667,
23.4754166666667, 23.0585416666667, 22.830625, 22.7447916666667,
22.7729166666666, 22.7929166666667, 22.7354166666667, 22.6579166666667,
22.5935416666667, 22.5233333333333, 22.7641666666667, 23.6010416666667,
23.445625, 23.404375, 23.2845833333333, 23.0672916666667, 22.9347916666667,
22.8272916666667, 23.0316666666667, 23.988125, 25.5647916666667,
27.055, 27.7995833333333, 26.23125, 25.4658333333333, 25.0845833333333,
24.8175, 24.605, 24.4216666666667), Lin_Flux..mymol.m.2.s.1. = c(1.13,
2.146, 1.98708333333333, 1.88416666666667, 1.57083333333333,
1.93041666666667, 2.69875, 2.8075, 3.23272727272727, 2.35818181818182,
2.23833333333333, 1.84958333333333, 2.18695652173913, 2.16958333333333,
2.69791666666667, 3.025, 1.985, 1.88083333333333, 2.30416666666667,
2.775, 1.44458333333333, 1.78791666666667, 1.04863636363636,
1.03458333333333, 1.4725, 1.86833333333333, 1.71125, 1.79, 1.53166666666667,
1.97666666666667), Mean_Soil_Temp_V2..C. = c(4.739, 5.1864, 4.08408333333333,
3.61625, 3.68508333333333, 4.09925, 4.87079166666667, 5.64720833333333,
6.58433333333333, 5.05075, 4.93708333333333, 4.109, 3.2295, 3.537,
5.1395, 5.65270833333333, 5.931875, 5.61775, 5.88695833333333,
6.86308333333333, 5.61833333333333, 4.24566666666667, 3.05952173913043,
2.45716666666667, 3.6365, 3.68820833333333, 3.83766666666667,
4.3435, 4.8745, 6.29133333333333), Soil_Moist_V3 = c(25.603137,
21.98744709, 21.8053864833333, 21.6770563291667, 20.1319423708333,
19.9826592666667, 19.8279438958333, 20.1589541791667, 21.5796382,
21.5971315083333, 21.3742824541667, 21.8992939333333, 23.9737254583333,
23.4506886041667, 23.0956395708333, 22.574581225, 22.3561680833333,
21.3806269916667, 21.4045219791667, 21.5611478916667, 25.5090813166667,
28.6440265, 31.4434210347826, 31.9276734541667, 27.5706909333333,
25.1139413583333, 24.2945348333333, 24.0232171416667, 23.705631425,
22.8323341625), precip50..mm. = c(0.6, 0, 0, 0, 0.9, 1.3, 0,
0, 0, 0, 0, 6.6, 0, 0, 0, 0, 0.1, 0.2, 0.1, 6.1, 5, 17.6, 10.4,
6.6, 0, 0, 0, 0, 0, 0), RWI = c(0.6, 0.4, 0.2, 0.133333333333333,
0.9, 1.3, 1.3, 0.65, 0.433333333333333, 0.325, 0.26, 6.6, 6.6,
3.3, 2.2, 1.65, 0.1, 0.2, 0.1, 6.1, 5, 17.6, 10.4, 6.6, 6.6,
3.3, 2.2, 1.65, 1.32, 1.1)), na.action = structure(c(`1` = 1L,
`2` = 2L, `3` = 3L, `4` = 4L, `5` = 5L, `6` = 6L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `15` = 15L,
`16` = 16L, `17` = 17L, `18` = 18L, `19` = 19L, `20` = 20L, `21` = 21L,
`22` = 22L, `23` = 23L, `24` = 24L, `25` = 25L, `26` = 26L, `27` = 27L,
`28` = 28L, `29` = 29L, `30` = 30L, `31` = 31L, `32` = 32L, `33` = 33L,
`34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L,
`40` = 40L, `41` = 41L, `42` = 42L, `43` = 43L, `44` = 44L, `45` = 45L,
`46` = 46L, `47` = 47L, `48` = 48L, `49` = 49L, `50` = 50L, `51` = 51L,
`52` = 52L, `53` = 53L, `54` = 54L, `55` = 55L, `56` = 56L, `57` = 57L,
`58` = 58L, `59` = 59L, `60` = 60L, `61` = 61L, `62` = 62L, `63` = 63L,
`64` = 64L, `65` = 65L, `66` = 66L, `67` = 67L, `68` = 68L, `199` = 199L,
`218` = 218L, `219` = 219L, `220` = 220L, `221` = 221L, `222` = 222L,
`223` = 223L, `224` = 224L, `225` = 225L, `226` = 226L, `227` = 227L,
`228` = 228L, `229` = 229L, `230` = 230L, `231` = 231L, `232` = 232L,
`264` = 264L, `265` = 265L, `266` = 266L, `267` = 267L, `352` = 352L,
`353` = 353L, `354` = 354L, `355` = 355L, `356` = 356L, `357` = 357L,
`358` = 358L, `359` = 359L, `360` = 360L, `361` = 361L, `362` = 362L,
`363` = 363L, `364` = 364L, `365` = 365L, `366` = 366L), class = "omit"), row.names = c(14L,
69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L,
82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L,
95L, 96L, 97L), class = "data.frame")

The main problem is that the parameters are not uniquely identifiable. We can multiply a by an arbitrary number and divide c, d and e by that same number and we get the same model. Omit a.
Although it won't hurt, the use of as.formula is redundant since the expression is already a formula.
Having an assignment within an nls formula is highly unusual. nls will think that rRef is a parameter and fail on that account. Remove the assignment.
If we make these changes then it does give an answer with the data in the updated version of the question.
# Reduced model: exponential temperature response multiplied by a quadratic in
# soil moisture (no separate scale factor, no inline assignment).
form_Q10_parabolic_SM <- Lin_Flux..mymol.m.2.s.1. ~
  exp(b * Mean_Soil_Temp_V2..C.) *
    ((-c * Soil_Moist_V3^2) + (d * Soil_Moist_V3) + e)

# Fit with the default nls() algorithm; every parameter needs a start value.
Q10_parabolic_SM <- nls(
  form_Q10_parabolic_SM,
  data = conB1_2015,
  start = list(b = 0.11, c = 0.0001, d = 0.01, e = 0.1)
)
giving:
> Q10_parabolic_SM
Nonlinear regression model
model: Lin_Flux..mymol.m.2.s.1. ~ exp(b * Mean_Soil_Temp_V2..C.) * ((-c * Soil_Moist_V3^2) + (d * Soil_Moist_V3) + e)
data: conB1_2015
b c d e
0.103062 -0.001564 -0.135531 3.528621
residual sum-of-squares: 3.979
Number of iterations to convergence: 6
Achieved convergence tolerance: 4.401e-06
plinear
Note that nls also has the plinear algorithm which has the advantage that only nonlinear parameters (in this case only b) need starting values. In that case the formula's RHS should be a matrix with the columns that multiply each linear parameter. It gives the same answer as above except the linear parameters are given names starting with .lin . Note that the plinear version converges in fewer iterations than the version using the default algorithm above. (Also it seems that the plinear version is not very sensitive to the starting value and even if we use b=1 as the starting value it converges.)
# Partially linear form: the right-hand side is a matrix whose columns multiply
# the linear parameters; only the nonlinear parameter b appears by name.
fo <- Lin_Flux..mymol.m.2.s.1. ~
  cbind(-Soil_Moist_V3^2, Soil_Moist_V3, 1) * exp(b * Mean_Soil_Temp_V2..C.)

# With algorithm = "plinear" only b needs a starting value.
fm <- nls(
  fo,
  data = conB1_2015,
  start = list(b = 0.11),
  algorithm = "plinear"
)
giving:
> fm
Nonlinear regression model
model: Lin_Flux..mymol.m.2.s.1. ~ cbind(-Soil_Moist_V3^2, Soil_Moist_V3, 1) * exp(b * Mean_Soil_Temp_V2..C.)
data: conB1_2015
b .lin1 .lin.Soil_Moist_V3 .lin3
0.103062 -0.001564 -0.135528 3.528593
residual sum-of-squares: 3.979
Number of iterations to convergence: 3
Achieved convergence tolerance: 2.189e-06

Related

Can anyone help with making a PCA biplot with a circle and ellipse?

Example dataset:
structure(list(Litter = c("Bottle caps & lids", "Bottles < 2 L", "Drink package rings, six-pack rings, ring carriers", "Food containers (fast food, cups, lunch boxes etc.)", "Plastic bags (opaque & clear)", "Cigarettes, butts & filters", "Rope", "Fishing net", "Foam sponge", "Foam (insulation & packaging)", "Clothing, shoes, hats & towels", "Bottles & jars", "Paper (including newspapers & magazines)", "Cups, food trays, food wrappers, cigarette packs etc.", "Footwear (flip-flops)"), Cox.s.Bazar = c(229L, 228L, 73L, 120L, 1311L, 442L, 208L, 125L, 225L, 207L, 29L, 60L, 74L, 96L, 111L), Chittagong = c(13L, 72L, 1L, 7L, 871L, 28L, 59L, 22L, 0L, 382L, 70L, 7L, 249L, 54L, 38L), St..Martin.s.Island = c(29L, 213L, 37L, 45L, 578L, 147L, 30L, 32L, 0L, 48L, 107L, 18L, 48L, 97L, 54L), Kuakata = c(21L, 54L, 0L, 41L, 276L, 87L, 13L, 8L, 0L, 37L, 0L, 7L, 7L, 41L, 12L), Kotka = c(16L, 37L, 3L, 0L, 47L, 19L, 0L, 0L, 0L, 0L, 3L, 0L, 5L, 0L, 0L)), class = "data.frame", row.names = c(NA, -15L))
My attempt to solve the problem:
# Load devtools first so that install_github() is available, then install and
# load ggbiplot.
library(devtools)
install_github("vqv/ggbiplot")
library(ggbiplot)

# Grouping labels come from the first column; the PCA uses columns 2-6 with
# scaling switched on.
data.class <- Mydata[, 1]
data.pca <- prcomp(Mydata[, 2:6], scale. = TRUE)

# Biplot with group ellipses and the correlation circle requested.
g <- ggbiplot(data.pca, obs.scale = 1, var.scale = 1,
  groups = data.class, ellipse = TRUE, circle = TRUE)
g <- g + scale_color_discrete(name = "")
g <- g + theme(legend.direction = "horizontal",
  legend.position = "top")
print(g)
I suspect the problem is that the example shown on https://github.com/vqv/ggbiplot (the wine dataset) has a ".class" variable that your data doesn't have. One solution is to add it yourself, e.g.
library(tidyverse)
library(devtools)
install_github("vqv/ggbiplot")
library(ggbiplot)

# Use the same object name as the question (R names are case-sensitive).
data <- Mydata

# PCA on every column except the first (the litter description).
data.pca <- prcomp(data[-1], scale. = TRUE)

# Hand-assigned class for each of the 15 rows, in row order; these are the
# groups the ellipses are drawn around.
data.class <- factor(
  x = c(
    "recyclable", "recyclable", "recyclable", "recyclable",
    "recyclable", "not recyclable", "not applicable",
    "not applicable", "not applicable", "not recyclable",
    "recyclable", "recyclable", "recyclable", "recyclable",
    "not recyclable"
  ),
  levels = c("recyclable", "not recyclable", "not applicable")
)

ggbiplot(data.pca, obs.scale = 1, var.scale = 1,
  groups = data.class, ellipse = TRUE,
  circle = TRUE) +
  scale_color_discrete(name = "") +
  theme(legend.direction = "horizontal",
    legend.position = "top")

How can i draw a barplot with 3 variables?

i'm having some trouble making a barplot.
I want to make a barplot with 3 ordinal variables (scale: yes, no, i don't know (for each))
I need the x-axis to show the bars side by side (yes1, yes2, yes3, no1, no2... and so on). The y-axis should show the frequency or the percentage.
Each variable belongs to a different wave in a panel and i want to show the changes through a barplot.
I've come so far, to draw a plot for each variable (see code)
What I need is to combine the 3 plots; I just don't know yet how to do it. I've tried facet_wrap/facet_grid, but I haven't been able to solve my problem with that approach. I also get the error:
"Don't know how to automatically pick scale for object of type haven_labelled. Defaulting to continuous."so the labels on the x-axis can't be shown.
Can someone please help me?
Thanks,
Ingrid.
Here is my data:
dput(veraenderung[1:4, ])
structure(list(vor = structure(c(2, 3, 3, 1), label = "Erwartung, dass sich durch die Teilnahme an der FoBi Veränderungen im Berufsallt", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), nach = structure(c(2,
3, 1, 1), label = "Erwarten Sie, dass Ihre Teilnahme an dieser FoBi zu Veränderungen in Ihrem Beruf", labels = c(ja = 1,
nein = 2, `weiß nicht` = 3), class = "haven_labelled"), sechs_monate_spaeter = structure(c(2,
2, 1, 3), label = "Hat sich durch Ihre Teilnahme an der Fortbildung zur interkulturellen Kompetenz", labels = c(ja = 1,
nein = 2, `weiß nicht` = 9), class = "haven_labelled"), Welle123 = c(1,
1, 1, 1)), na.action = structure(c(`4` = 4L, `7` = 7L, `8` = 8L,
`9` = 9L, `10` = 10L, `11` = 11L, `12` = 12L, `13` = 13L, `14` = 14L,
`15` = 15L, `16` = 16L, `17` = 17L, `19` = 19L, `20` = 20L, `24` = 24L,
`26` = 26L, `27` = 27L, `29` = 29L, `30` = 30L, `31` = 31L, `33` = 33L,
`34` = 34L, `35` = 35L, `36` = 36L, `37` = 37L, `38` = 38L, `39` = 39L,
`41` = 41L, `43` = 43L, `44` = 44L, `46` = 46L, `47` = 47L, `48` = 48L,
`49` = 49L, `50` = 50L, `52` = 52L, `54` = 54L, `55` = 55L, `58` = 58L,
`59` = 59L, `60` = 60L, `63` = 63L, `64` = 64L, `66` = 66L, `68` = 68L,
`71` = 71L, `72` = 72L, `73` = 73L, `74` = 74L, `75` = 75L, `78` = 78L,
`80` = 80L, `81` = 81L, `82` = 82L, `83` = 83L, `84` = 84L, `86` = 86L,
`87` = 87L, `91` = 91L, `92` = 92L, `94` = 94L, `97` = 97L, `99` = 99L,
`101` = 101L, `102` = 102L, `105` = 105L, `106` = 106L, `107` = 107L,
`108` = 108L, `109` = 109L, `112` = 112L, `113` = 113L, `114` = 114L,
`116` = 116L, `117` = 117L, `119` = 119L, `121` = 121L, `122` = 122L,
`123` = 123L, `124` = 124L, `127` = 127L, `128` = 128L, `130` = 130L,
`132` = 132L, `134` = 134L, `135` = 135L, `136` = 136L, `138` = 138L,
`139` = 139L, `140` = 140L, `141` = 141L, `142` = 142L, `144` = 144L,
`146` = 146L, `147` = 147L, `148` = 148L, `149` = 149L, `151` = 151L,
`152` = 152L, `153` = 153L, `156` = 156L, `157` = 157L, `159` = 159L,
`164` = 164L, `165` = 165L, `166` = 166L, `168` = 168L, `169` = 169L,
`170` = 170L, `172` = 172L, `173` = 173L, `174` = 174L, `176` = 176L,
`177` = 177L, `178` = 178L, `179` = 179L, `180` = 180L, `181` = 181L,
`183` = 183L, `184` = 184L, `185` = 185L, `190` = 190L, `191` = 191L,
`192` = 192L, `194` = 194L, `195` = 195L, `196` = 196L, `197` = 197L,
`202` = 202L, `205` = 205L, `206` = 206L, `208` = 208L, `209` = 209L,
`210` = 210L, `211` = 211L, `212` = 212L, `213` = 213L, `215` = 215L,
`216` = 216L, `217` = 217L, `218` = 218L, `221` = 221L, `223` = 223L,
`225` = 225L, `226` = 226L, `227` = 227L, `228` = 228L, `229` = 229L,
`230` = 230L, `231` = 231L, `232` = 232L, `233` = 233L, `234` = 234L,
`235` = 235L, `236` = 236L, `237` = 237L, `238` = 238L, `239` = 239L,
`240` = 240L, `241` = 241L, `242` = 242L, `243` = 243L, `244` = 244L,
`245` = 245L, `246` = 246L, `247` = 247L, `248` = 248L, `249` = 249L
), class = "omit"), row.names = c(NA, 4L), class = "data.frame")
Here is the code:
library(tidyverse)

# Keep the three survey items plus the Welle123 column, give the items
# readable names, and drop rows with missing values.
# na.omit() receives the piped data as its only argument; naming the result
# variable inside its own pipeline is not needed.
veraenderung <- ikoe %>%
  select(v13, wn06, xn2, Welle123) %>%
  rename(
    vor = v13,
    nach = wn06,
    sechs_monate_spaeter = xn2
  ) %>%
  na.omit() %>%
  as.data.frame()

# One bar chart per item.
ggplot(veraenderung, aes(x = vor)) +
  geom_bar()
ggplot(veraenderung, aes(x = nach)) +
  geom_bar()
ggplot(veraenderung, aes(x = sechs_monate_spaeter)) +
  geom_bar()
Your haven object is a bit of a challenge for tidyverse manipulations. See below what I suggest to make this object a bit "cleaner" (remove labels, change your values to character class), and then reshape it to long format and plot it.
library(tidyverse)

# Replace the column names with short ones.
names(veraenderung) <- c("vor", "nach", "sechs", "welle")

# Convert every column to character, then reshape to long format with the
# original column name in `key` and the answer in `value`.
veraenderung <- veraenderung %>%
  as_tibble() %>%
  transmute_all(as.character) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value")

# Dodged bars: one cluster per original column, filled by answer value.
# Try it without `preserve`, or without position_dodge(), and see what happens.
ggplot(veraenderung, aes(key)) +
  geom_bar(
    aes(fill = value),
    position = position_dodge(preserve = "single")
  )
Created on 2020-02-06 by the reprex package (v0.3.0)

Removing underscore from species names in phylo4d class when constructing dotplot

I have created a dotplot.phylo4d using the phylosignal package. When using species names it requires that an underscore separates the genus and species like this: Genus_species however on the tree itself the tip nodes should appear without this underscore but it does not. I have tried adjusting the underscore parameter, but no luck.
Here is the code that makes the tree:
library("phylosignal")

# Dot plot of the two traits next to the tree. Dot colour and plotting symbol
# are passed as the named arguments dot.col and dot.pch.
dotplot.phylo4d(
  local,
  dot.col = points.col,
  dot.pch = points.pch,
  underscore = TRUE,
  trait.labels = c("Water Repellency", "Barb Stiffness"),
  trait.bg.col = "white"
)
I have tried manually adding tip.lables but it does not place the species in the correct locations. When I try to extract the label names from the local phylo4d class I get an error: Error in local$lable : $ operator not defined for this S4 class
Does anyone have any ideas?
Here is an example of dput(local)
new("phylo4d", data = structure(list(water_repelency_factor = c(0.406853948726056,
0.607154878704302, 0.650989064481201, 0.124886215381352, 0.0723507857767838,
-0.0723167215080719, 0.013459653778258, -1.78914935357281, -0.564983339285733,
0.606337089022796, 0.659663703834298, 0.0176228716122535, 0.0127574040830885,
-0.196395841638203, -0.0459106519882355, -0.00530956412638191,
0.639508124725596, 0.777990116847955, 0.451610618568295, 0.355696905949063,
0.263869048235165, -0.221564237669859, 0.147157051369543, 0.117382140996346,
0.0780014518176963, -0.0432113445007319, 1.63762850650646, 3.18758974791682,
3.1915764230193, 2.59818280850751, 0.13012860558643, 0.438800747071309,
0.220659982250533, 0.0406142785151005, -0.0966974465314274, 0.123091856737923,
-0.00273950242722704, -0.0440442594202772, -0.018999718854047,
-0.0913244261412157, 0.425022676610531, 0.632243165802067, 0.368361182363585,
-0.00812059600733897, -0.147599248089371, 0.0148070440392808,
-0.16383227579288, 0.276110714945516, 0.232486022888682, 0.120444921178624,
0.0247910093458199, 0.0199371653665217, 0.0137880171357138, -0.00359380232761909,
-0.135584983429275, 0.824091682655614, 0.589492230611259, -0.386326654254213,
0.656926986174162, 0.647598093829388, 1.10465298748181, 1.08603679621794,
0.185419991568835, 0.536392662698255), stiffness_parameter = c(-0.451152645392232,
-0.0675275753134292, -0.365241405962641, 0.0905545163858439,
0.288842041901916, 0.266165143212233, 0.136623204129795, 0.0897899100887545,
0.165206538251347, 0.115331641755025, 0.0705395185451837, 0.00977078724881693,
0.25245123194264, 0.600773298772429, 0.589060413404627, -0.403924106606226,
0.0347747504905809, 0.0413013238266368, 0.0719688786031885, 0.0546088786069706,
-0.468432315476859, -0.145643606900637, 0.366171679501629, 0.312574773140502,
0.0838000268770967, 0.0810893539547085, 1.08795895425903, 2.06681624860963,
2.02014470904867, 1.67845452279315, -0.00619064668359723, 0.064525330741118,
0.0321501505674897, -0.148400036867211, -0.374596029376242, 0.000137596835294316,
0.00173620676450802, -0.252397879000816, 0.141994990326184, 0.147348381636395,
-0.505048807104088, -0.461332473091121, -0.121426983017065, -0.136696496034141,
-0.146453001600327, -0.393612849002826, -0.401688658534651, -0.131022632470828,
-0.0516916448217204, 0.0447178990330358, -0.00917528749341404,
-0.0645128131411127, 0.0230480464064479, -0.0807760835928284,
-0.211124585904092, 0.0742996834809848, 0.393936005786656, -0.178168184216292,
0.496357516284941, 0.218262874943631, 1.67041563314074, 1.67041563314074,
-0.0264417662148244, 0.170746918626684)), row.names = c(NA, 64L
), class = "data.frame"), metadata = list(), edge = structure(c(0L,
65L, 66L, 67L, 67L, 68L, 68L, 66L, 69L, 69L, 65L, 70L, 71L, 71L,
70L, 72L, 73L, 74L, 75L, 76L, 77L, 77L, 78L, 79L, 79L, 78L, 80L,
80L, 76L, 81L, 81L, 82L, 82L, 83L, 83L, 75L, 84L, 84L, 85L, 85L,
86L, 87L, 87L, 86L, 88L, 88L, 74L, 89L, 90L, 91L, 91L, 92L, 92L,
93L, 93L, 90L, 94L, 94L, 95L, 95L, 96L, 96L, 89L, 97L, 97L, 98L,
98L, 73L, 99L, 100L, 101L, 102L, 102L, 101L, 103L, 103L, 100L,
104L, 104L, 105L, 105L, 99L, 106L, 107L, 108L, 108L, 107L, 109L,
109L, 110L, 110L, 106L, 111L, 112L, 113L, 113L, 112L, 114L, 114L,
111L, 115L, 115L, 116L, 117L, 117L, 118L, 118L, 116L, 119L, 119L,
72L, 120L, 120L, 121L, 122L, 122L, 123L, 123L, 124L, 124L, 121L,
125L, 125L, 126L, 126L, 127L, 127L, 65L, 66L, 67L, 1L, 68L, 2L,
3L, 69L, 4L, 5L, 70L, 71L, 6L, 7L, 72L, 73L, 74L, 75L, 76L, 77L,
12L, 78L, 79L, 8L, 9L, 80L, 10L, 11L, 81L, 16L, 82L, 13L, 83L,
14L, 15L, 84L, 22L, 85L, 21L, 86L, 87L, 17L, 18L, 88L, 19L, 20L,
89L, 90L, 91L, 26L, 92L, 25L, 93L, 23L, 24L, 94L, 27L, 95L, 30L,
96L, 28L, 29L, 97L, 31L, 98L, 32L, 33L, 99L, 100L, 101L, 102L,
34L, 35L, 103L, 36L, 37L, 104L, 38L, 105L, 39L, 40L, 106L, 107L,
108L, 41L, 42L, 109L, 43L, 110L, 44L, 45L, 111L, 112L, 113L,
46L, 47L, 114L, 48L, 49L, 115L, 50L, 116L, 117L, 53L, 118L, 51L,
52L, 119L, 54L, 55L, 120L, 64L, 121L, 122L, 56L, 123L, 59L, 124L,
57L, 58L, 125L, 63L, 126L, 60L, 127L, 61L, 62L), .Dim = c(127L,
2L), .Dimnames = list(NULL, c("ancestor", "descendant"))), edge.length = c(`0-65` = NA,
`65-66` = 0.936507936507937, `66-67` = 0.0317460317460317, `67-1` = 0.0317460317460317,
`67-68` = 0.0158730158730159, `68-2` = 0.0158730158730159, `68-3` = 0.0158730158730159,
`66-69` = 0.0476190476190476, `69-4` = 0.0158730158730159, `69-5` = 0.0158730158730159,
`65-70` = 0.0793650793650794, `70-71` = 0.904761904761905, `71-6` = 0.0158730158730159,
`71-7` = 0.0158730158730159, `70-72` = 0.0317460317460317, `72-73` = 0.142857142857143,
`73-74` = 0.349206349206349, `74-75` = 0.174603174603175, `75-76` = 0.0952380952380952,
`76-77` = 0.0634920634920635, `77-12` = 0.0634920634920635, `77-78` = 0.0158730158730159,
`78-79` = 0.0317460317460317, `79-8` = 0.0158730158730159, `79-9` = 0.0158730158730159,
`78-80` = 0.0317460317460317, `80-10` = 0.0158730158730159, `80-11` = 0.0158730158730159,
`76-81` = 0.0793650793650794, `81-16` = 0.0476190476190476, `81-82` = 0.0158730158730159,
`82-13` = 0.0317460317460317, `82-83` = 0.0158730158730159, `83-14` = 0.0158730158730159,
`83-15` = 0.0158730158730159, `75-84` = 0.142857142857143, `84-22` = 0.0793650793650794,
`84-85` = 0.0158730158730159, `85-21` = 0.0634920634920635, `85-86` = 0.0158730158730159,
`86-87` = 0.0317460317460317, `87-17` = 0.0158730158730159, `87-18` = 0.0158730158730159,
`86-88` = 0.0317460317460317, `88-19` = 0.0158730158730159, `88-20` = 0.0158730158730159,
`74-89` = 0.238095238095238, `89-90` = 0.0476190476190476, `90-91` = 0.0634920634920635,
`91-26` = 0.0476190476190476, `91-92` = 0.0158730158730159, `92-25` = 0.0317460317460317,
`92-93` = 0.0158730158730159, `93-23` = 0.0158730158730159, `93-24` = 0.0158730158730159,
`90-94` = 0.0634920634920635, `94-27` = 0.0476190476190476, `94-95` = 0.0158730158730159,
`95-30` = 0.0317460317460317, `95-96` = 0.0158730158730159, `96-28` = 0.0158730158730159,
`96-29` = 0.0158730158730159, `89-97` = 0.126984126984127, `97-31` = 0.0317460317460317,
`97-98` = 0.0158730158730159, `98-32` = 0.0158730158730159, `98-33` = 0.0158730158730159,
`73-99` = 0.412698412698413, `99-100` = 0.238095238095238, `100-101` = 0.0476190476190476,
`101-102` = 0.0317460317460317, `102-34` = 0.0158730158730159,
`102-35` = 0.0158730158730159, `101-103` = 0.0317460317460317,
`103-36` = 0.0158730158730159, `103-37` = 0.0158730158730159,
`100-104` = 0.0634920634920635, `104-38` = 0.0317460317460317,
`104-105` = 0.0158730158730159, `105-39` = 0.0158730158730159,
`105-40` = 0.0158730158730159, `99-106` = 0.111111111111111,
`106-107` = 0.158730158730159, `107-108` = 0.0476190476190476,
`108-41` = 0.0158730158730159, `108-42` = 0.0158730158730159,
`107-109` = 0.0317460317460317, `109-43` = 0.0317460317460317,
`109-110` = 0.0158730158730159, `110-44` = 0.0158730158730159,
`110-45` = 0.0158730158730159, `106-111` = 0.0793650793650794,
`111-112` = 0.0952380952380952, `112-113` = 0.0317460317460317,
`113-46` = 0.0158730158730159, `113-47` = 0.0158730158730159,
`112-114` = 0.0317460317460317, `114-48` = 0.0158730158730159,
`114-49` = 0.0158730158730159, `111-115` = 0.0634920634920635,
`115-50` = 0.0793650793650794, `115-116` = 0.0158730158730159,
`116-117` = 0.0317460317460317, `117-53` = 0.0317460317460317,
`117-118` = 0.0158730158730159, `118-51` = 0.0158730158730159,
`118-52` = 0.0158730158730159, `116-119` = 0.0476190476190476,
`119-54` = 0.0158730158730159, `119-55` = 0.0158730158730159,
`72-120` = 0.761904761904762, `120-64` = 0.126984126984127, `120-121` = 0.0158730158730159,
`121-122` = 0.0634920634920635, `122-56` = 0.0476190476190476,
`122-123` = 0.0158730158730159, `123-59` = 0.0317460317460317,
`123-124` = 0.0158730158730159, `124-57` = 0.0158730158730159,
`124-58` = 0.0158730158730159, `121-125` = 0.0634920634920635,
`125-63` = 0.0476190476190476, `125-126` = 0.0158730158730159,
`126-60` = 0.0317460317460317, `126-127` = 0.0158730158730159,
`127-61` = 0.0158730158730159, `127-62` = 0.0158730158730159),
label = c(`1` = "Anhima_cornuta", `2` = "Alopochen_aegyptiaca",
`3` = "Anas_undulata", `4` = "Francolinus_coqui", `5` = "Meleagris_gallopavo",
`6` = "Pterocles_namaqua", `7` = "Streptopelia_lugens", `8` = "Anhinga_melanogaster",
`9` = "Phalacrocorax_capensis", `10` = "Morus_bassanus",
`11` = "Morus_capensis", `12` = "Fregata_minor", `13` = "Threskiornis_aethiopicus",
`14` = "Ephippiorhynchus_senegalensis", `15` = "Mycteria_ibis",
`16` = "Gavia_immer", `17` = "Pterodroma_macroptera", `18` = "Procellaria_cinerea",
`19` = "Halobaena_caerulea", `20` = "Pelecanoides_urinatrix",
`21` = "Thalassarche_chlororhynchos", `22` = "Hydrobates_pelagicus",
`23` = "Pelecanus_occidentalis", `24` = "Pelecanus_rufescens",
`25` = "Pelecanus_onocrotalus", `26` = "Scopus_umbretta",
`27` = "Pygoscelis_papua", `28` = "Spheniscus_demersus",
`29` = "Spheniscus_magellanicus", `30` = "Eudyptes_chrysocome",
`31` = "Egretta_garzetta", `32` = "Ardea_melanocephala",
`33` = "Ardea_cinerea", `34` = "Fulica_cristata", `35` = "Podica_senegalensis",
`36` = "Aramus_guarauna", `37` = "Grus_paradisea", `38` = "Phoenicopterus_ruber",
`39` = "Podiceps_nigricollis", `40` = "Tachybaptus_ruficollis",
`41` = "Chionis_albus", `42` = "Burhinus_capensis", `43` = "Charadrius_marginatus",
`44` = "Recurvirostra_avosetta", `45` = "Haematopus_moquini",
`46` = "Numenius_arquata", `47` = "Phalaropus_fulicarius",
`48` = "Actophilornis_africanus", `49` = "Rostratula_benghalensis",
`50` = "Dromas_ardeola", `51` = "Larus_fuscus", `52` = "Rynchops_flavirostris",
`53` = "Sterna_fuscata", `54` = "Stercorarius_pomarinus",
`55` = "Uria_aalge", `56` = "Apus_caffer", `57` = "Apaloderma_narina",
`58` = "Alcedo_semitorquata", `59` = "Caprimulgus_rufigena",
`60` = "Sturnus_vulgaris", `61` = "Cinclus_leucocephalus",
`62` = "Cinclus_schulzi", `63` = "Poicephalus_robustus",
`64` = "Centropus_senegalensis"), edge.label = structure(character(0), .Names = character(0)),
order = "preorder", annote = list())
To access slots of an S4 object you need to use @, not $:
instead of local$label use local@label
Get a straightforward intro into S4 classes here:
http://adv-r.had.co.nz/S4.html

Calling function inside with-statement gives error variable not found in function scope

I am preparing a bootstrapped estimation of a mean prediction error on a multiple imputed dataset. My function seems to be unable to find the dependent variable in scope. Is there some way to circumvent that?
Multiple imputation runs smoothly, but the specific problem seems to be that the line
mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))
cannot find the variable CG.tot:
Error in eval(expr, envir, enclos) : object 'CG.tot' not found
However, if I write the formula out literally in the call:
glm.nb(formula=CG.tot~Fibrinogen)
it works...
Minimal running example:
library(mice)
library(MASS)
# Compute the mean prediction error of a glm.nb() model on a data frame with
# missing data.
#
# f:       model formula (checked via class(f) == "formula").
# data:    data frame (checked via class(data) == "data.frame").
# indices: declared but not used anywhere in the body.
#
# Returns the mean over all imputed test rows of (exp(linear predictor) minus
# the observed response).
predicterr <- function(f, data, indices){
if(!(class(f)=="formula")){stop("'f' must be of the 'formula' type")}
if(!(class(data)=="data.frame")){stop("'data' must be of the 'data.frame' type")}
# Recompute random sampling & multiple imputation: 15 randomly chosen rows
# form the test set, all remaining rows (matched by row name) the training
# set; each part is imputed separately with mice().
data.test <- data[sample(nrow(data), 15),]
data.train <- data[setdiff(rownames(data), rownames(data.test)),]
data.mi.train <- mice(data.train)
data.mi.test <- mice(data.test)
# Recompute model on the imputed training data.
# NOTE(review): this is the line reported to fail with
# "object 'CG.tot' not found" when the formula is passed in through `f`.
mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f))
# Pooled coefficient estimates (the "est" column of the pooled summary).
coeffs <- summary(pool(mod.nb.train))[,"est"]
# Compute prediction error on each dataset row. The selected columns are the
# predictors named by the coefficients (intercept dropped) followed by the
# response, which is the second element of as.character(f).
errvec <- apply(complete(data.mi.test, include = F, action = "long")[,c(names(coeffs)[-1], as.character(f)[2])],
1, function(x){
# 1:length(x)-1 parses as (1:length(x)) - 1, i.e. 0:(length(x) - 1); index 0
# selects nothing, so this picks every element except the last (the response).
# The result is exp(intercept + sum(predictor * coefficient)) minus the
# observed response.
return(exp(sum(x[1:length(x)-1]*coeffs[-1], coeffs[1]))-x[length(x)])
})
return(mean(errvec))
}
# NOTE(review): d.mi is not defined in the visible snippet; the data set shown
# with this example is named d. Confirm which object is meant.
predicterr(CG.tot~Fibrinogen, d.mi)
Dataset (a little long, but that's for the imputation...):
d <- structure(list(Hb = c(7.5, 12.9, 12.9, 10.2, 10.5, 11.2, 12.7,
9.3, 11.7, 13.4, 151, 10.9, 5.9, 12.8, 10.2, 15.3, 13.8, 9.6,
7.6, 12.2, 11.1, 13.6, 8.9, 7.2, 7.8, 8.7, 10.3, 14, 8.8, 7.5
), Hct = c(23, 39.8, 39.4, 31.6, 32.5, 34.4, 39, 28, 35.9, 41.2,
43.8, 33.7, 18.6, 37.7, 31.7, 44, 87.3, 29.4, 23.6, 37.7, 34.3,
39.8, 27.4, 22.6, 24.2, 29.1, 31.8, 43.1, 27.3, 23.3), EXTEM.CT = c(51L,
60L, 45L, 115L, 55L, 48L, 49L, 106L, 56L, 68L, 61L, 53L, 69L,
44L, 58L, 126L, 47L, 68L, 49L, 68L, 51L, 84L, 63L, 66L, 51L,
108L, 63L, 51L, 53L, 63L), EXTEM.CFT = c(133L, 162L, 175L, 216L,
101L, 60L, 140L, 248L, 137L, 203L, 113L, 199L, 316L, 90L, 224L,
235L, 133L, 46L, 308L, 300L, 119L, 420L, 44L, 207L, 91L, 69L,
96L, 130L, 153L, 99L), EXTEM.MCF = c(59L, 55L, 50L, 46L, 64L,
72L, 52L, 46L, 50L, 50L, 60L, 40L, 40L, 56L, 46L, 47L, 52L, 67L,
40L, 35L, 83L, 30L, 82L, 47L, 61L, 76L, 63L, 51L, 58L, 58L),
INTEM.CT = c(NA, 158L, 154L, 240L, 141L, 141L, 143L, 122L,
104L, 193L, 183L, 186L, 182L, 172L, 192L, 149L, 133L, 162L,
238L, 158L, 144L, 144L, 162L, 213L, 139L, 157L, 104L, 376L,
140L, 192L), INTEM.CFT = c(NA, 91L, 119L, 165L, 97L, 51L,
118L, 190L, 84L, 90L, 82L, 114L, 226L, 90L, 89L, 209L, NA,
64L, 203L, 222L, 64L, 104L, 43L, 170L, 66L, 50L, 61L, 332L,
70L, 66L), INTEM.MCF = c(NA, 57L, 48L, 48L, 74L, 70L, 49L,
50L, 50L, 55L, 58L, 49L, 40L, 57L, 48L, 46L, 64L, 68L, 44L,
39L, 64L, 54L, 80L, 51L, 64L, 78L, 68L, 54L, 62L, 61L), FIBTEM.CT = c(50L,
62L, 101L, 123L, 58L, 49L, 49L, 74L, 77L, 117L, 61L, 54L,
79L, 41L, 69L, 189L, 49L, 67L, 55L, 56L, 57L, 59L, 56L, 62L,
57L, 65L, 51L, 58L, 68L, 67L), FIBTEM.CFT = c(NA, NA, NA,
NA, NA, 94L, NA, NA, NA, NA, NA, 615L, NA, 56L, NA, NA, NA,
79L, NA, NA, 625L, NA, 75L, NA, 892L, NA, NA, NA, NA, 1206L
), FIBTEM.MCF = c(9L, 9L, NA, 5L, 10L, 21L, 11L, 4L, 6L,
3L, 16L, 7L, 6L, 31L, NA, 4L, NA, 35L, 11L, 10L, 42L, NA,
28L, 13L, 22L, 28L, 8L, 7L, 9L, 21L), INR = c(1.14, 1, 1,
1.33, 1.01, 1.07, 1.06, 1.43, 1.22, 1.12, 1.18, 1.54, NA,
1.3, 1.13, 1.05, 1.09, 1.11, 1.49, 1.22, 1.33, 1.04, NA,
1.87, 1.67, 1, 1, 1.07, 1.12, 1.88), PTT = c(30, 28.4, 22.1,
37.8, 25.6, 28.9, 27.2, 32.7, 27.2, 28.9, 27.3, 69.9, 132,
31.9, 26.5, NA, 28.9, 44.3, 50.8, 36.6, NA, 23.5, 30, 70.6,
41.2, 30.1, 25.7, 26.7, 26, 41.9), Platelets = c(150, 193,
343, 138, 284, 216, 141, 291, 142, 230, 254, 126, NA, 249,
153, 308, 253, 66, 30, 41, 293, 208, 545, 141, 136, 256,
249, 305, 327, 112), Fibrinogen = c(1.3, NA, NA, 0.9, 2.1,
3.4, 2.3, 1.1, 1.5, 1.1, 1.8, 0.8, NA, 2.3, 2.4, NA, 2.2,
7.4, 1.8, 1.7, NA, 2.6, 7.1, 0.6, 1.2, NA, 1.1, 2.5, 1.7,
2), CG.tot = c(3L, 2L, 3L, 11L, 12L, 0L, 1L, 10L, 4L, 4L,
5L, 0L, 12L, 11L, 3L, 9L, 5L, 0L, 4L, 0L, 0L, 3L, 0L, 21L,
2L, 1L, 1L, 1L, 2L, 3L)), .Names = c("Hb", "Hct", "EXTEM.CT",
"EXTEM.CFT", "EXTEM.MCF", "INTEM.CT", "INTEM.CFT", "INTEM.MCF",
"FIBTEM.CT", "FIBTEM.CFT", "FIBTEM.MCF", "INR", "PTT", "Platelets",
"Fibrinogen", "CG.tot"), row.names = c(50L, 38L, 54L, 82L, 86L,
4L, 24L, 78L, 59L, 58L, 72L, 16L, 85L, 81L, 45L, 77L, 70L, 6L,
63L, 7L, 11L, 53L, 13L, 93L, 36L, 30L, 18L, 19L, 40L, 43L), class = "data.frame")
You're missing one parameter in glm.nb:
# environment() is passed as glm.nb()'s second argument (its `data` argument),
# presumably so that the variables named in f are looked up in the environment
# where with() evaluates the call. Confirm against the mice documentation.
mod.nb.train <- with(data = data.mi.train, exp = glm.nb(f, environment()))
and it works.

Add a line from different result to boxplot graph in ggplot2

I have a dataframe (df1) that contains 3 columns (y1, y2, x). I managed to plot a boxplot graph between y1, x and y2, x. I have another dataframe (df2) which contains two columns A, x. I want to plot a line graph (A, x) and add it to the boxplot. Note that the variable x in both dataframes is the x-axis variable; however, it takes different values in each. I tried to combine and reshape both dataframes and plot based on the factor(x)... I got 3 boxplots in one graph. I need to plot df2 as a line and df1 as boxplots in one graph.
df1 <- structure(list(Y1 = c(905L, 941L, 744L, 590L, 533L, 345L, 202L,
369L, 200L, 80L, 200L, 80L, 50L, 30L, 60L, 20L, 30L, 30L), Y2 = c(774L,
823L, 687L, 545L, 423L, 375L, 249L, 134L, 45L, 58L, 160L, 60L,
20L, 40L, 20L, 26L, 19L, 27L), x = c(10L, 10L, 10L, 20L, 20L,
20L, 40L, 40L, 40L, 50L, 50L, 50L, 70L, 70L, 70L, 90L, 90L, 90L
)), .Names = c("Y1", "Y2", "x"), row.names = c(NA, -18L), class = "data.frame")
df2 <- structure(list(Y3Line = c(384L, 717L, 914L, 359L, 241L, 265L,
240L, 174L, 114L, 165L, 184L, 96L, 59L, 60L, 127L, 54L, 31L,
44L), x = c(36L, 36L, 36L, 56L, 56L, 56L, 65L, 65L, 65L, 75L,
75L, 75L, 85L, 85L, 85L, 99L, 99L, 99L)), .Names = c("A",
"x"), row.names = c(NA, -18L), class = "data.frame")
# Reshape df1 to long format, keeping x as the id column.
df_l <- melt(df1, id.vars = "x")
# Boxplots per x category (x converted to a factor), filled by variable.
ggplot(df_l, aes(x = factor(x), y =value, fill=variable )) +
geom_boxplot()+
# Here I'm trying to add the line graph from df2.
geom_line(data = df2, aes(x = x, y=A))
Any suggestions?
In the second dataset you have three y values per x value. Do you want to draw separate lines per x value, or a line through the mean per x value? Both are shown below. The trick is to first change the x variables in both datasets to factors that contain all the levels of both variables.
df1 <-structure(list(Y1 = c(905L, 941L, 744L, 590L, 533L, 345L, 202L,
369L, 200L, 80L, 200L, 80L, 50L, 30L, 60L, 20L, 30L, 30L), Y2 = c(774L,
823L, 687L, 545L, 423L, 375L, 249L, 134L, 45L, 58L, 160L, 60L,
20L, 40L, 20L, 26L, 19L, 27L), x = c(10L, 10L, 10L, 20L, 20L,
20L, 40L, 40L, 40L, 50L, 50L, 50L, 70L, 70L, 70L, 90L, 90L, 90L
)), .Names = c("Y1", "Y2", "x"), row.names = c(NA, -18L), class = "data.frame")
df2 <- structure(list(Y3Line = c(384L, 717L, 914L, 359L, 241L, 265L,
240L, 174L, 114L, 165L, 184L, 96L, 59L, 60L, 127L, 54L, 31L,
44L), x = c(36L, 36L, 36L, 56L, 56L, 56L, 65L, 65L, 65L, 75L,
75L, 75L, 85L, 85L, 85L, 99L, 99L, 99L)), .Names = c("A",
"x"), row.names = c(NA, -18L), class = "data.frame")
library(ggplot2)
library(reshape2)
# Long format: x stays as the id column, Y1/Y2 are stacked.
df_l <- melt(df1, id.vars = "x")

# Collect the x values of both data sets into one common set of factor
# levels, then re-code each x column with that full set.
allLevels <- levels(factor(c(df_l$x, df2$x)))
df_l$x <- factor(df_l$x, levels = allLevels)
df2$x <- factor(df2$x, levels = allLevels)
Line per x category:
# Line layer drawn from df2, boxplot layer drawn from the plot's default data
# (df_l), filled by variable.
ggplot(data = df_l, aes(x = x, y = value)) +
  geom_line(data = df2, aes(x = factor(x), y = A)) +
  geom_boxplot(aes(fill = variable))
Connected means of x categories:
# Connect the per-category means of A with a single line (group = 1 joins the
# points across the discrete x values) and add the boxplots from df_l.
# `fun` is used instead of `fun.y`, which is deprecated since ggplot2 3.3.0.
ggplot(data = df2, aes(x = factor(x), y = A)) +
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  geom_boxplot(data = df_l, aes(x = x, y = value, fill = variable))

Resources