My dataset:
mydata<-structure(list(t = c(0.208333333, 0.208333333, 0.208333333, 0.208333333,
1, 1, 1, 1, 2, 2, 2, 2, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16,
16, 16, 0.208333333, 0.208333333, 0.208333333, 0.208333333, 1,
1, 1, 1, 2, 2, 2, 2), parent = c(1.2, 1.4, 0.53, 1.2, 1, 0.72,
0.93, 1.1, 0.88, 0.38, 0.45, 0.27, 0.057, 0.031, 0.025, 0.051,
0.027, 0.015, 0.034, 0.019, 0.017, 0.025, 0.024, 0.023, 0.29,
0.22, 0.34, 0.19, 0.12, 0.092, 0.41, 0.28, 0.064, 0.05, 0.058,
0.043)), .Names = c("t", "Ct"), row.names = c(325L, 326L,
327L, 328L, 341L, 342L, 343L, 344L, 357L, 358L, 359L, 360L, 373L,
374L, 375L, 376L, 389L, 390L, 391L, 392L, 401L, 402L, 403L, 404L,
805L, 806L, 807L, 808L, 821L, 822L, 823L, 824L, 837L, 838L, 839L,
840L), class = "data.frame")
The function to be fitted is a hockeystick curve; i.e. it flattens off after the bending point tb:
# Piecewise-linear "hockey stick" model: one linear expression is used for
# times up to and including the bending point `tb`, another one afterwards.
# The result comes from an assignment, so it is returned invisibly.
hockeystick <- function(t, C0, k1, k2, tb) {
  Ct <- ifelse(
    test = t <= tb,
    yes = C0 - k1 * t,
    no = C0 - k1 * tb - k2 * t
  )
}
Fitting using nls:
# Starting values for the four hockey-stick parameters
start.hockey <- c(C0 = 3, k1 = 1, k2 = 0.1, tb = 3)
# Fit the model to the log-transformed concentrations
nls(
  log(Ct) ~ hockeystick(t, C0, k1, k2, tb),
  data = mydata,
  start = start.hockey
)
No matter what starting values I use, I always get this error:
Error in nlsModel(formula, mf, start, wts) :
singular gradient matrix at initial parameter estimates
I tried both the port and the standard nls methods. I tried both the linearized (shown here) and the normal state of the model but neither seems to work.
Edit: As per the suggestion of Carl I tried to fit the model to a dataset where I first averaged the Ct values per value of t and still get the error.
edit: Changed the model somewhat so the k2 value is positive instead of negative. A negative value does not make sense kinetically.
I haven't quite solved the nls() problem, but I have a few suggestions.
First of all, I would suggest revising your hockey stick function slightly to make it continuous at the breakpoint:
# Two-segment linear ("hockey stick") model that is continuous at the
# breakpoint: both branches give C0 - k1 * tb when t == tb.
#
# Arguments:
#   t   numeric vector of times
#   C0  intercept of the first segment
#   k1  rate of decrease applied over the whole time range
#   k2  additional rate applied only to the time elapsed past `tb`
#       (a negative value makes the second segment flatter than the first)
#   tb  breakpoint between the two segments
#
# Returns a numeric vector of model values, one per element of `t`.
hockeystick <- function(t, C0, k1, k2, tb) {
  # Return the value directly so it is visible at the console, instead of
  # returning the (invisible) result of an assignment.
  ifelse(t <= tb, C0 - k1 * t, C0 - k1 * t - k2 * (t - tb))
}
Eyeballing:
## cosmetic: horizontal axis labels, L-shaped box
par(las = 1, bty = "l")
## observed data on the log scale ...
plot(log(Ct) ~ t, data = mydata)
## ... overlaid with the hockey stick at hand-picked parameter values
curve(
  hockeystick(x, C0 = 0, k1 = 0.8, k2 = -0.7, tb = 3),
  add = TRUE
)
I've made k2 negative here so the decreasing slope in the second stage is less than in the first stage.
## Same values as used for the eyeballed curve
start.hockey <- c(C0 = 0, k1 = 0.8, k2 = -0.7, tb = 3)
nls(
  log(Ct) ~ hockeystick(t, C0, k1, k2, tb),
  start = start.hockey,
  data = mydata
)
Models with breakpoints are often non-differentiable in the parameters, but
I don't quite see how that's a problem here ...
This does work:
library(bbmle)
## Normal likelihood centred on the hockey-stick prediction; the standard
## deviation is parameterised on the log scale (sd = exp(logsd)), and
## `logsd` is added to the nls starting values.
m1 <- mle2(
  log(Ct) ~ dnorm(hockeystick(t, C0, k1, k2, tb), sd = exp(logsd)),
  start = c(as.list(start.hockey), list(logsd = 0)),
  data = mydata
)
The parameters are reasonable (and different from the starting values):
coef(summary(m1))
## Estimate Std. Error z value Pr(z)
## C0 -0.4170749 0.2892128 -1.442104 1.492731e-01
## k1 0.6720120 0.2236111 3.005271 2.653439e-03
## k2 -0.5285974 0.2400605 -2.201934 2.766994e-02
## tb 2.0007688 0.1714292 11.671108 1.790751e-31
## logsd -0.2218745 0.1178580 -1.882558 5.976033e-02
Plot predictions:
## Evenly spaced grid of 51 time points on which to evaluate the fit.
## `length.out` is spelled out in full rather than relying on partial
## argument matching of `length`.
pframe <- data.frame(t = seq(0, 15, length.out = 51))
pframe$pred <- predict(m1, newdata = pframe)
## Add the predicted curve to the existing plot (colour 2)
with(pframe, lines(t, pred, col = 2))
Related
I am using the caret package along with the confusionMatrix function, and I would like to know whether it is possible to find out exactly which observations were not classified properly.
Here is a subset of my train data
train_sub <- structure(
list(
corr = c(
0.629922866893549,
0.632354159559817,
0.656112138936032,
0.4469719807955,
0.598136079870775,
0.314461239093862,
0.379065842199838,
0.347331370037428,
0.310270891798492,
0.361064451331448,
0.335628455451358
),
rdist = c(
0.775733824285612,
0.834148208687529,
0.884167982488944,
0.633989717138057,
0.850225777237626,
0.626197919283803,
0.649597055761598,
0.680382136363523,
0.627828985862852,
0.713674404108905,
0.646094473468118
),
CCF2 = c(
0.634465565134314,
0.722096802135009,
0.792385621105087,
0.46497582143802,
0.739612023831014,
0.470724554509749,
0.505961260826622,
0.527876803999064,
0.461724328071479,
0.564117580569802,
0.490084457081904
),
Wcorr = c(
0.629,
0.613,
0.812,
0.424,
0.593,
0.36,
0.346,
0.286,
0.333,
0.381,
0.333
),
Wcorr2 = c(
0.735,
0.743,
0.802,
0.588,
0.691,
0.632,
0.61,
0.599,
0.599,
0.632,
0.613
),
Wcorr3 = c(
0.21,
0.301,
0.421,
-0.052,
0.169,
-0.032,
-0.042,-0.048,
-0.035,
0.006,
-0.004
),
Var = c("W", "W", "W", "W",
"W", "B", "B", "B", "B", "B", "B")
),
row.names = c(1L, 2L,
3L, 5L, 7L, 214L, 215L, 216L, 217L, 218L, 221L),
class = "data.frame"
)
and here is a subset of my test data
test_sub <- structure(
list(
corr = c(
0.636658204667785,
0.5637857758104,
0.540558984461647,
0.392647603023863,
0.561801911406989,
0.297187412065481,
0.278864501603015,
0.505277007007347,
0.403811785308709,
0.510158398354856,
0.459607853624603
),
rdist = c(
0.887270722679019,
0.843656768956754,
0.815806338767273,
0.732093571145576,
0.832944903081762,
0.485497073465096,
0.454461718498521,
0.69094669881886,
0.627667080657035,
0.705558894672344,
0.620838398507191
),
CCF2 = c(
0.802017782695131,
0.731763898271157,
0.689402284804853,
0.577932997250877,
0.715111899030751,
0.324826043263382,
0.298456267077388,
0.544808216945995,
0.458148923874818,
0.551160266327893,
0.461228649848996
),
Wcorr = c(
0.655,
0.536,
0.677,
0.556,
0.571,
0.29,
0.25,
0.484,
0.25,
0.515,
0.314
),
Wcorr2 = c(
0.779,
0.682,
0.734,
0.675,
0.736,
0.5,
0.529,
0.611,
0.555,
0.639,
0.572
),
Wcorr3 = c(
0.368,
0.154,
0.266,
0.103,
0.224,
-0.204,
-0.16,
-0.026,
-0.149,
0.032,
-0.097
),
Var = c("W", "W", "W", "W", "W", "B", "B", "B", "B", "B",
"B")
),
row.names = c(4L, 6L, 8L, 13L, 15L, 321L, 322L, 329L,
334L, 341L, 344L),
class = "data.frame"
)
When I use this line,
# NOTE(review): `test` and `fittedTL` do not appear to be defined in the code
# shown for this question; the confusionMatrix() call further down uses
# `test_sub` and `predict_glmnet` instead -- confirm which objects are meant.
confusionMatrix(reference=as.factor(test$Var),data=fittedTL,mode = "everything")
With this data I train a classifier using the glmnet method (it gives the best accuracy in my case):
# Resampling set-up: repeated cross-validation (number = 10, repeats = 5),
# with class probabilities and the final hold-out predictions retained
classCtrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  classProbs = TRUE,
  savePredictions = "final"
)
# Fix the RNG seed so the resampling is reproducible
set.seed(355)
glmnetTL <- train(
  Var ~ .,
  data = train_sub,
  method = "glmnet",
  trControl = classCtrl
)
glmnetTL
And finally I compute the confusion matrix on my test set:
# Predicted classes for the held-out rows
predict_glmnet <- predict(glmnetTL, test_sub)
predict_glmnet
# Compare the predictions against the true labels of the test set
CM_glmnet <- confusionMatrix(
  data = predict_glmnet,
  reference = as.factor(test_sub$Var),
  mode = "everything"
)
CM_glmnet
The output of the confusion matrix is a table like so
    B  W
B   4  0
W   2  5
So here I have two predictions/classifications that are not good.
Is there any way I can trace back which rows of my test set these correspond to?
I have a dataset of volumes of 12 brain structures generated by two different methods. The raw data looks like this (just a small sample):
> dput(WT_MD_Raw[sample(nrow(WT_MD_Raw), 20), ])
structure(list(Method = c("ITKSNAP", "Stereology", "ITKSNAP",
"Stereology", "Stereology", "Stereology", "ITKSNAP", "Stereology",
"ITKSNAP", "ITKSNAP", "ITKSNAP", "Stereology", "ITKSNAP", "ITKSNAP",
"ITKSNAP", "ITKSNAP", "ITKSNAP", "ITKSNAP", "Stereology", "ITKSNAP"
), HCH_L = c(0.11, 0.157834822, 0.128, 0.119263065, 0.177391743,
0.14736469, 0.12, 0.175141504, 0.09, 0.13, 0.1, 0.171363868,
0.1159875, 0.13, 0.103, 0.122, 0.11, 0.125, 0.143858524, 0.123
), HCH_R = c(0.12, 0.16579533, 0.133, 0.124396906, 0.207531117,
0.127146973, 0.116, 0.146630096, 0.1, 0.13, 0.12, 0.170502743,
0.1241375, 0.15, 0.12, 0.133, 0.102, 0.133, 0.190865816, 0.123
), HCB_L = c(0.05, 0.076765729, 0.077, 0.063580641, 0.068459435,
0.073682345, 0.066, 0.072409924, 0.06, 0.05, 0.05, 0.073195622,
0.0585125, 0.05, 0.057, 0.061, 0.055, 0.049, 0.081047056, 0.063
), HCB_R = c(0.07, 0.081317642, 0.083, 0.085300735, 0.074917872,
0.078175171, 0.07, 0.074672734, 0.06, 0.05, 0.05, 0.082237434,
0.061475, 0.05, 0.056, 0.052, 0.049, 0.063, 0.05835388, 0.064
), HCT_L = c(0.03, 0.041923225, 0.049, 0.042650368, 0.037028374,
0.043580411, 0.05, 0.039372896, 0.04, 0.03, 0.02, 0.038750623,
0.0379125, 0.05, 0.035, 0.024, 0.046, 0.037, 0.052680586, 0.037
), HCT_R = c(0.03, 0.036264039, 0.044, 0.018560808, 0.027125436,
0.035493325, 0.049, 0.03348959, 0.05, 0.02, 0.04, 0.039181186,
0.0344, 0.04, 0.032, 0.026, 0.048, 0.027, 0.055922468, 0.033),
ERC_L = c(0.095, 0.193585925, 0.124, 0.140588249, 0.200211554,
0.172524515, 0.108, 0.206368284, 0.113, 0.1, 0.116, 0.289768551,
0.112275, 0.129, 0.114, 0.103, 0.128, 0.113, 0.166551699,
0.139), ERC_R = c(0.094, 0.191115764, 0.121, 0.160728701,
0.183419618, 0.204872861, 0.109, 0.234427129, 0.117, 0.117,
0.113, 0.304407675, 0.113825, 0.16, 0.104, 0.099, 0.12, 0.111,
0.184787287, 0.142), PRC_L = c(0.138, 0.124528754, 0.154,
0.109785206, 0.140363369, 0.115016343, 0.204, 0.143914724,
0.131, 0.122, 0.164, 0.096876559, 0.1371125, 0.195, 0.139,
0.117, 0.167, 0.118, 0.106171643, 0.112), PRC_R = c(0.151,
0.129179281, 0.124, 0.131900211, 0.145099557, 0.121755582,
0.146, 0.122644309, 0.115, 0.117, 0.164, 0.102904433, 0.1369875,
0.181, 0.154, 0.12, 0.18, 0.117, 0.141021877, 0.131), PHC_L = c(0.182,
0.212439273, 0.237, 0.182448795, 0.196767055, 0.200829318,
0.184, 0.17197357, 0.153, 0.134, 0.147, 0.185141868, 0.1674875,
0.171, 0.174, 0.193, 0.199, 0.148, 0.20423858, 0.201), PHC_R = c(0.172,
0.193777133, 0.205, 0.190347011, 0.201933804, 0.160843167,
0.178, 0.196411919, 0.151, 0.122, 0.146, 0.182989055, 0.1556125,
0.153, 0.14, 0.169, 0.182, 0.157, 0.18965011, 0.192)), row.names = c(54L,
161L, 14L, 123L, 148L, 81L, 13L, 93L, 50L, 56L, 39L, 91L, 80L,
5L, 72L, 64L, 33L, 58L, 135L, 18L), class = "data.frame")
Where the columns HCH_L, HCH_R, HCB_L, etc... are the structures, and Method is the method used to generate the volumes for each participant. I wanted a plot which would show the mean volumes for each method, paired together, for each structure. I had already calculated the means for each structure per method, so I just melt this and the plot is exactly what I want:
# Dodged bars: one bar per method for every structure
ggplot(reshape2::melt(WT_MD)) +
  geom_bar(
    aes(x = variable, y = value, fill = Method),
    stat = "identity",
    position = "dodge"
  )
I conducted some pairwise comparisons between the volumes each method generated for each structure using Mann-Whitney U tests. I'd like to show with an asterisk above each pair of bars (e.g., between HCH_L for ITK and Stereo) whether the volumes are significantly different, but am struggling with this. I tried with ggsignif, but I don't know how to express what I want:
# Same dodged bar chart, with an attempted significance layer comparing
# the two methods
ggplot(reshape2::melt(WT_MD)) +
  geom_bar(
    aes(x = variable, y = value, fill = Method),
    stat = "identity",
    position = "dodge"
  ) +
  geom_signif(
    comparisons = list(c("ITKSNAP", "Stereology")),
    map_signif_level = TRUE
  )
This gives me an error saying that it can't do comparisons for variables mapped to other aesthetics than the x-axis, which is fair enough:
Error in f(...) :
Can only handle data with groups that are plotted on the x-axis
I'm trying to compare across levels of one variable (method) within levels of another variable (structure), so it's unavoidable that one of these will be mapped to a different aesthetic. I've been looking at this all morning and am starting to get tunnel-vision - can anybody help with this please?
I am running multiple GAM models and need to view and compare the summary output from these. I'd like a quick and efficient way to extract and compile summary statistics from the models but have not found a way to do so.
An example data set is provided below:
example.data <- structure(list(response = c(1.47, 0.84, 1.99, 2.29, 4.14, 4.47,
2.71, 1.67, 4.12, 1.67, 2.03, 1.74, 0.98, 0.96, 0.56, 2.45, 1.31,
3.06, 2.35, 3.2, 1.16, 2.07, 0.99, 1.35, 1.02, 2.92, 1.8, 2.17,
2.56, 1.56, 2.33, 3.19, 1.53, 2.94, 3.28, 1.53, 2.8, 5.53, 1.26,
2.43, 3.5, 2.22, 3.73, 2.46, 2.16, 1.99, 3.34, 2.63, 2.51, 1.78
), predictor1 = c(17, 14.4, 99.45, 10.8, 54.25, 55.1, 40, 9,
54.25, 14.4, 14.4, 17, 14.4, 17, 10.8, 54.25, 54.25, 15.3, 55.1,
54.25, 14.4, 58, 17, 53.425, 58, 40.45, 14.4, 12.75, 91.05, 6.24,
100.25, 77.25, 43.4, 183.6, 91.05, 9.84, 100.25, 64, 10, 10,
91.05, 8.25, 100.25, 54.25, 89.4, 9.84, 10.8, 54.25, 10.8, 54.25
), predictor2 = c(165.7, 177.3, 594.2, 192.5, 426.2, 270.8, 244,
236.1, 416, 175.8, 258.6, 233.5, 115.8, 141, 153.5, 414.2, 438.9,
203, 261.4, 357.8, 148, 205.5, 137.4, 214.7, 167.8, 371.4, 179.9,
273.7, 567.2, 231.5, 355.3, 270, 319.5, 301.9, 301.9, 215.5,
256.5, 417, 231.8, 284.6, 396.3, 323, 458.4, 290, 203, 198, 350.8,
338, 323.5, 264.7), predictor3 = c(829.8, 841, 903.6, 870.3,
794, 745, 845.2, 906.5, 890.3, 874.2, 805.4, 828.8, 872, 854.7,
912.2, 790.8, 759.2, 855.1, 741.6, 961.8, 839.9, 805.1, 885.2,
887.8, 833.9, 1050.9, 787.5, 837, 731.9, 774.4, 820.8, 995.8,
916.3, 1032.1, 1014.3, 773.7, 846.4, 723.7, 764.2, 708.3, 1009.3,
1053.7, 751.7, 901.1, 848.7, 796.5, 697.1, 733.6, 725.6, 856.6
)), row.names = c(50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L,
86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L,
99L, 100L), class = "data.frame")
Right now, the unsophisticated and inefficient way I do it is something like this:
library(mgcv)

# One single-smooth GAM per candidate predictor, each written out by hand
mod1 <- gam(response ~ s(predictor1), data = example.data)
mod2 <- gam(response ~ s(predictor2), data = example.data)
mod3 <- gam(response ~ s(predictor3), data = example.data)

# Labels and statistics, in the same model order
mod.names <- c("mod1", "mod2", "mod3")
mod.predictors <- c("predictor1", "predictor2", "predictor3")
mod.rsq <- c(
  summary(mod1)$r.sq,
  summary(mod2)$r.sq,
  summary(mod3)$r.sq
)
mod.AIC <- c(
  AIC(mod1),
  AIC(mod2),
  AIC(mod3)
)

# One row per model
summary.data <- data.frame(mod.names, mod.rsq, mod.AIC, mod.predictors)
summary.data
I can then select models accordingly from the summary table.
I have over one hundred potential predictors in the actual data, and it's obviously laborious to manually specify all the models and their output so a more automated alternative would be desirable.
The hard part of this question is choosing which models to run: that's a hard statistical question, and depending on what you choose, a less hard programming problem.
I'll assume that you are only interested in models like the ones in your example. Then this should work:
library(mgcv)
#> Loading required package: nlme
#> This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
# Every column except the response is treated as a candidate predictor
predictors <- setdiff(names(example.data), "response")
result <- data.frame(predictors = predictors, rsq = NA, AIC = NA)
# Template formula; its right-hand side is replaced on every iteration
model <- response ~ predictor
for (i in seq_along(predictors)) {
  pred <- predictors[i]
  # Build the unevaluated call s(<pred>) and install it as the right-hand side
  model[[3]] <- bquote(s(.(as.name(pred))))
  mod <- gam(model, data = example.data)
  # Fill in both statistics for this predictor's row
  result[i, c("rsq", "AIC")] <- c(summary(mod)$r.sq, AIC(mod))
}
result
#> predictors rsq AIC
#> 1 predictor1 0.2011252 138.0875
#> 2 predictor2 0.4666861 118.7270
#> 3 predictor3 0.1959123 139.0365
The tricky part is computing the model formula. I start with a simple model response ~ predictor, then replace the 3rd part (predictor) with code produced by bquote(s(.(as.name(pred)))). That function produces unevaluated code like s(predictor1) when pred holds "predictor1".
My data :
# library() stops with an error if the package is missing, whereas require()
# only warns and returns FALSE, deferring the failure to the first call.
library(segmented)
# Response series (20 observations)
cp <- c(0.079, 0.079, 0.079, 0.080, 0.080, 0.081, 0.081, 0.081, 0.081, 0.081, 0.081, 0.082, 0.083, 0.084, 0.086, 0.088, 0.088, 0.088, 0.088, 0.088)
# Matching time stamps, 3600 apart (presumably Unix epoch seconds -- confirm)
dates <- c(1443991015, 1443994615, 1443998215, 1444001815, 1444005415, 1444009015, 1444012615, 1444016215, 1444019815, 1444023415, 1444027015, 1444030615, 1444034215, 1444037815, 1444041415,
1444045015, 1444048615, 1444052215, 1444055815, 1444059415)
I would like to test for the occurrence of a breakpoint, so I ran davies.test, as suggested here:
# Davies' test on the raw time stamps
davies.test(
  lm(cp ~ dates),
  seg.Z = ~ dates
)
But it returns p-value = NA :
Davies' test for a change in the slope
data: formula = cp ~ dates , method = lm
model = gaussian , link = identity
segmented variable = dates
= , n.points = 0, p-value = NA
alternative hypothesis: two.sided
It looks like davies.test can't handle values on that scale; some kind of overflow is happening internally.
# Centre and scale the time stamps before testing
dd <- scale(dates)
davies.test(
  lm(cp ~ dd),
  seg.Z = ~ dd
)
seems to work fine.
This is the structure of my data
> dput(test)
structure(list(MAT = c(4.9, 4.9, 15.5, 14.1, 14.1, 14.1, 11.5,
11.5, 11.5, 17, 6.1, 2.7, 2.2, 2.2, 14.1, 14.1, 14.1, 9.5, 9.5,
9.5, 9.5, 9.3, 8.3, 8.266666651, 8.266666651, 4.3, 4.3, 22.3,
14.1, 14.1, 14.1, 8.5, 8.5, 8.5, 8.5, 21.5, 21.5, 3.8, 3.8, 6,
6, 6, 6, 6), es = c(0.29603085763985, 0.421393627439682, 0.189653473156549,
0.226685054608428, 0.291373762079697, 0.166533544378467, 0.250586529054368,
0.146320008054403, 0.199565119644333, -0.0819047677231083, 0.15963948187092,
-0.154628141843561, 0.201121044198443, 0.0867981239977565, 0.543870310978598,
0.34547921143505, 0.37557241352574, -0.287318919407836, 0.207937483228907, 0.190143660810163, 0.276182673435993, 0.128596803172119, 0.454753165843559,
0.399237234440439, 0.32075358541748, 0.362664873575803, -0.0865925288159671,
0.51290512543514, 0.186308318839249, 0.147936083867325, 0.243792477087184,
0.625169403695832, 0.110317782120045, 0.217836235313289, 0.171468156841181,
0.50548821117127, 0.164418265301427, -0.00246305543239786, 0.325552346507191,
0.381240606108843, 0.19337350462531, 0.0408803528990759, 0.321815078821239,
0.307642815014319), var = c(0.00496277337027962, 0.0130962311273343,
0.0180149624217804, 0.0134568083459063, 0.00139708925143695,
0.000725862546533828, 0.00670831011660164, 0.0190783110089115,
0.0641568910090007, 0.0121596544795352, 0.0653909966557582, 0.0514610437228611,
0.0231592619167496, 0.0108989891148006, 0.0588577146414195, 0.0695760532112402,
0.0744256820906048, 0.00997789089155498, 0.00928124381998638,
0.0145009450673482, 0.00652956018299188, 0.0111886178917916,
0.0265943757419349, 0.142676904340634, 0.110705177803624, 0.0576538348777718,
0.0625171635976251, 0.0131652117394448, 0.00947904166717649,
0.00813569411386797, 0.00444289889858652, 0.0673007030900184,0.00545169559098343, 0.240046081413733, 0.00561125010476281,
0.0185516235174018, 0.0179989506841957, 0.0496806959944248, 0.022478393723115,
0.0521209786580004, 0.282298667080106, 0.0151428845076692, 0.00992945920656693, 0.0145544965304081), MAP = c(810, 810, 1140, 1750, 1750, 1750,
1034, 1034, 1034, 720, 645, 645, 645, 645, 1000, 1000, 1000,
691, 691, 691, 691, 1134, 1750, 1326, 1326, 1140, 1140, 1310,
1750, 1750, 1750, 1003, 1003, 1003, 1003, 1750, 1750, 1750, 1750,
1750, 1750, 1750, 1750, 1750), CO2dif = c(162L, 162L, 190L, 165L,
165L, 165L, 200L, 200L, 200L, 150L, 335L, 335L, 335L, 335L, 348L,
348L, 348L, 200L, 200L, 200L, 200L, 220L, 350L, 350L, 350L, 350L,
350L, 350L, 180L, 180L, 180L, 130L, 130L, 130L, 130L, 320L, 320L,
360L, 360L, 345L, 345L, 350L, 348L, 348L)), row.names = c(NA,
-44L), class = "data.frame", .Names = c("MAT", "es", "var", "MAP",
"CO2dif"))
I ran model selection using meta-analysis, and the best model for predicting the effect size is:
library(metafor)
# Meta-regression of the effect size on log(MAT), MAP, CO2dif and the
# log(MAT):CO2dif interaction
summary(
  rma(
    es, var,
    data = test,
    control = list(stepadj = .5),
    mods = ~ 1 + log(MAT) + MAP + CO2dif + log(MAT):CO2dif,
    knha = TRUE
  )
)
Model Results:
estimate se tval pval ci.lb ci.ub
intrcpt 1.2556 0.3719 3.3758 0.0017 0.5033 2.0080 **
log(MAT) -0.5740 0.1694 -3.3882 0.0016 -0.9167 -0.2313 **
MAP 0.0001 0.0001 2.5181 0.0160 0.0000 0.0003 *
CO2dif -0.0042 0.0013 -3.2932 0.0021 -0.0067 -0.0016 **
log(MAT):CO2dif 0.0020 0.0005 3.7500 0.0006 0.0009 0.0031 ***
Now I want to plot es vs MAT, with an example with this model, assuming that MAP=1200 mm and CO2dif=350
# Covariate values at which the example line is drawn
MAPi <- 1200
CO2i <- 350
# Convert a log-scale effect size to a percentage change
make_pct <- function(x) {
  (exp(x) - 1) * 100
}
ggplot(test, aes(x = log(MAT), y = make_pct(es))) +
  geom_abline(
    aes(
      intercept = make_pct(1.2556 + 0.0001 * MAPi - 0.0042 * CO2i),
      slope = make_pct(log(0.0020 * CO2i)) - make_pct(log(0.5740))
    ),
    color = "red",
    size = 0.8
  ) +
  geom_point() +
  theme_classic()
Effect size (es) is in log format, and I want percentage, so I transform it with the function make_pct. MAT, on the other hand, has to be log-transformed in the plot as indicated in the model output. Is the slope of the ggplot above correct with the log and percentage transformations? It seems to me that the slope is rather low. I am not very familiar with this type of plots and transformations, so any tips are welcome. Thanks
The relationship between exp(es)-1 and the explanatory variable log(MAT) is not linear.
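For a given set of values of MAP and CO2dif, this relationship is of the form: y = exp(es) - 1 = k1*exp(k2*log(MAT)) - 1, where k1 and k2 are constants determined by the model coefficients and the chosen MAP and CO2dif.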
This function can be plotted as follows:
library(metafor)
library(ggplot2)
# Refit the meta-regression; the moderators are written as
# MAP + log(MAT) * CO2dif
modfit <- rma(
  es, var,
  data = test,
  control = list(stepadj = .5),
  mods = ~ 1 + MAP + log(MAT) * CO2dif,
  knha = TRUE
)
# Estimated model coefficients
pars <- coef(modfit)
# Covariate values at which the curve is evaluated
MAPi <- 1200
CO2i <- 350
# Convert a log-scale effect size into a percentage change
make_pct <- function(x) {
  100 * (exp(x) - 1)
}

# Model prediction expressed as a percentage.
# `pars` is indexed by position: 1 = intercept, 2 = MAP, 3 = log(MAT),
# 4 = CO2dif, 5 = log(MAT) x CO2dif.
mod_fun <- function(MAP, MAT, CO2dif, pars) {
  log_mat <- log(MAT)
  linear_predictor <- pars[1] + pars[2] * MAP + pars[3] * log_mat +
    pars[4] * CO2dif + pars[5] * log_mat * CO2dif
  make_pct(linear_predictor)
}
# Predicted percentage effect for each observed MAT at the chosen MAP and
# CO2dif. Reuses the coefficient vector already stored in `pars` instead of
# extracting it from the model a second time.
test$ESpct <- mod_fun(MAPi, test$MAT, CO2i, pars)
# Observed effects as points, model prediction as a red line
ggplot(test, aes(x = log(MAT), y = make_pct(es))) +
  geom_line(aes(y = ESpct), color = "red", size = 0.8) +
  geom_point() +
  theme_classic()