Related
This has stumped me all day long. Can anyone help me to understand why my data is providing me these plots, which contain intersecting planes alongside of my 3D-scatterplot? The planes occur using all 3 of the methods below.
There may be an issue in my use of unlist of the matrix that created the attached dataset.
Link
Source Data
open3d()
bg3d(color="white")
points3d(freq_subdata$x_ft,freq_subdata$y_ft,freq_subdata$count)
plot3d(freq_subdata$x_ft,freq_subdata$y_ft,freq_subdata$count)
rgl.open()
rgl.bg(color="white")
rgl.points(freq_subdata$x_ft,freq_subdata$y_ft,freq_subdata$count,color="black")
> dput(head(freq_subdata,20))
structure(list(x = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), y = 1:20, x_ft = c(2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5,
2.5, 2.5, 2.5, 2.5, 2.5, 2.5), y_ft = c(2.5, 7.5, 12.5, 17.5,
22.5, 27.5, 32.5, 37.5, 42.5, 47.5, 52.5, 57.5, 62.5, 67.5, 72.5,
77.5, 82.5, 87.5, 92.5, 97.5), count = c(2L, 4L, 6L, 8L, 10L,
12L, 14L, 16L, 18L, 20L, 22L, 24L, 26L, 28L, 30L, 32L, 34L, 36L,
38L, 40L), proportion = c(6.18888476296571e-05, 0.000123777695259314,
0.000185666542888971, 0.000247555390518629, 0.000309444238148286,
0.000371333085777943, 0.0004332219334076, 0.000495110781037257,
0.000556999628666914, 0.000618888476296571, 0.000680777323926229,
0.000742666171555886, 0.000804555019185543, 0.0008664438668152,
0.000928332714444857, 0.000990221562074514, 0.00105211040970417,
0.00111399925733383, 0.00117588810496349, 0.00123777695259314
)), .Names = c("x", "y", "x_ft", "y_ft", "count", "proportion"
), row.names = c("Var11", "Var12", "Var13", "Var14", "Var15",
"Var16", "Var17", "Var18", "Var19", "Var110", "Var111", "Var112",
"Var113", "Var114", "Var115", "Var116", "Var117", "Var118", "Var119",
"Var120"), class = "data.frame")
Link
Source Data
I am working with a dataset in which I need to compare ordinal data to continuous data in a different column, i.e., individuals were categorized (by age, actually) and I need to compare different age ranges to two different test values. I have been attempting to run a multifactor ANOVA, and have had no luck.
First, I subset each age category and tried this:
aov.first.molar<-aov(carbon.combo~first.m.cat.1+first.m.cat.2+first.m.cat.3+first.m.cat.4+first.m.cat.5)
Error in model.frame.default(formula = carbon.combo ~ first.m.cat.1 + :
invalid type (list) for variable 'first.m.cat.1'
So the subsets didn't work, so I tried just using the column headers, just to see if it would magically organize by category...
> aov.albania.first<-aov(albania$AgeCat_first~albania$juv_deltaC_dentine+albania$Adult_deltaC_collagen)
Warning messages:
1: In model.response(mf, "numeric") :
using type = "numeric" with a factor response will be ignored
2: In Ops.factor(y, z$residuals) : ‘-’ not meaningful for factors
> summary(aov.albania.first)
Error in levels(x)[x] : only 0's may be mixed with negative subscripts
That obviously didn't work either, and I am not sure what I am doing wrong. I set everything as a factor, and I don't understand why the code is not working.
I am wondering if it has something to do with the fact that the nature of my test data is negative. I am not sure how to fix that without altering the data
Here is my data, as requested. I am sorry it's so messy, I am not sure how to format it better. Turning it into a matrix helped, but I am still having problems with aov and ggplot not being able to find certain things that I already turned into factors...
structure(list(Number = structure(1:10, .Label = c("142-c-1",
"142-c-3", "142-c-5", "156-c-1", "156-c-4", "156-c-6", "157-c-1",
"157-c-3", "157-c-5", "157-c-6", "158-c-3", "158-c-6", "178-c-1/A",
"178-c-2/A", "178-c-2/b", "178-c-3/b", "178-c-4/b", "186-c-2/a",
"186-c-2/b", "186-c-3/b", "186-c-4/b", "186-c-5/b", "186-c-6/b",
"192-c-1", "192-c-2", "192-c-3", "192-c-4", "192-c-5", "205-c-1",
"205-c-2", "205-c-3", "205-c-4", "205-c-5", "205-c-6", "210-c-1",
"210-c-2", "210-c-3", "210-c-4", "210-c-5", "215-c-1", "215-c-2",
"215-c-3", "215-c-4", "215-c-5", "215-c-6", "215-c-7", "270-c-1",
"270-c-2", "270-c-3", "270-c-4", "270-c-5", "295-c-1", "295-c-3",
"295-c-4", "353-c-2", "353-c-3", "353-c-4", "353-c-5", "353-c-6",
"382-c-1", "390-c-1", "390-c-2", "390-c-3"), class = "factor"),
ToothID = structure(c(3L, 3L, 3L, 8L, 8L, 8L, 7L, 7L, 7L,
7L), .Label = c("LI2", "LM1", "LM1-2", "LM3", "LP1-2", "M2",
"RM1-2", "RM2"), class = "factor"), sex = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
Al.Qahtani.category = structure(c(2L, 5L, 8L, 2L, 5L, 8L,
2L, 6L, 7L, 8L), .Label = c("AC", "CR 1/2", "CR 3/4", "CRC",
"R 1/2", "R 1/4", "R 3/4", "RC", "Ri ", "unk"), class = "factor"),
AgeCat_first = structure(c(1L, 2L, 3L, 2L, 3L, 4L, 1L, 2L,
2L, 3L), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
AgeCat_second = c(2L, 3L, 4L, 2L, 3L, 4L, 2L, 3L, 4L, 4L),
sample_age_first = structure(c(9L, 18L, 23L, 17L, 27L, 6L,
10L, 13L, 21L, 23L), .Label = c("10.5 to 16.5", "11.5 to 14.5",
"11.5 to 15.5", "11.5 to 18.5", "11.5 to 19.5", "12.5 to 15.5",
"12.5 to 19.5", "15.5 to 20.5", "1.5 to 2.5", "1.5 to 3.5",
"17.5 to 22.5", "2.5 to 4.5", "3.5 to 6.5", "3.5 to 7.5",
" 4.5 to 6.5 ", "4.5 to 6.5", "4.5 to 7.5", "4.5 to 8.5",
"6.5 to 11.5", "6.5 to 8.5", "6.5 to 9.5", "7.5 to 10.5",
"8.5 to 10.5", "8.5 to 11.5", "8.5 to 12.5", "9.5 to 12.5",
"9.5 to 13.5", "9.5 to 15.5", "unk"), class = "factor"),
sample_age_second = structure(c(16L, 25L, 7L, 15L, 26L, 7L,
15L, 22L, 2L, 7L), .Label = c("10.5 to 16.5", "11.5 to 13.5",
"11.5 to 14.5", "11.5 to 15.5", "11.5 to 18.5", "11.5 to 19.5",
"12.5 to 15.5", "12.5 to 19.5", "14.5 to 17.5", "15.5 to 20.5",
"1.5 to 3.5", "17.5 to 22.5", "3.5 to 6.5", "4.5 to 6.5",
"4.5 to 7.5", "4.5 to7.5", " 5.5 to 6.5 ", "6.5 to 11.5",
"6.5 to 8.5", "6.5 to 9.5", "7.5 to 11.5", "7.5 to 12.5",
"8.5 to 12.5", "9.5 to 12.5", "9.5 to12.5", "9.5 to 13.5",
"9.5 to 15.5", "unk"), class = "factor"), AgeCat_adult = c(9L,
9L, 9L, 8L, 8L, 8L, 7L, 7L, 7L, 7L), age_at_death = structure(c(3L,
3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("18-30",
"31-45", ">45", "Adolescent", "Ind"), class = "factor"),
weight_percent_.N = c(11.5, 6.6, 6.8, 7.8, 8.7, 9.4, 5.6,
5.6, 9.1, 3.9), weight_percent_C = c(37.8, 26.2, 29.5, 32.7,
34.7, 34.4, 22, 30.7, 46.8, 22.7), juv_deltaN_dentine = c(4.54,
4.45, NA, 4.03, 5.73, 6.81, 5.03, 4.58, 0.3, NA), juv_deltaC_dentine = c(-22.042,
-22.865, -24.345, -23.557, -23.24, -22.282, -22.85, -22.697,
-25.439, -25.776), juv_proxy = c(7.958, 7.135, 5.655, 6.443,
6.76, 7.718, 7.15, 7.303, 4.561, 4.224), Adult_deltaC_collagen = c(-18.62,
-18.62, -18.62, -18.9, -18.9, -18.9, -18.64, -18.64, -18.64,
-18.64), adult_proxy = c(11.38, 11.38, 11.38, 11.1, 11.1,
11.1, 11.36, 11.36, 11.36, 11.36), Adult_deltaC_apatite = c(12.29,
12.29, 12.29, -10.23, -10.23, -10.23, -10.73, -10.73, -10.73,
-10.73), Adult_deltaN = c(-18.62, -18.62, -18.62, -18.9,
-18.9, -18.9, -18.64, -18.64, -18.64, -18.64), apatite_collagen_spacing = c(8.66,
8.66, 8.66, 7.67, 7.67, 7.67, 7.74, 7.74, 7.74, 7.74), Adult_percent_C = structure(c(2L,
2L, 2L, 6L, 6L, 6L, 7L, 7L, 7L, 7L), .Label = c("14.31%",
"22.35%", "33.96%", "34.58%", "36.60%", "39.07%", "39.51%",
"42.12%", "42.17%", "42.29%", "42.81%", "44.01%", "44.72%",
"45.52%"), class = "factor"), Adult_percent_N = structure(c(14L,
14L, 14L, 4L, 4L, 4L, 5L, 5L, 5L, 5L), .Label = c("12.16%",
"12.30%", "13.04%", "13.78%", "14.20%", "14.89%", "14.97%",
"15.13%", "15.18%", "15.66%", "15.85%", "16.10%", "4.60%",
"7.98%"), class = "factor"), Adult_CN_ratio = c(3.27, 3.27,
3.27, 3.31, 3.31, 3.31, 3.25, 3.25, 3.25, 3.25), delta_18O = c(-5.5,
-5.5, -5.5, -4.79, -4.79, -4.79, -5.39, -5.39, -5.39, -5.39
), CP = c(0.17, 0.17, 0.17, 0.21, 0.21, 0.21, 0.2, 0.2, 0.2,
0.2), IR_SF = c(3.33, 3.33, 3.33, 3.12, 3.12, 3.12, 3.19,
3.19, 3.19, 3.19), adult_bone_sampled = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("femur", "humerus",
"occipital", "temporal", "tibia"), class = "factor")), .Names = c("Number",
"ToothID", "sex", "Al.Qahtani.category", "AgeCat_first", "AgeCat_second",
"sample_age_first", "sample_age_second", "AgeCat_adult", "age_at_death",
"weight_percent_.N", "weight_percent_C", "juv_deltaN_dentine",
"juv_deltaC_dentine", "juv_proxy", "Adult_deltaC_collagen", "adult_proxy",
"Adult_deltaC_apatite", "Adult_deltaN", "apatite_collagen_spacing",
"Adult_percent_C", "Adult_percent_N", "Adult_CN_ratio", "delta_18O",
"CP", "IR_SF", "adult_bone_sampled"), row.names = c(NA, 10L), class = "data.frame")
Your data corresponds to the second question, and so does this answer.
The way the aov function works is by measuring response as dependent on the categories. The formula thus needs to be designed as variable ~ factor.
aov.albania.first <- aov(juv_deltaC_dentine + Adult_deltaC_collagen ~ AgeCat_first,
data = albania)
summary(aov.albania.first)
Df Sum Sq Mean Sq F value Pr(>F)
AgeCat_first 3 6.480 2.160 1.667 0.272
Residuals 6 7.773 1.296
The problem with the first question might be similar to this. Further, check str(first.m.cat.1) and reformat the variable to vector.
Data Sets
> dput(head(spdistbc,50))
structure(list(Lane = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), Vehicle.class = c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
speedmph = c(0, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8, 27.2,
30.6, 34, 37.4, 40.8, 0, 3.4, 6.8, 10.2, 13.6, 17, 20.4,
23.8, 27.2, 30.6, 34, 37.4, 40.8, 3.4, 6.8, 10.2, 13.6, 17,
20.4, 23.8, 27.2, 30.6, 34, 37.4, 40.8, 0, 3.4, 6.8, 10.2,
13.6, 17, 20.4, 23.8, 27.2, 30.6, 34, 37.4), cprob = c(0,
0, 0.03, 0.06, 0.11, 0.2, 0.28, 0.43, 0.56, 0.75, 0.91, 0.97,
1, 0, 0, 0.01, 0.01, 0.02, 0.05, 0.17, 0.36, 0.57, 0.76,
0.93, 0.99, 1, 0, 0.01, 0.01, 0.04, 0.07, 0.16, 0.32, 0.55,
0.76, 0.94, 0.99, 1, 0, 0, 0, 0.01, 0.03, 0.06, 0.11, 0.25,
0.47, 0.74, 0.92, 0.98)), .Names = c("Lane", "Vehicle.class",
"speedmph", "cprob"), row.names = c(7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L), class = "data.frame")
> dput(head(cspdistbv,50))
structure(list(lanem = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L), cars = structure(c(34, 35, 36, 37, 38, 39, 40, 24,
26, 28, 30, 32, 34, 36, 38, 40, 20, 25, 30, 35, 40, 10, 15, 20,
25, 30, 35, 40, 10, 15, 20, 25, 30, 35, 40, 35, 40, 45, 50, 55,
0, 0.03, 0.07, 0.17, 0.67, 0.93, 1, 0, 0.03, 0.1, 0.1, 0.2, 0.27,
0.33, 0.8, 1, 0, 0.1, 0.31, 0.52, 1, 0, 0.07, 0.27, 0.37, 0.5,
0.77, 1, 0, 0.03, 0.07, 0.23, 0.4, 0.77, 1, 0, 0.13, 0.47, 0.77,
1), .Dim = c(40L, 2L), .Dimnames = list(NULL, c("speedmph", "prob"
)))), .Names = c("lanem", "cars"), row.names = c(NA, 40L), class = "data.frame")
Problem
I created the plot using spdistbc:
cb1 <- ggplot() + geom_point(data = spdistbc, mapping = aes(x=speedmph, y = cprob, color = 'observed')) + facet_wrap(~Lane) + theme_bw() + my.theme()
Which gave me this:
But when I combine another plot from the second data frame using following code:
cb2 <- cb1 + geom_point(data = cspdistbv, mapping = aes(x = cars.speedmph, y = cars.prob, color = 'simulated-default')) + facet_wrap(~lanem)
I get the error:
Error in eval(expr, envir, enclos) : object 'cars.speedmph' not found
Question
You can see in the cspdistbv data frame, there is a column named cars.speedmph, then why R can't find it? Please help.
Somehow you've created an invalid data.frame. You've stored a matrix in the second column of cspdistbv; dim(cspdistbv) thinks it only has two columns and this interferes with proper naming and such. I'm not sure how you created it, but you can fix it with
cspdistbv <- cbind.data.frame(lanem=cspdistbv[,1], cspdistbv[,2])
And then
cb1 <- ggplot() + geom_point(data = spdistbc, mapping = aes(x=speedmph,
y = cprob, color = 'observed')) + facet_wrap(~Lane) + theme_bw()
cb2 <- cb1 + geom_point(data = cspdistbv, mapping = aes(x = speedmph,
y = prob, color = 'simulated-default')) + facet_wrap(~lanem)
should work
I am trying to fit exponential decay functions on data which has only few time points. I would like to use the exponential decay equation y = y0*e^(-r*time) in order to compare r (or eventually half-life) between datasets and factors. I have understood that using a linear fit instead of nls is a better alternative for this particular function [1,2], if I want to estimate the confidence intervals (which I do).
Copy this to get some example data:
x <- structure(list(Factor = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 4L, 4L, 4L, 3L, 3L, 3L, 1L, 1L, 1L, 1L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L), .Label = c("A", "B", "C", "D"), class = "factor"),
time = c(0.25, 0.26, 0.26, 0.26, 0.27, 0.29, 0.29, 0.33,
0.38, 0.38, 0.38, 0.39, 0.4, 0.4, 0.41, 0.45, 0.45, 0.45,
0.45, 0.47, 0.51, 0.51, 0.52, 0.57, 0.57, 0.57, 0.57, 0.58,
0.58, 0.58, 0.6, 0.6, 0.6, 0.61, 0.61, 0.61, 0.62, 0.62,
0.64, 0.64, 0.67, 0.67, 0.67, 0.67, 0.69, 0.7, 0.7, 0.71,
0.76, 0.76, 0.77, 0.77, 0.79, 0.79, 0.8, 0.8, 0.83, 0.83,
0.84, 0.84, 0.86, 0.86, 0.87, 0.87, 18.57, 18.57, 18.57,
18.58, 18.69, 18.69, 18.7, 18.7, 18.7, 18.71, 18.71, 18.71,
18.74, 18.74, 18.74, 18.79, 18.85, 18.85, 18.86, 18.88, 18.89,
18.89, 18.89, 18.93, 18.93, 18.95, 18.95, 18.95, 18.96, 18.96,
18.96, 20.57, 20.57, 20.61, 20.62, 20.66, 20.67, 20.67, 20.67,
20.72, 20.72, 20.72, 21.18, 21.19, 21.19, 21.19, 21.22, 21.22,
21.22, 21.23, 21.25, 21.25, 21.25, 21.25, 87.58, 87.58, 87.64,
87.64, 87.65, 87.84, 87.85, 87.91, 87.91, 87.91, 89.27, 89.28,
89.28, 89.36, 89.36, 89.4, 89.4, 110.91, 112.19, 112.19,
112.2, 112.2, 112.24, 112.25, 112.25, 112.26, 185.6, 185.6,
185.63, 185.63, 185.64, 213, 234.96, 234.97, 234.97, 234.98,
235.01, 235.01, 235.02, 235.02), y = c(58.1, 42.9, 54.2,
45.3, 51.2, 44.4, 56.9, 53.4, 61.3, 49.3, 54.4, 55.6, 25.6,
48.1, 50.8, 54.7, 41.8, 46.2, 39.5, 51.7, 37.7, 43.1, 44.6,
48.4, 50.9, 62.5, 58.6, 47.8, 44.3, 55.6, 44.9, 49.1, 49.1,
60.3, 40.8, 57.6, 42.9, 60, 49.4, 54.1, 37.8, 46.5, 59, 64.3,
48, 54.3, 51.7, 59, 57.1, 29.4, 49.2, 50, 41.3, 40.5, 43.4,
48.6, 38.5, 35.7, 43.6, 60, 32, 27.3, 34.3, 44.4, 36.5, 25.4,
22.6, 25.5, 24.1, 18.9, 25, 5.9, 19.6, 15.7, 32.3, 14.3,
23.4, 29.4, 17, 18.3, 34.4, 26.4, 35.7, 22.6, 23.5, 19.3,
25.5, 34.7, 45.5, 38.1, 33.8, 47.9, 32.3, 32.1, 43, 27.8,
33.3, 25.5, 22.2, 29.2, 24.2, 22.8, 19.2, 31.6, 20.8, 26.4,
35.8, 50, 10.7, 24, 54.3, 67, 77.7, 51.7, 64.8, 49.3, 57.8,
43.2, 17, 17.4, 36.4, 60.2, 36, 4, 0, 0, 9.1, 2.9, 24.3,
18.8, 36, 16.3, 18.4, 17.1, 26.5, 29.3, 17.4, 23.1, 25.7,
32.7, 16.3, 14.6, 13.7, 16.2, 16.7, 21.9, 0, 0, 11.6, 8.6,
0, 3.7, 3.6, 5, 3.2, 0, 2.5, 5.7)), .Names = c("Factor",
"time", "y"), row.names = c(NA, -158L), class = "data.frame")
I manage to do this using the standard logarithmic function log(y) = x (thanks to this example), but fail when trying to fit several parameters in linear space.
summary(lm(log(y) ~ time, data = x, subset = Factor)) # I need the summary statistics to compare models
ggplot(x, aes(x = time, y = y, color = Factor)) + geom_point() + geom_smooth(method = "glm", family = gaussian(lin="log"), start=c(5,0))
Here is what I have tried:
## Summary
log.dec.fun <- function(N, r, time) -r*time + log(N) # The function in linear format
summary(glm(y ~ log.dec.fun(N, r, time), data = x, subset = Factor, start = c(5,0)))
# Error in log.dec.fun(N, r, time) : object 'r' not found
predict(glm(y ~ log.dec.fun(N, r, time), data = x, start = c(5,0)))
# Error in log.dec.fun(N, r, time) : object 'r' not found
## Plot
ggplot(x, aes(x = time, y = y, color = Factor)) + geom_point() + geom_smooth(method = "glm", formula = y ~ log.dec.fun(N, r, time), start = c(5,0))
#Error in log.dec.fun(N, r, time) : object 'r' not found
#Error in if (nrow(layer_data) == 0) return() : argument is of length zero
I can manage to get quite satisfactory models using nls, but I have learned that calculating confidence intervals for nls functions verges upon magic and beginners should not even try doing that.
dec.fun <- function(N, r, time) N*exp(-r*time) ## The function in non-linear form
# Fit a separate nls decay curve for each level of Factor and collect the
# fitted values level by level, in the order of levels(x$Factor).
fits <- lapply(levels(x$Factor), function(lev) {
  z <- subset(x, Factor == lev)
  predict(nls(y ~ dec.fun(N, r, time), data = z, start = list(N = 5, r = 0)))
})
g <- unlist(fits, use.names = FALSE)
# Sort by Factor, then time, so the rows line up with the per-level predictions.
x <- x[with(x, order(Factor, time)), ]
x$modelled <- g
ggplot(x, aes(x = time, color = Factor)) + geom_point(aes(y = y)) + geom_line(aes(y = modelled))
So my question is: how do I fit exponential decay functions using R, ggplot2 and a linear approximation? There is an answer on SO where @Joe Kington indicates that this is possible and provides the Python code. Unfortunately I do not understand Python.
I believe you simply need to allow for separate slopes and intercepts to be fit by your grouping variable Factor when you fit the model with the natural logarithm transformation for the response. I call this a separate lines model. Then you can predict and get confidence (or prediction) intervals on the log scale for each Factor, and back-transform to see the lines (much like the graphs in your original post from ggplot2).
Example of a separate lines model in R:
# Fit on the log scale so the model matches y = y0 * exp(-r * time).
# log(0) is -Inf and would make lm() fail, so zero responses are set to NA;
# na.exclude keeps predict()/residuals() padded to the full length of x.
fit1 <- lm(log(replace(y, y <= 0, NA)) ~ time * Factor,
           data = x, na.action = na.exclude)
summary(fit1)
The output of this model will show the estimated intercept for the reference level of Factor, the estimated slope for the reference level, and the difference in intercepts and slopes between the reference level and all other levels.
Alternatively, you could code the separate lines model:
# Dropping the common intercept and the common time slope gives one intercept
# (Factor) and one slope (time:Factor) per level of Factor, on the log scale.
# Zero responses are set to NA because log(0) is -Inf and lm() rejects it.
fit2 <- lm(log(replace(y, y <= 0, NA)) ~ Factor + time:Factor - 1,
           data = x, na.action = na.exclude)
summary(fit2)
This will show you the estimated intercept and slope separately for each level of Factor in your output.
To make lines based on the model, you can use predict and then back-transform to the original scale. Assuming a natural log transformation (and adding the values to your original dataset):
(x$pred = exp(predict(fit1)) )
You can also calculate and exponentiate your confidence intervals to the original scale if that's what you need.
exp(predict(fit1, interval = "confidence"))
Organizationally, you may want to put these as columns in your original dataset, as well, which you could do a variety of ways. The simplest may be to simply cbind them to the dataset x.
So I am trying to make a stacked bar graph with bar width mapped to a variable; but I want the spacing between my bars to be constant.
Does anyone know how to make the spacing constant between the bars?
Right now I've got this:
p<-ggplot(dd, aes(variable, value.y, fill=Date, width=value.x / 15))+ coord_flip() + opts(ylab="")
p1<-p+ geom_bar(stat="identity") + scale_fill_brewer(palette="Dark2") + scale_fill_hue(l=55,c=55)
p2<-p1 + opts(axis.title.x = theme_blank(), axis.title.y = theme_blank())
p2
Thanks in advance.
Here's my data by the way (sorry for the long, bulky dput):
> dput(dd)
structure(list(variable = structure(c(1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 7L, 7L, 7L, 7L, 7L, 2L, 2L,
2L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 9L, 9L, 9L,
9L, 9L, 8L, 8L, 8L, 8L, 8L), .Label = c("Alcohol and Tobacco",
"Health and Personal Care", "Clothing", "Energy", "Recreation and Education",
"Household", "Food", "Transportation", "Shelter"), class = "factor", scores = structure(c(2.91,
5.31, 10.08, 15.99, 4.95, 11.55, 11.2, 27.49, 20.6), .Dim = 9L, .Dimnames = list(
c("Alcohol and Tobacco", "Clothing", "Energy", "Food", "Health and Personal Care",
"Household", "Recreation and Education", "Shelter", "Transportation"
)))), value.x = c(2.91, 2.91, 2.91, 2.91, 2.91, 5.31, 5.31,
5.31, 5.31, 5.31, 10.08, 10.08, 10.08, 10.08, 10.08, 15.99, 15.99,
15.99, 15.99, 15.99, 4.95, 4.95, 4.95, 4.95, 4.95, 11.55, 11.55,
11.55, 11.55, 11.55, 11.2, 11.2, 11.2, 11.2, 11.2, 27.49, 27.49,
27.49, 27.49, 27.49, 20.6, 20.6, 20.6, 20.6, 20.6), Date = structure(c(5L,
4L, 3L, 2L, 1L, 5L, 4L, 3L, 2L, 1L, 5L, 4L, 3L, 2L, 1L, 5L, 4L,
3L, 2L, 1L, 5L, 4L, 3L, 2L, 1L, 5L, 4L, 3L, 2L, 1L, 5L, 4L, 3L,
2L, 1L, 5L, 4L, 3L, 2L, 1L, 5L, 4L, 3L, 2L, 1L), .Label = c("1993-2001",
"2001-2006", "2007-2010", "2010-2011", "2012 Jan - May"), class = "factor"),
value.y = c(2.1, 2.5, 7.6, 21.7, 2.8, 1.5, 0.3, -4.1, -4.2,
4.7, 3, 16.9, 1.9, 32.8, 23.9, 3.2, 4.6, 11.3, 8.9, 12.9,
1.7, 2, 7.8, 5.9, 10, 1.9, 2.1, 5.6, 2.2, 9.9, 1.4, 1.3,
2.2, 0.6, 17.3, 1.1, 2.3, 6.4, 13.1, 10, 4.3, 7.6, 0.9, 15.2,
20.5)), .Names = c("variable", "value.x", "Date", "value.y"
), row.names = c(NA, -45L), class = "data.frame")
For a categorical or "discrete" scale, you can adjust the width, but it needs to be between 0 and 1. Your value.x values put it over 1, hence the overlap. You can use rescale, from the scales package, to adjust this quickly so that the within-category width of the bar is representative of some other variable (in this case value.x).
install.packages("scales")
library(scales)
# Map bar width to value.x rescaled into [0.5, 1], so every width is a valid
# fraction of a discrete category slot and neighbouring bars do not overlap.
ggplot(dd, aes(x = variable, y = value.y, fill = Date)) +
  geom_bar(aes(width = rescale(value.x, c(0.5, 1))),
           stat = "identity", position = "stack") +
  coord_flip()
Play with the rescaling range for an optimal "view", e.g. change 0.5 to 0.25, etc.
Personally, I think something like this is more informative:
ggplot(dd,aes(x=variable,y=value.y,fill=Date)) +
geom_bar(aes(width=rescale(value.x,c(0.2,1))),stat="identity") +
coord_flip() + facet_grid(~Date) + opts(legend.position="none")
Attempt # 2.
I'm tricking ggplot2 into writing a continuous scale as categorical.
# Lay the bars out on a continuous axis so that each bar can be as wide as its
# value.x while the gap between neighbouring bars stays constant.
# One width and one label per category, in the order the categories appear
# in dd (this is NOT the order of levels(dd$variable)).
first_row <- !duplicated(dd$variable)
bar_width <- dd$value.x[first_row]
bar_label <- as.character(dd$variable[first_row])
# Centre of each bar: last + half(previous_width) + half(current_width),
# starting from the first width. Change `gap` to adjust the between-category
# spacing.
gap <- 5
tmp <- cumsum(bar_width) - bar_width / 2 + bar_width[1] / 2 +
  gap * seq_along(bar_width)
dd$x.pos1 <- tmp[match(dd$variable, dd$variable[first_row])]
# Label each break with the category actually drawn at that position.
ggplot(dd, aes(x = x.pos1, y = value.y, fill = Date)) +
  geom_bar(aes(width = value.x), stat = "identity", position = "stack") +
  scale_x_continuous(breaks = tmp, labels = bar_label) +
  coord_flip()
For good measure you're probably going to want to adjust the text size. That's done with ... + opts(axis.text.y=theme_text(size=12))