Related
My dataset contains 2 variables Y and X. Y was measured every 1.0 seconds.
I am trying to calculate the average slope within a moving 60-second-window, i.e. after calculating the first 60-second slope value the window moves forward one time unit (1.0 seconds) and calculates the next 60-second-window, producing successive 60-second slope values at 1.0-second increments.
My data:
dput(Dataexample)
structure(list(X = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30",
"31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41",
"42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52",
"53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63",
"64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74",
"75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85",
"86", "87", "88", "89", "90", "91", "92", "93", "94", "95", "96",
"97", "98", "99", "100", "101", "102", "103", "104", "105", "106",
"107", "108", "109", "110", "111", "112", "113", "114", "115",
"116", "117", "118", "119", "120", "121", "122", "123", "124",
"125", "126", "127", "128", "129", "130", "131", "132", "133",
"134", "135", "136", "137", "138", "139", "140", "141", "142",
"143", "144", "145", "146", "147", "148", "149", "150", "151",
"152", "153", "154", "155", "156", "157", "158", "159", "160",
"161", "162", "163", "164", "165", "166", "167", "168", "169",
"170", "171", "172", "173", "174", "175", "176", "177", "178",
"179", "180", "181", "182", "183", "184", "185", "186", "187",
"188", "189", "190", "191", "192", "193", "194", "195", "196",
"197", "198", "199", "200", "201", "202", "203", "204", "205",
"206", "207", "208", "209", "210", "211", "212", "213", "214",
"215", "216", "217", "218", "219", "220", "221", "222", "223",
"224", "225", "226", "227", "228", "229", "230", "231", "232",
"233", "234", "235", "236", "237", "238", "239", "240", "241",
"242", "243", "244", "245", "246", "247", "248", "249", "250",
"251", "252", "253", "254", "255", "256", "257", "258", "259",
"260", "261", "262", "263", "264", "265", "266", "267", "268",
"269", "270", "271", "272", "273", "274", "275", "276", "277",
"278", "279", "280", "281", "282", "283", "284", "285", "286",
"287", "288", "289", "290", "291", "292", "293", "294", "295",
"296", "297", "298", "299", "300", "301", "302", "303", "304",
"305", "306", "307", "308", "309", "310", "311", "312", "313",
"314", "315", "316", "317", "318", "319", "320", "321", "322",
"323", "324", "325", "326", "327", "328", "329", "330", "331",
"332", "333", "334", "335", "336", "337", "338", "339", "340",
"341", "342", "343", "344", "345", "346", "347", "348", "349",
"350", "351", "352", "353", "354", "355", "356", "357", "358",
"359", "360", "361", "362", "363", "364", "365", "366", "367",
"368", "369", "370", "371", "372", "373", "374", "375", "376",
"377", "378", "379", "380", "381", "382", "383", "384", "385",
"386", "387", "388", "389", "390", "391", "392", "393", "394",
"395", "396", "397", "398", "399", "400", "401", "402", "403",
"404", "405", "406", "407", "408", "409", "410", "411", "412",
"413", "414", "415", "416", "417", "418", "419"), Y = c(25221.6738,
25220.66, 25079.83, 24912.6719, 24801.24, 24791.1113, 24431.54,
24504.46, 24086.21, 24504.46, 24109.5, 24165.1953, 23999.1289,
23802.7012, 23838.1387, 23899.9, 23585.0371, 23605.2832, 23861.4258,
23675.1367, 23642.74, 23565.8027, 23378.5313, 23227.7129, 23105.248,
23393.7148, 23186.2168, 23577.9512, 23230.75, 23186.2168, 23057.68,
23261.1152, 23282.3711, 23184.1914, 23314.7617, 23223.666, 23023.27,
23059.7031, 22867.4219, 23047.5586, 23114.3555, 22960.5234, 23266.1758,
22824.92, 22910.9355, 22863.373, 22891.709, 22997.9688, 22873.4941,
22948.38, 22844.1465, 22969.6328, 22935.2246, 22983.8, 23078.9336,
22965.584, 22814.8, 22815.8125, 22874.5059, 22632.66, 22800.6328,
22715.6328, 22515.291, 22679.2051, 22525.4082, 22670.1, 22506.1855,
22595.2227, 22741.9434, 22609.3887, 22727.7754, 22442.4453, 22246.1777,
22361.5078, 22453.5742, 22643.791, 22587.1289, 22567.9043, 22681.23,
22590.1641, 22327.11, 22453.5742, 22350.3789, 22566.8926, 22558.7988,
22583.082, 22765.2168, 22519.3379, 22290.69, 22326.0977, 22236.06,
22662.0039, 22365.5547, 22259.3281, 22511.2441, 22284.62, 22480.89,
22422.209, 22483.9258, 22653.91, 22365.5547, 22342.2852, 22463.6914,
22160.19, 22426.2559, 22537.55, 22536.54, 22425.2441, 22251.2363,
22214.8164, 22264.3867, 22654.9219, 22342.2852, 22364.543, 22542.61,
22283.6074, 22463.6914, 22373.6484, 22276.5273, 22276.5273, 22168.2832,
22219.875, 22026.6641, 22409.0566, 22147.04, 22120.7383, 22374.66,
22304.8535, 22303.8418, 22459.6445, 22122.7617, 22236.06, 22427.2676,
22147.04, 22449.5273, 22597.2461, 22425.2441, 22289.6777, 22298.7832,
22232.0137, 22486.96, 22350.3789, 22201.666, 22234.0371, 22220.8867,
22280.5723, 22321.04, 21867.8633, 22328.1211, 22327.11, 22261.3516,
22585.1055, 22146.0273, 22210.77, 22201.666, 22185.48, 22249.2129,
22399.9512, 22549.6914, 22308.9, 22516.3027, 21998.3418, 22302.83,
22014.5273, 22014.5273, 22219.875, 22339.25, 22000.3652, 22064.0918,
22344.3086, 22044.873, 22294.7363, 22187.5039, 22102.5313, 22502.1387,
21872.92, 22168.2832, 22146.0273, 22128.832, 22114.67, 22155.1328,
22384.7754, 22300.8066, 22396.916, 22442.4453, 22141.9824, 22149.0625,
22114.67, 22259.3281, 22226.957, 22248.2012, 22263.375, 22258.3164,
22131.8652, 22272.48, 22486.96, 22212.793, 22171.3184, 22275.5156,
22307.8887, 22268.4336, 22116.6914, 21950.8027, 22266.41, 22095.45,
22101.52, 22125.7969, 22306.877, 22305.8652, 22355.4375, 22292.7129,
22089.38, 22208.748, 22183.457, 22162.2129, 22125.7969, 22440.4219,
22216.84, 22016.55, 22293.7246, 22112.6465, 22210.77, 22134.9,
21936.6426, 21776.8359, 22160.19, 22298.7832, 22342.2852, 21995.3086,
22223.9219, 22153.11, 22383.7637, 22341.2734, 22082.2988, 22198.63,
22192.56, 22138.9473, 22112.6465, 22203.69, 22054.9883, 22169.2949,
22312.9473, 21971.0313, 22088.37, 22319.0156, 22162.2129, 22279.5625,
22119.7266, 22115.6816, 22225.9453, 22293.7246, 22157.1563, 22151.0859,
22192.56, 22263.375, 22256.293, 22171.3184, 22124.7852, 22401.9746,
22038.8027, 22063.08, 22408.0449, 22057.0117, 21960.918, 22160.19,
22134.9, 22029.7, 22470.7734, 22347.3438, 22264.3867, 22182.4453,
21969.01, 22358.4727, 22286.6426, 22366.5664, 22282.5957, 22257.3047,
22162.2129, 22034.7578, 22157.1563, 22247.19, 22131.8652, 21848.6465,
22043.8613, 21923.4922, 22258.3164, 22313.959, 22058.0215, 22003.4,
22167.2715, 21953.8379, 22254.27, 22152.0977, 22206.7246, 22085.334,
22266.41, 22323.0625, 22134.9, 22083.31, 22401.9746, 22190.54,
22180.4219, 22040.8262, 21978.1133, 22149.0625, 22346.332, 22270.457,
22111.6348, 22108.6, 22408.0449, 22468.75, 22222.91, 21854.7148,
22379.7188, 22179.41, 21972.043, 22097.4727, 22436.375, 22059.0332,
22215.8281, 22311.9355, 22369.6016, 22148.05, 22227.9688, 22094.4375,
22378.707, 22169.2949, 22186.4922, 21977.1016, 22264.3867, 21984.1816,
22001.377, 22267.4219, 22164.2363, 22066.1152, 21814.2578, 22160.19,
21970.0215, 22209.7578, 22081.2871, 22338.2383, 22215.8281, 22042.85,
22214.8164, 22413.1035, 22173.3418, 22216.84, 22203.69, 22125.7969,
22038.8027, 22171.3184, 22215.8281, 21966.9863, 22226.957, 22121.75,
22021.6074, 22036.78, 22213.8047, 22313.959, 22115.6816, 22095.45,
22242.13, 22127.82, 22082.2988, 22466.7266, 22265.3984, 22276.5273,
22309.9121, 22352.4023, 22274.5039, 22183.457, 22402.9863, 22188.5156,
22141.9824, 22173.3418, 21995.3086, 22171.3184, 22079.2656, 22151.0859,
21746.4941, 21983.17, 22153.11, 22224.9336, 22115.6816, 22020.5957,
21894.16, 21947.7676, 22277.54, 22016.55, 21941.7, 22276.5273,
22113.6582, 22126.8086, 22016.55, 22374.66, 22009.4688, 22099.4961,
21991.2617, 21886.0684, 22161.2012, 22019.584, 22283.6074, 22202.6777,
22022.62, 22289.6777, 21913.3789, 21958.8945, 22125.7969, 22244.1543,
22041.8379, 22146.0273, 22120.7383, 21997.332, 21907.3086, 22010.48,
22167.2715, 21927.54, 21964.9629, 21955.86)), row.names = c(NA,
-419L), class = c("tbl_df", "tbl", "data.frame"))
You could do this with a loop:
# Rolling least-squares slope of Y on X: one lm() fit per window of `window`
# consecutive rows, with the window advancing one row at a time.
Dataexample$X <- as.numeric(Dataexample$X)  # X is stored as character
window <- 60L
# Number of complete windows; clamped at 0 so short data yields an empty result.
n_windows <- max(nrow(Dataexample) - window + 1L, 0L)
slopes <- rep(NA_real_, n_windows)
# seq_along() avoids the 1:0 trap when there are no complete windows.
for (i in seq_along(slopes)) {
  rows <- i:(i + window - 1L)
  slopes[i] <- coef(lm(Y ~ X, data = Dataexample[rows, ]))[["X"]]
}
The package zoo has a useful function rollapply for this sort of task where you want to do a more generalised version of a moving average. For your example, you could do it as:
library(zoo)
# X must be numeric like Y so every column has the same type
# (rollapply appears to hold the input as a matrix).
Dataexample$X <- as.numeric(Dataexample$X)
# Fit Y ~ X on each 60-row window; by.column = FALSE passes whole rows
# (both columns) to the function instead of one column at a time.
rollapply(
  Dataexample,
  width = 60,
  FUN = function(d) {
    window_fit <- lm(Y ~ X, data.frame(d))
    window_fit$coefficients
  },
  by.column = FALSE
)
# (Intercept) X
#[1,] 24563.00 -35.13712880
#[2,] 24501.07 -33.33658109
#[3,] 24434.35 -31.54255629
#[4,] 24377.38 -30.17644875
#[5,] 24318.79 -28.68243440
#...
#[360,] 23397.00 -3.29681473
In this case the X coefficient is the slope that you're interested in.
It's necessary to convert the X column to numeric because rollapply seems to be using a matrix to store the input data in, so all columns need to be of the same type.
lm(y ~ x) can accept a matrix y and, in such a case, "...a linear model is fitted separately by least-squares to each column of the matrix" (?lm, Details section). Hence, we can (if memory is sufficient) construct all 60-second windows of y beforehand (as a matrix) and pass it to lm:
n = 60 # window
Make design matrix:
X = cbind(1, 1:n) # X[, 1]: intercept
Make response as a matrix of 60-second windows (by column):
# Index vector that lays out every n-long window back to back:
# the window starting at k contributes k, k + 1, ..., k + n - 1.
n_windows <- length(Dataexample$Y) - n + 1
iY <- sequence(rep_len(n, n_windows), from = 1:n_windows)
# matrix() fills column-wise, so each column of Y is one window.
Y <- matrix(Dataexample$Y[iY], nrow = n)
Run lm to retrieve slopes:
slopes1 = lm.fit(X, Y)$coefficients[2, ]
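As a more efficient alternative, to avoid the overhead of fitting a model, we can calculate the slope manually, using the closed-form least-squares formula (found online):
$$\text{slope} = \frac{n \sum xy - \sum x \sum y}{n \sum x^2 - \left(\sum x\right)^2}$$
Where we have: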
n = 60 # window
A sequence of indices to subset 'x' and 'y' by a 60-sec window:
# Concatenated indices of all n-long windows: k, ..., k + n - 1 for each
# window start k.
n_windows <- length(Dataexample$Y) - n + 1
i <- sequence(rep_len(n, n_windows), from = 1:n_windows)
Get 'x' and 'y':
# Reshape the windowed values into n-row matrices, one window per column.
# as.numeric() covers the case where X is still stored as character.
x_numeric <- as.numeric(Dataexample$X)
X <- matrix(x_numeric[i], nrow = n)
Y <- matrix(Dataexample$Y[i], nrow = n)
Compute all slopes at once:
# Per-window (per-column) sums needed by the closed-form slope
#   slope = (n * sum(xy) - sum(x) * sum(y)) / (n * sum(x^2) - sum(x)^2)
Sx <- colSums(X)
Sy <- colSums(Y)
Sxy <- colSums(X * Y)
Sxx <- colSums(X ^ 2)
slopes2 <- (n * Sxy - Sx * Sy) /
  (n * Sxx - Sx ^ 2)
And check:
all.equal(slopes1, slopes2)
#[1] TRUE
I have a curve like the following:
As can be seen, the points above are enclosed by two curves, an upper bound and a lower bound (see the following image for an approximate visualization):
I have tried to find the lower bound by the following code:
# Collect the strict local minima of the curve: interior rows whose ordinate
# (column 2) is smaller than that of both neighbouring rows. The first and
# last rows are never selected. The result is a two-column matrix
# (abscissa, ordinate), with zero rows when no minimum exists.
# NOTE(review): assumes `data` is a numeric matrix, as in the dput -- confirm.
# Vectorised so the result is not regrown with rbind on every hit, and guarded
# so that fewer than three rows no longer makes 2:(n - 1) count backwards.
ordinate <- data[, 2]
n_points <- length(ordinate)
interior <- if (n_points >= 3) 2:(n_points - 1) else integer(0)
is_minimum <- ordinate[interior] < ordinate[interior + 1] &
  ordinate[interior] < ordinate[interior - 1]
# which() skips NA comparisons instead of failing on them
minima <- interior[which(is_minimum)]
lower_bound <- cbind(unname(data[minima, 1]), unname(ordinate[minima]))
But the problem is that there are some local minima in the middle, so the result is not what I expected (as follows).
Hence the question is how can I find the lower bound of such a curve in R? Is there any known algorithm to find the lower bound?
As a minimal reproducible example, I've included 500 data points (enough to show the shape) below:
structure(c(3.54246575342466, 5.75616438356164, 6.54246575342466,
9.73424657534247, 7.75616438356164, 9.75616438356164, 1.41917808219178,
2.75342465753425, 3.16986301369863, 2.27123287671233, 3.74520547945205,
5.74520547945205, 4.29315068493151, 3.37260273972603, 8.37534246575343,
3.33698630136986, 5.04657534246575, 6.92328767123288, 1.33972602739726,
6.34246575342466, 26.358904109589, 2.34794520547945, 4.35068493150685,
7.35342465753425, 27.3671232876712, 1.15068493150685, 2.14794520547945,
4.15342465753425, 6.15342465753425, 9.15616438356164, 19.1616438356164,
29.1698630136986, 2.58904109589041, 4.5972602739726, 9.6, 29.613698630137,
3.10684931506849, 8.11780821917808, 3.35068493150685, 8.37260273972603,
5.11506849315068, 28.1315068493151, 5.36986301369863, 28.386301369863,
4.87945205479452, 4.46027397260274, 5.35616438356164, 25.3698630136986,
25.8684931506849, 5.85479452054795, 1.85205479452055, 4.37260273972603,
6.37260273972603, 9.37534246575343, 19.3835616438356, 29.3890410958904,
18.3808219178082, 1.36986301369863, 8.37534246575343, 3.37260273972603,
5.85479452054795, 28.3890410958904, 2.87671232876712, 4.87945205479452,
7.37808219178082, 8.17260273972603, 28.186301369863, 4.29315068493151,
6.96164383561644, 26.3479452054795, 7.96438356164384, 1.53972602739726,
1.58630136986301, 5.04657534246575, 16.6356164383562, 18.4684931506849,
25.0602739726027, 15.6493150684932, 24.8547945205479, 25.3506849315068,
26.1068493150685, 16.0547945205479, 2.16712328767123, 19.2657534246575,
20.6849315068493, 1.04383561643836, 3.04383561643836, 23.0575342465753,
3.29315068493151, 4.13150684931507, 2.04383561643836, 22.0575342465753,
22.8904109589041, 2.87671232876712, 5.54246575342466, 25.5561643835616,
1.04931506849315, 6.05205479452055, 26.0657534246575, 2.09041095890411,
7.09315068493151, 27.1068493150685, 2.09041095890411, 4.09315068493151,
6.09315068493151, 9.0958904109589, 19.1013698630137, 29.1095890410959,
1.83287671232877, 1.42739726027397, 3.43013698630137, 9.3041095890411,
2.72328767123288, 8.58082191780822, 2.8958904109589, 4.92054794520548,
9.92328767123288, 6.92054794520548, 19.9315068493151, 29.9369863013699,
20.9315068493151, 2.25205479452055, 22.2657534246575, 3.40821917808219,
23.4219178082192, 8.21095890410959, 3.20821917808219, 5.20821917808219,
18.8027397260274, 19.8054794520548, 21.7232876712329, 2.25205479452055,
22.8054794520548, 24.4301369863014, 4.41643835616438, 25.2684931506849,
5.25479452054795, 6.33698630136986, 26.3506849315068, 2.87671232876712,
4.29315068493151, 2.78630136986301, 1.2958904109589, 1.75890410958904,
2.29041095890411, 7.2958904109589, 3.44657534246575, 4.5041095890411,
9.73424657534247, 8.21643835616438, 6.29315068493151, 7.50684931506849,
3.75616438356164, 7.96438356164384, 9.96438356164384, 29.9780821917808,
17.3452054794521, 26.8547945205479, 5.5041095890411, 7.17260273972603,
6.13150684931507, 3.62739726027397, 5.79452054794521, 7.7972602739726,
5.75616438356164, 3.09041095890411, 8.0958904109589, 9.63013698630137,
4.93424657534247, 6.93424657534247, 9.93698630136986, 19.9452054794521,
6.42191780821918, 8.96438356164384, 28.9780821917808, 15.7643835616438,
18.7205479452055, 1.20547945205479, 4.16986301369863, 24.1835616438356,
25.2520547945205, 26.3095890410959, 9.61917808219178, 12.3424657534247,
14.2986301369863, 23.7698630136986, 3.71232876712329, 8.71506849315069,
3.58082191780822, 1.25205479452055, 5.96164383561644, 3.46027397260274,
8.46301369863014, 7.88219178082192, 5.54246575342466, 6.46027397260274,
4.79452054794521, 3.20821917808219, 15.5506849315068, 9.5972602739726,
20.6, 2.09041095890411, 1.87671232876712, 22.2191780821918, 5.67397260273973,
25.7260273972603, 26.9753424657534, 7.2958904109589, 27.5205479452055,
8.17260273972603, 28.3095890410959, 29.2246575342466, 3.87945205479452,
9.13424657534247, 5.84109589041096, 3.96164383561644, 4.54246575342466,
9.92602739726027, 29.9397260273973, 16.1397260273973, 16.1780821917808,
16.1397260273973, 18.7643835616438, 18.7643835616438, 20.2191780821918,
20.6383561643836, 20.8904109589041, 1.16712328767123, 21.2657534246575,
1.33424657534247, 2.04383561643836, 2.20547945205479, 4.75616438356164,
2.92054794520548, 23.2219178082192, 4.09315068493151, 4.24931506849315,
5.13150684931507, 5.96164383561644, 6.20821917808219, 27.3917808219178,
29.2246575342466, 9.75890410958904, 8.75890410958904, 28.2712328767123,
8.42465753424658, 28.4383561643836, 2.37260273972603, 4.37534246575343,
4.75616438356164, 1.67123287671233, 3.96164383561644, 2.37260273972603,
5.04657534246575, 2.25205479452055, 5.62739726027397, 4.29315068493151,
1.29041095890411, 3.5041095890411, 5.5041095890411, 5.09315068493151,
3.83561643835616, 8.46301369863014, 3.42191780821918, 1.33424657534247,
4.5041095890411, 4.79452054794521, 6.37534246575343, 7.21095890410959,
9.37808219178082, 29.3534246575342, 3.54246575342466, 28.6904109589041,
8.2958904109589, 8.54246575342466, 4.62739726027397, 12.4082191780822,
19.4712328767123, 20.972602739726, 3.67397260273973, 4.42191780821918,
4.46027397260274, 25.3890410958904, 5.87945205479452, 25.8931506849315,
2.20547945205479, 27.2246575342466, 7.88219178082192, 1.43561643835616,
2.43561643835616, 4.46027397260274, 6.46027397260274, 9.46301369863014,
19.4712328767123, 29.4767123287671, 2.45753424657534, 9.63013698630137,
7.88219178082192, 23.641095890411, 5.13150684931507, 6.46027397260274,
9.7972602739726, 29.8109589041096, 3.21095890410959, 8.21643835616438,
4.92328767123288, 3.25479452054795, 2.62465753424658, 3.71232876712329,
5.75616438356164, 25.7698630136986, 6.20821917808219, 1.20547945205479,
4.96164383561644, 2.09041095890411, 4.13150684931507, 2.45753424657534,
27.4767123287671, 7.46301369863014, 3.2, 18.2164383561644, 1.1972602739726,
5.20821917808219, 8.21095890410959, 28.2246575342466, 19.972602739726,
7.50684931506849, 9.92602739726027, 3.75616438356164, 1.38904109589041,
3.39178082191781, 19.4082191780822, 2.04383561643836, 24.0602739726027,
5.75616438356164, 26.4356164383562, 6.42191780821918, 28.0630136986301,
8.21095890410959, 18.2164383561644, 6.37534246575343, 3.96164383561644,
6.87945205479452, 7.75890410958904, 5.54246575342466, 7.75890410958904,
2.19452054794521, 27.213698630137, 29.227397260274, 8.2, 5.87945205479452,
1.12876712328767, 3.12876712328767, 7.54520547945206, 2.74246575342466,
4.74520547945205, 6.74520547945205, 9.74794520547945, 29.7616438356164,
3.58904109589041, 8.53150684931507, 18.5369863013699, 28.5452054794521,
4.96164383561644, 19.3479452054795, 1.04657534246575, 21.386301369863,
3.96164383561644, 5.42191780821918, 3.62739726027397, 7.13150684931507,
2.54246575342466, 27.5616438356164, 18.0547945205479, 4.16986301369863,
9.25753424657534, 4.76986301369863, 15.2164383561644, 6.37534246575343,
8.17808219178082, 3.73424657534247, 1.26849315068493, 6.27123287671233,
6.2986301369863, 19.3945205479452, 17.1397260273973, 1.20547945205479,
6.20821917808219, 3.37534246575342, 5.25479452054795, 4.75616438356164,
6.54246575342466, 3.16986301369863, 2.04383561643836, 4.25479452054795,
1.04383561643836, 7.71506849315069, 5.09315068493151, 4.42191780821918,
6.71232876712329, 3.04383561643836, 1.67945205479452, 3.30958904109589,
3.96164383561644, 2.92054794520548, 7.92602739726027, 27.9397260273973,
2.83835616438356, 7.46301369863014, 8.21095890410959, 7.23835616438356,
27.2520547945205, 2.53972602739726, 6.58904109589041, 9.96438356164384,
29.9780821917808, 16.4301369863014, 18.3671232876712, 19.2657534246575,
20.386301369863, 2.37534246575342, 6.46027397260274, 26.4739726027397,
6.62739726027397, 4.75616438356164, 6.71232876712329, 28.8958904109589,
15.5123287671233, 14.8465753424658, 14.8465753424658, 2.0027397260274,
3.20821917808219, 3.71232876712329, 1.6958904109589, 2.07671232876712,
7.75616438356164, 2.88493150684931, 4.29315068493151, 1.84931506849315,
8.8958904109589, 1.12328767123288, 5.38630136986301, 5.12602739726027,
8.25205479452055, 3.24931506849315, 1.38904109589041, 5.89315068493151,
6.60821917808219, 1.5013698630137, 3.46027397260274, 6.5041095890411,
7.75890410958904, 6.54246575342466, 9.37808219178082, 3.96164383561644,
5.04657534246575, 7.04657534246575, 8.13424657534247, 6.13150684931507,
9.37808219178082, 7.96438356164384, 11.0493150684932, 12.213698630137,
2.12876712328767, 6.67397260273973, 4.46027397260274, 24.4739726027397,
3.75616438356164, 8.75890410958904, 18.7643835616438, 8.46301369863014,
5.13150684931507, 9.88219178082192, 9.33150684931507, 5.96164383561644,
7.25753424657534, 5.62739726027397, 7.50684931506849, 4.84109589041096,
3.46027397260274, 3.92054794520548, 18.5095890410959, 7.13150684931507,
8.59178082191781, 3.42191780821918, 6.75616438356164, 4.75616438356164,
2.95890410958904, 3.96164383561644, 20.4328767123288, 1.41917808219178,
21.4328767123288, 22.1041095890411, 2.09041095890411, 23.2219178082192,
3.20821917808219, 3.75616438356164, 24.7698630136986, 25.4739726027397,
4.46027397260274, 2.75970346198464, 3.45063169924232, 3.64711503029937,
4.21614538082358, 4.46897200885451, 4.78330432863798, 2.44945159376164,
3.0246987676982, 3.18637219409113, 2.82704067767991, 3.39633877759394,
4.01211492772477, 3.58227507536714, 3.26211339894416, 4.58021400951056,
3.24894671597498, 3.81640051029768, 4.29811641034397, 2.41235239871336,
4.16381972504066, 4.82320604897032, 2.85924829032196, 3.60102245001335,
4.38947242891732, 4.83225858506487, 2.32276057802484, 2.77466777269801,
3.53613679638981, 4.11732178923756, 4.70239811454869, 5.02684596680901,
4.89394362719873, 2.95858956427959, 3.67972641713649, 4.76329504168897,
4.91952178329268, 3.16243766754998, 4.53553009720828, 3.25401782240246,
4.57975033696973, 3.83648225102819, 4.85066377882221, 3.90946630290976,
4.85924427553214, 3.7665702648057, 3.63632959242775, 3.90561093988307,
4.82873462476434, 4.25993172383506, 3.47661513525158, 2.08147939877802,
3.04373161945452, 3.60670989847648, 4.16882946922235, 4.45461902145002,
4.34162315280691, 4.48802758727908, 1.86206866940942, 4.01581962612883,
2.69771901556242, 3.47661513525158, 4.29494911958988, 2.50903075277185,
3.20217588142397, 3.83011128813872, 3.980829920259, 4.28800748360142,
3.58227507536714, 4.30653857989399, 4.82319396445967, 4.50784277427558,
1.94072216406505, 1.96202863703256, 3.25200612691594, 4.52865722521467,
4.48533445537866, 4.26859229653523, 4.53731981080683, 4.27200767176305,
4.26457120140531, 4.2589693893104, 4.53525135174211, 2.21846989417729,
4.45880330075475, 4.40569121171677, 1.7068990376281, 2.57392106736465,
4.31836655689863, 2.66826592342152, 2.96442641830962, 2.16545787147435,
4.35324929683238, 4.32386522416303, 2.50903075277185, 3.39298854017977,
4.26232802572813, 1.70955300282675, 3.52741837222827, 4.25907996756536,
2.18557386001823, 3.77060768900683, 4.26395740412398, 2.18557386001823,
2.95157192131744, 3.53780855712753, 4.12926524629975, 4.46454130003024,
4.32641578537161, 2.63737387449928, 2.45327077112083, 3.28325793775465,
4.41181850146463, 3.01267661647382, 4.30273228804522, 3.0809392572752,
3.77893332429253, 4.80391883502991, 4.29751273896985, 4.9988952974639,
4.94101980513708, 4.96053762255352, 2.81894331471379, 4.91003396772332,
3.27522101695249, 4.87134979305318, 4.55194984744145, 3.20084942592193,
3.86347792664764, 5.03902446306499, 5.00360541053749, 4.93013307063464,
2.81894331471379, 4.89111202391337, 4.84484889948005, 3.62226986596664,
4.83000471370643, 3.87684000847915, 4.16249148956975, 4.82319682171546,
3.07342513615359, 3.58227507536714, 3.03776390795916, 2.39174353424389,
2.60441382154721, 2.83511983796416, 4.3776430683761, 3.28927099619789,
3.65030540624117, 4.78053976420514, 3.98851220598087, 3.84027419541323,
4.10888701756863, 3.08864056496118, 4.50784277427558, 4.80886282388512,
4.94393108371142, 5.0798798303101, 4.82563811883379, 3.9468402869224,
4.35189109140176, 4.11184051977588, 3.35459034230467, 4.02517710657812,
4.47676297113471, 4.01502608262676, 2.59176865382458, 3.96722952163555,
4.76721313996148, 3.21864402185455, 3.73613392685585, 4.2411779069777,
4.43398643591996, 3.61855452792053, 4.10980345553821, 4.31985435471796,
5.10135192405871, 5.04171773535286, 2.34892141444876, 3.5416097666887,
4.85054794533227, 4.83022204838818, 4.82316539945316, 4.76579152488111,
4.70585851301182, 4.77956222001954, 4.54973985879503, 3.38475186086735,
4.63579216766113, 3.33790973940492, 2.37103458556849, 3.75717008349914,
2.98272273951349, 4.28337045807013, 4.49267828838454, 3.9573829235615,
4.19209673281346, 3.74079199043232, 3.20084942592193, 5.10187826143602,
4.76293750094991, 4.97336905684238, 2.74996824339996, 2.65677488819793,
4.91172311677223, 3.99307443262087, 4.82526350378577, 4.8268073878022,
4.3776430683761, 4.83510606972691, 4.54522430364073, 4.85652654574101,
4.89686171640296, 2.87874743905149, 4.13483923719495, 4.03742401175551,
3.47139417367879, 3.66246558484484, 4.80424996332984, 4.94121262270349,
4.53454492449136, 4.53419598722523, 4.53454492449136, 4.47589167424468,
4.47589167424468, 4.42360799906947, 4.4074923871952, 4.39773506579433,
2.01907578318878, 4.38322128535129, 1.84538737297096, 2.41830268718988,
2.23480800909955, 3.16465475329972, 2.52618004042805, 4.56596058005526,
2.95157192131744, 3.00349899341948, 3.52972300068793, 3.75717008349914,
3.56654832534446, 4.26829419274457, 4.33246733302122, 4.47209933166968,
4.07829496689505, 4.54366179335202, 4.02412309850527, 4.29676089988903,
2.8695390825768, 3.60901246346208, 3.72904913668145, 2.56498963399193,
3.47139417367879, 2.8695390825768, 3.81640051029768, 2.81894331471379,
3.98051398879079, 3.58227507536714, 2.38916039360867, 3.31021811457649,
3.9468402869224, 3.83007753533476, 3.42794910435245, 4.59492002573633,
3.28024670656985, 2.4097817563527, 3.65030540624117, 3.74079199043232,
4.17176477555296, 4.35996196359352, 4.73359981456944, 4.90398228783007,
3.32409784536479, 4.87118581003356, 4.56666593488235, 4.60802805678997,
3.68916442604293, 5.02118529619339, 4.4514690518147, 4.39455164410878,
2.80677702722226, 3.05963754502245, 3.07193520904601, 4.2641136733005,
3.48305015568073, 4.25979660315916, 2.23480800909955, 4.26558395648925,
3.9282839050028, 1.89269207014021, 2.33130752327983, 3.07193520904601,
3.62770234943172, 4.18074663670204, 4.4514690518147, 4.34675554764834,
2.34036194508005, 4.20281875657975, 4.49267828838454, 4.86491005913173,
3.84127256835413, 4.19209673281346, 4.78844960315538, 4.93234475967529,
3.20188086629451, 4.5529065893626, 3.7797549774533, 3.21833597902597,
2.97302436839012, 3.38475186086735, 4.01502608262676, 4.82494685548552,
4.1309427087262, 2.34892141444876, 3.79122469981557, 2.74996824339996,
3.52882080169136, 2.90475632846179, 4.83425050183613, 4.41167553240461,
3.19775298791517, 5.05733248819791, 2.34500731502677, 3.86347792664764,
4.55194984744145, 4.8536532162969, 4.68580090262939, 3.8560422018531,
4.80424996332984, 2.83579574924565, 2.43541801827223, 3.26917867887851,
4.70658092262176, 2.41830268718988, 4.54205071405147, 3.70347651496055,
4.51179026913548, 3.87139934363606, 4.53702297816656, 4.24040027977525,
4.74578292053171, 4.17176477555296, 3.15984460601259, 3.9768746297032,
4.46949323821673, 3.9573829235615, 4.46949323821673, 2.79454182212808,
4.82981722014996, 4.89700936635987, 4.55003332824908, 4.04744453906246,
2.31225215646125, 3.17078397447093, 4.42804643606615, 3.02033214617563,
3.72568245460231, 4.25829574912718, 4.78226928394965, 4.92905316482749,
3.34086061969711, 4.04183801378363, 4.48319605565641, 4.30085456068988,
3.79122469981557, 4.45589006029623, 1.70822621819291, 4.63142865759546,
2.90699979029706, 3.35965176254463, 2.79019595892293, 3.77878982474076,
2.37522576957373, 4.27154459842833, 4.49758018625893, 2.97721538330697,
4.15244511731778, 3.73325024745895, 5.10139379386631, 4.17176477555296,
4.54618813621284, 3.39248194962301, 2.37881218415977, 4.1464619591812,
4.15316130101696, 4.70707172705424, 4.77267338855031, 2.03737184678255,
3.81939314106, 3.26312377270195, 3.87684000847915, 3.72904913668145,
4.21150941368065, 2.87482262642492, 2.41830268718988, 3.25814616479328,
1.95974385334363, 4.14957253152334, 3.51852796766856, 3.31248236073798,
3.93926475863266, 2.82676588308018, 2.25715273670682, 3.23877823069724,
3.47139417367879, 3.09057442380978, 4.50079498118852, 4.84503359841119,
3.05834404390917, 4.41167553240461, 4.55194984744145, 4.36569411888241,
4.83039001292176, 2.93850101323729, 4.22239549435516, 4.80886282388512,
4.94393108371142, 5.09584661299229, 5.05283820324407, 5.02319768413649,
4.67005370266932, 2.87068065591085, 4.19209673281346, 4.82343926231588,
3.91974901504793, 3.72904913668145, 4.25081432629887, 4.88033737221285,
4.53751058675557, 5.09889228357165, 5.09889228357165, 2.7120126164778,
3.20084942592193, 3.38475186086735, 2.57611729759449, 2.74406304100437,
4.46897200885461, 3.07664762906049, 3.58227507536714, 2.64466064962826,
4.66384481684621, 2.30962110926233, 3.91408252149265, 3.83967705403174,
4.55910078340349, 3.21628402040912, 2.43541801827223, 4.05100906700729,
4.22685401869853, 2.48748627144419, 3.29427230717969, 4.20248230878606,
4.46949323821673, 4.21150941368065, 4.73359981456944, 3.47139417367879,
3.81640051029768, 4.32499408591158, 4.53844903952499, 4.11184051977588,
4.73359981456944, 4.50784277427558, 4.92221662489616, 5.00972596707877,
2.76645294455757, 4.24203451863899, 3.63632959242775, 4.8438946910502,
3.40019013262738, 4.64268935027678, 5.04028605762641, 4.59492002573633,
3.84127256835413, 4.79892563273548, 4.41562762186783, 4.06871965116534,
4.36969042097439, 3.98051398879079, 4.42043658523483, 3.75496657158094,
3.29427230717969, 3.45730608585978, 5.04844935470778, 3.77878982474076,
4.05166542652766, 3.28024670656985, 4.26078052262918, 3.72904913668145,
3.10550475341744, 3.47139417367879, 4.41541846233588, 1.88505721037991,
4.3767970882106, 4.35153448481756, 2.18557386001823, 4.31311576433973,
2.6364550425402, 2.83579574924565, 4.27355104887647, 4.26316357649486,
3.07193520904601), .Dim = c(500L, 2L), .Dimnames = list(c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100",
"101", "102", "103", "104", "105", "106", "107", "108", "109",
"110", "111", "112", "113", "114", "115", "116", "117", "118",
"119", "120", "121", "122", "123", "124", "125", "126", "127",
"128", "129", "130", "131", "132", "133", "134", "135", "136",
"137", "138", "139", "140", "141", "142", "143", "144", "145",
"146", "147", "148", "149", "150", "151", "152", "153", "154",
"155", "156", "157", "158", "159", "160", "161", "162", "163",
"164", "165", "166", "167", "168", "169", "170", "171", "172",
"173", "174", "175", "176", "177", "178", "179", "180", "181",
"182", "183", "184", "185", "186", "187", "188", "189", "190",
"191", "192", "193", "194", "195", "196", "197", "198", "199",
"200", "201", "202", "203", "204", "205", "206", "207", "208",
"209", "210", "211", "212", "213", "214", "215", "216", "217",
"218", "219", "220", "221", "222", "223", "224", "225", "226",
"227", "228", "229", "230", "231", "232", "233", "234", "235",
"236", "237", "238", "239", "240", "241", "242", "243", "244",
"245", "246", "247", "248", "249", "250", "251", "252", "253",
"254", "255", "256", "257", "258", "259", "260", "261", "262",
"263", "264", "265", "266", "267", "268", "269", "270", "271",
"272", "273", "274", "275", "276", "277", "278", "279", "280",
"281", "282", "283", "284", "285", "286", "287", "288", "289",
"290", "291", "292", "293", "294", "295", "296", "297", "298",
"299", "300", "301", "302", "303", "304", "305", "306", "307",
"308", "309", "310", "311", "312", "313", "314", "315", "316",
"317", "318", "319", "320", "321", "322", "323", "324", "325",
"326", "327", "328", "329", "330", "331", "332", "333", "334",
"335", "336", "337", "338", "339", "340", "341", "342", "343",
"344", "345", "346", "347", "348", "349", "350", "351", "352",
"353", "354", "355", "356", "357", "358", "359", "360", "361",
"362", "363", "364", "365", "366", "367", "368", "369", "370",
"371", "372", "373", "374", "375", "376", "377", "378", "379",
"380", "381", "382", "383", "384", "385", "386", "387", "388",
"389", "390", "391", "392", "393", "394", "395", "396", "397",
"398", "399", "400", "401", "402", "403", "404", "405", "406",
"407", "408", "409", "410", "411", "412", "413", "414", "415",
"416", "417", "418", "419", "420", "421", "422", "423", "424",
"425", "426", "427", "428", "429", "430", "431", "432", "433",
"434", "435", "436", "437", "438", "439", "440", "441", "442",
"443", "444", "445", "446", "447", "448", "449", "450", "451",
"452", "453", "454", "455", "456", "457", "458", "459", "460",
"461", "462", "463", "464", "465", "466", "467", "468", "469",
"470", "471", "472", "473", "474", "475", "476", "477", "478",
"479", "480", "481", "482", "483", "484", "485", "486", "487",
"488", "489", "490", "491", "492", "493", "494", "495", "496",
"497", "498", "499", "500"), NULL))
For a relatively simple implementation, you can try as follows:
Consider a running slice of the data, with a constant width. The width must be larger than the largest gap.
Construct the upper and lower hull of the points, using the Monotone Chain algorithm (there is no need to sort, your points are already by increasing abscissa). https://en.wikibooks.org/wiki/Algorithm_Implementation/Geometry/Convex_hull/Monotone_chain
For the middle abscissa in the slice, estimate the two ordinates on the convex hull (by identifying the hull edges at that particular abscissa). By sweeping, this will create the desired curves. At both ends of the range, use all abscissas in the first and last half-slices.
There will be a small downward bias where the curvature is high and/or the window is large, but nobody's perfect.
The total running time will be proportional to the product of the number of points and the window width. But I guess that an incremental version can do better.
There is a linear programming approach to this, and you might find R packages that will do linear programming for you.
Find a space of smooth curves such as splines, where a smooth curve is a linear combination of basis functions.
Now solve the linear program to minimize the sum, taken over all points, of the value at that point minus the value from a linear combination of basis functions at that point, subject to the constraint that the linear combination of basis functions must be no larger than the value at that point.
I'm running a hurdle-type analysis on species distribution data, which involves two fitting steps. The first step is to model (m1) presence/absence data using all data with family=quasibinomial. The second step (m2) is to use positive, presence-only data with family=Gamma. This works wonderfully until I try to predict using the second model (m2) on the full dataset, at which point I receive an error due to new factor levels. I understand why I am receiving this error: there are factor levels that appear in the full dataset that are not present in the reduced (presence-only) dataset. My question is: how do I work around this error so that I can get predictions using the second model on the full dataset?
I am using mgcv.
Edit: Updated with additional code and data.
# Step 1 - GAM using full dataset for presence/absence
grays<-structure(list(Grid_ID = structure(c(39L, 51L, 52L, 67L), .Label = c("1",
"1,000", "1,001", "1,008", "1,009", "1,010", "1,011", "1,012",
"1,013", "1,014", "1,015", "1,016", "1,022", "1,023", "1,024",
"1,025", "1,026", "1,027", "1,028", "1,029", "1,034", "1,035",
"1,036", "1,037", "1,039", "1,040", "1,045", "1,046", "1,047",
"1,048", "1,053", "1,054", "1,055", "10", "100", "101", "103",
"104", "105", "106", "107", "108", "109", "11", "110", "118",
"119", "12", "122", "125", "126", "127", "128", "129", "13",
"130", "131", "132", "133", "14", "141", "142", "15", "150",
"151", "152", "153", "154", "155", "156", "157", "158", "159",
"160", "161", "162", "163", "167", "168", "169", "173", "174",
"175", "176", "177", "178", "179", "180", "181", "182", "183",
"184", "185", "188", "189", "190", "196", "197", "198", "199",
"2", "20", "200", "201", "202", "203", "204", "205", "206", "207",
"209", "210", "211", "219", "22", "220", "221", "222", "223",
"224", "225", "226", "227", "228", "229", "23", "230", "231",
"233", "234", "235", "236", "237", "24", "246", "247", "248",
"249", "25", "250", "252", "253", "254", "255", "256", "257",
"258", "259", "26", "260", "261", "267", "268", "269", "27",
"270", "271", "272", "273", "274", "275", "276", "277", "278",
"279", "28", "280", "281", "286", "287", "288", "289", "29",
"290", "291", "292", "293", "294", "295", "296", "297", "298",
"299", "3", "300", "301", "302", "303", "305", "306", "307",
"308", "309", "310", "311", "312", "313", "314", "315", "316",
"317", "318", "319", "320", "321", "326", "327", "328", "329",
"330", "331", "332", "333", "334", "335", "336", "337", "339",
"340", "341", "343", "344", "345", "346", "347", "348", "349",
"350", "351", "352", "355", "356", "357", "36", "360", "361",
"362", "363", "364", "365", "366", "367", "368", "369", "37",
"372", "373", "374", "38", "380", "381", "382", "383", "384",
"385", "386", "39", "391", "392", "397", "398", "399", "4", "40",
"400", "401", "402", "408", "409", "41", "410", "412", "413",
"414", "415", "416", "417", "42", "423", "424", "425", "426",
"43", "430", "431", "432", "433", "434", "44", "441", "442",
"443", "444", "447", "448", "449", "45", "450", "451", "458",
"459", "46", "460", "461", "462", "463", "464", "465", "466",
"470", "471", "472", "473", "474", "475", "476", "484", "485",
"486", "487", "488", "489", "490", "491", "492", "496", "497",
"498", "499", "5", "500", "501", "513", "514", "515", "516",
"517", "518", "523", "524", "525", "526", "527", "528", "529",
"54", "541", "542", "543", "544", "545", "55", "550", "551",
"552", "553", "554", "56", "569", "57", "570", "571", "572",
"573", "574", "578", "579", "580", "581", "582", "599", "60",
"600", "601", "602", "603", "604", "605", "606", "607", "608",
"609", "61", "610", "62", "626", "627", "628", "629", "63", "632",
"633", "634", "635", "636", "637", "638", "639", "64", "653",
"654", "655", "656", "657", "658", "659", "660", "663", "664",
"665", "666", "667", "668", "669", "670", "671", "672", "673",
"687", "688", "689", "690", "691", "692", "693", "696", "697",
"698", "699", "7", "700", "701", "702", "703", "704", "705",
"716", "717", "718", "720", "721", "722", "723", "724", "725",
"726", "727", "728", "739", "74", "740", "741", "746", "747",
"748", "749", "75", "750", "751", "752", "753", "754", "764",
"765", "768", "769", "77", "770", "771", "772", "773", "78",
"782", "783", "784", "788", "789", "79", "790", "798", "799",
"8", "80", "800", "801", "804", "805", "81", "812", "813", "814",
"815", "816", "819", "82", "820", "821", "827", "828", "829",
"83", "830", "831", "833", "834", "835", "836", "84", "842",
"843", "844", "845", "846", "849", "85", "850", "851", "852",
"853", "854", "860", "861", "862", "863", "864", "869", "870",
"871", "872", "873", "874", "88", "881", "882", "883", "884",
"885", "886", "89", "890", "891", "892", "893", "894", "9", "902",
"903", "904", "905", "906", "908", "909", "910", "911", "912",
"922", "923", "924", "925", "926", "927", "928", "929", "930",
"940", "941", "942", "943", "944", "945", "946", "947", "948",
"957", "958", "959", "96", "960", "961", "962", "963", "964",
"965", "966", "97", "976", "977", "978", "979", "980", "981",
"982", "983", "984", "992", "993", "994", "995", "996", "997",
"998", "999"), class = "factor"), Grid_Lat = c(56.85582097, 56.90062505,
56.90024495, 56.94461032), Grid_Long = c(153.4783612, 153.4777153,
153.3954873, 153.3124098), Er_Pres = c(0L, 0L, 0L, 0L), Er_Count = c(0L,
0L, 0L, 0L), Er_Count_Density = c(0, 0, 0, 0), Month = structure(c(8L,
8L, 8L, 8L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11"), class = "factor"), Year = structure(c(1L, 1L,
1L, 1L), .Label = c("1997", "1998", "1999", "2000", "2001", "2002",
"2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010",
"2011", "2012", "2013"), class = "factor"), chl = c(0.53747,
0.53747, 0.53747, 0.581741), SST = c(13.4171, 13.4171, 13.4171,
13.4025002), Bathymetry = c(76.11354065, 92.14147949, 90.60312653,
71.55316162), Grid_Area = c(25, 25, 25, 25), DFS = c(6.807817092,
4.233185446, 9.199096676, 5.153224038), Slope = c(0.13670446,
0.38316911, 0.08646853, 0.20038579), DOY = c(244L, 244L, 244L,
244L)), .Names = c("Grid_ID", "Grid_Lat", "Grid_Long", "Er_Pres",
"Er_Count", "Er_Count_Density", "Month", "Year", "chl", "SST",
"Bathymetry", "Grid_Area", "DFS", "Slope", "DOY"), row.names = c(NA,
4L), class = "data.frame")
# Presence/absence GAM fitted on the full dataset.
# The area offset is written as a formula term: supplied as a bare unnamed
# argument, offset(Grid_Area) was positionally matched to gam()'s `weights`
# formal instead of acting as an offset.
# NOTE(review): a formula offset is added on the link scale; with a logit/log
# link log(Grid_Area) may be what is intended -- confirm.
m1 <- gam(
  Er_Pres ~ s(Grid_Lat, Grid_Long, k = 10, bs = "tp") + Month + Year +
    s(SST, k = 5, bs = "tp") + offset(Grid_Area),
  family = quasibinomial(link = "logit"),
  data = grays,
  gamma = 1.4
)
# Step 2 - reduce dataset and run second GAM for positive abundance only.
grays2 <- subset(grays, Er_Pres > 0)
# `gamma` must be lower-case: R argument names are case-sensitive, so
# `Gamma = 1.4` never reached gam()'s `gamma` argument.
m2 <- gam(
  Er_Count ~ Year + s(Grid_Lat, Grid_Long, k = 10, bs = "tp") +
    s(SST, k = 5, bs = "tp") + s(sqrt(DFS), k = 5, bs = "tp") + Month +
    log10(chl) + offset(Grid_Area),
  family = Gamma(link = "log"),
  data = grays2,
  gamma = 1.4
)
Predicting with the second model on the full dataset gives me the following error:
Error in predict.gam(m2, newdata = full, type = "response") :
1997, 1998, 2006, 2007 not in original fit
This is an old post, so I suspect you have found a solution by now, but if not consider this:
If you only want to account for data within the same year being more similar than data across years, but you are not necessarily interested in the effect of particular years (say the difference between 2007 and 1998), then you could specify year as a random effect.
I believe there are several ways to do this, but in mgcv, you can specify:
s(Year, bs="re")
I'm trying to calculate sums and means on a very large dataset (~22000 records) for several parameters (e.g. Er_Count, Mn_Count) by month, year, Survey ID and Grid ID. I tried this code initially to get overall sums:
# Attempted call from the question: the second argument of dlply() is the set
# of grouping variables, so this groups by the three count columns and hands
# each group's whole data frame to sum().
dlply(Effort_All,c("Er_Count","Mn_Count","Bp_Count"),sum)
And received the following error:
Error: only defined on a data frame with all numeric variables
Since I cannot even get overall sums, I am unable to get statistics by the specific variables either. Do I need to split the data in some manner?
I have included a sample dataset of 25 records below.
structure(list(Grid_ID = structure(c(527L, 92L, 331L, 395L, 934L,
93L), .Label = c("1", "1,000", "1,001", "1,002", "1,003", "1,004",
"1,005", "1,006", "1,007", "1,008", "1,009", "1,010", "1,011",
"1,012", "1,013", "1,014", "1,015", "1,016", "1,017", "1,018",
"1,019", "1,020", "1,021", "1,022", "1,023", "1,024", "1,025",
"1,026", "1,027", "1,028", "1,029", "1,030", "1,031", "1,032",
"1,033", "1,034", "1,035", "1,036", "1,037", "1,038", "1,039",
"1,040", "1,041", "1,042", "1,043", "1,044", "1,045", "1,046",
"1,047", "1,048", "1,049", "1,050", "1,051", "1,052", "1,053",
"1,054", "1,055", "1,056", "1,057", "1,058", "1,059", "1,060",
"1,061", "10", "100", "101", "102", "103", "104", "105", "106",
"107", "108", "109", "11", "110", "111", "112", "113", "114",
"115", "116", "117", "118", "119", "12", "120", "121", "122",
"123", "124", "125", "126", "127", "128", "129", "13", "130",
"131", "132", "133", "134", "135", "136", "137", "138", "139",
"14", "140", "141", "142", "143", "144", "145", "146", "147",
"148", "149", "15", "150", "151", "152", "153", "154", "155",
"156", "157", "158", "159", "16", "160", "161", "162", "163",
"164", "165", "166", "167", "168", "169", "17", "170", "171",
"172", "173", "174", "175", "176", "177", "178", "179", "18",
"180", "181", "182", "183", "184", "185", "186", "187", "188",
"189", "19", "190", "191", "192", "193", "194", "195", "196",
"197", "198", "199", "2", "20", "200", "201", "202", "203", "204",
"205", "206", "207", "208", "209", "21", "210", "211", "212",
"213", "214", "215", "216", "217", "218", "219", "22", "220",
"221", "222", "223", "224", "225", "226", "227", "228", "229",
"23", "230", "231", "232", "233", "234", "235", "236", "237",
"238", "239", "24", "240", "241", "242", "243", "244", "245",
"246", "247", "248", "249", "25", "250", "251", "252", "253",
"254", "255", "256", "257", "258", "259", "26", "260", "261",
"262", "263", "264", "265", "266", "267", "268", "269", "27",
"270", "271", "272", "273", "274", "275", "276", "277", "278",
"279", "28", "280", "281", "282", "283", "284", "285", "286",
"287", "288", "289", "29", "290", "291", "292", "293", "294",
"295", "296", "297", "298", "299", "3", "30", "300", "301", "302",
"303", "304", "305", "306", "307", "308", "309", "31", "310",
"311", "312", "313", "314", "315", "316", "317", "318", "319",
"32", "320", "321", "322", "323", "324", "325", "326", "327",
"328", "329", "33", "330", "331", "332", "333", "334", "335",
"336", "337", "338", "339", "34", "340", "341", "342", "343",
"344", "345", "346", "347", "348", "349", "35", "350", "351",
"352", "353", "354", "355", "356", "357", "358", "359", "36",
"360", "361", "362", "363", "364", "365", "366", "367", "368",
"369", "37", "370", "371", "372", "373", "374", "375", "376",
"377", "378", "379", "38", "380", "381", "382", "383", "384",
"385", "386", "387", "388", "389", "39", "390", "391", "392",
"393", "394", "395", "396", "397", "398", "399", "4", "40", "400",
"401", "402", "403", "404", "405", "406", "407", "408", "409",
"41", "410", "411", "412", "413", "414", "415", "416", "417",
"418", "419", "42", "420", "421", "422", "423", "424", "425",
"426", "427", "428", "429", "43", "430", "431", "432", "433",
"434", "435", "436", "437", "438", "439", "44", "440", "441",
"442", "443", "444", "445", "446", "447", "448", "449", "45",
"450", "451", "452", "453", "454", "455", "456", "457", "458",
"459", "46", "460", "461", "462", "463", "464", "465", "466",
"467", "468", "469", "47", "470", "471", "472", "473", "474",
"475", "476", "477", "478", "479", "48", "480", "481", "482",
"483", "484", "485", "486", "487", "488", "489", "49", "490",
"491", "492", "493", "494", "495", "496", "497", "498", "499",
"5", "50", "500", "501", "502", "503", "504", "505", "506", "507",
"508", "509", "51", "510", "511", "512", "513", "514", "515",
"516", "517", "518", "519", "52", "520", "521", "522", "523",
"524", "525", "526", "527", "528", "529", "53", "530", "531",
"532", "533", "534", "535", "536", "537", "538", "539", "54",
"540", "541", "542", "543", "544", "545", "546", "547", "548",
"549", "55", "550", "551", "552", "553", "554", "555", "556",
"557", "558", "559", "56", "560", "561", "562", "563", "564",
"565", "566", "567", "568", "569", "57", "570", "571", "572",
"573", "574", "575", "576", "577", "578", "579", "58", "580",
"581", "582", "583", "584", "585", "586", "587", "588", "589",
"59", "590", "591", "592", "593", "594", "595", "596", "597",
"598", "599", "6", "60", "600", "601", "602", "603", "604", "605",
"606", "607", "608", "609", "61", "610", "611", "612", "613",
"614", "615", "616", "617", "618", "619", "62", "620", "621",
"622", "623", "624", "625", "626", "627", "628", "629", "63",
"630", "631", "632", "633", "634", "635", "636", "637", "638",
"639", "64", "640", "641", "642", "643", "644", "645", "646",
"647", "648", "649", "65", "650", "651", "652", "653", "654",
"655", "656", "657", "658", "659", "66", "660", "661", "662",
"663", "664", "665", "666", "667", "668", "669", "67", "670",
"671", "672", "673", "674", "675", "676", "677", "678", "679",
"68", "680", "681", "682", "683", "684", "685", "686", "687",
"688", "689", "69", "690", "691", "692", "693", "694", "695",
"696", "697", "698", "699", "7", "70", "700", "701", "702", "703",
"704", "705", "706", "707", "708", "709", "71", "710", "711",
"712", "713", "714", "715", "716", "717", "718", "719", "72",
"720", "721", "722", "723", "724", "725", "726", "727", "728",
"729", "73", "730", "731", "732", "733", "734", "735", "736",
"737", "738", "739", "74", "740", "741", "742", "743", "744",
"745", "746", "747", "748", "749", "75", "750", "751", "752",
"753", "754", "755", "756", "757", "758", "759", "76", "760",
"761", "762", "763", "764", "765", "766", "767", "768", "769",
"77", "770", "771", "772", "773", "774", "775", "776", "777",
"778", "779", "78", "780", "781", "782", "783", "784", "785",
"786", "787", "788", "789", "79", "790", "791", "792", "793",
"794", "795", "796", "797", "798", "799", "8", "80", "800", "801",
"802", "803", "804", "805", "806", "807", "808", "809", "81",
"810", "811", "812", "813", "814", "815", "816", "817", "818",
"819", "82", "820", "821", "822", "823", "824", "825", "826",
"827", "828", "829", "83", "830", "831", "832", "833", "834",
"835", "836", "837", "838", "839", "84", "840", "841", "842",
"843", "844", "845", "846", "847", "848", "849", "85", "850",
"851", "852", "853", "854", "855", "856", "857", "858", "859",
"86", "860", "861", "862", "863", "864", "865", "866", "867",
"868", "869", "87", "870", "871", "872", "873", "874", "875",
"876", "877", "878", "879", "88", "880", "881", "882", "883",
"884", "885", "886", "887", "888", "889", "89", "890", "891",
"892", "893", "894", "895", "896", "897", "898", "899", "9",
"90", "900", "901", "902", "903", "904", "905", "906", "907",
"908", "909", "91", "910", "911", "912", "913", "914", "915",
"916", "917", "918", "919", "92", "920", "921", "922", "923",
"924", "925", "926", "927", "928", "929", "93", "930", "931",
"932", "933", "934", "935", "936", "937", "938", "939", "94",
"940", "941", "942", "943", "944", "945", "946", "947", "948",
"949", "95", "950", "951", "952", "953", "954", "955", "956",
"957", "958", "959", "96", "960", "961", "962", "963", "964",
"965", "966", "967", "968", "969", "97", "970", "971", "972",
"973", "974", "975", "976", "977", "978", "979", "98", "980",
"981", "982", "983", "984", "985", "986", "987", "988", "989",
"99", "990", "991", "992", "993", "994", "995", "996", "997",
"998", "999"), class = "factor"), ER_Groups = c(2, 2, 2, 3, 5,
6), Er_Count = c(60, 75, 14, 12, 8, 26), Mn_Count = c(30, 9, 6, 33,
7, 12), Bp_Groups = c(1, 2, 1, 1, 0, 1), Bp_Count = c(3, 3, 2,
5, 0, 6), Mn_Groups = c(1, 1, 3, 1, 0, 0), Month = c(10L, 6L,
12L, 4L, 2L, 4L), Year = c(2000L, 2001L, 2009L, 2004L, 2002L,
2001L), SurveyID = structure(c(16L, 24L, 93L, 56L, 34L, 22L), .Label = c("199708HS",
"199808HS", "199908HS", "199909SSLQ", "199910SSL", "199911SSL",
"200001SSLQ", "200002SSL", "200003SSLQ", "200004SSLQ", "200005SSL",
"200006SSL", "200007SSL", "200008HS", "200008SSL", "200009SSL",
"200010SSL", "200011SSL", "200101SSL", "200102SSL", "200103SSL",
"200104SSL", "200105SSL", "200106SSL", "200107SSL", "200108HS",
"200108SSL", "200109SSL", "200110SSL", "200111SSL", "200112SSL",
"200201SSL", "200202SSL", "200203SSL", "200204SSL", "200205SSL",
"200206SSL", "200207SSL", "200208HS", "200208SSL", "200210SSL",
"200211SSL", "200212SSL", "200301SSL", "200302SSL", "200303SSL",
"200304SSL", "200305SSL", "200306SSL", "200307SSL", "200309SSL",
"200310SSL", "200311SSL", "200312SSL", "200403SSL", "200404SSL",
"200405SSL", "200406SSL", "200407SSL", "200408HS", "200408SSL",
"200409SSL", "200505SSL", "200506SSL", "200507SSL", "200510SSL",
"200512SSL", "200603SSL", "200609SSL", "200612SSL", "200709GAP07",
"200710GAP07", "200712GAP07", "200802GAP07", "200803GAP07", "200804GAP07",
"200805GAP07", "200806GAP07", "200807GAP07", "200808GAP07", "200809GAP08",
"200810GAP08", "200812GAP08", "200901GAP08", "200903GAP08", "200904GAP08",
"200905GAP08", "200906GAP08", "200907GAP08", "200908GAP08", "200909GAP08",
"200910GAP09", "200912GAP09", "201001GAP09", "201002GAP09", "201003GAP09",
"201004GAP09", "201005GAP09", "201006GAP09", "201007GAP09", "201008GAP09",
"201009GAP09", "201010GAP09", "201011GAP09", "201101GAP09", "201102GAP09",
"201103GAP09", "201104GAP09", "201106GAP09", "201108GAP09", "201109GAP09",
"201111GAP09", "201201GAP09", "201203GAP09", "201205GAP09", "201207GAP09",
"201208GAP09", "201211GAP09", "201301GAP09", "201303GAP09", "201305GAP09",
"201307GAP09", "201309GAP09", "201311GAP09"), class = "factor"),
Er_Group_Density = c(4, 9, 12, 4, 1, 0), Mn_Group_Density = c(3,
1, 1, 1, 0, 2), Bp_Group_Density = c(1, 2, 1, 0, 1, 0), Er_Count_Density = c(50,
14, 12, 9, 6, 4), Mn_Count_Density = c(9, 5, 2, 3, 2, 0), Bp_Count_Density = c(2,
3, 0, 4, 1, 0)), .Names = c("Grid_ID", "ER_Groups", "Er_Count",
"Mn_Count", "Bp_Groups", "Bp_Count", "Mn_Groups", "Month", "Year",
"SurveyID", "Er_Group_Density", "Mn_Group_Density", "Bp_Group_Density",
"Er_Count_Density", "Mn_Count_Density", "Bp_Count_Density"), row.names = c(2770L,
4421L, 17348L, 11263L, 6736L, 3974L), class = "data.frame")
There are a number of ways to get statistics by group. I'll assume you have a bias for plyr, since your example uses it.
Remember that dlply() splits the data into smaller dataframes by the grouping variables, then it applies the requested function to each of the smaller dataframes. Therefore the function you pass should operate on a whole dataframe. sum() does not do this. You can write your own function, though.
Based on your description, what you want is something like this
# Per-group summariser: given one group's data frame, return the column
# totals of the three count columns as a named numeric vector.
myfun <- function(x) {
  count_cols <- c("Er_Count", "Mn_Count", "Bp_Count")
  colSums(x[, count_cols])
}
# Split Effort_All by the four grouping variables and apply myfun to each
# piece; the result is a list with one element per group.
dlply(
  Effort_All,
  .variables = c("Month", "Year", "Grid_ID", "SurveyID"),
  .fun = myfun
)
Remember that the second argument to dlply() is the set of variables used for grouping. Not sure why you want the output as a list. Would it be easier to read if you used ddply (with the same arguments)?
Other approaches include using sqldf() or something like lapply().
=============== EDIT: Other approaches =============
sqldf is always very easy to read and understand:
# Same per-group totals expressed as SQL (sqldf package): one output row per
# Month / Year / Grid_ID / SurveyID combination, with the three count columns
# summed and returned as ercount, mncount and bpcount.
output <- sqldf('select Month,Year,Grid_ID,SurveyID,
sum(Er_Count) as ercount,
sum(Mn_Count) as mncount,
sum(Bp_Count) as bpcount
from Effort_All
group by Month, Year, Grid_ID, SurveyID')
lapply works pretty much the same way as dlply. Just different arguments.
Also, you could use colwise from plyr
# colwise() turns sum() into a function applied to each of the selected
# count columns; dlply() applies it to every group.
dlply(
  Effort_All,
  .variables = .(Month, Year, Grid_ID, SurveyID),
  .fun = colwise(sum, .cols = .(Er_Count, Mn_Count, Bp_Count))
)
Or summarise_each from dplyr
# Group by the four keys, then sum the three count columns within each group.
# NOTE: summarise_each() and funs() are deprecated in current dplyr releases;
# the modern spelling of the last step is
#   summarise(across(c(Er_Count, Mn_Count, Bp_Count), sum))
library(dplyr)
Effort_All%>%
group_by(Month, Year, Grid_ID, SurveyID) %>%
summarise_each(funs(sum), Er_Count, Mn_Count, Bp_Count)
#Source: local data frame [6 x 7]
#Groups: Month, Year, Grid_ID
# Month Year Grid_ID SurveyID Er_Count Mn_Count Bp_Count
# 1 2 2002 884 200203SSL 8 7 0
# 2 4 2001 126 200104SSL 26 12 6
# 3 4 2004 399 200404SSL 12 33 5
# 4 6 2001 125 200106SSL 75 9 3
# 5 10 2000 517 200009SSL 60 30 3
# 6 12 2009 340 200912GAP09 14 6 2
I have a data set that looks like this:
id ......... date sales
19164958 ......... 2001-09-01 .... 30
39578413 ......... 2001-09-01 .... 75.6
There are about 65k observations in the data set. The data is structured in 4 columns: id (which are non-consecutive in the range of 10 to 80 millions), churn, date and sales. It describes the spending of all customers for about 3/4 of a year.
Now I shall calculate the average spending of each customer. I have been given this code:
# Mean of `sales` per `id`, computed twice and merged on id and sales.
# NOTE(review): .SD, by= and .SDcols are data.table syntax; they are only
# understood inside `[` when `data` is a data.table -- presumably `data` is a
# plain data.frame here, which would explain ".SD" being unknown; confirm.
aggr.data <- merge(data[, lapply(.SD, mean), by = c("id"),.SDcols = c("sales")],
data[, lapply(.SD, mean), by = c("id"),.SDcols = c("sales")],
c("id", "sales"))
Now I have the problem that R does not recognise .SD.
Can anybody please tell me what I have to change to receive the results. Or does anybody know what other commands I can use to get the average spendings of each id?
Thank you for your help
dput(head(tel))
structure(list(id = c(19164958L, 39578413L, 43061957L, 51326773L,
54271247L, 70765025L), churn = c(0L, 0L, 0L, 0L, 0L, 0L), date = structure(c(11566,
11566, 11566, 11566, 11566, 11566), class = "Date"), sales = structure(c(522L,
849L, 649L, 649L, 522L, 649L), .Label = c("100", "100.2", "100.4",
"100.6", "100.8", "101", "101.2", "101.4", "101.6", "101.8",
"102", "102.4", "102.8", "103", "103.2", "103.4", "103.6", "103.8",
"104", "104.2", "104.4", "104.8", "105", "105.2", "105.6", "105.8",
"106", "106.2", "106.4", "106.6", "106.8", "107", "107.2", "107.4",
"107.6", "108", "108.2", "108.4", "108.6", "108.8", "109", "109.2",
"109.4", "109.6", "109.8", "110", "110.2", "110.4", "110.8",
"111", "111.2", "111.4", "111.6", "111.8", "112", "112.4", "112.6",
"112.8", "113.2", "113.4", "113.6", "114", "114.2", "114.4",
"114.8", "115.2", "115.6", "116", "116.2", "116.4", "116.8",
"117", "117.2", "117.4", "117.6", "117.8", "118", "118.4", "118.8",
"119.2", "119.6", "119.8", "120", "120.4", "120.6", "120.8",
"121.2", "121.4", "121.6", "121.8", "122", "122.2", "122.4",
"122.8", "123", "123.2", "123.6", "123.8", "124", "124.4", "124.8",
"125", "125.2", "125.4", "125.6", "125.8", "126", "126.4", "126.8",
"127", "127.2", "127.6", "127.8", "128", "128.4", "128.8", "129",
"129.2", "129.4", "129.6", "130", "130.2", "130.4", "130.8",
"131.2", "131.4", "131.6", "131.8", "132", "132.4", "132.8",
"133.2", "133.4", "133.6", "133.8", "134", "134.4", "134.8",
"135", "135.2", "135.6", "135.8", "136", "136.2", "136.4", "136.8",
"137.2", "137.6", "138", "138.4", "138.6", "138.8", "139.2",
"139.6", "140", "140.2", "140.4", "140.8", "141.2", "141.4",
"141.6", "142", "142.2", "142.4", "142.6", "142.8", "143.2",
"143.6", "144", "144.2", "144.4", "144.6", "144.8", "145.2",
"145.4", "145.6", "146", "146.4", "146.6", "146.8", "147.2",
"147.6", "147.8", "148", "148.2", "148.4", "148.6", "148.8",
"149.2", "149.6", "149.8", "150", "150.2", "150.4", "150.8",
"151", "151.2", "151.6", "152", "152.4", "152.8", "153", "153.2",
"153.6", "154", "154.4", "154.6", "154.8", "155.2", "155.6",
"155.8", "156", "156.2", "156.4", "156.6", "156.8", "157.2",
"157.4", "157.6", "157.8", "158", "158.4", "158.8", "159.2",
"159.4", "159.6", "160", "160.2", "160.4", "160.8", "161.2",
"161.4", "161.6", "162", "162.4", "162.8", "163", "163.2", "163.6",
"163.8", "164", "164.4", "164.8", "165", "165.2", "165.6", "166",
"166.4", "166.8", "167.2", "167.4", "167.6", "168", "168.4",
"168.8", "169.2", "169.6", "170", "170.2", "170.4", "170.8",
"171", "171.2", "171.6", "172", "172.4", "172.8", "173.2", "173.6",
"173.8", "174", "174.4", "174.8", "175.2", "175.6", "175.8",
"176.4", "176.8", "177", "177.2", "177.6", "178", "178.2", "178.4",
"178.8", "179.2", "179.4", "179.6", "179.8", "180", "180.4",
"180.8", "181", "181.2", "181.6", "182", "182.2", "182.4", "182.8",
"183.2", "183.6", "183.8", "184", "184.4", "184.8", "185.2",
"185.6", "186", "186.4", "187.2", "187.4", "187.6", "187.8",
"188", "188.4", "188.8", "189.2", "189.6", "189.8", "190", "190.4",
"190.8", "191.6", "192", "192.4", "192.8", "193.2", "193.6",
"194", "194.4", "194.8", "195.2", "195.6", "196.4", "196.8",
"197.2", "197.6", "197.8", "198", "198.2", "198.4", "198.8",
"199.2", "199.6", "200", "200.4", "200.8", "201.2", "201.6",
"202", "202.4", "202.8", "203.6", "204", "204.4", "204.8", "205.6",
"206", "206.4", "206.6", "206.8", "207.2", "207.6", "208", "208.4",
"208.8", "209.2", "209.6", "209.8", "210", "210.4", "210.6",
"210.8", "211.2", "211.6", "212.8", "213.2", "213.6", "214",
"214.4", "214.8", "215.2", "215.4", "216", "216.2", "216.8",
"217", "217.2", "217.6", "218.4", "218.8", "219.2", "219.6",
"220", "221", "221.2", "221.6", "221.8", "222", "222.4", "223.2",
"223.6", "224.4", "224.8", "225.2", "225.6", "226", "226.2",
"226.4", "226.6", "227.2", "227.8", "228", "228.4", "228.8",
"229.2", "229.6", "229.8", "230", "230.4", "230.8", "231.6",
"232.2", "232.4", "232.8", "233.2", "233.6", "234.4", "234.8",
"235.6", "235.8", "236", "237.2", "237.4", "237.6", "238", "239.2",
"240.4", "240.8", "241.2", "241.6", "242", "242.4", "243.4",
"243.6", "244.6", "245.2", "245.6", "246", "246.4", "247.2",
"248", "249.6", "250", "250.4", "250.8", "251.2", "251.6", "252.8",
"254.4", "254.8", "255.2", "255.4", "255.6", "256", "256.4",
"256.8", "257.2", "257.6", "258.8", "259.2", "260", "261.6",
"262", "262.4", "262.8", "263.2", "263.6", "264", "264.4", "264.8",
"266", "266.8", "267.2", "267.6", "268.4", "270", "270.2", "270.4",
"271", "271.2", "271.6", "272.4", "272.8", "273.2", "274", "274.4",
"275.2", "275.6", "276", "276.8", "278.8", "279.2", "279.6",
"280", "281.6", "282", "282.6", "283.2", "284.8", "285.6", "287.2",
"289.6", "290.4", "291.2", "293.2", "295.2", "296", "296.8",
"298", "299", "30", "30.2", "30.4", "30.6", "30.8", "300.8",
"301.2", "301.6", "302.8", "303.6", "304", "304.4", "305.2",
"306", "306.4", "307.2", "308.8", "309.2", "31", "31.2", "31.4",
"31.6", "31.8", "310.8", "313.2", "313.6", "314", "315", "315.6",
"316", "316.4", "316.8", "317", "318.4", "319.6", "32", "32.2",
"32.4", "32.6", "32.8", "322", "324.8", "326.4", "326.8", "327.2",
"328.4", "329.2", "329.6", "33", "33.2", "33.4", "33.6", "33.8",
"331.6", "332.4", "332.8", "334", "338.4", "338.6", "339.2",
"34", "34.2", "34.4", "34.6", "34.8", "340", "341.2", "342",
"342.4", "347.2", "347.6", "35", "35.2", "35.4", "35.6", "35.8",
"350", "352.8", "353.2", "354", "354.8", "355.6", "357.6", "36",
"36.2", "36.4", "36.6", "36.8", "360.8", "361.6", "362", "362.4",
"363.6", "365.6", "367.6", "368", "368.4", "369.6", "37", "37.2",
"37.4", "37.6", "37.8", "371.6", "372.4", "375.6", "377", "38",
"38.2", "38.4", "38.6", "38.8", "382.6", "384.8", "385.2", "387.2",
"388", "388.4", "39", "39.2", "39.4", "39.6", "39.8", "390.4",
"391.2", "397.6", "399.6", "40", "40.2", "40.4", "40.6", "40.8",
"405.2", "408.8", "41", "41.2", "41.4", "41.6", "41.8", "411.6",
"414.4", "419.2", "42", "42.2", "42.4", "42.6", "42.8", "43",
"43.2", "43.4", "43.6", "43.8", "430.2", "432.4", "437.2", "438",
"439.6", "44", "44.2", "44.4", "44.6", "44.8", "444.8", "45",
"45.2", "45.4", "45.6", "45.8", "450", "454", "455.6", "46",
"46.2", "46.4", "46.6", "46.8", "47", "47.2", "47.4", "47.6",
"47.8", "473.2", "474", "475.6", "48", "48.2", "48.4", "48.6",
"48.8", "482.4", "49", "49.2", "49.4", "49.6", "49.8", "50",
"50.2", "50.4", "50.6", "50.8", "500", "503.2", "51", "51.2",
"51.4", "51.6", "51.8", "52", "52.2", "52.4", "52.6", "52.8",
"521.6", "53", "53.2", "53.4", "53.6", "53.8", "54", "54.2",
"54.4", "54.6", "54.8", "55", "55.2", "55.4", "55.6", "55.8",
"550", "56", "56.2", "56.4", "56.6", "56.8", "57", "57.2", "57.4",
"57.6", "57.8", "58", "58.2", "58.4", "58.6", "58.8", "59", "59.2",
"59.4", "59.6", "59.8", "60", "60.2", "60.4", "60.6", "60.8",
"61", "61.2", "61.4", "61.6", "61.8", "62", "62.2", "62.4", "62.6",
"62.8", "63", "63.2", "63.4", "63.6", "63.8", "64", "64.2", "64.4",
"64.6", "64.8", "65", "65.2", "65.4", "65.6", "65.8", "66", "66.2",
"66.4", "66.6", "66.8", "67", "67.2", "67.4", "67.6", "67.8",
"68", "68.2", "68.4", "68.6", "68.8", "69", "69.2", "69.4", "69.6",
"69.8", "70", "70.2", "70.4", "70.6", "70.8", "71", "71.2", "71.4",
"71.6", "71.8", "72", "72.2", "72.4", "72.6", "72.8", "73", "73.2",
"73.4", "73.6", "73.8", "74", "74.2", "74.4", "74.6", "74.8",
"75", "75.2", "75.4", "75.6", "75.8", "76", "76.2", "76.4", "76.6",
"76.8", "77", "77.2", "77.4", "77.6", "77.8", "78", "78.2", "78.4",
"78.6", "78.8", "79", "79.2", "79.4", "79.6", "79.8", "80", "80.2",
"80.4", "80.6", "80.8", "81", "81.2", "81.4", "81.6", "81.8",
"82", "82.2", "82.4", "82.6", "82.8", "83", "83.2", "83.4", "83.6",
"83.8", "84", "84.2", "84.4", "84.6", "84.8", "85", "85.2", "85.4",
"85.6", "85.8", "86", "86.4", "86.6", "86.8", "87", "87.2", "87.6",
"87.8", "88", "88.2", "88.4", "88.6", "88.8", "89", "89.2", "89.6",
"89.8", "90", "90.2", "90.4", "90.6", "90.8", "91", "91.2", "91.4",
"91.6", "91.8", "92", "92.2", "92.4", "92.6", "92.8", "93", "93.2",
"93.4", "93.6", "93.8", "94", "94.2", "94.4", "94.6", "94.8",
"95", "95.2", "95.4", "95.6", "95.8", "96", "96.2", "96.4", "96.6",
"96.8", "97", "97.2", "97.4", "97.6", "97.8", "98", "98.2", "98.4",
"98.6", "98.8", "99", "99.2", "99.4", "99.6", "99.8"), class = "factor")), .Names = c("id",
"churn", "date", "sales"), row.names = c(NA, 6L), class = "data.frame")
# `sales` is stored as a factor in the posted data; convert via character so
# the printed values (not the internal level codes) become the numbers.
mydf$sales <- as.numeric(as.character(mydf$sales))
Using base R
# Mean of sales within each id; the result is named by id.
tapply(X = mydf$sales, INDEX = mydf$id, FUN = mean)
where mydf is your dataframe
Using data.table package
# data.table: convert once, then evaluate mean(sales) in `j` for each id
# (the unnamed result column is called V1).
library(data.table)
DT <- data.table(mydf)
DT[, mean(sales), by = "id"]
Using plyr package
# plyr: split mydf by id and apply summarise() to each piece, giving one row
# per id with its mean sales. The function argument is required: without it
# `meansales = mean(sales)` is never applied and no per-id mean is produced.
library(plyr)
ddply(mydf, .(id), summarise, meansales = mean(sales))