I have some Stata code and I want to replicate its results in R. However, even with the same dataset and what I think is equivalent code, I get different results in R from those in Stata. I suspect this is because Stata enters the terms into the regression in a different order than the one I typed.
Do I need exactly the same order as in Stata to get the same results and how can I do this?
I changed all the variables to factors and tried again but the problem is still there.
I noticed that when I change the order of the explanatory variables I get different results, but I can't find "the right order" to replicate the Stata results.
Stata code:
. anova testm2 c.testm1 i.hptreat c.cortm1 c.cortm2 i.female if inelig == 0 & anyoutv1 == 0
Number of obs =39 R-squared =0.7048
Root MSE= 16.0144 Adj R-squared =0.6601
Source | Partial SS df MS F Prob>F
---------------------------------------------------------------
Model | 20209.281 5 4041.8563 15.76 0.0000
testm1 | 3516.6527 1 3516.6527 13.71 0.0008
hptreat| 1183.5007 1 1183.5007 4.61 0.0391
cortm1 | 8.5753841 1 8.5753841 0.03 0.8560
cortm2 | 2810.9353 1 2810.9353 10.96 0.0023
female | 2557.3444 1 2557.3444 9.97 0.0034
Residual| 8463.2532 33 256.46222
----------------------------------------------------------------
Total | 28672.535 38 754.54038
R code:
# Fit the model with all five explanatory terms, then print its ANOVA table.
FosseTest <- aov(
  testm2 ~ testm1 + hptreat + cortm1 + cortm2 + female,
  data = X2data
)
summary(FosseTest)
Df Sum Sq Mean Sq F value Pr(>F)
testm1 1 15121 15121 58.962 7.68e-09 ***
hptreat 1 524 524 2.043 0.16228
cortm1 1 23 23 0.089 0.76715
cortm2 1 1984 1984 7.735 0.00888 **
female 1 2557 2557 9.972 0.00339 **
Residuals 33 8463 256
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
You can see that I get totally different values in the replication.
in the X2data Set I already subset the values for if inelig == 0 & anyoutv1 == 0
for the reconstruction of the data:
dput(X2data)
structure(list(id = c(29L, 30L, 31L, 32L, 34L, 35L, 36L, 37L,
39L, 41L, 42L, 43L, 44L, 46L, 47L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 57L, 58L, 59L, 60L, 61L, 62L, 64L, 65L, 66L, 67L, 68L, 69L,
70L, 71L, 72L, 73L, 74L), inelig = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Analytic sample (keep)", "Ineligible (drop)"
), class = "factor"), ccydrop = c(0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), cortm1v2 = c(0.003, 0.086, 0.047, 0.106, NA, 0.153, 0.086,
0.005, 0.133, 0.036, 0.03, 0.015, 0.014, 0.111, 0.389, 0.298,
0.4, 0.215, 0.062, 0.021, 0.075, 0.073, 0.033, 0.243, 0.126,
0.147, 0.019, 0.048, 0.28, 0.052, 0.039, 0.105, 0.111, 0.133,
0.065, 0.051, 0.143, 0.127, 0.095), cortm2v2 = c(0.025, 0.167,
0.059, 0.112, 0.171, 0.183, 0.102, 0.018, 0.08, 0.015, 0.027,
0.05, 0.025, 0.046, 0.085, 0.144, 0.155, 0.09, 0.057, 0.023,
0.038, 0.205, 0.035, 0.198, 0.112, 0.211, 0.042, 0.142, 0.328,
0.076, 0.067, 0.094, 0.245, 0.153, 0.115, 0.127, 0.257, 0.125,
0.096), cdiffv2 = c(0.022, 0.081, 0.012, 0.006, NA, 0.03, 0.016,
0.013, -0.053, -0.021, -0.003, 0.035, 0.011, -0.065, -0.304,
-0.154, -0.245, -0.125, -0.005, 0.002, -0.037, 0.132, 0.002,
-0.045, -0.014, 0.064, 0.023, 0.094, 0.048, 0.024, 0.028, -0.011,
0.134, 0.02, 0.05, 0.076, 0.114, -0.002, 0.001), testm1v2 = c(38.72,
32.77, 32.32, 17.99, 73.58, 80.69, 48.56, 21.92, 27.24, 40.93,
31.73, 60.05, 38.04, 30.17, 59.07, 26.92, 25.41, 47.81, 63.02,
34.49, 104.38, 38.08, 30.99, 35.23, 104.81, 49.33, 50.03, 11.65,
143.57, 48.31, 90.37, 48.56, 41.67, 75.23, 60.56, 39.03, 18.16,
37.9, 84.5), testm2v2 = c(62.37, 29.23, 27.51, 28.66, 44.67,
105.48, 42.67, 15.01, 21.33, 10.87, 2.14, 44.53, 35.8, 10.43,
47.54, 48.5, 38.98, 91.32, 52.94, 22.43, 58.68, 81.63, 34.79,
38.57, 94.86, 50.83, 55.75, 45.33, 111.62, 65.15, 81.08, 50.08,
44.86, 58.63, 85.85, 58.69, 16.35, 35.97, 99.08), tdiffv2 = c(23.65,
-3.54, -4.81, 10.67, -28.91, 24.79, -5.89, -6.91, -5.91, -30.06,
-29.59, -15.52, -2.24, -19.74, -11.53, 21.58, 13.57, 43.51, -10.08,
-12.06, -45.7, 43.55, 3.8, 3.34, -9.95, 1.5, 5.72, 33.68, -31.95,
16.84, -9.29000000000001, 1.52, 3.19, -16.6, 25.29, 19.66, -1.81,
-1.93, 14.58), testoutv1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Not selected", "Selected"), class = "factor"),
cortoutv1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
anyoutv1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
testoutv2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
cortoutv2 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
anyoutv2 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Not selected", "Selected"), class = "factor"),
pose1rate = c(6L, 7L, 6L, 6L, 7L, 7L, 6L, 7L, 5L, 6L, 7L,
4L, 7L, 7L, 7L, 6L, 7L, 7L, 7L, 7L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), pose2rate = c(6L,
6L, 5L, 7L, 7L, 7L, 7L, 7L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 6L, 7L, 6L, 7L, 7L, 7L,
6L, 7L, 7L, 7L, 7L, 7L, 6L, 6L), poseratem = c(6, 6.5, 5.5,
6.5, 7, 7, 6.5, 7, 5.5, 6.5, 7, 5.5, 7, 7, 7, 6, 6.5, 7,
7, 7, 6.5, 7, 7, 7, 7, 6.5, 7, 6.5, 7, 7, 7, 6.5, 7, 7, 7,
7, 7, 6.5, 6.5), saldiff = c(24.30555556, 20.83333333, 29.16666667,
18.75, 23.61111111, 34.02777778, 18.05555556, 19.44444444,
21.52777778, 15.97222222, 22.91666667, 13.88888889, 22.22222222,
25, 22.22222222, 22.22222222, 18.05555556, 17.36111111, 22.22222222,
27.08333333, 20.83333333, 24.30555556, 22.22222222, 28.47222222,
24.30555556, 25, 27.77777778, 22.22222222, 15.97222222, 24.30555556,
21.52777778, 19.44444444, 15.97222222, 15.27777778, 15.97222222,
24.30555556, 19.44444444, 24.30555556, 15.27777778), sal2manip = c(19.80555556,
16.33333333, 24.66666667, 14.25, 19.11111111, 29.52777778,
13.55555556, 14.94444444, 17.02777778, 11.47222222, 18.41666667,
9.38888889, 17.72222222, 20.5, 17.72222222, 17.72222222,
13.55555556, 12.86111111, 17.72222222, 22.58333333, 16.33333333,
19.80555556, 17.72222222, 23.97222222, 19.80555556, 20.5,
23.27777778, 17.72222222, 11.47222222, 19.80555556, 17.02777778,
14.94444444, 11.47222222, 10.77777778, 11.47222222, 19.80555556,
14.94444444, 19.80555556, 10.77777778), hptreat = structure(c(2L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("0", "1"), class = "factor"),
female = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L
), .Label = c("0", "1"), class = "factor"), age = c(19L,
20L, 20L, 18L, 21L, 20L, 18L, 21L, 35L, 20L, 18L, 20L, 20L,
18L, 20L, 25L, 18L, 23L, 21L, 19L, 20L, 20L, 30L, 19L, 22L,
18L, 19L, 22L, 19L, 20L, 28L, 28L, 19L, 19L, 20L, 25L, 20L,
25L, 23L), cort1a1 = c(0.004, 0.085, 0.049, 0.107, 0.486,
0.159, 0.088, 0.004, 0.138, 0.035, 0.03, 0.018, 0.017, 0.111,
0.39, 0.292, 0.396, 0.213, 0.065, 0.022, 0.074, 0.077, 0.035,
0.241, 0.126, 0.154, 0.021, 0.05, 0.296, 0.054, 0.04, 0.109,
0.114, 0.133, 0.063, 0.055, 0.149, 0.134, 0.098), cort1a2 = c(0.001,
0.086, 0.045, 0.105, 0.482, 0.147, 0.085, 0.005, 0.127, 0.037,
0.031, 0.013, 0.011, 0.111, 0.389, 0.304, 0.405, 0.218, 0.059,
0.02, 0.076, 0.069, 0.032, 0.246, 0.126, 0.141, 0.017, 0.046,
0.264, 0.051, 0.038, 0.101, 0.109, 0.133, 0.068, 0.048, 0.137,
0.12, 0.092), cort2a1 = c(0.027, 0.174, 0.056, 0.111, 0.175,
0.179, 0.103, 0.021, 0.079, 0.014, 0.028, 0.051, 0.024, 0.051,
0.083, 0.148, 0.156, 0.086, 0.062, 0.024, 0.038, 0.209, 0.036,
0.199, 0.114, 0.207, 0.041, 0.141, 0.333, 0.078, 0.065, 0.088,
0.238, 0.157, 0.119, 0.132, 0.268, 0.132, 0.099), cort2a2 = c(0.023,
0.161, 0.062, 0.113, 0.166, 0.188, 0.101, 0.016, 0.081, 0.015,
0.026, 0.049, 0.026, 0.041, 0.086, 0.139, 0.154, 0.093, 0.052,
0.022, 0.038, 0.202, 0.034, 0.198, 0.111, 0.215, 0.042, 0.142,
0.324, 0.075, 0.068, 0.101, 0.252, 0.149, 0.111, 0.123, 0.247,
0.118, 0.093), cortm1 = c(0.0024999999, 0.085500002, 0.046999998,
0.106, 0.484, 0.153, 0.086499996, 0.0044999998, 0.13249999,
0.035999998, 0.0305, 0.0155, 0.014, 0.111, 0.38949999, 0.29800001,
0.4005, 0.2155, 0.061999999, 0.021, 0.075000003, 0.072999999,
0.033500001, 0.24349999, 0.126, 0.14749999, 0.018999999,
0.048, 0.28, 0.052499998, 0.039000001, 0.105, 0.1115, 0.133,
0.065499999, 0.0515, 0.14300001, 0.127, 0.094999999), cortm2 = c(0.025,
0.1675, 0.059, 0.112, 0.1705, 0.18350001, 0.102, 0.0185,
0.079999998, 0.0145, 0.027000001, 0.050000001, 0.025, 0.046,
0.0845, 0.1435, 0.155, 0.089500003, 0.057, 0.023, 0.037999999,
0.20550001, 0.035, 0.19850001, 0.1125, 0.211, 0.041499998,
0.1415, 0.3285, 0.076499999, 0.066500001, 0.094499998, 0.245,
0.153, 0.115, 0.1275, 0.25749999, 0.125, 0.096000001), cdiff = c(0.022500001,
0.082000002, 0.012000002, 0.0060000047, -0.31349999, 0.03050001,
0.015500002, 0.014, -0.052499995, -0.021499999, -0.0034999996,
0.034500003, 0.011, -0.064999998, -0.30500001, -0.15450001,
-0.2455, -0.12599999, -0.004999999, 0.0020000003, -0.037000004,
0.13250001, 0.0014999993, -0.044999987, -0.013500005, 0.063500002,
0.022499999, 0.093499996, 0.048500001, 0.024, 0.0275, -0.010499999,
0.13350001, 0.019999996, 0.049500003, 0.075999998, 0.11449999,
-0.0020000041, 0.001000002), test1a1 = c(39.87, 33.22, 32.52,
19.74, 78.85, 83.51, 48.37, 22.31, 28.17, 41.44, 32.92, 61.4,
40.31, 30.36, 59.44, 27.52, 26.14, 46.75, 63.73, 34.03, 98.47,
36.62, 30.26, 37.15, 105.64, 47.99, 50.15, 11.33, 149.12,
48.57, 92.04, 51.22, 42.25, 77.07, 62.75, 38.8, 17.91, 40.28,
88.47), test1a2 = c(37.58, 32.32, 32.12, 16.25, 68.31, 77.88,
48.75, 21.53, 26.32, 40.42, 30.55, 58.7, 35.78, 29.97, 58.7,
26.32, 24.69, 48.87, 62.32, 34.95, 110.29, 39.53, 31.72,
33.32, 103.99, 50.67, 49.9, 11.97, 138.02, 48.05, 88.7, 45.89,
41.08, 73.39, 58.38, 39.25, 18.41, 35.53, 80.54), test2a1 = c(64.22,
29.43, 27.98, 28.17, 46.14, 105.92, 43.68, 16.41, 21.42,
11.35, 1.66, 44.17, 38.58, 11.11, 48.57, 48.31, 39.71, 92.04,
52.73, 22.3, 58.23, 82.01, 35.76, 39.59, 94.06, 50.52, 55.82,
45.91, 115.13, 67.59, 82.97, 49.89, 45.09, 57.86, 86.76,
58.83, 16.53, 36.7, 100.4), test2a2 = c(60.53, 29.04, 27.04,
29.14, 43.2, 105.05, 41.66, 13.62, 21.25, 10.39, 2.63, 44.9,
33.02, 9.75, 46.52, 48.7, 38.25, 90.59, 53.15, 22.57, 59.14,
81.24, 33.81, 37.55, 95.66, 51.14, 55.69, 44.74, 108.1, 62.71,
79.18, 50.27, 44.63, 59.39, 84.94, 58.55, 16.16, 35.24, 97.75
), testm1 = c(38.724998, 32.77, 32.32, 17.995001, 73.580002,
80.695, 48.560001, 21.92, 27.245001, 40.93, 31.735001, 60.049999,
38.044998, 30.165001, 59.07, 26.92, 25.415001, 47.810001,
63.025002, 34.490002, 104.38, 38.075001, 30.99, 35.235001,
104.815, 49.330002, 50.025002, 11.65, 143.57001, 48.310001,
90.370003, 48.555, 41.665001, 75.230003, 60.564999, 39.025002,
18.16, 37.904999, 84.504997), testm2 = c(62.375, 29.235001,
27.51, 28.655001, 44.669998, 105.485, 42.669998, 15.015,
21.334999, 10.87, 2.145, 44.535, 35.799999, 10.43, 47.544998,
48.505001, 38.98, 91.315002, 52.939999, 22.434999, 58.685001,
81.625, 34.785, 38.57, 94.860001, 50.830002, 55.755001, 45.325001,
111.615, 65.150002, 81.074997, 50.080002, 44.860001, 58.625,
85.849998, 58.689999, 16.344999, 35.970001, 99.074997), tdiff = c(23.650002,
-3.5349998, -4.8099995, 10.66, -28.910004, 24.790001, -5.8900032,
-6.9049997, -5.9100018, -30.060001, -29.59, -15.514999, -2.2449989,
-19.735001, -11.525002, 21.585001, 13.564999, 43.505001,
-10.085003, -12.055002, -45.694996, 43.549999, 3.7950001,
3.3349991, -9.9550018, 1.5, 5.7299995, 33.675003, -31.955009,
16.84, -9.2950058, 1.5250015, 3.1949997, -16.605003, 25.285,
19.664997, -1.8150005, -1.9349976, 14.57), feelpower = structure(c(2L,
3L, 1L, 2L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 3L, 1L, 2L, 1L,
1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 4L, 3L,
4L, 3L, 1L, 3L, 4L, 2L, 2L, 3L), .Label = c("2", "3", "Not at all",
"Very much"), class = "factor"), incharge = structure(c(1L,
1L, 3L, 4L, 1L, 2L, 3L, 3L, 1L, 1L, 3L, 4L, 3L, 2L, 2L, 1L,
3L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 3L, 1L, 1L, 4L, 3L, 1L, 1L), .Label = c("2", "3", "Not at all",
"Very much"), class = "factor"), powm = structure(c(3L, 1L,
1L, 5L, 2L, 4L, 6L, 6L, 1L, 1L, 6L, 7L, 6L, 3L, 4L, 2L, 1L,
4L, 4L, 3L, 2L, 4L, 2L, 2L, 3L, 3L, 3L, 4L, 1L, 5L, 1L, 4L,
6L, 2L, 1L, 7L, 2L, 3L, 1L), .Label = c("1.5", "2", "2.5",
"3", "3.5", "Not at all", "Very much"), class = "factor"),
diceroll = structure(c(2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L
), .Label = c("No", "Yes"), class = "factor")), row.names = c(2L,
3L, 4L, 5L, 7L, 8L, 9L, 10L, 12L, 14L, 15L, 16L, 17L, 19L, 20L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 30L, 31L, 32L, 33L, 34L, 35L,
37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L), class = "data.frame")
You can get the same results in R using drop1(FosseTest, test = "F"). This tests the effect of dropping each variable, one at a time, from the full model, which matches the "Partial SS" column that Stata reports.
# Single-term deletions: F test for dropping each term from the full model
drop1(FosseTest, test = "F")
#
# Single term deletions
#
# Model:
# testm2 ~ testm1 + hptreat + cortm1 + cortm2 + female
# Df Sum of Sq RSS AIC F value Pr(>F)
# <none> 8463.3 221.82
# testm1 1 3516.7 11979.9 233.37 13.7122 0.0007751 ***
# hptreat 1 1183.5 9646.8 224.92 4.6147 0.0391333 *
# cortm1 1 8.6 8471.8 219.86 0.0334 0.8560279
# cortm2 1 2810.9 11274.2 231.00 10.9604 0.0022605 **
# female 1 2557.3 11020.6 230.11 9.9716 0.0033895 **
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(FosseTest) displays the sequential effect of adding the variables one after another, which is why its results change when the order of the terms changes.
There is another way to obtain this, but at the moment I can't remember it...
I want to loop through the dataframe Out using a window that:
Grows one increment at a time (so the rear of the window is fixed and the front of the window grows - window gets bigger)
At each increment, the following rules should be run over the window:
if (mean(Speed_out) <= 0.152682)
Behaviour <- Lying
else if (Movement_Out == “left”) <= 20.8 && (mean(Speed_Out) >=
0.200921)
Behaviour <- Grazing
If no rules are met then the window should grow one increment at a time until a rule is met.
Once a rule is met, all of the previous increments should be labelled with the Behaviour assigned to that rule above.
The next window should then start at the next element after where the last window terminated.
The initial window size should be adjustable (the window size at the start and after each terminated window).
Notes:
The condition (Movement_Out == “left”) <= 20.8 means that "left" occupies at most 20.8% of the rows in the window.
Example:
Here's a short example of the output I'd like from the data provided below where the starting window size was set to 4:
Speed_Out Movement_Out Behaviour
1 0.220 left Lying
2 0.155 left Lying
3 0.120 forward Lying
4 0.090 non-moving Lying <== window terminates here
5 0.125 non-moving Grazing <== new window starts here
6 0.125 non-moving Grazing
7 0.155 non-moving Grazing
8 0.340 forward Grazing
9 0.370 forward Grazing <== window terminates here
10 0.185 forward Grazing <== new window starts here
11 0.155 right Grazing
12 0.220 non-moving Grazing
13 0.220 non-moving Grazing
14 0.280 non-moving Grazing <== window terminates here
15 0.215 non-moving Grazing <== new window starts here
16 0.060 right Grazing
17 0.340 non-moving Grazing
18 0.555 forward Grazing <== window terminates here
19 0.275 right And so on..
20 0.215 forward
Dataframe for your use
Out <- structure(list(Speed_Out = c(0.22, 0.155, 0.12, 0.09, 0.125,
0.125, 0.155, 0.34, 0.37, 0.185, 0.155, 0.22, 0.22, 0.28, 0.215,
0.06, 0.34, 0.555, 0.275, 0.215, 0.185, 0.06, 0.245, 0.31, 0.345,
0.375, 0.375, 0.87, 1.025, 0.405, 0, 0.185, 0.31, 0.155, 0.125,
0.22, 0.375, 0.345, 0.345, 0.405, 0.31, 0.34, 0.245, 0.155, 0.19,
0.22, 0.185, 0.12, 0.185, 0.155, 0.245, 0.31, 0.155, 0.155, 0.25,
0.215, 0.09, 0.06, 0.245, 0.495, 0.495, 0.34, 0.28, 0.31, 0.28,
0.25, 0.25, 0.185, 0.155, 0.25, 0.28, 0.28, 0.34, 0.215, 0.125,
0.155, 0.34, 0.34, 0.09, 0.59, 1.71, 1.18, 0.185, 0.215, 0.185,
0.185, 0.155, 0.19, 0.19, 0.19, 0.87, 2.045, 2.73, 1.585, 0.22,
0.25, 0.435, 0.405, 0.405, 0.405, 0.715, 0.62, 0.37, 0.4, 0.185,
0.375, 0.59, 0.525, 0.245, 0.495, 0.495, 0.68, 0.775, 0.25, 0.31,
0.34, 0.28, 0.28, 0.25, 1.55, 2.695, 1.705, 1.21, 0.87, 0.25,
1.52, 1.52, 0.405, 0.81, 2.08, 2.915, 1.705, 0.435, 0.22, 0.78,
1.215, 0.84, 0.495, 0.495, 0.56, 0.375, 0.28, 0.715, 1.025, 0.495,
0.65, 1.18, 1.09, 0.995, 0.87, 0.435, 0.125, 0.435, 0.555, 0.775,
1.12, 1.555, 1.15, 0.25, 0.87, 0.93, 0.28, 0.31, 0.31, 0.375,
0.78, 0.655, 0.53, 0.62, 0.525, 0.37, 0.555, 1.025, 0.655, 1.12,
1.585, 0.715, 0.155, 0.28, 1.12, 2.11, 1.645, 0.715, 0.465, 0.84,
0.81, 0.655, 0.84, 0.435, 0.28, 0.215, 0.93, 1.335, 0.65, 0.185,
0.155, 0.34, 0.4, 0.37, 0.435, 0.405, 0.28, 0.28, 0.25, 0.25,
0.745, 1.24, 0.805, 1.055, 1.085, 0.465, 0.375, 0.5, 0.59, 0.37,
0.185, 0.34, 0.37, 0.435, 0.405, 0.06, 0.125, 0.25, 0.31, 0.405,
0.78, 0.56, 0.215, 0.495, 0.87, 1.025, 0.62, 0.405, 0.405, 0.405,
0.31, 0.215, 0.465, 0.435, 0.34, 0.275, 0.215, 0.25, 0.22, 0.22,
0.125, 0.245, 0.34, 0.31, 0.37, 0.31, 0.31, 0.245, 0.185, 0.25,
0.22, 0.22, 0.31, 0.28, 0.22, 0.28, 0.53, 0.655, 0.375, 0.19,
0.405, 0.435, 0.28, 0.215, 0.77, 0.96, 1.865, 1.83, 0.495, 0.655,
1.615, 1.395, 0.31, 0.31, 0.25, 0.28, 0.34, 0.34), Movement_Out = structure(c(2L,
2L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 4L, 3L,
1L, 4L, 1L, 1L, 2L, 2L, 3L, 4L, 3L, 2L, 4L, 1L, 2L, 1L, 3L, 3L,
1L, 3L, 2L, 4L, 3L, 1L, 3L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 1L,
3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L, 3L, 1L, 1L, 2L, 4L, 1L, 2L,
4L, 3L, 3L, 4L, 3L, 3L, 2L, 4L, 2L, 1L, 2L, 4L, 4L, 2L, 4L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 3L,
2L, 2L, 3L, 1L, 2L, 4L, 3L, 4L, 2L, 3L, 1L, 4L, 4L, 3L, 1L, 2L,
1L, 1L, 4L, 1L, 2L, 4L, 2L, 1L, 1L, 2L, 4L, 2L, 2L, 4L, 1L, 1L,
2L, 4L, 2L, 4L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 3L, 1L, 4L,
2L, 1L, 1L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 4L, 1L, 1L, 4L, 2L,
4L, 4L, 3L, 4L, 4L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 4L, 2L, 2L,
4L, 1L, 2L, 2L, 4L, 4L, 4L, 2L, 2L, 1L, 4L, 4L, 2L, 3L, 1L, 2L,
2L, 4L, 4L, 1L, 2L, 4L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 1L, 1L,
2L, 1L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 1L, 1L, 2L, 1L, 1L, 4L, 2L,
4L, 2L, 4L, 3L, 1L, 4L, 1L, 1L, 2L, 4L, 2L, 1L, 4L, 1L, 4L, 3L,
2L, 3L, 2L, 4L, 3L, 3L, 2L, 1L, 3L, 1L, 1L, 3L, 2L, 3L, 3L, 3L,
1L, 2L, 4L, 2L, 3L, 2L, 1L, 4L, 3L, 2L, 4L, 4L, 2L, 4L, 1L, 1L,
2L, 2L, 4L, 1L, 2L, 4L, 2L, 4L, 3L, 4L), .Label = c("forward",
"left", "non-moving", "right"), class = "factor")), .Names = c("Speed_Out",
"Movement_Out"), row.names = c(NA, 283L), class = "data.frame")
Ok, I have to say that this has been less trivial than I expected. My answer is ugly and most likely not optimal, but it seems to work.
There seem to be a few spots where even when the rest of the data was taken into account, none of the conditions were met, so the behaviour for those stayed at NA.
library(dplyr)
# Label consecutive rows of `data` with a behaviour using a growing window.
#
# A window starts at the first unlabelled row with `window_size` rows. The
# rules are evaluated on the whole window:
#   * mean(Speed_Out) <= lying_max_speed                       -> "Lying"
#   * share of Movement_Out == "left" <= grazing_max_left and
#     mean(Speed_Out) >= grazing_min_speed                     -> "Grazing"
# If no rule is met the window grows by one row and is evaluated again. Once a
# rule is met every row of the window receives that label and the next window
# starts on the following row. If the window reaches the last row without a
# match, the start moves forward by one row and the skipped row stays NA.
#
# Arguments:
#   data              data frame with columns Speed_Out and Movement_Out
#   window_size       number of rows every new window starts with
#   lying_max_speed   upper bound on the mean speed for "Lying"
#   grazing_max_left  upper bound on the proportion of "left" for "Grazing"
#   grazing_min_speed lower bound on the mean speed for "Grazing"
#   verbose           if TRUE, report the start row and size of each window
#
# Returns `data` with two extra columns: Behaviour (character, NA where no
# rule was met) and window_nr (running number of the labelled window).
label_behaviour <- function(data,
                            window_size = 4,
                            lying_max_speed = 0.152682,
                            grazing_max_left = 0.208,
                            grazing_min_speed = 0.200921,
                            verbose = TRUE) {
  stopifnot(
    is.data.frame(data),
    all(c("Speed_Out", "Movement_Out") %in% names(data)),
    length(window_size) == 1,
    window_size >= 1
  )

  n_rows <- nrow(data)
  # Preallocate the results instead of growing a data frame inside the loop
  behaviour <- rep(NA_character_, n_rows)
  window_nr <- rep(NA_real_, n_rows)

  start <- 1
  width <- window_size
  window_cnt <- 1

  # Stop once fewer than `window_size` rows remain: a window that runs past
  # the last row would contain missing values and could never meet a rule.
  while (start + window_size - 1 <= n_rows) {
    if (verbose) {
      message("Row: ", start, ", Window Size: ", width)
    }

    rows <- start:(start + width - 1)
    mean_speed <- mean(data[["Speed_Out"]][rows])
    left_share <- sum(data[["Movement_Out"]][rows] == "left") / width

    # isTRUE() makes a missing value count as "rule not met"
    label <- if (isTRUE(mean_speed <= lying_max_speed)) {
      "Lying"
    } else if (isTRUE(left_share <= grazing_max_left &&
                        mean_speed >= grazing_min_speed)) {
      "Grazing"
    } else {
      NA_character_
    }

    if (!is.na(label)) {
      # Rule met: label the whole window and start a fresh one after it
      behaviour[rows] <- label
      window_nr[rows] <- window_cnt
      start <- start + width
      width <- window_size
      window_cnt <- window_cnt + 1
    } else if (width <= n_rows - start) {
      # No rule met yet and there is still room: grow the window by one row
      width <- width + 1
    } else {
      # Window already reaches the last row: give up on this start row
      width <- window_size
      start <- start + 1
    }
  }

  data$Behaviour <- behaviour
  data$window_nr <- window_nr
  data
}

# Initial window size is adjustable here
Out <- label_behaviour(Out, window_size = 4)
And the first 20 outputs
Speed_Out Movement_Out Behaviour window_nr
1 0.220 left Lying 1
2 0.155 left Lying 1
3 0.120 forward Lying 1
4 0.090 non-moving Lying 1
5 0.125 non-moving Grazing 2
6 0.125 non-moving Grazing 2
7 0.155 non-moving Grazing 2
8 0.340 forward Grazing 2
9 0.370 forward Grazing 2
10 0.185 forward Grazing 3
11 0.155 right Grazing 3
12 0.220 non-moving Grazing 3
13 0.220 non-moving Grazing 3
14 0.280 non-moving Grazing 3
15 0.215 non-moving Grazing 4
16 0.060 right Grazing 4
17 0.340 non-moving Grazing 4
18 0.555 forward Grazing 4
19 0.275 right Grazing 5
20 0.215 forward Grazing 5
I know the code is a mess, so let me know if it needs some extra commenting.
**UPDATED BELOW
I have created a plot, I literally need it horizontal, but the coord_flip() leaves the facets on the bottom instead of having nested groups on the left.
The data:
srvc_data <- structure(list(dept = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Distribution Centre Services",
"IT", "Marketing", "Merchandise & Inventory", "Operations and Communication"
), class = "factor"), label = c("test5", "test7", "test3", "test10",
"test4", "test6", "test2", "test1", "test11", "test12", "test9",
"test8", "test18", "test19", "test15", "test17", "test13", "test16",
"test20", "test14", "test22", "test21", "test25", "test23", "test24",
"test27", "test26", "test28", "test29", "test31", "test33", "test30",
"test32", "test38", "test36", "test37", "test43", "test34", "test35",
"test40", "test39", "test42", "test41", "test5", "test7", "test3",
"test10", "test4", "test6", "test2", "test1", "test11", "test12",
"test9", "test8", "test18", "test19", "test15", "test17", "test13",
"test16", "test20", "test14", "test22", "test21", "test25", "test23",
"test24", "test27", "test26", "test28", "test29", "test31", "test33",
"test30", "test32", "test38", "test36", "test37", "test43", "test34",
"test35", "test40", "test39", "test42", "test41"), Gap = c(-0.07,
-0.13, -0.15, -0.16, -0.16, -0.21, -0.22, -0.24, -0.24, -0.25,
-0.3, -0.3, -0.18, -0.19, -0.24, -0.29, -0.3, -0.34, -0.36, -0.41,
-0.46, -0.63, -0.16, -0.18, -0.21, -0.22, -0.27, -0.29, -0.31,
-0.31, -0.35, -0.39, -0.42, -0.15, -0.15, -0.2, -0.21, -0.22,
-0.27, -0.29, -0.29, -0.31, -0.36, -0.07, -0.13, -0.15, -0.16,
-0.16, -0.21, -0.22, -0.24, -0.24, -0.25, -0.3, -0.3, -0.18,
-0.19, -0.24, -0.29, -0.3, -0.34, -0.36, -0.41, -0.46, -0.63,
-0.16, -0.18, -0.21, -0.22, -0.27, -0.29, -0.31, -0.31, -0.35,
-0.39, -0.42, -0.15, -0.15, -0.2, -0.21, -0.22, -0.27, -0.29,
-0.29, -0.31, -0.36), impeff = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Importance", "Effectiveness"), class = "factor"),
score = c(0.87, 0.79, 0.78, 0.82, 0.81, 0.81, 0.92, 0.92,
0.78, 0.81, 0.86, 0.91, 0.79, 0.79, 0.87, 0.93, 0.9, 0.9,
0.82, 0.95, 0.91, 0.95, 0.77, 0.79, 0.82, 0.8, 0.83, 0.9,
0.91, 0.94, 0.89, 0.94, 0.91, 0.82, 0.74, 0.78, 0.81, 0.83,
0.85, 0.82, 0.81, 0.8, 0.83, 0.8, 0.66, 0.63, 0.66, 0.65,
0.6, 0.7, 0.68, 0.54, 0.56, 0.56, 0.61, 0.61, 0.6, 0.63,
0.64, 0.6, 0.56, 0.46, 0.54, 0.45, 0.32, 0.61, 0.61, 0.61,
0.58, 0.56, 0.61, 0.6, 0.63, 0.54, 0.55, 0.49, 0.67, 0.59,
0.58, 0.6, 0.61, 0.58, 0.53, 0.52, 0.49, 0.47)), row.names = c(NA,
-86L), .Names = c("dept", "label", "Gap", "impeff", "score"), class = "data.frame")
And the code:
# Dodged bars of score per label, filled by impeff, with one facet column per
# department (facet strips switched to the x side, free scales and spacing).
ggplot(data = srvc_data, aes(x = label, y = score)) +
  geom_bar(
    aes(fill = impeff),
    stat = "identity",
    position = "dodge",
    width = 1
  ) +
  facet_grid(~dept, switch = "x", scales = "free", space = "free")
# coord_flip() is deliberately left out of this version; append
# `+ coord_flip()` to the last layer above to flip the axes.
The plot (without the flip) looks like the below, I need it horizontal, with the facet categories on the far left. How does the coord_flip() work? Why wouldn't it also flip/move the facet strips? Please ignore the crammed formatting!
**UPDATE
So thanks to #neilfws I have fixed the orientation of the plot by reversing the facet formula (dept ~ . instead of ~ dept) and switching the facet strips to the y-axis.
# Same bars, faceted by department in rows with the strips switched to the
# y side, then flipped so the bars run horizontally.
ggplot(data = srvc_data, mapping = aes(x = label, y = score)) +
  geom_bar(
    mapping = aes(fill = impeff),
    stat = "identity",
    position = "dodge",
    width = 1
  ) +
  facet_grid(
    dept ~ .,
    switch = "y",
    scales = "free_y",
    space = "free"
  ) +
  coord_flip()
Now I have the correctly oriented plot, but there is lots of unused space for all the labels that are unused in each facet. Within the facet_grid call, setting scales = "free" doesn't work, nor does drop = T. Any ideas? Plot below for reference.
If you coord_flip, you also need to reverse the faceting relationship (~), to place it on the side, and the switch, to place it on the y-axis. Does this get you close to what you want?
# Facet by department in rows, put the strips on the y side, and flip the
# coordinates so the bars are horizontal.
ggplot(data = srvc_data, mapping = aes(x = label, y = score)) +
  geom_bar(
    mapping = aes(fill = impeff),
    stat = "identity",
    position = "dodge",
    width = 1
  ) +
  facet_grid(dept ~ ., switch = "y", scales = "free", space = "free") +
  coord_flip()
I'm trying to use a for loop to pull subsets of data out of a dataframe with R.
I have a little vector to hold all the possible occurences of the names in that column
meter_class<-c("one_s_120","nine_s_120", "nine_s_480","fortyfive_s_120", "fortyfive_s_480")
Whenever I try to address it by index reference, it fails. Either nothing in the data subset survives (NULLs everywhere), or R complains about not passing the right argument by using meter_class[1]
attach(meter_class[1])
Error in attach(meter_class[1]) : file 'one_s_120' not found
subset(cal, cal$Form==as.character(meter_class[1]))
[1] Test Amps Type Accuracy Voltage Form
<0 rows> (or 0-length row.names)
Also, here's the output of dput on the data frame cal:
structure(list(Test = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Burst", "ESD", "Inspection",
"Surge"), class = "factor"), Amps = c(15, 15, 1.5, 2.5, 2.5,
0.25, 2.5, 2.5, 0.25, 2.5, 2.5, 0.25, 2.5, 2.5, 0.25, 15, 15,
1.5, 2.5, 2.5, 0.25, 2.5, 2.5, 0.25, 2.5, 2.5, 0.25, 2.5, 2.5,
0.25, 15, 15, 1.5, 2.5, 2.5, 0.25, 2.5, 2.5, 0.25, 2.5, 2.5,
0.25, 2.5, 2.5, 0.25, 15, 15, 1.5, 2.5, 2.5, 0.25, 2.5, 2.5,
0.25, 2.5, 2.5, 0.25, 2.5, 2.5, 0.25), Type = structure(c(2L,
1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L,
3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L,
2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L,
1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L), .Label = c("Lag - 0.5",
"Unity - Full", "Unity - Light"), class = "factor"), Accuracy = c(-0.011,
0.012, 0.027, 0.033, 0.076, 0.006, 0.052, 0.046, -0.016, 0.021,
0.008, 0.023, 0.034, 0.036, 0.038, 0.002, 0.012, 0.097, 0.055,
0.093, 0.033, 0.068, 0.048, -0.016, 0.042, 0.03, 0.035, 0.041,
0.024, 0.027, 0.004, -0.012, 0.002, 0.038, 0.084, 0.015, 0.049,
0.045, -0.009, 0.025, 0.002, 0.029, 0.03, 0.032, 0.064, 0.011,
0.024, 0.033, 0.054, 0.085, 0.027, 0.071, 0.059, 0.01, 0.051,
0.012, 0.051, 0.048, 0.04, 0.051), Voltage = c(120, 120, 120,
120, 120, 120, 480, 480, 480, 120, 120, 120, 480, 480, 480, 120,
120, 120, 120, 120, 120, 480, 480, 480, 120, 120, 120, 480, 480,
480, 120, 120, 120, 120, 120, 120, 480, 480, 480, 120, 120, 120,
480, 480, 480, 120, 120, 120, 120, 120, 120, 480, 480, 480, 120,
120, 120, 480, 480, 480), Form = structure(c(3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("45S", "9S", "i210plus"
), class = "factor")), .Names = c("Test", "Amps", "Type", "Accuracy",
"Voltage", "Form"), class = "data.frame", row.names = c(NA, -60L
))
I know this is a simple thing to do if you know how to do it...Can anyone light the way?
Thanks!
It seems that none of the values of "meter_class" are represented in "Form" in your data frame.
# Values that actually occur in the Form column of the data frame `cal`
unique(cal$Form)
# [1] i210plus 9S 45S
# Check each meter_class entry against those values
meter_class %in% unique(cal$Form)
# [1] FALSE FALSE FALSE FALSE FALSE
Just try two forms of subsetting, using values of "Form" actually present in the data:
# Two equivalent ways to keep only the rows of `cal` whose Form is "9S"
subset(cal, Form == "9S")
cal[cal$Form == "9S", ]
I also note that you wish to "pull subsets of data out of a dataframe". I don't know the full story or your objectives in doing so, but please note that there are loads of functions that allow you to perform calculations, plotting, or whatever, on subsets of your data.
Update following comment
You can subset a data frame by combining logical conditions with logical operators (see e.g. ?Extract, ?&)
# Form values that are actually present in `cal`
meter_class <- c("i210plus", "9S", "45S")
# Combine conditions with the elementwise `&` to subset on several columns
cal[cal$Form == "9S" & cal$Voltage == 120, ]
# or
subset(cal, Form == "9S" & Voltage == 120)