Related
Is there a way to make a file with the correlation statistic between the raw number of fish observed ("num") and each environmental data column ("temp", "do", etc.) by species ("group")?
*As well as correlations between the means and medians of num vs. env. factors?
I'd also like to be able to choose which correlation method to use (Pearson correlation, Kendall rank correlation, Spearman correlation, etc.)
My data:
zeros <- structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("2019", "2020"), class = "factor"), season = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("dry", "wet"), class = "factor"),
site = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L
), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), .Label = c("Hardhead silverside", "Sailfin molly"), class = "factor"),
num = c(0, 8, 0, 9, 0, 13, 0, 9, 0, 10, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 2,
0, 3, 0, 13, 0), temp = c(23L, 36L, 35L, 34L, 30L, 28L, 18L,
19L, 33L, 33L, 25L, 20L, 33L, 23L, 36L, 32L, 28L, 17L, 34L,
31L, 26L, 34L, 26L, 35L, 15L, 25L, 26L, 20L, 18L, 14L, 23L,
17L, 26L, 17L, 17L, 19L, 29L, 31L, 18L, 15L), sal = c(12.5,
25.5, 8.5, 15.5, 17.5, 27.5, 9.5, 31.5, 1.5, 34.5, 25.5,
21.5, 10.5, 8.5, 32.5, 19.5, 6.5, 5.5, 15.5, 28.5, 6.5, 3.5,
29.5, 13.5, 7.5, 16.5, 3.5, 28.5, 22.5, 5.5, 9.5, 12.5, 29.5,
24.5, 8.5, 32.5, 37.5, 3.5, 12.5, 19.5), do = c(9.66, 7.66,
1.66, 14.66, 15.66, 1.66, 14.66, 15.66, 0.66, 5.66, 10.66,
11.66, 4.66, 0.66, 13.66, 1.66, 13.66, 6.66, 6.66, 10.66,
9.66, 15.66, 9.66, 15.66, 4.66, 13.66, 1.66, 11.66, 6.66,
8.66, 12.66, 0.66, 6.66, 0.66, 9.66, 16.66, 1.66, 10.66,
15.66, 10.66), depth = c(120L, 161L, 52L, 52L, 43L, 105L,
165L, 23L, 79L, 136L, 41L, 59L, 65L, 118L, 122L, 69L, 137L,
88L, 152L, 105L, 108L, 79L, 96L, 80L, 22L, 110L, 157L, 118L,
126L, 93L, 156L, 64L, 74L, 24L, 111L, 113L, 157L, 78L, 121L,
130L)), class = "data.frame", row.names = c(NA, -40L))
The first part of your question is straightforward:
# Correlation method handed to cor(): "pearson" (the default), "kendall" or
# "spearman". Change this single value to switch statistics.
cor.method <- "pearson"
# Environmental columns to correlate with num (columns 6:9 of zeros).
env.vars <- c("temp", "sal", "do", "depth")
# One data frame per species.
zeros.spl <- split(zeros, zeros$group)
# For each species, correlate num with every environmental column. drop() turns
# the 1 x 4 matrix returned by cor() into a plain vector, so the result is a
# matrix with one row per variable and one column per species.
zeros.cors <- vapply(
  zeros.spl,
  function(x) drop(cor(x[["num"]], x[, env.vars], method = cor.method)),
  numeric(length(env.vars))
)
rownames(zeros.cors) <- env.vars
zeros.cors
# Hardhead silverside Sailfin molly
# temp -0.3080334 0.36174046
# sal 0.1393580 0.47095129
# do 0.2544695 -0.06646818
# depth 0.1296208 0.08777425
t(zeros.cors)
# temp sal do depth
# Hardhead silverside -0.3080334 0.1393580 0.25446948 0.12962078
# Sailfin molly 0.3617405 0.4709513 -0.06646818 0.08777425
Use write.csv(zeros.cors, file="results.csv") or write.csv(t(zeros.cors), file="results.csv") depending on what you want the rows/cols to be.
The second question is not clear. The means/medians of a group will be a single value so you cannot correlate it with the environmental variables. You could compute the means by group with aggregate:
# Mean of num and of each environmental variable (columns 5:9), one row per species.
aggregate(zeros[, 5:9], by=list(zeros$group), "mean")
# Group.1 num temp sal do depth
# 1 Hardhead silverside 1.45 25.95 15.35 8.51 105.20
# 2 Sailfin molly 2.45 25.00 18.90 9.06 90.25
# Median of the same columns, one row per species.
aggregate(zeros[, 5:9], by=list(zeros$group), "median")
# Group.1 num temp sal do depth
# 1 Hardhead silverside 0 26 11.5 9.66 115.5
# 2 Sailfin molly 0 24 19.5 10.66 90.5
I am investigating the effect of time since fire on species diversity. I am attempting to make a graph that has different colours at different time since fire ages. However, putting the colours onto the graph has made the model prediction line fade away. I am wondering if there is some way to bring the line in front of geom_rect?
Loaded packages:
library(voxel)
library(gamm4)
library(ggplot2)
My data:
data <- read.csv('StacksOverflow.csv')
structure(list(Lscape = c(158L, 158L, 158L, 158L, 158L, 158L),
TSF = c(5, 5, 5, 18.5, 5, 18.5), VegtypeNew = structure(c(1L,
1L, 1L, 2L, 1L, 1L), .Label = c("spinsandplain", "woodlndsandplain"
), class = "factor"), FF = c(2L, 2L, 2L, 1L, 2L, 1L), ThreeYearRain = c(913.799997,
913.799997, 913.799997, 913.799997, 913.799997, 913.799997
), Div = c(2.2629743, 1.9630117, 1.7336569, 1.2816843, 2.4155056,
1.4240443), triodia_low = c(19L, 6L, 21L, 32L, 11L, 32L)), row.names = c(NA,
6L), class = "data.frame")
Extended data:
structure(list(Lscape = c(158L, 158L, 158L, 158L, 158L, 158L,
158L, 158L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 235L,
235L, 235L, 235L, 235L, 235L, 235L, 235L, 237L, 237L, 237L, 237L,
237L, 237L, 237L, 237L, 254L, 254L, 254L, 254L, 254L, 254L, 254L,
254L, 287L, 287L, 287L, 287L, 287L, 287L, 287L, 287L, 304L, 304L,
304L, 304L, 304L, 304L, 304L, 304L, 311L, 311L, 311L, 311L, 311L,
311L, 311L, 311L, 312L, 312L, 312L, 312L, 312L, 312L, 312L, 312L,
323L, 323L, 323L, 323L, 323L, 323L, 323L, 323L, 326L, 326L, 326L,
326L, 326L, 326L, 326L, 326L, 327L, 327L, 327L, 327L, 327L, 327L,
327L, 327L, 337L, 337L, 337L, 337L, 337L, 337L, 337L, 337L, 355L,
355L, 355L, 355L, 355L, 355L, 355L, 355L, 370L, 370L, 370L, 370L,
370L, 370L, 370L, 370L, 379L, 379L, 379L, 379L, 379L, 379L, 379L,
379L, 411L, 411L, 411L, 411L, 411L, 411L, 411L, 411L, 414L, 414L,
414L, 414L, 414L, 414L, 414L, 414L, 435L, 435L, 435L, 435L, 435L,
435L, 435L, 435L, 437L, 437L, 437L, 437L, 437L, 437L, 437L, 437L,
438L, 438L, 438L, 438L, 438L, 438L, 438L, 438L, 447L, 447L, 447L,
447L, 447L, 447L, 447L, 447L, 452L, 452L, 452L, 452L, 452L, 452L,
452L, 452L), TSF = c(5, 5, 5, 18.5, 5, 18.5, 18.5, 18.5, 11.5,
4.5, 0.5, 20, 11.5, 0.5, 1, 4.5, 1, 1, 4.5, 5, 4.5, 2, 5, 1,
6, 6, 4.5, 6, 14.5, 17, 4.5, 6, 1, 1, 7, 4.5, 2, 2, 7, 7, 20,
4, 3.5, 4, 3.5, 3.5, 11.5, 20, 6, 0.5, 5, 6, 6, 0.5, 7, 6, 3.5,
3.5, 3.5, 11.5, 11.5, 1, 1, 11.5, 1, 1, 4, 1, 1, 4, 1, 10.5,
7, 17.5, 0.5, 0.5, 0.5, 17.5, 7, 0.5, 18, 1.5, 3.5, 18, 18, 5,
3.5, 18.5, 14.5, 1.5, 7, 1.5, 7, 7, 7, 7, 10.5, 1.5, 0, 1.5,
7, 3, 7, 10.5, 0.5, 20, 0.5, 2, 2, 1.5, 2, 3, 20, 1, 1.5, 10.5,
17, 1.5, 1.5, 10.5, 3, 1, 1, 1, 4.5, 1, 6.5, 1, 10, 1.5, 12.5,
1.5, 1.5, 1.5, 1.5, 1.5, 2, 7, 12.5, 2, 7, 2, 2, 2, 1.5, 18.5,
18.5, 1.5, 7, 1.5, 1.5, 5, 12.5, 6.5, 1.5, 1.5, 1.5, 1.5, 1.5,
1.5, 6.5, 6.5, 1.5, 6.5, 18.5, 6.5, 7, 1.5, 1, 7, 7, 1, 7, 1,
7.5, 7.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), VegtypeNew = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("spinsandplain", "woodlndsandplain"
), class = "factor"), FF = c(2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
3L, 3L, 4L, 2L, 2L, 5L, 3L, 4L, 5L, 5L, 2L, 2L, 4L, 3L, 5L, 4L,
5L, 4L, 4L, 5L, 3L, 3L, 4L, 5L, 5L, 3L, 4L, 6L, 5L, 5L, 3L, 4L,
1L, 5L, 3L, 4L, 4L, 4L, 2L, 1L, 2L, 2L, 3L, 4L, 4L, 3L, 2L, 3L,
3L, 3L, 4L, 3L, 3L, 3L, 4L, 2L, 6L, 6L, 6L, 6L, 5L, 2L, 7L, 3L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 3L, 4L, 4L, 3L, 4L, 3L, 3L,
4L, 5L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 6L, 4L, 4L, 3L, 3L,
4L, 0L, 2L, 4L, 3L, 2L, 3L, 3L, 0L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
3L, 2L, 5L, 6L, 3L, 3L, 3L, 3L, 2L, 4L, 3L, 4L, 5L, 4L, 3L, 3L,
6L, 4L, 3L, 5L, 5L, 5L, 5L, 4L, 3L, 2L, 1L, 2L, 2L, 3L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 3L, 2L, 4L, 2L, 3L, 4L, 5L, 5L, 3L, 4L, 3L, 3L, 4L
), ThreeYearRain = c(913.799997, 913.799997, 913.799997, 913.799997,
913.799997, 913.799997, 913.799997, 913.799997, 938.899988, 938.899988,
938.899988, 938.600004, 938.899988, 938.899988, 938.899988, 938.499989,
930.700005, 932.800001, 930.700005, 930.700005, 932.800001, 930.700005,
930.700005, 932.800001, 932.699991, 934.799987, 934.799987, 934.799987,
932.699991, 934.799987, 932.699991, 934.799987, 896.99999, 896.99999,
908.999991, 908.999991, 908.999991, 908.999991, 910.199991, 898.399994,
928.000009, 939.800006, 935.500004, 928.000009, 928.000009, 923.700007,
931.499996, 931.499996, 866.200004, 866.200004, 867.000003, 867.000003,
867.000003, 867.300002, 867.000003, 869.3, 926.800003, 926.800003,
926.800003, 926.800003, 933.800003, 934.600006, 934.600006, 934.600006,
924.2, 925.100002, 924.2, 924.2, 924.2, 922.2, 924.2, 924.7,
974.799995, 983.500006, 983.500006, 983.500004, 983.500006, 974.799995,
974.799994, 983.500006, 839.1, 839.1, 839.1, 839.100001, 839.300001,
839.1, 838.699999, 839.100001, 839.100001, 838.699999, 842.300004,
842.300004, 842.900006, 842.300004, 842.900006, 842.300004, 936.900014,
936.900014, 936.900014, 932.999984, 933.099983, 932.999984, 936.900014,
936.900014, 870.499995, 870.499995, 877.399998, 877.399998, 876.099997,
876.099997, 876.099997, 859.199997, 957.199982, 966.299982, 955.699998,
955.699998, 957.199982, 955.699998, 955.699998, 956.299985, 852.2,
852.2, 852.600006, 852.500001, 852.500001, 852.500001, 852.600006,
852.500001, 906.700011, 904.700001, 912.600007, 912.600007, 914.600007,
906.700001, 906.399998, 914.600007, 925.599982, 933.299992, 933.299992,
933.299992, 933.299992, 926.500012, 935.899994, 935.199992, 916.800001,
916.100001, 916.800001, 916.400003, 918.700003, 904.100001, 916.800001,
918.700003, 899.1, 904.100001, 906.000003, 903.400002, 904.100001,
903.400002, 906.000003, 906.000003, 905.7, 903.099999, 903.099999,
905.7, 912.199994, 893.200002, 905.399999, 904.999998, 933.700012,
933.700012, 933.700012, 933.700012, 933.700012, 932.30001, 932.300008,
932.300008, 878.500006, 878.500006, 878.500006, 879.300004, 879.300004,
879.300004, 879.300004, 873.200008), Div = c(2.2629743, 1.9630117,
1.7336569, 1.2816843, 2.4155056, 1.4240443, 1.5178948, 0.8993031,
1.2022801, 1.9287665, 2.0237769, 2.004871, 1.5020684, 2.1776591,
2.093787, 2.3139276, 2.7244402, 2.7026829, 1.6644725, 2.0696347,
1.9561853, 2.6018987, 2.5800017, 2.1867866, 2.4144821, 1.7389892,
2.1427451, 1.6544538, 1.8651966, 1.7569776, 1.8257533, 1.4048204,
2.7384914, 2.9344488, 2.2306909, 2.5085619, 1.8874836, 2.3431509,
1.8401602, 1.8620274, 1.8038997, 2.5909049, 2.2265328, 2.0882065,
2.4737837, 2.2995223, 1.4231311, 2.0577752, 1.6463134, 2.1464331,
2.2636437, 2.0992589, 1.7666974, 1.835061, 1.7732171, 2.0813243,
1.865505, 2.0200607, 1.2510612, 1.021761, 0.8111482, 0.2617645,
2.0282081, 1.1145976, 2.2596683, 2.3517629, 1.9424972, 1.9191269,
1.4222035, 2.6007698, 2.0071984, 1.9049132, 1.073374, 0.9576897,
1.6273043, 1.7701581, 0.6890092, 1.5764456, 0.384906, 1.5099996,
1.6713486, 2.5483064, 2.2033185, 2.0798843, 1.9082985, 2.1580972,
1.6952798, 1.6303402, 1.9461221, 1.4116405, 1.5347693, 2.6924921,
1.727278, 1.9384415, 1.6659585, 1.612819, 1.6592884, 2.7129796,
0, 2.7098898, 1.3785924, 2.7635218, 1.1481271, 1.8597007, 2.2191531,
1.088549, 2.431015, 1.3702099, 2.1018035, 2.3442348, 2.3599146,
2.789816, 1.8340235, 1.0606126, 2.5852679, 1.7791063, 1.2273106,
2.2432636, 2.5642458, 1.3306642, 2.6771856, 1.5062567, 2.0903266,
2.0398412, 2.4821503, 0.5979376, 1.479214, 1.9188301, 1.2267089,
2.4491421, 1.5366949, 2.516592, 2.4084849, 2.4385928, 2.549348,
2.7090074, 2.3337573, 1.8982968, 1.7956341, 2.3752386, 1.6587394,
2.6663039, 2.4853204, 1.9325793, 2.4431141, 1.6976331, 0.8791745,
2.6625573, 1.9596877, 1.9287565, 2.4590816, 2.4963942, 1.8767916,
1.3954333, 2.5155936, 2.2327274, 2.6613726, 2.580748, 2.3142567,
2.2280879, 1.7925025, 1.663008, 2.3488945, 2.0746398, 1.7050203,
2.0108246, 1.7317251, 2.4936515, 0.9556999, 1.3716151, 2.0694067,
1.4944032, 1.0984774, 1.2868726, 1.6429103, 1.3720737, 1.8037795,
1.8745583, 1.8921264, 1.8320377, 1.201682, 1.8489571, 1.798546,
0.8486856), triodia_low = c(19L, 6L, 21L, 32L, 11L, 32L, 16L,
29L, 17L, 20L, 0L, 24L, 37L, 0L, 3L, 29L, 4L, 2L, 31L, 28L, 20L,
12L, 6L, 6L, 26L, 28L, 27L, 32L, 37L, 26L, 15L, 27L, 2L, 1L,
19L, 5L, 13L, 10L, 33L, 14L, 25L, 22L, 15L, 34L, 15L, 7L, 36L,
25L, 25L, 0L, 25L, 4L, 21L, 0L, 33L, 16L, 16L, 15L, 22L, 25L,
25L, 0L, 0L, 18L, 2L, 0L, 26L, 0L, 0L, 7L, 0L, 13L, 28L, 35L,
0L, 0L, 0L, 31L, 29L, 0L, 14L, 5L, 14L, 11L, 12L, 16L, 21L, 26L,
22L, 7L, 23L, 10L, 23L, 17L, 19L, 7L, 27L, 3L, 0L, 2L, 29L, 14L,
30L, 12L, 0L, 35L, 0L, 29L, 4L, 5L, 14L, 15L, 33L, 0L, 3L, 21L,
34L, 0L, 2L, 28L, 16L, 0L, 1L, 0L, 11L, 0L, 32L, 0L, 27L, 2L,
28L, 3L, 0L, 4L, 1L, 6L, 14L, 27L, 25L, 12L, 7L, 10L, 16L, 9L,
4L, 15L, 40L, 2L, 18L, 5L, 3L, 6L, 1L, 33L, 2L, 5L, 12L, 4L,
7L, 3L, 17L, 30L, 5L, 7L, 17L, 15L, 16L, 9L, 0L, 26L, 16L, 0L,
24L, 1L, 27L, 32L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-184L))
The model:
# GAMM for diversity (Div): a smooth of triodia_low (k = 6), parametric terms for
# TSF, FF, VegtypeNew and ThreeYearRain, and a random intercept for each Lscape.
m1b <-gamm4(Div~TSF+FF+s(triodia_low, k=6)+VegtypeNew+ThreeYearRain,random=~(1|Lscape),data=data)
Plotting:
# Plot the triodia_low smooth from m1b with voxel::plotGAMM (raw data requested
# via rawOrFitted="raw", interval via plotCI=T); the result is stored in p.
p <-plotGAMM (m1b,smooth.cov="triodia_low",groupCovs = NULL,orderedAsFactor=T,rawOrFitted="raw",plotCI=T,grouping = NULL)
# Restyle p: axis titles, a bare theme, x breaks every 5, and five coloured
# background bands.
p + labs(x= "Years since fire") +
labs(y="Species diversity (H')") +
theme_bw() +
theme(panel.grid.major = element_blank()) +
theme(panel.grid.minor = element_blank()) +
theme(panel.border = element_blank()) +
theme(axis.line = element_line(colour="black")) +
theme(axis.title = element_text(size=22)) + # face="bold"
theme(axis.ticks = element_line()) +
scale_x_continuous(breaks = seq(0,40,by=5)) +
# NOTE(review): these geom_rect layers are added after the layers already in p,
# so they are drawn on top of the fitted line. Presumably each layer also
# inherits p's data and is drawn once per row, which would explain the very
# small alpha needed — confirm.
geom_rect(aes(xmin=0,xmax=0.6,ymin=-Inf,ymax=Inf),alpha=0.002, fill="coral4") +
geom_rect(aes(xmin=.6,xmax=1,ymin=-Inf,ymax=Inf),alpha=0.002, fill="gold") +
geom_rect(aes(xmin=1,xmax=5,ymin=-Inf,ymax=Inf),alpha=0.002, fill="darkred") +
geom_rect(aes(xmin=5,xmax=10,ymin=-Inf,ymax=Inf),alpha=0.002,fill="chocolate") +
geom_rect(aes(xmin=10,xmax=40,ymin=-Inf,ymax=Inf),alpha=0.002,fill="orangered") +
theme(axis.text = element_text(size=18, colour="black")) +
theme(text = element_text(family = "Arial")) +
theme(legend.position= "none")
The plot:
Any help would be greatly appreciated :)
Using the development version of gratia you can replicate the plot you showed with a few simple calls to create the data, predict etc.
To install the development version of gratia do
# install.packages("remotes")
remotes::install_github("gavinsimpson/gratia")
Once installed you can produce an object suitable for plotting using:
library('mgcv')
library('gamm4')
library('gratia')
library('ggplot2')
library('dplyr')
## model fit
## NOTE(review): df is assumed to hold the data frame shown in the question
## (which the question reads into an object called data) -- confirm.
m1b <- gamm4(Div ~ TSF + FF + s(triodia_low, k=6) + VegtypeNew + ThreeYearRain,
random = ~ (1|Lscape), data = df)
## data to predict at: 100 values of triodia_low
## NOTE(review): the var1/n arguments are those of the development version of
## gratia referred to above; other versions may use a different interface.
new_df <- data_slice(m1b, var1 = 'triodia_low', n = 100)
## predict from the gam component of the fit (with standard errors) and cast to a tibble
pred_df <- as_tibble(predict(m1b[["gam"]], new_df, se.fit = TRUE))
## add to the data we're predicting at
pred_df <- bind_cols(new_df, pred_df)
## grab the inverse link of the model (not needed here, but is for non-Normal fits)
ilink <- inv_link(m1b)
## create the upper and lower credible interval as fit +/- 2 standard errors,
## back-transformed with the inverse link; fit itself is transformed last so
## that lwr and upr are computed from the untransformed fit
pred_df <- mutate(pred_df,
lwr = ilink(fit - (2 * se.fit)),
upr = ilink(fit + (2 * se.fit)),
fit = ilink(fit))
The plot itself can be created using:
## Fitted smooth of triodia_low with the raw data and coloured background bands.
##
## The bands are added with annotate("rect", ...), which draws each rectangle
## exactly once. geom_rect(aes(xmin = 0, ...)) with constant aesthetics inherits
## pred_df and draws one rectangle per row (100 here), so the bands were only
## visible through heavy overplotting at alpha = 0.01. alpha = 0.6 gives roughly
## the same overall opacity (1 - 0.99^100 is about 0.63).
##
## Layers are drawn in the order they are added: bands first, then the data
## layers, so the points, interval ribbon and fitted line stay in the foreground.
ggplot(pred_df, aes(x = triodia_low, y = fit)) +
  labs(x = "Years since fire", y = "Species diversity (H')") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.title = element_text(size = 22),
        axis.ticks = element_line()) +
  scale_x_continuous(breaks = seq(0, 40, by = 5)) +
  annotate("rect", xmin = 0, xmax = 0.6, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "coral4") +
  annotate("rect", xmin = 0.6, xmax = 1, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "gold") +
  annotate("rect", xmin = 1, xmax = 5, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "darkred") +
  annotate("rect", xmin = 5, xmax = 10, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "chocolate") +
  annotate("rect", xmin = 10, xmax = 40, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "orangered") +
  ## raw observations come from df, not from the prediction grid
  geom_point(data = df, mapping = aes(x = triodia_low, y = Div)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4) +
  geom_line()
Most of the ggplot code is yours, but now that we have full control, we can put the data layers in the foreground by leaving those layers until the end.
Note that I'm not convinced this is a great plot. The fitted function you are showing is conditional upon the other covariates in the data set. Here, and as with voxel::plotGAMM(), we're predicting from the model and hence we have to supply something for the other covariates. Following voxel::plotGAMM and mgcv::vis.gam, we fix the other covariates not shown at
the value of the data observation closest to the median (for continuous variables), or
the modal category (for factor parametric terms)
So, the resulting figure is the fit conditional upon those values. In particular it is for the spinsandplain level of VegtypeNew. As such it is a little misleading.
I am trying to make a line graph with three factors. In addition, I would like to have my legend labels have two symbols.
This is the published example I am trying to emulate....
as you can see, each label has two symbols, one for each level of one factor. Furthermore, the same factor is shown twice on the same panel for each level, i.e. cultivar with two different levels of a factor.
I am trying to make one panel at a time, I will join them later and put the legend at the top like in the above example.
Here is the start of my attempt....
I have got my points on the graph, though I'm baffled as to how to make a line join the points for each cultivar, show error bars, and make the legend similar to the above example. I want the legend to have one label for each cultivar, but with two symbols: one symbol for Waterlogging and one for Non-waterlogging.
structure(list(pot = c(41L, 42L, 43L, 44L, 61L, 62L, 63L, 64L,
45L, 46L, 47L, 48L, 65L, 66L, 67L, 68L, 49L, 50L, 51L, 52L, 69L,
70L, 71L, 72L, 53L, 54L, 55L, 56L, 73L, 74L, 75L, 76L, 57L, 58L,
59L, 60L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 101L, 102L,
103L, 104L, 85L, 86L, 87L, 88L, 105L, 106L, 107L, 108L, 89L,
90L, 91L, 92L, 109L, 110L, 111L, 112L, 93L, 94L, 95L, 96L, 113L,
114L, 115L, 116L, 97L, 98L, 99L, 100L, 117L, 118L, 119L, 120L
), rep = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L), cultivar = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Dinninup", "Riverina",
"Seaton Park", "Yarloop"), class = "factor"), Waterlogging = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("Non-waterlogged",
"Waterlogged"), class = "factor"), P = c(12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
18.24, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 12.1,
12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17,
15.17, 15.17, 15.17, 15.17, 15.17, 18.24, 18.24, 18.24, 18.24,
18.24, 18.24, 18.24, 18.24, 24.39, 24.39, 24.39, 24.39, 24.39,
24.39, 24.39, 24.39, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35,
48.35, 48.35), form = c(2.81, 2.64, 2.59, 3.28, 3.18, 2.57, 2.9,
3, 2.38, 2.72, 2.58, 2.73, 3.06, 3.01, 3.01, 2.77, 2.95, 2.36,
2.91, 2.38, 3.33, 3.19, 3.17, 3.16, 3.16, 3.2, 2.58, 3.71, 3.11,
2.7, 2.92, 1.93, 2.95, 2.57, 2.68, 2.48, 3.34, 2.75, 2.52, 1.88,
1.19, 0.57, 0.64, 0.66, 1.13, 1.28, 0.85, 0.96, 1.34, 2.14, 0.63,
1.27, 1.13, 0.64, 1.21, 1.95, 1.11, 0.91, 0.75, 0.63, 1.06, 1.07,
1.05, 0.8, 1.41, 1.13, 0.75, 0.89, 1.98, 1.27, 1.01, 1, 1.16,
0.64, 0.64, 1.02, 1.03, 1.13, 0.79, 0.6)), row.names = 41:120, class = "data.frame")
library(Rmisc)
library(ggplot2)
# Summarise form for every P x cultivar x Waterlogging combination; the result
# supplies the form and se columns used in the plot below.
# NOTE(review): yar is presumably the data frame printed above — it is not assigned in the code shown; confirm.
tglf3 <- summarySE(yar, measurevar="form", groupvars=c("P","cultivar","Waterlogging"),na.rm=TRUE)
# Points coloured by Waterlogging and shaped by cultivar, dodged horizontally,
# with error bars of +/- one se around each summarised value.
ggplot(tglf3, aes(x=P, y=form)) +
geom_point(aes(colour = factor(Waterlogging),
shape=factor(cultivar)),size=3.5,position=position_dodge(1))+
geom_errorbar(aes(ymin=form-se, ymax=form+se),colour="black", width=.1,position=position_dodge(1))
Is this approaching what you're looking for?
library(tidyverse)
tglf3 <- summarySE(yar, measurevar="form", groupvars=c("P","cultivar","Waterlogging"),na.rm=TRUE)
Mostly, making use of dplyr::group_by can solve the point-line pairing:
# Points, connecting lines and +/- se error bars, with colour mapped to
# Waterlogging and shape mapped to cultivar; legend placed on top.
# NOTE(review): ggplot2 normally decides which points a line connects from the
# discrete aesthetics (or an explicit group aesthetic) rather than from dplyr
# grouping, so the group_by() step may not be what pairs points and lines — confirm.
tglf3 %>%
group_by(cultivar) %>%
ggplot(aes(x=P, y=form, colour=Waterlogging, shape=cultivar)) +
geom_errorbar(aes(ymin=form-se, ymax=form+se), colour="black", width=.6) +
geom_point(size=3.5) +
geom_line() +
theme_classic() +
theme(legend.position = 'top', legend.direction="vertical")
More formatting using the scale_*_manual functions:
# Merge cultivar and Waterlogging into a single key, `new`, and map colour,
# shape and linetype to it. The three manual scales share one title, so each
# cultivar/waterlogging combination gets one legend entry: colour separates
# non-waterlogged (blue) from waterlogged (red), shape separates Riverina
# (pch 19) from Yarloop (pch 17).
tglf3 %>%
  unite(new, cultivar, Waterlogging, sep = ', ') %>%
  group_by(new) %>%
  ggplot(aes(x = P, y = form, colour = new, shape = new, linetype = new)) +
  geom_errorbar(
    aes(ymin = form - se, ymax = form + se),
    colour = "black",
    width = .6
  ) +
  geom_line(colour = "black") +
  geom_point(size = 3.5) +
  scale_colour_manual(
    name = "Cultivar, Waterlogging",
    labels = c(
      "Riverina, Non-waterlogged", "Riverina, Waterlogged",
      "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"
    ),
    values = c("blue", "red", "blue", "red")
  ) +
  scale_shape_manual(
    name = "Cultivar, Waterlogging",
    labels = c(
      "Riverina, Non-waterlogged", "Riverina, Waterlogged",
      "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"
    ),
    values = c(19, 19, 17, 17)
  ) +
  scale_linetype_manual(
    name = "Cultivar, Waterlogging",
    values = c("longdash", "solid", "longdash", "solid")
  ) +
  theme_classic() +
  theme(legend.position = 'top', legend.direction = "vertical") +
  guides(color = guide_legend(ncol = 2))
Without %>%:
# Same plot as the piped version, built from named intermediates:
# tglf4 carries the combined cultivar/Waterlogging key `new`,
# tglf5 is tglf4 grouped by that key.
tglf4 <- unite(tglf3, new, cultivar, Waterlogging, sep = ', ')
tglf5 <- group_by(tglf4, new)
ggplot(tglf5, aes(x = P, y = form, colour = new, shape = new, linetype = new)) +
  geom_errorbar(
    aes(ymin = form - se, ymax = form + se),
    colour = "black",
    width = .6
  ) +
  geom_line(colour = "black") +
  geom_point(size = 3.5) +
  scale_colour_manual(
    name = "Cultivar, Waterlogging",
    labels = c(
      "Riverina, Non-waterlogged", "Riverina, Waterlogged",
      "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"
    ),
    values = c("blue", "red", "blue", "red")
  ) +
  scale_shape_manual(
    name = "Cultivar, Waterlogging",
    labels = c(
      "Riverina, Non-waterlogged", "Riverina, Waterlogged",
      "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"
    ),
    values = c(19, 19, 17, 17)
  ) +
  scale_linetype_manual(
    name = "Cultivar, Waterlogging",
    values = c("longdash", "solid", "longdash", "solid")
  ) +
  theme_classic() +
  theme(legend.position = 'top', legend.direction = "vertical") +
  guides(color = guide_legend(ncol = 2))
Using facet_grid (example using fake data):
# Faceted version of the combined-legend plot. A second, fake "species" is
# created by scaling form by 1.5 and stacking it on top of the real summary.
tglf3 %>%
  mutate(
    form = form * 1.5
  ) %>%
  # .id = 'species' records which input each row came from as the character
  # values "1" (the scaled copy) and "2" (the real data).
  bind_rows(tglf3, .id = 'species') %>% # Add fake data to the real data as an example
  mutate(
    # species is character, so compare against "1" rather than relying on
    # coercion, and use TRUE (not the reassignable T) for the fallback branch.
    species = case_when(species == "1" ~ 'O. sativus',
                        TRUE ~ 'O. compressus')
  ) %>%
  unite(new, cultivar, Waterlogging, sep = ', ') %>%
  group_by(new) %>%
  ggplot(aes(x = P, y = form, colour = new, shape = new, linetype = new)) +
  geom_errorbar(aes(ymin = form - se, ymax = form + se), colour = "black", width = .6) +
  geom_line(color = "black") +
  geom_point(size = 3.5) +
  scale_colour_manual(name = "Cultivar, Waterlogging",
                      labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged",
                                 "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
                      values = c("blue", "red", "blue", "red")) +
  scale_shape_manual(name = "Cultivar, Waterlogging",
                     labels = c("Riverina, Non-waterlogged", "Riverina, Waterlogged",
                                "Yarloop, Non-waterlogged", "Yarloop, Waterlogged"),
                     values = c(19, 19, 17, 17)) +
  scale_linetype_manual(name = "Cultivar, Waterlogging",
                        values = c("longdash", "solid", "longdash", "solid")) +
  theme_classic() +
  theme(legend.position = 'top', legend.direction = "vertical") +
  guides(color = guide_legend(ncol = 2)) +
  # one panel per species, side by side
  facet_grid(. ~ species)
I'm very new to R and am trying to facet_wrap raincloud plots. I am trying to facet_wrap by Hypothesis chosen (which has been binary coded), so ideally would like to plot proportion of confirmatory and disconfirmatory leads chosen by hypothesis.
Here is what I have so far:
# Reshape my_data to long format, keeping ID as the identifier.
# NOTE(review): Hypothesis is listed in measure.vars, so it is stacked into the
# Leads/Proportion columns instead of staying a column of its own; the unnamed
# "Hyp" argument does not appear to create one either — confirm intent.
my_data2 <- melt(my_data, id.vars = c("ID"),
measure.vars = c("Proportion.of.Disconfirmatory.Leads.Chosen","Proportion.of.Confirmatory.Leads.Chosen", "Hypothesis"),
variable.name = "Leads", "Hyp",
value.name = "Proportion")
# Raincloud plot (flat violin + jittered points + boxplot) of Proportion by
# Leads, faceted by Hypothesis; this is the call that raises the error quoted below.
plot3 <- ggplot(data = my_data2, aes(y = Proportion, x = Leads, fill = Leads)) +
geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8) +
geom_point(aes(y = Proportion, color = Leads), position = position_jitter(width = .15), size = .5, alpha = 0.8) +
geom_boxplot(width = .1, guides = FALSE, outlier.shape = NA, alpha = 0.5) +
facet_wrap(vars(Hypothesis), nrow = 2)+
expand_limits(x = 5.25) +
guides(fill = FALSE) +
guides(color = FALSE) +
scale_color_brewer(palette = "Spectral") +
scale_fill_brewer(palette = "Spectral") +
coord_flip() +
theme_bw()
plot3
However, I am receiving this error:
"Error: At least one layer must contain all faceting variables: `Hypothesis`.
* Plot is missing `Hypothesis`
* Layer 1 is missing `Hypothesis`
* Layer 2 is missing `Hypothesis`
* Layer 3 is missing `Hypothesis`
* Layer 4 is missing `Hypothesis`"
> dput(my_data)
structure(list(ID = c(2L, 5L, 23L, 34L, 35L, 48L, 53L, 59L, 71L,
76L, 1L, 3L, 4L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L,
46L, 47L, 49L, 50L, 51L, 52L, 54L, 55L, 56L, 57L, 58L, 60L, 61L,
62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 72L, 73L, 74L, 75L,
78L), Hypothesis = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), Sum.of.Disconfirmatory.Leads.Chosen = c(9L, 7L, 0L,
3L, 4L, 1L, 2L, 3L, 6L, 3L, 2L, 3L, 5L, 3L, 4L, 3L, 3L, 5L, 0L,
5L, 5L, 1L, 4L, 5L, 6L, 4L, 5L, 2L, 6L, 4L, 6L, 1L, 4L, 4L, 8L,
3L, 4L, 2L, 5L, 2L, 4L, 7L, 1L, 1L, 2L, 3L, 5L, 2L, 5L, 8L, 0L,
5L, 4L, 7L, 3L, 4L, 6L, 1L, 1L, 4L, 4L, 8L, 7L, 3L, 4L, 6L, 2L,
5L, 2L, 5L, 5L, 8L, 2L, 4L, 5L, 7L), Sum.of.Confirmatory.Leads.Chosen = c(5L,
2L, 2L, 2L, 8L, 3L, 4L, 5L, 4L, 2L, 4L, 6L, 3L, 7L, 4L, 3L, 2L,
3L, 3L, 7L, 4L, 5L, 2L, 3L, 6L, 4L, 9L, 6L, 5L, 5L, 1L, 1L, 3L,
6L, 6L, 3L, 7L, 1L, 2L, 3L, 6L, 8L, 2L, 2L, 6L, 9L, 5L, 6L, 5L,
4L, 6L, 6L, 2L, 3L, 2L, 5L, 6L, 4L, 5L, 4L, 5L, 4L, 5L, 7L, 4L,
5L, 4L, 4L, 3L, 5L, 5L, 7L, 6L, 4L, 3L, 7L), Proportion.of.Disconfirmatory.Leads.Chosen = c(64.28571429,
77.77777778, 0, 60, 33.33333333, 25, 33.33333333, 37.5, 60, 60,
33.33333333, 33.33333333, 62.5, 30, 50, 50, 60, 62.5, 0, 41.66666667,
55.55555556, 16.66666667, 66.66666667, 62.5, 50, 50, 35.71428571,
25, 54.54545455, 44.44444444, 85.71428571, 50, 57.14285714, 40,
57.14285714, 50, 36.36363636, 66.66666667, 71.42857143, 40, 40,
46.66666667, 33.33333333, 33.33333333, 25, 25, 50, 25, 50, 66.66666667,
0, 45.45454545, 66.66666667, 70, 60, 44.44444444, 50, 20, 16.66666667,
50, 44.44444444, 66.66666667, 58.33333333, 30, 50, 54.54545455,
33.33333333, 55.55555556, 40, 50, 50, 53.33333333, 25, 50, 62.5,
50), Proportion.of.Confirmatory.Leads.Chosen = c(35.71428571,
22.22222222, 100, 40, 66.66666667, 75, 66.66666667, 62.5, 40,
40, 66.66666667, 66.66666667, 37.5, 70, 50, 50, 40, 37.5, 100,
58.33333333, 44.44444444, 83.33333333, 33.33333333, 37.5, 50,
50, 64.28571429, 75, 45.45454545, 55.55555556, 14.28571429, 50,
42.85714286, 60, 42.85714286, 50, 63.63636364, 33.33333333, 28.57142857,
60, 60, 53.33333333, 66.66666667, 66.66666667, 75, 75, 50, 75,
50, 33.33333333, 100, 54.54545455, 33.33333333, 30, 40, 55.55555556,
50, 80, 83.33333333, 50, 55.55555556, 33.33333333, 41.66666667,
70, 50, 45.45454545, 66.66666667, 44.44444444, 60, 50, 50, 46.66666667,
75, 50, 37.5, 50)), class = "data.frame", row.names = c(NA, -76L
))
> head(my_data)
ID Hypothesis Sum.of.Disconfirmatory.Leads.Chosen Sum.of.Confirmatory.Leads.Chosen
1 2 0 9 5
2 5 0 7 2
3 23 0 0 2
4 34 0 3 2
5 35 0 4 8
6 48 0 1 3
Proportion.of.Disconfirmatory.Leads.Chosen Proportion.of.Confirmatory.Leads.Chosen
1 64.28571 35.71429
2 77.77778 22.22222
3 0.00000 100.00000
4 60.00000 40.00000
5 33.33333 66.66667
6 25.00000 75.00000
I suspect that I have introduced the variable Hypothesis incorrectly in the code, however I have no idea where or how! I have tried to include it in sumld however am receiving this error when doing so:
Error in fs[[1]](x, ...) : attempt to apply non-function
Thank you all in advance for your help.
Hypothesis is not an independent column in your melted data frame my_data2, so it cannot be used for faceting. Because you included Hypothesis in measure.vars, it was converted into one of the categories of Leads in my_data2.
# Count the rows in each category of Leads in the melted data.
my_data2 %>% group_by(Leads) %>% summarize(n=n())
# A tibble: 3 x 2
Leads n
<fct> <int>
1 Proportion.of.Disconfirmatory.Leads.Chosen 76
2 Proportion.of.Confirmatory.Leads.Chosen 76
3 Hypothesis 76
If you want to use it for faceting, include Hypothesis in id.vars instead.
# Melt again with Hypothesis as an id variable, so it is kept as its own column
# and only the two proportion columns are stacked into Leads/Proportion.
my_data2 <- melt(my_data, id.vars = c("ID", "Hypothesis"),
measure.vars = c("Proportion.of.Disconfirmatory.Leads.Chosen",
"Proportion.of.Confirmatory.Leads.Chosen"),
variable.name = "Leads",
value.name = "Proportion")
head(my_data2)
ID Hypothesis Leads Proportion
1 2 0 Proportion.of.Disconfirmatory.Leads.Chosen 64.28571
2 5 0 Proportion.of.Disconfirmatory.Leads.Chosen 77.77778
3 23 0 Proportion.of.Disconfirmatory.Leads.Chosen 0.00000
4 34 0 Proportion.of.Disconfirmatory.Leads.Chosen 60.00000
5 35 0 Proportion.of.Disconfirmatory.Leads.Chosen 33.33333
6 48 0 Proportion.of.Disconfirmatory.Leads.Chosen 25.00000
Now you can use Hypothesis for faceting:
library(ggplot2)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
# Raincloud plot (flat violin + jittered points + boxplot) of Proportion by
# Leads, with one facet per value of Hypothesis.
plot3 <- ggplot(data = my_data2, aes(y = Proportion, x = Leads, fill = Leads)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0), alpha = .8) +
  geom_point(aes(y = Proportion, color = Leads),
             position = position_jitter(width = .15), size = .5, alpha = 0.8) +
  # `guides` is not a geom_boxplot() argument (ggplot2 ignores it with a
  # warning); show.legend = FALSE is the per-layer way to drop the legend key.
  geom_boxplot(width = .1, show.legend = FALSE, outlier.shape = NA, alpha = 0.5) +
  facet_wrap(~Hypothesis, nrow = 2) +
  expand_limits(x = 5.25) +
  # "none" replaces the deprecated guides(<aesthetic> = FALSE) form.
  guides(fill = "none", color = "none") +
  scale_color_brewer(palette = "Spectral") +
  scale_fill_brewer(palette = "Spectral") +
  coord_flip() +
  theme_bw()
plot3
Edited: Solution to the follow-up question on how to modify the variable names shown on the axis labels. One approach is to convert the variable to a factor and assign labels to its levels.
# Replace the column-style names with readable axis labels.
# `labels` is matched to `levels` by position, so both vectors must list the
# categories in the same order (Disconfirmatory first, Confirmatory second);
# swapping them would attach each label to the other category's data.
my_data2$Leads <- factor(my_data2$Leads,
                         levels = c("Proportion.of.Disconfirmatory.Leads.Chosen",
                                    "Proportion.of.Confirmatory.Leads.Chosen"),
                         labels = c("Proportion of Disconfirmatory Leads Chosen",
                                    "Proportion of Confirmatory Leads Chosen"))
Rerun ggplot code to produce this:
Here's a snippet of randomly selected data from my full dataframe:
canopy<-structure(list(Stage = structure(c(6L, 5L, 3L, 6L, 7L, 5L, 4L,
7L, 2L, 7L, 5L, 1L, 1L, 4L, 3L, 6L, 5L, 7L, 4L, 4L), .Label = c("milpa",
"robir", "jurup che", "pak che kor", "mehen che", "nu kux che",
"tam che"), class = c("ordered", "factor")), ID = c(44L, 34L,
18L, 64L, 54L, 59L, 28L, 51L, 11L, 56L, 33L, 1L, 7L, 25L, 58L,
48L, 36L, 51L, 27L, 66L), Sample = c(4L, 2L, 2L, 10L, 6L, 9L,
4L, 3L, 3L, 8L, 1L, 1L, 7L, 1L, 10L, 8L, 4L, 3L, 3L, 10L), Subsample = c(2L,
3L, 4L, 3L, 2L, 1L, 3L, 2L, 4L, 3L, 1L, 3L, 2L, 4L, 1L, 1L, 3L,
1L, 1L, 4L), Size..ha. = c(0.5, 0.5, 0.5, 0.5, 6, 0.5, 0.5, 0.25,
0.5, 6, 1, 1, 0.5, 2, 1, 0.5, 1, 0.25, 0.5, 2), Avg.Subsample.Canopy = c(94.8,
94.8, 97.92, 96.88, 97.14, 92.46, 93.24, 97.4, 25.64, 97.4, 94.8,
33.7, 13.42, 98.18, 85.44, 96.36, 97.4, 95.58, 85.7, 92.2), dec = c(0.948,
0.948, 0.9792, 0.9688, 0.9714, 0.9246, 0.9324, 0.974, 0.2564,
0.974, 0.948, 0.337, 0.1342, 0.9818, 0.8544, 0.9636, 0.974, 0.9558,
0.857, 0.922)), .Names = c("Stage", "ID", "Sample", "Subsample",
"Size..ha.", "Avg.Subsample.Canopy", "dec"), row.names = c(693L,
537L, 285L, 1017L, 853L, 929L, 441L, 805L, 173L, 889L, 513L,
9L, 101L, 397L, 913L, 753L, 569L, 801L, 417L, 1053L), class = "data.frame")
I am trying to code a GLMM of dec as a function of Stage and Size..ha.
The GLMM is necessary because each row represents a point Subsample measured within a larger Sample area. I am also using a binomial distribution because dec is proportion data.
I tried the model:
# Binomial GLMM: dec modelled by Stage, Size..ha. and their interaction, with a
# random intercept for each Sample; fitted to canopy.
canopy.binomial.mod<-glmer(dec~Stage*Size..ha.+(1|Sample),family="binomial",data=canopy)
summary(canopy.binomial.mod)
but get the error:
Error in pwrssUpdate(pp, resp, tol = tolPwrss, GQmat = GQmat, compDev
= compDev, : (maxstephalfit) PIRLS step-halvings failed to reduce deviance in pwrssUpdate
I've seen online that this can be a result of needing to scale a predictor variable, so I tried:
# Centre x and scale it, spelling out both base::scale options explicitly.
cs. <- function(x) {
  scale(x, center = TRUE, scale = TRUE)
}
# Same model as before, but with Size..ha. centred and scaled through cs.().
# NOTE(review): canopy.rmna is not defined in the code shown; presumably canopy
# with incomplete rows removed — confirm.
canopy.binomial.mod<-glmer(dec~Stage*cs.(Size..ha.)+(1|Sample),family="binomial",data=canopy.rmna)
summary(canopy.binomial.mod)
Which doesn't seem to help. I also thought that maybe I'm asking too much of the model and it's not converging due to too many predictor variables, so let's remove the Size variable, which is of less interest to me.
# Reduced model: Stage as the only fixed effect, random intercept for Sample.
# NOTE(review): canopy.rmna is not defined in the code shown — confirm its origin.
canopy.binomial.mod<-glmer(dec~Stage+(1|Sample),family="binomial",data=canopy.rmna)
summary(canopy.binomial.mod)
Still no luck. Any ideas how to address this?