bar chart of constant height for factors in time series - r

I am a beginner trying to use R for making graphs. Please help me. I have data with multiple columns (time series). Each column holds factors (please see the one-column example data below). I would like to make a constant-height (say 1 unit) bar chart of the time series and would like to represent the two factor levels (“R” and “L” in the example) in different colors, with the DATE on the x axis. Any tips?
Thanking you in advance!
DATE GROUP
2011.06.18 00:00:00 R
2011.06.18 06:00:00 L
2011.06.18 12:00:00 R
2011.06.18 18:00:00 R
2011.06.19 00:00:00 L
2011.06.19 06:00:00 L
2011.06.19 12:00:00 R
2011.06.19 18:00:00 L
2011.06.20 00:00:00 L
2011.06.20 06:00:00 L
2011.06.20 12:00:00 R
2011.06.20 18:00:00 L
2011.06.21 00:00:00 R
2011.06.21 06:00:00 L

Assuming your data are in dat, but with an extra column:
dat <- structure(list(DATE = structure(list(sec = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 6L, 12L, 18L, 0L, 6L,
12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L), mday = c(18L, 18L, 18L,
18L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L), mon = c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), year = c(111L,
111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L,
111L, 111L), wday = c(6L, 6L, 6L, 6L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 2L, 2L), yday = c(168L, 168L, 168L, 168L, 169L, 169L,
169L, 169L, 170L, 170L, 170L, 170L, 171L, 171L), isdst = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt")), GROUP = structure(c(2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("L",
"R"), class = "factor"), GROUP2 = structure(c(1L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("L", "R"), class = "factor")), .Names = c("DATE",
"GROUP", "GROUP2"), row.names = c(NA, -14L), class = "data.frame")
Then I think this does what you want. First count the elements == to one of the classes
# Per-row count of "R" across the two GROUP columns. Comparing the whole
# sub-frame at once yields a logical matrix, so rowSums() does the counting
# without apply() coercing each row of the data frame to character.
# Columns are selected by name rather than position; unname() keeps the
# result free of row-name labels so barplot() draws no extra bar names.
counts <- unname(rowSums(dat[, c("GROUP", "GROUP2")] == "R"))
then compute the other count and bind to a matrix:
# Two-row matrix for a stacked barplot: row "L" (whatever is not "R" among
# the group columns) and row "R", with one column per time stamp.
countmat <- rbind(L = (NCOL(dat) - 1) - counts, R = counts)
then we plot using barplot()
# Enlarge the bottom margin and rotate tick labels so the time stamps fit.
# par() returns the previous settings, which are restored at the end.
op <- par(mar = c(9, 4, 4, 2) + 0.1, las = 2)
# barplot() returns the bar midpoints; they position the date labels below.
mids <- barplot(
  countmat,
  ylim = c(0, 2.5),  # head-room above the stacked bars for the legend
  legend.text = c("L", "R"),
  args.legend = list(x = "top", bty = "n")
)
# format() with an explicit pattern gives every label the same layout;
# as.character() on date-times can vary between R versions.
axis(side = 1, at = mids, labels = format(dat$DATE, "%Y-%m-%d %H:%M:%S"))
par(op)
which produces:
See the help pages of the individual functions for explanations on the arguments.
Edit: If you just want to do this for an individual column, then this isn't the most interesting graph, but...
# Logical indicator of "R" for the single GROUP column; its negation marks
# the other level, so each stacked bar has a total height of exactly 1.
count2 <- with(dat, GROUP == "R")
countmat2 <- t(cbind(R = count2, L = !count2))
# Enlarge the bottom margin and rotate tick labels so the time stamps fit.
# The previous par() settings are restored at the end.
op <- par(mar = c(9, 4, 4, 2) + 0.1, las = 2)
# barplot() returns the bar midpoints; they position the date labels below.
mids <- barplot(
  countmat2,
  ylim = c(0, 1.5),  # head-room above the unit-height bars for the legend
  legend.text = c("R", "L"),
  args.legend = list(x = "top", bty = "n")
)
# format() with an explicit pattern gives every label the same layout;
# as.character() on date-times can vary between R versions.
axis(side = 1, at = mids, labels = format(dat$DATE, "%Y-%m-%d %H:%M:%S"))
par(op)
which gives this figure:

Related

How to insert new rows for missing data with intervals that could vary by a few minutes in R

I would like to insert rows when there are missing data within a 5 minute interval glucose sensor dataset. I have managed to complete this using the tsibble package but there can be time drifts in the data e.g. the sensor records a value at 4 minutes instead of 5. This causes the inserted time stamps to become unsynchronised throughout the remainder of the data frame.
Is there a way to complete this for a time interval that should be 5 minutes, but could be between 4 and 6 minutes? The dataset also includes multiple different IDs.
The ultimate aim is then to fill in the missing data gaps based upon a set criteria (i.e. max fill <= 3 rows) using the existing data.
Reprex pasted below.
library(tsibble, warn.conflicts = FALSE)
#> Warning: package 'tsibble' was built under R version 4.1.1
Data <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L),
gl = c(125L, 133L, 132L, 130L, 133L, 135L, 166L, 161L, 67L, 66L, 67L, 69L, 67L),
time = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
min = c(42L, 47L, 51L, 56L, 6L, 11L, 11L, 16L, 2L, 17L, 22L, 27L, 32L),
hour = c(9L, 9L, 9L, 9L, 10L, 10L, 11L, 11L, 0L, 0L, 0L, 0L, 0L),
mday = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L),
mon = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
year = c(121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L,121L),
wday = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 0L, 0L, 0L, 0L,0L),
yday = c(92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 93L, 93L,93L, 93L, 93L),
isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L, 0L, 0L)),
class = c("POSIXlt", "POSIXt"), tzone = "GMT"),
dif = structure(c(NA, 5, 4, 5, 10, 5, 60, 5, NA, 15, 5, 5, 5),
units = "mins", class = "difftime")),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -13L), groups = structure(list(id = 1:2, .rows = structure(list(1:8, 9:13),
ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -2L), .drop = TRUE))
# Nominal sampling interval of the sensor: 5 minutes.
x <- new_interval(minute = 5)
# Build the tsibble (one series per id, indexed by time) and, in the same
# expression, make the implicit gaps explicit. `.full = FALSE` is passed
# through to fill_gaps() unchanged.
tsdata <- fill_gaps(
  build_tsibble(Data, key = id, index = time, interval = x),
  .full = FALSE
)
This is probably not a final answer to what you are looking for, but it might get you started in getting what you want..
library(data.table)
library(zoo)
# `Data` from the question is a grouped tibble whose `time` column is
# POSIXlt. Rebuild it as a plain data.table with POSIXct times, which is
# the date-time class data.table works with.
DT <- data.table::data.table(
  id   = Data$id,
  gl   = Data$gl,
  time = as.POSIXct(Data$time)
)
# Split to list by id
L <- split(DT, by = "id")
# Interpolate gl based on time
ans <- lapply(L, function(x) {
  # build a regular one-minute grid from the first to the last reading
  temp <- data.table::data.table(
    id = unique(x$id),
    time = seq(min(x$time), max(x$time), by = 60)
  )
  # join in measured data (update join: rows without a reading stay NA)
  temp[x, gl_measured := i.gl, on = .(time)]
  # interpolate gl-values linearly between the measured points
  temp[, gl_approx := zoo::na.approx(gl_measured)]
})
# Bind list together again
final <- data.table::rbindlist(ans)

How to bring model predict line in front of geom_rect

I am investigating the effect of time since fire on species diversity. I am attempting to make a graph that has different colours at different time since fire ages. However, putting the colours onto the graph has made the model prediction line fade away. I am wondering if there is some way to bring the line in front of geom_rect?
Loaded packages:
library(voxel)
library(gamm4)
library(ggplot2)
My data:
data <- read.csv('StacksOverflow.csv')
structure(list(Lscape = c(158L, 158L, 158L, 158L, 158L, 158L),
TSF = c(5, 5, 5, 18.5, 5, 18.5), VegtypeNew = structure(c(1L,
1L, 1L, 2L, 1L, 1L), .Label = c("spinsandplain", "woodlndsandplain"
), class = "factor"), FF = c(2L, 2L, 2L, 1L, 2L, 1L), ThreeYearRain = c(913.799997,
913.799997, 913.799997, 913.799997, 913.799997, 913.799997
), Div = c(2.2629743, 1.9630117, 1.7336569, 1.2816843, 2.4155056,
1.4240443), triodia_low = c(19L, 6L, 21L, 32L, 11L, 32L)), row.names = c(NA,
6L), class = "data.frame")
Extended data:
structure(list(Lscape = c(158L, 158L, 158L, 158L, 158L, 158L,
158L, 158L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 235L,
235L, 235L, 235L, 235L, 235L, 235L, 235L, 237L, 237L, 237L, 237L,
237L, 237L, 237L, 237L, 254L, 254L, 254L, 254L, 254L, 254L, 254L,
254L, 287L, 287L, 287L, 287L, 287L, 287L, 287L, 287L, 304L, 304L,
304L, 304L, 304L, 304L, 304L, 304L, 311L, 311L, 311L, 311L, 311L,
311L, 311L, 311L, 312L, 312L, 312L, 312L, 312L, 312L, 312L, 312L,
323L, 323L, 323L, 323L, 323L, 323L, 323L, 323L, 326L, 326L, 326L,
326L, 326L, 326L, 326L, 326L, 327L, 327L, 327L, 327L, 327L, 327L,
327L, 327L, 337L, 337L, 337L, 337L, 337L, 337L, 337L, 337L, 355L,
355L, 355L, 355L, 355L, 355L, 355L, 355L, 370L, 370L, 370L, 370L,
370L, 370L, 370L, 370L, 379L, 379L, 379L, 379L, 379L, 379L, 379L,
379L, 411L, 411L, 411L, 411L, 411L, 411L, 411L, 411L, 414L, 414L,
414L, 414L, 414L, 414L, 414L, 414L, 435L, 435L, 435L, 435L, 435L,
435L, 435L, 435L, 437L, 437L, 437L, 437L, 437L, 437L, 437L, 437L,
438L, 438L, 438L, 438L, 438L, 438L, 438L, 438L, 447L, 447L, 447L,
447L, 447L, 447L, 447L, 447L, 452L, 452L, 452L, 452L, 452L, 452L,
452L, 452L), TSF = c(5, 5, 5, 18.5, 5, 18.5, 18.5, 18.5, 11.5,
4.5, 0.5, 20, 11.5, 0.5, 1, 4.5, 1, 1, 4.5, 5, 4.5, 2, 5, 1,
6, 6, 4.5, 6, 14.5, 17, 4.5, 6, 1, 1, 7, 4.5, 2, 2, 7, 7, 20,
4, 3.5, 4, 3.5, 3.5, 11.5, 20, 6, 0.5, 5, 6, 6, 0.5, 7, 6, 3.5,
3.5, 3.5, 11.5, 11.5, 1, 1, 11.5, 1, 1, 4, 1, 1, 4, 1, 10.5,
7, 17.5, 0.5, 0.5, 0.5, 17.5, 7, 0.5, 18, 1.5, 3.5, 18, 18, 5,
3.5, 18.5, 14.5, 1.5, 7, 1.5, 7, 7, 7, 7, 10.5, 1.5, 0, 1.5,
7, 3, 7, 10.5, 0.5, 20, 0.5, 2, 2, 1.5, 2, 3, 20, 1, 1.5, 10.5,
17, 1.5, 1.5, 10.5, 3, 1, 1, 1, 4.5, 1, 6.5, 1, 10, 1.5, 12.5,
1.5, 1.5, 1.5, 1.5, 1.5, 2, 7, 12.5, 2, 7, 2, 2, 2, 1.5, 18.5,
18.5, 1.5, 7, 1.5, 1.5, 5, 12.5, 6.5, 1.5, 1.5, 1.5, 1.5, 1.5,
1.5, 6.5, 6.5, 1.5, 6.5, 18.5, 6.5, 7, 1.5, 1, 7, 7, 1, 7, 1,
7.5, 7.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), VegtypeNew = structure(c(1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("spinsandplain", "woodlndsandplain"
), class = "factor"), FF = c(2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
3L, 3L, 4L, 2L, 2L, 5L, 3L, 4L, 5L, 5L, 2L, 2L, 4L, 3L, 5L, 4L,
5L, 4L, 4L, 5L, 3L, 3L, 4L, 5L, 5L, 3L, 4L, 6L, 5L, 5L, 3L, 4L,
1L, 5L, 3L, 4L, 4L, 4L, 2L, 1L, 2L, 2L, 3L, 4L, 4L, 3L, 2L, 3L,
3L, 3L, 4L, 3L, 3L, 3L, 4L, 2L, 6L, 6L, 6L, 6L, 5L, 2L, 7L, 3L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 3L, 4L, 4L, 3L, 4L, 3L, 3L,
4L, 5L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 4L, 3L, 6L, 4L, 4L, 3L, 3L,
4L, 0L, 2L, 4L, 3L, 2L, 3L, 3L, 0L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
3L, 2L, 5L, 6L, 3L, 3L, 3L, 3L, 2L, 4L, 3L, 4L, 5L, 4L, 3L, 3L,
6L, 4L, 3L, 5L, 5L, 5L, 5L, 4L, 3L, 2L, 1L, 2L, 2L, 3L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 3L,
2L, 2L, 2L, 3L, 2L, 4L, 2L, 3L, 4L, 5L, 5L, 3L, 4L, 3L, 3L, 4L
), ThreeYearRain = c(913.799997, 913.799997, 913.799997, 913.799997,
913.799997, 913.799997, 913.799997, 913.799997, 938.899988, 938.899988,
938.899988, 938.600004, 938.899988, 938.899988, 938.899988, 938.499989,
930.700005, 932.800001, 930.700005, 930.700005, 932.800001, 930.700005,
930.700005, 932.800001, 932.699991, 934.799987, 934.799987, 934.799987,
932.699991, 934.799987, 932.699991, 934.799987, 896.99999, 896.99999,
908.999991, 908.999991, 908.999991, 908.999991, 910.199991, 898.399994,
928.000009, 939.800006, 935.500004, 928.000009, 928.000009, 923.700007,
931.499996, 931.499996, 866.200004, 866.200004, 867.000003, 867.000003,
867.000003, 867.300002, 867.000003, 869.3, 926.800003, 926.800003,
926.800003, 926.800003, 933.800003, 934.600006, 934.600006, 934.600006,
924.2, 925.100002, 924.2, 924.2, 924.2, 922.2, 924.2, 924.7,
974.799995, 983.500006, 983.500006, 983.500004, 983.500006, 974.799995,
974.799994, 983.500006, 839.1, 839.1, 839.1, 839.100001, 839.300001,
839.1, 838.699999, 839.100001, 839.100001, 838.699999, 842.300004,
842.300004, 842.900006, 842.300004, 842.900006, 842.300004, 936.900014,
936.900014, 936.900014, 932.999984, 933.099983, 932.999984, 936.900014,
936.900014, 870.499995, 870.499995, 877.399998, 877.399998, 876.099997,
876.099997, 876.099997, 859.199997, 957.199982, 966.299982, 955.699998,
955.699998, 957.199982, 955.699998, 955.699998, 956.299985, 852.2,
852.2, 852.600006, 852.500001, 852.500001, 852.500001, 852.600006,
852.500001, 906.700011, 904.700001, 912.600007, 912.600007, 914.600007,
906.700001, 906.399998, 914.600007, 925.599982, 933.299992, 933.299992,
933.299992, 933.299992, 926.500012, 935.899994, 935.199992, 916.800001,
916.100001, 916.800001, 916.400003, 918.700003, 904.100001, 916.800001,
918.700003, 899.1, 904.100001, 906.000003, 903.400002, 904.100001,
903.400002, 906.000003, 906.000003, 905.7, 903.099999, 903.099999,
905.7, 912.199994, 893.200002, 905.399999, 904.999998, 933.700012,
933.700012, 933.700012, 933.700012, 933.700012, 932.30001, 932.300008,
932.300008, 878.500006, 878.500006, 878.500006, 879.300004, 879.300004,
879.300004, 879.300004, 873.200008), Div = c(2.2629743, 1.9630117,
1.7336569, 1.2816843, 2.4155056, 1.4240443, 1.5178948, 0.8993031,
1.2022801, 1.9287665, 2.0237769, 2.004871, 1.5020684, 2.1776591,
2.093787, 2.3139276, 2.7244402, 2.7026829, 1.6644725, 2.0696347,
1.9561853, 2.6018987, 2.5800017, 2.1867866, 2.4144821, 1.7389892,
2.1427451, 1.6544538, 1.8651966, 1.7569776, 1.8257533, 1.4048204,
2.7384914, 2.9344488, 2.2306909, 2.5085619, 1.8874836, 2.3431509,
1.8401602, 1.8620274, 1.8038997, 2.5909049, 2.2265328, 2.0882065,
2.4737837, 2.2995223, 1.4231311, 2.0577752, 1.6463134, 2.1464331,
2.2636437, 2.0992589, 1.7666974, 1.835061, 1.7732171, 2.0813243,
1.865505, 2.0200607, 1.2510612, 1.021761, 0.8111482, 0.2617645,
2.0282081, 1.1145976, 2.2596683, 2.3517629, 1.9424972, 1.9191269,
1.4222035, 2.6007698, 2.0071984, 1.9049132, 1.073374, 0.9576897,
1.6273043, 1.7701581, 0.6890092, 1.5764456, 0.384906, 1.5099996,
1.6713486, 2.5483064, 2.2033185, 2.0798843, 1.9082985, 2.1580972,
1.6952798, 1.6303402, 1.9461221, 1.4116405, 1.5347693, 2.6924921,
1.727278, 1.9384415, 1.6659585, 1.612819, 1.6592884, 2.7129796,
0, 2.7098898, 1.3785924, 2.7635218, 1.1481271, 1.8597007, 2.2191531,
1.088549, 2.431015, 1.3702099, 2.1018035, 2.3442348, 2.3599146,
2.789816, 1.8340235, 1.0606126, 2.5852679, 1.7791063, 1.2273106,
2.2432636, 2.5642458, 1.3306642, 2.6771856, 1.5062567, 2.0903266,
2.0398412, 2.4821503, 0.5979376, 1.479214, 1.9188301, 1.2267089,
2.4491421, 1.5366949, 2.516592, 2.4084849, 2.4385928, 2.549348,
2.7090074, 2.3337573, 1.8982968, 1.7956341, 2.3752386, 1.6587394,
2.6663039, 2.4853204, 1.9325793, 2.4431141, 1.6976331, 0.8791745,
2.6625573, 1.9596877, 1.9287565, 2.4590816, 2.4963942, 1.8767916,
1.3954333, 2.5155936, 2.2327274, 2.6613726, 2.580748, 2.3142567,
2.2280879, 1.7925025, 1.663008, 2.3488945, 2.0746398, 1.7050203,
2.0108246, 1.7317251, 2.4936515, 0.9556999, 1.3716151, 2.0694067,
1.4944032, 1.0984774, 1.2868726, 1.6429103, 1.3720737, 1.8037795,
1.8745583, 1.8921264, 1.8320377, 1.201682, 1.8489571, 1.798546,
0.8486856), triodia_low = c(19L, 6L, 21L, 32L, 11L, 32L, 16L,
29L, 17L, 20L, 0L, 24L, 37L, 0L, 3L, 29L, 4L, 2L, 31L, 28L, 20L,
12L, 6L, 6L, 26L, 28L, 27L, 32L, 37L, 26L, 15L, 27L, 2L, 1L,
19L, 5L, 13L, 10L, 33L, 14L, 25L, 22L, 15L, 34L, 15L, 7L, 36L,
25L, 25L, 0L, 25L, 4L, 21L, 0L, 33L, 16L, 16L, 15L, 22L, 25L,
25L, 0L, 0L, 18L, 2L, 0L, 26L, 0L, 0L, 7L, 0L, 13L, 28L, 35L,
0L, 0L, 0L, 31L, 29L, 0L, 14L, 5L, 14L, 11L, 12L, 16L, 21L, 26L,
22L, 7L, 23L, 10L, 23L, 17L, 19L, 7L, 27L, 3L, 0L, 2L, 29L, 14L,
30L, 12L, 0L, 35L, 0L, 29L, 4L, 5L, 14L, 15L, 33L, 0L, 3L, 21L,
34L, 0L, 2L, 28L, 16L, 0L, 1L, 0L, 11L, 0L, 32L, 0L, 27L, 2L,
28L, 3L, 0L, 4L, 1L, 6L, 14L, 27L, 25L, 12L, 7L, 10L, 16L, 9L,
4L, 15L, 40L, 2L, 18L, 5L, 3L, 6L, 1L, 33L, 2L, 5L, 12L, 4L,
7L, 3L, 17L, 30L, 5L, 7L, 17L, 15L, 16L, 9L, 0L, 26L, 16L, 0L,
24L, 1L, 27L, 32L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-184L))
The model:
# Mixed additive model: a smooth of triodia_low (basis dimension k = 6) plus
# parametric terms, with a random intercept for each Lscape.
m1b <- gamm4(
  Div ~ TSF + FF + s(triodia_low, k = 6) + VegtypeNew + ThreeYearRain,
  random = ~ (1 | Lscape),
  data = data
)
Plotting:
# Base plot of the triodia_low smooth (raw data plus confidence band).
p <- plotGAMM(
  m1b,
  smooth.cov = "triodia_low",
  groupCovs = NULL,
  orderedAsFactor = TRUE,
  rawOrFitted = "raw",
  plotCI = TRUE,
  grouping = NULL
)
# Labels, a stripped-back theme and coloured background bands. The bands are
# added after the layers already contained in `p`, so they are drawn on top.
p +
  labs(x = "Years since fire", y = "Species diversity (H')") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.title = element_text(size = 22), # face="bold"
    axis.ticks = element_line(),
    axis.text = element_text(size = 18, colour = "black"),
    text = element_text(family = "Arial"),
    legend.position = "none"
  ) +
  scale_x_continuous(breaks = seq(0, 40, by = 5)) +
  geom_rect(aes(xmin = 0, xmax = 0.6, ymin = -Inf, ymax = Inf),
            alpha = 0.002, fill = "coral4") +
  geom_rect(aes(xmin = .6, xmax = 1, ymin = -Inf, ymax = Inf),
            alpha = 0.002, fill = "gold") +
  geom_rect(aes(xmin = 1, xmax = 5, ymin = -Inf, ymax = Inf),
            alpha = 0.002, fill = "darkred") +
  geom_rect(aes(xmin = 5, xmax = 10, ymin = -Inf, ymax = Inf),
            alpha = 0.002, fill = "chocolate") +
  geom_rect(aes(xmin = 10, xmax = 40, ymin = -Inf, ymax = Inf),
            alpha = 0.002, fill = "orangered")
The plot:
Any help would be greatly appreciated :)
Using the development version of gratia you can replicate the plot you showed with a few simple calls to create the data, predict etc.
To install the development version of gratia do
# install.packages("remotes")
remotes::install_github("gavinsimpson/gratia")
Once installed you can produce an object suitable for plotting using:
library('mgcv')
library('gamm4')
library('gratia')
library('ggplot2')
library('dplyr')
## the question keeps its data frame in `data`; alias it as `df`, the name
## used from here on (otherwise `df` would resolve to the stats::df function)
df <- data
## model fit
m1b <- gamm4(Div ~ TSF + FF + s(triodia_low, k=6) + VegtypeNew + ThreeYearRain,
             random = ~ (1|Lscape), data = df)
## data to predict at
new_df <- data_slice(m1b, var1 = 'triodia_low', n = 100)
## predict from the GAM part of the fit and cast to a tibble
## (se.fit = TRUE returns both `fit` and `se.fit`)
pred_df <- as_tibble(predict(m1b[["gam"]], new_df, se.fit = TRUE))
## add to the data we're predicting at
pred_df <- bind_cols(new_df, pred_df)
## grab the inverse link of the model (not needed here, but is for non-Normal fits)
ilink <- inv_link(m1b)
## create the upper and lower credible interval (fit +/- 2 standard errors
## on the link scale, then back-transformed); `fit` is transformed last so
## that lwr and upr are computed from the link-scale values
pred_df <- mutate(pred_df,
                  lwr = ilink(fit - (2 * se.fit)),
                  upr = ilink(fit + (2 * se.fit)),
                  fit = ilink(fit))
The plot itself can be created using:
ggplot(pred_df, aes(x = triodia_low, y = fit)) +
  labs(x = "Years since fire", y = "Species diversity (H')") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(colour = "black"),
        axis.title = element_text(size = 22),
        axis.ticks = element_line()) +
  scale_x_continuous(breaks = seq(0, 40, by = 5)) +
  ## background bands: annotate() draws each rectangle exactly once, whereas
  ## geom_rect() with constant aesthetics draws one copy per row of pred_df;
  ## a single rectangle can therefore use an ordinary alpha value
  annotate("rect", xmin = 0, xmax = 0.6, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "coral4") +
  annotate("rect", xmin = 0.6, xmax = 1, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "gold") +
  annotate("rect", xmin = 1, xmax = 5, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "darkred") +
  annotate("rect", xmin = 5, xmax = 10, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "chocolate") +
  annotate("rect", xmin = 10, xmax = 40, ymin = -Inf, ymax = Inf,
           alpha = 0.6, fill = "orangered") +
  ## data layers come last so they sit in front of the bands
  geom_point(data = df, mapping = aes(x = triodia_low, y = Div)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4) +
  geom_line()
Most of the ggplot code is yours, but now that we have full control, we can put the data layers in the foreground by leaving those layers until the end.
Note that I'm not convinced this is a great plot. The fitted function you are showing is conditional upon the other covariates in the data set. Here, and as with voxel::plotGAMM(), we're predicting from the model and hence we have to supply something for the other covariates. Following voxel::plotGAMM and mgcv::vis.gam, we fix the other covariates not shown at
the value of the data observation closest to the median (for continuous variables), or
the modal category (for factor parametric terms)
So, the resulting figure is the fit conditional upon those values. In particular, it is for the spinsandplain level of VegtypeNew. As such it is a little misleading.

How to add different boxplots to the same plot based on different data sources in ggplot /R?

Please find My Data below. Please note that picture below is an example of the design I wish to copy and does not correlate to My Data specifically.
My Data is stored in p. I have a continuous covariate p$ki67pro which denotes the percentage of cells actively dividing in a tumor sample (thus ranging from 0 to 100). I have three different stages of the tumor, which correspond to p$WHO.Grade==1,2,3. Each sample represents a tumor patient who either had recurrence (p$recurrence==1) or not (p$recurrence==0).
Therefore:
head(p)
WHO.Grade recurrence ki67pro
1 1 0 1
2 2 0 12
3 1 0 3
9 1 0 3
10 1 0 5
11 1 0 3
I wish to produce the boxplot below. As you can see, there are four points on the x axis, which correspond to each p$WHO.Grade and to All samples. There are two boxplots per p$WHO.Grade + All.
Per p$WHO.Grade and All, I want one boxplot to represent p$ki67pro for recurrent tumors (p$recurrence==1) and the other boxplot to represent p$ki67pro for non-recurrent tumors (p$recurrence==0).
I.e.
p$ki67pro[p$WHO.Grade==1 & p$recurrence==0] versus
p$ki67pro[p$WHO.Grade==1 & p$recurrence==1]
p$ki67pro[p$WHO.Grade==2 & p$recurrence==0] versus
p$ki67pro[p$WHO.Grade==2 & p$recurrence==1]
p$ki67pro[p$WHO.Grade==3 & p$recurrence==0] versus
p$ki67pro[p$WHO.Grade==3 & p$recurrence==1]
And for All
p$ki67pro[p$recurrence==0] versus
p$ki67pro[p$recurrence==1]
I have used the following script so far, but I can't figure out how to get the All included. Please note that there is only one case with p$WHO.Grade==3
# Plot data: grade on x, Ki-67 percentage on y, and a constant series label
# `f` that drives fill and colour. Rows with a missing grade are dropped.
df <- data.frame(
  x = as.factor(c(p$WHO.Grade)),
  y = c(p$ki67pro),
  f = rep("ki67pro", times = nrow(p))
)
df <- df[!is.na(df$x), ]
# Boxes first, then whisker caps, then the jittered raw points on top.
ggplot(df) +
  geom_boxplot(
    aes(x, y, fill = f, colour = f),
    outlier.alpha = 0,
    position = position_dodge(width = 0.78)
  ) +
  scale_x_discrete(name = "", label = c("WHO-I", "WHO-II", "WHO-III", "All")) +
  scale_y_continuous(name = "x", breaks = seq(0, 30, 5), limits = c(0, 30)) +
  stat_boxplot(
    aes(x, y, colour = f),
    geom = "errorbar",
    width = 0.3,
    position = position_dodge(0.7753)
  ) +
  geom_point(
    aes(x, y, fill = f, colour = f),
    size = 3,
    shape = 21,
    position = position_jitterdodge()
  ) +
  scale_fill_manual(
    values = c("#edf1f9", "#fcebeb"), name = "", labels = c("", "")
  ) +
  scale_colour_manual(
    values = c("#1C73C2", "red"), name = "", labels = c("", "")
  ) +
  theme(legend.position = "none")
My Data p
p <- structure(list(WHO.Grade = c(1L, 2L, 1L, 1L, 1L, 1L, 3L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), recurrence = c(0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L), ki67pro = c(1L, 12L,
3L, 3L, 5L, 3L, 20L, 25L, 7L, 4L, 5L, 12L, 3L, 15L, 4L, 5L, 7L,
8L, 3L, 12L, 10L, 4L, 10L, 7L, 3L, 2L, 3L, 7L, 4L, 7L, 10L, 4L,
5L, 5L, 3L, 5L, 2L, 5L, 3L, 3L, 3L, 4L, 4L, 3L, 2L, 5L, 1L, 5L,
2L, 3L, 1L, 2L, 3L, 3L, 5L, 4L, 20L, 5L, 0L, 4L, 3L, 0L, 3L,
4L, 1L, 2L, 20L, 2L, 3L, 5L, 4L, 8L, 1L, 4L, 5L, 4L, 3L, 6L,
12L, 3L, 4L, 4L, 2L, 5L, 3L, 3L, 3L, 2L, 5L, 4L, 2L, 3L, 4L,
3L, 3L, 2L, 2L, 4L, 7L, 4L, 3L, 4L, 2L, 3L, 6L, 2L, 3L, 10L,
5L, 10L, 3L, 10L, 3L, 4L, 5L, 2L, 4L, 3L, 4L, 4L, 4L, 5L, 3L,
12L, 5L, 4L, 3L, 2L, 4L, 3L, 4L, 2L, 1L, 6L, 1L, 4L, 12L, 3L,
4L, 3L, 2L, 6L, 5L, 4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, 4L, 1L,
3L, 3L, 4L, 0L, 3L)), class = "data.frame", row.names = c(1L,
2L, 3L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 44L, 45L, 46L, 47L, 48L,
49L, 50L, 51L, 52L, 53L, 54L, 55L, 57L, 59L, 60L, 61L, 62L, 63L,
64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L,
77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 87L, 89L, 90L, 91L,
92L, 93L, 94L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L,
105L, 106L, 107L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L,
117L, 118L, 119L, 120L, 121L, 123L, 124L, 125L, 126L, 127L, 128L,
130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L,
141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L,
152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L,
163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L,
174L, 175L))
A trick that can be used is to create a new level in WHO.Grade, since it only has 3 levels. This should be a temporary level, so a good way of doing it is with package dplyr, function mutate.
Note that there is no need to create a new dataframe df.
library(ggplot2)
library(dplyr)
# Append a second copy of the data with a temporary grade 4, which becomes
# the "All" category; `p` itself is left unchanged.
p %>%
  bind_rows(p %>% mutate(WHO.Grade = 4)) %>%
  # discrete x axis and discrete fill/colour both need factors
  mutate(WHO.Grade = factor(WHO.Grade),
         recurrence = factor(recurrence)) %>%
  ggplot(aes(WHO.Grade, ki67pro,
             fill = recurrence, colour = recurrence)) +
  geom_boxplot(outlier.alpha = 0,
               position = position_dodge(width = 0.78, preserve = "single")) +
  geom_point(size = 3, shape = 21,
             position = position_jitterdodge()) +
  # `labels` is spelled out in full rather than relying on partial matching
  scale_x_discrete(name = "",
                   labels = c("WHO-I", "WHO-II", "WHO-III", "All")) +
  scale_y_continuous(name = "x", breaks = seq(0, 30, 5), limits = c(0, 30)) +
  scale_fill_manual(values = c("#edf1f9", "#fcebeb"), name = "",
                    labels = c("", "")) +
  scale_colour_manual(values = c("#1C73C2", "red"), name = "",
                      labels = c("", "")) +
  theme(legend.position = "none")
What about something like this:
# here you duplicate your original data
# here you duplicate your original data
p1 <- p
# how to catch the all
p1$WHO.Grade <- 'all'
# Keep the stacked copy in its own object: overwriting `p` would double the
# data again every time this snippet is re-run.
p_all <- rbind(p1, p)
# Fix the level order explicitly so the axis labels below always line up
# with the right category, whatever the locale's sort order.
p_all$WHO.Grade <- factor(p_all$WHO.Grade, levels = c("1", "2", "3", "all"))
library(ggplot2)
ggplot(p_all) +
  geom_boxplot(aes(WHO.Grade,
                   y = ki67pro,
                   fill = factor(recurrence),
                   color = factor(recurrence)),
               outlier.alpha = 0, position = position_dodge(width = 0.78)) +
  # from here it's more or less your code
  # (`labels` spelled out in full rather than relying on partial matching)
  scale_x_discrete(name = "", labels = c("WHO-I", "WHO-II", "WHO-III", "All")) +
  scale_y_continuous(name = "x", breaks = seq(0, 30, 5), limits = c(0, 30)) +
  stat_boxplot(aes(WHO.Grade,
                   y = ki67pro,
                   color = factor(recurrence)),
               geom = "errorbar", width = 0.3, position = position_dodge(0.7753)) +
  geom_point(aes(WHO.Grade,
                 y = ki67pro,
                 color = factor(recurrence)),
             size = 3, shape = 21, position = position_jitterdodge()) +
  scale_fill_manual(values = c("#edf1f9", "#fcebeb"), name = "",
                    labels = c("", "")) +
  scale_colour_manual(values = c("#1C73C2", "red"), name = "",
                      labels = c("", "")) +
  theme(legend.position = "none",
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"))
In case your dataset is too large for just doubling it in size you create two plots and put them next to each other via grid.arrange().
library(ggplot2)
library(gridExtra)
#the data: grade on x, Ki-67 percentage on y, recurrence status as the
#grouping factor; rows with a missing grade are dropped
df <- data.frame(x = as.factor(c(p$WHO.Grade)),
                 y = p$ki67pro,
                 f = as.factor(p$recurrence))
df <- df[!is.na(df$x), ]
# plot 1: one pair of boxes per WHO grade. Only the three grade labels are
# supplied here, because "All" lives in plot 2.
plot1 <- ggplot(df) +
  geom_boxplot(aes(x, y, fill = f, colour = f), outlier.alpha = 0,
               position = position_dodge(width = 0.78)) +
  scale_x_discrete(name = "", labels = c("WHO-I", "WHO-II", "WHO-III")) +
  scale_y_continuous(name = "x", breaks = seq(0, 30, 5), limits = c(0, 30)) +
  stat_boxplot(aes(x, y, colour = f), geom = "errorbar", width = 0.3,
               position = position_dodge(0.7753)) +
  geom_point(aes(x, y, fill = f, colour = f), size = 3, shape = 21,
             position = position_jitterdodge()) +
  scale_fill_manual(values = c("#edf1f9", "#fcebeb"), name = "",
                    labels = c("", "")) +
  scale_colour_manual(values = c("#1C73C2", "red"), name = "",
                      labels = c("", "")) +
  theme(legend.position = "none") +
  # negative right margin pulls plot 2 up against this panel
  theme(plot.margin = unit(c(1, -0.5, 1, 1), "cm"))
#plot 2: the same data collapsed onto a single "All" category, with the
#y axis decorations removed so it reads as a continuation of plot 1
plot2 <- ggplot(df) +
  geom_boxplot(aes(x = "All", y = y, fill = f, colour = f), outlier.alpha = 0,
               position = position_dodge(width = 0.78)) +
  scale_x_discrete(name = "") +
  scale_y_continuous(name = "x", breaks = seq(0, 30, 5), limits = c(0, 30)) +
  stat_boxplot(aes(x = "All", y = y, colour = f), geom = "errorbar", width = 0.3,
               position = position_dodge(0.7753)) +
  geom_point(aes(x = "All", y = y, fill = f, colour = f), size = 3, shape = 21,
             position = position_jitterdodge()) +
  scale_fill_manual(values = c("#edf1f9", "#fcebeb"), name = "",
                    labels = c("", "")) +
  scale_colour_manual(values = c("#1C73C2", "red"), name = "",
                      labels = c("", "")) +
  theme(legend.position = "none") +
  theme(axis.line.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        plot.margin = unit(c(1, 1, 1, -0.5), "cm"))
#put it together: plot 1 takes three quarters of the width, plot 2 the rest
#(named `layout` so it does not mask stats::lm)
layout <- rbind(c(1, 1, 1, 2))
grid.arrange(plot1, plot2, layout_matrix = layout)
If I understood correctly, you just want to show all of your data in the last boxplot.
You can do this easily by just duplicating the data while creating the data frame and labelling the duplicate with All.
# Stack the data on itself: the first copy keeps each tumour's grade, the
# second copy is labelled "All", so the last x category pools every sample.
df <- data.frame(
  x = as.factor(c(p$WHO.Grade, rep("All", times = nrow(p)))),
  y = rep(c(p$ki67pro), times = 2),
  f = "ki67pro"
)
The plotting remains the same and you can easily add recurrence.
However, the plot you're showing above looks weird as the All boxplot doesn't contain all the data.

R software: error when using cozigam() function

I am modelling the potential distribution of a species using COZIGAM package. I have the response variable ("pb", which tells where the species is present) and the predictor variables (e.g. altitude, temperature, precipitation, etc).
When I run this formula:
# devtools::install_github('AndrewLJackson/COZIGAM')
# Poisson COZIGAM with one smooth term per predictor in sdmdata2.
coz.model <- cozigam(
  formula = pb ~ s(altitude) + s(combustible) + s(distribution) +
    s(e1) + s(e2) + s(e3) +
    s(euc.human) + s(euc.river) +
    s(fccarb) + s(fccmat) + s(forarb) +
    s(aspect) + s(slope) +
    s(precipitation) + s(radiation) +
    s(tipestr_class) + s(tipestr_forest) + s(tmean),
  data = sdmdata2,
  family = poisson
)
an error appears:
Error in as.matrix(x) : object 'altitude' not found
However, when I run as.matrix(sdmdata2), the 'altitude' variable exists in my matrix. The output of dput(head(sdmdata2)) is:
structure(list(X = 1:6, pb = c(2L, 2L, 2L, 2L, 2L, 2L), altitude = c(879L,
1094L, 1035L, 410L, 342L, 665L), combustible = c(6L, 6L, 3L,
0L, 3L, 3L), distribution = c(6L, 6L, 6L, 0L, 6L, 0L), e1 = c(4L,
4L, 2L, 0L, 4L, 0L), e2 = c(0L, 0L, 2L, 0L, 2L, 0L), e3 = c(0L,
0L, 4L, 0L, 2L, 0L), euc.human = c(790.569397, 3201.562012, 1750,
250, 250, 1952.562012), euc.river = c(0, 4069.705078, 353.5534058,
1030.776001, 559.0170288, 0), fccarb = c(90L, 70L, 40L, 0L, 30L,
0L), fccmat = c(5L, 10L, 35L, 0L, 60L, 80L), forarb = c(1L, 1L,
2L, 0L, 5L, 0L), aspect = c(6L, 8L, 6L, 4L, 3L, 3L), slope = c(5L,
3L, 5L, 2L, 6L, 5L), precipitation = c(87.01500702, 79.57628632,
81.86239624, 75.10630798, 49.58106995, 69.55927277), radiation = c(160.1408997,
163.4971008, 161.8542938, 157.9179993, 159.2113953, 160.6203003
), tipestr_class = c(1L, 1L, 1L, 7L, 1L, 2L), tipestr_forest = c(6L,
6L, 6L, 0L, 6L, 0L), tmean = c(141.7760925, 134.9530029, 141.9192047,
171.9972992, 186.2566986, 157.0391998)), .Names = c("X", "pb",
"altitude", "combustible", "distribution", "e1", "e2", "e3", "euc.human",
"euc.river", "fccarb", "fccmat", "forarb", "aspect", "slope",
"precipitation", "radiation", "tipestr_class", "tipestr_forest",
"tmean"), row.names = c(NA, 6L), class = "data.frame")
Does anyone know what the problem is?

Error using predict with klaR package, NaiveBayes

I'm using the klaR package's predict method as mentioned in the post Naive bayes in R:
nb_testpred <- predict(mynb, newdata=testdata).
mynb is my Naive Bayes model, developed on traindata; testdata is the remaining data.
However, I get this error:
Error in FUN(1:10[[4L]], ...) : subscript out of bounds
I'm not sure what's going on - testdata has fewer rows than traindata, and the same number of columns.
For reference, my code looks like this:
# Randomly label each row 1 or 2 with probabilities 0.9 / 0.1, then use the
# labels to separate the training rows from the test rows.
ind <- sample(2, nrow(mydata), replace = TRUE, prob = c(0.9, 0.1))
traindata <- mydata[ind == 1, ]
testdata <- mydata[ind == 2, ]
# Response and most predictors are converted to factors inside the formula.
myformula <- as.factor(dep) ~
  X1 + as.factor(X2) + as.factor(X3) + as.factor(X4) +
  X5 + as.factor(X6) + as.factor(date) + as.factor(hour)
mynb <- NaiveBayes(myformula, data = traindata)
nb_testpred <- predict(mynb, newdata = testdata) # where I'm getting an error...
A sample of the data is here (the original file has 100,000+ rows):
sampledata <- structure(list(dep = c(1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L), X1 = structure(c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("A", "B"), class = "factor"), X2 = c(200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L,
200L, 200L), X3 = structure(c(4L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L), .Label = c(".", "1400000", "2400000", "900000"), class = "factor"), X4 = c(0L, 0L, 0L, 3L, 4L, 5L, 5L, 5L, 5L, 0L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L), X5 = c(TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), X6 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), date = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L), .Label = c("9/23/2012",
"9/24/2012"), class = "factor"), hour = c(18L, 17L, 23L, 8L, 1L, 19L, 19L, 16L, 22L, 2L, 12L, 16L, 15L, 9L, 1L, 9L,
13L, 19L)), .Names = c("dep", "X1", "X2", "X3", "X4", "X5", "X6", "date", "hour"), class = "data.frame", row.names = c(NA, -18L))
Any help would be greatly appreciated!
You can act as follows:
# Turn the response into a factor in the data itself (instead of inside the
# formula), then model dep on every other column of traindata.
traindata$dep <- factor(traindata$dep)
mynb <- NaiveBayes(dep ~ ., data = traindata)
Then it works; however, you should refine your data to avoid constant columns.

Resources