Related
I want an overlay of a forest plot from the ZINB models of full and the subset of data using the sjPlot package. As you may know, the ZINB model produces two models: one for the count model and one for the zero-inflated model. plot_model works fine when employing the ZINB model from either full or a subset of data meaning producing a plot for both models (count and zero models), but when I overlay using plot_models then only one plot is produced for the count model. I am looking for the count and zero-inflated model plots from the full and sub-model for both the full and the subset of data. any help would be much appreciated
library(sjPlot)
library(sjlabelled)
library(sjmisc)
library(ggplot2)
library(MASS)
library(pscl)
library(boot)
zinb_all_uni <- zeroinfl(ivdays~age,
link="logit",
dist = "negbin",
data=caterpillor)
summary(zinb_all_uni)
plot_model(zinb_all_uni, type="est")
zinb_full_adj <- zeroinfl(ivdays~age+sex+edu,
link="logit",
dist = "negbin",
data=caterpillor)
summary(zinb_full_adj)
plot_model(zinb_full_adj, type="est", terms = c("count_ageb", "count_agec", "zero_ageb", "zero_agec"))
############ second model#######
Zinb_uni_sub <- zeroinfl(ivdays~age,
link="logit",
dist = "negbin",
data=subset(caterpillor, country=="eng"))
summary(zinb_uni_sub)
plot_model(zinb_uni_sub, type="est")
zinb_adj_sub <- zeroinfl(ivdays~age+sex+edu,
link="logit",
dist = "negbin",
data=subset(caterpillor, country=="eng"))
summary(zinb_adj_sub)
plot_model(zinb_adj_sub, type="est", terms = c("count_ageb", "count_agec", "zero_ageb", "zero_agec"))
### overlying plots from both models
plot_models(zinb_all_uni, Zinb_uni_sub)
plot_models(zinb_full_adj, zinb_adj_sub)
DATA:
caterpillor=structure(list(id = 1:100,
age = structure(c(1L, 1L, 2L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
.Label = c("a", "b", "c"), class = "factor"),
sex = structure(c(2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 1L, 1L),
.Label = c("F", "M"), class = "factor"),
country = structure(c(1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
2L, 2L, 2L),
.Label = c("eng", "scot", "wale"), class = "factor"),
edu = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L),
.Label = c("x", "y", "z"), class = "factor"),
lungfunction = c(45L,
23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L,
70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L,
50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L,
23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L,
70L, 69L, 90L, 50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L,
50L, 62L, 45L, 23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 45L,
23L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 25L, 45L, 70L, 69L,
90L, 50L, 62L, 25L, 45L, 70L, 69L, 90L, 50L, 62L, 25L, 45L,
70L, 69L, 90L),
ivdays = c(15L, 26L, 36L, 34L, 2L, 4L, 5L,
8L, 9L, 15L, 26L, 36L, 34L, 2L, 4L, 5L, 8L, 9L, 15L, 26L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 5L, 8L, 9L, 36L, 34L, 2L, 4L, 5L, 8L,
9L, 36L, 34L, 2L, 4L, 5L),
no2_quintile = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
.Label = c("q1", "q2",
"q3", "q4", "q5"), class = "factor")),
class = "data.frame", row.names = c(NA,
-100L))
but when i overlay plots i get only one plot
Code below, basic points:
when I run into trouble with automated machinery like plot_model I usually prefer to use machinery like broom::tidy() (for coefficients) or the ggeffects or emmeans packages (for predictions) and build my own ggplot — for me, it's easier than trying to figure out what the more automated tool is doing
broom doesn't have a tidy() method for zeroinfl models, but a little googling finds one in the poissonreg package ...
... however, that tidy() method doesn't have machinery for constructing confidence intervals or back-transforming coefficients to a count-ratio or odds-ratio scale, so I had to implement my own below ...
library(broom)
library(poissonreg)
library(tidyverse) ## purrr::map_dfr, ggplot ...
theme_set(theme_bw())
library(colorspace)
mod_list <- list(all_uni = zinb_all_uni, uni_sub = Zinb_uni_sub,
full_adj = zinb_full_adj, adj_sub = zinb_adj_sub)
tidy(zinb_all_uni, type = "all")
coefs <- (mod_list
|> map_dfr(tidy, type = "all",
.id = "model")
## construct CIs
|> mutate(conf.low = qnorm(0.025, estimate, std.error),
conf.high = qnorm(0.975, estimate, std.error))
|> filter(term != "(Intercept)") ## usually don't want this
## cosmetic (strip results down to the components we actually need)
|> select(model, term, type, estimate, conf.low, conf.high)
## back-transform
|> mutate(across(c(estimate, conf.low, conf.high), exp))
)
ggplot(coefs, aes(x = estimate, y = term, colour = model)) +
geom_pointrange(aes(xmin = conf.low, xmax = conf.high),
position = position_dodge(width = 0.5)) +
## separate count-ratio and odds-ratio (conditional/zero) plots
facet_wrap(~type, scale = "free") +
scale_color_discrete_qualitative() ## cosmetic
If you only want to see the age-related coefficients you can add
|> filter(stringr::str_detect(term, "^age"))
to the end of the pipeline that defines coefs.
I am using a dataset that has likert scale responses. I am attaching sample observations from the dataset below.
I am always get an error, can someone help me with this?
Thanks
att<-structure(list(att1_goodofall = c(3L, 3L, 1L, 3L, 3L, 3L, 3L,
2L, 3L, 3L), att2_pvtdisease = c(3L, 3L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L), att3_curedisease = c(3L, 1L, 3L, 2L, 2L, 1L, 2L,
3L, 3L, 3L), att4_timewaste = c(4L, 4L, 2L, 3L, 4L, 4L, 4L, 4L,
4L, 4L), att5_helpgenerations = c(3L, 3L, 3L, 3L, 2L, 3L, 3L,
2L, 3L, 3L)), row.names = c(NA, 10L), class = "data.frame")
#labelling the values
for(i in att) {
val_lab(att)<-make_labels("0 Strongly disagree
1 Disagree
2 Neither agree or disagree
3 Agree
4 Strongly agree")
}
#plot_likert function
plot_likert(at)
Error in freq[valid] <- counts :
NAs are not allowed in subscripted assignments
I'm pretty sure that this is a bug but couldn't tell whether it is a bug in expss or in sjPlot.
However, as workaround you could switch to e.g. sjlabelled::set_labels to label your dataset:
library(sjPlot)
library(expss)
att <- sjlabelled::set_labels(att, labels = make_labels("0 Strongly disagree
1 Disagree
2 Neither agree or disagree
3 Agree
4 Strongly agree"))
sjPlot::plot_likert(att, cat.neutral = 3)
After I applied the solution, I wanted to do the same for another set of variables. I applied the same solution but did not get the desired result.
know<-structure(list(know1_spendmoney = c(0L, 9L, 0L, 0L, 0L), know2_access = c(0L,
0L, 0L, 0L, 9L), know3_medicalrecords = c(0L, 0L, 1L, 1L, 1L),
know4_pharmacy = c(0L, 9L, 9L, 9L, 9L), know5_infoprivacy = c(1L,
1L, 1L, 1L, 1L), know6_sharing = c(0L, 1L, 1L, 9L, 9L), know7_police = c(0L,
9L, 9L, 0L, 1L), know8_infoselling = c(0L, 0L, 9L, 0L, 1L
), know9_insurancecompanies = c(0L, 1L, 1L, 1L, 9L), know10_riskofdiseaseinfo = c(1L,
1L, 9L, 9L, 9L)), row.names = c(NA, 5L), class = "data.frame")
know<-sjlabelled::set_labels(know, labels = make_labels("0 False
1 True
9 Don't know"))
plot_likert(know,
expand.grid = TRUE,
show.prc.sign = TRUE, coord.flip = TRUE, reverse.scale = T, values = "sum.outside")
Warning: number of items to replace is not a multiple of replacement length
Any idea how I can resolve this? I tried multiple options nothing seems to work
I am trying make bar chart with ggplot2 with the dataset below. When I use the code
ggplot(p.data, aes(x = `Period Number`, y = `Total Jumps`)) +
stat_summary(data = subset(p.data, Status = "Starter"), fun ="mean", geom = "bar")
I get this graph:
The most concerning aspect is the for period 2, 3, 4, and 5 the bars should be taller (period 2 should be around 9.9). Additionally, I would like to remove period 0 and period 1 and add bar labels with the raw data and without creating an additional data frame.
p.data <- structure(list(`Period Number` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
`Total Jumps` = c(112L, 97L, 28L, 132L, 162L, 19L, 92L, 112L,
97L, 141L, 68L, 86L, 76L, 26L, 105L, 125L, 19L, 92L, 112L,
64L, 101L, 68L, 4L, 8L, 0L, 8L, 12L, 0L, 0L, 0L, 13L, 8L,
0L, 8L, 2L, 2L, 5L, 12L, 0L, 0L, 0L, 5L, 11L, 0L, 0L, 6L,
0L, 9L, 8L, 0L, 0L, 0L, 7L, 10L, 0L, 14L, 5L, 0L, 5L, 5L,
0L, 0L, 0L, 8L, 11L, 0L, 108L, 131L, 47L, 136L, 159L, 35L,
114L, 116L, 111L, 190L, 64L, 75L, 95L, 47L, 116L, 123L, 27L,
103L, 108L, 70L, 152L, 64L, 4L, 7L, 0L, 14L, 10L, 0L, 0L,
0L, 15L, 10L, 0L, 4L, 0L, 0L, 3L, 7L, 7L, 8L, 8L, 5L, 10L,
0L, 7L, 14L, 0L, 3L, 10L, 1L, 0L, 0L, 11L, 7L, 0L, 18L, 15L,
0L, 0L, 9L, 0L, 3L, 0L, 10L, 11L, 0L, 118L, 96L, 48L, 143L,
170L, 37L, 118L, 117L, 116L, 165L, 56L, 80L, 68L, 48L, 114L,
130L, 36L, 114L, 107L, 80L, 123L, 56L, 2L, 10L, 0L, 8L, 11L,
0L, 0L, 0L, 5L, 9L, 0L, 4L, 12L, 0L, 6L, 5L, 0L, 4L, 8L,
12L, 8L, 0L, 7L, 4L, 0L, 10L, 10L, 0L, 0L, 0L, 12L, 13L,
0L, 25L, 2L, 0L, 5L, 14L, 1L, 0L, 2L, 7L, 12L, 0L), Status = structure(c(1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L), .Label = c("Bench", "Starter"), class = "factor")), row.names = c(NA,
198L), class = "data.frame")
Thank you for your help!
It's best to pass that data you actually want to plot to the plotting function, rather than trying to coerce it within the plotting function. In this case you were trying to subset a different data frame from the one you passed to ggplot inside stat_summary. The call to ggplot had already set up the aesthetics you wanted mapped, then in your only geom layer, you were telling ggplot you wanted a completely different set of aesthetics.
You don't need to create another data frame to reshape your data. Here's how you could do it using dplyr:
library(dplyr)
library(ggplot2)
p.data %>%
filter(Status == "Starter") %>%
group_by(`Period Number`) %>%
summarise(`Total Jumps` = mean(`Total Jumps`)) %>%
filter(`Period Number` > 1) %>%
ggplot(aes(x = `Period Number`, y = `Total Jumps`)) +
geom_col(fill = "dodgerblue", colour = "black") +
geom_text(aes(y = `Total Jumps` + 1, label = signif(`Total Jumps`, 2)))
I have a binomial GLM in R, with several predictors that are both continuous and categorical.
The response variable is "Presence", which is binary (0/1).
Length is a continuous variable, while all others are categorical.
I am trying to plot predictions for each of the variables in the final model, particularly for "length", but I'm having difficulties.
My data are the following:
MyData<-structure(list(site = structure(c(3L, 1L, 3L, 2L, 1L, 4L, 3L,
4L, 1L, 2L, 4L, 5L, 5L, 1L, 4L, 3L, 2L, 4L, 1L, 4L, 5L, 1L, 5L,
4L, 3L, 1L, 3L, 5L, 5L, 4L, 4L, 3L, 1L, 5L, 1L, 3L, 1L, 4L, 4L,
3L, 4L, 4L, 2L, 3L, 1L, 4L, 2L, 1L, 1L, 4L, 4L, 4L, 1L, 3L, 3L,
2L, 1L, 4L, 2L, 5L, 5L, 3L, 3L, 2L, 5L, 2L, 4L, 5L, 2L, 4L, 4L,
2L, 5L, 2L, 3L, 5L, 4L, 4L, 5L, 1L, 1L, 3L, 2L, 4L, 3L, 1L, 4L,
3L, 1L, 4L, 3L, 3L, 4L, 5L, 1L, 3L, 2L, 3L, 2L, 3L, 2L, 1L, 1L,
5L, 5L, 1L, 5L, 2L, 3L, 4L, 4L, 3L, 2L, 3L, 3L, 5L, 3L, 3L, 3L,
5L, 1L, 5L, 2L, 3L, 4L, 5L, 5L, 1L, 4L, 2L, 5L, 3L, 2L, 5L, 4L,
3L, 3L, 3L, 1L, 1L, 4L, 1L, 2L, 4L, 5L, 1L, 1L, 2L, 2L, 5L, 3L,
4L, 4L, 1L, 5L, 2L, 4L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 4L, 3L, 1L,
5L, 3L, 3L, 3L, 4L, 1L, 1L, 3L, 4L, 3L, 1L, 1L, 1L, 1L, 5L, 1L,
3L, 4L, 3L, 2L, 1L, 1L, 2L, 5L, 2L, 1L, 5L, 3L, 1L, 4L, 1L, 3L,
3L, 3L, 3L, 5L, 1L, 4L, 1L, 1L, 3L, 3L, 4L, 1L, 3L, 3L, 4L, 2L,
5L, 5L, 5L, 1L, 4L, 4L, 3L, 1L, 2L, 3L, 1L, 3L, 1L, 1L, 4L, 3L,
1L, 1L, 5L, 3L, 1L), .Label = c("R1a", "R1b", "R2", "Za", "Zb"
), class = "factor"), species = structure(c(1L, 1L, 3L, 3L, 3L,
1L, 3L, 1L, 4L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 4L, 3L, 4L, 3L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 3L, 1L, 4L,
3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 1L, 1L, 1L,
1L, 3L, 3L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L,
3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 3L, 4L, 3L, 1L,
1L, 3L, 1L, 1L, 4L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 4L,
1L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 4L, 3L, 1L, 1L, 3L,
1L, 1L, 4L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 1L, 2L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 4L, 3L, 1L, 4L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L,
1L, 4L, 3L, 4L, 3L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 2L, 3L, 4L, 3L,
1L, 1L, 4L, 1L, 1L, 2L, 1L, 1L, 3L, 3L, 1L, 3L, 2L, 4L, 3L, 3L,
1L, 3L, 1L, 4L, 1L, 1L, 4L, 1L, 3L, 1L, 3L, 3L, 3L, 1L, 3L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 3L), .Label = c("Monogyna", "Other",
"Prunus", "Rosa"), class = "factor"), aspect = structure(c(4L,
4L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 4L,
3L, 4L, 3L, 1L, 4L, 4L, 3L, 2L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L,
4L, 4L, 2L, 4L, 3L, 3L, 1L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 4L, 4L,
3L, 3L, 3L, 4L, 1L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 4L, 1L,
4L, 3L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 3L, 3L, 4L, 4L, 4L,
2L, 4L, 3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 4L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L,
3L, 2L, 3L, 1L, 2L, 5L, 2L, 4L, 4L, 4L, 3L, 3L, 1L, 2L, 4L, 3L,
4L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 1L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 4L, 4L, 3L, 4L, 2L, 3L, 4L, 4L, 2L, 3L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 4L, 4L, 3L,
3L, 4L, 2L, 5L, 3L, 4L, 2L, 4L, 4L, 4L, 3L, 3L, 3L, 4L, 4L, 2L,
4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 2L, 4L), .Label = c("East",
"Flat", "North", "South", "West"), class = "factor"), length = c(260L,
60L, 60L, 40L, 240L, 80L, 30L, 100L, 100L, 200L, 70L, 50L, 60L,
35L, 120L, 60L, 500L, 40L, 20L, 70L, 250L, 80L, 50L, 130L, 350L,
170L, 50L, 60L, 90L, 50L, 40L, 110L, 60L, 70L, 70L, 500L, 140L,
50L, 50L, 360L, 50L, 150L, 60L, 270L, 280L, 130L, 130L, 50L,
60L, 30L, 70L, 70L, 60L, 400L, 20L, 30L, 70L, 160L, 340L, 100L,
210L, 60L, 70L, 130L, 50L, 40L, 50L, 80L, 390L, 40L, 110L, 130L,
40L, 230L, 120L, 70L, 80L, 80L, 90L, 70L, 150L, 120L, 50L, 100L,
120L, 10L, 40L, 80L, 180L, 160L, 200L, 40L, 70L, 90L, 50L, 40L,
80L, 80L, 70L, 480L, 90L, 60L, 100L, 140L, 190L, 20L, 70L, 360L,
70L, 130L, 60L, 50L, 320L, 210L, 130L, 180L, 90L, 20L, 300L,
90L, 50L, 130L, 70L, 70L, 40L, 40L, 50L, 40L, 100L, 20L, 70L,
100L, 340L, 70L, 110L, 40L, 230L, 200L, 80L, 35L, 110L, 200L,
50L, 110L, 100L, 50L, 150L, 110L, 50L, 50L, 40L, 70L, 80L, 60L,
100L, 90L, 40L, 300L, 140L, 180L, 140L, 40L, 190L, 100L, 170L,
40L, 120L, 15L, 70L, 340L, 40L, 40L, 70L, 60L, 130L, 140L, 170L,
120L, 90L, 130L, 210L, 50L, 180L, 120L, 100L, 50L, 90L, 70L,
360L, 80L, 30L, 170L, 70L, 300L, 40L, 130L, 120L, 90L, 40L, 40L,
140L, 80L, 400L, 70L, 80L, 60L, 420L, 320L, 200L, 40L, 50L, 70L,
50L, 80L, 50L, 110L, 100L, 120L, 170L, 20L, 110L, 20L, 20L, 30L,
30L, 90L, 150L, 80L, 40L, 90L, 300L, 30L, 70L, 50L, 90L, 200L
), sun = structure(c(1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 1L, 3L,
3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L,
3L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 1L, 2L, 1L,
1L, 3L, 3L, 3L, 2L, 3L, 3L, 2L, 3L, 3L, 1L, 3L, 3L, 3L, 1L, 3L,
1L, 3L, 3L, 2L, 1L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 1L,
2L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 2L, 1L, 3L, 1L, 1L, 3L, 3L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 1L,
3L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 3L,
3L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
3L, 3L, 3L, 3L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 1L,
1L, 3L, 3L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 2L,
3L, 3L), .Label = c("Half", "Shade", "Sun"), class = "factor"),
leaf = structure(c(2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 4L, 2L,
2L, 4L, 4L, 4L, 2L, 2L, 2L, 4L, 4L, 2L, 2L, 4L, 2L, 2L, 1L,
2L, 2L, 4L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L,
2L, 4L, 1L, 2L, 4L, 1L, 2L, 4L, 2L, 4L, 2L, 2L, 2L, 1L, 4L,
4L, 1L, 4L, 1L, 2L, 4L, 3L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L,
2L, 2L, 2L, 2L, 4L, 1L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 4L, 2L, 2L, 1L, 4L, 2L, 2L, 2L, 1L, 4L, 2L, 2L, 1L, 1L,
1L, 2L, 4L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L,
4L, 2L, 2L, 4L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 4L, 4L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 4L, 2L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 4L, 1L, 2L,
4L, 2L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 4L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 4L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 4L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 2L,
2L), .Label = c("Large", "Medium", "Scarce", "Small"), class = "factor"),
Presence = c(0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L
)), .Names = c("site", "species", "aspect", "length", "sun",
"leaf", "Presence"), row.names = c(NA, 236L), class = "data.frame")
(note that this is a reduced dataset, and I have already removed variables that were dropped during model selection)
The optimal model is:
model <- glm(Presence ~ site + species + aspect + length + sun
+ leaf, data=MyData, family=binomial)
I tried the following, but it wants the other variables too, so I get an error:
plot(MyData$length, MyData$Presence)
mydat1 <- data.frame(length = seq(from = 10, to = 500, by = 1)
pred1 <- predict(model, newdata = mydat1, type = "response")
lines(MyData$length, pred1)
So I tried specifying all variables, but then it only puts a horizontal line through the presence data points (and that means I need to specify all possible combinations of factor variables I suppose):
plot(MyData$length, MyData$Presence)
mydat2 <- data.frame(length = seq(from = 10, to = 500, by = 1),
site = "R1a",
species = "Monogyna",
aspect = "Flat",
sun = "Sun",
leaf = "Scarce")
pred2 <- predict(model, newdata = mydat2, type = "response")
lines(MyData$length, pred2)
Finally, I tried the following code:
pred <- predict(model, type = "response")
par(mfrow=c(2,2))
for(i in names(MyData)){
plot(MyData[,i],pred,xlab=i, ylab="Probability")
}
I am confused by this last one, as I am not able to obtain the curve, plus the output gives me predicted values for variables that are not even in the optimal model.
What I should expect under this model, is a sinusoidal curve, I suppose. But that's not what I'm getting.
How can I produce a meaningful plot of predictions?
Any help would be greatly appreciated.
I would use the effects package for some easier results for a single predictor. Here is how:
library(effects)
fit <- as.data.frame(effect('length', model, xlevels = 100))
Plotting is easy (although note the overplotting):
plot(MyData$length, MyData$Presence)
lines(fit$length, fit$fit)
Or we can use ggplot2:
library(ggplot2)
ggplot() +
geom_count(aes(length, Presence), MyData) +
geom_line(aes(length, fit), fit, size = 1, col = 'red') +
geom_ribbon(aes(length, ymin = lower, ymax = upper), fit, alpha = 0.15) +
scale_size_area()
We can see that the effect of length is not very impressive.
I am trying to replicate a 3 Factor nested ANOVA anlaysis in a paper: Underwood, AJ (1993) The Mechanics of spatially replicated sampling programmes to detect environmental impacts in a variable world.
The data for the example (from Table 3, Underwood 1993) can be produced by:
dat <-
structure(list(B = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("A", "B"), class = "factor"), C = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("C", "I"), class = "factor"),
Times = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
Locations = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L,
3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L,
3L), X = c(59L, 51L, 45L, 46L, 40L, 32L, 39L, 32L, 25L, 51L,
44L, 37L, 55L, 47L, 41L, 31L, 38L, 45L, 41L, 47L, 55L, 43L,
36L, 29L, 23L, 30L, 37L, 57L, 50L, 43L, 36L, 44L, 51L, 39L,
29L, 23L, 38L, 44L, 52L, 31L, 38L, 45L, 42L, 35L, 28L, 52L,
44L, 37L, 51L, 43L, 37L, 38L, 31L, 24L, 60L, 52L, 46L, 30L,
37L, 44L, 41L, 34L, 27L, 53L, 46L, 39L, 40L, 34L, 26L, 21L,
27L, 35L), Times.unique = structure(c(5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A_1", "A_2", "A_3",
"A_4", "B_1", "B_2", "B_3", "B_4"), class = "factor")), .Names = c("B",
"C", "Times", "Locations", "Y", "Times.unique"), row.names = c(NA,
-72L), class = "data.frame")
dat
The data frame dat has 4 factors:
B - has two levels "A" and "B" (before v after)
Times - 8 levels, 4 within before "B" and 4 within after "A", coded as 1:4 within each. note that variable Times.unique is the same thing but with a unique code for each time (before and after)
Locations - has three levels, all measured every time both before and after
C - has two levels control (C) and (I). note: two locations are control and one is impact
While I am clear on how to analyse such a design using mixed models (lmer), I would like to replicate his example exactly so that I can run some simulations to compare his method.
In particular I am attempting to replicate the SS values presented in table 4 under column "a". He fits a design that has SS and df values for the following terms:
B -> SS = 66.13, df = 1
Times(B) -> SS = 280.64, df = 6
Locations -> SS = 283.86, df = 2
B x Locations -> SS = 29.26, df = 2
Times(B) x Locations-> SS = 575.45, df = 12
Residual -> SS = 2420.00, df = 48
Total -> SS = 6208.34, df = 71
I assume the Times(B) term represents Times nested within the Before/After treatment "B". For this example he ignores that Locations are from control and impact treatments and leaves out factor C altogether.
I have tried all possible combinations I can think of to reproduce this nested anova, using both unique Times coding and Times coded as 1:4 within B (before and after). I have tried using %in%, / and Error() arguments, as well as Anova from car to change the type of SS calculated. Examples of the %in% and / nested fits include:
aov(Y~B+Locations+Times%in%B+B:Locations+Times%in%B:Locations, data=dat)
aov(Y~B+Locations+B/Times+B:Locations+B/Times:Locations, data=dat)
I seem to be unable to replicate Underwood's SS values exactly, particularly for the two interaction terms. A friend let me fit the model in statistix, where the SS values can be reproduced exactly, so it is possible to obtain the above SS values for this model.
Can anyone help me fit this model in R? I wish to embed it in a larger simulation and really need to be able to run the model in R, such that the Underwood 1993 SS values are reproduced exactly?
Your problem is that dat$Locations is an integer, when it should be a factor (three unique locations). One hint is that your ANOVA line thinks Locations takes up only 1 df, while Underwood gives it 2.
Simply add the line:
dat$Locations = factor(dat$Locations)
And then your line of code reproduces the Underwood results perfectly:
aov(Y~B+Locations+B/Times+B:Locations+B/Times:Locations, data=dat)
#Call:
# aov(formula = Y ~ B + Locations + B/Times + B:Locations + B/Times:Locations,
# data = dat)
#
#Terms:
# B Locations B:Times B:Locations B:Locations:Times
#Sum of Squares 66.1250 2836.8611 280.6389 29.2500 575.4444
#Deg. of Freedom 1 2 6 2 12
# Residuals
#Sum of Squares 2420.0000
#Deg. of Freedom 48