Axis tick labels won't show up in ggplot facet - r

I am struggling to get my x-axis tick labels to show the day each sample was taken. I am also struggling to reorder my groups: currently Afternoon appears before Pre-Dawn, but I would like Pre-Dawn to come first.
Data
http://www.sharecsv.com/s/f7079be36f5fc5035029ae105f96d560/VR_Sonde_Data_May_2017%20(1).csv
DO=read.csv("VR_Sonde_Data_May_2017 (1).csv")
DOmelt <- melt(DO, id.vars=c("Month", "Day", "TimeofDay"), measure.vars = c("AverageDO"))
ggplot(DOmelt, aes((x=Day), group=interaction(Month, TimeofDay), fill=TimeofDay)) +
geom_bar(aes(y=value), stat="identity", position=position_dodge()) +
facet_grid(~Month, scales = "free_x") +
ggtitle("Dissolved Oxygen in Ventura River") +
labs(subtitle = "2017") +
theme(plot.title = element_text(size=30, face="bold", vjust=2, hjust=.5), plot.subtitle = element_text(size=20, face="bold", vjust=2, hjust=.5))+
scale_x_discrete("day") +
scale_y_continuous(name ="Average Dissolved Oxygen")+
theme(axis.text.x =element_text(angle=90))

You can use the following code
library(tidyverse)
# Dodged bar chart of average dissolved oxygen per sampling day, one facet
# per month.
# - TimeofDay is re-levelled so "Pre-Dawn" is drawn (and listed) first.
# - Day is converted to a factor so the x axis is discrete and every sampled
#   day gets its own tick label; left as a number, the axis only shows a few
#   automatic continuous breaks.
DOmelt %>%
  mutate(
    TimeofDay = factor(TimeofDay, levels = c("Pre-Dawn", "Afternoon")),
    Day = factor(Day)
  ) %>%
  ggplot(aes(
    x = Day, y = AverageDO,
    group = interaction(Month, TimeofDay), fill = TimeofDay
  )) +
  # geom_col() is the shorthand for geom_bar(stat = "identity")
  geom_col(position = position_dodge()) +
  # free_x: each month only shows the days that were sampled in it
  facet_grid(~Month, scales = "free_x") +
  ggtitle("Dissolved Oxygen in Ventura River") +
  labs(subtitle = "2017") +
  xlab("Day") +
  scale_y_continuous(name = "Average Dissolved Oxygen") +
  theme(
    plot.title = element_text(size = 30, face = "bold", vjust = 2, hjust = .5),
    plot.subtitle = element_text(size = 20, face = "bold", vjust = 2, hjust = .5),
    axis.text.x = element_text(angle = 90)
  )
Data
DOmelt = structure(list(Month = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("May", "September"), class = "factor"),
Day = c(11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L,
16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L,
22L, 22L, 23L, 23L, 24L, 24L, 25L, 6L, 6L, 7L, 7L, 8L, 8L,
9L, 9L, 10L, 10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L,
15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L, 19L, 19L, 20L), TimeofDay = structure(c(2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Afternoon",
"Pre-Dawn"), class = "factor"), AverageDO = c(6.99, 12.24,
6.61, 12.05, 6.51, 11.94, 6.63, 12.12, 6.67, 12.28, 6.68,
12.14, 6.87, 11.94, 6.64, 10.77, 6.47, 9.3, 6.21, 10.71,
5.92, 10.95, 5.85, 11.46, 5.98, 11.31, 6.12, 10.27, 6.38,
6.61, 8.97, 6.88, 9.08, 7.01, 9.18, 7.2, 9.39, 7.25, 9.61,
6.97, 8.87, 6.77, 8.8, 6.88, 8.92, 7.1, 9.25, 7.34, 9.26,
7.44, 9.46, 7.59, 9.66, 7.74, 9.72, 7.77, 9.54, 7.71)), class = "data.frame", row.names = c(NA,
-58L))

Related

geom_text not matching group aes

I'm pretty sure this is a silly question, but I've been stuck with it for a while now. I want to match the geom_text() to my means from stat_summary according to My_group
the plot:
my code:
### getting model's predictions:
mod1 <- lmer(MY_CONT ~ YEAR * GROUP_2 + (1|ID), data = data, REML = FALSE)
###
data$predictions <- predict(mod1)
### put model's predictions in a sep df:
dfPred <- data %>% group_by(YEAR, MY_GROUP) %>% rstatix::get_summary_stats(predictions)
### check it:
dfPred %>% select(YEAR, MY_GROUP, variable, n, mean)
# A tibble: 4 x 5
YEAR MY_GROUP variable n mean
<fct> <fct> <chr> <dbl> <dbl>
1 A G1 predictions 21 17.6
2 A G2 predictions 21 18.5
3 B G1 predictions 21 18.8
4 B G2 predictions 21 19.1
### the model:
data %>%
mutate_if(is.numeric, round, 2) %>%
ggplot(., aes(x = YEAR, y = predictions)) +
stat_boxplot(aes(x = YEAR, fill = MY_GROUP), geom = "errorbar",
width = 0.15, position = position_dodge(.75)) +
geom_boxplot(aes(fill = MY_GROUP),
outlier.colour = "lightgrey",
outlier.shape = 19,
outlier.size= 2, notch = T) +
geom_text(data = dfPred,
aes(label = round(mean, 2),
y = round(mean, 2) + 0.8)) +
stat_summary(aes(group = MY_GROUP),
fun = mean, geom = "point",
shape = 20, size= 3, color= "black",
position = position_dodge(.75))
Warning message:
In stat_boxplot(aes(x = YEAR, fill = MY_GROUP), geom = "errorbar", :
Ignoring unknown aesthetics: fill
My Question:
How can I put the means' labels according to their group ? (MY_GROUP) ?
side question 1 : I get the fill warning, but it doesn't work without fill
side question 2 : The bars are showing the standard error around the means, right? Not the 95% confidence interval?
data (and more details on the model and on how I obtained the above effects here):
data <- structure(list(PARTICIPANTS = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L, 20L,
20L, 20L, 21L, 21L, 21L, 21L), CONT_Y = c(19.44, 20.07, 19.21,
16.35, 11.37, 12.82, 19.42, 18.94, 19.59, 20.01, 19.7, 17.92,
18.78, 19.21, 19.27, 18.46, 19.52, 20.02, 16.19, 19.97, 13.83,
15.93, 14.79, 21.55, 18.8, 19.42, 19.27, 19.37, 17.14, 14.45,
17.63, 20.01, 20.28, 17.93, 19.36, 20.15, 16.06, 17.04, 19.16,
20.1, 16.44, 18.39, 18.01, 19.05, 18.04, 19.69, 19.61, 16.88,
19.02, 20.42, 18.27, 18.43, 18.08, 17.1, 19.98, 19.43, 19.71,
19.93, 20.11, 18.41, 20.31, 20.1, 20.38, 20.29, 13.6, 18.92,
19.05, 19.13, 17.75, 19.15, 20.19, 18.3, 19.43, 19.8, 19.83,
19.53, 16.14, 21.14, 17.37, 18.73, 16.51, 17.51, 17.06, 19.42
), CATEGORIES = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), MY_GROUP = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), .Label = c("G1", "G2"), class = "factor")), row.names = c(NA,
-84L), class = c("tbl_df", "tbl", "data.frame"))
### rename column:
data <- data %>% rename(., YEAR = CATEGORIES)

how to plot multiple boxplots from emmeans output

How can I plot with ggplot2 multiple boxplots (in the same figure) to illustrate my model's pairwise comparisons?
my model and pairwise comparisons with emmeans:
mod1 <- lmer(CONT_Y ~ MY_GROUP * YEAR + (1|ID), data = dfModels)
group <- emmeans(mod1,~ MY_GROUP|YEAR)
year <- emmeans(mod1,~YEAR|MY_GROUP)
my_pairs <- data.frame(group_p) %>% full_join(data.frame(year_p))
my_pairs
contrast YEAR MY_GROUP estimate SE df t.ratio p.value
1 L1 - L2 2020 <NA> -0.91 0.53 60 -1.73 0.09
2 L1 - L2 2021 <NA> -0.31 0.53 60 -0.59 0.56
3 YEAR2020 - YEAR2021 <NA> G1 -1.14 0.53 60 -2.16 0.03
4 YEAR2020 - YEAR2021 <NA> G2 -0.54 0.53 60 -1.02 0.31
Desired output: something like this
How can I plot these tests with multiple boxplots in ggplot2 ?
data (and more details on the model and on how I obtained the above effects here):
data <- structure(list(PARTICIPANTS = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L, 20L,
20L, 20L, 21L, 21L, 21L, 21L), CONT_Y = c(19.44, 20.07, 19.21,
16.35, 11.37, 12.82, 19.42, 18.94, 19.59, 20.01, 19.7, 17.92,
18.78, 19.21, 19.27, 18.46, 19.52, 20.02, 16.19, 19.97, 13.83,
15.93, 14.79, 21.55, 18.8, 19.42, 19.27, 19.37, 17.14, 14.45,
17.63, 20.01, 20.28, 17.93, 19.36, 20.15, 16.06, 17.04, 19.16,
20.1, 16.44, 18.39, 18.01, 19.05, 18.04, 19.69, 19.61, 16.88,
19.02, 20.42, 18.27, 18.43, 18.08, 17.1, 19.98, 19.43, 19.71,
19.93, 20.11, 18.41, 20.31, 20.1, 20.38, 20.29, 13.6, 18.92,
19.05, 19.13, 17.75, 19.15, 20.19, 18.3, 19.43, 19.8, 19.83,
19.53, 16.14, 21.14, 17.37, 18.73, 16.51, 17.51, 17.06, 19.42
), CATEGORIES = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), MY_GROUP = structure(c(1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), .Label = c("G1", "G2"), class = "factor")), row.names = c(NA,
-84L), class = c("tbl_df", "tbl", "data.frame"))
### rename column:
data <- data %>% rename(., YEAR = CATEGORIES)

Newbie attempting linear mixed effects model in R studio - TOTAL FAIL

After searching over an hour (this forum, Youtube, class notes, google) I've found no help for my question. I'm a complete newb who knows nothing about R or stats.
I'm attempting to create a linear mixed effects model in R. I'm measuring leaf width across three different locations (Jacksonville FL, Augusta GA, & Atlanta GA), and within those three locations there is a high-nitrogen and low-nitrogen plot. I have 150 leaf measurements from 50 trees.
My limited understanding tells me that the leaf width is the continuous response variable, and city and plot are the discrete explanatory variables. The random effect would be the individual trees, since the leaf width within a single tree is non-independent.
I've used "nlme" to make a model:
# nlme::lme() takes the random effect as a one-sided formula through the
# `random` argument; the lme4-style `(1 | tree.id)` term is not valid here.
leaf.width.model <- lme(width ~ city * plot, random = ~ 1 | tree.id, data = leaf)
I then ran an ANOVA test, and it suggested there's something going on with city and the interaction between city and plot. This is where I'm stuck. I want to make a plot that has lines for all three cities, but I haven't a clue how to do that. When I try to use the plot function, I just get a boxplot.
I've literally tried for hours and am more lost and confused than before.
1) How can I make this graph?
2) What other tests should I do to analyze and/or visualize this data?
I am forever grateful for any help at all. I really want to learn R and stats very badly, but I'm getting discouraged.
Thank you,
Rich
P.S Here is the output of the dput function:
> dput(tree) structure(list(tree.id = structure(c(24L, 24L, 32L, 25L, 25L, 24L, 24L, 32L, 25L, 25L, 43L, 45L, 45L, 43L, 23L, 23L, 45L, 45L, 23L, 23L, 41L, 41L, 38L, 11L, 11L, 38L, 41L, 41L, 11L, 11L, 14L, 14L, 29L, 13L, 13L, 14L, 14L, 29L, 13L, 13L, 4L, 4L, 1L, 1L, 20L, 1L, 1L, 20L, 6L, 8L, 8L, 5L, 5L, 6L, 4L, 4L, 8L, 8L, 5L, 5L, 9L, 9L, 10L, 10L, 12L, 12L, 13L, 13L, 22L, 22L, 23L, 23L, 24L, 24L, 25L, 25L, 25L, 25L, 40L, 40L, 41L, 41L, 38L, 38L, 39L, 39L, 14L, 14L, 14L, 15L, 15L, 28L, 28L, 29L, 29L, 35L, 35L, 36L, 36L, 37L, 37L, 42L, 42L, 43L, 43L, 44L, 44L, 45L, 45L, 46L, 46L, 47L, 47L, 2L, 1L, 3L, 3L, 4L, 4L, 7L, 11L, 11L, 16L, 16L, 20L, 20L, 21L, 21L, 17L, 17L, 18L, 18L, 19L, 19L, 26L, 26L, 27L, 27L, 30L, 30L, 31L, 31L, 32L, 32L, 33L, 33L, 34L, 34L, 48L), .Label = c("Tree_112", "Tree_112 ", "Tree_115", "Tree_130", "Tree_137", "Tree_139", "Tree_140", "Tree_141", "Tree_153", "Tree_154", "Tree_156", "Tree_159", "Tree_166", "Tree_169", "Tree_171", "Tree_180", "Tree_182", "Tree_184", "Tree_185", "Tree_202", "Tree_213", "Tree_218", "Tree_222", "Tree_227", "Tree_239", "Tree_242", "Tree_246", "Tree_247", "Tree_252", "Tree_260", "Tree_267", "Tree_269", "Tree_271", "Tree_272", "Tree_291", "Tree_293", "Tree_298", "Tree_327", "Tree_329", "Tree_336", "Tree_350", "Tree_401", "Tree_403", "Tree_405", "Tree_407", "Tree_409", "Tree_420", "Tree_851"), class = "factor"), city = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label 
= c("Atlanta", "Augusta", "Jacksonville"), class = "factor"), plot = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("High-N", "Low-N"), class = "factor"), width = c(0.66, 0.716, 0.682, 0.645, 0.645, 0.696, 0.733,
0.707, 0.668, 0.686, 0.617, 0.733, 0.73, 0.615, 0.669, 0.746, 0.687, 0.682, 0.76, 0.713, 0.651, 0.664, 0.679, 0.729, 0.756,
0.669, 0.647, 0.713, 0.767, 0.685, 0.69, 0.731, 0.781, 0.729,
0.725, 0.739, 0.769, 0.791, 0.676, 0.688, 0.719, 0.753, 0.748,
0.791, 0.785, 0.78, 0.723, 0.756, 0.664, 0.645, 0.653, 0.615,
0.591, 0.642, 0.693, 0.716, 0.694, 0.676, 0.662, 0.629, 0.665,
0.748, 0.726, 0.693, 0.715, 0.714, 0.764, 0.732, 0.61, 0.721,
0.703, 0.713, 0.746, 0.752, 0.662, 0.733, 0.707, 0.674, 0.734,
0.79, 0.732, 0.794, 0.703, 0.712, 0.737, 0.731, 0.747, 0.746,
0.787, 0.709, 0.716, 0.764, 0.77, 0.764, 0.802, 0.663, 0.777,
0.642, 0.779, 0.81, 0.724, 0.645, 0.68, 0.637, 0.695, 0.768,
0.761, 0.7, 0.759, 0.726, 0.696, 0.794, 0.774, 0.799, 0.747,
0.606, 0.691, 0.733, 0.707, 0.698, 0.706, 0.72, 0.694, 0.697,
0.737, 0.716, 0.73, 0.706, 0.667, 0.734, 0.528, 0.695, 0.684,
0.763, 0.733, 0.809, 0.6, 0.676, 0.718, 0.759, 0.609, 0.665,
0.667, 0.647, 0.701, 0.663, 0.688, 0.693, 0.899)), .Names = c("tree.id", "city", "plot", "width"), class = "data.frame", row.names = c(NA, -149L))
Thank you all so much for your comments, I sincerely appreciate everyone's help!
As suggested in comments, a line plot might not make sense for your data, as you are studying how width varies in discrete categories (in separate cities and separate plots). Boxplots would make sense as you can make them for each of the interactions of city and plot. To give you a sense of what you can do I generated some fake data and made an example of the sort of plot that might be helpful to you:
# fake data: 50 trees x 3 leaves each, 3 cities, 6 plots (2 per city)
set.seed(100) # fix the RNG so the simulated widths are reproducible
leaf <- data.frame(
  tree.id = rep(1:50, each = 3),
  city = rep(c("Jackson", "Augusta", "Atlanta"), each = 50),
  plot = rep(1:6, each = 25)
)
# I'll make the average of width different for each plot
leaf$width <- rnorm(nrow(leaf), mean = leaf$plot, sd = 1)
# plotting the data
library(ggplot2) # this is a great library for plotting in R
ggplot(leaf, aes(x = factor(plot), y = width, color = factor(plot))) +
  facet_grid(~city, scales = "free_x") + # This creates a subplot for each city
  # every observation is drawn by the jitter layer below, so hide the
  # boxplot's own outlier points to avoid plotting them twice
  geom_boxplot(outlier.shape = NA) +
  # jitter horizontally only, so each point keeps its true width value
  geom_jitter(width = 0.2, height = 0) +
  theme_bw()
In this plot I added the points (the leaf widths for each individual tree) but I 'jittered' them, meaning perturbing their position slightly so that they do not pile up on top of each other and are all visible. You could remove this if you liked.
Exploratory data analysis should be fun! And I think visualization is a good place to start when beginning in statistics. Hopefully this will prove helpful to you.
# nlme::lme() takes the random effect as a one-sided formula through the
# `random` argument; the lme4-style `(1 | tree.id)` term is not valid here.
leaf.width.model <- lme(width ~ city * plot, random = ~ 1 | tree.id, data = leaf)
In this model if you want to plot something, you are probably trying to answer:
How much is the average leaf width for all trees in each city for each type of plot.
To show this information in a figure, you need to plot width on the y axis and plot (high and low nitrogen) on the x axis, and group the data by city. Then you will get the 3 lines you are talking about. However, you need to get the average width in each group as you only want to show city variation.
To get this plot from raw data: (Using fake data provided by gfgm)
set.seed(100)
leaf <- data.frame(
  tree.id = rep(1:50, each = 3),
  city = rep(c("Jackson", "Augusta", "Atlanta"), each = 50),
  plot = rep(c(1, 0), each = 25)
)
# I'll make the average of width different for each plot
leaf$width <- rnorm(nrow(leaf), mean = leaf$plot, sd = 1)
library(plotly)
library(tidyverse)
# One line per city: mean width per city/plot combination, with error bars
# showing the 95% confidence half-width (1.96 * standard error).
leaf %>%
  group_by(city, plot) %>%
  summarise(
    avwidth = mean(width, na.rm = TRUE),
    # the group size is counted from the non-missing values rather than
    # hard-coded, so the standard error stays correct for other data
    ci95 = 1.96 * sd(width, na.rm = TRUE) / sqrt(sum(!is.na(width)))
  ) %>%
  plot_ly(
    x = ~plot, y = ~avwidth, color = ~city,
    type = "scatter", mode = "markers+lines",
    error_y = ~list(array = ci95)
  )

Plotting lmer model without covariance matrix

I am trying to plot a number of lmer models for a paper. I had to simplify the random effect structure by dropping the correlation between the random slopes and intercept (Barr et al., 2013). However, when I try to plot using the sjp.lmer function, I get the following error:
Error in array(NA, c(J, K)) : 'dims' cannot be of length 0
In addition: Warning message:
In ranef.merMod(object, condVar = TRUE) :
conditional variances not currently available via ranef when there are multiple terms per factor
Is there a potential work-around for this? Any help would be greatly appreciated.
Hi Ben,
Here is some of the data I am working with:
> dput(df)
structure(list(Subject = c(1L, 2L, 3L, 5L, 6L, 6L, 6L, 7L, 7L,
7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L, 10L, 11L, 11L, 11L, 12L, 12L,
13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 16L, 17L, 17L, 17L, 18L,
18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L, 23L, 23L,
24L, 24L, 25L, 25L, 25L, 26L, 26L, 26L, 27L, 27L, 28L, 28L, 29L,
29L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L,
41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L,
54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L,
67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L,
80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L,
93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L,
105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L,
116L), A = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor"), B = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("1", "2", "3"), class = "factor"), C = c(9.58,
9.75, 15, 10.75, 13.3, 14.42, 15.5, 9.25, 10.33, 11.33, 9.55,
11, 11.92, 14.25, 15.5, 16.42, 14.92, 16.17, 10.83, 11.92, 12.92,
7.5, 8.5, 10.33, 11.25, 13.08, 13.83, 14.92, 15.92, 9.58, 14.83,
11.92, 8.33, 9.5, 10.5, 6.8, 7.92, 9, 13.5, 10.92, 10, 11, 13,
15.58, 12.92, 11.8, 5.75, 6.75, 7.83, 11.12, 12.25, 12.08, 13.08,
14.58, 8.08, 9.17, 10.67, 10.6, 12.67, 7.83, 8.83, 9.67, 10.58,
11.75, 7, 17.17, 11.25, 13.75, 11.83, 16.92, 8.83, 7.07, 7.83,
15.08, 15.83, 16.67, 18.87, 11.92, 12.83, 7.83, 12.33, 10, 11.08,
12.08, 15.67, 11.75, 15, 14.308, 15.9064, 16.161, 16.9578, 8.90197,
16.2897, 9.05805, 10.5969, 5.15334, 9.1046, 14.1019, 18.9736,
10.9447, 14.5455, 16.172, 6.65389, 11.3171, 12.2864, 17.9929,
10.5778, 16.9195, 7.6, 7.8, 7.2, 16.7, 17, 16.5, 17, 15.1, 16,
16.4, 13.8, 13.8, 14.5, 16.1, 15.8, 15, 14.1, 15, 14.7, 15, 14.5,
10.8, 11.4, 11.3, 10.9, 11.2, 9.3, 10.8, 9.7, 8, 8.2, 8.2, 17.5,
12.6, 11.6, 10.8, 11.8, 12.3, 16.3, 17.1, 9.626283368, 14.6,
13.7), D = structure(c(2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"2"), class = "factor"), Frontal_FA = c(0.4186705, 0.4151535,
0.4349945, 0.4003705, 0.403488, 0.407451, 0.3997135, 0.38826,
0.3742275, 0.3851655, 0.3730715, 0.3825115, 0.3698805, 0.395406,
0.39831, 0.4462415, 0.413532, 0.419088, 0.4373975, 0.4633915,
0.4411375, 0.3545255, 0.389322, 0.349402, 0.352029, 0.367792,
0.365298, 0.3790775, 0.379298, 0.36231, 0.3632755, 0.357868,
0.3764865, 0.3726645, 0.351422, 0.3353255, 0.334196, 0.3462365,
0.367369, 0.3745925, 0.3610755, 0.360576, 0.357035, 0.3554905,
0.3745615, 0.38828, 0.3293275, 0.3246945, 0.3555345, 0.375563,
0.38116, 0.387508, 0.357707, 0.413193, 0.3658075, 0.3776355,
0.362678, 0.3824945, 0.3771, 0.375347, 0.362468, 0.367618, 0.3630925,
0.3763995, 0.359458, 0.3982755, 0.3834765, 0.386135, 0.3691575,
0.388099, 0.350435, 0.3629045, 0.3456775, 0.4404815, 0.4554165,
0.425763, 0.4491515, 0.461206, 0.453745, 0.4501255, 0.4451875,
0.4369835, 0.456838, 0.437759, 0.4377635, 0.44434, 0.4436615,
0.437532, 0.4335325, 0.4407995, 0.470447, 0.4458525, 0.440322,
0.4570775, 0.4410335, 0.436045, 0.4721345, 0.4734515, 0.4373905,
0.4139465, 0.440213, 0.440281, 0.425746, 0.454377, 0.4457435,
0.488561, 0.4393565, 0.4610565, 0.3562055, 0.381041, 0.353253,
0.4265975, 0.4069595, 0.40092, 0.4261365, 0.429605, 0.425479,
0.4331755, 0.3981285, 0.4206245, 0.3798475, 0.3704155, 0.395192,
0.404436, 0.4148915, 0.416144, 0.384652, 0.3916045, 0.41005,
0.3940605, 0.3926085, 0.383909, 0.391792, 0.372398, 0.3531025,
0.414441, 0.404335, 0.3682095, 0.359976, 0.376681, 0.4173705,
0.3492685, 0.397057, 0.3940605, 0.398825, 0.3707115, 0.400228,
0.3946595, 0.4278775, 0.384037, 0.43577)), .Names = c("Subject",
"A", "B", "C", "D", "Frontal_FA"), class = "data.frame", row.names = c(NA,
-151L))
Here is the code that I am running
lmer fit
FA <- lmer(Frontal_FA ~ poly(C) + A + B + D + (poly(C)||Subject), data = df)
plot lmer fit
sjp.lmer(FA)
Thanks for your help.
sjp.lmer, by default, plots the random effects of a model. However, it plots random effects (BLUPs) with confidence intervals, using the arm::se.ranef function. This function causes the first error message you get:
arm::se.ranef(FA)
> Error in array(NA, c(J, K)) : 'dims' cannot be of length 0
Then, the se.ranef function calls the lme4::ranef function with argument condVar = TRUE, which is not yet implemented for specific conditions (like yours) in lme4. Hence you get the additional warning:
In ranef.merMod(object, condVar = TRUE) :
conditional variances not currently available via ranef when there are multiple terms per factor
If you are especially interested in plotting the random effects, you could use the lme4-implemented dotplot-function:
lattice::dotplot(ranef(FA))
If you are interested in any other plot type (fixed effects, marginal effects, predictions, ...), see ?sjp.lmer or some examples at his page.
Edit
If you don't mind installing from GitHub (devtools::install_github("sjPlot/devel")), I have committed a small update, so you can use show.ci = FALSE to avoid computing confidence intervals for random effects:
# Plot the random effects without confidence intervals, sorted by intercept.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
sjp.lmer(FA, type = "re", show.ci = FALSE, sort.est = "(Intercept)")

Boxplot with multiple x variables

I'm new to R and having a few issues with using ggplot2.
This is an example of my data (subset of larger data set) :
df <-
structure(list(logpvalue = c(22.36, 6.93, 16.78, 1.78, 17.75,
20.99, 21.03, 9.19, 15.01, 22.25, 13.4, 6.47, 1.34, 13.4, 3.21,
0.37, 0.5, 0.12, 1.8, 0.71, 1.15, 6.73, 0.12, 6.97, 0.64, 9.85,
1.45, 1.67, 2.6, 1.8, 1.35, 4.69, 0.37, 1.91, 0.31, 0, 2.45,
1.68, 2.31, 1.35, 6.48, 4.68), SNP = structure(c(1L, 7L, 6L,
5L, 11L, 1L, 9L, 5L, 8L, 11L, 7L, 5L, 8L, 11L, 1L, 7L, 1L, 4L,
2L, 3L, 10L, 7L, 1L, 4L, 2L, 3L, 10L, 4L, 2L, 3L, 10L, 4L, 2L,
3L, 10L, 4L, 2L, 3L, 7L, 9L, 5L, 1L), .Label = c("rs10244", "rs10891244",
"rs10891245", "rs11213821", "rs12296076", "rs138567267", "rs45615536",
"rs6589218", "rs7103178", "rs7127721", "rs7944895"), class = "factor"),
X173 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("het", "hom"), class = "factor")), .Names = c("logpvalue",
"SNP", "X173"), class = "data.frame", row.names = c(NA, -42L))
I want to plot a boxplot of logpvalue on y axis, with SNP on the x-axis but with each SNP also categorized by whether the patient is het or hom for X173. So from this data I'd imagine 4 boxes on my boxplot.
If possible I'd also like to incorporate the individual data points (dotplot-boxplot overlay) with jitter.
This is the usual code I'd use for a boxplot of logpvalue vs SNP:
# Boxplot with jittered points on top. Each `+` must end its line: a line
# that starts with `+` is parsed as a new statement, so the layers after it
# would never be added to the plot.
qplot(logpvalue, SNP, data = mydata, geom = "boxplot") +
  geom_jitter(position = position_jitter(width = 0.1, height = 0.1)) +
  theme_bw()
How do I add the extra x variable into this code?
Try this:
# Base-graphics version: one box per SNP/X173 combination, with the pasted
# "SNP X173" string used as the grouping label.
boxplot(df$logpvalue~paste(df$SNP,df$X173))
Or using ggplot2:
library(ggplot2)
# Boxplot of logpvalue per SNP, with one panel per X173 genotype (het / hom)
# and the individual observations overlaid.
ggplot(data = df, aes(x = SNP, y = logpvalue, colour = SNP)) +
  # all observations are drawn by the jitter layer, so hide the boxplot's
  # own outlier points to avoid plotting them twice
  geom_boxplot(outlier.shape = NA) +
  # jitter horizontally only, so each point keeps its true logpvalue
  geom_jitter(width = 0.2, height = 0) +
  facet_grid(. ~ X173)

Resources