Related
Data
Here is my data:
work <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L
), Mins_Sleep = c(300L, 540L, 540L, 480L, 480L, 480L, 480L, 420L,
300L, 240L, 480L, 300L, 420L, 360L, 390L, 405L, 420L, 360L, 420L,
350L, 420L, 450L, 445L, 480L, 300L, 400L, 310L, 390L, 350L, 450L,
390L, 390L, 510L, 452L, 310L, 360L, 500L, 360L, NA, NA, 420L,
420L, 420L, 382L, 430L, 393L, 240L, 400L, 480L, 450L), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L), Tea_Cups = c(2L, 4L, 2L, 0L, 0L, 2L, 0L, 2L, 4L, 0L, 0L,
0L, 2L, 6L, 5L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 0L, 2L, 1L, 0L, 4L,
4L, 4L, 2L, 1L, 0L, 2L, 0L, 0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 1L, 0L, 2L, 0L), Time_Wake = c(500L, 715L, 600L,
600L, 700L, 600L, 700L, 500L, 500L, 500L, 500L, 700L, 645L, 700L,
630L, 645L, 700L, 600L, 700L, 550L, 700L, 730L, 725L, 800L, 600L,
640L, 600L, 730L, 650L, 830L, 630L, 630L, 830L, 722L, 641L, 800L,
720L, 700L, NA, NA, NA, 700L, 700L, 622L, 710L, 632L, 400L, 640L,
700L, 730L)), class = "data.frame", row.names = c(NA, -50L))
Oftentimes I have odd combinations of variables that don't neatly fit into a faceted grid in ggplot. As an example, here is a code to stack my numeric data into one plot:
work %>%
keep(is.numeric) %>%
gather() %>%
ggplot(aes(x=value))+
geom_density()+
facet_wrap(~key,
scales = "free")
However, as you can see, there are five plots, and arranging the rows and columns doesn't really fix the issue because it is an odd number:
I'm wondering if there is a possible way to paste some kind of text box into this sixth quadrant of the faceted plot. I'm thinking something like this:
How can one accomplish this goal?
One option is to access the 'blank space' using gtable/grid, e.g.
library(tidyverse)
library(gtable)
library(grid)
work <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L
), Mins_Sleep = c(300L, 540L, 540L, 480L, 480L, 480L, 480L, 420L,
300L, 240L, 480L, 300L, 420L, 360L, 390L, 405L, 420L, 360L, 420L,
350L, 420L, 450L, 445L, 480L, 300L, 400L, 310L, 390L, 350L, 450L,
390L, 390L, 510L, 452L, 310L, 360L, 500L, 360L, NA, NA, 420L,
420L, 420L, 382L, 430L, 393L, 240L, 400L, 480L, 450L), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L), Tea_Cups = c(2L, 4L, 2L, 0L, 0L, 2L, 0L, 2L, 4L, 0L, 0L,
0L, 2L, 6L, 5L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 0L, 2L, 1L, 0L, 4L,
4L, 4L, 2L, 1L, 0L, 2L, 0L, 0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 1L, 0L, 2L, 0L), Time_Wake = c(500L, 715L, 600L,
600L, 700L, 600L, 700L, 500L, 500L, 500L, 500L, 700L, 645L, 700L,
630L, 645L, 700L, 600L, 700L, 550L, 700L, 730L, 725L, 800L, 600L,
640L, 600L, 730L, 650L, 830L, 630L, 630L, 830L, 722L, 641L, 800L,
720L, 700L, NA, NA, NA, 700L, 700L, 622L, 710L, 632L, 400L, 640L,
700L, 730L)), class = "data.frame", row.names = c(NA, -50L))
# Reshape the numeric columns of `work` to long format (one row per
# variable/value pair) and draw one density panel per variable.
# pivot_longer() replaces the superseded gather(); the `key`/`value` column
# names are kept so the aesthetics and the facet formula stay the same.
# scales = "free" lets every panel use its own x and y range.
p <- work %>%
  keep(is.numeric) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~key, scales = "free")
# Text shown in the otherwise-empty facet slot, one entry per line.
# The lines are joined with collapse = "\n" instead of relying on paste()'s
# default " " separator, which left a stray leading space on every line after
# the first.
note <- paste(
  c(
    "Notes on Plots:",
    "- coffee skew",
    "- sleep semi-normal",
    "- work is weird",
    "- etc",
    "- etc"
  ),
  collapse = "\n"
)
# Standalone plot whose only content is the note text, left-aligned at the
# origin; theme_void() strips axes and background so just the text remains.
p2 <- ggplot() +
  annotate(geom = "text", label = note, x = 0, y = 0, hjust = 0, size = 5) +
  theme_void()
# Convert both plots to gtables so the note can be placed in the panel grid.
pg <- ggplotGrob(p)
#> Warning: Removed 5 rows containing non-finite values (stat_density).
p2g <- ggplotGrob(p2)
# Layout table (t/l/b/r cell positions) of the facet panels only.
# `trim = FALSE` is spelled out: `F` is an ordinary variable that can be
# reassigned, `FALSE` is not.
pl <- gtable_filter(pg, "panel", trim = FALSE)$layout
# Add the note grob at the largest panel top/left position, extending to the
# right edge of the third panel entry.
# NOTE(review): `pl$r[3]` hard-codes the panel layout of this five-facet plot;
# confirm the index if the number of facets or columns changes.
pg <- gtable_add_grob(pg, p2g, t = max(pl$t), l = max(pl$l), r = pl$r[3])
grid.newpage()
grid.draw(pg)
Created on 2022-09-15 by the reprex package (v2.0.1)
I wonder if this is the right direction to get you to start:
work %>%
keep(is.numeric) %>%
gather() %>%
ggplot(aes(x=value))+
geom_density(aes(color = key))+
facet_wrap(~key,
scales = "free")+
theme(
legend.position = c(.8, .3)
)
Edit
To keep original black color for all graphs
# Named colour vector mapping every facet variable to black; it is passed to
# scale_color_manual(values = cols) in the plot that follows.
cols <- setNames(
  rep("black", 5L),
  c("Mins_Work", "Mins_Sleep", "Coffee_Cups", "Tea_Cups", "Time_Wake")
)
work %>%
keep(is.numeric) %>%
gather() %>%
ggplot(aes(x=value))+
geom_density(aes(color = key))+
facet_wrap(~key,
scales = "free")+
theme(legend.position = c(.8, .3))+
scale_color_manual(values = cols)
Reproducible Dataset
This is my data:
#### Data for Stack ####
stack <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L,
269L, 300L, 427L, 301L, 436L, 342L, 229L, 379L, 102L, 146L, NA,
94L, 345L, 73L, 204L, 512L, 113L, 135L, 458L, 493L, 552L, 108L,
335L, 395L, 508L, 546L, 396L, 159L, 325L, 747L, 650L, 377L, 461L,
669L, 186L, 220L, 410L, 708L, 409L, 515L, 413L, 166L, 451L, 660L,
177L, 192L, 191L, 461L, 637L, 297L, 601L, 586L, 270L, 479L, 0L,
480L, 397L, 174L, 111L, 0L, 610L, 332L, 345L, 423L, 160L, 611L,
0L, 345L, 550L, 324L, 427L, 505L, 632L, 560L, 230L, 495L, 235L,
522L, 654L, 465L, 377L, 260L, 572L, 612L, 594L, 624L, 237L, 0L,
38L, 409L, 634L, 292L, 706L, 399L, 568L, 0L, 694L, 298L, 616L,
553L, 581L, 423L, 636L, 623L, 338L, 345L, 521L, 438L, 504L, 600L,
616L, 656L, 285L, 474L, 688L, 278L, 383L, 535L, 363L, 470L, 457L,
303L, 123L, 363L, 329L, 513L, 636L, 421L, 220L, 430L, 428L, 536L,
156L, 615L, 429L, 103L, 332L, 250L, 281L, 248L, 435L, 589L, 515L,
158L, 0L, 649L, 427L, 193L, 225L, 0L, 280L, 163L, 536L, 301L,
406L, 230L, 519L, 0L, 303L, 472L, 392L, 326L, 368L, 405L, 515L,
308L, 259L, 769L, 93L, 517L, 261L, 420L, 248L, 265L, 834L, 313L,
131L, 298L, 134L, 385L, 648L, 529L, 487L, 533L, 641L, 429L, 339L,
508L, 560L, 439L, 381L, 397L, 692L, NA), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L, 6L, 4L, 6L, 4L, 4L, 2L, 6L, 6L, 5L, 1L, 3L, 1L, 5L, 4L, 6L,
5L, 0L, 6L, 6L, 4L, 4L, 2L, 2L, 6L, 6L, 7L, 3L, 3L, 0L, 5L, 7L,
6L, 3L, 5L, 3L, 3L, 1L, 9L, 9L, 3L, 3L, 6L, 6L, 6L, 3L, 0L, 7L,
6L, 6L, 3L, 9L, 3L, 8L, 8L, 3L, 3L, 7L, 6L, 3L, 3L, 3L, 6L, 6L,
6L, 1L, 9L, 3L, 3L, 2L, 6L, 3L, 6L, 9L, 6L, 8L, 9L, 6L, 6L, 6L,
0L, 3L, 0L, 3L, 3L, 6L, 3L, 0L, 9L, 3L, 0L, 2L, 0L, 6L, 6L, 6L,
3L, 6L, 3L, 9L, 3L, 0L, 0L, 6L, 3L, 3L, 3L, 3L, 6L, 0L, 6L, 3L,
3L, 5L, 5L, 3L, 0L, 6L, 4L, 2L, 0L, 2L, 4L, 0L, 6L, 4L, 4L, 2L,
2L, 0L, 9L, 6L, 3L, 6L, 6L, 9L, 0L, 6L, 6L, 6L, 6L, 6L, 6L, 3L,
3L, 0L, 9L, 6L, 3L, 6L, 3L, 6L, 1L, 6L, 6L, 6L, 6L, 6L, 1L, 3L,
9L, 6L, 3L, 6L, 9L, 3L, 5L, 6L, 3L, 0L, 6L, 3L, 3L, 5L, 0L, 6L,
3L, 5L, 3L, 0L, 6L, 7L, 3L, 6L, 6L, 6L, 6L, 3L, 5L, 6L, 7L, 6L,
6L, 4L, 3L)), class = "data.frame", row.names = c(NA, -244L))
Solution So Far
I'm trying to cut my coffee data into three groups, a low group, a medium group, and a high group. Here is how I tried doing so:
#### Load Libraries ####
library(tidyverse)
library(ggpubr)
#### Transform Data: Coffee ####
coffee_labels <- c("Low", "Medium", "High") # labels
range(stack$Coffee_Cups) # get range for split
coffee_breaks <- seq(from = 0,
to = 9,
by = 3) # split from 0 to 9 in 3 pt intervals
coffee_transform <- cut(x= stack$Coffee_Cups,
labels = coffee_labels,
breaks = coffee_breaks) # add labels and breaks
stack_transform <- stack %>%
mutate(coffee_level = coffee_transform) # mutate to add to data
tail(stack_transform$coffee_level, 30) # check transform
Problem
However, when I print the tail command at the end, I get these NA values, which I assume is from an improper cut:
[1] Low Medium Medium Low <NA> Medium Low Low Medium <NA> Medium
[12] Low Medium Low <NA> Medium High Low Medium Medium Medium Medium
[23] Low Medium Medium High Medium Medium Medium Low
Levels: Low Medium High
I looked and those values correspond to my coffee consumption equaling zero, yet I already set the cut from 0 to 9. Naturally, when I try to make a boxplot with this, the NA levels get included, which I don't want:
#### Transform Coffee Boxplot ####
ggboxplot(stack_transform,
x="coffee_level",
y="Mins_Work",
palette = "simpsons",
color = "coffee_level",
title = "Coffee Consumption Level Productivity",
caption = "*Data obtained from local matrix.",
xlab = "Coffee Consumption Level",
ylab = "Minutes of Productivity")+
theme_bw()+
theme(legend.position = "none",
plot.caption = element_text(face = "italic"),
plot.title = element_text(face = "bold",
size = 18,
family = "mono"))
Question
How do I fix these NA values? I want my zero values to be included into the "low" group if possible.
Would this work for you? It seems you are using base R, so a nested ifelse statement may be simpler:
# Bin Coffee_Cups into three labelled groups; values outside 0-9 (including
# NA) fall through to NA. Zero is matched explicitly by 0:3, so it lands in
# "Low".
stack$coffee_cat <- with(
  stack,
  ifelse(
    Coffee_Cups %in% 0:3, "Low",
    ifelse(
      Coffee_Cups %in% 4:6, "Medium",
      ifelse(Coffee_Cups %in% 7:9, "High", NA)
    )
  )
)
Output
# Mins_Work Coffee_Cups coffee_cat
#1 435 3 Low
#2 350 0 Low
#3 145 2 Low
#4 135 6 Medium
#5 15 4 Medium
#6 60 5 Medium
case_when would be a dplyr alternative:
stack %>% mutate(coffee_level = case_when(Coffee_Cups %in% 0:3 ~ "Low",
Coffee_Cups %in% 4:6 ~ "Medium",
Coffee_Cups %in% 7:9 ~ "High"))
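To include the zeros in cutting, you could also use the Hmisc::cut2 function (note that g = 3 splits the data into three quantile groups, so the group boundaries may differ from the 0–3 / 4–6 / 7–9 bins used above):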
stack$coffee_Hmisc <- factor(Hmisc::cut2(stack$Coffee_Cups, g = 3), labels = coffee_labels)
I have four datasets derived and processed identically (though differing in size due to the availability of Landsat scenes)
I am trying to compute ANOVA using the formula:
# Repeated-measures ANOVA of LST with Buffer and TimePeriod as within-subject
# factors and JulianDay as the subject (wid) identifier.
# The stray trailing comma after `detailed = TRUE` was removed: it passed an
# empty extra argument to anova_test().
res.aov <- anova_test(
  data = LST_Weather_dataset_ANOVA, dv = LST, wid = JulianDay,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)
get_anova_table(res.aov, correction = "auto")
Where:
*) LST = surface temperature deviation in C
*) JulianDay = days since start of year
*) Buffer = a value 100-1900 - one of 19 areas outward from the boundary of a solar power plant (each 100m wide)
*) TimePeriod = a factor with a value of 0 or 1 corresponding to pre-/post-construction of the solar power plant.
The intent is to investigate if the construction of the installation affected the adjacent land surface temperature.
At three sites the ANOVA runs successfully, however at the fourth site it doesn't and fails with the error:
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
0 (non-NA) cases
I have 381 rows of data in 4 columns (extract below), the only difference I can think of here is that I had to remove two paired months from the time series as data was not available in one of the months. This means there are 20 months of data, rather than 24. Every other processing step is identical.
Reading online I have searched for N/As (there are none), and can't see how there are levels without values as every cell has data. I don't know how to properly evaluate this, though, as it seems this is the root of the error.
I'm hoping someone will know the code needed and/or be able to suggest a way forwards.
Buffer LST JulianDay TimePeriod
1800 -0.04576149 73 2
1900 -0.03422945 73 2
1900 -0.02089755 302 1
1900 -0.02062432 96 1
1900 -0.01465229 192 1
1900 -0.00643754 128 1
1900 -0.00333345 105 2
1800 -0.00266312 366 1
1900 -0.00181226 201 2
1900 -0.00158173 169 2
1900 -1.81E-05 41 2
1800 0.00144813 128 1
and 367 additional rows...
[Edits]
Per comments below:
dput() whole dataframe
dput() subset (as suggested)
Thanks @Dion for noting anova_test is from the rstatix package.
1)
> dput(LST_Weather_dataset_ANOVA)
structure(list(Buffer = c(100L, 200L, 300L, 400L, 500L, 600L,
700L, 800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L), LST = c(0.91797777, 0.95083024, 0.79129483,
0.74791195, 0.68740945, 0.64516119, 0.74870729, 0.78357522, 0.83663769,
0.82156894, 0.77440129, 0.62769619, 0.52052404, 0.46497939, 0.34456476,
0.20359411, 0.11688336, 0.04136486, -0.02089755, 1.15111659,
1.20353638, 1.11717501, 1.0286145, 0.90984545, 0.82983601, 0.78186792,
0.73227976, 0.6989393, 0.65015275, 0.56241798, 0.39651023, 0.34213091,
0.3386525, 0.24000145, 0.11809023, 0.07704512, -0.00266312, 0.01273022,
1.04229626, 1.14347392, 1.1156609, 1.10575157, 1.01202522, 0.77829087,
0.80477079, 0.79677169, 0.83116477, 0.83242401, 0.82394197, 0.72073306,
0.64099082, 0.58188225, 0.43328083, 0.28349521, 0.19752629, 0.10636456,
0.01987005, 0.74458844, 0.71512573, 0.6395358, 0.65294657, 0.63325921,
0.56155255, 0.60860815, 0.60614753, 0.59989994, 0.58766288, 0.57257261,
0.50018929, 0.4367402, 0.40497079, 0.31822141, 0.2300726, 0.16928876,
0.09449034, 0.01799424, 0.82747052, 0.78262774, 0.65488597, 0.62609552,
0.60057131, 0.59950609, 0.6609992, 0.6876772, 0.73196883, 0.75516596,
0.75554112, 0.64167458, 0.54703129, 0.49947692, 0.38230481, 0.25519237,
0.16087274, 0.07759223, 0.00820849, 0.75009747, 0.71421977, 0.62411035,
0.58621041, 0.58438012, 0.61346156, 0.72712994, 0.81372726, 0.87579554,
0.88934787, 0.87369461, 0.74686202, 0.64084028, 0.5599638, 0.40021941,
0.23612052, 0.13408522, 0.04484869, -0.02062432, 0.22133116,
0.28562902, 0.24359043, 0.17788898, 0.16563242, 0.11740664, 0.10102937,
0.07328697, 0.07948283, 0.07521508, 0.08526232, 0.0548022, 0.04632606,
0.06670398, 0.03262545, 0.00650875, 0.01186519, 0.00144813, -0.00643754,
0.26360849, 0.22139941, 0.16915041, 0.13499715, 0.12846785, 0.15351528,
0.15321108, 0.13963269, 0.13413671, 0.13097696, 0.15897844, 0.15489366,
0.12600815, 0.12363834, 0.0943688, 0.07324289, 0.0565765, 0.04005241,
0.01346488, 0.42361198, 0.39149841, 0.29086274, 0.21492842, 0.20664552,
0.24524285, 0.30548979, 0.35256808, 0.37350282, 0.38680061, 0.38567758,
0.31177736, 0.24643091, 0.22001284, 0.14356522, 0.07076854, 0.04168654,
0.01276553, -0.01465229, 0.57032414, 0.50658577, 0.41717664,
0.36134446, 0.35794989, 0.38457285, 0.43700723, 0.48358206, 0.50516801,
0.50086146, 0.49398709, 0.41516438, 0.33165215, 0.28357127, 0.20030152,
0.11993505, 0.08438345, 0.05755944, 0.01071499, 0.04963208, 0.34087747,
0.38385889, 0.40408637, 0.41182138, 0.15662208, 0.18857013, 0.17978741,
0.1533216, 0.1451422, 0.14890638, 0.14090521, 0.1782449, 0.23624089,
0.21003477, 0.13812217, 0.10759364, 0.07225312, 0.03185378, 0.27507486,
0.54404521, 0.56568824, 0.58543167, 0.49124799, 0.28299777, 0.27514982,
0.27526446, 0.27376722, 0.24620415, 0.22871699, 0.19647326, 0.2450593,
0.27133386, 0.15248773, 0.06240341, 0.04933824, 0.03356535, -1.81e-05,
0.21776379, 0.37010032, 0.32743525, 0.30588107, 0.31226738, 0.30518286,
0.32637517, 0.31003415, 0.23691586, 0.1985241, 0.16143326, 0.12384526,
0.11556386, 0.09243356, 0.05773894, 0.03660942, 0.02173758, -0.04576149,
-0.03422945, 0.06214728, 0.26440563, 0.24838816, 0.22704611,
0.17230754, 0.15660109, 0.18689433, 0.24464547, 0.28273218, 0.29602945,
0.29992488, 0.24679735, 0.24521192, 0.23913767, 0.15081173, 0.08724556,
0.05561237, 0.02530266, -0.00333345, 0.11993489, 0.20504424,
0.17323488, 0.14541868, 0.10994579, 0.12741154, 0.17959797, 0.22553943,
0.26564836, 0.29760832, 0.3207305, 0.28592135, 0.26551685, 0.2493214,
0.15767906, 0.0883716, 0.05058495, 0.02207594, 0.00162532, 0.05621313,
0.08020623, 0.05187855, 0.02643543, 0.02422505, 0.05372454, 0.09563737,
0.14735627, 0.18199015, 0.22456299, 0.25302274, 0.21978124, 0.19092835,
0.18255829, 0.11850551, 0.0581734, 0.03406168, 0.01868243, -0.00158173,
0.00980756, 0.07077972, 0.05126985, 0.03126771, 0.01828044, 0.00678076,
0.03566275, 0.05622289, 0.07218645, 0.08767578, 0.11078182, 0.08827425,
0.08881865, 0.10037876, 0.05952601, 0.03440435, 0.01843206, 0.0091852,
-0.00181226, 0.08737325, 0.14470842, 0.13066747, 0.12324597,
0.12014198, 0.13435757, 0.17843025, 0.19926835, 0.20503774, 0.20485414,
0.2124073, 0.1864257, 0.18810996, 0.20665551, 0.13839744, 0.08488387,
0.06246853, 0.03463723, 0.00349753, 0.35245488, 0.57692156, 0.64897028,
0.67306088, 0.68344534, 0.56106697, 0.52144197, 0.49250191, 0.47494065,
0.4359944, 0.39638743, 0.32554099, 0.28717774, 0.2826675, 0.22703594,
0.18186983, 0.15875118, 0.09672536, 0.04305742, 0.24294606, 0.54654222,
0.56344638, 0.53312729, 0.47324972, 0.34482643, 0.34915085, 0.33729055,
0.32086985, 0.29578347, 0.25030669, 0.17928298, 0.17007511, 0.18375903,
0.15222616, 0.10934224, 0.07536797, 0.04154465, 0.02550096),
JulianDay = c(302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L,
302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L,
302L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L,
366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 64L, 64L, 64L, 64L, 64L,
64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L,
64L, 64L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L,
80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 96L, 96L, 96L,
96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L,
96L, 96L, 96L, 96L, 128L, 128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L,
160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L,
160L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L,
192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 73L, 73L,
73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L,
73L, 73L, 73L, 73L, 73L, 105L, 105L, 105L, 105L, 105L, 105L,
105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L,
105L, 105L, 105L, 137L, 137L, 137L, 137L, 137L, 137L, 137L,
137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L,
137L, 137L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L,
169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L,
169L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L,
201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L,
217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L,
217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 313L,
313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L,
313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 361L, 361L,
361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L,
361L, 361L, 361L, 361L, 361L, 361L, 361L), TimePeriod = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA,
-380L))
> dput(LST_Weather_dataset_ANOVA[sample(1:nrow(LST_Weather_dataset_ANOVA), 50),])
structure(list(Buffer = c(800L, 1400L, 500L, 200L, 400L, 1400L,
100L, 1600L, 1800L, 100L, 1400L, 1500L, 900L, 700L, 800L, 600L,
400L, 1300L, 500L, 700L, 700L, 300L, 700L, 200L, 200L, 500L,
500L, 900L, 1000L, 1300L, 1400L, 1600L, 700L, 400L, 500L, 200L,
400L, 1500L, 1400L, 800L, 500L, 1200L, 1500L, 1900L, 600L, 800L,
100L, 1000L, 900L, 1100L), LST = c(0.48358206, 0.46497939, 0.41182138,
0.07077972, 0.17788898, 0.18255829, 0.21776379, 0.03660942, 0.04154465,
0.42361198, 0.49947692, 0.38230481, 0.28273218, 0.18857013, 0.33729055,
0.56106697, 0.13499715, 0.28717774, 0.12014198, 0.78186792, 0.74870729,
0.56344638, 0.18689433, 0.54404521, 0.78262774, 0.60057131, 1.01202522,
0.20503774, 0.13097696, 0.34213091, 0.5599638, 0.08724556, 0.17843025,
1.0286145, 0.01828044, 0.22139941, 0.67306088, 0.15248773, 0.22001284,
0.27526446, 0.02422505, 0.50018929, 0.31822141, 0.01799424, 0.56155255,
0.13963269, 0.27507486, 0.29578347, 0.18199015, 0.3207305), JulianDay = c(224L,
302L, 9L, 201L, 128L, 169L, 73L, 73L, 361L, 192L, 80L, 80L, 105L,
9L, 361L, 313L, 160L, 313L, 217L, 366L, 302L, 361L, 105L, 41L,
80L, 80L, 16L, 217L, 160L, 366L, 96L, 105L, 217L, 366L, 201L,
160L, 313L, 41L, 192L, 41L, 169L, 64L, 64L, 64L, 64L, 160L, 41L,
361L, 169L, 137L), TimePeriod = c(1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L)), row.names = c(179L,
14L, 195L, 306L, 118L, 299L, 229L, 244L, 379L, 153L, 90L, 91L,
256L, 197L, 369L, 348L, 137L, 355L, 328L, 26L, 7L, 364L, 254L,
211L, 78L, 81L, 43L, 332L, 143L, 32L, 109L, 263L, 330L, 23L,
309L, 135L, 346L, 224L, 166L, 217L, 290L, 69L, 72L, 76L, 63L,
141L, 210L, 371L, 294L, 277L), class = "data.frame")
You are running a repeated-measures ANOVA, which requires the observations to be complete for every individual within your specified within-subject effects. In your case, you need to ensure that, for every JulianDay, the observations are complete for every combination of Buffer and TimePeriod.
We can tabulate it by using table() and you can see for all the JulianDays they are incomplete, for example on 9 and 16:
with(LST_Weather_dataset_ANOVA,table(Buffer,TimePeriod,JulianDay))[,,c("9","16")]
, , JulianDay = 9
TimePeriod
Buffer 1 2
100 0 1
200 0 1
300 0 1
400 0 1
500 0 1
600 0 1
700 0 1
800 0 1
900 0 1
1000 0 1
1100 0 1
1200 0 1
1300 0 1
1400 0 1
1500 0 1
1600 0 1
1700 0 1
1800 0 1
1900 0 1
, , JulianDay = 16
TimePeriod
Buffer 1 2
100 1 0
200 1 0
300 1 0
400 1 0
500 1 0
600 1 0
700 1 0
800 1 0
900 1 0
1000 1 0
1100 1 0
1200 1 0
1300 1 0
1400 1 0
1500 1 0
1600 1 0
1700 1 0
1800 1 0
1900 1 0
As you have noted, if you reconcile the dates between sites, it will work. I am not very sure how you converted the JulianDay to months, but using your data, it works if I just do
# Derive a month name from the day-of-year (parsed against the year 2020),
# keep only the May and June rows, then tabulate how many observations exist
# for each Buffer x TimePeriod cell per month.
df <- LST_Weather_dataset_ANOVA
scene_dates <- strptime(paste("2020", df$JulianDay), "%Y %j")
df$Month <- months(scene_dates)
df <- subset(df, Month %in% c("May", "June"))
with(df, table(Buffer, TimePeriod, Month))
, , Month = June
TimePeriod
Buffer 1 2
100 1 1
200 1 1
300 1 1
400 1 1
500 1 1
600 1 1
700 1 1
800 1 1
900 1 1
1000 1 1
1100 1 1
1200 1 1
1300 1 1
1400 1 1
1500 1 1
1600 1 1
1700 1 1
1800 1 1
1900 1 1
, , Month = May
TimePeriod
Buffer 1 2
100 1 1
200 1 1
300 1 1
400 1 1
500 1 1
600 1 1
700 1 1
800 1 1
900 1 1
1000 1 1
1100 1 1
1200 1 1
1300 1 1
1400 1 1
1500 1 1
1600 1 1
1700 1 1
1800 1 1
1900 1 1
You can see for months June and May, they are complete (no zeros), and if we run anova, it works:
# Same repeated-measures ANOVA, now using Month as the subject (wid)
# identifier on the subset where every Buffer x TimePeriod cell is filled.
# The stray trailing comma after `detailed = TRUE` was removed: it passed an
# empty extra argument to anova_test().
res.aov <- anova_test(
  data = df, dv = LST, wid = Month,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)
ANOVA Table (type III tests)
Effect DFn DFd SSn SSd F p p<.05 ges
1 (Intercept) 1 1 1.217 0.005 222.936 4.30e-02 * 0.933
2 Buffer 18 18 0.256 0.026 9.933 5.49e-06 * 0.746
3 TimePeriod 1 1 0.013 0.048 0.274 6.93e-01 0.130
4 Buffer:TimePeriod 18 18 0.181 0.008 21.476 1.20e-08 * 0.674
While ironing last night I wondered if JulianDay might be the source of the error. It is derived from the dates of the Landsat scenes from which the dependent variable data are derived, so it is different for each site.
Editing the dataframe to replace the JulianDay column with Month and amending the code to:
str(LST_Weather_dataset_ANOVA)
res.aov <- anova_test(
data = LST_Weather_dataset_ANOVA, dv = LST, wid = Month,
within = c(Buffer, TimePeriod),
effect.size = "ges",
detailed = TRUE,
)
get_anova_table(res.aov, correction = "auto")
...the ANOVA test runs successfully:
> res.aov <- anova_test(
+ data = LST_Weather_dataset_ANOVA, dv = LST, wid = Month,
+ within = c(Buffer, TimePeriod),
+ effect.size = "ges",
+ detailed = TRUE,
+ )
> get_anova_table(res.aov, correction = "auto")
ANOVA Table (type III tests)
Effect DFn DFd SSn SSd F p p<.05 ges
1 (Intercept) 1 9 36.781 6.593 50.212 5.75e-05 * 0.735
2 Buffer 18 162 8.042 3.041 23.801 1.81e-36 * 0.378
3 TimePeriod 1 9 5.065 2.506 18.194 2.00e-03 * 0.276
4 Buffer:TimePeriod 18 162 1.713 1.117 13.800 2.71e-24 * 0.114
But I still don't fully understand why...
Hopefully this will enable someone to comment and provide an explanation?
Apologies in advance if I haven't formatted this correctly, this is my first question on SO.
I have run a series of multilevel models using lme4 in R. My outcome variables are continuous and I have one categorical level-two predictor variable with multiple categories (region of the US: Midwest, Northeast, South, West) as well as a series of time-varying covariates. When I run this code,
# Mixed model for percentfemale with a random intercept and a random timecat1
# slope per AJID. `REML = FALSE` is spelled out rather than `F`, which is an
# ordinary variable that can be reassigned.
m5 <- lmer(
  percentfemale ~ timecat1 + region + sizelogc +
    perLatinxc.lag8 + perBlackc.lag8 +
    femincomedisc.lag8 + femLFPdisc.lag8 + fememploydisc.lag8 +
    femedudisc.lag8 +
    (1 + timecat1 | AJID),
  data = data, REML = FALSE
)
I get the following results (reduced for space):
AIC BIC logLik deviance df.resid
8182.5 8269.8 -4075.2 8150.5 1722
Scaled residuals:
Min 1Q Median 3Q Max
-6.4726 -0.3921 -0.0245 0.3687 6.4414
Random effects:
Groups Name Variance Std.Dev. Corr
AJID (Intercept) 12.70271 3.5641
timecat1 0.04184 0.2045 0.44
Residual 2.16582 1.4717
Number of obs: 1738, groups: AJID, 531
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.088e+01 3.544e-01 30.696
timecat1 1.086e-01 1.252e-02 8.679
regionNortheast -2.337e+00 4.749e-01 -4.920
regionSouth 6.269e-01 4.472e-01 1.402
regionWest 1.079e+00 4.807e-01 2.245
When I plot the residuals by the independent variables, however, I only have residuals for two of the four regions (see below).
# Residuals of m5 against region, jittered horizontally, with a reference
# line at zero and a background grid.
# NOTE(review): the model summary reports 1738 observations; if `data` has
# more rows than that, resid(m5) and data$region will not line up row for
# row. Confirm before interpreting the plot.
xyplot(resid(m5) ~ region, data = data, jitter.x = TRUE, abline = 0,
       type = c("p", "g"))
Residuals plotted on y-axis, Midwest, Northeast, South, West plotted on x-axis, residuals only available for South and West regions
I have no missing data in the region variable and am at a loss as to why I would have estimates for region without corresponding residuals. Why might this be the case?
EDIT2:
# Reduced reproducible example: fit the model on `egdata` and plot its
# residuals against region from the same data frame. The plot previously
# used `data = data`, which is not the object the model was fitted to.
m5 <- lmer(
  percentfemale ~ timecat1 + region + sizelogc +
    (1 + timecat1 | AJID),
  data = egdata, REML = FALSE
)
xyplot(resid(m5) ~ region, data = egdata, jitter.x = TRUE, abline = 0,
       type = c("p", "g"))
> dput(egdata)
structure(list(AJID = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 13L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 68L, 68L, 68L, 68L, 68L,
68L, 68L, 68L, 79L, 83L, 83L, 83L, 83L, 83L, 83L, 83L, 97L, 116L,
116L, 116L, 127L, 127L, 127L, 127L, 127L, 127L, 127L, 148L, 148L,
148L, 148L, 148L, 148L, 148L, 148L, 152L, 152L, 152L, 152L, 152L,
152L, 160L, 160L, 160L, 160L, 160L, 160L, 168L, 168L, 168L, 168L,
168L, 168L, 168L, 171L, 171L, 171L, 176L, 176L, 176L, 176L, 176L,
176L, 176L, 176L, 179L, 179L, 179L, 179L, 179L, 179L, 179L, 183L,
183L, 183L, 183L, 183L, 183L, 191L, 191L, 191L, 191L, 191L, 191L,
191L, 197L, 197L, 207L, 207L, 207L, 225L, 235L, 235L, 235L, 235L,
235L, 235L, 235L, 237L, 237L, 237L, 237L, 237L, 237L, 237L, 238L,
238L, 238L, 238L, 238L, 238L, 238L, 238L, 245L, 245L, 245L, 245L,
245L, 251L, 251L, 251L, 251L, 251L, 251L, 251L, 265L, 265L, 265L,
265L, 265L, 266L, 266L, 266L, 266L, 266L, 266L, 266L, 273L, 273L,
273L, 273L, 273L, 275L, 275L, 275L, 275L, 275L, 275L, 275L, 275L,
279L, 279L, 279L, 279L, 279L, 280L, 280L, 280L, 280L, 280L, 280L,
284L, 284L, 284L, 284L, 284L, 284L, 284L, 286L, 286L, 286L, 286L,
286L, 286L, 286L, 286L, 296L, 296L, 296L, 296L, 296L, 296L, 296L,
296L, 313L, 341L, 341L, 341L, 341L, 341L, 345L, 345L, 345L, 345L,
345L, 345L, 345L, 345L, 352L, 363L, 363L, 365L, 365L, 365L, 365L,
365L, 365L, 365L, 365L, 369L, 369L, 369L, 369L, 374L, 374L, 374L,
374L, 374L, 374L, 374L, 385L, 385L, 385L, 385L, 385L, 385L, 385L,
391L, 391L, 391L, 391L, 391L, 391L, 391L, 416L, 416L, 416L, 416L,
416L, 416L, 416L, 417L, 417L, 417L, 417L, 417L, 417L, 417L, 423L,
423L, 423L, 423L, 423L, 423L, 423L, 429L, 429L, 429L, 429L, 429L,
429L, 434L, 434L, 434L, 434L, 434L, 434L, 441L, 441L, 441L, 441L,
441L, 441L, 441L, 441L, 447L, 447L, 447L, 447L, 447L, 447L, 447L,
447L, 448L, 448L, 448L, 448L, 448L, 448L, 448L, 448L, 453L, 454L,
454L, 454L, 454L, 454L, 454L, 466L, 466L, 466L, 466L, 466L, 466L,
466L, 480L, 480L, 480L, 480L, 480L, 480L, 482L, 482L, 506L, 506L,
506L, 510L, 510L, 510L, 510L, 510L, 513L, 513L, 513L, 513L, 513L,
513L, 513L, 514L, 514L, 514L, 514L, 514L, 514L, 514L, 525L, 525L,
525L, 525L, 525L, 525L, 525L, 525L, 547L, 563L, 563L, 563L, 563L,
563L, 563L, 563L, 563L, 577L, 577L, 577L, 577L, 577L, 577L, 577L,
580L, 580L, 580L, 580L, 580L, 580L, 580L, 586L, 586L, 586L, 586L,
586L, 586L, 586L, 598L, 598L, 598L, 598L, 598L, 598L, 598L, 598L,
602L, 602L, 602L, 602L, 602L, 602L, 602L, 603L, 603L, 603L, 617L,
617L, 617L, 617L, 617L, 617L, 617L, 617L, 630L, 630L, 630L, 630L,
630L, 630L, 630L, 636L, 636L, 641L, 641L, 641L, 641L, 641L, 641L,
641L), percentfemale = c(7.834101382, 8.612440191, 8.173076923,
9.030837004, 10.81081081, 12.15932914, 15.47861507, 13.06818182,
13.51351351, 6.010928962, 5.825242718, 8.5, 9.708737864, 9.302325581,
9.5, 12.29946524, 12.06896552, 6.802721088, 6.622516556, 7.042253521,
8.843537415, 7.843137255, 7.792207792, 11.25, 11.11111111, 10.85271318,
4.972375691, 6.179775281, 4.651162791, 4.954954955, 6.392694064,
4.867256637, 3.555555556, 5.172413793, 13.63636364, 13.97058824,
12.40875912, 5.925925926, 6.25, 7.692307692, 7.586206897, 0.666666667,
6.756756757, 8.904109589, 6.25, 6.94980695, 8.148148148, 10.98039216,
9.318996416, 8.865248227, 9.863945578, 10.52631579, 8.088235294,
11.64383562, 12.10191083, 10.625, 13.0952381, 12.4260355, 7.246376812,
9.289617486, 10.44776119, 11.01321586, 16.04938272, 14.71861472,
12.07207207, 15.55763824, 18.0734856, 17.56756757, 17.72639692,
19.07020873, 19.71014493, 17.64705882, 18, 18.25396825, 19.13043478,
16.31944444, 17.79935275, 20, 22.11838006, 19.77077364, 20.32967033,
19.66292135, 12.5, 14.59074733, 17.66666667, 19.62905719, 17.64705882,
16.09042553, 16.43646409, 6.060606061, 7.947019868, 7.638888889,
11.9205298, 13.15789474, 12.58741259, 6.091370558, 7.929515419,
12.38095238, 12.82051282, 12.88888889, 14.52991453, 15.49295775,
12.5984252, 12.90322581, 14.17322835, 13.17829457, 14.92537313,
9.803921569, 3.333333333, 5.109489051, 3.496503497, 3.821656051,
6.060606061, 9.756097561, 9.85915493, 2.857142857, 2.142857143,
4.516129032, 4.268292683, 5.769230769, 7.407407407, 7.317073171,
7.894736842, 5.365853659, 7.798165138, 9.482758621, 10.86956522,
9.777777778, 10.24590164, 11.29032258, 10.67961165, 9.615384615,
9.322033898, 9.649122807, 10.08403361, 4.615384615, 4.761904762,
6.25, 5.303030303, 7.8125, 5.882352941, 5.454545455, 8.620689655,
7.352941176, 9.032258065, 10.97560976, 9.036144578, 6.870229008,
9.459459459, 14.36464088, 11.5, 13.90134529, 18.4, 16, 8.571428571,
8.771929825, 6.194690265, 5.504587156, 6.796116505, 11.03117506,
19.47743468, 12.07289294, 12.9740519, 15.49295775, 16.42411642,
16.99604743, 19.1681736, 6.034482759, 14.28571429, 6.923076923,
9.929078014, 9.433962264, 8.074534161, 9.941520468, 13.77245509,
7.01754386, 8.333333333, 7.851239669, 4.827586207, 4.861111111,
7.092198582, 9.868421053, 10.1910828, 10.96774194, 13.66459627,
9.386776293, 10.94023069, 12.86926995, 14.01687216, 15.68885959,
17.43400859, 16.4295393, 15.56459817, 5.696202532, 5.921052632,
19.44444444, 8.024691358, 7.142857143, 6.951871658, 7.692307692,
6.179775281, 9.482758621, 4.761905, 3.703704, 4.950495, 2.912621,
6.930693, 5.447471, 7.142857, 9.056604, 13.42513, 16.14583, 17.77379,
17.54967, 17.65677, 9.565217, 6.306306, 8.181818, 5.340114, 7.124352,
7.549669, 12.74876, 13.29752, 14.33311, 15.43027, 15.96702, 7.758621,
7.968127, 10.16949, 9.60961, 2.424242, 15.34091, 10.30928, 4.6875,
5.050505, 7.009346, 7.906977, 2.9615, 3.616637, 10.94527, 11.86903,
15.31532, 17.4939, 20.42042, 0, 0, 0, 1.694915, 3.225806, 3.149606,
5, 1.183432, 1.694915, 2.717391, 2.312139, 0.9478673, 2.45098,
3.012048, 0, 1.734104, 2.564103, 3.5, 3.626943, 3.571429, 5.729167,
1, 0.9803922, 1.818182, 1.818182, 1.694915, 1.709402, 0.862069,
6.956522, 9.917355, 10.25641, 0, 11.51079, 9.333333, 1.470588,
3.472222, 4.166667, 4.166667, 6.756757, 7.801418, 0, 7.741935,
7.643312, 6.962025, 7.594937, 8.823529, 9.333333, 9.677419, 9.574468,
7.446809, 7.55814, 7.821229, 6.989247, 10.27027, 8.196721, 8.441558,
5.714286, 5.5, 6.521739, 6, 5.940594, 4.663212, 8.837209, 11.45833,
4.516129, 3.703704, 4.285714, 5.625, 5.91716, 5.813953, 6.134969,
11.87335, 12.16545, 12.23529, 12.72321, 12.67606, 15.6746, 15.21739,
6.930693, 9.677419, 11.2, 11.2782, 12.19512, 9.448819, 10.18519,
8.490566, 7.894737, 10.15625, 11.19403, 8.917197, 11.68831, 17.51412,
16.66667, 18.53933, 4.081633, 4.6875, 5.181347, 4.812834, 4.975124,
3.349282, 4.624277, 0.6369427, 2.857143, 7.142857, 5.454545,
7.058824, 7.142857, 8.391608, 1.360544, 1.37931, 1.360544, 4.026846,
4.697987, 6.535948, 5.405405, 7.801418, 5.454545, 6.622517, 5.882353,
7.18232, 8.571429, 9.589041, 9.846154, 10.81871, 11.47059, 3.90625,
4.6875, 3.571429, 4.511278, 6.818182, 11.04294, 14.10256, 4.020101,
3.045685, 2.439024, 3.478261, 3.2, 3.703704, 3.571429, 4.363636,
3.97351, 4.792332, 5.333333, 5.315615, 7.923497, 7.286432, 10.35387,
11.32075, 11.50923, 11.9877, 11.8007, 11.60267, 11.66078, 10.52002,
6.752412, 6.583072, 9.898477, 10.51345, 10.22444, 10.90487, 8.878505,
13.67521, 16.66667, 17.43119, 10, 10.62802, 12.61682, 13.00813,
11.78862, 7.33945, 10.69959, 20.95238, 7.438017, 7.5, 8.333333,
10.32028, 13.35616, 15.24823, 11.67883, 18.30508, 21.59468, 3.902439,
4.950495, 5.365854, 5.263158, 7, 8.653846, 7.614213), timecat1 = c(-26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, 0L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L,
-6L, 0L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -6L, -10L,
-6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, -20L, -16L, -13L, -10L, -6L, 0L, -20L,
-16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L,
0L, -13L, -10L, -6L, -26L, -23L, -20L, -16L, -13L, -10L, -6L,
0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -20L, -16L, -13L,
-10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -6L, 0L,
-10L, -6L, 0L, -10L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -23L,
-20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L,
-13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L, 0L, -26L, -23L,
-20L, -13L, -6L, 0L, -26L, -23L, -20L, -13L, -10L, -6L, 0L, -26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L,
-13L, -10L, -6L, 0L, -16L, -23L, -20L, -16L, -13L, -10L, -26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -13L, -10L, -6L, -26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -13L, -10L, -6L, 0L, -23L,
-20L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L,
-6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L,
-16L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L, 0L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L,
-10L, -6L, -23L, -20L, -16L, -13L, -10L, -6L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L,
-6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -16L, -23L,
-20L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L,
0L, -23L, -20L, -13L, -10L, -6L, 0L, -13L, 0L, -10L, -6L, 0L,
-26L, -23L, -16L, -13L, -10L, -26L, -23L, -20L, -16L, -13L, -10L,
0L, -26L, -23L, -20L, -13L, -10L, -6L, 0L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, 0L, -26L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L,
-20L, -16L, -13L, -10L, -6L, -26L, -23L, -20L, -16L, -13L, -6L,
0L, -26L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L,
-16L, -13L, -10L, -6L, 0L, -10L, -6L, 0L, -26L, -23L, -20L, -16L,
-13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -6L, 0L, -26L,
-23L, -26L, -23L, -20L, -16L, -13L, -10L, -6L), region = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Midwest",
"Northeast", "South", "West"), class = "factor"), sizelogc = c(0.408823946,
0.636182014, 0.663878359, 0.774106962, 0.837491565, 0.81147203,
0.731559255, 0.989580112, -0.52527906, -0.277305662, -0.02646953,
-0.011379257, 0.009202568, -0.141848484, -0.110337044, -0.078522425,
-0.136207762, -0.595759393, -0.282844698, -0.2826483, -0.277326766,
-0.250742573, -0.245563831, -0.183035124, -0.32184058, -0.287607408,
0.048395349, -0.092604697, 0.036508019, 0.127815082, 0.155366121,
0.255766404, 0.173947166, -0.884692994, -0.731071647, -0.394593501,
-0.316877791, -0.702103128, -0.74801155, -0.646588826, -0.647397051,
-0.587480728, -0.603441812, -0.601264967, -0.183904487, 0.168725229,
0.124365995, 0.088979071, 0.14285296, 0.250676301, 0.31927981,
0.28021636, -0.460329477, -0.384224561, -0.338558779, -0.562322168,
-0.200129557, -0.076069403, -0.494231029, -0.189163979, -0.110595941,
0.001720365, 0.121454067, 0.037814992, 1.598396458, 1.552348848,
1.528920185, 1.633910643, 1.606908979, 1.832432709, 1.57677083,
-0.640839651, -0.694966643, -0.547000501, -0.149472291, 0.361169794,
0.451506904, 0.497633974, 0.455591612, 0.556782641, 0.567741169,
0.559003403, 0.960085349, 1.049901785, 1.093877349, 1.153096098,
1.25397841, 1.307771936, 1.216469988, -0.340080349, -0.277326766,
-0.309985406, -0.232080481, -0.22633193, -0.259169473, -0.066114362,
0.07356142, 0.079348177, 0.189508651, 0.182273574, 0.213919294,
0.176972887, -0.622986408, -0.655332188, -0.375868374, -0.37900877,
-0.594707567, -0.776192083, -0.530252871, -0.726032643, -0.399728748,
-0.408587383, -0.241049151, -0.153805486, -0.432188637, -0.681483841,
-0.499994734, -0.399728748, -0.363009875, -0.40728457, -0.37386737,
-0.341941759, -0.327495407, -0.02646953, -0.062299347, -0.018869451,
-0.025830674, -0.045304574, 0.009124882, -0.222468106, -0.75938432,
-0.758622686, -0.680881903, -0.705224506, -0.662300858, -0.57205,
-0.5534199, -0.521425683, -0.512222273, -0.633091239, -0.726886003,
-0.765333084, -0.673617498, -0.517985978, -0.355459502, -0.328746935,
-0.357287329, -0.547952449, -0.390596139, -0.155965909, -0.141848484,
-0.041607712, 0.070160772, 0.081964777, -0.782026797, -0.850996006,
-0.680881903, -0.726886003, -0.897757669, 0.303072117, 0.499784169,
0.702821333, 0.801114163, 0.693473185, 0.685994373, 0.914163737,
0.870120068, -0.641478507, -0.669679706, -0.384224561, -0.4137822,
-0.165559974, -0.58143934, -0.195452876, -0.273388974, 0.015697784,
0.03330568, 0.085281529, -0.650093744, -0.36861152, -0.422177711,
-0.309985406, -0.254654803, -0.314148136, -0.245248134, 2.21254214,
2.464016344, 2.517465096, 2.561832541, 2.531204875, 2.596658583,
2.532332869, 2.463416317, -0.762813478, -0.524421951, -0.559536127,
-0.464267269, -0.367972664, -0.31100774, -0.33366095, -0.336878457,
-0.78968967, -0.804381978, -0.770484405, -0.820942213, -0.780928444,
-0.753377405, 0.507649068, 0.90594493, 0.950657497, 0.942646379,
1.012083374, 0.956143966, 1.041487785, 0.985550557, -0.674065298,
-0.666992791, -0.442860242, 1.740855448, 2.044544372, 2.029656842,
2.069429545, 1.968364716, 1.97746863, 1.883613066, 1.896172492,
0.042455234, 0.173384627, 0.639131166, 0.581430439, -0.55992864,
-0.223940699, -0.081275017, -0.095328469, -0.041607712, -0.022849423,
-0.012017382, 1.467743908, 1.517602151, 1.75795099, 1.728731945,
1.819062624, 1.618562794, 1.394151166, -1.056795911, -0.989847234,
-0.929836301, -0.819247308, -0.753377405, -0.741592151, -0.933797606,
-0.639992648, -0.358710449, -0.202247874, -0.272729057, -0.154788807,
-0.090038867, -0.25450746, -0.58143934, -0.36861152, -0.250608946,
-0.155965909, -0.19481402, -0.154788807, -0.076069403, -0.867895383,
-0.793473924, -0.722750187, -0.729635149, -0.660119911, -0.677053629,
-0.683503065, -0.501432433, -0.529322349, -0.582050305, -0.333739492,
-0.428790775, -0.36875227, -0.674704154, -0.565689993, -0.575819755,
-0.640839651, -0.562322168, -0.609914327, -0.929351167, -0.507130454,
-0.465900522, -0.47544057, -0.523783095, -0.606773931, -0.448369898,
-0.37287206, -0.503951844, -0.246966411, -0.301428195, -0.227878491,
-0.118317987, -0.098549089, -0.11347744, -0.24986708, -0.495218164,
-0.030009357, -0.069790019, -0.015317049, -0.087781263, -0.079206126,
-0.204449218, -0.302135509, -0.486740125, -0.578166227, -0.584382126,
-0.461791419, -0.370726974, -0.493575335, -0.399398814, 0.250851418,
0.597179856, 0.658448481, 0.714387727, 0.820962263, 0.859063513,
1.053196627, -0.804381978, -0.578112513, -0.660642278, -0.5811315,
-0.677053629, -0.655332188, -0.827090486, -0.812961133, -0.716472848,
-0.590621124, -0.489347051, -0.645030442, -0.363648731, -0.250052349,
-0.319419439, -0.218776516, -0.54880241, -0.282844698, -0.202247874,
-0.219144811, -0.165946036, -0.090766948, -0.326828121, -0.427965937,
-0.615670623, -0.578112513, -0.467270885, -0.417979859, -0.528873117,
-0.549971673, -0.751513923, -0.404058269, -0.494231029, -0.492438146,
-0.333739492, -0.273088505, -0.410424966, -0.448995755, -0.655332188,
-0.499575469, -0.122840343, -0.135833432, 0.209243937, 0.371355262,
0.498310873, 0.539341695, 0.552798626, -0.484530622, -0.460329477,
-0.410200048, -0.621421565, -0.606773931, -0.262371866, -0.217973717,
-0.436129248, -0.191277505, -0.088765921, 0.036508019, 0.065294725,
0.173384627, 0.140030452, -0.128162504, 0.159966049, 0.221644403,
0.209243937, 0.22036664, 0.614624755, 0.681129799, 1.065212169,
1.315573819, 1.477612502, 1.56898489, 1.741486445, 1.795245059,
1.968924143, 2.11464897, 0.14434477, 0.319548119, 0.663544174,
0.670843847, 0.680632429, 0.731559255, 0.749879332, -0.687899475,
-0.622986408, -0.73479636, -0.291390402, -0.058788322, 0.020036939,
0.217706611, 0.239519072, 0.076585244, 0.182078689, 0.051608314,
-0.199956196, 0.093274675, 0.204646826, 0.288196489, 0.380811597,
0.324394911, 0.291056575, -0.009041676, 0.268254825, -0.361143441,
-0.077205048, -0.172647405, -0.015317049, -0.207445767, -0.114297445,
-0.093907344)), row.names = c(NA, -432L), class = "data.frame")
> str(egdata)
'data.frame': 432 obs. of 5 variables:
$ AJID : int 8 8 8 8 8 8 8 8 13 51 ...
$ percentfemale: num 7.83 8.61 8.17 9.03 10.81 ...
$ timecat1 : int -26 -23 -20 -16 -13 -10 -6 0 0 -26 ...
$ region : Factor w/ 4 levels "Midwest","Northeast",..: 3 3 3 3 3 3 3 3 4 4 ...
$ sizelogc : num 0.409 0.636 0.664 0.774 0.837 ...
Given the data that you've provided, I can now see the underlying issue. Your dataset has a number of missing values. As noted in the documentation of this function (https://www.rdocumentation.org/packages/lme4/versions/1.1-23/topics/lmer):
The default action (na.omit, inherited from the 'factory fresh' value of getOption("na.action")) strips any observations with any missing values in any variables.
As such, your model will only leverage rows where there are no missing values. This is the reason you see no residual values for some levels of your data.
To see this yourself, you can run the following to extract only rows that have no missing values:
# Keep only the rows that have no missing value in any column.
data2 <- data[complete.cases(data), ]
You can see that data2 is much smaller. Then run table(data2$region) to see the levels of region in this smaller dataset.
Please have a look at https://stats.stackexchange.com/questions/309718/how-to-deal-with-missing-data-in-mixed-effects-or-multi-level-models for information on how you might handle this missing data.
Update: With your new dataset of only complete cases, I can no longer reproduce the issue. Perhaps clear your R session and restart? Reproducible code:
library(lme4)
library(lattice)

# Mixed model for percentfemale: fixed effects for timecat1, region and
# sizelogc, plus a random intercept and random timecat1 slope for each AJID.
# REML = FALSE requests a maximum-likelihood fit; it is spelled out in full
# because the shorthand F is an ordinary variable that can be reassigned.
m5 <- lmer(
  percentfemale ~ timecat1 + region + sizelogc + (1 + timecat1 | AJID),
  data = egdata,
  REML = FALSE
)
summary(m5)

# Residuals plotted against region, with horizontal jitter so overlapping
# points stay visible; type = c("p", "g") draws points over a grid.
xyplot(
  resid(m5) ~ region,
  data = egdata,
  jitter.x = TRUE,
  abline = 0,
  type = c("p", "g")
)
I have this dataset. I want to make side-by-side boxplots of only those movies whose indexes appear 67 times in the "movie" column. The following code tells me the indexes that appear 67 times in the "movie" column:
names(which(table(votes$movie) == 67))
But how can I make side-by-side boxplots for the "rating" of each of these indexes? And how can I also add the averages as a single point on each of those boxplots?
I have tried:
boxplot(votes$rating[which(table(votes$movie) == 67)])
But this is clearly wrong, as it shows me only one boxplot
MRE:
# Minimal reproducible example data: a data frame with the columns user,
# movie, rating and timestamp (POSIXct), written out with dput().
# The two commented lines below show how it was presumably sampled from the
# full `votes` data (100 rows drawn with replacement) -- confirm with the asker.
# set.seed(1)
# votes2 <- votes[sample(1:nrow(votes), 100, TRUE), ]
votes2 <-
structure(list(user = c(869L, 620L, 42L, 341L, 930L, 267L, 708L,934L, 148L, 385L, 251L, 181L, 313L, 437L, 747L, 260L, 109L, 201L,229L, 366L, 921L, 829L, 934L, 868L, 321L, 226L, 527L, 726L, 26L,457L, 117L, 325L, 327L, 60L, 804L, 158L, 593L, 200L, 880L, 482L,868L, 339L, 328L, 347L, 100L, 896L, 846L, 676L, 357L, 496L, 541L,807L, 257L, 924L, 894L, 478L, 601L, 13L, 311L, 230L, 435L, 654L,742L, 180L, 887L, 201L, 147L, 326L, 749L, 465L, 727L, 200L, 216L,267L, 345L, 445L, 268L, 26L, 366L, 82L, 763L, 436L, 324L, 707L,802L, 280L, 682L, 343L, 826L, 325L, 508L, 618L, 405L, 655L, 645L,378L, 296L, 438L, 450L, 151L), movie = c(181L, 240L, 410L, 948L,143L, 926L, 1054L, 502L, 474L, 47L, 147L, 125L, 527L, 249L, 659L,319L, 576L, 1426L, 245L, 672L, 1028L, 151L, 492L, 90L, 182L,250L, 7L, 248L, 841L, 222L, 307L, 434L, 318L, 132L, 746L, 510L,692L, 79L, 585L, 269L, 739L, 485L, 679L, 386L, 347L, 686L, 12L,303L, 597L, 532L, 304L, 820L, 285L, 173L, 52L, 71L, 208L, 333L,504L, 266L, 961L, 195L, 294L, 216L, 491L, 179L, 304L, 655L, 62L,855L, 222L, 756L, 226L, 217L, 303L, 902L, 825L, 255L, 671L, 1128L,283L, 568L, 259L, 212L, 646L, 144L, 566L, 88L, 174L, 99L, 172L,44L, 482L, 863L, 674L, 696L, 292L, 269L, 722L, 443L), rating = c(3L,5L, 3L, 3L, 2L, 2L, 3L, 4L, 5L, 4L, 3L, 3L, 4L, 5L, 4L, 2L, 3L,2L, 3L, 5L, 4L, 4L, 4L, 3L, 3L, 4L, 5L, 2L, 2L, 5L, 5L, 5L, 5L,4L, 4L, 3L, 3L, 5L, 1L, 4L, 2L, 5L, 2L, 1L, 4L, 3L, 5L, 4L, 4L,5L, 4L, 3L, 5L, 5L, 4L, 3L, 4L, 3L, 4L, 4L, 1L, 4L, 3L, 5L, 2L,5L, 5L, 5L, 3L, 4L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 3L, 5L, 1L, 4L,5L, 5L, 4L, 4L, 2L, 3L, 4L, 5L, 5L, 5L, 4L, 3L, 3L, 3L, 3L, 5L,4L, 5L, 5L),
timestamp = structure(c(884490825, 889987954, 881110483,890758169, 879535462, 878970785, 877326158, 891194539, 877019882,879441982, 886272319, 878962816, 891013525, 880142027, 888639175,890618198, 880580663, 884114015, 891632385, 888858078, 879380142,891990672, 891192087, 877109874, 879439679, 883890491, 879456162,889832422, 891380200, 882392853, 880124339, 891478376, 887820828,883325944, 879444890, 880134296, 886193724, 884128499, 880175050,887643096, 877111542, 891032413, 885049460, 881654846, 891375212,887159146, 883947777, 892685403, 878952080, 876072633, 883864207,892532068, 882049950, 885458060, 882404507, 889388790, 876350017,881514810, 884364873, 880484286, 884133635, 887864350, 881005590,877128388, 881379566, 884114471, 885593942, 879875432, 878849052,883531444, 883709350, 876042493, 880244803, 878973760, 884900448,891200870, 875742893, 891377609, 888857990, 884714361, 878915600,887769416, 880575107, 886286792, 875986155, 891700514, 888519260,876405130, 885690481, 891479244, 883767157, 891308791, 885544739,887473995, 892054402, 880045044, 884196057, 879867960, 882471524,879524947), class = c("POSIXct","POSIXt"), tzone = "")),
.Names = c("user","movie", "rating", "timestamp"), row.names = c(26551L, 37213L,57286L, 90821L, 20169L, 89839L, 94468L, 66080L, 62912L, 6179L,20598L, 17656L, 68703L, 38411L, 76985L, 49770L, 71762L, 99191L,38004L, 77745L, 93471L, 21215L, 65168L, 12556L, 26723L, 38612L,1340L, 38239L, 86970L, 34035L, 48209L, 59957L, 49355L, 18622L,82738L, 66847L, 79424L, 10795L, 72372L, 41128L, 82095L, 64707L,78294L, 55304L, 52972L, 78936L, 2334L, 47724L, 73232L, 69274L,47762L, 86121L, 43810L, 24480L, 7068L, 9947L, 31628L, 51864L,66201L, 40684L, 91288L, 29361L, 45907L, 33240L, 65088L, 25802L,47855L, 76632L, 8425L, 87533L, 33908L, 83945L, 34669L, 33378L,47636L, 89220L, 86434L, 38999L, 77733L, 96062L, 43466L, 71252L,40000L, 32536L, 75709L, 20270L, 71113L, 12170L, 24549L, 14331L,23963L, 5894L, 64229L, 87627L, 77892L, 79731L, 45528L, 41009L,
81088L, 60494L), class = "data.frame")
# Movie ids that occur exactly twice in the sample (the small-scale analogue
# of "appears 67 times" in the full data).
names(which(table(votes2$movie) == 2))
# [1] "222" "269" "303" "304"
# The attempt in question: which() returns positions within the table of
# per-movie counts, not row numbers of votes2, so the ratings picked here do
# not belong to those movies, and a single numeric vector draws only one box.
boxplot(votes2$rating[which(table(votes2$movie) == 2)])
Perhaps this, as I understand the request to be for side-by-side boxplots of the ratings of movies that have exactly 67 votes:
# One box of ratings per movie, restricted to movies with exactly 67 votes.
# names(which(...)) keeps only the ids whose count equals 67; names() applied
# directly to the logical table would return every movie id and filter nothing.
boxplot(rating ~ movie, data = votes,
        subset = movie %in% names(which(table(votes$movie) == 67)))
I switched the order of the terms in my first guess at the correct formula; testing showed this version was more successful on your example:
# Same plot on the small example: one box of ratings for each movie that
# occurs exactly twice. which() is needed so that only the ids with a TRUE
# comparison are kept; without it the subset matches every movie.
boxplot(rating ~ movie, data = votes2,
        subset = movie %in% names(which(table(votes2$movie) == 2)))
You should probably do a search on rhelp and SO for plotting a point or text for the mean of categories on boxplots. Pretty sure this has been asked before. If unsuccessful, then report on the search terms used.