How to fill blank section of faceted ggplot in R? - r

Data
Here is my data:
work <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L
), Mins_Sleep = c(300L, 540L, 540L, 480L, 480L, 480L, 480L, 420L,
300L, 240L, 480L, 300L, 420L, 360L, 390L, 405L, 420L, 360L, 420L,
350L, 420L, 450L, 445L, 480L, 300L, 400L, 310L, 390L, 350L, 450L,
390L, 390L, 510L, 452L, 310L, 360L, 500L, 360L, NA, NA, 420L,
420L, 420L, 382L, 430L, 393L, 240L, 400L, 480L, 450L), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L), Tea_Cups = c(2L, 4L, 2L, 0L, 0L, 2L, 0L, 2L, 4L, 0L, 0L,
0L, 2L, 6L, 5L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 0L, 2L, 1L, 0L, 4L,
4L, 4L, 2L, 1L, 0L, 2L, 0L, 0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 1L, 0L, 2L, 0L), Time_Wake = c(500L, 715L, 600L,
600L, 700L, 600L, 700L, 500L, 500L, 500L, 500L, 700L, 645L, 700L,
630L, 645L, 700L, 600L, 700L, 550L, 700L, 730L, 725L, 800L, 600L,
640L, 600L, 730L, 650L, 830L, 630L, 630L, 830L, 722L, 641L, 800L,
720L, 700L, NA, NA, NA, 700L, 700L, 622L, 710L, 632L, 400L, 640L,
700L, 730L)), class = "data.frame", row.names = c(NA, -50L))
Oftentimes I have odd combinations of variables that don't neatly fit into a faceted grid in ggplot. As an example, here is some code to stack my numeric data into one plot:
# Keep the numeric columns, stack them into key/value pairs, and draw one
# density panel per variable, each panel with its own axis ranges.
work %>%
  keep(is.numeric) %>%
  gather() %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~key, scales = "free")
However, as you can see, there are five plots, and arranging the rows and columns doesn't really fix the issue because it is an odd number:
I'm wondering if there is a way to paste some kind of text box into this sixth quadrant of the faceted plot. I'm thinking of something like this:
How can one accomplish this goal?

One option is to access the 'blank space' using gtable/grid, e.g.
library(tidyverse)
library(gtable)
library(grid)
work <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L
), Mins_Sleep = c(300L, 540L, 540L, 480L, 480L, 480L, 480L, 420L,
300L, 240L, 480L, 300L, 420L, 360L, 390L, 405L, 420L, 360L, 420L,
350L, 420L, 450L, 445L, 480L, 300L, 400L, 310L, 390L, 350L, 450L,
390L, 390L, 510L, 452L, 310L, 360L, 500L, 360L, NA, NA, 420L,
420L, 420L, 382L, 430L, 393L, 240L, 400L, 480L, 450L), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L), Tea_Cups = c(2L, 4L, 2L, 0L, 0L, 2L, 0L, 2L, 4L, 0L, 0L,
0L, 2L, 6L, 5L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 0L, 2L, 1L, 0L, 4L,
4L, 4L, 2L, 1L, 0L, 2L, 0L, 0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 1L, 0L, 2L, 0L), Time_Wake = c(500L, 715L, 600L,
600L, 700L, 600L, 700L, 500L, 500L, 500L, 500L, 700L, 645L, 700L,
630L, 645L, 700L, 600L, 700L, 550L, 700L, 730L, 725L, 800L, 600L,
640L, 600L, 730L, 650L, 830L, 630L, 630L, 830L, 722L, 641L, 800L,
720L, 700L, NA, NA, NA, 700L, 700L, 622L, 710L, 632L, 400L, 640L,
700L, 730L)), class = "data.frame", row.names = c(NA, -50L))
# Reshape the numeric columns to long form (one row per variable/value pair)
# and draw one density panel per variable with free axis scales.
# pivot_longer() replaces the superseded gather(); naming the output columns
# "key" and "value" keeps the aesthetic mapping and facet formula unchanged.
p <- work %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~key, scales = "free")
# Text to show in the empty facet slot, one bullet per line.
# paste0() is used because paste()'s default sep = " " would insert a space
# after every "\n", so each bullet line would start with a stray space and
# sit one character to the right of the heading.
note <- paste0(
  "Notes on Plots:\n",
  "- coffee skew\n",
  "- sleep semi-normal\n",
  "- work is weird\n",
  "- etc\n",
  "- etc"
)
# Stand-alone plot that contains nothing but the note text: the label is
# anchored at (0, 0) with hjust = 0 (left-justified) and theme_void() removes
# every other plot element.
p2 <- ggplot() +
  annotate("text", x = 0, y = 0, label = note, size = 5, hjust = 0) +
  theme_void()
# Convert both plots to gtables so the note can be placed inside the facet
# layout of the main plot.
pg <- ggplotGrob(p)
#> Warning: Removed 5 rows containing non-finite values (stat_density).
p2g <- ggplotGrob(p2)

# Layout table (t/l/b/r cell positions) of the grobs whose name matches
# "panel". trim = FALSE keeps the positions in the coordinates of the full
# plot table. FALSE is spelled out because F is an ordinary, reassignable
# variable.
pl <- gtable_filter(pg, "panel", trim = FALSE)$layout

# Add the note grob starting at the bottom-most panel row and right-most
# panel column.
# NOTE(review): r is read from the third row of the panel layout, which
# depends on the order in which ggplot2 lists the panels — confirm this is
# the intended right edge for other facet counts / ggplot2 versions.
pg <- gtable_add_grob(pg, p2g, t = max(pl$t), l = max(pl$l), r = pl$r[3])

# Draw the combined table on a fresh page.
grid.newpage()
grid.draw(pg)
Created on 2022-09-15 by the reprex package (v2.0.1)

I wonder if this is the right direction to get you to start:
# Map colour to the facet variable so ggplot2 produces a legend, then place
# that legend inside the plot at relative position (0.8, 0.3), i.e. towards
# the lower right of the figure.
ggplot(gather(keep(work, is.numeric)), aes(x = value)) +
  geom_density(aes(color = key)) +
  facet_wrap(~key, scales = "free") +
  theme(legend.position = c(0.8, 0.3))
Edit
To keep original black color for all graphs
# Named colour vector: every facet variable is mapped to black, so using the
# colour aesthetic does not change how the curves look.
cols <- setNames(
  rep("black", 5),
  c("Mins_Work", "Mins_Sleep", "Coffee_Cups", "Tea_Cups", "Time_Wake")
)
# Same faceted density plot with the legend placed inside the figure, but
# with the colour scale overridden by the named vector `cols`.
work %>%
  keep(is.numeric) %>%
  gather() %>%
  ggplot(aes(x = value)) +
  geom_density(aes(color = key)) +
  scale_color_manual(values = cols) +
  facet_wrap(~key, scales = "free") +
  theme(legend.position = c(0.8, 0.3))

Related

What is wrong with how I'm cutting my dataset?

Reproducible Dataset
This is my data:
#### Data for Stack ####
stack <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L,
269L, 300L, 427L, 301L, 436L, 342L, 229L, 379L, 102L, 146L, NA,
94L, 345L, 73L, 204L, 512L, 113L, 135L, 458L, 493L, 552L, 108L,
335L, 395L, 508L, 546L, 396L, 159L, 325L, 747L, 650L, 377L, 461L,
669L, 186L, 220L, 410L, 708L, 409L, 515L, 413L, 166L, 451L, 660L,
177L, 192L, 191L, 461L, 637L, 297L, 601L, 586L, 270L, 479L, 0L,
480L, 397L, 174L, 111L, 0L, 610L, 332L, 345L, 423L, 160L, 611L,
0L, 345L, 550L, 324L, 427L, 505L, 632L, 560L, 230L, 495L, 235L,
522L, 654L, 465L, 377L, 260L, 572L, 612L, 594L, 624L, 237L, 0L,
38L, 409L, 634L, 292L, 706L, 399L, 568L, 0L, 694L, 298L, 616L,
553L, 581L, 423L, 636L, 623L, 338L, 345L, 521L, 438L, 504L, 600L,
616L, 656L, 285L, 474L, 688L, 278L, 383L, 535L, 363L, 470L, 457L,
303L, 123L, 363L, 329L, 513L, 636L, 421L, 220L, 430L, 428L, 536L,
156L, 615L, 429L, 103L, 332L, 250L, 281L, 248L, 435L, 589L, 515L,
158L, 0L, 649L, 427L, 193L, 225L, 0L, 280L, 163L, 536L, 301L,
406L, 230L, 519L, 0L, 303L, 472L, 392L, 326L, 368L, 405L, 515L,
308L, 259L, 769L, 93L, 517L, 261L, 420L, 248L, 265L, 834L, 313L,
131L, 298L, 134L, 385L, 648L, 529L, 487L, 533L, 641L, 429L, 339L,
508L, 560L, 439L, 381L, 397L, 692L, NA), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L, 6L, 4L, 6L, 4L, 4L, 2L, 6L, 6L, 5L, 1L, 3L, 1L, 5L, 4L, 6L,
5L, 0L, 6L, 6L, 4L, 4L, 2L, 2L, 6L, 6L, 7L, 3L, 3L, 0L, 5L, 7L,
6L, 3L, 5L, 3L, 3L, 1L, 9L, 9L, 3L, 3L, 6L, 6L, 6L, 3L, 0L, 7L,
6L, 6L, 3L, 9L, 3L, 8L, 8L, 3L, 3L, 7L, 6L, 3L, 3L, 3L, 6L, 6L,
6L, 1L, 9L, 3L, 3L, 2L, 6L, 3L, 6L, 9L, 6L, 8L, 9L, 6L, 6L, 6L,
0L, 3L, 0L, 3L, 3L, 6L, 3L, 0L, 9L, 3L, 0L, 2L, 0L, 6L, 6L, 6L,
3L, 6L, 3L, 9L, 3L, 0L, 0L, 6L, 3L, 3L, 3L, 3L, 6L, 0L, 6L, 3L,
3L, 5L, 5L, 3L, 0L, 6L, 4L, 2L, 0L, 2L, 4L, 0L, 6L, 4L, 4L, 2L,
2L, 0L, 9L, 6L, 3L, 6L, 6L, 9L, 0L, 6L, 6L, 6L, 6L, 6L, 6L, 3L,
3L, 0L, 9L, 6L, 3L, 6L, 3L, 6L, 1L, 6L, 6L, 6L, 6L, 6L, 1L, 3L,
9L, 6L, 3L, 6L, 9L, 3L, 5L, 6L, 3L, 0L, 6L, 3L, 3L, 5L, 0L, 6L,
3L, 5L, 3L, 0L, 6L, 7L, 3L, 6L, 6L, 6L, 6L, 3L, 5L, 6L, 7L, 6L,
6L, 4L, 3L)), class = "data.frame", row.names = c(NA, -244L))
Solution So Far
I'm trying to cut my coffee data into three groups, a low group, a medium group, and a high group. Here is how I tried doing so:
#### Load Libraries ####
library(tidyverse)
library(ggpubr)
#### Transform Data: Coffee ####
# Inspect the observed range to choose the break points.
range(stack$Coffee_Cups)

# Three labels for the three intervals delimited by the breaks 0, 3, 6, 9.
coffee_labels <- c("Low", "Medium", "High")
coffee_breaks <- seq(0, 9, by = 3)

# Bin the cup counts. With cut()'s defaults (right = TRUE,
# include.lowest = FALSE) the intervals are (0,3], (3,6] and (6,9], so a
# value of exactly 0 lies outside every interval and comes back as NA.
coffee_transform <- cut(
  stack$Coffee_Cups,
  breaks = coffee_breaks,
  labels = coffee_labels
)

# Attach the binned factor to the data as a new column.
stack_transform <- mutate(stack, coffee_level = coffee_transform)

# Print the last 30 levels to check the result.
tail(stack_transform$coffee_level, 30)
Problem
However, when I print the tail command at the end, I get these NA values, which I assume is from an improper cut:
[1] Low Medium Medium Low <NA> Medium Low Low Medium <NA> Medium
[12] Low Medium Low <NA> Medium High Low Medium Medium Medium Medium
[23] Low Medium Medium High Medium Medium Medium Low
Levels: Low Medium High
I looked and those values correspond to my coffee consumption equaling zero, yet I already set the cut from 0 to 9. Naturally, when I try to make a boxplot with this, the NA levels get included, which I don't want:
#### Transform Coffee Boxplot ####
# Boxplot of Mins_Work for each coffee_level, coloured by level, with the
# legend hidden and custom title/caption fonts.
ggboxplot(
  stack_transform,
  x = "coffee_level",
  y = "Mins_Work",
  palette = "simpsons",
  color = "coffee_level",
  title = "Coffee Consumption Level Productivity",
  caption = "*Data obtained from local matrix.",
  xlab = "Coffee Consumption Level",
  ylab = "Minutes of Productivity"
) +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.caption = element_text(face = "italic"),
    plot.title = element_text(face = "bold", size = 18, family = "mono")
  )
Question
How do I fix these NA values? I want my zero values to be included into the "low" group if possible.
Would this work for you? It seems you are using base R, so a nested ifelse statement may be simpler:
# Label each cup count by range: 0-3 is "Low", 4-6 is "Medium", 7-9 is
# "High"; anything that matches none of the three sets falls through to NA.
stack$coffee_cat <- with(
  stack,
  ifelse(
    Coffee_Cups %in% 0:3, "Low",
    ifelse(
      Coffee_Cups %in% 4:6, "Medium",
      ifelse(Coffee_Cups %in% 7:9, "High", NA)
    )
  )
)
Output
# Mins_Work Coffee_Cups coffee_cat
#1 435 3 Low
#2 350 0 Low
#3 145 2 Low
#4 135 6 Medium
#5 15 4 Medium
#6 60 5 Medium
case_when would be a dplyr alternative:
# dplyr version of the same binning: the first matching condition supplies
# the label, and rows matching no condition are left as NA.
mutate(
  stack,
  coffee_level = case_when(
    Coffee_Cups %in% 0:3 ~ "Low",
    Coffee_Cups %in% 4:6 ~ "Medium",
    Coffee_Cups %in% 7:9 ~ "High"
  )
)
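To include the zeros when cutting, you could pass include.lowest = TRUE to cut() (by default its intervals are open on the left, so 0 falls outside the first interval (0,3]), or use the Hmisc::cut2 function: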
# Bin Coffee_Cups with Hmisc::cut2() and relabel the resulting groups with
# coffee_labels (the "Low"/"Medium"/"High" vector from the question's code).
# NOTE(review): g = 3 asks cut2() for three quantile groups, so the cut
# points are chosen from the data and need not be 0-3 / 4-6 / 7-9 — confirm
# this grouping is acceptable.
stack$coffee_Hmisc <- factor(Hmisc::cut2(stack$Coffee_Cups, g = 3), labels = coffee_labels)

Unexpected error while using ANOVA w/repeated measures: Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) : 0 (non-NA) cases

I have four datasets derived and processed identically (though differing in size due to the availability of Landsat scenes)
I am trying to compute ANOVA using the formula:
# Repeated-measures ANOVA: LST is the outcome, JulianDay identifies the
# repeated unit, and Buffer and TimePeriod are the within factors.
res.aov <- anova_test(
  data = LST_Weather_dataset_ANOVA,
  dv = LST,
  wid = JulianDay,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)

# Print the ANOVA table with correction = "auto".
get_anova_table(res.aov, correction = "auto")
Where:
*) LST = surface temperature deviation in C
*) JulianDay = days since start of year
*) Buffer = a value 100-1900 - one of 19 areas outward from the boundary of a solar power plant (each 100m wide)
*) TimePeriod = a factor with a value of 0 or 1 corresponding to pre-/post-construction of the solar power plant.
The intent is to investigate if the construction of the installation affected the adjacent land surface temperature.
At three sites the ANOVA runs successfully, however at the fourth site it doesn't and fails with the error:
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
0 (non-NA) cases
I have 381 rows of data in 4 columns (extract below), the only difference I can think of here is that I had to remove two paired months from the time series as data was not available in one of the months. This means there are 20 months of data, rather than 24. Every other processing step is identical.
Reading online I have searched for N/As (there are none), and can't see how there are levels without values as every cell has data. I don't know how to properly evaluate this, though, as it seems this is the root of the error.
I'm hoping someone will know the code needed and/or be able to suggest a way forwards.
Buffer LST JulianDay TimePeriod
1800 -0.04576149 73 2
1900 -0.03422945 73 2
1900 -0.02089755 302 1
1900 -0.02062432 96 1
1900 -0.01465229 192 1
1900 -0.00643754 128 1
1900 -0.00333345 105 2
1800 -0.00266312 366 1
1900 -0.00181226 201 2
1900 -0.00158173 169 2
1900 -1.81E-05 41 2
1800 0.00144813 128 1
and 367 additional rows...
[Edits]
Per comments below:
dput() whole dataframe
dput() subset (as suggested)
Thanks @Dion for noting anova_test is from the rstatix package.
1)
> dput(LST_Weather_dataset_ANOVA)
structure(list(Buffer = c(100L, 200L, 300L, 400L, 500L, 600L,
700L, 800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L), LST = c(0.91797777, 0.95083024, 0.79129483,
0.74791195, 0.68740945, 0.64516119, 0.74870729, 0.78357522, 0.83663769,
0.82156894, 0.77440129, 0.62769619, 0.52052404, 0.46497939, 0.34456476,
0.20359411, 0.11688336, 0.04136486, -0.02089755, 1.15111659,
1.20353638, 1.11717501, 1.0286145, 0.90984545, 0.82983601, 0.78186792,
0.73227976, 0.6989393, 0.65015275, 0.56241798, 0.39651023, 0.34213091,
0.3386525, 0.24000145, 0.11809023, 0.07704512, -0.00266312, 0.01273022,
1.04229626, 1.14347392, 1.1156609, 1.10575157, 1.01202522, 0.77829087,
0.80477079, 0.79677169, 0.83116477, 0.83242401, 0.82394197, 0.72073306,
0.64099082, 0.58188225, 0.43328083, 0.28349521, 0.19752629, 0.10636456,
0.01987005, 0.74458844, 0.71512573, 0.6395358, 0.65294657, 0.63325921,
0.56155255, 0.60860815, 0.60614753, 0.59989994, 0.58766288, 0.57257261,
0.50018929, 0.4367402, 0.40497079, 0.31822141, 0.2300726, 0.16928876,
0.09449034, 0.01799424, 0.82747052, 0.78262774, 0.65488597, 0.62609552,
0.60057131, 0.59950609, 0.6609992, 0.6876772, 0.73196883, 0.75516596,
0.75554112, 0.64167458, 0.54703129, 0.49947692, 0.38230481, 0.25519237,
0.16087274, 0.07759223, 0.00820849, 0.75009747, 0.71421977, 0.62411035,
0.58621041, 0.58438012, 0.61346156, 0.72712994, 0.81372726, 0.87579554,
0.88934787, 0.87369461, 0.74686202, 0.64084028, 0.5599638, 0.40021941,
0.23612052, 0.13408522, 0.04484869, -0.02062432, 0.22133116,
0.28562902, 0.24359043, 0.17788898, 0.16563242, 0.11740664, 0.10102937,
0.07328697, 0.07948283, 0.07521508, 0.08526232, 0.0548022, 0.04632606,
0.06670398, 0.03262545, 0.00650875, 0.01186519, 0.00144813, -0.00643754,
0.26360849, 0.22139941, 0.16915041, 0.13499715, 0.12846785, 0.15351528,
0.15321108, 0.13963269, 0.13413671, 0.13097696, 0.15897844, 0.15489366,
0.12600815, 0.12363834, 0.0943688, 0.07324289, 0.0565765, 0.04005241,
0.01346488, 0.42361198, 0.39149841, 0.29086274, 0.21492842, 0.20664552,
0.24524285, 0.30548979, 0.35256808, 0.37350282, 0.38680061, 0.38567758,
0.31177736, 0.24643091, 0.22001284, 0.14356522, 0.07076854, 0.04168654,
0.01276553, -0.01465229, 0.57032414, 0.50658577, 0.41717664,
0.36134446, 0.35794989, 0.38457285, 0.43700723, 0.48358206, 0.50516801,
0.50086146, 0.49398709, 0.41516438, 0.33165215, 0.28357127, 0.20030152,
0.11993505, 0.08438345, 0.05755944, 0.01071499, 0.04963208, 0.34087747,
0.38385889, 0.40408637, 0.41182138, 0.15662208, 0.18857013, 0.17978741,
0.1533216, 0.1451422, 0.14890638, 0.14090521, 0.1782449, 0.23624089,
0.21003477, 0.13812217, 0.10759364, 0.07225312, 0.03185378, 0.27507486,
0.54404521, 0.56568824, 0.58543167, 0.49124799, 0.28299777, 0.27514982,
0.27526446, 0.27376722, 0.24620415, 0.22871699, 0.19647326, 0.2450593,
0.27133386, 0.15248773, 0.06240341, 0.04933824, 0.03356535, -1.81e-05,
0.21776379, 0.37010032, 0.32743525, 0.30588107, 0.31226738, 0.30518286,
0.32637517, 0.31003415, 0.23691586, 0.1985241, 0.16143326, 0.12384526,
0.11556386, 0.09243356, 0.05773894, 0.03660942, 0.02173758, -0.04576149,
-0.03422945, 0.06214728, 0.26440563, 0.24838816, 0.22704611,
0.17230754, 0.15660109, 0.18689433, 0.24464547, 0.28273218, 0.29602945,
0.29992488, 0.24679735, 0.24521192, 0.23913767, 0.15081173, 0.08724556,
0.05561237, 0.02530266, -0.00333345, 0.11993489, 0.20504424,
0.17323488, 0.14541868, 0.10994579, 0.12741154, 0.17959797, 0.22553943,
0.26564836, 0.29760832, 0.3207305, 0.28592135, 0.26551685, 0.2493214,
0.15767906, 0.0883716, 0.05058495, 0.02207594, 0.00162532, 0.05621313,
0.08020623, 0.05187855, 0.02643543, 0.02422505, 0.05372454, 0.09563737,
0.14735627, 0.18199015, 0.22456299, 0.25302274, 0.21978124, 0.19092835,
0.18255829, 0.11850551, 0.0581734, 0.03406168, 0.01868243, -0.00158173,
0.00980756, 0.07077972, 0.05126985, 0.03126771, 0.01828044, 0.00678076,
0.03566275, 0.05622289, 0.07218645, 0.08767578, 0.11078182, 0.08827425,
0.08881865, 0.10037876, 0.05952601, 0.03440435, 0.01843206, 0.0091852,
-0.00181226, 0.08737325, 0.14470842, 0.13066747, 0.12324597,
0.12014198, 0.13435757, 0.17843025, 0.19926835, 0.20503774, 0.20485414,
0.2124073, 0.1864257, 0.18810996, 0.20665551, 0.13839744, 0.08488387,
0.06246853, 0.03463723, 0.00349753, 0.35245488, 0.57692156, 0.64897028,
0.67306088, 0.68344534, 0.56106697, 0.52144197, 0.49250191, 0.47494065,
0.4359944, 0.39638743, 0.32554099, 0.28717774, 0.2826675, 0.22703594,
0.18186983, 0.15875118, 0.09672536, 0.04305742, 0.24294606, 0.54654222,
0.56344638, 0.53312729, 0.47324972, 0.34482643, 0.34915085, 0.33729055,
0.32086985, 0.29578347, 0.25030669, 0.17928298, 0.17007511, 0.18375903,
0.15222616, 0.10934224, 0.07536797, 0.04154465, 0.02550096),
JulianDay = c(302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L,
302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L,
302L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L,
366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 64L, 64L, 64L, 64L, 64L,
64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L,
64L, 64L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L,
80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 96L, 96L, 96L,
96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L,
96L, 96L, 96L, 96L, 128L, 128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L,
160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L,
160L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L,
192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 73L, 73L,
73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L,
73L, 73L, 73L, 73L, 73L, 105L, 105L, 105L, 105L, 105L, 105L,
105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L,
105L, 105L, 105L, 137L, 137L, 137L, 137L, 137L, 137L, 137L,
137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L,
137L, 137L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L,
169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L,
169L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L,
201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L,
217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L,
217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 313L,
313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L,
313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 361L, 361L,
361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L,
361L, 361L, 361L, 361L, 361L, 361L, 361L), TimePeriod = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA,
-380L))
> dput(LST_Weather_dataset_ANOVA[sample(1:nrow(LST_Weather_dataset_ANOVA), 50),])
structure(list(Buffer = c(800L, 1400L, 500L, 200L, 400L, 1400L,
100L, 1600L, 1800L, 100L, 1400L, 1500L, 900L, 700L, 800L, 600L,
400L, 1300L, 500L, 700L, 700L, 300L, 700L, 200L, 200L, 500L,
500L, 900L, 1000L, 1300L, 1400L, 1600L, 700L, 400L, 500L, 200L,
400L, 1500L, 1400L, 800L, 500L, 1200L, 1500L, 1900L, 600L, 800L,
100L, 1000L, 900L, 1100L), LST = c(0.48358206, 0.46497939, 0.41182138,
0.07077972, 0.17788898, 0.18255829, 0.21776379, 0.03660942, 0.04154465,
0.42361198, 0.49947692, 0.38230481, 0.28273218, 0.18857013, 0.33729055,
0.56106697, 0.13499715, 0.28717774, 0.12014198, 0.78186792, 0.74870729,
0.56344638, 0.18689433, 0.54404521, 0.78262774, 0.60057131, 1.01202522,
0.20503774, 0.13097696, 0.34213091, 0.5599638, 0.08724556, 0.17843025,
1.0286145, 0.01828044, 0.22139941, 0.67306088, 0.15248773, 0.22001284,
0.27526446, 0.02422505, 0.50018929, 0.31822141, 0.01799424, 0.56155255,
0.13963269, 0.27507486, 0.29578347, 0.18199015, 0.3207305), JulianDay = c(224L,
302L, 9L, 201L, 128L, 169L, 73L, 73L, 361L, 192L, 80L, 80L, 105L,
9L, 361L, 313L, 160L, 313L, 217L, 366L, 302L, 361L, 105L, 41L,
80L, 80L, 16L, 217L, 160L, 366L, 96L, 105L, 217L, 366L, 201L,
160L, 313L, 41L, 192L, 41L, 169L, 64L, 64L, 64L, 64L, 160L, 41L,
361L, 169L, 137L), TimePeriod = c(1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L)), row.names = c(179L,
14L, 195L, 306L, 118L, 299L, 229L, 244L, 379L, 153L, 90L, 91L,
256L, 197L, 369L, 348L, 137L, 355L, 328L, 26L, 7L, 364L, 254L,
211L, 78L, 81L, 43L, 332L, 143L, 32L, 109L, 263L, 330L, 23L,
309L, 135L, 346L, 224L, 166L, 217L, 290L, 69L, 72L, 76L, 63L,
141L, 210L, 371L, 294L, 277L), class = "data.frame")
You are running a repeated-measures ANOVA, and this requires the observations to be complete for every individual within your specified within-effects. In your case, you need to ensure that, for every JulianDay, the observations are complete for every combination of Buffer and TimePeriod.
We can tabulate it by using table() and you can see for all the JulianDays they are incomplete, for example on 9 and 16:
# Count observations per Buffer x TimePeriod cell for each JulianDay, then
# show only the slices for days 9 and 16.
with(
  LST_Weather_dataset_ANOVA,
  table(Buffer, TimePeriod, JulianDay)
)[, , c("9", "16")]
, , JulianDay = 9
TimePeriod
Buffer 1 2
100 0 1
200 0 1
300 0 1
400 0 1
500 0 1
600 0 1
700 0 1
800 0 1
900 0 1
1000 0 1
1100 0 1
1200 0 1
1300 0 1
1400 0 1
1500 0 1
1600 0 1
1700 0 1
1800 0 1
1900 0 1
, , JulianDay = 16
TimePeriod
Buffer 1 2
100 1 0
200 1 0
300 1 0
400 1 0
500 1 0
600 1 0
700 1 0
800 1 0
900 1 0
1000 1 0
1100 1 0
1200 1 0
1300 1 0
1400 1 0
1500 1 0
1600 1 0
1700 1 0
1800 1 0
1900 1 0
As you have noted, if you reconcile the dates between sites, it will work. I am not very sure how you converted the JulianDay to months, but using your data, it works if I just do
# Work on a copy of the data and derive a Month column from the day of year.
df <- LST_Weather_dataset_ANOVA

# Parse "2020 <day-of-year>" (2020 is a leap year, so day 366 in the data is
# valid) and convert the month number to its English name via month.name.
# months() is avoided because it returns names in the current locale, which
# would make the "May"/"June" filter below fail on non-English systems.
df$Month <- month.name[
  as.integer(format(strptime(paste("2020", df$JulianDay), "%Y %j"), "%m"))
]

# Keep only the two months used in this example.
df <- subset(df, Month %in% c("May", "June"))

# Check that every Buffer x TimePeriod cell is filled for each month.
with(df, table(Buffer, TimePeriod, Month))
, , Month = June
TimePeriod
Buffer 1 2
100 1 1
200 1 1
300 1 1
400 1 1
500 1 1
600 1 1
700 1 1
800 1 1
900 1 1
1000 1 1
1100 1 1
1200 1 1
1300 1 1
1400 1 1
1500 1 1
1600 1 1
1700 1 1
1800 1 1
1900 1 1
, , Month = May
TimePeriod
Buffer 1 2
100 1 1
200 1 1
300 1 1
400 1 1
500 1 1
600 1 1
700 1 1
800 1 1
900 1 1
1000 1 1
1100 1 1
1200 1 1
1300 1 1
1400 1 1
1500 1 1
1600 1 1
1700 1 1
1800 1 1
1900 1 1
You can see for months June and May, they are complete (no zeros), and if we run anova, it works:
# Same repeated-measures ANOVA, now on the May/June subset with Month as the
# identifier of the repeated unit.
res.aov <- anova_test(
  data = df,
  dv = LST,
  wid = Month,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)
ANOVA Table (type III tests)
Effect DFn DFd SSn SSd F p p<.05 ges
1 (Intercept) 1 1 1.217 0.005 222.936 4.30e-02 * 0.933
2 Buffer 18 18 0.256 0.026 9.933 5.49e-06 * 0.746
3 TimePeriod 1 1 0.013 0.048 0.274 6.93e-01 0.130
4 Buffer:TimePeriod 18 18 0.181 0.008 21.476 1.20e-08 * 0.674
While ironing last night I wondered if JulianDay might be the source of the error. It is derived from the dates of the Landsat scenes from which the dependent variable data are derived, so it is different for each site.
Editing the dataframe to replace the JulianDay column with Month and amending the code to:
# Show the structure of the edited data frame (JulianDay replaced by Month).
str(LST_Weather_dataset_ANOVA)

# Repeated-measures ANOVA with Month as the identifier of the repeated unit
# and Buffer and TimePeriod as within factors.
res.aov <- anova_test(
  data = LST_Weather_dataset_ANOVA,
  dv = LST,
  wid = Month,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)

# Print the ANOVA table with correction = "auto".
get_anova_table(res.aov, correction = "auto")
...the ANOVA test runs successfully:
> res.aov <- anova_test(
+ data = LST_Weather_dataset_ANOVA, dv = LST, wid = Month,
+ within = c(Buffer, TimePeriod),
+ effect.size = "ges",
+ detailed = TRUE,
+ )
> get_anova_table(res.aov, correction = "auto")
ANOVA Table (type III tests)
Effect DFn DFd SSn SSd F p p<.05 ges
1 (Intercept) 1 9 36.781 6.593 50.212 5.75e-05 * 0.735
2 Buffer 18 162 8.042 3.041 23.801 1.81e-36 * 0.378
3 TimePeriod 1 9 5.065 2.506 18.194 2.00e-03 * 0.276
4 Buffer:TimePeriod 18 162 1.713 1.117 13.800 2.71e-24 * 0.114
But I still don't fully understand why...
Hopefully this will enable someone to comment and provide an explanation?

Residual plot by categorical independent variable only provides residuals for some categories

Apologies in advance if I haven't formatted this correctly, this is my first question on SO.
I have run a series of multilevel models using lme4 in R. My outcome variables are continuous and I have one categorical level-two predictor variable with multiple categories (region of the US: Midwest, Northeast, South, West) as well as a series of time-varying covariates. When I run this code,
# Multilevel model of percentfemale with fixed effects for time, region and
# the covariates, plus a random intercept and a random timecat1 slope for
# each AJID. REML = FALSE is written out in full because F is an ordinary,
# reassignable variable.
m5 <- lmer(
  percentfemale ~ timecat1 + region + sizelogc +
    perLatinxc.lag8 + perBlackc.lag8 +
    femincomedisc.lag8 + femLFPdisc.lag8 + fememploydisc.lag8 +
    femedudisc.lag8 +
    (1 + timecat1 | AJID),
  data = data,
  REML = FALSE
)
I get the following results (reduced for space):
AIC BIC logLik deviance df.resid
8182.5 8269.8 -4075.2 8150.5 1722
Scaled residuals:
Min 1Q Median 3Q Max
-6.4726 -0.3921 -0.0245 0.3687 6.4414
Random effects:
Groups Name Variance Std.Dev. Corr
AJID (Intercept) 12.70271 3.5641
timecat1 0.04184 0.2045 0.44
Residual 2.16582 1.4717
Number of obs: 1738, groups: AJID, 531
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.088e+01 3.544e-01 30.696
timecat1 1.086e-01 1.252e-02 8.679
regionNortheast -2.337e+00 4.749e-01 -4.920
regionSouth 6.269e-01 4.472e-01 1.402
regionWest 1.079e+00 4.807e-01 2.245
When I plot the residuals by the independent variables, however, I only have residuals for two of the four regions (see below).
# Plot the residuals of m5 against region as jittered points over a grid.
# TRUE is spelled out because T is an ordinary, reassignable variable.
# NOTE(review): resid(m5) is not a column of `data`, so it is paired with
# region purely by position — if lmer() dropped rows with missing covariate
# values, the two vectors differ in length; confirm they line up.
xyplot(
  resid(m5) ~ region,
  data = data,
  jitter.x = TRUE,
  abline = 0,
  type = c("p", "g")
)
Residuals plotted on y-axis, Midwest, Northeast, South, West plotted on x-axis, residuals only available for South and West regions
I have no missing data in the region variable and am at a loss as to why I would have estimates for region without corresponding residuals. Why might this be the case?
EDIT2:
# Reduced model for the reproducible example, fitted on egdata: fixed
# effects for time, region and size, with a random intercept and a random
# timecat1 slope for each AJID. TRUE/FALSE are written out because T and F
# are ordinary, reassignable variables.
m5 <- lmer(
  percentfemale ~ timecat1 + region + sizelogc + (1 + timecat1 | AJID),
  data = egdata,
  REML = FALSE
)

# NOTE(review): the model above is fitted on egdata, but this plot reads
# region from `data` — confirm whether data = egdata was intended here.
xyplot(
  resid(m5) ~ region,
  data = data,
  jitter.x = TRUE,
  abline = 0,
  type = c("p", "g")
)
> dput(egdata)
structure(list(AJID = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 13L,
51L, 51L, 51L, 51L, 51L, 51L, 51L, 51L, 68L, 68L, 68L, 68L, 68L,
68L, 68L, 68L, 79L, 83L, 83L, 83L, 83L, 83L, 83L, 83L, 97L, 116L,
116L, 116L, 127L, 127L, 127L, 127L, 127L, 127L, 127L, 148L, 148L,
148L, 148L, 148L, 148L, 148L, 148L, 152L, 152L, 152L, 152L, 152L,
152L, 160L, 160L, 160L, 160L, 160L, 160L, 168L, 168L, 168L, 168L,
168L, 168L, 168L, 171L, 171L, 171L, 176L, 176L, 176L, 176L, 176L,
176L, 176L, 176L, 179L, 179L, 179L, 179L, 179L, 179L, 179L, 183L,
183L, 183L, 183L, 183L, 183L, 191L, 191L, 191L, 191L, 191L, 191L,
191L, 197L, 197L, 207L, 207L, 207L, 225L, 235L, 235L, 235L, 235L,
235L, 235L, 235L, 237L, 237L, 237L, 237L, 237L, 237L, 237L, 238L,
238L, 238L, 238L, 238L, 238L, 238L, 238L, 245L, 245L, 245L, 245L,
245L, 251L, 251L, 251L, 251L, 251L, 251L, 251L, 265L, 265L, 265L,
265L, 265L, 266L, 266L, 266L, 266L, 266L, 266L, 266L, 273L, 273L,
273L, 273L, 273L, 275L, 275L, 275L, 275L, 275L, 275L, 275L, 275L,
279L, 279L, 279L, 279L, 279L, 280L, 280L, 280L, 280L, 280L, 280L,
284L, 284L, 284L, 284L, 284L, 284L, 284L, 286L, 286L, 286L, 286L,
286L, 286L, 286L, 286L, 296L, 296L, 296L, 296L, 296L, 296L, 296L,
296L, 313L, 341L, 341L, 341L, 341L, 341L, 345L, 345L, 345L, 345L,
345L, 345L, 345L, 345L, 352L, 363L, 363L, 365L, 365L, 365L, 365L,
365L, 365L, 365L, 365L, 369L, 369L, 369L, 369L, 374L, 374L, 374L,
374L, 374L, 374L, 374L, 385L, 385L, 385L, 385L, 385L, 385L, 385L,
391L, 391L, 391L, 391L, 391L, 391L, 391L, 416L, 416L, 416L, 416L,
416L, 416L, 416L, 417L, 417L, 417L, 417L, 417L, 417L, 417L, 423L,
423L, 423L, 423L, 423L, 423L, 423L, 429L, 429L, 429L, 429L, 429L,
429L, 434L, 434L, 434L, 434L, 434L, 434L, 441L, 441L, 441L, 441L,
441L, 441L, 441L, 441L, 447L, 447L, 447L, 447L, 447L, 447L, 447L,
447L, 448L, 448L, 448L, 448L, 448L, 448L, 448L, 448L, 453L, 454L,
454L, 454L, 454L, 454L, 454L, 466L, 466L, 466L, 466L, 466L, 466L,
466L, 480L, 480L, 480L, 480L, 480L, 480L, 482L, 482L, 506L, 506L,
506L, 510L, 510L, 510L, 510L, 510L, 513L, 513L, 513L, 513L, 513L,
513L, 513L, 514L, 514L, 514L, 514L, 514L, 514L, 514L, 525L, 525L,
525L, 525L, 525L, 525L, 525L, 525L, 547L, 563L, 563L, 563L, 563L,
563L, 563L, 563L, 563L, 577L, 577L, 577L, 577L, 577L, 577L, 577L,
580L, 580L, 580L, 580L, 580L, 580L, 580L, 586L, 586L, 586L, 586L,
586L, 586L, 586L, 598L, 598L, 598L, 598L, 598L, 598L, 598L, 598L,
602L, 602L, 602L, 602L, 602L, 602L, 602L, 603L, 603L, 603L, 617L,
617L, 617L, 617L, 617L, 617L, 617L, 617L, 630L, 630L, 630L, 630L,
630L, 630L, 630L, 636L, 636L, 641L, 641L, 641L, 641L, 641L, 641L,
641L), percentfemale = c(7.834101382, 8.612440191, 8.173076923,
9.030837004, 10.81081081, 12.15932914, 15.47861507, 13.06818182,
13.51351351, 6.010928962, 5.825242718, 8.5, 9.708737864, 9.302325581,
9.5, 12.29946524, 12.06896552, 6.802721088, 6.622516556, 7.042253521,
8.843537415, 7.843137255, 7.792207792, 11.25, 11.11111111, 10.85271318,
4.972375691, 6.179775281, 4.651162791, 4.954954955, 6.392694064,
4.867256637, 3.555555556, 5.172413793, 13.63636364, 13.97058824,
12.40875912, 5.925925926, 6.25, 7.692307692, 7.586206897, 0.666666667,
6.756756757, 8.904109589, 6.25, 6.94980695, 8.148148148, 10.98039216,
9.318996416, 8.865248227, 9.863945578, 10.52631579, 8.088235294,
11.64383562, 12.10191083, 10.625, 13.0952381, 12.4260355, 7.246376812,
9.289617486, 10.44776119, 11.01321586, 16.04938272, 14.71861472,
12.07207207, 15.55763824, 18.0734856, 17.56756757, 17.72639692,
19.07020873, 19.71014493, 17.64705882, 18, 18.25396825, 19.13043478,
16.31944444, 17.79935275, 20, 22.11838006, 19.77077364, 20.32967033,
19.66292135, 12.5, 14.59074733, 17.66666667, 19.62905719, 17.64705882,
16.09042553, 16.43646409, 6.060606061, 7.947019868, 7.638888889,
11.9205298, 13.15789474, 12.58741259, 6.091370558, 7.929515419,
12.38095238, 12.82051282, 12.88888889, 14.52991453, 15.49295775,
12.5984252, 12.90322581, 14.17322835, 13.17829457, 14.92537313,
9.803921569, 3.333333333, 5.109489051, 3.496503497, 3.821656051,
6.060606061, 9.756097561, 9.85915493, 2.857142857, 2.142857143,
4.516129032, 4.268292683, 5.769230769, 7.407407407, 7.317073171,
7.894736842, 5.365853659, 7.798165138, 9.482758621, 10.86956522,
9.777777778, 10.24590164, 11.29032258, 10.67961165, 9.615384615,
9.322033898, 9.649122807, 10.08403361, 4.615384615, 4.761904762,
6.25, 5.303030303, 7.8125, 5.882352941, 5.454545455, 8.620689655,
7.352941176, 9.032258065, 10.97560976, 9.036144578, 6.870229008,
9.459459459, 14.36464088, 11.5, 13.90134529, 18.4, 16, 8.571428571,
8.771929825, 6.194690265, 5.504587156, 6.796116505, 11.03117506,
19.47743468, 12.07289294, 12.9740519, 15.49295775, 16.42411642,
16.99604743, 19.1681736, 6.034482759, 14.28571429, 6.923076923,
9.929078014, 9.433962264, 8.074534161, 9.941520468, 13.77245509,
7.01754386, 8.333333333, 7.851239669, 4.827586207, 4.861111111,
7.092198582, 9.868421053, 10.1910828, 10.96774194, 13.66459627,
9.386776293, 10.94023069, 12.86926995, 14.01687216, 15.68885959,
17.43400859, 16.4295393, 15.56459817, 5.696202532, 5.921052632,
19.44444444, 8.024691358, 7.142857143, 6.951871658, 7.692307692,
6.179775281, 9.482758621, 4.761905, 3.703704, 4.950495, 2.912621,
6.930693, 5.447471, 7.142857, 9.056604, 13.42513, 16.14583, 17.77379,
17.54967, 17.65677, 9.565217, 6.306306, 8.181818, 5.340114, 7.124352,
7.549669, 12.74876, 13.29752, 14.33311, 15.43027, 15.96702, 7.758621,
7.968127, 10.16949, 9.60961, 2.424242, 15.34091, 10.30928, 4.6875,
5.050505, 7.009346, 7.906977, 2.9615, 3.616637, 10.94527, 11.86903,
15.31532, 17.4939, 20.42042, 0, 0, 0, 1.694915, 3.225806, 3.149606,
5, 1.183432, 1.694915, 2.717391, 2.312139, 0.9478673, 2.45098,
3.012048, 0, 1.734104, 2.564103, 3.5, 3.626943, 3.571429, 5.729167,
1, 0.9803922, 1.818182, 1.818182, 1.694915, 1.709402, 0.862069,
6.956522, 9.917355, 10.25641, 0, 11.51079, 9.333333, 1.470588,
3.472222, 4.166667, 4.166667, 6.756757, 7.801418, 0, 7.741935,
7.643312, 6.962025, 7.594937, 8.823529, 9.333333, 9.677419, 9.574468,
7.446809, 7.55814, 7.821229, 6.989247, 10.27027, 8.196721, 8.441558,
5.714286, 5.5, 6.521739, 6, 5.940594, 4.663212, 8.837209, 11.45833,
4.516129, 3.703704, 4.285714, 5.625, 5.91716, 5.813953, 6.134969,
11.87335, 12.16545, 12.23529, 12.72321, 12.67606, 15.6746, 15.21739,
6.930693, 9.677419, 11.2, 11.2782, 12.19512, 9.448819, 10.18519,
8.490566, 7.894737, 10.15625, 11.19403, 8.917197, 11.68831, 17.51412,
16.66667, 18.53933, 4.081633, 4.6875, 5.181347, 4.812834, 4.975124,
3.349282, 4.624277, 0.6369427, 2.857143, 7.142857, 5.454545,
7.058824, 7.142857, 8.391608, 1.360544, 1.37931, 1.360544, 4.026846,
4.697987, 6.535948, 5.405405, 7.801418, 5.454545, 6.622517, 5.882353,
7.18232, 8.571429, 9.589041, 9.846154, 10.81871, 11.47059, 3.90625,
4.6875, 3.571429, 4.511278, 6.818182, 11.04294, 14.10256, 4.020101,
3.045685, 2.439024, 3.478261, 3.2, 3.703704, 3.571429, 4.363636,
3.97351, 4.792332, 5.333333, 5.315615, 7.923497, 7.286432, 10.35387,
11.32075, 11.50923, 11.9877, 11.8007, 11.60267, 11.66078, 10.52002,
6.752412, 6.583072, 9.898477, 10.51345, 10.22444, 10.90487, 8.878505,
13.67521, 16.66667, 17.43119, 10, 10.62802, 12.61682, 13.00813,
11.78862, 7.33945, 10.69959, 20.95238, 7.438017, 7.5, 8.333333,
10.32028, 13.35616, 15.24823, 11.67883, 18.30508, 21.59468, 3.902439,
4.950495, 5.365854, 5.263158, 7, 8.653846, 7.614213), timecat1 = c(-26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, 0L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L,
-6L, 0L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -6L, -10L,
-6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, -20L, -16L, -13L, -10L, -6L, 0L, -20L,
-16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L,
0L, -13L, -10L, -6L, -26L, -23L, -20L, -16L, -13L, -10L, -6L,
0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -20L, -16L, -13L,
-10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -6L, 0L,
-10L, -6L, 0L, -10L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -23L,
-20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L,
-13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L, 0L, -26L, -23L,
-20L, -13L, -6L, 0L, -26L, -23L, -20L, -13L, -10L, -6L, 0L, -26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L,
-13L, -10L, -6L, 0L, -16L, -23L, -20L, -16L, -13L, -10L, -26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -13L, -10L, -6L, -26L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -13L, -10L, -6L, 0L, -23L,
-20L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L,
-6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L,
-16L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L, 0L,
-23L, -20L, -16L, -13L, -10L, -6L, 0L, -23L, -20L, -16L, -13L,
-10L, -6L, -23L, -20L, -16L, -13L, -10L, -6L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L,
-6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -16L, -23L,
-20L, -13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -10L,
0L, -23L, -20L, -13L, -10L, -6L, 0L, -13L, 0L, -10L, -6L, 0L,
-26L, -23L, -16L, -13L, -10L, -26L, -23L, -20L, -16L, -13L, -10L,
0L, -26L, -23L, -20L, -13L, -10L, -6L, 0L, -26L, -23L, -20L,
-16L, -13L, -10L, -6L, 0L, 0L, -26L, -23L, -20L, -16L, -13L,
-10L, -6L, 0L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L,
-20L, -16L, -13L, -10L, -6L, -26L, -23L, -20L, -16L, -13L, -6L,
0L, -26L, -23L, -20L, -16L, -13L, -10L, -6L, 0L, -26L, -23L,
-16L, -13L, -10L, -6L, 0L, -10L, -6L, 0L, -26L, -23L, -20L, -16L,
-13L, -10L, -6L, 0L, -26L, -23L, -20L, -16L, -13L, -6L, 0L, -26L,
-23L, -26L, -23L, -20L, -16L, -13L, -10L, -6L), region = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Midwest",
"Northeast", "South", "West"), class = "factor"), sizelogc = c(0.408823946,
0.636182014, 0.663878359, 0.774106962, 0.837491565, 0.81147203,
0.731559255, 0.989580112, -0.52527906, -0.277305662, -0.02646953,
-0.011379257, 0.009202568, -0.141848484, -0.110337044, -0.078522425,
-0.136207762, -0.595759393, -0.282844698, -0.2826483, -0.277326766,
-0.250742573, -0.245563831, -0.183035124, -0.32184058, -0.287607408,
0.048395349, -0.092604697, 0.036508019, 0.127815082, 0.155366121,
0.255766404, 0.173947166, -0.884692994, -0.731071647, -0.394593501,
-0.316877791, -0.702103128, -0.74801155, -0.646588826, -0.647397051,
-0.587480728, -0.603441812, -0.601264967, -0.183904487, 0.168725229,
0.124365995, 0.088979071, 0.14285296, 0.250676301, 0.31927981,
0.28021636, -0.460329477, -0.384224561, -0.338558779, -0.562322168,
-0.200129557, -0.076069403, -0.494231029, -0.189163979, -0.110595941,
0.001720365, 0.121454067, 0.037814992, 1.598396458, 1.552348848,
1.528920185, 1.633910643, 1.606908979, 1.832432709, 1.57677083,
-0.640839651, -0.694966643, -0.547000501, -0.149472291, 0.361169794,
0.451506904, 0.497633974, 0.455591612, 0.556782641, 0.567741169,
0.559003403, 0.960085349, 1.049901785, 1.093877349, 1.153096098,
1.25397841, 1.307771936, 1.216469988, -0.340080349, -0.277326766,
-0.309985406, -0.232080481, -0.22633193, -0.259169473, -0.066114362,
0.07356142, 0.079348177, 0.189508651, 0.182273574, 0.213919294,
0.176972887, -0.622986408, -0.655332188, -0.375868374, -0.37900877,
-0.594707567, -0.776192083, -0.530252871, -0.726032643, -0.399728748,
-0.408587383, -0.241049151, -0.153805486, -0.432188637, -0.681483841,
-0.499994734, -0.399728748, -0.363009875, -0.40728457, -0.37386737,
-0.341941759, -0.327495407, -0.02646953, -0.062299347, -0.018869451,
-0.025830674, -0.045304574, 0.009124882, -0.222468106, -0.75938432,
-0.758622686, -0.680881903, -0.705224506, -0.662300858, -0.57205,
-0.5534199, -0.521425683, -0.512222273, -0.633091239, -0.726886003,
-0.765333084, -0.673617498, -0.517985978, -0.355459502, -0.328746935,
-0.357287329, -0.547952449, -0.390596139, -0.155965909, -0.141848484,
-0.041607712, 0.070160772, 0.081964777, -0.782026797, -0.850996006,
-0.680881903, -0.726886003, -0.897757669, 0.303072117, 0.499784169,
0.702821333, 0.801114163, 0.693473185, 0.685994373, 0.914163737,
0.870120068, -0.641478507, -0.669679706, -0.384224561, -0.4137822,
-0.165559974, -0.58143934, -0.195452876, -0.273388974, 0.015697784,
0.03330568, 0.085281529, -0.650093744, -0.36861152, -0.422177711,
-0.309985406, -0.254654803, -0.314148136, -0.245248134, 2.21254214,
2.464016344, 2.517465096, 2.561832541, 2.531204875, 2.596658583,
2.532332869, 2.463416317, -0.762813478, -0.524421951, -0.559536127,
-0.464267269, -0.367972664, -0.31100774, -0.33366095, -0.336878457,
-0.78968967, -0.804381978, -0.770484405, -0.820942213, -0.780928444,
-0.753377405, 0.507649068, 0.90594493, 0.950657497, 0.942646379,
1.012083374, 0.956143966, 1.041487785, 0.985550557, -0.674065298,
-0.666992791, -0.442860242, 1.740855448, 2.044544372, 2.029656842,
2.069429545, 1.968364716, 1.97746863, 1.883613066, 1.896172492,
0.042455234, 0.173384627, 0.639131166, 0.581430439, -0.55992864,
-0.223940699, -0.081275017, -0.095328469, -0.041607712, -0.022849423,
-0.012017382, 1.467743908, 1.517602151, 1.75795099, 1.728731945,
1.819062624, 1.618562794, 1.394151166, -1.056795911, -0.989847234,
-0.929836301, -0.819247308, -0.753377405, -0.741592151, -0.933797606,
-0.639992648, -0.358710449, -0.202247874, -0.272729057, -0.154788807,
-0.090038867, -0.25450746, -0.58143934, -0.36861152, -0.250608946,
-0.155965909, -0.19481402, -0.154788807, -0.076069403, -0.867895383,
-0.793473924, -0.722750187, -0.729635149, -0.660119911, -0.677053629,
-0.683503065, -0.501432433, -0.529322349, -0.582050305, -0.333739492,
-0.428790775, -0.36875227, -0.674704154, -0.565689993, -0.575819755,
-0.640839651, -0.562322168, -0.609914327, -0.929351167, -0.507130454,
-0.465900522, -0.47544057, -0.523783095, -0.606773931, -0.448369898,
-0.37287206, -0.503951844, -0.246966411, -0.301428195, -0.227878491,
-0.118317987, -0.098549089, -0.11347744, -0.24986708, -0.495218164,
-0.030009357, -0.069790019, -0.015317049, -0.087781263, -0.079206126,
-0.204449218, -0.302135509, -0.486740125, -0.578166227, -0.584382126,
-0.461791419, -0.370726974, -0.493575335, -0.399398814, 0.250851418,
0.597179856, 0.658448481, 0.714387727, 0.820962263, 0.859063513,
1.053196627, -0.804381978, -0.578112513, -0.660642278, -0.5811315,
-0.677053629, -0.655332188, -0.827090486, -0.812961133, -0.716472848,
-0.590621124, -0.489347051, -0.645030442, -0.363648731, -0.250052349,
-0.319419439, -0.218776516, -0.54880241, -0.282844698, -0.202247874,
-0.219144811, -0.165946036, -0.090766948, -0.326828121, -0.427965937,
-0.615670623, -0.578112513, -0.467270885, -0.417979859, -0.528873117,
-0.549971673, -0.751513923, -0.404058269, -0.494231029, -0.492438146,
-0.333739492, -0.273088505, -0.410424966, -0.448995755, -0.655332188,
-0.499575469, -0.122840343, -0.135833432, 0.209243937, 0.371355262,
0.498310873, 0.539341695, 0.552798626, -0.484530622, -0.460329477,
-0.410200048, -0.621421565, -0.606773931, -0.262371866, -0.217973717,
-0.436129248, -0.191277505, -0.088765921, 0.036508019, 0.065294725,
0.173384627, 0.140030452, -0.128162504, 0.159966049, 0.221644403,
0.209243937, 0.22036664, 0.614624755, 0.681129799, 1.065212169,
1.315573819, 1.477612502, 1.56898489, 1.741486445, 1.795245059,
1.968924143, 2.11464897, 0.14434477, 0.319548119, 0.663544174,
0.670843847, 0.680632429, 0.731559255, 0.749879332, -0.687899475,
-0.622986408, -0.73479636, -0.291390402, -0.058788322, 0.020036939,
0.217706611, 0.239519072, 0.076585244, 0.182078689, 0.051608314,
-0.199956196, 0.093274675, 0.204646826, 0.288196489, 0.380811597,
0.324394911, 0.291056575, -0.009041676, 0.268254825, -0.361143441,
-0.077205048, -0.172647405, -0.015317049, -0.207445767, -0.114297445,
-0.093907344)), row.names = c(NA, -432L), class = "data.frame")
> str(egdata)
'data.frame': 432 obs. of 5 variables:
$ AJID : int 8 8 8 8 8 8 8 8 13 51 ...
$ percentfemale: num 7.83 8.61 8.17 9.03 10.81 ...
$ timecat1 : int -26 -23 -20 -16 -13 -10 -6 0 0 -26 ...
$ region : Factor w/ 4 levels "Midwest","Northeast",..: 3 3 3 3 3 3 3 3 4 4 ...
$ sizelogc : num 0.409 0.636 0.664 0.774 0.837 ...
Given the data that you've provided, I can now see the underlying issue. Your dataset has a number of missing values. As noted in the documentation of this function (https://www.rdocumentation.org/packages/lme4/versions/1.1-23/topics/lmer):
The default action (na.omit, inherited from the 'factory fresh' value of getOption("na.action")) strips any observations with any missing values in any variables.
As such, your model will only leverage rows where there are no missing values. This is the reason you see no residual values for some levels of your data.
To see this yourself, you can run the following to extract only rows that have no missing values:
data2 = data[complete.cases(data),]
You can see that data2 is much smaller. Then run table(data2$region) to see the levels of region in this smaller dataset.
Please have a look at https://stats.stackexchange.com/questions/309718/how-to-deal-with-missing-data-in-mixed-effects-or-multi-level-models for information on how you might handle this missing data.
Update: Given your new dataset of only complete cases, I can no longer reproduce the issue. Perhaps clear your R session and restart? Reproducible code:
library(lme4)
library(lattice)

# Mixed model with a random intercept and a random timecat1 slope per AJID.
# REML = FALSE requests a maximum-likelihood fit rather than REML.
m5 <- lmer(
  percentfemale ~ timecat1 + region + sizelogc + (1 + timecat1 | AJID),
  data = egdata, REML = FALSE
)
summary(m5)
# Plot the residuals against region, taking region from the same data frame
# the model was fitted to so both vectors line up row for row.
xyplot(resid(m5) ~ region, data = egdata, jitter.x = TRUE, abline = 0,
       type = c("p", "g"))

Boxplot for only values that satisfy a condition in R

I have this dataset. I want to make side-by-side boxplots of only those movies whose indexes appear 67 times in the "movie" column. The following code tells me the indexes that appear 67 times in the "movie" column:
names(which(table(votes$movie) == 67))
But how can I make side-by-side boxplots of the "rating" for each of these indexes? And how can I also add the averages as a single point on each of those boxplots?
I have tried:
boxplot(votes$rating[which(table(votes$movie) == 67)])
But this is clearly wrong, as it shows me only one boxplot
MRE:
# set.seed(1)
# votes2 <- votes[sample(1:nrow(votes), 100, TRUE), ]
votes2 <-
structure(list(user = c(869L, 620L, 42L, 341L, 930L, 267L, 708L,934L, 148L, 385L, 251L, 181L, 313L, 437L, 747L, 260L, 109L, 201L,229L, 366L, 921L, 829L, 934L, 868L, 321L, 226L, 527L, 726L, 26L,457L, 117L, 325L, 327L, 60L, 804L, 158L, 593L, 200L, 880L, 482L,868L, 339L, 328L, 347L, 100L, 896L, 846L, 676L, 357L, 496L, 541L,807L, 257L, 924L, 894L, 478L, 601L, 13L, 311L, 230L, 435L, 654L,742L, 180L, 887L, 201L, 147L, 326L, 749L, 465L, 727L, 200L, 216L,267L, 345L, 445L, 268L, 26L, 366L, 82L, 763L, 436L, 324L, 707L,802L, 280L, 682L, 343L, 826L, 325L, 508L, 618L, 405L, 655L, 645L,378L, 296L, 438L, 450L, 151L), movie = c(181L, 240L, 410L, 948L,143L, 926L, 1054L, 502L, 474L, 47L, 147L, 125L, 527L, 249L, 659L,319L, 576L, 1426L, 245L, 672L, 1028L, 151L, 492L, 90L, 182L,250L, 7L, 248L, 841L, 222L, 307L, 434L, 318L, 132L, 746L, 510L,692L, 79L, 585L, 269L, 739L, 485L, 679L, 386L, 347L, 686L, 12L,303L, 597L, 532L, 304L, 820L, 285L, 173L, 52L, 71L, 208L, 333L,504L, 266L, 961L, 195L, 294L, 216L, 491L, 179L, 304L, 655L, 62L,855L, 222L, 756L, 226L, 217L, 303L, 902L, 825L, 255L, 671L, 1128L,283L, 568L, 259L, 212L, 646L, 144L, 566L, 88L, 174L, 99L, 172L,44L, 482L, 863L, 674L, 696L, 292L, 269L, 722L, 443L), rating = c(3L,5L, 3L, 3L, 2L, 2L, 3L, 4L, 5L, 4L, 3L, 3L, 4L, 5L, 4L, 2L, 3L,2L, 3L, 5L, 4L, 4L, 4L, 3L, 3L, 4L, 5L, 2L, 2L, 5L, 5L, 5L, 5L,4L, 4L, 3L, 3L, 5L, 1L, 4L, 2L, 5L, 2L, 1L, 4L, 3L, 5L, 4L, 4L,5L, 4L, 3L, 5L, 5L, 4L, 3L, 4L, 3L, 4L, 4L, 1L, 4L, 3L, 5L, 2L,5L, 5L, 5L, 3L, 4L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 3L, 5L, 1L, 4L,5L, 5L, 4L, 4L, 2L, 3L, 4L, 5L, 5L, 5L, 4L, 3L, 3L, 3L, 3L, 5L,4L, 5L, 5L),
timestamp = structure(c(884490825, 889987954, 881110483,890758169, 879535462, 878970785, 877326158, 891194539, 877019882,879441982, 886272319, 878962816, 891013525, 880142027, 888639175,890618198, 880580663, 884114015, 891632385, 888858078, 879380142,891990672, 891192087, 877109874, 879439679, 883890491, 879456162,889832422, 891380200, 882392853, 880124339, 891478376, 887820828,883325944, 879444890, 880134296, 886193724, 884128499, 880175050,887643096, 877111542, 891032413, 885049460, 881654846, 891375212,887159146, 883947777, 892685403, 878952080, 876072633, 883864207,892532068, 882049950, 885458060, 882404507, 889388790, 876350017,881514810, 884364873, 880484286, 884133635, 887864350, 881005590,877128388, 881379566, 884114471, 885593942, 879875432, 878849052,883531444, 883709350, 876042493, 880244803, 878973760, 884900448,891200870, 875742893, 891377609, 888857990, 884714361, 878915600,887769416, 880575107, 886286792, 875986155, 891700514, 888519260,876405130, 885690481, 891479244, 883767157, 891308791, 885544739,887473995, 892054402, 880045044, 884196057, 879867960, 882471524,879524947), class = c("POSIXct","POSIXt"), tzone = "")),
.Names = c("user","movie", "rating", "timestamp"), row.names = c(26551L, 37213L,57286L, 90821L, 20169L, 89839L, 94468L, 66080L, 62912L, 6179L,20598L, 17656L, 68703L, 38411L, 76985L, 49770L, 71762L, 99191L,38004L, 77745L, 93471L, 21215L, 65168L, 12556L, 26723L, 38612L,1340L, 38239L, 86970L, 34035L, 48209L, 59957L, 49355L, 18622L,82738L, 66847L, 79424L, 10795L, 72372L, 41128L, 82095L, 64707L,78294L, 55304L, 52972L, 78936L, 2334L, 47724L, 73232L, 69274L,47762L, 86121L, 43810L, 24480L, 7068L, 9947L, 31628L, 51864L,66201L, 40684L, 91288L, 29361L, 45907L, 33240L, 65088L, 25802L,47855L, 76632L, 8425L, 87533L, 33908L, 83945L, 34669L, 33378L,47636L, 89220L, 86434L, 38999L, 77733L, 96062L, 43466L, 71252L,40000L, 32536L, 75709L, 20270L, 71113L, 12170L, 24549L, 14331L,23963L, 5894L, 64229L, 87627L, 77892L, 79731L, 45528L, 41009L,
81088L, 60494L), class = "data.frame")
names(which(table(votes2$movie) == 2))
# [1] "222" "269" "303" "304"
boxplot(votes2$rating[which(table(votes2$movie) == 2)])
Perhaps this, as I understand the request to be for side-by-side boxplots of the ratings of movies that received exactly 67 votes:
# One box of ratings per movie, restricted to movies with exactly 67 votes.
# which() keeps only the TRUE entries of the comparison, so names() returns
# just the matching movie ids instead of every id in the table.
boxplot(rating ~ movie, data = votes,
        subset = movie %in% names(which(table(votes$movie) == 67)))
I switched the order in my first guess at the correct formula; on testing, this was more successful on your example:
# One box of ratings per movie, restricted to movies that occur exactly twice.
# which() drops the FALSE entries so names() yields only the matching ids.
boxplot(rating ~ movie, data = votes2,
        subset = movie %in% names(which(table(votes2$movie) == 2)))
You should probably do a search on rhelp and SO for plotting a point or text for the mean of categories on boxplots. Pretty sure this has been asked before. If unsuccessful, then report on the search terms used.

how to insert missing observations on a data frame

I have data consisting of observations over time. Unfortunately, large stretches of time points are missing for one treatment. They are not coded as NA, and the gaps become apparent when I plot the data.
My data frame looks like this. The number of samples per time point is irregular. (edit: sorry for not making the example reproducible)
structure(list(A = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 144L, 144L, 144L, 1809L, 1809L, 1809L,
1809L, 1809L, 1809L, 1809L, 1809L, 1809L, 1809L, 1809L, 1809L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2757L, 2757L, 2757L, 2909L, 2909L,
2909L, 2909L, 2909L, 2909L, 2909L, 2909L, 2909L, 2909L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L), cond = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Con",
"Si"), class = "factor"), T = c(416L, 417L, 418L, 419L, 420L,
423L, 424L, 425L, 426L, 427L, 428L, 429L, 430L, 431L, 432L, 433L,
434L, 435L, 436L, 437L, 438L, 439L, 440L, 441L, 442L, 443L, 444L,
445L, 446L, 447L, 448L, 449L, 450L, 451L, 452L, 453L, 454L, 458L,
503L, 504L, 505L, 506L, 507L, 508L, 509L, 510L, 511L, 512L, 513L,
514L, 515L, 516L, 517L, 518L, 519L, 520L, 521L, 522L, 523L, 524L,
525L, 526L, 527L, 528L, 272L, 276L, 277L, 350L, 351L, 352L, 353L,
354L, 355L, 356L, 357L, 358L, 359L, 360L, 361L, 372L, 373L, 374L,
375L, 376L, 377L, 378L, 379L, 380L, 381L, 382L, 383L, 384L, 385L,
386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L, 394L, 395L, 396L,
397L, 398L, 399L, 400L, 401L, 437L, 438L, 439L, 440L, 441L, 442L,
443L, 444L, 445L, 446L, 447L, 448L, 449L, 450L, 451L, 452L, 453L,
454L, 455L, 493L, 494L, 495L, 382L, 383L, 384L, 385L, 386L, 387L,
388L, 389L, 390L, 391L, 523L, 524L, 525L, 526L, 527L, 528L, 529L,
530L, 531L, 532L, 533L, 534L, 535L, 536L, 537L, 538L, 539L, 540L,
541L, 542L, 543L, 544L, 545L, 546L, 547L, 548L, 549L, 550L, 551L,
552L, 553L, 554L, 555L, 556L, 557L, 582L, 583L, 584L, 585L, 586L,
587L, 588L, 589L, 590L, 591L, 592L, 593L, 594L, 595L, 596L),
Vlog = c(1.199206203, 0.92297866, 0.74831703, 1.180533889,
0.846435768, 1.823185531, 1.775303408, 0.9253633, 1.562371106,
1.237695416, 1.310507835, 1.431774566, 2.259365243, 1.721204598,
0.976929098, 0.673510525, 1.194940048, 0.878373924, 1.399859784,
1.04183351, 0.362465228, 1.345074816, 0.839639722, 1.235884973,
0.946877821, 0.810708992, 0.620516467, 0.99590939, 0.446167467,
0.635246561, 0.508835353, 0.470349764, 0.505083592, 0.363685506,
0.841427562, 1.502579534, 1.503814969, 1.962735861, 1.190111689,
1.208627789, 1.212606926, 1.3052429, 1.19648953, 1.399151795,
1.359988717, 1.530933258, 1.324386434, 1.429685474, 1.550040003,
1.209836455, 0.976675012, 1.396991989, 1.309972472, 0.884831368,
0.940578242, 0.622109712, 0.196736781, 0, 1.861481047, 1.166587204,
1.154778081, 0.750716468, 0.822148942, 0.324409805, 0.810379036,
2.218975354, 0.837542999, 1.597505982, 1.34988859, 2.109471773,
1.408734988, 1.006914696, 1.680242618, 1.842263128, 2.19564511,
1.80944452, 1.194273373, 1.953931263, 1.943781916, 2.136530509,
2.174627732, 1.837702354, 1.744745221, 1.744745221, 2.065910366,
1.3644043, 1.935629046, 1.327947423, 1.703751191, 1.595793931,
2.32443327, 1.815054551, 1.381916487, 1.535930503, 1.762742848,
1.214377396, 1.745046639, 0, 0, 1.314421325, 2.12544409,
1.961225517, 1.722393773, 1.763882649, 2.246794342, 1.462888398,
0, 2.699085109, 0.982206846, 1.678694356, 1.339419526, 1.856762396,
1.604863093, 1.439867691, 1.210451327, 0.988645101, 1.581116604,
0.868888993, 1.385699365, 1.377180499, 1.584445411, 1.76153307,
1.153021042, 1.427814276, 1.867219352, 1.726781152, 2.045476901,
1.231462515, 1.282774459, 1.194170351, 1.423430455, 1.813916126,
1.697914719, 1.343711186, 1.619115871, 1.590854952, 1.165150441,
0.84551636, 0.925836885, 0.0009995, 0, 2.672041587, 1.630536406,
2.084775235, 0.879027692, 2.150052605, 1.171591247, 2.589254624,
1.09594206, 1.788420568, 0.879027692, 1.768910948, 1.544705476,
0.961905249, 2.03675983, 1.189770451, 2.125034005, 1.921180059,
1.587902512, 1.113485404, 1.826744807, 0.961905249, 1.423828826,
1.392463308, 1.355448604, 1.638531529, 1.158778559, 1.257058585,
1.641075408, 1.652573524, 1.435915015, 1.072776171, 1.240686858,
1.647779212, 1.089811169, 1.723723056, 2.094419336, 0.544066958,
0.894454037, 1.651688305, 1.153416081, 0.961905249, 2.457446983,
0.704322704, 1.544705476, 1.970925317, 1.402837317, 1.651688305,
1.358923164, 1.153416081, 2.056674373)), .Names = c("A",
"cond", "T", "Vlog"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 66L, 67L, 68L, 201L, 202L, 203L, 204L,
205L, 206L, 207L, 208L, 209L, 210L, 211L, 212L, 213L, 214L, 215L,
216L, 217L, 218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L,
227L, 228L, 229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L,
238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L, 246L, 247L, 248L,
249L, 250L, 251L, 252L, 253L, 254L, 255L, 256L, 257L, 258L, 259L,
260L, 261L, 695L, 696L, 697L, 698L, 699L, 700L, 701L, 702L, 703L,
704L, 705L, 706L, 707L, 708L, 709L, 710L, 711L, 712L, 713L, 714L,
715L, 716L, 717L, 718L, 719L, 720L, 721L, 722L, 723L, 724L, 725L,
726L, 727L, 728L, 729L, 730L, 731L, 732L, 733L, 734L, 735L, 736L,
737L, 738L, 739L, 740L, 741L, 742L, 743L, 744L, 745L, 746L, 747L,
748L, 749L, 750L, 751L, 752L, 753L, 754L, 755L, 756L, 757L), class = "data.frame")
Is there a way of spotting the missing time points and insert n rows to it? What I thought of is to check the missing time points by making a freq table for each time point per treatment and then inserting a row. This is doable with a short time series but not with a large one. I am not sure if someone could help do it a little bit easier? Thanks!
edit: T is sequential but the number of data per T varies. And I want to insert a number of rows for each T. Hope the edits made it clear. :)
This largely depends on how general you wish your solution to be. But, if you want a non-general solution you can do #1 pretty simply. Here, I assume that you're using T as your time variable.
#' Build filler rows for the time points missing from a data frame
#'
#' Looks at the column named by `time_val`, generates the regular grid from
#' its minimum to its maximum in steps of `by`, and returns one all-`NA` row
#' for every grid value that does not occur in the data.
#'
#' @param df A data frame.
#' @param time_val Name of the time column in `df` (a single string).
#' @param by Step between consecutive expected time points.
#' @return A data frame with the same columns as `df` that holds only the
#'   filler rows: `time_val` contains the missing time points and every other
#'   column is `NA`. A zero-row data frame is returned when nothing is
#'   missing. Append the result to `df` (e.g. with `rbind()`) to obtain the
#'   completed series. How the `NA`s should be imputed is domain specific and
#'   left to the caller.
insert_miss <- function(df, time_val = "T", by = 1) {
  val <- df[[time_val]]
  if (is.null(val)) {
    stop("column '", time_val, "' not found in `df`", call. = FALSE)
  }
  # NA time stamps say nothing about the grid; left in place they would make
  # range() return NA and seq() fail.
  val <- val[!is.na(val)]
  if (length(val) == 0L) {
    return(df[0L, , drop = FALSE])
  }
  val_range <- range(val)
  comp <- seq(val_range[1], val_range[2], by = by)
  which_miss <- comp[!comp %in% val]
  # Indexing rows with NA yields all-NA rows that keep each column's type and
  # factor levels, and gives a valid zero-row frame when nothing is missing.
  df2 <- df[rep(NA_integer_, length(which_miss)), , drop = FALSE]
  rownames(df2) <- NULL
  df2[[time_val]] <- which_miss
  df2
}
run
insert_miss(<your_df>)
R> A cond T Vlog
1 NA NA 421 NA
2 NA NA 422 NA
Your example data doesn't match the chart image you posted, but here's an answer with random data
# Simulated series: a random walk observed at x = 1..200
set.seed(123)
walk <- cumsum(rnorm(200))
dat <- data.frame(x = 1:200, y = walk)
# Remove two stretches of observations to create gaps
dropped <- c(20:40, 90:120)
dat <- dat[-dropped, ]
# Spacing between every x and the x that follows it
diff2next <- diff(dat$x)
# A spacing above 1 means the row is the last point before a gap ...
holes_start <- which(diff2next > 1)
# ... and the gap closes at the very next row
holes_end <- holes_start + 1
# Draw the series and mark the rows bordering each gap in red
gap_edges <- dat[c(holes_start, holes_end), ]
ggplot() +
  geom_line(data = dat, aes(x, y)) +
  geom_point(data = gap_edges, aes(x, y), color = "red")
Assuming that your data frame is called ts.df and T variable is sequential (as in it increases by one and only by one on each and every data point), you can generate data.frame with all T values in range and OUTER JOIN it into your existing data.frame to get NAs filled in automatically:
# Only the T values absent from ts.df need filler rows. Merging a full T
# sequence on every shared column (T, A, cond) with all.y = TRUE would
# silently drop existing rows whose A is not 0 or whose cond is not "Si".
all_t <- seq(from = min(ts.df$T), to = max(ts.df$T))
ids <- data.frame(T = setdiff(all_t, ts.df$T))
ids$A <- rep(0, nrow(ids))
ids$cond <- rep("Si", nrow(ids))
# all = TRUE is a full outer join: every original row is kept and the filler
# rows are added with Vlog = NA.
ts.df <- merge(ts.df, ids, all = TRUE)
ggplot(ts.df, aes(T, Vlog)) + geom_line() + geom_point()
This will assign Si value for cond variable for all rows and 0 value for A variable. The first one seems about right, the second one is irrelevant for your chart anyway.
You might need to split the entire data.frame by condition, run the above code on the subset for one condition, and join the data.frames again to get it working with your current ggplot() call; but since you haven't posted a reproducible example of your problem, there is only so much I can do.

Resources