Related
Data
Here is my data:
work <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L
), Mins_Sleep = c(300L, 540L, 540L, 480L, 480L, 480L, 480L, 420L,
300L, 240L, 480L, 300L, 420L, 360L, 390L, 405L, 420L, 360L, 420L,
350L, 420L, 450L, 445L, 480L, 300L, 400L, 310L, 390L, 350L, 450L,
390L, 390L, 510L, 452L, 310L, 360L, 500L, 360L, NA, NA, 420L,
420L, 420L, 382L, 430L, 393L, 240L, 400L, 480L, 450L), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L), Tea_Cups = c(2L, 4L, 2L, 0L, 0L, 2L, 0L, 2L, 4L, 0L, 0L,
0L, 2L, 6L, 5L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 0L, 2L, 1L, 0L, 4L,
4L, 4L, 2L, 1L, 0L, 2L, 0L, 0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 1L, 0L, 2L, 0L), Time_Wake = c(500L, 715L, 600L,
600L, 700L, 600L, 700L, 500L, 500L, 500L, 500L, 700L, 645L, 700L,
630L, 645L, 700L, 600L, 700L, 550L, 700L, 730L, 725L, 800L, 600L,
640L, 600L, 730L, 650L, 830L, 630L, 630L, 830L, 722L, 641L, 800L,
720L, 700L, NA, NA, NA, 700L, 700L, 622L, 710L, 632L, 400L, 640L,
700L, 730L)), class = "data.frame", row.names = c(NA, -50L))
Oftentimes I have odd combinations of variables that don't neatly fit into a faceted grid in ggplot. As an example, here is some code to stack my numeric data into one plot:
# Reshape every numeric column of `work` into long form (column name in `key`,
# observation in `value`) and draw one density panel per original column.
# pivot_longer() replaces the superseded gather(); names_to / values_to keep
# the `key` / `value` names that the aesthetics and facets below refer to.
work %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_density() +
  # Free scales: each column gets its own x and y range.
  facet_wrap(~key, scales = "free")
However, as you can see, there are five plots, and arranging the rows and columns doesn't really fix the issue because it is an odd number:
I'm wondering if there is a way to paste some kind of text box into this sixth quadrant of the faceted plot. I'm thinking of something like this:
How can one accomplish this goal?
One option is to access the 'blank space' using gtable/grid, e.g.
library(tidyverse)
library(gtable)
library(grid)
work <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L
), Mins_Sleep = c(300L, 540L, 540L, 480L, 480L, 480L, 480L, 420L,
300L, 240L, 480L, 300L, 420L, 360L, 390L, 405L, 420L, 360L, 420L,
350L, 420L, 450L, 445L, 480L, 300L, 400L, 310L, 390L, 350L, 450L,
390L, 390L, 510L, 452L, 310L, 360L, 500L, 360L, NA, NA, 420L,
420L, 420L, 382L, 430L, 393L, 240L, 400L, 480L, 450L), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L), Tea_Cups = c(2L, 4L, 2L, 0L, 0L, 2L, 0L, 2L, 4L, 0L, 0L,
0L, 2L, 6L, 5L, 0L, 2L, 0L, 2L, 4L, 0L, 0L, 0L, 2L, 1L, 0L, 4L,
4L, 4L, 2L, 1L, 0L, 2L, 0L, 0L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 2L, 0L, 1L, 0L, 2L, 0L), Time_Wake = c(500L, 715L, 600L,
600L, 700L, 600L, 700L, 500L, 500L, 500L, 500L, 700L, 645L, 700L,
630L, 645L, 700L, 600L, 700L, 550L, 700L, 730L, 725L, 800L, 600L,
640L, 600L, 730L, 650L, 830L, 630L, 630L, 830L, 722L, 641L, 800L,
720L, 700L, NA, NA, NA, 700L, 700L, 622L, 710L, 632L, 400L, 640L,
700L, 730L)), class = "data.frame", row.names = c(NA, -50L))
# Faceted density plot: one panel per numeric column of `work`.
# The data are reshaped to long form (column name in `key`, observation in
# `value`). pivot_longer() replaces the superseded gather(); names_to /
# values_to keep the `key` / `value` names used by the plot.
p <- work %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_density() +
  # Free scales: each column gets its own x and y range.
  facet_wrap(~key, scales = "free")
# Multi-line text shown in the annotation panel.
# The lines are joined with collapse = "\n". Passing them as separate
# arguments to paste() would insert its default sep = " " after every "\n",
# leaving a stray leading space on each line after the first and breaking the
# left alignment of the bullet list.
note <- paste(
  c(
    "Notes on Plots:",
    "- coffee skew",
    "- sleep semi-normal",
    "- work is weird",
    "- etc",
    "- etc"
  ),
  collapse = "\n"
)
# Stand-alone plot that contains nothing but the `note` text: a single text
# annotation at the origin, left-justified, on a completely blank theme.
p2 <- ggplot() +
  annotate(geom = "text", label = note, x = 0, y = 0, hjust = 0, size = 5) +
  theme_void()
# Convert both plots to gtables so the note can be spliced into the facet
# layout of the main plot.
pg <- ggplotGrob(p)
#> Warning: Removed 5 rows containing non-finite values (stat_density).
p2g <- ggplotGrob(p2)

# Layout entries of the panel grobs only. trim = FALSE leaves the table
# untrimmed, so the t / l / r values still index cells of `pg`.
# (FALSE is spelled out: `F` is an ordinary variable that can be reassigned.)
pl <- gtable_filter(pg, "panel", trim = FALSE)$layout

# Place the note in the bottom-most panel row and right-most panel column;
# the right edge is taken from the third panel's layout entry.
# NOTE(review): presumably this is the empty sixth facet cell - confirm if the
# number of facets or the wrap layout changes.
pg <- gtable_add_grob(pg, p2g, t = max(pl$t), l = max(pl$l), r = pl$r[3])

# Draw the combined table on a fresh page.
grid.newpage()
grid.draw(pg)
Created on 2022-09-15 by the reprex package (v2.0.1)
I wonder if this is the right direction to get you to start:
# Same faceted density plot, but with colour mapped to `key` so that ggplot
# builds a legend, which is then moved into the empty facet cell.
# pivot_longer() replaces the superseded gather(); names_to / values_to keep
# the `key` / `value` names used below.
work %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_density(aes(color = key)) +
  facet_wrap(~key, scales = "free") +
  theme(
    # Legend position as fractions of the plot area (x = 0.8, y = 0.3).
    # NOTE(review): recent ggplot2 releases (>= 3.5.0) prefer
    # legend.position = "inside" plus legend.position.inside = c(.8, .3).
    legend.position = c(.8, .3)
  )
Edit
To keep original black color for all graphs
# Named colour vector: every facet variable is mapped to black.
cols <- setNames(
  rep("black", 5),
  c("Mins_Work", "Mins_Sleep", "Coffee_Cups", "Tea_Cups", "Time_Wake")
)
# Faceted density plot with a legend placed in the empty facet cell, where a
# manual colour scale (`cols`) keeps every curve black.
# pivot_longer() replaces the superseded gather(); names_to / values_to keep
# the `key` / `value` names used below.
work %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(x = value)) +
  geom_density(aes(color = key)) +
  facet_wrap(~key, scales = "free") +
  # NOTE(review): recent ggplot2 releases (>= 3.5.0) prefer
  # legend.position = "inside" plus legend.position.inside = c(.8, .3).
  theme(legend.position = c(.8, .3)) +
  scale_color_manual(values = cols)
Reproducible Dataset
This is my data:
#### Data for Stack ####
stack <- structure(list(Mins_Work = c(435L, 350L, 145L, 135L, 15L, 60L,
60L, 390L, 395L, 395L, 315L, 80L, 580L, 175L, 545L, 230L, 435L,
370L, 255L, 515L, 330L, 65L, 115L, 550L, 420L, 45L, 266L, 196L,
198L, 220L, 17L, 382L, 0L, 180L, 343L, 207L, 263L, 332L, 0L,
0L, 259L, 417L, 282L, 685L, 517L, 111L, 64L, 466L, 499L, 460L,
269L, 300L, 427L, 301L, 436L, 342L, 229L, 379L, 102L, 146L, NA,
94L, 345L, 73L, 204L, 512L, 113L, 135L, 458L, 493L, 552L, 108L,
335L, 395L, 508L, 546L, 396L, 159L, 325L, 747L, 650L, 377L, 461L,
669L, 186L, 220L, 410L, 708L, 409L, 515L, 413L, 166L, 451L, 660L,
177L, 192L, 191L, 461L, 637L, 297L, 601L, 586L, 270L, 479L, 0L,
480L, 397L, 174L, 111L, 0L, 610L, 332L, 345L, 423L, 160L, 611L,
0L, 345L, 550L, 324L, 427L, 505L, 632L, 560L, 230L, 495L, 235L,
522L, 654L, 465L, 377L, 260L, 572L, 612L, 594L, 624L, 237L, 0L,
38L, 409L, 634L, 292L, 706L, 399L, 568L, 0L, 694L, 298L, 616L,
553L, 581L, 423L, 636L, 623L, 338L, 345L, 521L, 438L, 504L, 600L,
616L, 656L, 285L, 474L, 688L, 278L, 383L, 535L, 363L, 470L, 457L,
303L, 123L, 363L, 329L, 513L, 636L, 421L, 220L, 430L, 428L, 536L,
156L, 615L, 429L, 103L, 332L, 250L, 281L, 248L, 435L, 589L, 515L,
158L, 0L, 649L, 427L, 193L, 225L, 0L, 280L, 163L, 536L, 301L,
406L, 230L, 519L, 0L, 303L, 472L, 392L, 326L, 368L, 405L, 515L,
308L, 259L, 769L, 93L, 517L, 261L, 420L, 248L, 265L, 834L, 313L,
131L, 298L, 134L, 385L, 648L, 529L, 487L, 533L, 641L, 429L, 339L,
508L, 560L, 439L, 381L, 397L, 692L, NA), Coffee_Cups = c(3L,
0L, 2L, 6L, 4L, 5L, 3L, 3L, 2L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 0L,
1L, 1L, 4L, 4L, 3L, 0L, 1L, 3L, 0L, 0L, 0L, 0L, 2L, 0L, 1L, 2L,
3L, 2L, 2L, 4L, 3L, 6L, 6L, 3L, 4L, 6L, 8L, 3L, 5L, 0L, 2L, 2L,
8L, 6L, 4L, 6L, 4L, 4L, 2L, 6L, 6L, 5L, 1L, 3L, 1L, 5L, 4L, 6L,
5L, 0L, 6L, 6L, 4L, 4L, 2L, 2L, 6L, 6L, 7L, 3L, 3L, 0L, 5L, 7L,
6L, 3L, 5L, 3L, 3L, 1L, 9L, 9L, 3L, 3L, 6L, 6L, 6L, 3L, 0L, 7L,
6L, 6L, 3L, 9L, 3L, 8L, 8L, 3L, 3L, 7L, 6L, 3L, 3L, 3L, 6L, 6L,
6L, 1L, 9L, 3L, 3L, 2L, 6L, 3L, 6L, 9L, 6L, 8L, 9L, 6L, 6L, 6L,
0L, 3L, 0L, 3L, 3L, 6L, 3L, 0L, 9L, 3L, 0L, 2L, 0L, 6L, 6L, 6L,
3L, 6L, 3L, 9L, 3L, 0L, 0L, 6L, 3L, 3L, 3L, 3L, 6L, 0L, 6L, 3L,
3L, 5L, 5L, 3L, 0L, 6L, 4L, 2L, 0L, 2L, 4L, 0L, 6L, 4L, 4L, 2L,
2L, 0L, 9L, 6L, 3L, 6L, 6L, 9L, 0L, 6L, 6L, 6L, 6L, 6L, 6L, 3L,
3L, 0L, 9L, 6L, 3L, 6L, 3L, 6L, 1L, 6L, 6L, 6L, 6L, 6L, 1L, 3L,
9L, 6L, 3L, 6L, 9L, 3L, 5L, 6L, 3L, 0L, 6L, 3L, 3L, 5L, 0L, 6L,
3L, 5L, 3L, 0L, 6L, 7L, 3L, 6L, 6L, 6L, 6L, 3L, 5L, 6L, 7L, 6L,
6L, 4L, 3L)), class = "data.frame", row.names = c(NA, -244L))
Solution So Far
I'm trying to cut my coffee data into three groups, a low group, a medium group, and a high group. Here is how I tried doing so:
#### Load Libraries ####
library(tidyverse)
library(ggpubr)
#### Transform Data: Coffee ####
# Bin Coffee_Cups into three labelled levels and attach the result to the data.
coffee_labels <- c("Low", "Medium", "High") # labels
range(stack$Coffee_Cups) # get range for split
# Break points 0, 3, 6, 9 define three intervals.
coffee_breaks <- seq(from = 0,
to = 9,
by = 3) # split from 0 to 9 in 3 pt intervals
# NOTE: with cut()'s defaults (right = TRUE, include.lowest = FALSE) the
# intervals are (0,3], (3,6], (6,9]. A value of exactly 0 lies outside the
# first interval and is returned as NA; include.lowest = TRUE would close it.
coffee_transform <- cut(x= stack$Coffee_Cups,
labels = coffee_labels,
breaks = coffee_breaks) # add labels and breaks
# Add the factor as a new column `coffee_level`.
stack_transform <- stack %>%
mutate(coffee_level = coffee_transform) # mutate to add to data
# Inspect the last 30 values of the new column.
tail(stack_transform$coffee_level, 30) # check transform
Problem
However, when I print the tail command at the end, I get these NA values, which I assume is from an improper cut:
[1] Low Medium Medium Low <NA> Medium Low Low Medium <NA> Medium
[12] Low Medium Low <NA> Medium High Low Medium Medium Medium Medium
[23] Low Medium Medium High Medium Medium Medium Low
Levels: Low Medium High
I looked and those values correspond to my coffee consumption equaling zero, yet I already set the cut from 0 to 9. Naturally, when I try to make a boxplot with this, the NA levels get included, which I don't want:
#### Transform Coffee Boxplot ####
# Boxplot of Mins_Work for each coffee_level group, coloured by group with the
# "simpsons" palette; the legend is hidden and the title/caption are restyled.
ggboxplot(
  stack_transform,
  x = "coffee_level",
  y = "Mins_Work",
  color = "coffee_level",
  palette = "simpsons",
  xlab = "Coffee Consumption Level",
  ylab = "Minutes of Productivity",
  title = "Coffee Consumption Level Productivity",
  caption = "*Data obtained from local matrix."
) +
  theme_bw() +
  theme(
    plot.title = element_text(family = "mono", face = "bold", size = 18),
    plot.caption = element_text(face = "italic"),
    legend.position = "none"
  )
Question
How do I fix these NA values? I want my zero values to be included into the "low" group if possible.
Would this work for you? It seems you are using base R, so a nested ifelse statement may be simpler:
# Classify Coffee_Cups as "Low" (0-3), "Medium" (4-6) or "High" (7-9);
# any value outside 0-9 becomes NA.
stack$coffee_cat <- with(
  stack,
  ifelse(
    Coffee_Cups %in% 0:3, "Low",
    ifelse(
      Coffee_Cups %in% 4:6, "Medium",
      ifelse(Coffee_Cups %in% 7:9, "High", NA)
    )
  )
)
Output
# Mins_Work Coffee_Cups coffee_cat
#1 435 3 Low
#2 350 0 Low
#3 145 2 Low
#4 135 6 Medium
#5 15 4 Medium
#6 60 5 Medium
case_when would be a dplyr alternative:
# dplyr version of the same classification: the first matching condition wins,
# and rows matching none of them get NA.
mutate(
  stack,
  coffee_level = case_when(
    Coffee_Cups %in% 0:3 ~ "Low",
    Coffee_Cups %in% 4:6 ~ "Medium",
    Coffee_Cups %in% 7:9 ~ "High"
  )
)
To include the zeros in cutting, you could also use the Hmisc::cut2 function:
# Cut Coffee_Cups into three groups with Hmisc::cut2() and relabel the levels
# with `coffee_labels`.
# NOTE(review): g = 3 presumably requests three quantile groups, so the group
# boundaries come from the data and need not match the 0-3 / 4-6 / 7-9 split
# used elsewhere - confirm against the Hmisc documentation.
stack$coffee_Hmisc <- factor(Hmisc::cut2(stack$Coffee_Cups, g = 3), labels = coffee_labels)
I have four datasets derived and processed identically (though differing in size due to the availability of Landsat scenes)
I am trying to compute ANOVA using the formula:
# Two-way repeated-measures ANOVA with rstatix::anova_test():
#   dv     = LST (response)
#   wid    = JulianDay (identifier of the repeated unit)
#   within = Buffer and TimePeriod (within-unit factors)
# Generalised eta squared ("ges") is reported as the effect size.
# The stray trailing comma after `detailed = TRUE` was removed so that no
# empty argument is passed to the function.
res.aov <- anova_test(
  data = LST_Weather_dataset_ANOVA, dv = LST, wid = JulianDay,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)
# Print the ANOVA table; correction = "auto" lets rstatix decide whether a
# sphericity correction is applied.
get_anova_table(res.aov, correction = "auto")
Where:
*) LST = surface temperature deviation in C
*) JulianDay = days since start of year
*) Buffer = a value 100-1900 - one of 19 areas outward from the boundary of a solar power plant (each 100m wide)
*) TimePeriod = a factor with a value of 0 or 1 corresponding to pre-/post-construction of the solar power plant.
The intent is to investigate if the construction of the installation affected the adjacent land surface temperature.
At three sites the ANOVA runs successfully, however at the fourth site it doesn't and fails with the error:
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
0 (non-NA) cases
I have 381 rows of data in 4 columns (extract below), the only difference I can think of here is that I had to remove two paired months from the time series as data was not available in one of the months. This means there are 20 months of data, rather than 24. Every other processing step is identical.
Reading online I have searched for N/As (there are none), and can't see how there are levels without values as every cell has data. I don't know how to properly evaluate this, though, as it seems this is the root of the error.
I'm hoping someone will know the code needed and/or be able to suggest a way forwards.
Buffer LST JulianDay TimePeriod
1800 -0.04576149 73 2
1900 -0.03422945 73 2
1900 -0.02089755 302 1
1900 -0.02062432 96 1
1900 -0.01465229 192 1
1900 -0.00643754 128 1
1900 -0.00333345 105 2
1800 -0.00266312 366 1
1900 -0.00181226 201 2
1900 -0.00158173 169 2
1900 -1.81E-05 41 2
1800 0.00144813 128 1
and 367 additional rows...
[Edits]
Per comments below:
dput() whole dataframe
dput() subset (as suggested)
Thanks #Dion for noting anova_test is from the RStatix package.
1)
> dput(LST_Weather_dataset_ANOVA)
structure(list(Buffer = c(100L, 200L, 300L, 400L, 500L, 600L,
700L, 800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L, 100L, 200L, 300L, 400L, 500L, 600L, 700L,
800L, 900L, 1000L, 1100L, 1200L, 1300L, 1400L, 1500L, 1600L,
1700L, 1800L, 1900L), LST = c(0.91797777, 0.95083024, 0.79129483,
0.74791195, 0.68740945, 0.64516119, 0.74870729, 0.78357522, 0.83663769,
0.82156894, 0.77440129, 0.62769619, 0.52052404, 0.46497939, 0.34456476,
0.20359411, 0.11688336, 0.04136486, -0.02089755, 1.15111659,
1.20353638, 1.11717501, 1.0286145, 0.90984545, 0.82983601, 0.78186792,
0.73227976, 0.6989393, 0.65015275, 0.56241798, 0.39651023, 0.34213091,
0.3386525, 0.24000145, 0.11809023, 0.07704512, -0.00266312, 0.01273022,
1.04229626, 1.14347392, 1.1156609, 1.10575157, 1.01202522, 0.77829087,
0.80477079, 0.79677169, 0.83116477, 0.83242401, 0.82394197, 0.72073306,
0.64099082, 0.58188225, 0.43328083, 0.28349521, 0.19752629, 0.10636456,
0.01987005, 0.74458844, 0.71512573, 0.6395358, 0.65294657, 0.63325921,
0.56155255, 0.60860815, 0.60614753, 0.59989994, 0.58766288, 0.57257261,
0.50018929, 0.4367402, 0.40497079, 0.31822141, 0.2300726, 0.16928876,
0.09449034, 0.01799424, 0.82747052, 0.78262774, 0.65488597, 0.62609552,
0.60057131, 0.59950609, 0.6609992, 0.6876772, 0.73196883, 0.75516596,
0.75554112, 0.64167458, 0.54703129, 0.49947692, 0.38230481, 0.25519237,
0.16087274, 0.07759223, 0.00820849, 0.75009747, 0.71421977, 0.62411035,
0.58621041, 0.58438012, 0.61346156, 0.72712994, 0.81372726, 0.87579554,
0.88934787, 0.87369461, 0.74686202, 0.64084028, 0.5599638, 0.40021941,
0.23612052, 0.13408522, 0.04484869, -0.02062432, 0.22133116,
0.28562902, 0.24359043, 0.17788898, 0.16563242, 0.11740664, 0.10102937,
0.07328697, 0.07948283, 0.07521508, 0.08526232, 0.0548022, 0.04632606,
0.06670398, 0.03262545, 0.00650875, 0.01186519, 0.00144813, -0.00643754,
0.26360849, 0.22139941, 0.16915041, 0.13499715, 0.12846785, 0.15351528,
0.15321108, 0.13963269, 0.13413671, 0.13097696, 0.15897844, 0.15489366,
0.12600815, 0.12363834, 0.0943688, 0.07324289, 0.0565765, 0.04005241,
0.01346488, 0.42361198, 0.39149841, 0.29086274, 0.21492842, 0.20664552,
0.24524285, 0.30548979, 0.35256808, 0.37350282, 0.38680061, 0.38567758,
0.31177736, 0.24643091, 0.22001284, 0.14356522, 0.07076854, 0.04168654,
0.01276553, -0.01465229, 0.57032414, 0.50658577, 0.41717664,
0.36134446, 0.35794989, 0.38457285, 0.43700723, 0.48358206, 0.50516801,
0.50086146, 0.49398709, 0.41516438, 0.33165215, 0.28357127, 0.20030152,
0.11993505, 0.08438345, 0.05755944, 0.01071499, 0.04963208, 0.34087747,
0.38385889, 0.40408637, 0.41182138, 0.15662208, 0.18857013, 0.17978741,
0.1533216, 0.1451422, 0.14890638, 0.14090521, 0.1782449, 0.23624089,
0.21003477, 0.13812217, 0.10759364, 0.07225312, 0.03185378, 0.27507486,
0.54404521, 0.56568824, 0.58543167, 0.49124799, 0.28299777, 0.27514982,
0.27526446, 0.27376722, 0.24620415, 0.22871699, 0.19647326, 0.2450593,
0.27133386, 0.15248773, 0.06240341, 0.04933824, 0.03356535, -1.81e-05,
0.21776379, 0.37010032, 0.32743525, 0.30588107, 0.31226738, 0.30518286,
0.32637517, 0.31003415, 0.23691586, 0.1985241, 0.16143326, 0.12384526,
0.11556386, 0.09243356, 0.05773894, 0.03660942, 0.02173758, -0.04576149,
-0.03422945, 0.06214728, 0.26440563, 0.24838816, 0.22704611,
0.17230754, 0.15660109, 0.18689433, 0.24464547, 0.28273218, 0.29602945,
0.29992488, 0.24679735, 0.24521192, 0.23913767, 0.15081173, 0.08724556,
0.05561237, 0.02530266, -0.00333345, 0.11993489, 0.20504424,
0.17323488, 0.14541868, 0.10994579, 0.12741154, 0.17959797, 0.22553943,
0.26564836, 0.29760832, 0.3207305, 0.28592135, 0.26551685, 0.2493214,
0.15767906, 0.0883716, 0.05058495, 0.02207594, 0.00162532, 0.05621313,
0.08020623, 0.05187855, 0.02643543, 0.02422505, 0.05372454, 0.09563737,
0.14735627, 0.18199015, 0.22456299, 0.25302274, 0.21978124, 0.19092835,
0.18255829, 0.11850551, 0.0581734, 0.03406168, 0.01868243, -0.00158173,
0.00980756, 0.07077972, 0.05126985, 0.03126771, 0.01828044, 0.00678076,
0.03566275, 0.05622289, 0.07218645, 0.08767578, 0.11078182, 0.08827425,
0.08881865, 0.10037876, 0.05952601, 0.03440435, 0.01843206, 0.0091852,
-0.00181226, 0.08737325, 0.14470842, 0.13066747, 0.12324597,
0.12014198, 0.13435757, 0.17843025, 0.19926835, 0.20503774, 0.20485414,
0.2124073, 0.1864257, 0.18810996, 0.20665551, 0.13839744, 0.08488387,
0.06246853, 0.03463723, 0.00349753, 0.35245488, 0.57692156, 0.64897028,
0.67306088, 0.68344534, 0.56106697, 0.52144197, 0.49250191, 0.47494065,
0.4359944, 0.39638743, 0.32554099, 0.28717774, 0.2826675, 0.22703594,
0.18186983, 0.15875118, 0.09672536, 0.04305742, 0.24294606, 0.54654222,
0.56344638, 0.53312729, 0.47324972, 0.34482643, 0.34915085, 0.33729055,
0.32086985, 0.29578347, 0.25030669, 0.17928298, 0.17007511, 0.18375903,
0.15222616, 0.10934224, 0.07536797, 0.04154465, 0.02550096),
JulianDay = c(302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L,
302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L, 302L,
302L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L,
366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L, 366L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 64L, 64L, 64L, 64L, 64L,
64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L,
64L, 64L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L,
80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 96L, 96L, 96L,
96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L, 96L,
96L, 96L, 96L, 96L, 128L, 128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L, 128L,
128L, 128L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L,
160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L, 160L,
160L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L,
192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L, 192L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L,
224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 224L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L,
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 73L, 73L,
73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L, 73L,
73L, 73L, 73L, 73L, 73L, 105L, 105L, 105L, 105L, 105L, 105L,
105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L,
105L, 105L, 105L, 137L, 137L, 137L, 137L, 137L, 137L, 137L,
137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L, 137L,
137L, 137L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L,
169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L, 169L,
169L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L,
201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L, 201L,
217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L,
217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 217L, 313L,
313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L,
313L, 313L, 313L, 313L, 313L, 313L, 313L, 313L, 361L, 361L,
361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L, 361L,
361L, 361L, 361L, 361L, 361L, 361L, 361L), TimePeriod = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA,
-380L))
> dput(LST_Weather_dataset_ANOVA[sample(1:nrow(LST_Weather_dataset_ANOVA), 50),])
structure(list(Buffer = c(800L, 1400L, 500L, 200L, 400L, 1400L,
100L, 1600L, 1800L, 100L, 1400L, 1500L, 900L, 700L, 800L, 600L,
400L, 1300L, 500L, 700L, 700L, 300L, 700L, 200L, 200L, 500L,
500L, 900L, 1000L, 1300L, 1400L, 1600L, 700L, 400L, 500L, 200L,
400L, 1500L, 1400L, 800L, 500L, 1200L, 1500L, 1900L, 600L, 800L,
100L, 1000L, 900L, 1100L), LST = c(0.48358206, 0.46497939, 0.41182138,
0.07077972, 0.17788898, 0.18255829, 0.21776379, 0.03660942, 0.04154465,
0.42361198, 0.49947692, 0.38230481, 0.28273218, 0.18857013, 0.33729055,
0.56106697, 0.13499715, 0.28717774, 0.12014198, 0.78186792, 0.74870729,
0.56344638, 0.18689433, 0.54404521, 0.78262774, 0.60057131, 1.01202522,
0.20503774, 0.13097696, 0.34213091, 0.5599638, 0.08724556, 0.17843025,
1.0286145, 0.01828044, 0.22139941, 0.67306088, 0.15248773, 0.22001284,
0.27526446, 0.02422505, 0.50018929, 0.31822141, 0.01799424, 0.56155255,
0.13963269, 0.27507486, 0.29578347, 0.18199015, 0.3207305), JulianDay = c(224L,
302L, 9L, 201L, 128L, 169L, 73L, 73L, 361L, 192L, 80L, 80L, 105L,
9L, 361L, 313L, 160L, 313L, 217L, 366L, 302L, 361L, 105L, 41L,
80L, 80L, 16L, 217L, 160L, 366L, 96L, 105L, 217L, 366L, 201L,
160L, 313L, 41L, 192L, 41L, 169L, 64L, 64L, 64L, 64L, 160L, 41L,
361L, 169L, 137L), TimePeriod = c(1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L)), row.names = c(179L,
14L, 195L, 306L, 118L, 299L, 229L, 244L, 379L, 153L, 90L, 91L,
256L, 197L, 369L, 348L, 137L, 355L, 328L, 26L, 7L, 364L, 254L,
211L, 78L, 81L, 43L, 332L, 143L, 32L, 109L, 263L, 330L, 23L,
309L, 135L, 346L, 224L, 166L, 217L, 290L, 69L, 72L, 76L, 63L,
141L, 210L, 371L, 294L, 277L), class = "data.frame")
You are running a repeated-measures ANOVA, which requires the observations to be complete for every individual (the `wid` unit) across the within-subject factors you specified. In your case, you need to ensure that, for every JulianDay, there is an observation for every combination of Buffer and TimePeriod.
We can tabulate it by using table() and you can see for all the JulianDays they are incomplete, for example on 9 and 16:
# Count observations per Buffer x TimePeriod x JulianDay cell and show the
# slices for Julian days 9 and 16; a 0 marks a missing combination.
table(
  LST_Weather_dataset_ANOVA[c("Buffer", "TimePeriod", "JulianDay")]
)[, , c("9", "16")]
, , JulianDay = 9
TimePeriod
Buffer 1 2
100 0 1
200 0 1
300 0 1
400 0 1
500 0 1
600 0 1
700 0 1
800 0 1
900 0 1
1000 0 1
1100 0 1
1200 0 1
1300 0 1
1400 0 1
1500 0 1
1600 0 1
1700 0 1
1800 0 1
1900 0 1
, , JulianDay = 16
TimePeriod
Buffer 1 2
100 1 0
200 1 0
300 1 0
400 1 0
500 1 0
600 1 0
700 1 0
800 1 0
900 1 0
1000 1 0
1100 1 0
1200 1 0
1300 1 0
1400 1 0
1500 1 0
1600 1 0
1700 1 0
1800 1 0
1900 1 0
As you have noted, if you reconcile the dates between sites, it will work. I am not very sure how you converted the JulianDay to months, but using your data, it works if I just do
# Work on a copy of the data and derive a Month column from the day of year.
df <- LST_Weather_dataset_ANOVA
# Parse "2020 <day-of-year>" (2020 is a leap year, so day 366 is valid) and
# look the month up in the built-in English `month.name` vector. months()
# returns names in the current locale, which would make the "May"/"June"
# filter below silently match nothing on a non-English system.
df$Month <- month.name[
  strptime(paste("2020", df$JulianDay), "%Y %j", tz = "UTC")$mon + 1L
]
# Keep only the two months used in this example.
df <- subset(df, Month %in% c("May", "June"))
# Every Buffer x TimePeriod cell should now contain exactly one observation.
with(df, table(Buffer, TimePeriod, Month))
, , Month = June
TimePeriod
Buffer 1 2
100 1 1
200 1 1
300 1 1
400 1 1
500 1 1
600 1 1
700 1 1
800 1 1
900 1 1
1000 1 1
1100 1 1
1200 1 1
1300 1 1
1400 1 1
1500 1 1
1600 1 1
1700 1 1
1800 1 1
1900 1 1
, , Month = May
TimePeriod
Buffer 1 2
100 1 1
200 1 1
300 1 1
400 1 1
500 1 1
600 1 1
700 1 1
800 1 1
900 1 1
1000 1 1
1100 1 1
1200 1 1
1300 1 1
1400 1 1
1500 1 1
1600 1 1
1700 1 1
1800 1 1
1900 1 1
You can see for months June and May, they are complete (no zeros), and if we run anova, it works:
# Repeated-measures ANOVA on the May/June subset, using Month as the repeated
# unit (wid) and Buffer and TimePeriod as within-unit factors.
# The stray trailing comma after `detailed = TRUE` was removed so that no
# empty argument is passed to the function.
res.aov <- anova_test(
  data = df, dv = LST, wid = Month,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)
ANOVA Table (type III tests)
Effect DFn DFd SSn SSd F p p<.05 ges
1 (Intercept) 1 1 1.217 0.005 222.936 4.30e-02 * 0.933
2 Buffer 18 18 0.256 0.026 9.933 5.49e-06 * 0.746
3 TimePeriod 1 1 0.013 0.048 0.274 6.93e-01 0.130
4 Buffer:TimePeriod 18 18 0.181 0.008 21.476 1.20e-08 * 0.674
While ironing last night I wondered if JulianDay might be the source of the error. It is derived from the dates of the Landsat scenes from which the dependent variable data are derived, so it is different for each site.
Editing the dataframe to replace the JulianDay column with Month and amending the code to:
# Check the column types, then rerun the repeated-measures ANOVA with Month
# (instead of JulianDay) as the repeated unit.
str(LST_Weather_dataset_ANOVA)
# The stray trailing comma after `detailed = TRUE` was removed so that no
# empty argument is passed to the function.
res.aov <- anova_test(
  data = LST_Weather_dataset_ANOVA, dv = LST, wid = Month,
  within = c(Buffer, TimePeriod),
  effect.size = "ges",
  detailed = TRUE
)
get_anova_table(res.aov, correction = "auto")
...the ANOVA test runs successfully:
> res.aov <- anova_test(
+ data = LST_Weather_dataset_ANOVA, dv = LST, wid = Month,
+ within = c(Buffer, TimePeriod),
+ effect.size = "ges",
+ detailed = TRUE,
+ )
> get_anova_table(res.aov, correction = "auto")
ANOVA Table (type III tests)
Effect DFn DFd SSn SSd F p p<.05 ges
1 (Intercept) 1 9 36.781 6.593 50.212 5.75e-05 * 0.735
2 Buffer 18 162 8.042 3.041 23.801 1.81e-36 * 0.378
3 TimePeriod 1 9 5.065 2.506 18.194 2.00e-03 * 0.276
4 Buffer:TimePeriod 18 162 1.713 1.117 13.800 2.71e-24 * 0.114
But I still don't fully understand why...
Hopefully this will enable someone to comment and provide an explanation?
I have a ggplot related question, which should be easy but I could not find the answer yet. I am trying to plot a faceted plot with the code below and this dataset (11 kB).
# Dot-and-interval plot of the random-effect estimates: one row per
# countryyear (ordered by estimate), one column of panels per `facet` level,
# with a horizontal segment from conf.low to conf.high around each point.
ggplot(
  data = plot.dat,
  mapping = aes(x = estimate, y = reorder(countryyear, estimate))
) +
  geom_point() +
  geom_segment(
    mapping = aes(x = conf.low, xend = conf.high, yend = countryyear)
  ) +
  facet_grid(cols = vars(facet)) +
  labs(
    x = "Random Effect Estimate",
    y = "",
    title = "Random Slopes in Country*Year Groups from Northwestern Europe"
  ) +
  scale_x_continuous(breaks = seq(0, 5, by = 1), limits = c(0, 5)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
I would like countryyear to be organized by the values of estimate in the Extreme Right facet. Not quite sure how to order by values of a specific facet. Any ideas are welcome! Thanks.
Update: Here is the dput structure of a random subset of the dataset. It has some missing values, but it should work for the sake of the example. I also updated the download link above, that has the full version.
structure(list(estimate = c(1.41056902925372, 0.854859208455895,
1.16012834593894, 0.871339033194504, 0.803272289946221, 1.17540386134493,
0.996313357490551, 1.49940694539732, 1.33773365908762, 2.7318703090905,
1.19131935418045, 1.12765907711738, 0.746741192261761, 0.985847015192172,
0.912357310925342, 1.11582763712164, 1.21854572824977, 0.675712547978394,
0.566955524699616, 1.32611743759365, 0.519648352294682, 0.591013596394243,
1.30944973684044, 0.613722269599125, 1.13293279727271, 0.950788678552604,
1.1599446923567, 1.11493952112913, 0.95336321045095, 1.39002327097034,
0.794207546872633, 0.788545101449259, 1.01096883872495, 0.897407203907834,
1.38391605229103, 1.35754760293107, 1.0718508539761, 0.542191158958878,
0.757132752456427, 1.44172863221312, 1.04842251986171, 0.77260404885379,
0.879288027642055, 1.09372353598088, 0.745484830381145, 1.21211217249353,
0.628009608902132, 1.34864488674734), countryyear = structure(c(1L,
2L, 4L, 5L, 7L, 9L, 10L, 12L, 13L, 26L, 28L, 29L, 31L, 32L, 34L,
36L, 37L, 39L, 40L, 57L, 59L, 60L, 62L, 63L, 65L, 67L, 68L, 70L,
71L, 73L, 75L, 76L, 89L, 90L, 92L, 94L, 95L, 103L, 104L, 106L,
108L, 109L, 111L, 128L, 130L, 132L, 133L, 135L), .Label = c("AT02",
"AT04", "AT06", "AT14", "AT16", "BE02", "BE04", "BE06", "BE08",
"BE10", "BE12", "BE14", "BE16", "BG06", "BG08", "BG10", "BG12",
"CH14", "CZ02", "CZ04", "CZ08", "CZ10", "CZ12", "CZ14", "CZ16",
"DE02", "DE04", "DE06", "DE08", "DE10", "DE12", "DE14", "DE16",
"DK02", "DK04", "DK06", "DK08", "DK10", "DK12", "DK14", "EE04",
"EE06", "EE08", "EE10", "EE12", "EE14", "EE16", "ES02", "ES04",
"ES06", "ES08", "ES10", "ES12", "ES14", "ES16", "FI02", "FI04",
"FI06", "FI08", "FI10", "FI12", "FI14", "FI16", "FR06", "FR08",
"FR10", "FR12", "FR14", "FR16", "GB02", "GB04", "GB06", "GB08",
"GB10", "GB12", "GB14", "GB16", "GR02", "GR04", "GR08", "GR10",
"HU02", "HU06", "HU08", "HU10", "HU12", "HU14", "HU16", "IE02",
"IE04", "IE06", "IE08", "IE10", "IE12", "IE14", "IE16", "IT04",
"IT12", "IT16", "LT10", "LT12", "LT14", "NL02", "NL04", "NL06",
"NL08", "NL10", "NL12", "NL14", "NL16", "NO14", "PL02", "PL04",
"PL06", "PL08", "PL10", "PL12", "PL14", "PL16", "PT02", "PT04",
"PT06", "PT08", "PT10", "PT12", "PT14", "PT16", "SE02", "SE04",
"SE06", "SE08", "SE10", "SE12", "SE14", "SE16", "SI02", "SI04",
"SI06", "SI08", "SI10", "SI12", "SI14", "SI16", "SK04", "SK06",
"SK08", "SK10", "SK12"), class = "factor"), facet = structure(c(1L,
3L, 1L, 4L, 5L, 3L, 4L, 1L, 1L, 1L, 5L, 5L, 4L, 5L, 3L, 1L, 2L,
4L, 5L, 2L, 1L, 4L, 2L, 5L, 2L, 3L, 4L, 3L, 2L, 5L, 5L, 4L, 2L,
5L, 4L, 5L, 3L, 1L, 4L, 5L, 3L, 5L, 4L, 1L, 5L, 2L, 4L, 1L), .Label = c("Intercept",
"Extreme Left", "Center", "Right", "Extreme Right"), class = "factor"),
conf.low = c(1.16824810706745, 0.686215051613965, 0.910277310292764,
0.591705078386698, 0.37357342399703, 0.947951001435781, 0.663296044193037,
1.18794112232166, 1.06645119085865, 2.33578182814618, 0.580210898576738,
0.564235690522211, 0.530859530342114, 0.516191258265551,
0.730992343373883, 0.862424540370486, 0.827891784352444,
0.427638276259852, 0.275692447335368, 0.829763907986328,
0.370078643492081, 0.321852705445509, 0.83550621863293, 0.289836810427436,
0.847226120408727, 0.780056160572728, 0.873143885861924,
0.869757467125519, 0.615741777890997, 0.649483531741787,
0.349657606457465, 0.523294407847395, 0.670109418373736,
0.36656743494149, 0.952201390937053, 0.777207016700884, 0.888128473009524,
0.397085597526946, 0.479828726362257, 0.614533313431094,
0.813336887981082, 0.3129232351085, 0.61435321820328, 0.854801028643867,
0.346698059397102, 0.805414039007076, 0.434676644041643,
1.07780736338027), conf.high = c(1.70315275860739, 1.06494933995261,
1.47855797769819, 1.28312522319126, 1.7272277157504, 1.45743211956315,
1.49652679976667, 1.8925358720741, 1.67802460909168, 3.19512520208851,
2.44607918797515, 2.25369471581694, 1.05041423643869, 1.8828182806291,
1.13872035780431, 1.44368725318228, 1.79353596677755, 1.06769546329854,
1.16593171156554, 2.11938292490653, 0.729667639003753, 1.08526995489865,
2.05223919950836, 1.29954170985538, 1.51498719434776, 1.15888977865399,
1.54095070825389, 1.4292376699955, 1.47610807594453, 2.97492484321718,
1.80395225460704, 1.18824770090216, 1.52521060717706, 2.19697554354282,
2.01136404338166, 2.37122858469145, 1.29357889999432, 0.740322123703373,
1.19469713534712, 3.38237391450413, 1.35145693795059, 1.90755095606211,
1.25847381058047, 1.39942645489832, 1.60297301142912, 1.82417470710871,
0.907332092210651, 1.68753999308876)), row.names = c(1L,
9L, 17L, 25L, 33L, 41L, 49L, 57L, 65L, 128L, 136L, 144L, 152L,
160L, 168L, 176L, 184L, 192L, 200L, 283L, 291L, 299L, 307L, 315L,
323L, 331L, 339L, 347L, 355L, 363L, 371L, 379L, 442L, 450L, 458L,
466L, 474L, 512L, 520L, 528L, 536L, 544L, 552L, 640L, 648L, 656L,
664L, 672L), class = "data.frame")
I have data consisting of observations over time. Unfortunately, large runs of time points are missing for one treatment. They are not coded as NA, which becomes apparent when I plot the data.
My data frame looks like this. The number of samples per time point is irregular. (edit: sorry for not making the example reproducible)
structure(list(A = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 144L, 144L, 144L, 1809L, 1809L, 1809L,
1809L, 1809L, 1809L, 1809L, 1809L, 1809L, 1809L, 1809L, 1809L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L, 2070L,
2070L, 2070L, 2070L, 2070L, 2757L, 2757L, 2757L, 2909L, 2909L,
2909L, 2909L, 2909L, 2909L, 2909L, 2909L, 2909L, 2909L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L, 2975L,
2975L, 2975L, 2975L, 2975L), cond = structure(c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Con",
"Si"), class = "factor"), T = c(416L, 417L, 418L, 419L, 420L,
423L, 424L, 425L, 426L, 427L, 428L, 429L, 430L, 431L, 432L, 433L,
434L, 435L, 436L, 437L, 438L, 439L, 440L, 441L, 442L, 443L, 444L,
445L, 446L, 447L, 448L, 449L, 450L, 451L, 452L, 453L, 454L, 458L,
503L, 504L, 505L, 506L, 507L, 508L, 509L, 510L, 511L, 512L, 513L,
514L, 515L, 516L, 517L, 518L, 519L, 520L, 521L, 522L, 523L, 524L,
525L, 526L, 527L, 528L, 272L, 276L, 277L, 350L, 351L, 352L, 353L,
354L, 355L, 356L, 357L, 358L, 359L, 360L, 361L, 372L, 373L, 374L,
375L, 376L, 377L, 378L, 379L, 380L, 381L, 382L, 383L, 384L, 385L,
386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L, 394L, 395L, 396L,
397L, 398L, 399L, 400L, 401L, 437L, 438L, 439L, 440L, 441L, 442L,
443L, 444L, 445L, 446L, 447L, 448L, 449L, 450L, 451L, 452L, 453L,
454L, 455L, 493L, 494L, 495L, 382L, 383L, 384L, 385L, 386L, 387L,
388L, 389L, 390L, 391L, 523L, 524L, 525L, 526L, 527L, 528L, 529L,
530L, 531L, 532L, 533L, 534L, 535L, 536L, 537L, 538L, 539L, 540L,
541L, 542L, 543L, 544L, 545L, 546L, 547L, 548L, 549L, 550L, 551L,
552L, 553L, 554L, 555L, 556L, 557L, 582L, 583L, 584L, 585L, 586L,
587L, 588L, 589L, 590L, 591L, 592L, 593L, 594L, 595L, 596L),
Vlog = c(1.199206203, 0.92297866, 0.74831703, 1.180533889,
0.846435768, 1.823185531, 1.775303408, 0.9253633, 1.562371106,
1.237695416, 1.310507835, 1.431774566, 2.259365243, 1.721204598,
0.976929098, 0.673510525, 1.194940048, 0.878373924, 1.399859784,
1.04183351, 0.362465228, 1.345074816, 0.839639722, 1.235884973,
0.946877821, 0.810708992, 0.620516467, 0.99590939, 0.446167467,
0.635246561, 0.508835353, 0.470349764, 0.505083592, 0.363685506,
0.841427562, 1.502579534, 1.503814969, 1.962735861, 1.190111689,
1.208627789, 1.212606926, 1.3052429, 1.19648953, 1.399151795,
1.359988717, 1.530933258, 1.324386434, 1.429685474, 1.550040003,
1.209836455, 0.976675012, 1.396991989, 1.309972472, 0.884831368,
0.940578242, 0.622109712, 0.196736781, 0, 1.861481047, 1.166587204,
1.154778081, 0.750716468, 0.822148942, 0.324409805, 0.810379036,
2.218975354, 0.837542999, 1.597505982, 1.34988859, 2.109471773,
1.408734988, 1.006914696, 1.680242618, 1.842263128, 2.19564511,
1.80944452, 1.194273373, 1.953931263, 1.943781916, 2.136530509,
2.174627732, 1.837702354, 1.744745221, 1.744745221, 2.065910366,
1.3644043, 1.935629046, 1.327947423, 1.703751191, 1.595793931,
2.32443327, 1.815054551, 1.381916487, 1.535930503, 1.762742848,
1.214377396, 1.745046639, 0, 0, 1.314421325, 2.12544409,
1.961225517, 1.722393773, 1.763882649, 2.246794342, 1.462888398,
0, 2.699085109, 0.982206846, 1.678694356, 1.339419526, 1.856762396,
1.604863093, 1.439867691, 1.210451327, 0.988645101, 1.581116604,
0.868888993, 1.385699365, 1.377180499, 1.584445411, 1.76153307,
1.153021042, 1.427814276, 1.867219352, 1.726781152, 2.045476901,
1.231462515, 1.282774459, 1.194170351, 1.423430455, 1.813916126,
1.697914719, 1.343711186, 1.619115871, 1.590854952, 1.165150441,
0.84551636, 0.925836885, 0.0009995, 0, 2.672041587, 1.630536406,
2.084775235, 0.879027692, 2.150052605, 1.171591247, 2.589254624,
1.09594206, 1.788420568, 0.879027692, 1.768910948, 1.544705476,
0.961905249, 2.03675983, 1.189770451, 2.125034005, 1.921180059,
1.587902512, 1.113485404, 1.826744807, 0.961905249, 1.423828826,
1.392463308, 1.355448604, 1.638531529, 1.158778559, 1.257058585,
1.641075408, 1.652573524, 1.435915015, 1.072776171, 1.240686858,
1.647779212, 1.089811169, 1.723723056, 2.094419336, 0.544066958,
0.894454037, 1.651688305, 1.153416081, 0.961905249, 2.457446983,
0.704322704, 1.544705476, 1.970925317, 1.402837317, 1.651688305,
1.358923164, 1.153416081, 2.056674373)), .Names = c("A",
"cond", "T", "Vlog"), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L,
60L, 61L, 62L, 63L, 64L, 66L, 67L, 68L, 201L, 202L, 203L, 204L,
205L, 206L, 207L, 208L, 209L, 210L, 211L, 212L, 213L, 214L, 215L,
216L, 217L, 218L, 219L, 220L, 221L, 222L, 223L, 224L, 225L, 226L,
227L, 228L, 229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 237L,
238L, 239L, 240L, 241L, 242L, 243L, 244L, 245L, 246L, 247L, 248L,
249L, 250L, 251L, 252L, 253L, 254L, 255L, 256L, 257L, 258L, 259L,
260L, 261L, 695L, 696L, 697L, 698L, 699L, 700L, 701L, 702L, 703L,
704L, 705L, 706L, 707L, 708L, 709L, 710L, 711L, 712L, 713L, 714L,
715L, 716L, 717L, 718L, 719L, 720L, 721L, 722L, 723L, 724L, 725L,
726L, 727L, 728L, 729L, 730L, 731L, 732L, 733L, 734L, 735L, 736L,
737L, 738L, 739L, 740L, 741L, 742L, 743L, 744L, 745L, 746L, 747L,
748L, 749L, 750L, 751L, 752L, 753L, 754L, 755L, 756L, 757L), class = "data.frame")
Is there a way of spotting the missing time points and inserting n rows for each of them? What I thought of is to check for the missing time points by making a frequency table for each time point per treatment and then inserting rows. This is doable with a short time series but not with a large one. Could someone suggest an easier way to do it? Thanks!
edit: T is sequential but the number of data per T varies. And I want to insert a number of rows for each T. Hope the edits made it clear. :)
This largely depends on how general you wish your solution to be. But, if you want a non-general solution you can do #1 pretty simply. Here, I assume that you're using T as your time variable.
#' Build placeholder rows for the time points missing from a data frame
#'
#' Generates the full sequence from the smallest to the largest observed value
#' of the time column (in steps of `by`) and returns one placeholder row for
#' every value of that sequence that does not occur in `df`. The placeholders
#' are NOT appended to `df`; combine them yourself, e.g.
#' `rbind(df, insert_miss(df))`.
#'
#' @param df A data frame.
#' @param time_val Name of the time column in `df`, given as a string.
#' @param by Step of the expected time sequence, passed on to `seq()`.
#' @return A data frame with the same columns as `df` and one row per missing
#'   time point: the time column holds the missing value, every other column
#'   is `NA` (keeping the column's original type). A zero-row data frame is
#'   returned when nothing is missing or when no time value is observed.
insert_miss <- function(df, time_val = "T", by = 1) {
  if (!time_val %in% names(df)) {
    stop("column '", time_val, "' not found in `df`", call. = FALSE)
  }

  val <- df[[time_val]]
  # NA time stamps carry no information about the range; ignore them so
  # range()/seq() below do not fail
  observed <- val[!is.na(val)]
  if (length(observed) == 0L) {
    return(df[0L, , drop = FALSE])
  }

  val_range <- range(observed)
  comp <- seq(val_range[1], val_range[2], by = by)
  which_miss <- comp[!comp %in% observed]

  # how to impute the non-time columns depends on the problem domain; this
  # simple, non-generic solution leaves them all NA.
  # Indexing rows with NA yields all-NA rows that keep every column's type,
  # and degrades gracefully to a zero-row frame when there are no gaps.
  df2 <- df[rep(NA_integer_, length(which_miss)), , drop = FALSE]
  df2[[time_val]] <- which_miss
  rownames(df2) <- NULL
  df2
}
run
insert_miss(<your_df>)
R> A cond T Vlog
1 NA NA 421 NA
2 NA NA 422 NA
Your example data doesn't match the chart image you posted, but here's an answer with random data
# Simulated x-y series: a 200-step random walk (seeded for reproducibility)
set.seed(123)
dat <- data.frame(
  x = 1:200,
  y = cumsum(rnorm(200))
)
# Remove two runs of rows so that the series contains gaps
dat <- dat[-c(20:40, 90:120), ]
# Spacing between every x value and its successor
diff2next <- diff(dat$x)
# A spacing larger than 1 marks the last point before a gap ...
holes_start <- which(diff2next > 1)
# ... and the gap closes on the very next row
holes_end <- holes_start + 1
# Draw the series as a line and overlay, in red, the points bordering each hole
hole_pts <- dat[c(holes_start, holes_end), ]
ggplot() +
  geom_line(aes(x, y), data = dat) +
  geom_point(aes(x, y), data = hole_pts, colour = "red")
Assuming that your data frame is called ts.df and the T variable is sequential (as in, it increases by one and only by one at each and every data point), you can generate a data.frame with all T values in range and OUTER JOIN it with your existing data.frame to get NAs filled in automatically:
# Placeholder rows: one per T value in the observed range, with fixed A and cond
ids <- data.frame(T = seq(from = min(ts.df$T), to = max(ts.df$T)), A = 0, cond = "Si")
# merge() joins on every column the two frames share (presumably T, A and cond
# when ts.df is the question's data frame -- confirm against your data).
# all = TRUE makes this a full outer join: missing T values are added with NA
# in the remaining columns, and no existing row of ts.df is dropped. A right
# join (all.y = TRUE) would silently discard every observation whose A/cond
# differ from the placeholder values above.
ts.df <- merge(ts.df, ids, all = TRUE)
ggplot(ts.df, aes(T, Vlog)) + geom_line() + geom_point()
This will assign the value Si to the cond variable and the value 0 to the A variable in every inserted row. The first one seems about right; the second one is irrelevant for your chart anyway.
You might need to split entire data.frame by condition, run above code to modify subset for one condition and join data.frames again to get it working on your current ggplot() call, but since you haven't posted reproducible example of your problem, there is only so much I can do.