Rotate a faceted, grouped bar plot - r

**UPDATED BELOW
I have created a plot that I need to be horizontal, but coord_flip() leaves the facet strips at the bottom instead of nesting the groups on the left.
The data:
srvc_data <- structure(list(dept = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Distribution Centre Services",
"IT", "Marketing", "Merchandise & Inventory", "Operations and Communication"
), class = "factor"), label = c("test5", "test7", "test3", "test10",
"test4", "test6", "test2", "test1", "test11", "test12", "test9",
"test8", "test18", "test19", "test15", "test17", "test13", "test16",
"test20", "test14", "test22", "test21", "test25", "test23", "test24",
"test27", "test26", "test28", "test29", "test31", "test33", "test30",
"test32", "test38", "test36", "test37", "test43", "test34", "test35",
"test40", "test39", "test42", "test41", "test5", "test7", "test3",
"test10", "test4", "test6", "test2", "test1", "test11", "test12",
"test9", "test8", "test18", "test19", "test15", "test17", "test13",
"test16", "test20", "test14", "test22", "test21", "test25", "test23",
"test24", "test27", "test26", "test28", "test29", "test31", "test33",
"test30", "test32", "test38", "test36", "test37", "test43", "test34",
"test35", "test40", "test39", "test42", "test41"), Gap = c(-0.07,
-0.13, -0.15, -0.16, -0.16, -0.21, -0.22, -0.24, -0.24, -0.25,
-0.3, -0.3, -0.18, -0.19, -0.24, -0.29, -0.3, -0.34, -0.36, -0.41,
-0.46, -0.63, -0.16, -0.18, -0.21, -0.22, -0.27, -0.29, -0.31,
-0.31, -0.35, -0.39, -0.42, -0.15, -0.15, -0.2, -0.21, -0.22,
-0.27, -0.29, -0.29, -0.31, -0.36, -0.07, -0.13, -0.15, -0.16,
-0.16, -0.21, -0.22, -0.24, -0.24, -0.25, -0.3, -0.3, -0.18,
-0.19, -0.24, -0.29, -0.3, -0.34, -0.36, -0.41, -0.46, -0.63,
-0.16, -0.18, -0.21, -0.22, -0.27, -0.29, -0.31, -0.31, -0.35,
-0.39, -0.42, -0.15, -0.15, -0.2, -0.21, -0.22, -0.27, -0.29,
-0.29, -0.31, -0.36), impeff = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("Importance", "Effectiveness"), class = "factor"),
score = c(0.87, 0.79, 0.78, 0.82, 0.81, 0.81, 0.92, 0.92,
0.78, 0.81, 0.86, 0.91, 0.79, 0.79, 0.87, 0.93, 0.9, 0.9,
0.82, 0.95, 0.91, 0.95, 0.77, 0.79, 0.82, 0.8, 0.83, 0.9,
0.91, 0.94, 0.89, 0.94, 0.91, 0.82, 0.74, 0.78, 0.81, 0.83,
0.85, 0.82, 0.81, 0.8, 0.83, 0.8, 0.66, 0.63, 0.66, 0.65,
0.6, 0.7, 0.68, 0.54, 0.56, 0.56, 0.61, 0.61, 0.6, 0.63,
0.64, 0.6, 0.56, 0.46, 0.54, 0.45, 0.32, 0.61, 0.61, 0.61,
0.58, 0.56, 0.61, 0.6, 0.63, 0.54, 0.55, 0.49, 0.67, 0.59,
0.58, 0.6, 0.61, 0.58, 0.53, 0.52, 0.49, 0.47)), row.names = c(NA,
-86L), .Names = c("dept", "label", "Gap", "impeff", "score"), class = "data.frame")
And the code:
# Vertical (un-flipped) version: one facet column per department, with the
# facet strips switched to the bottom of the panels.
ggplot(data = srvc_data, aes(x = label, y = score)) +
  geom_bar(
    aes(fill = impeff),
    stat = "identity", position = "dodge", width = 1
  ) +
  facet_grid(~dept, switch = "x", scales = "free", space = "free")
# coord_flip() is left out here on purpose; appending `+ coord_flip()` to the
# call above rotates the bars but keeps the facet strips at the bottom.
The plot (without the flip) looks like the below, I need it horizontal, with the facet categories on the far left. How does the coord_flip() work? Why wouldn't it also flip/move the facet strips? Please ignore the crammed formatting!
**UPDATE
So thanks to @neilfws I have fixed the plot by reversing the faceting formula (and the switch argument).
# Horizontal version: departments are faceted in rows (dept ~ .), the strips
# are switched on the y side, and coord_flip() swaps the x and y axes.
ggplot(data = srvc_data, aes(x = label, y = score)) +
geom_bar( aes(fill = impeff),stat = "identity", position = "dodge",width = 1) +
facet_grid(dept~., switch = "y", scales = "free_y", space = "free") +
coord_flip()
Now I have the correctly oriented plot, but there is a lot of wasted space: every facet still shows all the labels, including the ones that are not used in that facet. Within the facet_grid call, setting scales = "free" doesn't work, nor does drop = TRUE. Any ideas? Plot below for reference.

If you coord_flip, you also need to reverse the faceting relationship (~), to place it on the side, and the switch, to place it on the y-axis. Does this get you close to what you want?
# Facet by department in rows, switch the strips to the opposite (left-hand)
# side, and only then flip the coordinates so the bars run horizontally.
ggplot(data = srvc_data, mapping = aes(x = label, y = score)) +
  geom_col(mapping = aes(fill = impeff), position = "dodge", width = 1) +
  facet_grid(
    rows = vars(dept),
    switch = "y",
    scales = "free",
    space = "free"
  ) +
  coord_flip()

Related

Flexdashboard checkboxGroupInput losing ggplot data

I have an issue that is related to this one, but was unable to come to a solution for mine.
I have a reactive ggplot that I would like to update using a check box based on group data.
Currently, when I have ONE box selected, the data displays correctly. If I select more than one check box, I lose data points. See pictures below. I think I have to change the way I'm filtering my data and use droplevels somewhere but not sure how to integrate that (I'm new to shiny!). Any suggestions are appreciated!
WHOC_Sum_CMJ <- structure(list(Athlete = structure(c(1L, 1L, 1L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L,
14L, 14L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L), .Label = c("Athlete 1", "Athlete 10",
"Athlete 11", "Athlete 12", "Athlete 13", "Athlete 14", "Athlete 2",
"Athlete 3", "Athlete 4", "Athlete 5", "Athlete 6", "Athlete 7",
"Athlete 8", "Athlete 9"), class = "factor"), Date = structure(c(1L,
4L, 5L, 1L, 3L, 5L, 7L, 2L, 3L, 5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L,
5L, 7L, 1L, 3L, 6L, 7L, 2L, 4L, 5L, 8L, 1L, 3L, 5L, 7L, 1L, 3L,
5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L,
6L, 7L, 1L, 3L, 5L, 7L), .Label = c("2020-01-06", "2020-01-07",
"2020-01-13", "2020-01-14", "2020-01-21", "2020-01-23", "2020-01-27",
"2020-01-28"), class = "factor"), Position = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("DEF", "FWD", "GOALIE"), class = "factor"),
Program = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("Navy", "Red", "RTP", "White"), class = "factor"),
mRSI = c(0.36, 0.38, 0.42, 0.46, 0.46, 0.47, 0.48, 0.31,
0.3, 0.24, 0.3, 0.29, 0.26, 0.28, 0.28, 0.36, 0.35, 0.43,
0.43, 0.28, 0.31, 0.28, 0.3, 0.33, 0.36, 0.35, 0.37, 0.37,
0.36, 0.37, 0.36, 0.3, 0.36, 0.34, 0.37, 0.26, 0.28, 0.34,
0.3, 0.39, 0.4, 0.43, 0.43, 0.43, 0.47, 0.46, 0.48, 0.34,
0.36, 0.33, 0.37, 0.28, 0.28, 0.34, 0.33), SystemWeight = c(617.21,
612.4, 620.45, 672.08, 682.23, 670.5, 663.41, 517.33, 515.23,
511.62, 517.85, 697.55, 703.92, 689.43, 691.33, 859.06, 845.9,
850.97, 851.84, 655.79, 665.09, 673.91, 667.92, 626.78, 632.92,
634.52, 624.88, 637.55, 645.6, 648.78, 646.64, 558.03, 563.23,
569.58, 560.95, 693.63, 695.54, 684.37, 684.58, 641.18, 660.8,
663.95, 660, 594.92, 596.97, 591.36, 585.64, 522.35, 518.17,
530.95, 523.5, 780.65, 789.81, 775.84, 775.48), FTCT = c(0.61,
0.62, 0.67, 0.74, 0.75, 0.77, 0.77, 0.54, 0.55, 0.44, 0.53,
0.53, 0.49, 0.53, 0.56, 0.6, 0.58, 0.68, 0.68, 0.53, 0.57,
0.54, 0.55, 0.61, 0.63, 0.64, 0.65, 0.59, 0.58, 0.59, 0.59,
0.51, 0.59, 0.59, 0.59, 0.53, 0.57, 0.63, 0.59, 0.76, 0.76,
0.79, 0.78, 0.67, 0.72, 0.72, 0.74, 0.63, 0.65, 0.61, 0.63,
0.49, 0.5, 0.53, 0.57), JumpHeight_cm = c(28.97, 29.78, 31.43,
35.83, 35.41, 36.59, 36.92, 27.56, 26.11, 26.15, 26.82, 26.15,
25.08, 24.98, 24.62, 29.39, 30.17, 32.42, 32.56, 26.6, 27.25,
25.58, 27.88, 29.17, 31.58, 28.48, 31.24, 33.73, 32.78, 33.09,
33.43, 29.73, 31.91, 30.65, 32.98, 24.15, 24.24, 27.57, 25.44,
26.68, 26.39, 27.43, 28.87, 35.44, 36.29, 35.71, 36.06, 26.79,
27.76, 26.82, 29.71, 28.69, 26.9, 31.12, 29.77), EJH = c(17.6,
18.58, 21.11, 26.66, 26.69, 28.08, 28.38, 14.99, 14.39, 11.41,
14.33, 13.8, 12.34, 13.29, 13.67, 17.58, 17.5, 22.03, 22.19,
14.03, 15.59, 13.92, 15.39, 17.7, 19.75, 18.37, 20.3, 19.99,
18.9, 19.62, 19.61, 15.09, 18.8, 18.18, 19.6, 12.78, 13.87,
17.28, 15.06, 20.44, 20.12, 21.74, 22.52, 23.8, 26.25, 25.68,
26.73, 16.99, 18.13, 16.42, 18.82, 14.09, 13.43, 16.61, 16.9
), Weight = c(62.94, 62.45, 63.27, 68.54, 69.57, 68.38, 67.65,
52.76, 52.54, 52.17, 52.81, 71.13, 71.78, 70.31, 70.5, 87.61,
86.26, 86.78, 86.87, 66.88, 67.82, 68.72, 68.11, 63.92, 64.54,
64.71, 63.72, 65.02, 65.84, 66.16, 65.94, 56.91, 57.44, 58.09,
57.2, 70.74, 70.93, 69.79, 69.81, 65.39, 67.39, 67.71, 67.31,
60.67, 60.88, 60.31, 59.72, 53.27, 52.84, 54.15, 53.39, 79.61,
80.54, 79.12, 79.08)), class = "data.frame", row.names = c(NA,
-55L))
```
checkboxGroupInput("Program", label = "Program", choices = unique(WHOC_Sum_CMJ$Program), selected = "Red", inline = TRUE)
# (Note: for the code I cut out some of the styling to make it more readable. That's why it looks different than the pictures).
```
```
# Reactive line/point plot of jump height per athlete, restricted to the
# program(s) ticked in the "Program" checkbox group.
renderPlot({
# Keep only the plotted columns, then filter on the checkbox selection.
f <- WHOC_Sum_CMJ %>%
select(Date, Athlete, JumpHeight_cm, Program)%>%
# NOTE(review): `==` compares Program element-wise against input$Program and
# recycles it when several boxes are ticked, which would explain the missing
# points -- `%in%` is presumably what is intended here.
filter(Program == input$Program)
p <- ggplot(f)+
geom_line(aes(x=Date, y=JumpHeight_cm, colour = Athlete))+
geom_point(aes(x=Date, y=JumpHeight_cm, colour = Athlete))+
theme_bw() +
labs(title = "Team Jump Height",
x = "Date",
y = "Jump Height (cm)")+
# The x limits come from the full data set, not from the filtered subset f.
scale_x_date(limits = c(min = min(WHOC_Sum_CMJ$Date), max = max(WHOC_Sum_CMJ$Date)), labels = date_format("%m/%d"),
date_breaks = "2 weeks", expand = c(.08,0))+
guides(col = guide_legend(nrow = 3))+
# Text labels are drawn only for the rows at the earliest date in f.
geom_text_repel(data= subset(f, Date == min(Date)), aes(x=Date, y=JumpHeight_cm,label = unique(Athlete)),
force = .1,
nudge_x = -2,
direction = "y",
hjust = 1,
)
p
})
```
The issue in your code is indeed in the filter call. When filtering against a vector of values you need to use %in% instead of ==. Please see the following:
---
title: "Test"
output: flexdashboard::flex_dashboard
runtime: shiny
---
```{r global, include=FALSE}
library(ggplot2)
library(dplyr)
library(scales)
library(ggrepel)
WHOC_Sum_CMJ <- structure(list(Athlete = structure(c(1L, 1L, 1L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L,
14L, 14L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L), .Label = c("Athlete 1", "Athlete 10",
"Athlete 11", "Athlete 12", "Athlete 13", "Athlete 14", "Athlete 2",
"Athlete 3", "Athlete 4", "Athlete 5", "Athlete 6", "Athlete 7",
"Athlete 8", "Athlete 9"), class = "factor"), Date = structure(c(1L,
4L, 5L, 1L, 3L, 5L, 7L, 2L, 3L, 5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L,
5L, 7L, 1L, 3L, 6L, 7L, 2L, 4L, 5L, 8L, 1L, 3L, 5L, 7L, 1L, 3L,
5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L, 5L, 7L, 1L, 3L,
6L, 7L, 1L, 3L, 5L, 7L), .Label = c("2020-01-06", "2020-01-07",
"2020-01-13", "2020-01-14", "2020-01-21", "2020-01-23", "2020-01-27",
"2020-01-28"), class = "factor"), Position = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("DEF", "FWD", "GOALIE"), class = "factor"),
Program = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), .Label = c("Navy", "Red", "RTP", "White"), class = "factor"),
mRSI = c(0.36, 0.38, 0.42, 0.46, 0.46, 0.47, 0.48, 0.31,
0.3, 0.24, 0.3, 0.29, 0.26, 0.28, 0.28, 0.36, 0.35, 0.43,
0.43, 0.28, 0.31, 0.28, 0.3, 0.33, 0.36, 0.35, 0.37, 0.37,
0.36, 0.37, 0.36, 0.3, 0.36, 0.34, 0.37, 0.26, 0.28, 0.34,
0.3, 0.39, 0.4, 0.43, 0.43, 0.43, 0.47, 0.46, 0.48, 0.34,
0.36, 0.33, 0.37, 0.28, 0.28, 0.34, 0.33), SystemWeight = c(617.21,
612.4, 620.45, 672.08, 682.23, 670.5, 663.41, 517.33, 515.23,
511.62, 517.85, 697.55, 703.92, 689.43, 691.33, 859.06, 845.9,
850.97, 851.84, 655.79, 665.09, 673.91, 667.92, 626.78, 632.92,
634.52, 624.88, 637.55, 645.6, 648.78, 646.64, 558.03, 563.23,
569.58, 560.95, 693.63, 695.54, 684.37, 684.58, 641.18, 660.8,
663.95, 660, 594.92, 596.97, 591.36, 585.64, 522.35, 518.17,
530.95, 523.5, 780.65, 789.81, 775.84, 775.48), FTCT = c(0.61,
0.62, 0.67, 0.74, 0.75, 0.77, 0.77, 0.54, 0.55, 0.44, 0.53,
0.53, 0.49, 0.53, 0.56, 0.6, 0.58, 0.68, 0.68, 0.53, 0.57,
0.54, 0.55, 0.61, 0.63, 0.64, 0.65, 0.59, 0.58, 0.59, 0.59,
0.51, 0.59, 0.59, 0.59, 0.53, 0.57, 0.63, 0.59, 0.76, 0.76,
0.79, 0.78, 0.67, 0.72, 0.72, 0.74, 0.63, 0.65, 0.61, 0.63,
0.49, 0.5, 0.53, 0.57), JumpHeight_cm = c(28.97, 29.78, 31.43,
35.83, 35.41, 36.59, 36.92, 27.56, 26.11, 26.15, 26.82, 26.15,
25.08, 24.98, 24.62, 29.39, 30.17, 32.42, 32.56, 26.6, 27.25,
25.58, 27.88, 29.17, 31.58, 28.48, 31.24, 33.73, 32.78, 33.09,
33.43, 29.73, 31.91, 30.65, 32.98, 24.15, 24.24, 27.57, 25.44,
26.68, 26.39, 27.43, 28.87, 35.44, 36.29, 35.71, 36.06, 26.79,
27.76, 26.82, 29.71, 28.69, 26.9, 31.12, 29.77), EJH = c(17.6,
18.58, 21.11, 26.66, 26.69, 28.08, 28.38, 14.99, 14.39, 11.41,
14.33, 13.8, 12.34, 13.29, 13.67, 17.58, 17.5, 22.03, 22.19,
14.03, 15.59, 13.92, 15.39, 17.7, 19.75, 18.37, 20.3, 19.99,
18.9, 19.62, 19.61, 15.09, 18.8, 18.18, 19.6, 12.78, 13.87,
17.28, 15.06, 20.44, 20.12, 21.74, 22.52, 23.8, 26.25, 25.68,
26.73, 16.99, 18.13, 16.42, 18.82, 14.09, 13.43, 16.61, 16.9
), Weight = c(62.94, 62.45, 63.27, 68.54, 69.57, 68.38, 67.65,
52.76, 52.54, 52.17, 52.81, 71.13, 71.78, 70.31, 70.5, 87.61,
86.26, 86.78, 86.87, 66.88, 67.82, 68.72, 68.11, 63.92, 64.54,
64.71, 63.72, 65.02, 65.84, 66.16, 65.94, 56.91, 57.44, 58.09,
57.2, 70.74, 70.93, 69.79, 69.81, 65.39, 67.39, 67.71, 67.31,
60.67, 60.88, 60.31, 59.72, 53.27, 52.84, 54.15, 53.39, 79.61,
80.54, 79.12, 79.08)), class = "data.frame", row.names = c(NA,
-55L))
WHOC_Sum_CMJ$Date <- as.Date(WHOC_Sum_CMJ$Date)
```
Column {.sidebar}
-----------------------------------------------------------------------
```{r}
checkboxGroupInput("Program", label = "Program", choices = unique(WHOC_Sum_CMJ$Program), selected = "Red", inline = TRUE)
# (Note: for the code I cut out some of the styling to make it more readable. That's why it looks different than the pictures).
```
Column
-----------------------------------------------------------------------
```{r}
# Reactive line/point plot of jump height per athlete for the program(s)
# ticked in the "Program" checkbox group.
renderPlot({
  # Wait until at least one program is ticked; with nothing selected the
  # filtered data would be empty and min(Date) below would be undefined.
  req(input$Program)

  # Keep every row whose Program is among the ticked boxes. input$Program can
  # hold several values, so %in% (not ==) is required here.
  f <- WHOC_Sum_CMJ %>%
    dplyr::select(Date, Athlete, JumpHeight_cm, Program) %>%
    filter(Program %in% input$Program)

  # Rows used for the text labels: the earliest date in the current selection.
  first_obs <- subset(f, Date == min(Date))

  p <- ggplot(f, aes(x = Date, y = JumpHeight_cm)) +
    geom_line(aes(colour = Athlete)) +
    geom_point(aes(colour = Athlete)) +
    theme_bw() +
    labs(
      title = "Team Jump Height",
      x = "Date",
      y = "Jump Height (cm)"
    ) +
    # Fix the x range to the full data set so the axis does not jump when the
    # selection changes.
    scale_x_date(
      limits = range(WHOC_Sum_CMJ$Date),
      labels = date_format("%m/%d"),
      date_breaks = "2 weeks",
      expand = c(.08, 0)
    ) +
    guides(col = guide_legend(nrow = 3)) +
    # Map the label row-by-row (label = Athlete); unique(Athlete) only has the
    # right length when every athlete occurs exactly once in first_obs.
    geom_text_repel(
      data = first_obs,
      aes(label = Athlete),
      force = .1,
      nudge_x = -2,
      direction = "y",
      hjust = 1
    )
  p
})
```

shade a facet.grid of kernel density plots with ggplot2

Consider the following df:
df<-structure(list(Trial = structure(c(1L, 5L, 1L, 5L, 1L, 4L, 3L,
2L, 2L, 4L, 3L, 3L, 2L, 5L, 4L, 1L, 2L, 3L, 5L, 1L, 2L, 1L, 4L,
3L, 1L, 3L, 3L, 2L, 3L, 5L, 1L, 3L, 3L, 5L, 5L, 1L, 4L, 3L, 3L,
1L, 1L, 5L, 5L, 1L, 3L, 5L, 2L, 1L, 5L, 3L, 2L, 1L, 4L, 3L, 5L,
3L, 4L, 1L, 2L, 2L, 2L, 2L, 4L, 1L, 4L, 5L, 3L, 1L, 5L, 3L, 3L,
4L, 2L, 2L, 4L, 4L, 1L, 3L, 4L, 5L, 4L, 2L, 3L, 1L, 1L, 4L, 2L,
3L, 5L, 2L, 2L, 4L, 1L, 4L, 4L, 5L, 2L, 4L, 2L, 4L, 1L, 4L, 3L,
5L, 4L, 5L, 2L, 3L, 2L, 2L, 5L, 1L, 3L, 3L, 3L, 1L, 2L, 4L, 5L,
3L, 1L, 2L, 5L, 1L, 4L, 3L, 2L, 2L, 5L, 1L, 5L, 1L, 4L, 5L, 5L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 2L, 5L, 2L, 5L, 2L, 4L, 1L), .Label =
c("ES8-13", "ES14-25", "ES26-38", "SA1-12", "SA14-25"), class = "factor"),
MAF = c(-0.022, 0.141, -0.035, 0.076, -0.019, -0.064, -0.044,
0.088, 0.067, 0.049, 0.088, 0.053, -0.052, -0.078, 0.104,
-0.019, -0.075, -0.049, 0.098, -0.145, 0.094, 0.014, 0.016,
0.00599999999999999, 0.027, 0.117, -0.011, 0.055, 0.079,
0.26, -0.049, 0.065, 0.227, 0.141, -0.091, -0.021, -0.306,
0.162, -0.089, -0.068, 0.00700000000000001, 0.034, 0.02,
0.038, 0.129, 0.099, 0.06, -0.09, 0.104, 0.195, 0.165, -0.047,
0.074, -0.01, 0.002, -0.068, 0.054, 0.012, -0.012, 0.222,
0.046, 0.00700000000000001, -0.022, 0.00499999999999995,
-0.051, 0.126, 0.073, 0.094, -0.254, 0.185, 0.238, 0.099,
0.027, 0.044, -0.018, 0.014, -0.058, -0.005, -0.00999999999999998,
-0.002, 0.061, 0.178, 0.001, 0.105, -0.001, -0.088, 0.113,
0.134, 0.175, 0.06, -0.026, 0.048, 0.003, 0.049, 0.0649999999999999,
-0.135, -0.036, -0.069, 0.015, -0.058, 0.024, 0.093, 0.123,
-0.144, 0.011, 0.343, 0.002, -0.018, 0.055, -0.047, -0.317,
-0.033, -0.018, 0.068, -0.044, 0.05, 0.079, 0.122, -0.071,
0.13, 0.078, 0.085, 0.012, -0.02, -0.088, -0.086, -0.026,
0.046, 0.101, -0.026, 0.005, 0.00700000000000001, 0.064,
0.066, -0.085, 0.114, 0.003, 0.004, -0.003, 0.097, 0.055,
-0.063, -0.089, 0.104, -0.199, 0.01, 0.184, 0.183, 0.129,
-0.059)), row.names = c(1146L, 163986L, 34946L, 168682L,
33356L, 152862L, 103827L, 54557L, 68666L, 141066L, 118349L, 93909L,
67299L, 193633L, 129212L, 39273L, 71459L, 102636L, 176655L, 30543L,
46107L, 32608L, 122906L, 100356L, 37635L, 81566L, 116510L, 61803L,
96219L, 187927L, 9211L, 106999L, 88554L, 181316L, 176250L, 32656L,
150472L, 80615L, 111414L, 16038L, 23319L, 185075L, 175803L, 32648L,
106332L, 185991L, 65155L, 32165L, 189972L, 92486L, 44161L, 404L,
123856L, 80513L, 180030L, 101190L, 145315L, 5498L, 75891L, 77358L,
67571L, 72894L, 127763L, 6584L, 139250L, 163126L, 101492L, 22520L,
181276L, 82673L, 94756L, 142750L, 48377L, 59931L, 140900L, 154339L,
2769L, 110265L, 130494L, 186334L, 138079L, 50754L, 82207L, 24578L,
26393L, 128021L, 69283L, 84549L, 187875L, 76775L, 45715L, 138049L,
1972L, 137218L, 158324L, 200014L, 61611L, 147430L, 60938L, 154928L,
22421L, 159532L, 98190L, 166565L, 151667L, 180407L, 55681L, 89127L,
54396L, 65975L, 172695L, 21969L, 80439L, 81202L, 87282L, 35394L,
53137L, 131886L, 163181L, 84221L, 32007L, 57711L, 160393L, 32843L,
157924L, 104820L, 63993L, 55023L, 160342L, 20800L, 167583L, 15849L,
143476L, 172878L, 195659L, 49812L, 4971L, 44583L, 24399L, 77026L,
16862L, 56500L, 113282L, 65688L, 188635L, 75437L, 190601L, 54633L,
137420L, 27389L), class = "data.frame")
Here is a snippet of the df:
Trial MAF
ES8-13 -0.022
SA14-25 0.141
ES8-13 -0.035
SA14-25 0.076
ES8-13 -0.019
SA1-12 -0.064
I have produced the following kernel density plot :
# Kernel density of MAF, drawn in one facet row per level of Trial.
p <- ggplot(data = df, mapping = aes(x = MAF)) +
  geom_density(fill = "grey") +
  facet_grid(rows = vars(Trial))
p
I would like to shade both tail regions, i.e. the values that fall outside the central 90% of the distribution (below the 5% quantile and above the 95% quantile). With the following command, for example, I can get the quantiles for the whole df:
qt <- quantile(df$MAF,probs=c(.05,.95))
But I rather need the quantiles for every level of the factor Trial as follows:
library(dplyr)

# Per-trial 5% and 95% quantiles of MAF: one row per level of Trial.
# The data frame is `df`, the same object used for the plot and for `qt`.
qt05 <- df %>%
  group_by(Trial) %>%
  summarise(quantile(MAF, probs = c(.05)))
qt95 <- df %>%
  group_by(Trial) %>%
  summarise(quantile(MAF, probs = c(.95)))
With those quantiles in mind, I need to shade the tails in each facet of the graph using the quantiles of the corresponding level of Trial. I found solutions for this problem, but only for the single-plot case.
Could someone help me get this done for the facet_grid case?
I use library(ggridges) for distribution viz like this, because it has a lot of nice features, including the ability to customize quantile shading!
Here is an example without the faceting, because with this strategy you might not need to facet anymore:
library(ggridges)
# Ridgeline density per Trial; calc_ecdf = TRUE together with `quantiles`
# splits each ridge at the 5% and 95% quantiles so that the three regions can
# be filled separately via the computed `quantile` variable.
ggplot(df, aes(x = MAF, y = Trial, fill = factor(..quantile..))) +
  stat_density_ridges(
    geom = "density_ridges_gradient", calc_ecdf = TRUE,
    quantiles = c(0.05, 0.95), scale = 1
  ) +
  scale_fill_manual(
    values = c("#FDE725FF", "#A0A0A0A0", "#FDE725FF"),
    name = NULL,
    # Labels follow the region order: below 5%, 5%-95%, above 95%.
    labels = c("lower 5%", "middle 90%", "upper 5%")
  )
If you still want to do the facet route, one drawback is that stat_density_ridges requires a y aesthetic. So I would do something like this to tweak the theme a bit and keep the plot looking pretty and clean (no one will ever know there is a y aes lurking in there!):
# Faceted variant: Trial is still mapped to y (needed by stat_density_ridges),
# each Trial gets its own facet row, and the y-axis text and ticks are hidden.
ggplot(
  data = df,
  mapping = aes(x = MAF, y = Trial, fill = factor(..quantile..))
) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = c(0.05, 0.95),
    scale = 1
  ) +
  scale_fill_manual(
    labels = c("lower 5%", "middle 90%", "upper 5%"),
    name = NULL,
    values = c("#FDE725FF", "#A0A0A0A0", "#FDE725FF")
  ) +
  facet_grid(rows = vars(Trial), scales = "free_y") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank() # clean up overhead
  )
Obviously you can tweak the colors and labels as you see fit, just make sure they make sense with the quantiles you set in the geom layer. Let me know if you have more questions.

Optimizing degrees of freedom in spline regression

I have two gene-expression time-course data sets:
First, gene expression was measured over 14 time points from 4 groups:
df1 <- structure(list(val = c(-0.1, -0.13, -0.4, -0.3, -0.3, -0.2, -0.24,
0.1, 0.2, 0.13, 0, 0.63, 0.83, 0.85, -0.07, -0.07, -0.27, -0.2,
-0.2, -0.1, 0.2, 0.1, 0.07, 0.17, 0.6, 0.75, 1.1, 1.1, -0.13,
-0.15, -0.26, -0.25, -0.14, 0.04, 0.2, 0.24, 0.23, 0.2, 0.1,
0.73, 1, 1.3, 0, 0.06, -0.24, -0.17, -0.17, -0.04, 0.16, 0.1,
0.14, 0.27, 0.34, 0.9, 0.97, 1.04),
time = c(-1, 0, 1, 1.58,2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17, 7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17, 7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17,7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58,6.17, 7.39),
group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,4L),
.Label = c("a", "b", "c", "d"), class = "factor")), .Names = c("val","time", "group"),
row.names = c(NA, -56L), class = "data.frame")
df1$group <- factor(df1$group,levels=c("a","b","c","d"))
which looks like this (adding a loess smoothed trend line):
library(ggplot2)
# val over time, coloured by group, with a smoothed trend line per group and
# no confidence band (se = FALSE).
ggplot(df1, aes(x = time, y = val, color = group)) +
  geom_point() +
  theme_minimal() +
  geom_smooth(se = FALSE) +
  theme(legend.position = "top", legend.title = element_blank())
Second, gene expression was measured over similar 14 time points but now from 2 different groups, each represented by the two sexes:
df2 <- structure(list(val = c(-0.23, -0.01, -0.14, -0.01, -0.21, -0.16,
-0.24, -0.11, 0.02, -0.11, -0.01, -0.25, -0.47, -1.25, 0.02,
-0.3, -0.02, 0.14, 0.25, -0.05, 0.15, 0.11, -0.24, -0.18, -0.39,
-0.49, -0.5, -0.65, -0.06, 0.09, 0.1, 0.15, 0.08, 0.15, 0.4,
0.24, 0.07, 0.08, -0.18, -0.35, -0.19, -0.81, -0.16, 0.29, -0.05,
0.14, 0.14, 0.48, 0.34, 0.11, -0.07, -0.13, -0.41, -0.22, -0.54,
-0.76, 0.35, 0.34, -0.06, 0.21, 0.14, 0.14, 0.25, 0.22, 0.25,
0.16, 0.3, 0.44, 0.08, 0.48, 0.1, 0.16, -0.03, -0.22, 0.2, 0.01,
-0.09, -0.02, -0.01, 0.06, -0.13, 0.19, 0.11, -0.04, -0.39, 0.03,
-0.01, 0.09, 0.1, -0.14, -0.12, -0.1, 0.36, 0.08, 0.09, 0.09,
0.42, 0.37, -0.14, 0.12, 0.09, 0.03, 0.06, -0.25, 0.2, -0.06,
-0.44, 0.23, 0.03, 0.16, 0.81, 0.83),
time = c(-1, 0, 1, 1.58,2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17, 7.39,
-1, 0,1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17, 7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17,7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58,6.17, 7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58,5.58, 6.17, 7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17,4.58, 5.58, 6.17, 7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17, 7.39,
-1, 0, 1, 1.58, 2, 2.58, 3, 3.32, 3.58, 4.17, 4.58, 5.58, 6.17, 7.39),
sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
.Label = c("F", "M"), class = "factor"), group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("a", "b"), class = "factor")), .Names = c("val", "time", "sex", "group"), row.names = c(NA, -112L), class = "data.frame")
df2$sex <- ordered(df2$sex,levels=c("M","F"))
df2$group <- ordered(df2$group,levels=c("a","b"))
df2$col <- factor(paste0(df2$group,":",df2$sex))
which looks like this (adding a loess smoothed trend line):
# val over time, coloured by the combined group:sex label, with a smoothed
# trend line per colour and no confidence band (se = FALSE).
ggplot(df2, aes(x = time, y = val, color = col)) +
  geom_point() +
  theme_minimal() +
  geom_smooth(se = FALSE) +
  theme(legend.position = "top", legend.title = element_blank())
For df1, I would like to estimate the effect of time on val, adjusting for group.
For df2, I would like to estimate the effect of time:group on val, adjusting for sex.
Looking at the data I thought using spline regressions would be appropriate so I used the gam function from the mgcv package, which as far as I understand optimizes the degrees of freedom of the splines fitted to the data.
This is what I fitted for df1:
mgcv1.fit <- mgcv::gam(val ~ group+s(time),data=df1)
Which gives:
Family: gaussian
Link function: identity
Formula:
val ~ group + s(time)
Estimated degrees of freedom:
7.18 total = 11.18
GCV score: 0.01258176
But 7.18 degrees of freedom seems too much for these data.
For df2:
mgcv2.fit <- mgcv::gam(val ~ sex+s(time,by=group),data=df2)
which gives:
Family: gaussian
Link function: identity
Formula:
val ~ sex + s(time, by = group)
Estimated degrees of freedom:
1.72 total = 3.72
GCV score: 0.08522094
In this case I would have expected the degrees of freedom to be somewhat higher.
One more point. Plotting the fitted values for these two data sets:
# Store the fitted values of the df1 model next to the data and plot them
# per group, again without a confidence band.
df1$mgcv <- mgcv1.fit$fitted.values
ggplot(df1, aes(x = time, y = mgcv, color = group)) +
  geom_point() +
  theme_minimal() +
  geom_smooth(se = FALSE) +
  theme(legend.position = "top", legend.title = element_blank())
which looks fine.
But for df2
# Store the fitted values of the df2 model next to the data and plot them
# per group:sex colour, again without a confidence band.
df2$mgcv <- mgcv2.fit$fitted.values
ggplot(df2, aes(x = time, y = mgcv, color = col)) +
  geom_point() +
  theme_minimal() +
  geom_smooth(se = FALSE) +
  theme(legend.position = "top", legend.title = element_blank())
Looks like it flipped the group labels.
So my questions are:
Am I using mgcv::gam correctly for optimizing the spline degrees of freedom for my questions?
Does mgcv reorder the samples in its fitted.values?
First of all, mgcv does the right thing with the factor levels. If you check str(df2$sex), you will see that "M" (male) is the first level and "F" (female) is the second. But str(df2$col) shows that "F" comes first there, so you get some mislabeling when making the plot.
Secondly, your second model has not been specified correctly.
The spline s(time) is under a centering constraint when there is no "by" variable, or when the "by" variable is a factor. So you need to provide your "by" variable group as a separate term in your model formula to capture its marginal effect;
Since the "by" variable group is an ordered variable, mgcv applies contrasts on it, dropping the first level "a" when constructing the s(time, by = group). So you need to provide a separate s(time) as the baseline smooth.
Your current mgcv2.fit is a rather poor model (not surprising), giving an explained deviance of 9%. But if you do the following you get 64%.
# Baseline smooth s(time), the group main effect, and a by-group smooth for
# the ordered factor; smoothing parameters are selected by REML.
# gam is namespace-qualified because mgcv is never attached with library().
mgcv::gam(
  val ~ sex + s(time) + group + s(time, by = group),
  data = df2, method = "REML"
)
The ggplot now looks right (I haven't changed df2$col so the coloring is still probably reversed).
gam defaults to use "GCV.Cp" as smoothing parameter selection method. But it is recommended to use "REML" as it is less prone to overfitting.
Remark 1
If the "by" variable group is a (non-ordered) factor, it is not subject to contrasts. So the model formula should be:
val ~ sex + group + s(time, by = group)
The following is quoted from 'by' variables section of ?gam.models:
If a ‘by’ variable is a ‘factor’ then it generates an indicator
vector for each level of the factor, unless it is an ‘ordered’
factor. In the non-ordered case, the model matrix for the smooth
term is then replicated for each factor level, and each copy has
its rows multiplied by the corresponding rows of its indicator
variable. The smoothness penalties are also duplicated for each
factor level. In short a different smooth is generated for each
factor level (the ‘id’ argument to ‘s’ and ‘te’ can be used to
force all such smooths to have the same smoothing parameter).
‘ordered’ ‘by’ variables are handled in the same way, except that
no smooth is generated for the first level of the ordered factor
(see ‘b3’ example below). This is useful for setting up
identifiable models when the same smooth occurs more than once in
a model, with different factor ‘by’ variables.
Remark 2
I am not here to judge your model, but there seems to be a clear within-group difference between "F" and "M". From your data we see that "F" and "M" have a bigger difference in group "b" than in group "a". In the current model the effect of sex is identical in both groups, and it is just a vertical shift. You can observe this in the above ggplot in this answer. It is up to you to decide on the model in the end, but just in case you want to model this sex-group interaction, you can do
# Auxiliary factor with one level per sex-by-group combination, used as the
# "by" variable so that each combination gets its own smooth of time.
df2$sex_group <- with(df2, interaction(sex, group)) ## the new variable is unordered
# gam is namespace-qualified because mgcv is never attached with library().
test <- mgcv::gam(
  val ~ sex + group + s(time, by = sex_group),
  data = df2, method = "REML"
)
Note how I provide two factor variables to by. An auxiliary variable sex_group is created.

Apply rules to growing window

I want to loop through the dataframe Out using a window that:
Grows one increment at a time (so the rear of the window is fixed and the front of the window grows - window gets bigger)
At each increment, the following rules should be run over the window:
if (mean(Speed_Out) <= 0.152682)
Behaviour <- "Lying"
else if ((percentage of Movement_Out == "left") <= 20.8 && mean(Speed_Out) >= 0.200921)
Behaviour <- "Grazing"
If no rules are met then the window should grow one increment at a time until a rule is met.
Once a rule is met, all of the previous increments should be labelled with the Behaviour assigned to that rule above.
The next window should then start at the next element after where the last window terminated.
The initial window size should be adjustable (the window size at the start and after each terminated window).
Notes:
The condition on Movement_Out == "left" means that "left" occupies at most 20.8% of the rows in the window.
Example:
Here's a short example of the output I'd like from the data provided below where the starting window size was set to 4:
Speed_Out Movement_Out Behaviour
1 0.220 left Lying
2 0.155 left Lying
3 0.120 forward Lying
4 0.090 non-moving Lying <== window terminates here
5 0.125 non-moving Grazing <== new window starts here
6 0.125 non-moving Grazing
7 0.155 non-moving Grazing
8 0.340 forward Grazing
9 0.370 forward Grazing <== window terminates here
10 0.185 forward Grazing <== new window starts here
11 0.155 right Grazing
12 0.220 non-moving Grazing
13 0.220 non-moving Grazing
14 0.280 non-moving Grazing <== window terminates here
15 0.215 non-moving Grazing <== new window starts here
16 0.060 right Grazing
17 0.340 non-moving Grazing
18 0.555 forward Grazing <== window terminates here
19 0.275 right And so on..
20 0.215 forward
Dataframe for your use
Out <- structure(list(Speed_Out = c(0.22, 0.155, 0.12, 0.09, 0.125,
0.125, 0.155, 0.34, 0.37, 0.185, 0.155, 0.22, 0.22, 0.28, 0.215,
0.06, 0.34, 0.555, 0.275, 0.215, 0.185, 0.06, 0.245, 0.31, 0.345,
0.375, 0.375, 0.87, 1.025, 0.405, 0, 0.185, 0.31, 0.155, 0.125,
0.22, 0.375, 0.345, 0.345, 0.405, 0.31, 0.34, 0.245, 0.155, 0.19,
0.22, 0.185, 0.12, 0.185, 0.155, 0.245, 0.31, 0.155, 0.155, 0.25,
0.215, 0.09, 0.06, 0.245, 0.495, 0.495, 0.34, 0.28, 0.31, 0.28,
0.25, 0.25, 0.185, 0.155, 0.25, 0.28, 0.28, 0.34, 0.215, 0.125,
0.155, 0.34, 0.34, 0.09, 0.59, 1.71, 1.18, 0.185, 0.215, 0.185,
0.185, 0.155, 0.19, 0.19, 0.19, 0.87, 2.045, 2.73, 1.585, 0.22,
0.25, 0.435, 0.405, 0.405, 0.405, 0.715, 0.62, 0.37, 0.4, 0.185,
0.375, 0.59, 0.525, 0.245, 0.495, 0.495, 0.68, 0.775, 0.25, 0.31,
0.34, 0.28, 0.28, 0.25, 1.55, 2.695, 1.705, 1.21, 0.87, 0.25,
1.52, 1.52, 0.405, 0.81, 2.08, 2.915, 1.705, 0.435, 0.22, 0.78,
1.215, 0.84, 0.495, 0.495, 0.56, 0.375, 0.28, 0.715, 1.025, 0.495,
0.65, 1.18, 1.09, 0.995, 0.87, 0.435, 0.125, 0.435, 0.555, 0.775,
1.12, 1.555, 1.15, 0.25, 0.87, 0.93, 0.28, 0.31, 0.31, 0.375,
0.78, 0.655, 0.53, 0.62, 0.525, 0.37, 0.555, 1.025, 0.655, 1.12,
1.585, 0.715, 0.155, 0.28, 1.12, 2.11, 1.645, 0.715, 0.465, 0.84,
0.81, 0.655, 0.84, 0.435, 0.28, 0.215, 0.93, 1.335, 0.65, 0.185,
0.155, 0.34, 0.4, 0.37, 0.435, 0.405, 0.28, 0.28, 0.25, 0.25,
0.745, 1.24, 0.805, 1.055, 1.085, 0.465, 0.375, 0.5, 0.59, 0.37,
0.185, 0.34, 0.37, 0.435, 0.405, 0.06, 0.125, 0.25, 0.31, 0.405,
0.78, 0.56, 0.215, 0.495, 0.87, 1.025, 0.62, 0.405, 0.405, 0.405,
0.31, 0.215, 0.465, 0.435, 0.34, 0.275, 0.215, 0.25, 0.22, 0.22,
0.125, 0.245, 0.34, 0.31, 0.37, 0.31, 0.31, 0.245, 0.185, 0.25,
0.22, 0.22, 0.31, 0.28, 0.22, 0.28, 0.53, 0.655, 0.375, 0.19,
0.405, 0.435, 0.28, 0.215, 0.77, 0.96, 1.865, 1.83, 0.495, 0.655,
1.615, 1.395, 0.31, 0.31, 0.25, 0.28, 0.34, 0.34), Movement_Out = structure(c(2L,
2L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 4L, 3L,
1L, 4L, 1L, 1L, 2L, 2L, 3L, 4L, 3L, 2L, 4L, 1L, 2L, 1L, 3L, 3L,
1L, 3L, 2L, 4L, 3L, 1L, 3L, 1L, 1L, 1L, 4L, 3L, 3L, 3L, 3L, 1L,
3L, 3L, 3L, 2L, 4L, 3L, 3L, 4L, 2L, 3L, 1L, 1L, 2L, 4L, 1L, 2L,
4L, 3L, 3L, 4L, 3L, 3L, 2L, 4L, 2L, 1L, 2L, 4L, 4L, 2L, 4L, 2L,
1L, 2L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 3L,
2L, 2L, 3L, 1L, 2L, 4L, 3L, 4L, 2L, 3L, 1L, 4L, 4L, 3L, 1L, 2L,
1L, 1L, 4L, 1L, 2L, 4L, 2L, 1L, 1L, 2L, 4L, 2L, 2L, 4L, 1L, 1L,
2L, 4L, 2L, 4L, 2L, 1L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 3L, 1L, 4L,
2L, 1L, 1L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 2L, 4L, 1L, 1L, 4L, 2L,
4L, 4L, 3L, 4L, 4L, 2L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 4L, 2L, 2L,
4L, 1L, 2L, 2L, 4L, 4L, 4L, 2L, 2L, 1L, 4L, 4L, 2L, 3L, 1L, 2L,
2L, 4L, 4L, 1L, 2L, 4L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 4L, 1L, 1L,
2L, 1L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 1L, 1L, 2L, 1L, 1L, 4L, 2L,
4L, 2L, 4L, 3L, 1L, 4L, 1L, 1L, 2L, 4L, 2L, 1L, 4L, 1L, 4L, 3L,
2L, 3L, 2L, 4L, 3L, 3L, 2L, 1L, 3L, 1L, 1L, 3L, 2L, 3L, 3L, 3L,
1L, 2L, 4L, 2L, 3L, 2L, 1L, 4L, 3L, 2L, 4L, 4L, 2L, 4L, 1L, 1L,
2L, 2L, 4L, 1L, 2L, 4L, 2L, 4L, 3L, 4L), .Label = c("forward",
"left", "non-moving", "right"), class = "factor")), .Names = c("Speed_Out",
"Movement_Out"), row.names = c(NA, 283L), class = "data.frame")
Ok, I have to say that this has been less trivial than I expected. My answer is ugly and most likely not optimal, but it seems to work.
There seem to be a few spots where even when the rest of the data was taken into account, none of the conditions were met, so the behaviour for those stayed at NA.
library(dplyr)

# Label `Out` with a Behaviour using consecutive growing windows.
#
# A window starts `window_size` rows long. While neither rule is met, the front
# of the window advances one row at a time. Once a rule is met, every row in
# the window receives that Behaviour and a fresh window of the initial size
# starts on the following row.
#   Lying:   mean(Speed_Out) <= 0.152682
#   Grazing: share of "left" in Movement_Out <= 0.208 (i.e. 20.8%) and
#            mean(Speed_Out) >= 0.200921
# If a window reaches the last row without meeting a rule, its first row stays
# unlabelled (NA) and the search restarts one row further on.

# Create id variable used to join results later
Out <- Out %>%
  mutate(id = row_number())

# Initial window size (used at the start and after each terminated window)
window_size <- 4

# Initialize variables used in loop
n_rows <- nrow(Out)
w <- window_size  # current window length
i <- 1            # first row of the current window
window_cnt <- 1   # number assigned to the next labelled window
# Labelled windows are collected in a list and bound once after the loop;
# rbind() on a growing data frame would copy it on every iteration.
labelled <- list()

# The current window always covers rows i .. i + w - 1. Stopping as soon as a
# window of the current size no longer fits avoids indexing past the last row,
# which would only yield all-NA rows that can never satisfy a rule.
while (i + w - 1 <= n_rows) {
  print(paste0("Row: ", i, ", Window Size: ", w))

  window_rows <- Out[i:(i + w - 1), ]
  mean_sp <- mean(window_rows$Speed_Out)
  # Proportion of the window whose movement is "left"
  mvmt <- mean(window_rows$Movement_Out == "left")

  # isTRUE() makes an NA comparison (missing values inside the window) count
  # as "rule not met" instead of raising an error in if ()
  behaviour <- if (isTRUE(mean_sp <= 0.152682)) {
    "Lying"
  } else if (isTRUE(mvmt <= 0.208 && mean_sp >= 0.200921)) {
    "Grazing"
  } else {
    NA_character_
  }

  if (!is.na(behaviour)) {
    # Rule met: label the whole window, then start a new window right after it
    labelled[[window_cnt]] <- data.frame(
      id = window_rows$id,
      Behaviour = behaviour,
      window_nr = window_cnt,
      stringsAsFactors = FALSE
    )
    i <- i + w
    w <- window_size
    window_cnt <- window_cnt + 1
  } else if (w <= n_rows - i) {
    # No rule met and rows remain: grow the window by one row
    w <- w + 1
  } else {
    # Window reached the last row without a match: skip this start row and
    # retry from the next one with the initial window size
    w <- window_size
    i <- i + 1
  }

  rm(window_rows, mean_sp, mvmt, behaviour)
}

# The typed, empty template keeps the columns (and the join below) well
# defined even when no window was labelled at all
out_behaviour <- bind_rows(
  data.frame(
    id = integer(),
    Behaviour = character(),
    window_nr = numeric(),
    stringsAsFactors = FALSE
  ),
  labelled
)

# Join Behaviour column back to original data frame
Out <- left_join(Out, out_behaviour, by = "id") %>%
  select(-id)

# Clean up workspace
rm(i, w, n_rows, window_size, window_cnt, labelled, out_behaviour)
And the first 20 outputs
Speed_Out Movement_Out Behaviour window_nr
1 0.220 left Lying 1
2 0.155 left Lying 1
3 0.120 forward Lying 1
4 0.090 non-moving Lying 1
5 0.125 non-moving Grazing 2
6 0.125 non-moving Grazing 2
7 0.155 non-moving Grazing 2
8 0.340 forward Grazing 2
9 0.370 forward Grazing 2
10 0.185 forward Grazing 3
11 0.155 right Grazing 3
12 0.220 non-moving Grazing 3
13 0.220 non-moving Grazing 3
14 0.280 non-moving Grazing 3
15 0.215 non-moving Grazing 4
16 0.060 right Grazing 4
17 0.340 non-moving Grazing 4
18 0.555 forward Grazing 4
19 0.275 right Grazing 5
20 0.215 forward Grazing 5
I know the code is a mess, so let me know if it needs some extra commenting.

JAGS code for estimating group means with Beta distribution

I'd like to estimate the means and sd's of percent canopy cover for 13 sites (9 are birds and 4 are potential habitats) using JAGS. I'm using a beta distribution to account for the fact that the data are bound by 0 and 1.
I have code for the model statement that works perfectly for other distributions (Poisson and log-normal) and I was attempting to adapt that code but I failed miserably.
Below are the R code, the model statement, and the data. I'm using R 3.1.1 in Windows Vista. If you could look at the model statement and straighten me out I would be very thankful.
Thanks,
Jeff
######## MODEL ##############
model{
# Model as posted in the question: one coefficient a[] per site with a shared
# normal prior, and a beta distribution for the observed cover values.
# Loop over the observations (the count 227 is hard-coded)
for (i in 1:227) {
# Log link from the coefficient of observation i's site to mean[i]
log(mean[i]) <- a[site[i]]
# NOTE(review): both beta parameters are the constants 1 and 0.5, so neither
# mean[i] nor a[] enters the distribution of cover20p[i]
cover20p[i] ~ dbeta(1, 0.5)
}
# Loop over the site coefficients (the count 13 is hard-coded)
for (i in 1:13){
# Normal prior centred on 0; the second argument is the precision tau
a[i] ~ dnorm(0, tau)
# Back-transform of the site coefficient from the log scale
median[i] <- exp(a[i])
}
# Uniform prior on the standard deviation of the site coefficients
sd ~ dunif(0, 10)
tau <- 1 / (sd*sd) # precision
}
######### R code ##########
# Driver script from the question: reads the field data, hands it to JAGS via
# R2jags and summarises the posterior draws.
# Read the data file; `T` is R's built-in alias for TRUE
frag <- read.csv("f:\\brazil\\TIandFRAG.csv", header=T)
library(R2jags)
library(rjags)
# Presumably set so that the relative model.file path used below resolves here
setwd("f://brazil")
site <- frag$site
# Rescale cover20p by 1/100.
# NOTE(review): the data dump shown with the question names its columns `site`
# and `canopy` (values already between 0 and 1), not `cover20p` -- confirm
# which column the file really contains
cover20p <- frag$cover20p/100
# NOTE(review): N is computed here but not used anywhere below
N <- length(frag$site)
# Data are handed to jags() as the *names* of workspace variables
jags.data <- list("site", "cover20p")
# Parameters to monitor.
# NOTE(review): of these names only "median" is defined in the model shown in
# the question
jags.params <- c("median", "test100MF","test100MT","test100fc","test100fa",
"test100gv","test100hm","test100mc", "test100ca","test100ct", "test10MF",
"test10MT", "test10fc","test10fa", "test10gv", "test10hm", "test10mc", "test10ca", "test10ct",
"test1MF", "test1MT", "test1fc", "test1fa", "test1gv", "test1hm",
"test1mc", "test1ca", "test1ct", "t1est1_con","t2est10_con","t3est100_con",
"t4est1_100","t5est1_10","t6est10_100")
#inits1 <- list(a=0, sd=0)
#inits2 <- list(a=100, sd=50)
#jags.inits <- list(inits1, inits2)
# Initial values: 0 for each of the 13 elements of a, and sd = 1
jags.inits <- function() {
list(a=c(0,0,0,0,0,0,0,0,0,0,0,0,0), sd=1)}
# Fit the model stored in fragmodelbeta.txt with n.iter = 1,000,000 and
# n.burnin = 20,000; jags.params is matched positionally (third argument)
jagsfit <- jags(data=jags.data, inits=jags.inits, jags.params,
n.iter=1000000, n.burnin=20000, model.file="fragmodelbeta.txt")
# Convert the fit to a coda object and summarise at the requested quantiles
my.coda <- as.mcmc(jagsfit)
summary(my.coda, quantiles=c(0.05, 0.25,0.5,0.75, 0.95))
print(jagsfit, digits=3)
##### DATA ###################
structure(list(site = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L
), canopy = c(0.95, 0.8, 0.85, 0.9, 0.35, 0.999, 0.999, 0.999,
0.95, 0.55, 0.9, 0.85, 0.7, 0.65, 0.05, 0.6, 0.999, 0.999, 0.85,
0.9, 1e-04, 0.45, 0.999, 0.7, 0.95, 0.5, 0.95, 0.6, 0.65, 0.7,
0.4, 0.85, 0.6, 0.95, 0.75, 0.9, 0.85, 0.75, 0.7, 0.85, 0.3,
0.7, 0.8, 0.7, 0.75, 0.8, 0.75, 0.95, 0.9, 0.05, 0.85, 0.6, 0.65,
0.5, 0.85, 0.95, 0.85, 0.25, 0.75, 0.999, 0.65, 0.95, 0.8, 0.9,
0.6, 0.8, 0.999, 0.2, 0.8, 0.4, 0.999, 0.95, 0.4, 0.999, 0.999,
0.95, 0.45, 0.2, 0.7, 0.95, 0.7, 0.8, 0.5, 0.85, 0.55, 1e-04,
0.25, 0.45, 0.999, 0.95, 0.999, 0.9, 0.6, 0.35, 0.95, 0.3, 0.999,
0.999, 0.5, 0.4, 0.9, 0.999, 0.7, 0.999, 0.9, 0.999, 0.4, 0.55,
0.8, 0.7, 0.999, 1e-04, 0.8, 1e-04, 0.7, 0.5, 0.8, 0.75, 1e-04,
0.45, 0.1, 1e-04, 0.4, 0.55, 0.4, 0.999, 0.9, 0.9, 0.15, 0.55,
0.35, 0.9, 0.65, 0.25, 0.999, 0.85, 0.999, 0.95, 0.7, 0.5, 0.7,
0.2, 0.95, 0.999, 0.999, 0.25, 0.85, 0.5, 0.8, 0.75, 0.85, 0.7,
0.95, 0.05, 0.65, 0.65, 0.999, 0.999, 0.999, 0.65, 0.4, 0.6,
0.9, 0.85, 0.75, 0.5, 0.65, 0.999, 0.65, 0.55, 0.75, 0.4, 0.9,
0.35, 0.999, 0.999, 0.4, 0.5, 0.8, 0.95, 0.95, 0.55, 0.7, 0.85,
0.8, 0.8, 0.65, 0.999, 0.6, 0.5, 0.999, 0.8, 0.999, 0.45, 0.999,
0.999, 0.8, 0.85, 0.999, 0.999, 0.999, 0.999, 0.5, 0.6, 0.15,
0.75, 0.6, 0.1, 0.05, 1e-04, 0.999, 0.6, 0.1, 0.35, 0.9, 0.9,
0.95, 0.95, 0.9, 0.55, 0.65, 0.9, 0.4, 0.999, 0.65, 0.5, 0.8)), .Names = c("site",
"canopy"), class = "data.frame", row.names = c(NA, -227L))
In your model, you have cover20p as one of the variables, but the frag data.frame holds the data in a column named canopy. I suspect you want canopy[i] ~ dbeta(1,0.5) in your model specification, and canopy <- frag$canopy and jags.params = "median" in your R code.
I think you could use a logit model for your probabilities. Maybe something like the following.
First, I convert your canopy observations back to the format that I suspect they began in, i.e. the number of canopy hits out of 20 samples at each site. I set 0.0001 to 0 and 0.999 to 1, and multiply the other canopy values by 20.
# `d` is assumed to be the data frame from the question, with columns `site`
# and `canopy` -- TODO confirm against the caller's workspace.

# Convert canopy proportions back to hits out of 20 samples: values below 0.05
# become 0 hits, values above 0.95 become 20 hits, everything else is scaled
# by 20. round() guards against floating-point results such as 6.999999,
# because the binomial likelihood below needs whole-number counts.
d$hits <- ifelse(
  d$canopy < 0.05,
  0,
  ifelse(d$canopy > 0.95, 20, round(d$canopy * 20))
)

# Number of distinct sites; the model indexes p[] by d$site, so this assumes
# the sites are coded 1..n_sites -- TODO confirm.
n_sites <- length(unique(d$site))

# JAGS model written as an R function (R2jags turns its body into the model
# file). Each observation is a binomial count out of 20 with a site-specific
# probability; the site probabilities share a normal prior on the logit scale.
# The loops use JAGS syntax, so `1:n` is intentional here.
M <- function() {
  for (i in 1:n) {
    hits[i] ~ dbin(p[site[i]], 20)
  }
  for (j in 1:nsites) {
    logit.p[j] ~ dnorm(mu, sigma^-2)
    logit(p[j]) <- logit.p[j]
  }
  mu ~ dnorm(0, 0.0001) # uninformative prior for grand mean of logit(p)
  sigma ~ dunif(0, 10) # uninformative prior for sd of logit(p)
}

# Fit the model, monitoring only the site probabilities p
j <- jags(
  data = list(site = d$site, hits = d$hits, n = nrow(d), nsites = n_sites),
  inits = NULL,
  parameters.to.save = "p",
  model.file = M
)

# Pick the rows for p[1], ..., p[n_sites] by name rather than by position, so
# the plot does not depend on where the deviance row sits in the summary
p_rows <- paste0("p[", seq_len(n_sites), "]")
p_summary <- j$BUGSoutput$summary[p_rows, , drop = FALSE]

# Posterior median per site with a 2.5%-97.5% interval drawn as a segment
plot(p_summary[, "50%"], pch = 20, xlab = "site", xaxt = "n", las = 1,
     ylim = c(0, 1),
     ylab = expression("p (median" %+-% "95% credible interval)"))
segments(seq_len(n_sites), p_summary[, "2.5%"], y1 = p_summary[, "97.5%"])
axis(1, seq_len(n_sites), seq_len(n_sites))

Resources