reshape data frame and concatenating columns in R - r

I tried to reshape my data frame from wide to long format. At the moment the data frame looks like this:
structure(list(study_site = structure(c(5L, 5L, 5L, 5L, 5L, 5L,
5L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 1L, 2L, 3L, 1L, 4L, 1L,
4L, 3L, 3L, 3L, 1L, 3L, 5L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 4L, 4L,
4L, 1L, 4L, 3L, 5L, 5L, 5L, 1L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 3L,
4L, 2L), .Label = c("N", "no_nest", "O", "S", "W"), class = "factor"),
coords.N = structure(c(54L, 54L, 40L, 40L, 40L, 40L, 39L,
67L, 67L, 55L, 55L, 64L, 64L, 64L, 78L, 81L, 47L, 80L, 83L,
60L, 46L, 46L, 76L, 88L, 88L, 88L, 84L, 84L, 30L, 58L, 58L,
58L, 25L, 25L, 19L, 19L, 42L, 42L, 42L, 29L, 45L, 90L, 91L,
91L, 91L, 91L, 89L, 89L, 87L, 87L, 87L, 56L, 56L, 61L, 35L,
36L), .Label = c("40.40463", "48.40168", "48.40178", "48.40215",
"48.40235", "48.40309", "48.40390", "48.40393", "48.40396",
"48.40405", "48.40410", "48.40411", "48.40415", "48.40416",
"48.40424", "48.40425", "48.40430", "48.40435", "48.40436 ",
"48.40438", "48.40443", "48.40450", "48.40451", "48.40454",
"48.40455", "48.40459", "48.40460", "48.40461", "48.40466",
"48.40466 ", "48.40467", "48.40469", "48.40471", "48.40477",
"48.40479 ", "48.40481", "48.40482", "48.40483", "48.40488 ",
"48.40491", "48.40493", "48.40504 ", "48.40508", "48.40513",
"48.40515", "48.40519 ", "48.40522 ", "48.40523", "48.40525",
"48.40526", "48.40529", "48.40532", "48.40537", "48.40537 ",
"48.40538 ", "48.40543 ", "48.40549", "48.40549 ", "48.40557",
"48.40557 ", "48.40558", "48.40565", "48.40571", "48.40575",
"48.40580", "48.40584", "48.40586 ", "48.40591", "48.40596",
"48.40598", "48.40599", "48.40611", "48.40612", "48.40617",
"48.40626", "48.40632 ", "48.40633", "48.40635 ", "48.40636",
"48.40637", "48.40638 ", "48.40639", "48.40639 ", "48.40641 ",
"48.40652", "48.40655", "48.40656 ", "48.40657 ", "48.40687 ",
"48.40690 ", "48.40703", "48.40718", "48.40719", "48.40726",
"48.40742", "48.40748", "NO_DATA"), class = "factor"), coords.E = structure(c(67L,
67L, 49L, 49L, 49L, 49L, 27L, 67L, 67L, 70L, 70L, 68L, 68L,
68L, 87L, 94L, 68L, 83L, 90L, 73L, 52L, 52L, 2L, 95L, 95L,
95L, 93L, 93L, 32L, 69L, 69L, 69L, 55L, 55L, 24L, 24L, 29L,
29L, 29L, 30L, 48L, 85L, 1L, 1L, 1L, 1L, 78L, 78L, 79L, 79L,
79L, 64L, 64L, 63L, 66L, 45L), .Label = c(" 015.82024", " 015.82164",
"015.80237", "015.80263", "015.80309", "015.80341", "015.80369",
"015.80388", "015.80394", "015.80399", "015.80406", "015.80435",
"015.80436", "015.80466", "015.80512", "015.80517", "015.80548",
"015.80551", "015.80572", "015.80583", "015.80609", "015.80636",
"015.80659", "015.80703", "015.80723", "015.80779", "015.80795",
"015.80803", "015.80821", "015.80843", "015.80871", "015.80875",
"015.80888", "015.80897", "015.80901", "015.80903", "015.80905",
"015.80906", "015.80908", "015.80909", "015.80921", "015.80923",
"015.80929", "015.80939", "015.80993", "015.81007", "015.81018",
"015.81087", "015.81113", "015.81132", "015.81151", "015.81180",
"015.81241", "015.81273", "015.81305", "015.81406", "015.81422",
"015.81522", "015.81526", "015.81543", "015.81546", "015.81564",
"015.81628", "015.81632", "015.81678", "015.81682", "015.81700",
"015.81703", "015.81735", "015.81739", "015.81770", "015.81783",
"015.81784", "015.81800", "015.81849", "015.81992", "015.82012",
"015.82029", "015.82039", "015.82083", "015.82099", "015.82126",
"015.82180", "015.82230", "015.82232", "015.82255", "015.82265",
"015.82290", "015.82303", "015.82304", "015.82346", "015.82362",
"015.82376", "015.82398", "015.82451", "015.82500", "015.82519",
"015.82555", "015.82579", "015.82634", "NO_DATA"), class = "factor"),
study_ID = c(120L, 120L, 1L, 1L, 1L, 1L, 9L, 39L, 39L, 109L,
109L, 110L, 110L, 110L, 45L, 58L, 121L, 96L, 97L, 40L, 43L,
43L, 47L, 57L, 57L, 57L, 114L, 114L, 67L, 71L, 71L, 71L,
83L, 83L, 4L, 4L, 10L, 10L, 10L, 106L, 108L, 46L, 115L, 115L,
115L, 115L, 116L, 116L, 117L, 117L, 117L, 70L, 70L, 119L,
95L, 3L), species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L), .Label = c("barn swallow", "no_nest"), class = "factor"),
first_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L, 0L,
1L, 0L, 2L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 3L, 0L, 0L, 0L, 4L,
1L, 8L, 0L, 0L, 1L, 2L, 1L, 5L, 0L, 0L, 1L, 0L, 1L, 1L, 0L
), second_visit = c(1L, 2L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 5L,
0L, 1L, 0L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L, 4L, 0L, 0L,
4L, 1L, 0L, 8L, 0L, 1L, 2L, 1L, 0L, 5L, 0L, 1L, 0L, 0L, 1L,
0L), third_visit = c(0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
0L, 5L, 0L, 1L, 2L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L, 0L, 0L, 6L,
1L, 4L, 1L, 0L, 0L, 8L, 1L, 2L, 1L, 0L, 0L, 5L, 1L, 1L, 0L,
0L, 0L), used_1st_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L, 0L,
1L, 2L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 3L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L), used_2nd_visit = c(0L, 2L, 1L, 0L, 0L, 0L, 1L,
0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 2L, 0L,
4L, 0L, 0L, 0L, 1L, 0L, 5L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L), used_3rd_visit = c(0L, 0L, 1L, 0L, 0L, 0L,
1L, 0L, 1L, 0L, 4L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 0L, 1L, 0L, 2L,
0L, 0L, 6L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 2L,
0L, 1L, 0L, 0L, 0L), nest_condition = structure(c(3L, 5L,
5L, 2L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 2L, 5L, 3L, 2L,
4L, 5L, 5L, 5L, 5L, 3L, 2L, 5L, 5L, 2L, 2L, 5L, 1L, 5L, 5L,
5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 2L, 3L, 5L, 5L, 5L, 2L, 5L,
3L, 5L, 5L, 5L, 2L, 5L, 3L, 5L, 4L), .Label = c(" ready ",
"damaged", "in_progress", "no_nest", "ready"), class = "factor"),
nesting_site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L), .Label = c("inside", "no_nest", "outside"), class = "factor"),
distance = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 4L, 3L, 6L, 4L, 4L, 2L, 2L, 4L, 2L,
2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 2L, 4L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 6L), .Label = c("1", "2", "3", "4", "no_data", "no_nest"
), class = "factor"), material = structure(c(5L, 5L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 5L, 5L, 1L, 1L, 2L), .Label = c("fine", "fine plaster",
"medium fine plaster", "no_data", "rough", "rough plaster",
"smooth plaster", "under construction", "wood"), class = "factor"),
housetype = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 5L, 5L, 5L,
4L, 4L, 5L, 3L, 3L, 3L, 3L, 5L, 5L, 3L, 3L, 3L, 4L, 4L, 5L,
5L, 4L), .Label = c("auto repair shop", "barn ", "hall",
"residence", "stable"), class = "factor"), usage_house = structure(c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L), .Label = c("auto_repair",
"barn", "inhabited", "under construction", "used"), class = "factor"),
age = c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L)), row.names = c(1L,
2L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 21L, 22L, 23L, 24L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 89L, 90L, 91L, 92L, 93L, 94L,
95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L,
106L, 107L, 108L, 109L, 111L), class = "data.frame")
The columns used_1st_visit, used_2nd_visit, ... give the number of nests the birds were using at the first, second, ... visit.
I would like each row of my data frame to represent a single nest (used or unused), as well as the no_nest entries:
ID species `1st_visit` `2nd_visit` `3rd_visit` used_1st_visit used_2nd_visit used_3rd_visit
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 120 barn s~ 1 1 0 0 0 0
2 120 barn s~ 1 1 0 1 1 0
3 120 barn s~ 1 1 0 1 1 0
4 39 barn s~ 1 1 1 1 1 1
5 8 barn s~ 1 1 1 1 0 0
6 8 barn s~ 1 1 1 0 0 0
Unfortunately I have no idea how to concatenate the columns to get the final data frame.
Does anybody have an idea?

I'm not completely sure what you are asking for, but this is what I understood: In the long data frame...
if all visits (columns used_first_visit, used_sec_visit etc.) are 0, combine them into one row, marking it 0
if any visits are not 0, keep as many rows as there are non-zero visits and mark them with 1
This is my dplyr-solution (it's not very pretty, but it works):
# Load the packages that provide %>%, the dplyr verbs and pivot_longer()
library(dplyr)
library(tidyr)

# create data: a small example with one study site, three visits and two
# nest conditions; the used_* columns hold the counts to be reshaped
dat <- data.frame(
  "visits" = c("first", "first", "second", "second", "third", "third"),
  "study_id" = rep(120, 6),
  "used_first_visit" = c(0, 2, 0, 2, 0, 2),
  "used_sec_visit" = c(0, 2, 0, 2, 0, 2),
  "used_thrd_visit" = rep(0, 6),
  "nest_cond" = c("damaged", "ready", "damaged", "ready", "damaged", "ready")
)

# make long data frame and filter values
dat_long <- dat %>%
  # stack the three used_* columns (positions 3 to 5) into one `used` column
  pivot_longer(c(3:5), names_to = "whatever", values_to = "used") %>%
  # the original column names are not needed afterwards
  select(-c(whatever)) %>%
  group_by(visits, nest_cond) %>%
  # keep every non-zero row; if the whole group is zero keep exactly one row.
  # Filtering on the condition directly avoids a sentinel value that could
  # collide with a genuine count.
  filter(used > 0 | (all(used == 0) & row_number() == 1)) %>%
  # collapse counts to a 0/1 flag
  mutate(used = ifelse(used > 0, 1, 0)) %>%
  # drop the grouping so later verbs do not silently operate per group
  ungroup()
Let me know if this is not what you are looking for!

Related

How do I create a ggplot in R from a non-linear model using the mgcv package?

I have a non-linear survival model which I have coded using the mgcv package. I can produce a regular plot, but I would like to be able to produce the plot with ggplot2 instead. How do I go about this?
Here is my code:
df <- structure(list(SurvYear =c(3L, 2L, 3L, 6L, 8L, 3L, 5L, 2L, 9L,
8L, 1L, 7L, 1L, 4L, 6L, 8L, 2L, 5L, 1L, 1L, 7L, 1L, 5L, 3L, 2L,
1L, 9L, 1L, 5L, 2L, 2L, 1L, 2L, 3L, 4L, 8L, 7L, 2L, 2L, 6L, 9L,
7L, 3L, 9L, 6L, 8L, 2L, 8L, 2L, 1L, 1L, 6L, 5L, 3L, 3L, 7L, 2L,
4L, 5L, 2L, 3L, 7L, 4L, 1L, 2L, 2L, 3L, 5L, 1L, 9L, 2L, 2L, 3L,
9L, 6L, 2L, 2L, 4L, 3L, 1L, 9L, 7L, 3L, 1L, 2L, 1L, 6L, 3L, 1L,
5L, 6L, 5L, 6L, 4L, 2L, 1L, 3L, 1L, 1L, 3L, 4L, 3L, 8L, 9L, 7L,
6L, 3L, 5L, 2L, 7L, 9L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 9L, 1L,
4L, 8L, 1L, 8L, 1L, 1L, 8L, 5L, 2L, 9L, 4L, 8L, 4L, 9L, 2L, 2L,
3L, 2L, 9L, 3L, 2L, 1L, 3L, 2L, 1L, 9L, 9L, 2L, 1L, 1L, 1L, 2L,
9L, 1L, 5L, 1L, 6L, 9L, 3L, 2L, 2L, 5L, 7L, 4L, 2L, 7L, 2L, 4L,
5L, 3L, 3L, 9L, 2L, 6L, 1L, 3L, 4L, 5L, 9L, 8L, 1L, 2L, 8L, 2L,
9L, 1L, 7L, 3L, 3L, 1L, 6L, 3L, 4L, 9L, 1L, 3L, 4L, 4L, 2L, 7L,
2L, 3L, 1L, 1L, 7L, 2L, 1L, 1L, 2L, 1L, 9L, 1L, 2L, 9L, 1L, 1L,
2L, 3L, 7L, 3L, 1L, 1L, 2L, 5L, 4L, 6L, 7L, 1L, 9L, 2L, 1L, 8L,
1L, 2L, 1L, 4L, 2L, 3L, 3L, 9L, 9L, 9L, 4L, 1L, 1L, 4L, 9L, 3L,
1L, 1L, 3L, 3L, 4L, 1L, 1L, 1L, 1L, 6L, 9L, 1L, 1L, 8L, 1L, 3L,
3L, 8L, 3L, 5L, 1L, 2L, 1L, 2L, 4L, 3L, 1L, 6L, 1L, 4L, 8L, 1L,
3L, 2L, 2L, 3L, 6L, 2L, 1L, 1L, 1L, 9L, 3L, 1L, 7L, 3L, 9L, 1L,
9L, 5L, 4L), Gender = c(1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L,
1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L,
1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L,
0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L,
1L, 1L), Age = c(63L, 66L, 34L, 43L, 63L, 21L, 24L, 44L, 52L,
59L, 27L, 32L, 30L, 20L, 56L, 55L, 35L, 26L, 53L, 43L, 39L, 19L,
34L, 28L, 19L, 24L, 50L, 22L, 58L, 24L, 50L, 25L, 37L, 30L, 51L,
69L, 23L, 49L, 22L, 46L, 58L, 31L, 23L, 53L, 59L, 25L, 38L, 44L,
34L, 49L, 19L, 39L, 24L, 51L, 29L, 27L, 48L, 77L, 22L, 43L, 59L,
49L, 60L, 51L, 49L, 47L, 50L, 44L, 41L, 44L, 50L, 42L, 46L, 54L,
35L, 21L, 26L, 26L, 40L, 21L, 48L, 49L, 20L, 20L, 32L, 37L, 22L,
36L, 46L, 28L, 39L, 35L, 51L, 39L, 49L, 57L, 46L, 18L, 52L, 47L,
27L, 32L, 23L, 43L, 42L, 57L, 22L, 40L, 19L, 58L, 71L, 55L, 42L,
20L, 51L, 21L, 20L, 61L, 36L, 54L, 19L, 35L, 38L, 41L, 34L, 22L,
41L, 42L, 56L, 50L, 53L, 53L, 48L, 22L, 59L, 27L, 28L, 32L, 37L,
68L, 24L, 26L, 61L, 21L, 20L, 20L, 50L, 62L, 61L, 29L, 18L, 40L,
67L, 43L, 25L, 43L, 22L, 56L, 47L, 41L, 40L, 43L, 27L, 37L, 61L,
35L, 23L, 54L, 38L, 38L, 39L, 45L, 49L, 63L, 49L, 44L, 44L, 23L,
37L, 58L, 61L, 25L, 18L, 59L, 25L, 51L, 40L, 27L, 42L, 22L, 38L,
22L, 45L, 33L, 32L, 36L, 53L, 52L, 19L, 45L, 53L, 27L, 65L, 25L,
53L, 57L, 29L, 23L, 62L, 36L, 56L, 59L, 41L, 61L, 44L, 24L, 21L,
38L, 29L, 55L, 33L, 18L, 21L, 19L, 65L, 24L, 59L, 34L, 25L, 45L,
48L, 18L, 41L, 61L, 32L, 37L, 21L, 20L, 57L, 25L, 65L, 50L, 61L,
32L, 27L, 19L, 50L, 63L, 19L, 45L, 20L, 36L, 20L, 19L, 53L, 39L,
50L, 20L, 24L, 57L, 28L, 21L, 39L, 49L, 21L, 20L, 39L, 20L, 44L,
19L, 39L, 53L, 29L, 60L, 43L, 21L, 23L, 30L, 42L, 42L, 51L, 35L,
50L, 51L, 56L, 52L, 22L, 36L, 56L, 28L, 57L, 20L, 47L, 48L, 65L,
71L, 21L, 70L, 23L, 63L), Highest_Educationmx = c(4L, 5L, 3L,
2L, 3L, 2L, 3L, 1L, 3L, 1L, 7L, 3L, 2L, 3L, 3L, 2L, 6L, 2L, 3L,
6L, 3L, 2L, 2L, 7L, 2L, 1L, 2L, 3L, 6L, 3L, 5L, 3L, 5L, 6L, 2L,
1L, 5L, 2L, 5L, 1L, 1L, 3L, 2L, 3L, 1L, 7L, 5L, 4L, 7L, 3L, 1L,
1L, 6L, 3L, 3L, 2L, 4L, 6L, 5L, 4L, 2L, 6L, 1L, 3L, 4L, 2L, 1L,
5L, 5L, 3L, 1L, 5L, 3L, 3L, 1L, 4L, 2L, 3L, 5L, 3L, 1L, 4L, 2L,
1L, 2L, 7L, 2L, 5L, 3L, 2L, 6L, 1L, 1L, 3L, 4L, 1L, 5L, 1L, 3L,
4L, 2L, 7L, 2L, 4L, 4L, 7L, 4L, 6L, 3L, 1L, 2L, 1L, 5L, 5L, 1L,
5L, 2L, 7L, 3L, 4L, 2L, 4L, 2L, 4L, 2L, 2L, 4L, 1L, 2L, 1L, 2L,
6L, 1L, 2L, 5L, 2L, 2L, 5L, 1L, 6L, 5L, 2L, 1L, 2L, 1L, 1L, 3L,
2L, 4L, 3L, 2L, 3L, 1L, 5L, 5L, 7L, 1L, 3L, 3L, 2L, 1L, 3L, 4L,
5L, 1L, 1L, 3L, 3L, 3L, 5L, 3L, 6L, 4L, 3L, 1L, 3L, 5L, 7L, 1L,
3L, 4L, 5L, 3L, 3L, 1L, 1L, 1L, 7L, 3L, 1L, 4L, 3L, 3L, 5L, 1L,
4L, 5L, 4L, 2L, 5L, 3L, 1L, 1L, 5L, 4L, 7L, 5L, 2L, 2L, 5L, 3L,
1L, 1L, 2L, 3L, 5L, 3L, 7L, 5L, 1L, 5L, 3L, 1L, 1L, 1L, 1L, 7L,
5L, 7L, 3L, 1L, 5L, 7L, 6L, 3L, 7L, 2L, 2L, 3L, 1L, 2L, 1L, 5L,
5L, 2L, 4L, 1L, 1L, 2L, 1L, 4L, 7L, 3L, 2L, 5L, 3L, 2L, 4L, 2L,
1L, 7L, 5L, 2L, 2L, 2L, 3L, 4L, 1L, 2L, 5L, 2L, 3L, 3L, 1L, 3L,
2L, 3L, 5L, 1L, 3L, 1L, 5L, 4L, 5L, 4L, 5L, 5L, 5L, 1L, 3L, 3L,
1L, 3L, 6L, 3L, 4L, 3L, 3L, 5L, 3L), Censor = c(0L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L,
1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L,
1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L)), class = "data.frame",
row.names = c(NA, -300L))
Here is the script:
library(mgcv)
library(ggplot2)
# Run the model: a GAM with the cox.ph family, a parametric Gender term and
# smooths of Age and Highest_Educationmx.
# NOTE(review): per the mgcv cox.ph documentation the `weights` vector is the
# event indicator (1 = event, 0 = censored) -- confirm Censor is coded that way.
Model1 <- gam(
  SurvYear ~ (Gender) + s(Age, k = 50) + s(Highest_Educationmx, k = 7),
  weights = Censor,
  data = df,
  gamma = 1.5,
  family = cox.ph()
)
summary(Model1)

# Build a perspective chart of the fitted surface over Age and education
vis.gam(
  Model1,
  view = c("Age", "Highest_Educationmx"),
  plot.type = "persp",
  color = "gray",
  se = -1,
  theta = 45,
  phi = 25,
  xlab = "Age",
  ylab = "Highest Education",
  ticktype = "detailed",
  zlim = c(-5, 2)
)

# Plot individual predictors using plot command from mgcv.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
plot(
  Model1,
  all.terms = TRUE,
  rug = TRUE,
  residuals = FALSE,
  se = TRUE,
  shade = TRUE,
  seWithMean = TRUE
)
#Plot individual predictors using ggplot instead of plot command from mgcv
#UNSURE HOW TO DO THIS
I'm biased (I wrote it) but you can use the gratia package for this.
You can use the draw() function as a replacement for plot.gam(), and if you want total control, just use evaluate_smooth() to produce a tidy representation of the smooth which is then easily plotted using ggplot2.
Here is the script based on the suggestion from Gavin Simpson above:
library(gratia)
# Draw the Age smooth with ggplot2 rather than mgcv's plot():
# evaluate the smooth into a data frame, then plot the estimate as a line with
# a band of one standard error either side, plus dashed reference lines at
# y = 0 and at the mean Age in df.
# NOTE(review): newer gratia releases replace evaluate_smooth() with
# smooth_estimates() -- confirm against the installed version.
sm <- gratia::evaluate_smooth(Model1, "Age")
ggplot(sm, aes(x = Age, y = est)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = est - se, ymax = est + se), alpha = 0.2) +
  coord_cartesian(xlim = c(20, 75), ylim = c(-2, 1)) +
  scale_x_continuous(breaks = seq(20, 75, by = 5)) +
  scale_y_continuous(breaks = seq(-2, 1, by = 1)) +
  labs(title = "Age", x = "Age", y = "Linear Risk Score") +
  theme(plot.title = element_text(size = 10)) +
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_vline(xintercept = mean(df$Age), linetype = "dashed", size = 0.5)

ggplot2 select categories for bar chart and create labels

I am trying to make a bar chart with ggplot2 using the dataset below. When I use the code
# Bar chart of the mean of `Total Jumps` per `Period Number`.
# NOTE(review): inside subset(), `Status = "Starter"` uses `=`, so it is passed
# as a named argument instead of a row condition and no rows are filtered out;
# a comparison would be written `Status == "Starter"`.
ggplot(p.data, aes(x = `Period Number`, y = `Total Jumps`)) +
stat_summary(data = subset(p.data, Status = "Starter"), fun ="mean", geom = "bar")
I get this graph:
The most concerning aspect is that for periods 2, 3, 4, and 5 the bars should be taller (period 2 should be around 9.9). Additionally, I would like to remove period 0 and period 1 and add bar labels with the raw data, without creating an additional data frame.
p.data <- structure(list(`Period Number` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L),
`Total Jumps` = c(112L, 97L, 28L, 132L, 162L, 19L, 92L, 112L,
97L, 141L, 68L, 86L, 76L, 26L, 105L, 125L, 19L, 92L, 112L,
64L, 101L, 68L, 4L, 8L, 0L, 8L, 12L, 0L, 0L, 0L, 13L, 8L,
0L, 8L, 2L, 2L, 5L, 12L, 0L, 0L, 0L, 5L, 11L, 0L, 0L, 6L,
0L, 9L, 8L, 0L, 0L, 0L, 7L, 10L, 0L, 14L, 5L, 0L, 5L, 5L,
0L, 0L, 0L, 8L, 11L, 0L, 108L, 131L, 47L, 136L, 159L, 35L,
114L, 116L, 111L, 190L, 64L, 75L, 95L, 47L, 116L, 123L, 27L,
103L, 108L, 70L, 152L, 64L, 4L, 7L, 0L, 14L, 10L, 0L, 0L,
0L, 15L, 10L, 0L, 4L, 0L, 0L, 3L, 7L, 7L, 8L, 8L, 5L, 10L,
0L, 7L, 14L, 0L, 3L, 10L, 1L, 0L, 0L, 11L, 7L, 0L, 18L, 15L,
0L, 0L, 9L, 0L, 3L, 0L, 10L, 11L, 0L, 118L, 96L, 48L, 143L,
170L, 37L, 118L, 117L, 116L, 165L, 56L, 80L, 68L, 48L, 114L,
130L, 36L, 114L, 107L, 80L, 123L, 56L, 2L, 10L, 0L, 8L, 11L,
0L, 0L, 0L, 5L, 9L, 0L, 4L, 12L, 0L, 6L, 5L, 0L, 4L, 8L,
12L, 8L, 0L, 7L, 4L, 0L, 10L, 10L, 0L, 0L, 0L, 12L, 13L,
0L, 25L, 2L, 0L, 5L, 14L, 1L, 0L, 2L, 7L, 12L, 0L), Status = structure(c(1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L), .Label = c("Bench", "Starter"), class = "factor")), row.names = c(NA,
198L), class = "data.frame")
Thank you for your help!
It's best to pass the data you actually want to plot to the plotting function, rather than trying to coerce it within the plotting function. In this case you were trying to subset a different data frame from the one you passed to ggplot inside stat_summary. The call to ggplot had already set up the aesthetics you wanted mapped; then, in your only geom layer, you were telling ggplot you wanted a completely different set of aesthetics.
You don't need to create another data frame to reshape your data. Here's how you could do it using dplyr:
library(dplyr)
library(ggplot2)
# Keep only starters in periods after 1, average `Total Jumps` per period,
# and draw the means as bars with a rounded label placed just above each bar.
p.data %>%
  filter(Status == "Starter", `Period Number` > 1) %>%
  group_by(`Period Number`) %>%
  summarise(`Total Jumps` = mean(`Total Jumps`)) %>%
  ggplot(aes(x = `Period Number`, y = `Total Jumps`)) +
  geom_col(colour = "black", fill = "dodgerblue") +
  geom_text(aes(label = signif(`Total Jumps`, 2), y = `Total Jumps` + 1))

dplyr::left_join() produces an unexpected error

I have
> head(p)
studie sex n_fjernet n_sygdom
1 Group1 Male 22 1
2 Group1 Male 61 2
3 Group1 Female 50 1
4 Group1 Female 47 3
5 Group1 Female 30 1
6 Group1 Female 60 0
and
> head(u)
studie alder sex n_fjernet n_sygdom n_otte
1 Group4 59 Female 26 0 0
2 Group4 85 Male 7 1 1
3 Group4 74 Female 17 9 6
4 Group4 78 Male 13 0 0
5 Group4 41 Male 11 0 0
6 Group4 62 Male 12 0 0
I want to add u$n_otte to p for all cases where p$studie == u$studie and p$sex == u$sex and p$n_fjernet == u$n_fjernet and p$n_sygdom == u$n_sygdom, which is 895 cases in u out of the total of 1485 cases in p. All cases in p that do not match, and therefore get no u$n_otte from the left_join(), should just be listed as NA.
So I wrote
# Attempt to attach u's n_otte to p, after de-duplicating u on the four
# matching columns.
# NOTE(review): `by` has to name key columns that exist in both tables; n_otte
# is only in u (it is the column being added), which is what the error reports.
# The keys described in the question are studie, sex, n_fjernet and n_sygdom.
left_join(p, u %>% distinct(studie, sex, n_fjernet, n_sygdom, .keep_all = TRUE), by = "n_otte")
Which returned an error
Error: `by` can't contain join column `n_otte` which is missing from LHS
I tried different left_join() approaches but all returned an error. What am I doing wrong?
u <- structure(list(studie = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Group4",
"Group3"), class = "factor"), sex = structure(c(1L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L
), .Label = c("Female", "Male"), class = "factor"), n_fjernet = c(26L,
7L, 17L, 13L, 11L, 12L, 8L, 2L, 14L, 8L, 35L, 23L, 5L, 20L, 11L,
5L, 30L, 12L, 23L, 37L, 13L, 26L, 9L, 9L, 9L, 15L, 39L, 13L,
5L, 9L, 19L, 32L, 18L, 16L, 45L, 35L, 25L, 20L, 27L, 34L, 11L,
44L, 20L, 48L, 92L, 6L, 29L, 12L, 26L, 37L, 30L, 54L, 32L, 39L,
15L, 21L, 22L, 34L, 39L, 30L, 36L, 19L, 26L, 43L, 26L, 42L, 18L,
15L, 32L, 29L, 36L, 28L, 38L, 35L, 66L, 11L, 49L, 32L, 61L, 49L,
36L, 51L, 42L, 13L, 10L, 36L, 45L, 49L, 52L, 21L, 42L, 29L, 38L,
28L, 37L, 47L, 33L, 50L, 19L, 45L, 23L, 29L, 31L, 59L, 60L, 32L,
32L, 30L, 50L, 29L, 32L, 42L, 24L, 22L, 47L, 24L, 22L, 8L, 38L,
25L, 34L, 45L, 50L, 51L, 28L, 8L, 21L, 17L, 30L, 36L, 20L, 56L,
23L, 77L, 23L, 76L, 58L, 35L, 33L, 52L, 34L, 17L, 66L, 38L, 58L,
16L, 58L, 44L, 22L, 42L, 17L, 33L, 9L, 31L, 15L, 46L, 31L, 32L,
25L, 17L, 31L, 35L, 29L, 18L, 69L, 28L, 25L, 35L, 19L, 18L, 15L,
51L, 41L, 55L, 35L, 19L, 45L, 24L, 39L, 57L, 45L, 37L, 30L, 33L,
34L, 47L, 21L, 16L, 22L, 26L, 36L, 32L, 17L, 28L, 32L, 35L, 37L,
30L, 32L, 29L, 41L, 18L, 26L, 32L, 30L, 17L, 35L, 17L, 27L, 27L,
10L, 30L, 50L, 28L, 22L, 13L, 32L, 35L, 51L, 44L, 16L, 17L, 43L,
27L, 21L, 34L, 13L, 18L, 37L, 20L, 8L, 19L, 43L, 24L, 48L, 15L,
11L, 22L, 20L, 19L, 20L, 23L, 12L, 31L, 28L, 34L, 25L, 22L, 38L,
28L, 26L, 30L, 45L, 50L, 39L, 22L, 41L, 14L, 60L, 35L, 10L, 29L,
24L, 25L, 31L, 32L, 33L, 10L, 16L, 10L, 10L, 32L, 30L, 34L, 31L,
24L, 15L, 20L, 20L, 31L, 33L, 15L, 27L, 19L, 40L, 17L, 48L, 35L,
25L, 25L, 22L, 19L, 24L, 20L, 30L, 13L, 28L, 19L, 7L, 29L, 18L,
41L, 11L, 42L, 35L, 24L, 16L, 29L, 39L, 28L, 32L, 16L, 31L, 30L,
27L, 17L, 28L, 29L, 12L, 25L, 30L, 14L, 19L, 13L, 32L, 16L, 12L,
24L, 10L, 34L, 49L, 17L, 11L, 37L, 38L, 36L, 18L, 42L, 14L, 33L,
41L, 21L, 10L, 16L, 16L, 14L, 32L, 25L, 22L, 19L, 28L, 16L, 24L,
28L, 29L, 34L, 27L, 23L, 33L, 23L, 57L, 30L, 16L, 13L, 20L, 42L,
14L, 18L, 31L, 19L, 22L, 27L, 11L, 12L, 7L, 25L, 29L, 35L, 21L,
64L, 39L, 51L, 21L, 16L, 36L, 22L, 15L, 29L, 38L, 20L, 23L, 5L,
33L, 15L, 20L, 52L, 31L, 16L, 10L, 12L, 47L, 23L, 28L, 27L, 18L,
24L, 34L, 45L, 24L, 43L, 28L, 34L, 20L, 26L, 17L, 41L, 25L, 38L,
35L, 25L, 21L, 24L, 21L, 24L, 14L, 40L, 19L, 11L, 21L, 38L, 43L,
23L, 28L, 17L, 78L, 12L, 27L, 16L, 24L, 16L, 21L, 43L, 25L, 50L,
44L, 30L, 33L, 31L, 20L, 47L, 47L, 34L, 22L, 31L, 28L, 51L, 23L,
45L, 30L, 34L, 32L, 39L, 41L, 25L, 15L, 19L, 14L, 41L, 40L, 49L,
27L, 35L, 26L, 22L, 59L, 10L, 29L, 38L, 64L, 16L, 36L, 56L, 31L,
50L, 23L, 27L, 49L, 30L, 28L, 25L, 38L, 37L, 25L, 30L, 23L, 18L,
31L, 48L, 47L, 49L), n_sygdom = c(0L, 1L, 9L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 4L, 0L, 0L, 21L, 0L, 2L,
0L, 0L, 0L, 2L, 1L, 1L, 0L, 0L, 2L, 2L, 0L, 0L, 7L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 11L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 5L, 6L, 0L, 1L,
0L, 1L, 0L, 0L, 1L, 0L, 3L, 0L, 0L, 19L, 2L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 5L, 0L, 2L, 6L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 16L, 1L, 6L, 0L, 2L, 5L, 0L, 0L, 0L, 0L, 3L, 0L, 2L, 3L,
4L, 0L, 1L, 0L, 0L, 0L, 4L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 4L, 0L, 9L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 2L,
2L, 3L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 5L, 1L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L,
2L, 5L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 2L, 0L, 14L, 3L, 0L, 0L, 0L, 0L, 4L, 1L, 0L, 0L, 2L, 0L,
1L, 0L, 0L, 1L, 0L, 2L, 0L, 5L, 0L, 0L, 0L, 1L, 0L, 0L, 4L, 0L,
1L, 1L, 3L, 0L, 2L, 0L, 0L, 0L, 2L, 7L, 18L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 3L, 1L, 0L, 0L, 6L, 1L, 0L, 0L, 7L, 2L,
0L, 0L, 0L, 1L, 0L, 8L, 0L, 0L, 3L, 3L, 1L, 3L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 3L, 0L, 4L, 0L, 0L,
1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 9L, 0L, 0L,
6L, 0L, 1L, 0L, 1L, 1L, 2L, 0L, 5L, 4L, 0L, 4L, 0L, 0L, 0L, 2L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 0L, 0L, 3L,
0L, 3L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 3L, 1L, 7L, 3L, 0L, 0L, 2L,
0L, 1L, 0L, 0L, 0L, 2L, 0L, 2L, 0L, 3L, 1L, 0L, 3L, 0L, 0L, 4L,
0L, 1L, 5L, 4L, 16L, 0L, 1L, 5L, 1L, 0L, 1L, 0L, 0L, 0L, 3L,
0L, 4L, 2L, 4L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), n_otte = c(0L, 1L, 6L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 3L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 6L, 0L, 3L, 0L, 0L, 0L,
2L, 6L, 6L, 0L, 0L, 4L, 6L, 0L, 0L, 6L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 4L, 3L, 0L, 1L, 0L, 1L, 0L, 0L,
1L, 0L, 6L, 0L, 0L, 6L, 6L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 3L, 0L,
0L, 0L, 0L, 0L, 4L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L,
0L, 3L, 4L, 0L, 0L, 6L, 0L, 6L, 0L, 1L, 0L, 0L, 6L, 6L, 6L, 0L,
3L, 6L, 0L, 0L, 0L, 0L, 4L, 0L, 3L, 3L, 6L, 0L, 1L, 0L, 0L, 0L,
3L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 4L, 0L, 3L,
0L, 0L, 0L, 1L, 0L, 4L, 0L, 0L, 0L, 4L, 6L, 4L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 4L, 1L, 6L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 4L, 6L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 3L, 0L, 6L, 3L, 0L,
0L, 0L, 0L, 6L, 1L, 0L, 0L, 6L, 0L, 1L, 0L, 0L, 1L, 6L, 6L, 0L,
3L, 6L, 0L, 0L, 1L, 0L, 0L, 3L, 0L, 1L, 1L, 3L, 6L, 3L, 0L, 0L,
0L, 3L, 3L, 6L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 3L, 6L,
0L, 0L, 6L, 1L, 0L, 0L, 6L, 2L, 0L, 0L, 0L, 1L, 0L, 6L, 0L, 0L,
6L, 4L, 1L, 3L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 6L,
0L, 0L, 0L, 6L, 0L, 4L, 0L, 0L, 4L, 0L, 6L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 4L, 0L, 0L, 4L, 0L, 0L, 4L, 0L, 6L, 0L, 1L, 1L, 6L, 0L,
6L, 6L, 0L, 3L, 0L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 6L, 0L, 0L, 3L, 0L, 6L, 0L, 0L, 0L, 0L, 6L, 3L,
0L, 6L, 1L, 6L, 6L, 0L, 0L, 3L, 0L, 1L, 0L, 0L, 0L, 3L, 0L, 6L,
0L, 6L, 1L, 0L, 6L, 0L, 0L, 6L, 0L, 1L, 3L, 6L, 6L, 0L, 1L, 6L,
1L, 0L, 1L, 0L, 0L, 0L, 6L, 0L, 4L, 6L, 3L, 6L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)), row.names = c(NA,
500L), class = "data.frame")
And
p <- structure(list(studie = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Group2",
"Group3", "Group4"), class = "factor"), sex = structure(c(2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 1L), .Label = c("Female", "Male"), class = "factor"),
n_fjernet = c(18L, 26L, 24L, 20L, 41L, 31L, 13L, 41L, 25L,
16L, 18L, 26L, 35L, 36L, 22L, 20L, 16L, 10L, 19L, 46L, 6L,
49L, 70L, 46L, 55L, 25L, 22L, 37L, 28L, 52L, 27L, 15L, 11L,
7L, 24L, 11L, 56L, 47L, 27L, 14L, 16L, 21L, 43L, 25L, 50L,
44L, 30L, 33L, 31L, 20L, 47L, 47L, 34L, 22L, 31L, 28L, 51L,
23L, 45L, 30L, 34L, 32L, 39L, 41L, 25L, 15L, 19L, 14L, 41L,
40L, 49L, 27L, 35L, 26L, 22L, 59L, 10L, 29L, 38L, 64L, 16L,
36L, 56L, 31L, 50L, 23L, 27L, 49L, 30L, 28L, 25L, 38L, 37L,
25L, 30L, 23L, 18L, 31L, 48L, 47L, 49L, 38L, 19L, 3L, 69L,
26L, 30L, 57L, 52L, 40L, 32L, 17L, 42L, 32L, 15L, 63L, 25L,
29L, 45L, 49L, 27L, 21L, 43L, 31L, 13L, 22L, 28L, 45L, 24L,
17L, 49L, 34L, 61L, 51L, 51L, 29L, 32L, 23L, 9L, 14L, 28L,
35L, 43L, 46L, 32L, 52L, 22L, 34L, 66L, 27L, 59L, 31L, 27L,
34L, 38L, 69L, 50L, 63L, 48L, 37L, 41L, 31L, 48L, 35L, 36L,
30L, 38L, 39L, 22L, 97L, 19L, 29L, 72L, 25L, 113L, 17L, 62L,
29L, 44L, 24L, 20L, 48L, 66L, 30L, 24L, 19L, 42L, 27L, 87L,
24L, 19L, 45L, 30L, 34L, 57L, 51L, 28L, 26L, 40L, 102L, 23L,
54L, 32L, 18L, 22L, 4L, 40L, 56L, 3L, 34L, 46L, 29L, 14L,
33L, 52L, 15L, 33L, 44L, 25L, 35L, 33L, 45L, 50L, 38L, 33L,
24L, 45L, 61L, 17L, 38L, 18L, 65L, 61L, 19L, 19L, 25L, 68L,
39L, 21L, 18L, 39L, 36L, 46L, 35L, 68L, 18L, 14L, 18L, 28L,
55L, 30L, 40L, 57L, 52L, 91L, 60L, 84L, 92L, 26L, 65L, 39L,
73L, 36L, 33L, 51L, 133L, 66L, 62L, 38L, 53L, 70L, 33L, 20L,
52L, 45L, 64L, 106L, 70L, 24L, 23L, 44L, 35L, 31L, 52L, 46L,
33L, 15L, 42L, 35L, 33L, 19L, 54L, 64L, 37L, 27L, 51L, 27L,
52L, 61L, 38L, 31L, 46L, 86L, 44L, 58L, 32L, 27L, 13L, 12L,
38L, 72L, 20L, 59L, 37L, 27L, 23L, 59L, 36L, 28L, 38L, 26L,
64L, 34L, 38L, 21L, 34L, 44L, 33L, 55L, 38L, 51L, 49L, 45L,
44L, 40L, 33L, 19L, 18L, 45L, 52L, 63L, 16L, 24L, 50L, 59L,
98L, 60L, 63L, 49L, 59L, 35L, 35L, 38L, 56L, 78L, 68L, 56L,
42L, 80L, 58L, 39L, 50L, 17L, 37L, 40L, 22L, 51L, 32L, 34L,
17L, 33L, 18L, 33L, 25L, 4L, 57L, 47L, 27L, 33L, 20L, 42L,
29L, 41L, 22L, 17L, 9L, 17L, 39L, 78L, 19L, 37L, 50L, 34L,
14L, 29L, 49L, 25L, 33L, 54L, 47L, 12L, 18L, 30L, 22L, 33L,
52L, 80L, 20L, 33L, 61L, 34L, 36L, 67L, 35L, 36L, 24L, 12L,
47L, 29L, 38L, 30L, 25L, 19L, 28L, 37L, 72L, 31L, 39L, 36L,
30L, 60L, 45L, 29L, 56L, 44L, 124L, 42L, 39L, 26L, 74L, 25L,
25L, 124L, 32L, 28L, 32L, 9L, 21L, 25L, 24L, 40L, 14L, 42L,
49L, 21L, 28L, 44L, 38L, 24L, 28L, 34L, 26L, 46L, 36L, 31L,
39L, 22L, 80L, 37L, 54L, 19L, 14L, 55L, 42L, 45L, 23L, 31L,
21L, 33L, 25L, 18L, 46L, 22L, 54L, 32L, 28L, 28L, 31L, 28L,
29L, 41L, 34L, 24L, 41L, 32L, 39L, 14L, 32L, 46L, 32L), n_sygdom = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 3L, 8L, 5L, 8L,
3L, 6L, 3L, 3L, 3L, 6L, 13L, 7L, 16L, 12L, 5L, 4L, 6L, 10L,
8L, 3L, 7L, 6L, 6L, 10L, 5L, 7L, 8L, 5L, 3L, 2L, 3L, 4L,
4L, 2L, 4L, 5L, 2L, 2L, 5L, 2L, 2L, 12L, 7L, 3L, 7L, 4L,
9L, 6L, 3L, 3L, 4L, 1L, 12L, 3L, 3L, 4L, 3L, 2L, 2L, 3L,
2L, 3L, 2L, 4L, 8L, 2L, 2L, 3L, 4L, 4L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 5L, 1L, 9L, 2L, 22L, 3L, 2L, 6L, 4L, 2L, 3L, 3L,
2L, 4L, 4L, 4L, 4L, 3L, 17L, 2L, 7L, 2L, 1L, 4L, 6L, 6L,
8L, 8L, 5L, 2L, 3L, 3L, 3L, 3L, 5L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 6L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 2L, 2L, 3L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L,
2L, 2L, 2L, 3L, 3L, 4L, 3L, 2L, 3L, 2L, 2L, 8L, 2L, 3L, 3L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 4L, 3L, 1L, 3L, 13L, 4L, 9L, 4L, 3L, 2L, 3L, 4L,
3L, 2L, 8L, 4L, 10L, 10L, 2L, 3L, 6L, 8L, 6L, 3L, 3L, 2L,
7L, 5L, 3L, 12L, 2L, 2L, 1L, 2L, 3L, 1L, 2L, 5L, 2L, 7L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), row.names = c(NA,
500L), class = "data.frame")
# Left join in base R: keep every row of p and attach the matching columns of u.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
merge(p, u, by = c("studie", "sex", "n_fjernet", "n_sygdom"), all.x = TRUE)
or
# The same left join with dplyr, written as a direct call: rows of p are kept
# and matched to u on the four shared key columns.
left_join(
  p, u,
  by = c("studie", "sex", "n_fjernet", "n_sygdom")
)

Linear function for condition1 and cubic function for condition2 in one plot

I have data of participants that had numerous trials, where certain trials had one condition, and other trials were another.
My analyses show that for condition 1, there is a linear null effect (flat line), while for condition 2 there is a cubic effect. I want to plot them together.
The code below creates a plot that gives the cubic function for both groups:
# Asker's first attempt: points plus one smoother layer, grouped and coloured by condition.
ggplot(dat, aes(x=trial, y=y, group=condition, colour=condition)) +
geom_point() + geom_jitter(height=0.2) +
# Single smoother layer: the same cubic polynomial formula is used for every condition group.
geom_smooth(alpha=0.1, method="lm", formula = y ~ poly(x,3, raw=TRUE)) +
labs(x="Trial", y="y") +
scale_x_discrete(breaks=c(1,9,18,27,36,45,54,63))
What I want is to not have the cubic function for condition 2, but have a linear function. I tried to force this through aes() calls within geom_smooth(), but this seems to give me a much flatter cubic function for condition 1:
# Asker's second attempt: grouping moved out of ggplot() and into the individual layers,
# with one smoother layer intended per condition.
ggplot(dat, aes(x=trial, y=y)) +
geom_point(aes(group=condition, colour=condition)) + geom_jitter(height=0.2, aes(group=condition, colour=condition)) +
# NOTE(review): `condition="1"` uses `=`, not `==`, so it is not a row filter;
# presumably this smoother is therefore fitted to all rows rather than to condition 1 only - confirm.
geom_smooth(alpha=0.1, method="lm", formula = y ~ poly(x,3, raw=TRUE), aes(group=(condition="1"), colour=(condition="1"))) +
# Same `=` pattern as above; no formula argument is supplied for this layer.
geom_smooth(alpha=0.1, method="lm", aes(group=(condition="2"), colour=(condition="2"))) +
labs(x="Trial", y="y") +
scale_x_discrete(breaks=c(1,9,18,27,36,45,54,63))
Obviously this is not the way to go. How would I accomplish this? Script for reproducible example (first 250 lines of the total dataset, so your figures will be different) below:
structure(list(id = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L
), trial = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L,
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L,
35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L,
61L), condition = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
y = c(NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, NA, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, NA, NA, NA, 0L, NA, 0L, NA, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, NA, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, NA, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, NA,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, NA, NA, 1L, 1L,
1L, 1L, NA, 1L, 1L, 1L, 1L, NA, 1L, 0L, 1L, 1L, 1L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L)), .Names = c("id",
"trial", "condition", "y"), row.names = c(NA, 250L), class = "data.frame")
Edit: The reason I'm not using geom_smooth() using gam or loess, is because there are multiple polynomials in condition 1, so it will show more than just the cubic function if I use that solution. I wish to show the cubic function, not the composite of multiple polynomials.
You could filter your data inside geom_smooth.
library(tidyverse)

# Give every geom_smooth() layer only the rows of one condition, so each
# condition can be fitted with its own model formula.
ggplot(dat, aes(x = trial, y = y, colour = as.factor(condition))) +
  # geom_jitter() already draws the observations (it is geom_point() with
  # jittered positions); an extra geom_point() layer would plot each one twice.
  geom_jitter(height = 0.2) +
  # Condition 2: cubic polynomial in trial.
  geom_smooth(
    data = filter(dat, condition == 2), alpha = 0.1, method = "lm",
    formula = y ~ poly(x, 3, raw = TRUE)
  ) +
  # Condition 1: intercept-only model, i.e. a flat line.
  geom_smooth(
    data = filter(dat, condition == 1), alpha = 0.1, method = "lm",
    formula = y ~ 1
  ) +
  labs(x = "Trial", y = "y") +
  scale_x_continuous(breaks = c(1, 9, 18, 27, 36, 45, 54, 63))
Which gives you this plot

Grouped Frequency Bars in R using ggplot

I'm trying to produce a bar graph with frequencies of multiple groups. I tried using geom_bar() but I keep running into "Error: stat_count() must not be used with a y aesthetic." I have one line for each participant, with age (2 categories), condition (2 categories), and their performance (0 or 1). From what I read on the manual and in pretty much everywhere online, if I use
# Asker's attempt: `performance` is mapped to y and geom_bar() is called without a stat argument.
bar<-ggplot(data, aes(age, performance, fill = condition)) + geom_bar(position = "dodge")
I should get what I want (which is this), but instead I get the error and I can't figure out what I'm missing. Isn't the geom_bar() supposed to give count by default? When I use stat="identity" I get full bars like so: how it actually looks.
Please help! Any advice will be greatly appreciated.
EDITED:
Here's my actual data:
structure(list(ageyears = c(4L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 5L, 4L,
5L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 5L, 4L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 4L, 4L, 5L, 4L, 5L, 5L, 4L,
5L, 4L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L,
5L, 5L, 5L, 4L, 5L, 5L, 4L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 5L,
4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 5L, 5L, 5L,
5L, 5L, 4L, 4L, 4L, 5L, 4L), MatrixLabels = structure(c(2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), Mat_sort_pass_fail = c(0L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L)), .Names = c("ageyears",
"MatrixLabels", "Mat_sort_pass_fail"), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 38L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 48L,
49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 60L, 61L, 62L,
63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 74L, 75L, 76L,
77L, 78L, 79L, 80L, 82L, 83L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L,
104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L,
115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L,
126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L,
137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L,
148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 157L, 158L,
159L, 160L, 197L, 198L, 200L, 201L, 202L, 203L, 204L, 205L, 206L,
207L), class = "data.frame")
From the documentation of geom_bar :
By default, geom_bar uses stat="count" which makes the height of the
bar proportional to the number of cases in each group (or if the weight
aesthetic is supplied, the sum of the weights). If you want the heights
of the bars to represent values in the data, use stat="identity" and
map a variable to the y aesthetic.
In your case the height of each bar should be the sum of your performance values; since you have summarized data, ggplot should use stat = "identity".
EDIT After OP pasted the dput:
You need to summarize your data first. I am assuming your data is stored in a data frame (df1 in the aggregate call below). You can use any tool for the summarization; I show base R aggregate and data.table, and you can pick either of them:
# `df1` is the raw data frame from the question (one row per participant).
# Options 1 and 2 are alternatives: each one starts from `df1` and builds the
# same summary table `df`, so run whichever you prefer.

### 1. Base R aggregate
df <- aggregate(
  Mat_sort_pass_fail ~ ageyears + MatrixLabels,
  data = df1, FUN = sum
)
# Share of all passes that falls into each age x condition cell
df$perc <- df$Mat_sort_pass_fail / sum(df$Mat_sort_pass_fail)
names(df) <- c("age", "condition", "performance", "percentage")

### 2. Summarization using data.table
library(data.table)
# as.data.table() works on a copy, so df1 itself stays a plain data.frame
# (setDT() would convert its argument in place).
dt <- as.data.table(df1)
dt1 <- dt[
  , list(Performance = sum(Mat_sort_pass_fail)),
  by = c("ageyears", "MatrixLabels")
]
dt1[, perc := Performance / sum(Performance)] ## percentage within column
df <- data.frame(dt1)
names(df) <- c("age", "condition", "performance", "percentage")

library(ggplot2)
library(RColorBrewer)

# Dodged bars of the summed performance, one bar per age within each condition
ggplot(df, aes(x = condition, y = performance)) +
  geom_bar(aes(fill = factor(age)), stat = "identity", position = "dodge") +
  ggtitle("Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")

### In case you need the percentage, run the code below:
ggplot(df, aes(x = condition, y = percentage)) +
  geom_bar(aes(fill = factor(age)), stat = "identity", position = "dodge") +
  ggtitle("Matrix Sort Performance") +
  scale_fill_brewer(palette = "Dark2")
Normally it calculates frequencies from your data. If your data is already grouped, try the following:
+ geom_bar(stat="identity",position = "dodge")
You can use geom_col() as an alias for geom_bar(stat = "identity").
You also had what I think is a wrong aes mapping.
I mimicked some data based on the graph you posted:
library(ggplot2)

# Mock data imitating the posted chart: one row per age group x condition.
df <- data.frame(
  age = factor(
    c(4L, 4L, 5L, 5L),
    labels = c("4-Years-Olds", "5-Years-Olds")
  ),
  performance = c(48, 37, 65, 65),
  condition = factor(
    rep(c(1, 2), times = 2),
    labels = c("No Label", "Label")
  )
)

# geom_col() draws bars whose heights are the y values themselves;
# "dodge" places the two age groups side by side within each condition.
ggplot(df, aes(x = condition, y = performance, fill = age)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("skyblue", "darkolivegreen1"))

Resources