I want to make a time series plot grouped by ID. My dataset has 42 different IDs, each with 7 different timeframes. The timeframe varies per ID and ranges from 9/2016 to 8/2018. For example, ID1 can start 10/2016 and end 7/2017 (with 7 rows, each containing a different date) and ID40 can start 11/2016 and end 6/2018 (also with 7 rows, each containing a different date). I try to plot this with the following code:
# Base plot: AI against START, coloured and grouped by ID.
# NOTE: in the dataset shown with this question START holds "MM/YYYY" text,
# not Date values.
p <- ggplot(data = df6, aes(x = START, y = AI, col = ID, group = ID))
# Layers: the raw points, one smoother fitted across all IDs (group = 1),
# the mean AI per START value drawn as triangles (shape 17), and x-axis
# labels rotated by 90 degrees.
p + geom_point(size = 1.2,
alpha = .8) + stat_smooth(aes(group = 1)) + stat_summary(aes(group = 1), geom =
"point", fun.y = mean,
shape = 17, size = 3) + theme_minimal() + theme(axis.text.x = element_text(angle =
90, vjust = 0.5, hjust=1))
This gives me the following graph:
As one can see, the X-axis is not chronological. It should start at 09/2016 and end at 08/2018, with each Y value plotted at the date that belongs to its ID. I have the following dataset:
structure(list(ID = c("ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10",
"ID11", "ID11", "ID11", "ID11", "ID11", "ID12"), Time = c("1",
"2", "3", "4", "5", "6", "7", "1", "2", "3", "4", "5", "6", "7",
"1", "2", "3", "4", "5", "1"), AI = c(0.393672183448241, 0.4876954603533,
0.411717908455957, 0.309769862660288, 0.149826889496538, 0.2448558592586,
0.123606753324621, 0.296109333767922, 0.309960002123076, 0.445886231347992,
0.370013553008003, 0.393414429902431, 0.318940511323733, 0.131112361225666,
0.31961673567578, 0.227268892979164, 0.433471105477564, 0.207184572401005,
0.144257239122978, 0.520204263001733), AI_VAR = c(0.154977788020905,
0.237846862049217, 0.169511636143347, 0.0959573678125739, 0.0224480968162077,
0.0599543918132674, 0.0152786294674538, 0.0876807375444826, 0.0960752029161373,
0.198814531305715, 0.136910029409606, 0.154774913655455, 0.101723049763444,
0.0171904512661696, 0.102154857724042, 0.0516511497159746, 0.187897199283942,
0.0429254470409874, 0.020810151039384, 0.270612475245176), activity = c(0,
0.303472222222222, 0.232638888888889, 0.228472222222222, 0.348611111111111,
0.215972222222222, 0.123611111111111, 0.357638888888889, 0.235416666666667,
0.233333333333333, 0.2875, 0.353472222222222, 0.356944444444444,
0.149305555555556, 0.448611111111111, 0.213888888888889, 0.248611111111111,
0.288888888888889, 0.25625, 0.238888888888889), ZIM_SD = c(0,
0.148002025121106, 0.095781596758851, 0.0707738088994687, 0.0522313184217097,
0.0528820640482116, 0.0152791681192935, 0.105900213118389, 0.0729697504998075,
0.104040120647865, 0.106378896489801, 0.139061072791901, 0.113844043625277,
0.0195758039329988, 0.143383618921218, 0.0486102909983211, 0.107765733167339,
0.059853320915846, 0.036965917525263, 0.124271018383747), ZIM_VAR = c(0,
0.0721799157746582, 0.039434998686126, 0.0219235930627339, 0.00782565597342798,
0.0129484832318932, 0.00188860836472692, 0.0313580415523671,
0.0226177040198407, 0.0463900573046668, 0.0393616334552618, 0.0547086326740462,
0.0363094774850072, 0.00256662987654616, 0.0458278042289798,
0.0110476070225835, 0.0467133314886466, 0.0124006847007297, 0.00533260120384214,
0.0646463135307921), CHECK = c(10L, 13L, 11L, 7L, 7L, 5L, 4L,
36L, 36L, 34L, 34L, 32L, 29L, 21L, 28L, 27L, 26L, 25L, 21L, 36L
), BULBAR = c(2L, 4L, 4L, 4L, 4L, 2L, 2L, 9L, 9L, 9L, 9L, 9L,
7L, 6L, 12L, 12L, 11L, 11L, 11L, 11L), FINE = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 9L, 9L, 8L, 8L, 7L, 6L, 4L, 2L, 1L, 1L, 1L, 0L, 7L
), GROSS = c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 9L, 9L, 9L, 9L, 8L,
8L, 6L, 3L, 3L, 3L, 3L, 2L, 6L), RESPI = c(6L, 7L, 5L, 1L, 1L,
1L, 1L, 9L, 9L, 8L, 8L, 8L, 8L, 5L, 11L, 11L, 11L, 10L, 8L, 12L
), GROSS_RENEWD = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L,
5L, 5L, 4L, 3L, 3L, 3L, 3L, 2L, 3L), ACTIVE = c(2L, 2L, 2L, 2L,
2L, 2L, 1L, 18L, 18L, 17L, 17L, 15L, 14L, 10L, 5L, 4L, 4L, 4L,
2L, 13L), NON.ACTIVE = c(8L, 11L, 9L, 5L, 5L, 3L, 3L, 18L, 18L,
17L, 17L, 17L, 15L, 11L, 23L, 23L, 22L, 21L, 19L, 23L), START = c("09/2016",
"11/2016", "01/2017", "04/2017", "06/2017", "10/2017", "02/2018",
"10/2016", "12/2016", "02/2017", "04/2017", "07/2017", "11/2017",
"04/2018", "10/2016", "12/2016", "02/2017", "04/2017", "07/2017",
"10/2016"), STOP = c("10/2016", "11/2016", "01/2017", "04/2017",
"06/2017", "10/2017", "03/2018", "10/2016", "12/2016", "02/2017",
"04/2017", "07/2017", "11/2017", "04/2018", "10/2016", "12/2016",
"02/2017", "04/2017", "07/2017", "10/2016")), row.names = c(NA,
20L), class = "data.frame")
In general, I want the axis for the column START to begin at the earliest date and end at the latest date when it is plotted.
You should convert your "START" column to a date format. You could use the package zoo with the function as.yearmon for that. To start the axis with your start date and end it with the end date, you could create a vector of date breaks using the min (start) date and max (end) date. Here is a reproducible example:
library(ggplot2)
library(zoo)
library(dplyr)
# Parse the "MM/YYYY" strings into Date values (first day of each month) so
# that ggplot2 draws a continuous, chronological date axis.
df6 <- df6 %>%
  mutate(START = as.Date(as.yearmon(START, format = "%m/%Y")))

# One axis break per month from the earliest to the latest START date.
# seq() already begins at `from`, so the minimum needs no separate entry.
breaks.vec <- seq(from = min(df6$START), to = max(df6$START), by = "month")

ggplot(data = df6, aes(x = START, y = AI, col = ID, group = ID)) +
  geom_point(size = 1.2, alpha = .8) +
  # group = 1 pools all IDs into a single smoother / a single mean per date
  stat_smooth(aes(group = 1)) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary()
  stat_summary(
    aes(group = 1),
    geom = "point", fun = mean, shape = 17, size = 3
  ) +
  scale_x_date(breaks = breaks.vec, date_labels = "%m/%Y") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Created on 2022-10-17 with reprex v2.0.2
I have a dataframe from Alzheimer disease patients. I would like to create a table with counts for the number of patients (indicated by patient ID: iid) with age_at_onset <75, <70 for each status (1,2,3) per Ethnicity. How can I do this in R?
df <- structure(list(iid = structure(c(`5068` = 80L, `15562` = 58L,
`8939` = 52L, `17602` = 34L, `3173` = 40L, `12591` = 30L, `17391` = 97L,
`8241` = 93L, `9746` = 10L, `9673` = 7L, `16594` = 29L, `16911` = 60L,
`4796` = 18L, `6598` = 12L, `11462` = 26L, `16425` = 17L, `12698` = 37L,
`17118` = 81L, `1501` = 76L, `13294` = 92L, `8072` = 84L, `11642` = 46L,
`4164` = 85L, `9035` = 62L, `16691` = 35L, `16002` = 86L, `3915` = 21L,
`7409` = 54L, `9759` = 11L, `6130` = 6L, `15153` = 23L, `13539` = 100L,
`13262` = 87L, `742` = 28L, `17592` = 33L, `16812` = 53L, `213` = 66L,
`11963` = 77L, `12093` = 89L, `11910` = 68L, `15813` = 73L, `1104` = 51L,
`1966` = 95L, `5589` = 61L, `8860` = 41L, `482` = 16L, `3967` = 55L,
`5869` = 1L, `12435` = 20L, `11675` = 50L, `16701` = 36L, `5893` = 2L,
`16880` = 57L, `13290` = 90L, `1097` = 49L, `1476` = 71L, `9100` = 67L,
`6220` = 8L, `15393` = 42L, `16631` = 31L, `9641` = 4L, `13485` = 99L,
`1028` = 44L, `8200` = 91L, `12190` = 94L, `5581` = 19L, `7266` = 43L,
`12254` = 98L, `15763` = 69L, `17764` = 79L, `16239` = 96L, `7548` = 59L,
`12037` = 83L, `7813` = 70L, `12943` = 63L, `17748` = 75L, `12703` = 38L,
`11964` = 78L, `14018` = 45L, `1769` = 88L, `13713` = 22L, `13100` = 74L,
`13866` = 32L, `2527` = 25L, `2281` = 15L, `4463` = 39L, `5815` = 14L,
`14040` = 47L, `16560` = 24L, `12887` = 56L, `11167` = 13L, `6123` = 5L,
`5668` = 48L, `3036` = 82L, `7622` = 65L, `11470` = 27L, `4770` = 64L,
`17050` = 72L, `6295` = 9L, `9575` = 3L), .Label = c("08AD09051_NACC295883",
"08AD10766_NACC977458", "08AD9133", "09AD14006", "09AD14313_NACC904765",
"09AD14360_NACC785663", "09AD14874", "09AD14943_NACC009736",
"09AD15417_NACC169039", "09AD15778", "09AD15810", "09AD17022_NACC426380",
"25795", "NACC026302", "NACC026743", "NACC044624", "NACC062886",
"NACC083669", "NACC088187", "NACC094571", "NACC107551", "NACC134929",
"NACC178119", "NACC178349", "NACC183751", "NACC186606", "NACC192719",
"NACC193548", "NACC209758", "NACC224665", "NACC243923", "NACC246256",
"NACC261383", "NACC283729", "NACC298544", "NACC305567", "NACC310219",
"NACC310896", "NACC312856", "NACC336802", "NACC342957", "NACC350799",
"NACC351234_09AD13080", "NACC355338", "NACC355951", "NACC361682",
"NACC369873", "NACC397276", "NACC402765", "NACC403144", "NACC407162",
"NACC412031", "NACC413408", "NACC422516_08AD10849", "NACC436908",
"NACC465387", "NACC472288", "NACC479723", "NACC485644_08AD8204",
"NACC504120", "NACC508353", "NACC509594", "NACC510498", "NACC519864",
"NACC521718_08AD9198", "NACC559675", "NACC585997", "NACC605438",
"NACC612578", "NACC619036_09AD14621", "NACC621261", "NACC634809",
"NACC635885", "NACC639654", "NACC640099", "NACC642393", "NACC660918",
"NACC660981", "NACC684037", "NACC690933", "NACC695603", "NACC703758",
"NACC740374", "NACC744168_08AD7716", "NACC766835", "NACC769330",
"NACC775129", "NACC792439", "NACC796641", "NACC805995", "NACC806269_09AD13056",
"NACC809589", "NACC824113_08AD9038", "NACC884140", "NACC916661",
"NACC921664", "NACC926195", "NACC929277", "NACC959601", "NACC992086"
), class = "factor"), omit = structure(c(`5068` = 1L, `15562` = 1L,
`8939` = 1L, `17602` = 1L, `3173` = 1L, `12591` = 2L, `17391` = 1L,
`8241` = 1L, `9746` = 1L, `9673` = 2L, `16594` = 2L, `16911` = 2L,
`4796` = 1L, `6598` = 2L, `11462` = 1L, `16425` = 1L, `12698` = 1L,
`17118` = 1L, `1501` = 1L, `13294` = 1L, `8072` = 1L, `11642` = 2L,
`4164` = 1L, `9035` = 1L, `16691` = 1L, `16002` = 1L, `3915` = 1L,
`7409` = 1L, `9759` = 1L, `6130` = 1L, `15153` = 1L, `13539` = 2L,
`13262` = 1L, `742` = 2L, `17592` = 1L, `16812` = 1L, `213` = 2L,
`11963` = 2L, `12093` = 2L, `11910` = 2L, `15813` = 1L, `1104` = 1L,
`1966` = 1L, `5589` = 1L, `8860` = 1L, `482` = 1L, `3967` = 1L,
`5869` = 2L, `12435` = 1L, `11675` = 2L, `16701` = 1L, `5893` = 1L,
`16880` = 2L, `13290` = 2L, `1097` = 1L, `1476` = 1L, `9100` = 1L,
`6220` = 1L, `15393` = 1L, `16631` = 1L, `9641` = 1L, `13485` = 2L,
`1028` = 1L, `8200` = 2L, `12190` = 1L, `5581` = 2L, `7266` = 1L,
`12254` = 1L, `15763` = 1L, `17764` = 1L, `16239` = 1L, `7548` = 1L,
`12037` = 1L, `7813` = 1L, `12943` = 2L, `17748` = 1L, `12703` = 1L,
`11964` = 1L, `14018` = 1L, `1769` = 1L, `13713` = 1L, `13100` = 1L,
`13866` = 2L, `2527` = 1L, `2281` = 1L, `4463` = 1L, `5815` = 1L,
`14040` = 1L, `16560` = 2L, `12887` = 1L, `11167` = 2L, `6123` = 2L,
`5668` = 1L, `3036` = 1L, `7622` = 1L, `11470` = 1L, `4770` = 1L,
`17050` = 2L, `6295` = 2L, `9575` = 1L), .Label = c("0", "1"), class = "factor"),
sex = structure(c(1L, 1L, 1L, 2L, 1L, NA, 2L, 2L, 1L, NA,
2L, 2L, 1L, NA, 2L, 2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, NA, 2L, 2L, 1L, 2L, 2L, NA, NA, NA,
2L, 1L, 2L, 1L, 1L, 2L, 2L, NA, 2L, NA, 1L, 2L, 1L, NA, 2L,
1L, 2L, 2L, 2L, 2L, 1L, NA, 2L, NA, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 1L, NA, 2L, 2L, 1L, 2L, 1L, 1L, 2L, NA, 1L, 2L,
1L, 2L, 2L, 1L, 1L, NA, NA, 1L, 1L, 2L, 2L, 2L, 2L, NA, 2L
), .Label = c(" 1", " 2", "-9"), class = "factor"), status = structure(c(2L,
2L, 2L, 1L, 3L, NA, 2L, 2L, 2L, NA, 2L, NA, 1L, NA, 2L, 1L,
1L, 1L, 2L, 1L, 2L, NA, 3L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
NA, 1L, 2L, 2L, 2L, NA, NA, NA, NA, 2L, 1L, 1L, 3L, 2L, 1L,
3L, NA, 1L, NA, 2L, 1L, NA, NA, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
NA, 1L, NA, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, NA, 1L,
1L, 2L, 2L, 2L, 1L, 1L, NA, 3L, 1L, 3L, 3L, 1L, NA, 2L, NA,
NA, 3L, 2L, 2L, 1L, 2L, 2L, NA, 2L), .Label = c(" 1", " 2",
" 3", "-9"), class = "factor"), age_at_onset = structure(c(`5068` = 4L,
`15562` = 16L, `8939` = 24L, `17602` = NA, `3173` = 24L,
`12591` = NA, `17391` = 15L, `8241` = 13L, `9746` = 18L,
`9673` = NA, `16594` = 20L, `16911` = NA, `4796` = NA, `6598` = NA,
`11462` = 20L, `16425` = NA, `12698` = NA, `17118` = NA,
`1501` = 5L, `13294` = NA, `8072` = 11L, `11642` = NA, `4164` = 25L,
`9035` = NA, `16691` = NA, `16002` = NA, `3915` = NA, `7409` = 21L,
`9759` = 14L, `6130` = NA, `15153` = NA, `13539` = NA, `13262` = NA,
`742` = 26L, `17592` = 28L, `16812` = 9L, `213` = 14L, `11963` = NA,
`12093` = NA, `11910` = NA, `15813` = 10L, `1104` = NA, `1966` = NA,
`5589` = 16L, `8860` = 8L, `482` = NA, `3967` = 7L, `5869` = NA,
`12435` = NA, `11675` = NA, `16701` = 19L, `5893` = NA, `16880` = 22L,
`13290` = NA, `1097` = NA, `1476` = 7L, `9100` = 22L, `6220` = NA,
`15393` = NA, `16631` = NA, `9641` = NA, `13485` = NA, `1028` = NA,
`8200` = NA, `12190` = NA, `5581` = NA, `7266` = 17L, `12254` = 17L,
`15763` = NA, `17764` = 6L, `16239` = NA, `7548` = 14L, `12037` = 27L,
`7813` = 26L, `12943` = NA, `17748` = NA, `12703` = NA, `11964` = 20L,
`14018` = 23L, `1769` = 25L, `13713` = NA, `13100` = NA,
`13866` = NA, `2527` = 12L, `2281` = NA, `4463` = 1L, `5815` = 3L,
`14040` = NA, `16560` = NA, `12887` = 14L, `11167` = NA,
`6123` = NA, `5668` = 5L, `3036` = 2L, `7622` = 7L, `11470` = NA,
`4770` = 17L, `17050` = 15L, `6295` = NA, `9575` = 19L), .Label = c("44",
"52", "56", "58", "60", "61", "62", "64", "65", "66", "67",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78",
"79", "80", "81", "82", "83", "88", "90"), class = "factor"),
age_last_visit = structure(c(`5068` = 8L, `15562` = 18L,
`8939` = 24L, `17602` = 16L, `3173` = 21L, `12591` = NA,
`17391` = 17L, `8241` = NA, `9746` = NA, `9673` = NA, `16594` = 25L,
`16911` = 4L, `4796` = 5L, `6598` = NA, `11462` = 21L, `16425` = 10L,
`12698` = 25L, `17118` = 12L, `1501` = 7L, `13294` = 9L,
`8072` = NA, `11642` = NA, `4164` = 21L, `9035` = 21L, `16691` = 3L,
`16002` = 14L, `3915` = 13L, `7409` = NA, `9759` = NA, `6130` = 25L,
`15153` = 22L, `13539` = NA, `13262` = 24L, `742` = 26L,
`17592` = 30L, `16812` = 9L, `213` = 11L, `11963` = NA, `12093` = NA,
`11910` = NA, `15813` = 10L, `1104` = 24L, `1966` = 14L,
`5589` = 18L, `8860` = 23L, `482` = 15L, `3967` = 7L, `5869` = NA,
`12435` = 6L, `11675` = NA, `16701` = 25L, `5893` = NA, `16880` = 20L,
`13290` = NA, `1097` = 8L, `1476` = 5L, `9100` = 28L, `6220` = 21L,
`15393` = 17L, `16631` = 9L, `9641` = 24L, `13485` = NA,
`1028` = 7L, `8200` = NA, `12190` = 8L, `5581` = 15L, `7266` = NA,
`12254` = 19L, `15763` = 7L, `17764` = 6L, `16239` = 11L,
`7548` = NA, `12037` = 29L, `7813` = NA, `12943` = NA, `17748` = 23L,
`12703` = 27L, `11964` = 23L, `14018` = 26L, `1769` = 24L,
`13713` = 13L, `13100` = 20L, `13866` = NA, `2527` = 13L,
`2281` = 21L, `4463` = 4L, `5815` = 3L, `14040` = 2L, `16560` = 14L,
`12887` = 24L, `11167` = NA, `6123` = NA, `5668` = 12L, `3036` = 1L,
`7622` = NA, `11470` = 18L, `4770` = 18L, `17050` = 18L,
`6295` = NA, `9575` = NA), .Label = c("59", "60", "61", "62",
"64", "65", "67", "68", "69", "70", "71", "72", "73", "74",
"75", "76", "77", "79", "80", "81", "82", "83", "84", "85",
"86", "89", "91", "92", "93", "94"), class = "factor"), age_at_death = structure(c(`5068` = 2L,
`15562` = NA, `8939` = NA, `17602` = NA, `3173` = NA, `12591` = NA,
`17391` = NA, `8241` = 10L, `9746` = 9L, `9673` = NA, `16594` = NA,
`16911` = NA, `4796` = NA, `6598` = NA, `11462` = NA, `16425` = NA,
`12698` = NA, `17118` = NA, `1501` = NA, `13294` = NA, `8072` = 6L,
`11642` = NA, `4164` = NA, `9035` = NA, `16691` = NA, `16002` = NA,
`3915` = NA, `7409` = 16L, `9759` = 8L, `6130` = NA, `15153` = NA,
`13539` = NA, `13262` = NA, `742` = 14L, `17592` = NA, `16812` = NA,
`213` = NA, `11963` = NA, `12093` = NA, `11910` = NA, `15813` = NA,
`1104` = NA, `1966` = NA, `5589` = NA, `8860` = NA, `482` = NA,
`3967` = NA, `5869` = NA, `12435` = NA, `11675` = NA, `16701` = NA,
`5893` = 16L, `16880` = NA, `13290` = NA, `1097` = NA, `1476` = 1L,
`9100` = NA, `6220` = NA, `15393` = NA, `16631` = NA, `9641` = NA,
`13485` = NA, `1028` = NA, `8200` = NA, `12190` = NA, `5581` = NA,
`7266` = 11L, `12254` = NA, `15763` = NA, `17764` = 3L, `16239` = NA,
`7548` = 6L, `12037` = 15L, `7813` = 13L, `12943` = NA, `17748` = NA,
`12703` = NA, `11964` = NA, `14018` = NA, `1769` = 12L, `13713` = NA,
`13100` = NA, `13866` = NA, `2527` = 5L, `2281` = NA, `4463` = NA,
`5815` = NA, `14040` = NA, `16560` = NA, `12887` = NA, `11167` = NA,
`6123` = NA, `5668` = NA, `3036` = NA, `7622` = 4L, `11470` = NA,
`4770` = NA, `17050` = NA, `6295` = NA, `9575` = 7L), .Label = c("66",
"70", "71", "73", "74", "75", "77", "79", "82", "83", "85",
"86", "88", "90", "93", "94"), class = "factor"), aaoaae = structure(c(3L,
16L, 24L, 19L, 25L, NA, 15L, 13L, 18L, NA, 20L, 6L, 7L, NA,
20L, 13L, 29L, 15L, 4L, 12L, 10L, NA, 25L, NA, 5L, 17L, 16L,
21L, 14L, 29L, 26L, NA, 28L, 26L, 31L, 8L, 14L, NA, NA, NA,
9L, 28L, 17L, 22L, 7L, 18L, 10L, NA, 8L, NA, 19L, 33L, 24L,
NA, 11L, 6L, 22L, 25L, 20L, 12L, NA, NA, 10L, NA, 11L, 18L,
17L, 17L, 10L, 5L, 14L, 14L, 30L, 26L, NA, 27L, 32L, 20L,
23L, 25L, 16L, 24L, NA, 16L, 25L, 6L, 5L, 4L, 17L, 14L, NA,
NA, 15L, 2L, 6L, 22L, 17L, 15L, NA, 19L), .Label = c("-9",
"52", "58", "60", "61", "62", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78",
"79", "80", "81", "82", "83", "84", "85", "86", "88", "90",
"91", "94"), class = "factor"), aaoaae2 = structure(c(3L,
16L, 24L, 19L, 25L, NA, 15L, 13L, 18L, NA, 20L, 6L, 7L, NA,
20L, 13L, 29L, 15L, 4L, 12L, 10L, NA, 25L, NA, 5L, 17L, 16L,
21L, 14L, 29L, 26L, NA, 28L, 26L, 31L, 8L, 14L, NA, NA, NA,
9L, 28L, 17L, 22L, 7L, 18L, 10L, NA, 8L, NA, 19L, 33L, 24L,
NA, 11L, 6L, 22L, 25L, 20L, 12L, NA, NA, 10L, NA, 11L, 18L,
17L, 17L, 10L, 5L, 14L, 14L, 30L, 26L, NA, 27L, 32L, 20L,
23L, 25L, 16L, 24L, NA, 16L, 25L, 6L, 5L, 4L, 17L, 14L, NA,
NA, 15L, 2L, 6L, 22L, 17L, 15L, NA, 19L), .Label = c("-9",
"52", "58", "60", "61", "62", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78",
"79", "80", "81", "82", "83", "84", "85", "86", "88", "90",
"91", "94"), class = "factor"), apoe_1 = structure(c(3L,
3L, 3L, 3L, 3L, NA, 3L, 3L, 3L, NA, 3L, 2L, 3L, NA, 3L, 2L,
3L, 4L, 4L, 3L, 3L, NA, 4L, 3L, 3L, 3L, 3L, 3L, 3L, NA, 2L,
NA, 3L, 3L, 2L, 4L, 4L, NA, NA, NA, 4L, 3L, 4L, 2L, NA, 3L,
4L, NA, 3L, NA, 4L, 3L, 2L, NA, 4L, 3L, 3L, 3L, 3L, 3L, 3L,
NA, 3L, NA, 4L, 3L, 3L, 3L, 3L, 4L, 3L, 3L, 3L, 3L, NA, 3L,
3L, 4L, 3L, 3L, 3L, 3L, NA, 3L, 3L, 3L, 3L, 3L, 3L, 3L, NA,
NA, 3L, 3L, 3L, 3L, 3L, 4L, NA, 4L), .Label = c("-9", "2",
"3", "4"), class = "factor"), apoe_2 = structure(c(4L, 4L,
3L, 3L, 3L, NA, 4L, 4L, 4L, NA, 3L, 3L, 3L, NA, 4L, 3L, 3L,
4L, 4L, 3L, 4L, NA, 2L, 4L, 3L, 4L, 3L, 4L, 3L, NA, 3L, NA,
3L, 3L, 3L, 4L, 2L, NA, NA, NA, 2L, 3L, 3L, 3L, NA, 3L, 3L,
NA, 3L, NA, 3L, 3L, 2L, NA, 3L, 4L, 4L, 4L, 3L, 4L, 4L, NA,
4L, NA, 2L, 3L, 3L, 3L, 4L, 3L, 2L, 4L, 4L, 3L, NA, 3L, 3L,
4L, 3L, 3L, 4L, 3L, NA, 3L, 3L, 2L, 3L, 2L, 3L, 4L, NA, NA,
2L, 4L, 3L, 4L, 2L, 3L, NA, 4L), .Label = c("-9", "2", "3",
"4"), class = "factor"), apoe4any = structure(c(3L, 3L, 2L,
2L, 2L, NA, 3L, 3L, 3L, NA, 2L, 2L, 2L, NA, 3L, 2L, 2L, 3L,
3L, 2L, 3L, NA, 3L, 3L, 2L, 3L, 2L, 3L, 2L, NA, 2L, NA, 2L,
2L, 2L, 3L, 3L, NA, NA, NA, 3L, 2L, 3L, 2L, NA, 2L, 3L, NA,
2L, NA, 3L, 2L, 2L, NA, 3L, 3L, 3L, 3L, 2L, 3L, 3L, NA, 3L,
NA, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, NA, 2L, 2L, 3L,
2L, 2L, 3L, 2L, NA, 2L, 2L, 2L, 2L, 2L, 2L, 3L, NA, NA, 2L,
3L, 2L, 3L, 2L, 3L, NA, 3L), .Label = c("-9", "0", "1"), class = "factor"),
apoe4dose = structure(c(3L, 3L, 2L, 2L, 2L, NA, 3L, 3L, 3L,
NA, 2L, 2L, 2L, NA, 3L, 2L, 2L, 4L, 4L, 2L, 3L, NA, 3L, 3L,
2L, 3L, 2L, 3L, 2L, NA, 2L, NA, 2L, 2L, 2L, 4L, 3L, NA, NA,
NA, 3L, 2L, 3L, 2L, NA, 2L, 3L, NA, 2L, NA, 3L, 2L, 2L, NA,
3L, 3L, 3L, 3L, 2L, 3L, 3L, NA, 3L, NA, 3L, 2L, 2L, 2L, 3L,
3L, 2L, 3L, 3L, 2L, NA, 2L, 2L, 4L, 2L, 2L, 3L, 2L, NA, 2L,
2L, 2L, 2L, 2L, 2L, 3L, NA, NA, 2L, 3L, 2L, 3L, 2L, 3L, NA,
4L), .Label = c("-9", "0", "1", "2"), class = "factor"),
Ethnicity = structure(c(`5068` = 4L, `15562` = 4L, `8939` = 4L,
`17602` = 3L, `3173` = 4L, `12591` = 4L, `17391` = 4L, `8241` = 4L,
`9746` = 4L, `9673` = 4L, `16594` = 4L, `16911` = 4L, `4796` = 4L,
`6598` = 4L, `11462` = 4L, `16425` = 4L, `12698` = 4L, `17118` = 4L,
`1501` = 4L, `13294` = 4L, `8072` = 4L, `11642` = 4L, `4164` = 1L,
`9035` = 4L, `16691` = 4L, `16002` = 4L, `3915` = 2L, `7409` = 4L,
`9759` = 4L, `6130` = 4L, `15153` = 4L, `13539` = 4L, `13262` = 4L,
`742` = 4L, `17592` = 3L, `16812` = 4L, `213` = 1L, `11963` = 4L,
`12093` = 4L, `11910` = 4L, `15813` = 4L, `1104` = 4L, `1966` = 4L,
`5589` = 1L, `8860` = 4L, `482` = 4L, `3967` = 4L, `5869` = 4L,
`12435` = 4L, `11675` = 4L, `16701` = 4L, `5893` = 4L, `16880` = 4L,
`13290` = 4L, `1097` = 4L, `1476` = 4L, `9100` = 4L, `6220` = 4L,
`15393` = 4L, `16631` = 4L, `9641` = 4L, `13485` = 4L, `1028` = 4L,
`8200` = 4L, `12190` = 4L, `5581` = 4L, `7266` = 4L, `12254` = 4L,
`15763` = 4L, `17764` = 3L, `16239` = 4L, `7548` = 4L, `12037` = 4L,
`7813` = 4L, `12943` = 4L, `17748` = 3L, `12703` = 4L, `11964` = 4L,
`14018` = 4L, `1769` = 4L, `13713` = 4L, `13100` = 4L, `13866` = 4L,
`2527` = 4L, `2281` = 2L, `4463` = 4L, `5815` = 4L, `14040` = 4L,
`16560` = 4L, `12887` = 4L, `11167` = 4L, `6123` = 4L, `5668` = 4L,
`3036` = 4L, `7622` = 4L, `11470` = 4L, `4770` = 2L, `17050` = 4L,
`6295` = 4L, `9575` = 4L), .Label = c("AA", "Asian", "Hispanic",
"NHW"), class = "factor")), row.names = c(NA, -100L), class = "data.frame")
We can group by 'status' and 'Ethnicity' and take the sum of a logical vector:
library(dplyr)
# age_at_onset is a factor, so convert via character to get the real ages
# once, then count per status/Ethnicity how many fall below each cut-off.
# Summing a logical vector counts its TRUE values; na.rm skips unknown ages.
df %>%
  mutate(onset_age = as.numeric(as.character(age_at_onset))) %>%
  group_by(status, Ethnicity) %>%
  summarise(
    n_75 = sum(onset_age < 75, na.rm = TRUE),
    n_70 = sum(onset_age < 70, na.rm = TRUE)
  )
-output
# A tibble: 10 x 4
# Groups: status [4]
# status Ethnicity n_75 n_70
# <fct> <fct> <int> <int>
# 1 " 1" Asian 0 0
# 2 " 1" Hispanic 0 0
# 3 " 1" NHW 0 0
# 4 " 2" Asian 1 0
# 5 " 2" Hispanic 1 1
# 6 " 2" NHW 18 9
# 7 " 3" AA 1 0
# 8 " 3" NHW 5 5
# 9 <NA> AA 1 0
#10 <NA> NHW 0 0
This also works:
> # LT75 / LT70 flag each patient: 1 = onset below the cut-off, 0 = not,
> # NA = age unknown. Columns are referenced by bare name so that mutate()
> # works on the piped data instead of reaching back to the global `df`.
> df %>%
+ select(iid, age_at_onset, status, Ethnicity) %>%
+ mutate(LT75 = ifelse(as.numeric(as.character(age_at_onset)) < 75, 1, 0),
+ LT70 = ifelse(as.numeric(as.character(age_at_onset)) < 70, 1, 0)) %>%
+ group_by(status, Ethnicity) %>%
+ summarise(Lessthan75 = sum(LT75, na.rm = TRUE), Lessthan70 = sum(LT70, na.rm = TRUE))
`summarise()` regrouping output by 'status' (override with `.groups` argument)
# A tibble: 10 x 4
# Groups: status [4]
status Ethnicity Lessthan75 Lessthan70
<fct> <fct> <dbl> <dbl>
1 " 1" Asian 0 0
2 " 1" Hispanic 0 0
3 " 1" NHW 0 0
4 " 2" Asian 1 0
5 " 2" Hispanic 1 1
6 " 2" NHW 18 9
7 " 3" AA 1 0
8 " 3" NHW 5 5
9 NA AA 1 0
10 NA NHW 0 0
>
You have age_at_onset as factor, convert it to numeric, then use cut to divide data into different buckets and count to count how many iid fall into each bucket.
library(dplyr)
# age_at_onset is stored as a factor: convert via character to recover the
# real ages (as.numeric() on the factor itself would return the level codes).
# right = FALSE makes the intervals left-closed, [a, b), so the buckets are
# exactly "< 70", "70 to < 75" and ">= 75". With cut()'s default right-closed
# intervals an onset at exactly 70 or 75 would be counted in the bucket below.
# The "< 75" count is the sum of the first two buckets.
df %>%
  mutate(age_at_onset = as.numeric(as.character(age_at_onset)),
         age_group = cut(age_at_onset, c(-Inf, 70, 75, Inf), right = FALSE)) %>%
  count(Ethnicity, status, age_group)
# Ethnicity status age_group n
#1 AA 3 [70,75) 1
#2 AA 3 [75, Inf) 1
#3 AA <NA> [70,75) 1
#4 Asian 1 <NA> 2
#5 Asian 2 [70,75) 1
#6 Hispanic 1 <NA> 2
#7 Hispanic 2 [-Inf,70) 1
#8 Hispanic 2 [75, Inf) 1
#9 NHW 1 <NA> 29
#10 NHW 2 [-Inf,70) 9
#11 NHW 2 [70,75) 9
#12 NHW 2 [75, Inf) 14
#13 NHW 2 <NA> 2
#14 NHW 3 [-Inf,70) 5
#15 NHW 3 [75, Inf) 1
#16 NHW <NA> [75, Inf) 1
#17 NHW <NA> <NA> 20
If needed you can drop the NA values in age_group column with filter(!is.na(age_group)).
I am working with both observed and modeled soil moisture measurements at multiple sites:
DF <- structure(list(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L), .Label = c("CA-Oas", "CA-Ojp", "CA-Qfo",
"US-Ho1", "US-UMB"), class = "factor"), month = c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), satellite = c(NA,
NA, NA, 0.246855412760089, 0.239430871664309, 0.247785585430952,
0.254201344766859, 0.228525727473456, 0.220153005451572, 0.248914102315903,
0.226286688271691, NA, NA, NA, NA, 0.289740440845489, 0.377737456677007,
0.349513851965849, 0.369372068320291, 0.33528384697019, 0.316710417976185,
0.353778275847435, 0.366419460285794, NA, NA, NA, NA, NA, 0.523234443318459,
0.541902482509613, 0.541902482509613, 0.541902482509613, 0.541902482509613,
0.541902482509613, 0.541902482509613, NA, NA, NA, 0.490694537758827,
0.592309034864108, 0.636846342572459, 0.645659983158112, 0.642242492328991,
0.644422933720706, 0.634390437856634, 0.604341197472352, 0.601287194034632,
0.586221873760223, NA, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923), satellite.low = c(NA, NA, NA, 0.208491480949955,
0.208809739158129, 0.190487245023279, 0.206581580485592, 0.190563366897309,
0.180458581035219, 0.229389992708553, 0.200156716900049, NA,
NA, NA, NA, 0.197556973794879, 0.303169270710883, 0.255839831380852,
0.308712828188052, 0.272073699535891, 0.262898007889838, 0.279825783579647,
0.257326671080677, NA, NA, NA, NA, NA, 0.477209513195344, 0.541902482509613,
0.541902482509613, 0.541902482509613, 0.541902482509613, 0.541902482509613,
0.541902482509613, NA, NA, NA, 0.364323639893309, 0.534208357528997,
0.611485343460275, 0.643659507474862, 0.627827219126354, 0.637549292123253,
0.608606893522788, 0.5526295760826, 0.539322500377704, 0.519807807424512,
NA, NA, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923), satellite.high = c(NA,
NA, NA, 0.285219344570222, 0.270052004170489, 0.305083925838626,
0.301821109048126, 0.266488088049604, 0.259847429867925, 0.268438211923252,
0.252416659643333, NA, NA, NA, NA, 0.3819239078961, 0.45230564264313,
0.443187872550847, 0.430031308452529, 0.398493994404489, 0.370522828062531,
0.427730768115223, 0.47551224949091, NA, NA, NA, NA, NA, 0.569259373441575,
0.541902482509613, 0.541902482509613, 0.541902482509613, 0.541902482509613,
0.541902482509613, 0.541902482509613, NA, NA, NA, 0.617065435624345,
0.650409712199219, 0.662207341684644, 0.647660458841361, 0.656657765531627,
0.651296575318159, 0.660173982190479, 0.656052818862104, 0.663251887691561,
0.652635940095934, NA, NA, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923
), observed = c(0.140086734851409, 0.137745990685859, 0.146660019201229,
0.275950971628449, 0.298260250896057, 0.26870029739777, 0.227566661823465,
0.197824137311287, 0.195409734063355, 0.229745648248465, 0.226546607074933,
0.158508782420749, 0.0809095246636771, 0.0804010923965351, 0.0845644708882278,
0.136702248824284, 0.121883242349049, 0.108167424836601, 0.0970784232538687,
0.0860934461299105, 0.0910916878172589, 0.10747642248062, 0.102700195758564,
0.0811833903700756, 0.115733715437788, 0.0631616319005478, 0.0631265153446416,
0.171535848109378, 0.18694684173028, 0.142807562821677, 0.145926108701425,
0.154393702185792, 0.171436382382201, 0.188897212829005, 0.186402403754978,
0.165098945598251, 0.0713685071127924, 0.0436531172429078, 0.0624862109235555,
0.127141665482761, 0.134542260869565, 0.124414092512545, 0.100807230998223,
0.0765214392215714, 0.0798724029741452, 0.103098854664915, 0.116568256944444,
0.1105108739241, 0.108650005144474, 0.0976296689160692, 0.105006219572287,
0.122777662914972, 0.102765292125318, 0.0851933017211099, 0.0566760862577016,
0.056282148272957, 0.0718264626865672, 0.0909327257326783, 0.10461694624978,
0.103895834299474), observed.low = c(0.123032811442984, 0.126127332034484,
0.118118534835286, 0.198817004012519, 0.254140718534211, 0.208009439993492,
0.164189152182023, 0.138753714747272, 0.124966620815314, 0.161415523471958,
0.159972232752574, 0.121607685043651, 0.0591772062927612, 0.05892126834644,
0.0577332139485083, 0.0979872715014624, 0.0939120895219702, 0.0785008888345453,
0.0635909823809719, 0.0516671874880994, 0.0513847048326183, 0.0716629121839855,
0.0686069735468453, 0.0605781104970885, 0.0725611238034912, 0.0287878644886994,
0.0312612891352408, 0.117719361193379, 0.158601178554096, 0.103448327552773,
0.108434377599704, 0.123534447310382, 0.150300870070247, 0.171762349088762,
0.173223674947214, 0.143153513926194, 0.0176750483828094, -0.00825194618307156,
0.00841588326444485, 0.0851767193470053, 0.114292404939469, 0.104180435677072,
0.0730616681527658, 0.03446185464827, 0.0417092732525248, 0.0672335656317878,
0.091673056919691, 0.0718925232829272, 0.0765541880959607, 0.0759211571229279,
0.0803948566686958, 0.0858717319021568, 0.0760499923996711, 0.0506548126690479,
0.0369099617804679, 0.0337436690922423, 0.0466183548891693, 0.0663011553037621,
0.0900575679369071, 0.0899421880715561), observed.high = c(0.157140658259833,
0.149364649337235, 0.175201503567172, 0.353084939244379, 0.342379783257904,
0.329391154802047, 0.290944171464907, 0.256894559875301, 0.265852847311397,
0.298075773024972, 0.293120981397293, 0.195409879797847, 0.102641843034593,
0.10188091644663, 0.111395727827947, 0.175417226147105, 0.149854395176127,
0.137833960838657, 0.130565864126765, 0.120519704771722, 0.130798670801899,
0.143289932777255, 0.136793417970284, 0.101788670243063, 0.158906307072085,
0.0975353993123963, 0.0949917415540424, 0.225352335025378, 0.215292504906464,
0.182166798090582, 0.183417839803146, 0.185252957061203, 0.192571894694156,
0.206032076569248, 0.199581132562743, 0.187044377270308, 0.125061965842775,
0.0955581806688872, 0.116556538582666, 0.169106611618516, 0.154792116799661,
0.144647749348019, 0.128552793843681, 0.118581023794873, 0.118035532695766,
0.138964143698041, 0.141463456969198, 0.149129224565273, 0.140745822192987,
0.11933818070921, 0.129617582475879, 0.159683593927787, 0.129480591850964,
0.119731790773172, 0.0764422107349353, 0.0788206274536718, 0.097034570483965,
0.115564296161594, 0.119176324562654, 0.117849480527392)), .Names = c("site",
"month", "modeled", "modeled.low", "modeled.high", "observed",
"observed.low", "observed.high"), row.names = c(NA, -60L), class = "data.frame")
What I need to do is to create line plots of both "modeled" and "observed" soil moisture against months, and to add corresponding error bars modeled.low and modeled.high and observed.low and observed.high to those lines.
Also, I need to create facets based on the site column.
My first approach would be melting that data frame and starting from there, but the error bars might complicate that approach:
library(reshape2)
library(ggplot2)
# Melt every column except site/month into long format: the value columns
# and their .low/.high bounds all become levels of `variable`.
DF.m <- melt(DF, id=c('site','month'))
# Draw one line per level of `variable`, in one panel per site.
ggplot(data=DF.m) +
geom_line(aes(x=month, y=value, colour=variable, group=variable)) +
facet_wrap(~site) +
theme_bw(base_size = 18) +
scale_x_discrete(limits=month.abb) +
ylab('Soil water content (%)') + xlab('')
Which obviously does not work because modeled.low and modeled.high and observed.low and observed.high are interpreted as lines to plot, whereas I need them as error bars.
I know that I should use geom_errorbar() in this code in order to achieve what I need, but I am not sure how to use it with the molten data frame.
Any tips?
library(dplyr)
# Bring each series into the same layout (site, month, val, low, high, var)
# so the two can be stacked and distinguished by `var`.
df_m <- DF %>%
  select(site, month, val = modeled, low = modeled.low, high = modeled.high) %>%
  mutate(var = "modeled")

df_ob <- DF %>%
  select(site, month, val = observed, low = observed.low, high = observed.high) %>%
  mutate(var = "observed")

# Long format: one row per site, month and series.
df <- rbind(df_m, df_ob)

# Points with error bars spanning low..high, one panel per site.
ggplot(df, aes(x = month, y = val, colour = var)) +
  geom_errorbar(aes(ymin = low, ymax = high)) +
  geom_point() +
  facet_wrap(~site)
Line plot with ribbon illustrating high and low value ranges:
# One line per series with a shaded band spanning low..high.
# The band is filled by `var` so the ranges of the two series can be told
# apart; colour = NA is a constant, so it is set outside aes(), and it
# suppresses the band outline that the inherited colour mapping would draw.
ggplot(df, aes(month, val, colour = var)) +
  geom_ribbon(aes(ymin = low, ymax = high, fill = var), colour = NA, alpha = 0.2) +
  geom_line() +
  facet_wrap(~site)
A data.table solution:
library(data.table)
# Melt three column families in one pass: patterns() creates one measure
# group per regex, giving value1 (modeled/observed), value2 (*.low) and
# value3 (*.high).
plt <- melt(
  as.data.table(DF),
  id.vars = 1:2,
  measure.vars = patterns("modeled$|observed$", "low", "high")
)
# `variable` holds the index within each group; relabel 1/2 with series names.
plt[, variable := factor(variable, levels = 1:2, labels = c("modeled", "observed"))]
# value1 is plotted as the line, value2/value3 as the error-bar limits.
ggplot(
  plt,
  aes(x = month, y = value1, ymin = value2, ymax = value3, color = variable)
) +
  geom_line() +
  geom_errorbar() +
  facet_wrap(~site)