How can I reshape the dataframe so there is only 1 observation for each id? - r

I need to reshape (I assume it's some sort of reshape like I would do in Stata) this dataframe so that there is only 1 observation for each id. In addition, I need to preserve all of the other variables. So, one row should have columns for id, each year (containing the value of var1 for that year), x, and var2 (var2 is not absolutely necessary). I've tried a lot of different things and I keep getting the same long data for the id variables. I apologize for the long dput(), but there would not be enough information if I only included the first 6 rows.
structure(list(id= c(1806968L, 1806968L, 1806968L, 1806968L,
1806968L, 1806968L, 1806968L, 1806968L, 1806968L, 1806968L, 1806968L,
1806968L, 1806968L, 1806968L, 1806968L, 2022610L, 2022610L, 2022610L,
2022610L, 2022610L), var1 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 4877, 5819, 6560, 8262, 0, 0, 0, 0, 0), x = c(25518,
25518, 25518, 25518, 25518, 25518, 25518, 25518, 25518, 25518,
25518, 25518, 25518, 25518, 25518, 34611, 34611, 34611, 34611,
34611), var2 = c(200812L, 200912L, 201012L, 201112L, 201212L,
201312L, 201512L, 201612L, 201712L, 201812L, 201912L, 200612L,
200512L, 200712L, 201412L, 199612L, 199712L, 199812L, 199912L,
200012L), `1987` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), `1988` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), `1989` = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1990` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1991` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1992` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1993` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1994` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1995` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1996` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1997` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1998` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1999` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2000` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2001` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2002` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2003` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2004` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2005` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5819, 0, 0, 0, 0, 0, 0, 0),
`2006` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4877, 0, 0, 0,
0, 0, 0, 0, 0), `2007` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 6560, 0, 0, 0, 0, 0, 0), `2008` = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2009` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`2010` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), `2011` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), `2012` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2013` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`2014` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8262,
0, 0, 0, 0, 0), `2015` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2016` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2017` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`2018` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), `2019` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), `2020` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-20L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x55b410de6890>, sorted = c("id",
"var1", "x", "var2"))
I would like it to look something like this, where the value in each year column is the value of var1 that corresponds to that year. Here is a sketch of what I want, without all the years. I still need a column for every year.
# Sketch of the desired wide layout: one row per id, one column per year
# holding that year's var1 value, plus x.
id <- c(1806968L, 2022610L)
# Backticks (not a quoted string) are the way to assign a non-syntactic name.
`1987` <- c(0, 8262)
x <- c(25518, 34611)
# check.names = FALSE keeps the year column named "1987"; with the default,
# data.frame() rewrites the non-syntactic name to "X1987".
data <- data.frame(id = id, `1987` = `1987`, x = x, check.names = FALSE)

It's unclear whether you also want the years whose value is 0 when var1 is 0. If you do, delete `& var1 > 0` below.
library(tidyr)
library(dplyr)
# Stack the year columns (those whose names begin with "1" or "2") into long
# form: the column name goes to `year`, the cell content to `value`.
# Then keep only the rows where the year's cell equals that row's var1 and
# var1 is positive, and drop the now-redundant `value` column.
tmp %>%
  pivot_longer(
    cols = c(starts_with("1"), starts_with("2")),
    names_to = "year"
  ) %>%
  dplyr::filter(value == var1 & var1 > 0) %>%
  dplyr::select(-value)

Related

Problems with merging two files with yearly binary data for two overlapping subsets of individuals

I work with mark-recaptures of animals, and I have two capture histories I need to merge. Both files look like this:
Both files include subsets of the same group of animals; however, not all individuals are present in both files. Also, one file contains more YEARS (in columns) than the other. The 0's and 1's indicate whether or not the animal was observed in that year.
I need to merge both files, ending up with a file that contains all individuals that are included in these files. Observation data need to be merged for those individuals that are present in both files. If the observation status for a given animal is 0 in FILE1 and 0 in FILE2, the observation status in the merged file needs to be 0; if it is 0 in FILE1 and 1 in FILE2, the observation status in the merged file should be 1; and if it is 1 in both files, it still needs to be 1 in the merged file (NOT 2).
Below you'll find samples of both files, FILE1 and FILE2. Any help appreciated.
FILE1:
> dput(FILE1)
structure(list(ID = c("1", "LL-30", "M-300", "NKW-001", "NKW-002",
"NKW-003", "NKW-004", "NKW-006", "NKW-007", "NKW-009", "NKW-010",
"NKW-011", "NKW-012", "NKW-013", "NKW-014", "NKW-015", "NKW-016",
"NKW-017", "NKW-018", "NKW-019", "NKW-021", "NKW-022", "NKW-023",
"NKW-024", "NKW-025", "NKW-026", "NKW-028", "NKW-029", "NKW-030",
"NKW-031", "NKW-032", "NKW-033", "NKW-034", "NKW-035", "NKW-036",
"NKW-037", "NKW-038", "NKW-039", "NKW-040"), `1986` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1987` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1988` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1989` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1990` = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1991` = c(0,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1992` = c(0,
0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1993` = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1994` = c(0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1995` = c(1,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1996` = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1997` = c(0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1998` = c(1,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1999` = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2000` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2001` = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2002` = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2003` = c(1,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2004` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2005` = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0), `2006` = c(0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2007` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2008` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2012` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2013` = c(0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), `2014` = c(0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0), `2015` = c(0,
0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1), `2016` = c(0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0), `2017` = c(0,
0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1), `2018` = c(0,
0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1), `2019` = c(0,
0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1)), class = "data.frame", row.names = c(NA,
-39L))
FILE2:
> dput(FILE2)
structure(list(ID = c("KI03", "KI05", "KI06", "KI07", "KI08",
"KI10", "NKW-001", "NKW-004", "NKW-005", "NKW-009", "NKW-019",
"NKW-023", "NKW-025", "NKW-027", "NKW-031", "NKW-032", "NKW-040",
"NKW-045", "NKW-424", "NKW-431", "NKW-441", "NKW-443"), `2008` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0
), `2009` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), `2010` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2011` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), `2012` = c(0,
0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
), `2013` = c(1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0), `2014` = c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2015` = c(1, 1, 1, 0, 1, 1,
1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0), `2016` = c(1,
0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0
), `2017` = c(1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 1, 0, 0, 0), `2018` = c(1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `2019` = c(0, 0, 0, 1, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0), `2020` = c(0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
)), class = "data.frame", row.names = c(NA, -22L))
Here is a scalable data.table solution with no merging involved.
If you have got more files, just add them to the list L
library( data.table )
# Convert both capture histories to data.table (setDT works by reference).
setDT(df1)
setDT(df2)
# Every table to be combined goes in this list.
L <- list(df1, df2)
# Reshape each table from wide (one column per year) to long (ID, year, value);
# variable.factor = FALSE keeps `year` as character instead of factor.
long_tables <- lapply(
  L,
  function(tbl) {
    melt(tbl, id.vars = "ID", variable.name = "year", variable.factor = FALSE)
  }
)
# Stack all long tables into a single data.table.
stacked <- rbindlist(long_tables, use.names = TRUE, fill = TRUE)
# Per ID and year: 1 if the summed observations are positive, otherwise 0.
seen_by_year <- stacked[
  ,
  .(seen = as.numeric(sum(value) > 0)),
  by = .(ID, year)
]
# Cast back to wide; ID/year combinations with no record are filled with 0.
dcast(seen_by_year, ID ~ year, value.var = "seen", fill = 0)

ggplot with two manual color scales

I am trying to reuse some old code which I used to make two separate plots in the past, and I would now like to put them together into one.
However, I have a few problems.
# Per-row colour for the risk bars. The vector is created as character from
# the start, so its type does not depend on implicit coercion or on whether
# any row reaches a threshold:
#   "yellow" where 1 <= risk < 12
#   "red"    where risk >= 12
#   NA       everywhere else
color_var <- rep(NA_character_, length(OP_2016$risk))
color_var[OP_2016$risk >= 1 & OP_2016$risk < 12] <- "yellow"
color_var[OP_2016$risk >= 12] <- "red"
# Colours to list in the legend: the first two sorted distinct non-NA values
# of color_var.
risk_breaks <- levels(as.factor(color_var))[c(1, 2)]

ggplot(OP_2016) +
  # Risk bars, with the outline colour taken row by row from color_var.
  geom_col(
    aes(x = short_date, y = risk, color = color_var, group = 1),
    size = 0.9
  ) +
  scale_y_continuous(limits = c(0, 100), name = "Accumulated EBHours") +
  # Use the colour names literally and expose them in a legend.
  scale_color_identity(
    "Risk Level",
    breaks = risk_breaks,
    labels = c("High >12 EBH", "Medium 0-12EBH"),
    guide = "legend"
  ) +
  # Dotted red reference line at y = 12.
  geom_line(
    aes(x = short_date, y = 12),
    linetype = "dotted", size = 0.8, colour = "red"
  ) +
  # scale_color_manual("Varieties", values =c( "British Queen"= "orchid1"))+
  # Rating curve from the second data set, with "green" mapped as its colour.
  geom_line(data = dis_fun_df, aes(x = date, y = rating, colour = "green"))
Problems:
Bars and boxes in the legend are not filled,
I cannot set a manual color for the geom_line (which I added from the other plot) and show it in the legend.
Apologies, data set to reproduce the plot is a bit big.
dis_fun_df <- structure(list(date = structure(c(15534, 15540, 15548, 15555,
15562, 15573, 15580), class = "Date"), rating = c(10.2, 30, 61.6666666666667,
81.6666666666667, 95.8333333333333, 99.1666666666667, 100)), row.names = c(NA,
-7L), class = c("tbl_df", "tbl", "data.frame"))
OP_2016 <- structure(list(date = structure(c(1342224000, 1342227600, 1342231200,
1342234800, 1342238400, 1342242000, 1342245600, 1342249200, 1342252800,
1342256400, 1342260000, 1342263600, 1342267200, 1342270800, 1342274400,
1342278000, 1342281600, 1342285200, 1342288800, 1342292400, 1342296000,
1342299600, 1342303200, 1342306800, 1342310400, 1342314000, 1342317600,
1342321200, 1342324800, 1342328400, 1342332000, 1342335600, 1342339200,
1342342800, 1342346400, 1342350000, 1342353600, 1342357200, 1342360800,
1342364400, 1342368000, 1342371600, 1342375200, 1342378800, 1342382400,
1342386000, 1342389600, 1342393200, 1342396800, 1342400400, 1342404000,
1342407600, 1342411200, 1342414800, 1342418400, 1342422000, 1342425600,
1342429200, 1342432800, 1342436400, 1342440000, 1342443600, 1342447200,
1342450800, 1342454400, 1342458000, 1342461600, 1342465200, 1342468800,
1342472400, 1342476000, 1342479600, 1342483200, 1342486800, 1342490400,
1342494000, 1342497600, 1342501200, 1342504800, 1342508400, 1342512000,
1342515600, 1342519200, 1342522800, 1342526400, 1342530000, 1342533600,
1342537200, 1342540800, 1342544400, 1342548000, 1342551600, 1342555200,
1342558800, 1342562400, 1342566000, 1342569600, 1342573200, 1342576800,
1342580400, 1342584000, 1342587600, 1342591200, 1342594800, 1342598400,
1342602000, 1342605600, 1342609200, 1342612800, 1342616400, 1342620000,
1342623600, 1342627200, 1342630800, 1342634400, 1342638000, 1342641600,
1342645200, 1342648800, 1342652400, 1342656000, 1342659600, 1342663200,
1342666800, 1342670400, 1342674000, 1342677600, 1342681200, 1342684800,
1342688400, 1342692000, 1342695600, 1342699200, 1342702800, 1342706400,
1342710000, 1342713600, 1342717200, 1342720800, 1342724400, 1342728000,
1342731600, 1342735200, 1342738800, 1342742400, 1342746000, 1342749600,
1342753200, 1342756800, 1342760400, 1342764000, 1342767600, 1342771200,
1342774800, 1342778400, 1342782000, 1342785600, 1342789200, 1342792800,
1342796400, 1342800000, 1342803600, 1342807200, 1342810800, 1342814400,
1342818000, 1342821600, 1342825200, 1342828800, 1342832400, 1342836000,
1342839600, 1342843200, 1342846800, 1342850400, 1342854000, 1342857600,
1342861200, 1342864800, 1342868400, 1342872000, 1342875600, 1342879200,
1342882800, 1342886400, 1342890000, 1342893600, 1342897200, 1342900800,
1342904400, 1342908000, 1342911600, 1342915200, 1342918800, 1342922400,
1342926000, 1342929600, 1342933200, 1342936800, 1342940400, 1342944000,
1342947600, 1342951200, 1342954800, 1342958400, 1342962000, 1342965600,
1342969200, 1342972800, 1342976400, 1342980000, 1342983600, 1342987200,
1342990800, 1342994400, 1342998000, 1343001600, 1343005200, 1343008800,
1343012400, 1343016000, 1343019600, 1343023200, 1343026800, 1343030400,
1343034000, 1343037600, 1343041200, 1343044800, 1343048400, 1343052000,
1343055600, 1343059200, 1343062800, 1343066400, 1343070000, 1343073600,
1343077200, 1343080800, 1343084400, 1343088000, 1343091600, 1343095200,
1343098800, 1343102400, 1343106000, 1343109600, 1343113200, 1343116800,
1343120400, 1343124000, 1343127600, 1343131200, 1343134800, 1343138400,
1343142000, 1343145600, 1343149200, 1343152800, 1343156400, 1343160000,
1343163600, 1343167200, 1343170800, 1343174400, 1343178000, 1343181600,
1343185200, 1343188800, 1343192400, 1343196000, 1343199600, 1343203200,
1343206800, 1343210400, 1343214000, 1343217600, 1343221200, 1343224800,
1343228400, 1343232000, 1343235600, 1343239200, 1343242800, 1343246400,
1343250000, 1343253600, 1343257200, 1343260800, 1343264400, 1343268000,
1343271600, 1343275200, 1343278800, 1343282400, 1343286000, 1343289600,
1343293200, 1343296800, 1343300400, 1343304000, 1343307600, 1343311200,
1343314800, 1343318400, 1343322000, 1343325600, 1343329200, 1343332800,
1343336400, 1343340000, 1343343600, 1343347200, 1343350800, 1343354400,
1343358000, 1343361600, 1343365200, 1343368800, 1343372400, 1343376000,
1343379600, 1343383200, 1343386800, 1343390400, 1343394000, 1343397600,
1343401200, 1343404800, 1343408400, 1343412000, 1343415600, 1343419200,
1343422800, 1343426400, 1343430000, 1343433600, 1343437200, 1343440800,
1343444400, 1343448000, 1343451600, 1343455200, 1343458800, 1343462400,
1343466000, 1343469600, 1343473200, 1343476800, 1343480400, 1343484000,
1343487600, 1343491200, 1343494800, 1343498400, 1343502000, 1343505600,
1343509200, 1343512800, 1343516400, 1343520000, 1343523600, 1343527200,
1343530800, 1343534400, 1343538000, 1343541600, 1343545200, 1343548800,
1343552400, 1343556000, 1343559600, 1343563200, 1343566800, 1343570400,
1343574000, 1343577600, 1343581200, 1343584800, 1343588400, 1343592000,
1343595600, 1343599200, 1343602800, 1343606400, 1343610000, 1343613600,
1343617200, 1343620800, 1343624400, 1343628000, 1343631600, 1343635200,
1343638800, 1343642400, 1343646000, 1343649600, 1343653200, 1343656800,
1343660400, 1343664000, 1343667600, 1343671200, 1343674800, 1343678400,
1343682000, 1343685600, 1343689200, 1343692800, 1343696400, 1343700000,
1343703600, 1343707200, 1343710800, 1343714400, 1343718000, 1343721600,
1343725200, 1343728800, 1343732400, 1343736000, 1343739600, 1343743200,
1343746800, 1343750400, 1343754000, 1343757600, 1343761200, 1343764800,
1343768400, 1343772000, 1343775600, 1343779200, 1343782800, 1343786400,
1343790000, 1343793600, 1343797200, 1343800800, 1343804400, 1343808000,
1343811600, 1343815200, 1343818800, 1343822400, 1343826000, 1343829600,
1343833200, 1343836800, 1343840400, 1343844000, 1343847600, 1343851200,
1343854800, 1343858400, 1343862000, 1343865600, 1343869200, 1343872800,
1343876400, 1343880000, 1343883600, 1343887200, 1343890800, 1343894400,
1343898000, 1343901600, 1343905200, 1343908800, 1343912400, 1343916000,
1343919600, 1343923200, 1343926800, 1343930400, 1343934000, 1343937600,
1343941200, 1343944800, 1343948400, 1343952000, 1343955600, 1343959200,
1343962800, 1343966400, 1343970000, 1343973600, 1343977200, 1343980800,
1343984400, 1343988000, 1343991600, 1343995200, 1343998800, 1344002400,
1344006000, 1344009600, 1344013200, 1344016800, 1344020400, 1344024000,
1344027600, 1344031200, 1344034800, 1344038400, 1344042000, 1344045600,
1344049200, 1344052800, 1344056400, 1344060000, 1344063600, 1344067200,
1344070800, 1344074400, 1344078000, 1344081600, 1344085200, 1344088800,
1344092400, 1344096000, 1344099600, 1344103200, 1344106800, 1344110400,
1344114000, 1344117600, 1344121200, 1344124800, 1344128400, 1344132000,
1344135600, 1344139200, 1344142800, 1344146400, 1344150000, 1344153600,
1344157200, 1344160800, 1344164400, 1344168000, 1344171600, 1344175200,
1344178800, 1344182400, 1344186000, 1344189600, 1344193200, 1344196800,
1344200400, 1344204000, 1344207600, 1344211200, 1344214800, 1344218400,
1344222000, 1344225600, 1344229200, 1344232800, 1344236400, 1344240000,
1344243600, 1344247200, 1344250800, 1344254400, 1344258000, 1344261600,
1344265200, 1344268800, 1344272400, 1344276000, 1344279600, 1344283200,
1344286800, 1344290400, 1344294000, 1344297600, 1344301200, 1344304800,
1344308400, 1344312000, 1344315600, 1344319200, 1344322800, 1344326400,
1344330000, 1344333600, 1344337200, 1344340800, 1344344400, 1344348000,
1344351600, 1344355200, 1344358800, 1344362400, 1344366000, 1344369600,
1344373200, 1344376800, 1344380400, 1344384000, 1344387600, 1344391200,
1344394800, 1344398400, 1344402000, 1344405600, 1344409200, 1344412800,
1344416400, 1344420000, 1344423600, 1344427200, 1344430800, 1344434400,
1344438000, 1344441600, 1344445200, 1344448800, 1344452400, 1344456000,
1344459600, 1344463200, 1344466800, 1344470400, 1344474000, 1344477600,
1344481200, 1344484800, 1344488400, 1344492000, 1344495600, 1344499200,
1344502800, 1344506400, 1344510000, 1344513600, 1344517200, 1344520800,
1344524400, 1344528000, 1344531600, 1344535200, 1344538800, 1344542400,
1344546000, 1344549600, 1344553200, 1344556800, 1344560400, 1344564000,
1344567600, 1344571200, 1344574800, 1344578400, 1344582000, 1344585600,
1344589200, 1344592800, 1344596400, 1344600000, 1344603600, 1344607200,
1344610800, 1344614400, 1344618000, 1344621600, 1344625200, 1344628800,
1344632400, 1344636000, 1344639600, 1344643200, 1344646800, 1344650400,
1344654000, 1344657600, 1344661200, 1344664800, 1344668400, 1344672000,
1344675600, 1344679200, 1344682800, 1344686400, 1344690000, 1344693600,
1344697200, 1344700800, 1344704400, 1344708000, 1344711600, 1344715200,
1344718800, 1344722400, 1344726000, 1344729600, 1344733200, 1344736800,
1344740400, 1344744000, 1344747600, 1344751200, 1344754800, 1344758400,
1344762000, 1344765600, 1344769200, 1344772800, 1344776400, 1344780000,
1344783600, 1344787200, 1344790800, 1344794400, 1344798000, 1344801600,
1344805200, 1344808800, 1344812400, 1344816000, 1344819600, 1344823200,
1344826800, 1344830400, 1344834000, 1344837600, 1344841200, 1344844800,
1344848400, 1344852000, 1344855600, 1344859200, 1344862800, 1344866400,
1344870000, 1344873600, 1344877200, 1344880800, 1344884400, 1344888000,
1344891600, 1344895200, 1344898800, 1344902400, 1344906000, 1344909600,
1344913200, 1344916800, 1344920400, 1344924000, 1344927600, 1344931200,
1344934800, 1344938400, 1344942000, 1344945600, 1344949200, 1344952800,
1344956400, 1344960000, 1344963600, 1344967200, 1344970800, 1344974400,
1344978000, 1344981600, 1344985200, 1344988800, 1344992400, 1344996000,
1344999600, 1345003200, 1345006800, 1345010400, 1345014000, 1345017600,
1345021200, 1345024800, 1345028400, 1345032000, 1345035600, 1345039200,
1345042800, 1345046400, 1345050000, 1345053600, 1345057200, 1345060800,
1345064400, 1345068000, 1345071600, 1345075200, 1345078800, 1345082400,
1345086000, 1345089600, 1345093200, 1345096800, 1345100400, 1345104000,
1345107600, 1345111200, 1345114800, 1345118400, 1345122000, 1345125600,
1345129200, 1345132800, 1345136400, 1345140000, 1345143600, 1345147200,
1345150800, 1345154400, 1345158000, 1345161600, 1345165200, 1345168800,
1345172400, 1345176000, 1345179600, 1345183200, 1345186800, 1345190400,
1345194000, 1345197600, 1345201200, 1345204800, 1345208400, 1345212000,
1345215600, 1345219200, 1345222800, 1345226400, 1345230000, 1345233600,
1345237200, 1345240800, 1345244400, 1345248000, 1345251600, 1345255200,
1345258800, 1345262400, 1345266000, 1345269600, 1345273200, 1345276800,
1345280400, 1345284000, 1345287600, 1345291200, 1345294800, 1345298400,
1345302000, 1345305600, 1345309200, 1345312800, 1345316400, 1345320000,
1345323600, 1345327200, 1345330800, 1345334400, 1345338000, 1345341600,
1345345200, 1345348800, 1345352400, 1345356000, 1345359600, 1345363200,
1345366800, 1345370400, 1345374000, 1345377600, 1345381200, 1345384800,
1345388400, 1345392000, 1345395600, 1345399200, 1345402800, 1345406400,
1345410000, 1345413600, 1345417200, 1345420800, 1345424400, 1345428000,
1345431600, 1345435200, 1345438800, 1345442400, 1345446000, 1345449600,
1345453200, 1345456800, 1345460400, 1345464000, 1345467600, 1345471200,
1345474800, 1345478400, 1345482000, 1345485600, 1345489200, 1345492800,
1345496400, 1345500000, 1345503600, 1345507200, 1345510800, 1345514400,
1345518000, 1345521600, 1345525200, 1345528800, 1345532400, 1345536000,
1345539600, 1345543200, 1345546800, 1345550400, 1345554000, 1345557600,
1345561200, 1345564800, 1345568400, 1345572000, 1345575600, 1345579200,
1345582800, 1345586400, 1345590000, 1345593600, 1345597200, 1345600800,
1345604400, 1345608000, 1345611600, 1345615200, 1345618800, 1345622400,
1345626000, 1345629600, 1345633200, 1345636800, 1345640400, 1345644000,
1345647600, 1345651200, 1345654800, 1345658400, 1345662000, 1345665600,
1345669200, 1345672800, 1345676400, 1345680000, 1345683600, 1345687200,
1345690800, 1345694400, 1345698000, 1345701600, 1345705200, 1345708800,
1345712400, 1345716000, 1345719600, 1345723200, 1345726800, 1345730400,
1345734000, 1345737600, 1345741200, 1345744800, 1345748400, 1345752000,
1345755600, 1345759200, 1345762800, 1345766400, 1345770000, 1345773600,
1345777200, 1345780800, 1345784400, 1345788000, 1345791600, 1345795200,
1345798800, 1345802400, 1345806000, 1345809600, 1345813200, 1345816800,
1345820400, 1345824000, 1345827600, 1345831200, 1345834800, 1345838400,
1345842000, 1345845600, 1345849200, 1345852800, 1345856400, 1345860000,
1345863600, 1345867200, 1345870800, 1345874400, 1345878000, 1345881600,
1345885200, 1345888800, 1345892400, 1345896000, 1345899600, 1345903200,
1345906800, 1345910400, 1345914000, 1345917600, 1345921200, 1345924800,
1345928400, 1345932000, 1345935600, 1345939200, 1345942800, 1345946400,
1345950000, 1345953600, 1345957200, 1345960800, 1345964400, 1345968000,
1345971600, 1345975200, 1345978800, 1345982400, 1345986000, 1345989600,
1345993200, 1345996800, 1346000400, 1346004000, 1346007600, 1346011200,
1346014800, 1346018400, 1346022000, 1346025600, 1346029200, 1346032800,
1346036400, 1346040000, 1346043600, 1346047200, 1346050800, 1346054400,
1346058000, 1346061600, 1346065200, 1346068800, 1346072400, 1346076000,
1346079600, 1346083200, 1346086800, 1346090400, 1346094000, 1346097600,
1346101200, 1346104800, 1346108400), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), risk = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7,
8, 9, 10, 11, 0, 0, 0, 12, 13, 14, 15, 16, 17, 18, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3,
4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0)), row.names = c(NA, -1080L), class = c("tbl_df",
"tbl", "data.frame"))
I think this might do the trick, using fill instead of colour
# Convert the timestamps to calendar dates so the bars are plotted on a Date
# x axis, like dis_fun_df$date.
OP_2016$date <- as.Date(OP_2016$date)
# Per-row fill colour for the risk bars. The vector is created as character
# from the start, so its type does not depend on implicit coercion or on
# whether any row reaches a threshold:
#   "yellow" where 1 <= risk < 12
#   "red"    where risk >= 12
#   NA       everywhere else
color_var <- rep(NA_character_, length(OP_2016$risk))
color_var[OP_2016$risk >= 1 & OP_2016$risk < 12] <- "yellow"
color_var[OP_2016$risk >= 12] <- "red"
# Colours to list in the legend: the first two sorted distinct non-NA values
# of color_var.
risk_breaks <- levels(as.factor(color_var))[c(1, 2)]

ggplot(OP_2016) +
  # Risk bars, filled row by row with the colour stored in color_var.
  geom_col(
    aes(x = date, y = risk, group = 1, fill = color_var),
    size = 0.9
  ) +
  scale_y_continuous(limits = c(0, 100), name = "Accumulated EBHours") +
  # Use the colour names literally as fills and expose them in a legend.
  scale_fill_identity(
    "Risk Level",
    breaks = risk_breaks,
    labels = c("High >12 EBH", "Medium 0-12EBH"),
    guide = "legend"
  ) +
  # Dotted red reference line at y = 12.
  geom_line(
    aes(x = date, y = 12),
    linetype = "dotted", size = 0.8, colour = "red"
  ) +
  # Rating curve from the second data set, drawn in a fixed green (a constant
  # colour set outside aes(), so it gets no legend entry).
  geom_line(data = dis_fun_df, aes(x = date, y = rating), colour = "green")
To my knowledge ggplot does not support multiple scales of the same type, but others would know better than I.
UPDATE:
For anyone looking to actually use multiple scales for the same type of geom the {ggnewscale} package should provide the functionality that you are looking for:
https://github.com/eliocamp/ggnewscale

Subset rows with similar strings in one of the column and plot them together

I would like to group/subset the rows which have the same "base" string in one of the columns and plot them on one graph. It would be great to have everything in one PDF file, with each graph on a separate page.
Data:
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 370500, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1091361.9, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1512409.6,
0, 0, 0, 0, 0, 0), `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 4231358.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 5995680.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2266775, 0, 0, 0, 0, 0, 0, 6864490.1, 0, 0,
0, 0, 0, 0), `84` = c(0, 0, 0, 0, 1783350, 0, 0, 0, 1177650,
0, 0, 0, 0, 0, 0, 0, 0, 4316664.7, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 9262556.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 2831286.1, 0, 0, 0, 0, 0, 0, 10643218.2,
0, 0, 0, 0, 0, 0), `110` = c(0, 0, 0, 0, 1778743.3, 0, 0, 0,
1465966.7, 0, 0, 0, 0, 0, 0, 0, 0, 3111700, 0, 0, 1955337.5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5584784.4, 5584784.4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3092525, 0,
0, 0, 0, 0, 0, 7847143.8, 0, 0, 0, 0, 0, 0), `134` = c(0, 0,
0, 0, 1121869.4, 0, 0, 0, 1439430.6, 0, 0, 0, 0, 0, 0, 0, 0,
2854250, 0, 0, 0, 0, 0, 0, 914890, 0, 0, 847880, 0, 0, 0, 0,
0, 0, 0, 8191800, 0, 0, 0, 0, 0, 0, 1830904.5, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1650150, 0, 0, 837130, 0, 0, 0, 4925095.1, 0,
0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 1432775, 0, 0, 0, 1394186.1,
0, 1120183.3, 0, 0, 0, 0, 0, 0, 2262421.7, 0, 0, 0, 615660, 0,
0, 1292795.8, 0, 0, 712622.5, 0, 0, 0, 0, 0, 0, 0, 2683469.4,
0, 0, 0, 0, 0, 0, 2318485.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1561800,
0, 0, 0, 0, 0, 0, 4382993.7, 0, 0, 763460, 0, 0, 0), `199` = c(0,
0, 0, 0, 1314220, 0, 0, 0, 1439718.8, 0, 1929266.7, 0, 0, 0,
1101800, 0, 0, 2759366.7, 0, 0, 0, 1291728.6, 0, 0, 2489775.6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2858345.8, 0, 0, 0, 1819542.1,
0, 0, 1497640.3, 0, 0, 0, 1300250, 0, 0, 0, 0, 0, 0, 1566875,
0, 0, 0, 0, 0, 0, 4625895.6, 0, 0, 1308158.3, 0, 0, 0), `234` = c(1257250,
0, 0, 0, 0, 0, 0, 0, 1276080, 0, 1848500, 0, 0, 0, 1529350, 0,
0, 2155275, 0, 0, 0, 2023041.9, 0, 0, 1966447.7, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1184200, 1184200, 0, 0, 1652350, 0, 0, 2018581.7,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1835225, 0, 0, 0, 0, 0, 0, 4639414.7,
0, 0, 720715, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 669442.5, 0,
0, 1253026.7, 0, 960410, 960410, 0, 0, 1258267.5, 0, 0, 1707392.5,
0, 0, 0, 563280, 0, 0, 2403237.9, 0, 0, 0, 1044100, 0, 2075700,
0, 0, 0, 0, 0, 5718450, 0, 0, 1704550, 0, 0, 1350286.9, 0, 0,
0, 0, 2011700, 0, 0, 0, 0, 0, 1739500, 0, 0, 0, 0, 0, 0, 4612520.8,
4612520.8, 0, 0, 0, 0, 0), `362` = c(0, 1593500, 0, 0, 0, 1610625.3,
0, 0, 1234902.5, 0, 0, 1481036.8, 0, 0, 1583647.5, 0, 0, 1752089.2,
0, 0, 0, 0, 0, 0, 2410809.2, 0, 0, 0, 654940, 0, 0, 0, 0, 0,
0, 0, 7014905.6, 0, 0, 0, 0, 0, 1165672.1, 0, 0, 0, 0, 0, 0,
0, 1029910, 0, 0, 2153087.5, 0, 0, 0, 422920, 0, 0, 0, 7495855.9,
0, 0, 0, 0, 0), `433` = c(0, 0, 0, 0, 0, 1340283.9, 0, 0, 1268996.9,
0, 0, 1416683.3, 0, 0, 1047862.5, 0, 0, 1819653.8, 0, 0, 0, 0,
0, 0, 2227565.7, 0, 0, 0, 763765, 0, 0, 1595430, 0, 0, 0, 0,
4894549, 0, 0, 0, 0, 0, 1061375.4, 0, 0, 0, 0, 0, 2251950, 0,
1042130, 0, 0, 2055300, 0, 0, 0, 696278.3, 0, 0, 0, 5353797.8,
0, 0, 0, 0, 0), `506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2020300,
2020300, 0, 0, 0, 0, 0, 0, 7681526, 0, 0, 0, 0, 0), `581` = c(0,
0, 1749237.5, 0, 0, 0, 2421665.8, 0, 0, 1773262.5, 0, 0, 2251004.3,
0, 0, 2570175, 0, 0, 3379756.9, 0, 0, 0, 2054455.6, 0, 0, 2518270.8,
0, 0, 0, 0, 0, 0, 2917968.2, 0, 0, 0, 0, 7004350, 0, 0, 1451600,
0, 0, 1394411, 0, 0, 0, 0, 0, 2507858.3, 0, 2377012.5, 0, 0,
3719165.4, 0, 0, 0, 1472870.3, 0, 0, 9666916.1, 0, 0, 1730300,
0, 0), `652` = c(0, 0, 476910, 476910, 0, 0, 1149078.8, 1149078.8,
0, 1082468.7, 0, 0, 882769.7, 0, 0, 1370449.4, 1370449.4, 0,
1529049, 1529049, 0, 0, 943632.2, 0, 0, 916587.8, 0, 0, 0, 988261.1,
0, 0, 1778007.1, 1778007.1, 0, 0, 0, 3087304.8, 3087304.8, 0,
782860, 782860, 0, 510158.5, 510158.5, 0, 0, 0, 0, 1503750, 0,
1100677.5, 1100677.5, 0, 1669260, 1669260, 0, 0, 770733.2, 0,
0, 4939242.8, 4939242.8, 0, 643564.4, 643564.4, 0), `733` = c(0,
0, 0, 1095060, 0, 0, 0, 1674089.3, 0, 1252101.3, 0, 0, 1259111,
0, 0, 0, 2429293.3, 0, 0, 2326928.3, 0, 0, 1259216.5, 0, 0, 1238837.5,
0, 0, 0, 1224858.3, 0, 0, 0, 2952529.9, 0, 0, 0, 0, 4626414.7,
0, 0, 1121440, 0, 0, 1025386.2, 0, 0, 0, 0, 1917900, 0, 0, 2197533.3,
0, 0, 2840155.5, 0, 0, 1054285.7, 0, 0, 0, 7516814.2, 0, 0, 1329434.4,
0), `818` = c(0, 0, 0, 720551.1, 0, 0, 0, 714662.7, 0, 617012.9,
0, 0, 549850.8, 0, 0, 0, 1197460, 0, 0, 771979.2, 0, 0, 585847.5,
585847.5, 0, 875475.4, 0, 0, 0, 576774, 0, 0, 0, 1147389.8, 0,
0, 0, 0, 2292421.7, 0, 0, 755258.3, 0, 0, 0, 0, 0, 0, 0, 858930,
0, 0, 1242668.3, 0, 0, 1580088.3, 0, 0, 641938.6, 641938.6, 0,
0, 3838660.4, 0, 0, 733140.8, 733140.8), `896` = c(0, 0, 0, 590480,
0, 0, 0, 817087.6, 0, 569869.5, 0, 0, 650822.5, 650822.5, 0,
0, 1624052.5, 0, 0, 682570.8, 0, 0, 0, 1538800, 0, 690488.6,
690488.6, 0, 0, 797923.9, 0, 0, 0, 1204889.3, 0, 0, 0, 0, 2184432.2,
0, 0, 676654.7, 0, 0, 0, 210680, 0, 0, 0, 791152.5, 0, 0, 1599855.8,
0, 0, 1358543.8, 0, 0, 0, 931288, 0, 0, 4683895.2, 0, 0, 0, 1202806
), `972` = c(0, 0, 0, 799116.4, 0, 0, 0, 759169.9, 0, 408845,
0, 0, 0, 948980, 0, 0, 968766.7, 0, 0, 675349.7, 0, 0, 0, 0,
0, 0, 1811117.6, 0, 0, 609098.5, 0, 0, 0, 1073749.1, 0, 0, 0,
0, 2392258.9, 0, 0, 743580, 0, 0, 0, 1020485, 0, 0, 0, 446596.7,
0, 0, 1178583, 0, 0, 1438261.7, 0, 0, 0, 1133057.9, 0, 0, 4445814.7,
0, 0, 0, 1057776.9), `1039` = c(0, 0, 0, 447255.3, 0, 0, 0, 609409.1,
0, 304340, 0, 0, 0, 0, 0, 0, 694232.8, 0, 0, 473015.3, 0, 0,
0, 0, 0, 0, 419524.9, 0, 0, 447760.6, 0, 0, 0, 932513.5, 0, 0,
0, 0, 1251960.5, 0, 0, 276560, 0, 0, 0, 259640, 0, 0, 0, 354995,
0, 0, 1570222.5, 0, 0, 1021822, 0, 0, 0, 811614, 0, 0, 2941698.2,
0, 0, 0, 1199942.5), Gene = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3",
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4",
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3",
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1",
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3",
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1",
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3",
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4",
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1",
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2",
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3",
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2",
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3",
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4")), .Names = c("10",
"34", "59", "84", "110", "134", "165", "199", "234", "257", "362",
"433", "506", "581", "652", "733", "818", "896", "972", "1039",
"Gene"), row.names = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3",
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4",
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3",
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1",
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3",
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1",
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3",
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4",
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1",
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2",
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3",
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2",
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3",
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4"), class = "data.frame")
I would like to subset rows with the same "basis", meaning the string before the underscore.
I was trying to combine the subset and grep functions, but it only works for me if I supply the string manually. It would be quite a lot of work to do it string by string.
You could gather() your data into long format, separate() the Gene column into label and number and, for better plot aesthetics, replace the 0 values with NAs in the y column using na_if() and specify the order of the x axis using factor()
library(dplyr)
library(tidyr)
# Reshape to long format: one row per (Gene, column) pair, with the former
# column name in `x` and the value in `y`. pivot_longer() is the current
# replacement for the superseded gather().
long_df <- df %>%
  pivot_longer(-Gene, names_to = "x", values_to = "y") %>%
  # Split e.g. "AT1G04170_1" into label = "AT1G04170" and number = "1".
  separate(Gene, into = c("label", "number"), sep = "_") %>%
  mutate(
    # Zeros become NA so that they are left out of the plot.
    y = na_if(y, 0),
    # Keep the columns' original order on the x axis instead of sorting them.
    x = factor(x, levels = unique(x))
  )
Then, based on this answer, you could create the plot p and iterate through the groups using dplyr's do() or purrr's by_slice()
library(ggplot2)
# Template scatter plot on the full long table; %+% swaps in one group's data
# at a time below.
p <- ggplot(long_df, aes(x = x, y = y, color = number))
p <- p + geom_point()
# Using dplyr's do()
# One row per label, with the label's plot stored in the list column `plots`.
by_label <- group_by(long_df, label)
res <- do(by_label, plots = p %+% . + facet_wrap(~label))
# Using purrr
# slice_rows() and by_slice() are no longer part of purrr (they moved to the
# purrrlyr package); split() + map() gives the same one-plot-per-label result.
library(purrr)
plots_by_label <- long_df %>%
  split(.$label) %>%
  map(~ p %+% .x + facet_wrap(~label))
# Same shape as before: one row per label, plots in the list column `plots`.
res <- dplyr::tibble(
  label = names(plots_by_label),
  plots = unname(plots_by_label)
)
Which gives:
#Source: local data frame [19 x 2]
#Groups: <by row>
#
## A tibble: 19 × 2
# label plots
#* <chr> <list>
#1 AT1G04170 <S3: gg>
#2 AT1G08520 <S3: gg>
#3 AT1G10670 <S3: gg>
#4 AT1G53500 <S3: gg>
#5 AT1G54270 <S3: gg>
#6 AT1G80480 <S3: gg>
#7 AT2G16950 <S3: gg>
#8 AT3G03960 <S3: gg>
#9 AT3G57290 <S3: gg>
#10 AT3G63460 <S3: gg>
#11 AT4G20890 <S3: gg>
#12 AT4G20980 <S3: gg>
#13 AT4G24190 <S3: gg>
#14 AT4G29670 <S3: gg>
#15 AT5G23740 <S3: gg>
#16 AT5G23860 <S3: gg>
#17 AT5G40450 <S3: gg>
#18 AT5G62700 <S3: gg>
#19 ATCG00780 <S3: gg>
You can then access each plot, for example:
# [[1]] extracts the first plot itself; [1] would return a one-element list.
res$plots[[1]]
Which gives:
To save all plots into a pdf, simply do:
# pdf() without a file name writes to "Rplots.pdf" in the working directory.
pdf()
# Print every plot explicitly, one page each: a bare `res$plots` is only
# auto-printed at the interactive top level, not under source() or in a function.
invisible(lapply(res$plots, print))
dev.off()

Each ggplot on separate pdf page - plot row by row [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 6 years ago.
Improve this question
I would like to put one line plot on each page of pdf file. All the data for plotting is stored in single data frame. Each row should be plotted.
This is what the data looks like:
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 370500, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1091361.9, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1512409.6,
0, 0, 0, 0, 0, 0), `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 4231358.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 5995680.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2266775, 0, 0, 0, 0, 0, 0, 6864490.1, 0, 0,
0, 0, 0, 0), `84` = c(0, 0, 0, 0, 1783350, 0, 0, 0, 1177650,
0, 0, 0, 0, 0, 0, 0, 0, 4316664.7, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 9262556.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 2831286.1, 0, 0, 0, 0, 0, 0, 10643218.2,
0, 0, 0, 0, 0, 0), `110` = c(0, 0, 0, 0, 1778743.3, 0, 0, 0,
1465966.7, 0, 0, 0, 0, 0, 0, 0, 0, 3111700, 0, 0, 1955337.5,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5584784.4, 5584784.4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3092525, 0,
0, 0, 0, 0, 0, 7847143.8, 0, 0, 0, 0, 0, 0), `134` = c(0, 0,
0, 0, 1121869.4, 0, 0, 0, 1439430.6, 0, 0, 0, 0, 0, 0, 0, 0,
2854250, 0, 0, 0, 0, 0, 0, 914890, 0, 0, 847880, 0, 0, 0, 0,
0, 0, 0, 8191800, 0, 0, 0, 0, 0, 0, 1830904.5, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1650150, 0, 0, 837130, 0, 0, 0, 4925095.1, 0,
0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 1432775, 0, 0, 0, 1394186.1,
0, 1120183.3, 0, 0, 0, 0, 0, 0, 2262421.7, 0, 0, 0, 615660, 0,
0, 1292795.8, 0, 0, 712622.5, 0, 0, 0, 0, 0, 0, 0, 2683469.4,
0, 0, 0, 0, 0, 0, 2318485.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1561800,
0, 0, 0, 0, 0, 0, 4382993.7, 0, 0, 763460, 0, 0, 0), `199` = c(0,
0, 0, 0, 1314220, 0, 0, 0, 1439718.8, 0, 1929266.7, 0, 0, 0,
1101800, 0, 0, 2759366.7, 0, 0, 0, 1291728.6, 0, 0, 2489775.6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2858345.8, 0, 0, 0, 1819542.1,
0, 0, 1497640.3, 0, 0, 0, 1300250, 0, 0, 0, 0, 0, 0, 1566875,
0, 0, 0, 0, 0, 0, 4625895.6, 0, 0, 1308158.3, 0, 0, 0), `234` = c(1257250,
0, 0, 0, 0, 0, 0, 0, 1276080, 0, 1848500, 0, 0, 0, 1529350, 0,
0, 2155275, 0, 0, 0, 2023041.9, 0, 0, 1966447.7, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1184200, 1184200, 0, 0, 1652350, 0, 0, 2018581.7,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1835225, 0, 0, 0, 0, 0, 0, 4639414.7,
0, 0, 720715, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 669442.5, 0,
0, 1253026.7, 0, 960410, 960410, 0, 0, 1258267.5, 0, 0, 1707392.5,
0, 0, 0, 563280, 0, 0, 2403237.9, 0, 0, 0, 1044100, 0, 2075700,
0, 0, 0, 0, 0, 5718450, 0, 0, 1704550, 0, 0, 1350286.9, 0, 0,
0, 0, 2011700, 0, 0, 0, 0, 0, 1739500, 0, 0, 0, 0, 0, 0, 4612520.8,
4612520.8, 0, 0, 0, 0, 0), `362` = c(0, 1593500, 0, 0, 0, 1610625.3,
0, 0, 1234902.5, 0, 0, 1481036.8, 0, 0, 1583647.5, 0, 0, 1752089.2,
0, 0, 0, 0, 0, 0, 2410809.2, 0, 0, 0, 654940, 0, 0, 0, 0, 0,
0, 0, 7014905.6, 0, 0, 0, 0, 0, 1165672.1, 0, 0, 0, 0, 0, 0,
0, 1029910, 0, 0, 2153087.5, 0, 0, 0, 422920, 0, 0, 0, 7495855.9,
0, 0, 0, 0, 0), `433` = c(0, 0, 0, 0, 0, 1340283.9, 0, 0, 1268996.9,
0, 0, 1416683.3, 0, 0, 1047862.5, 0, 0, 1819653.8, 0, 0, 0, 0,
0, 0, 2227565.7, 0, 0, 0, 763765, 0, 0, 1595430, 0, 0, 0, 0,
4894549, 0, 0, 0, 0, 0, 1061375.4, 0, 0, 0, 0, 0, 2251950, 0,
1042130, 0, 0, 2055300, 0, 0, 0, 696278.3, 0, 0, 0, 5353797.8,
0, 0, 0, 0, 0), `506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2020300,
2020300, 0, 0, 0, 0, 0, 0, 7681526, 0, 0, 0, 0, 0), `581` = c(0,
0, 1749237.5, 0, 0, 0, 2421665.8, 0, 0, 1773262.5, 0, 0, 2251004.3,
0, 0, 2570175, 0, 0, 3379756.9, 0, 0, 0, 2054455.6, 0, 0, 2518270.8,
0, 0, 0, 0, 0, 0, 2917968.2, 0, 0, 0, 0, 7004350, 0, 0, 1451600,
0, 0, 1394411, 0, 0, 0, 0, 0, 2507858.3, 0, 2377012.5, 0, 0,
3719165.4, 0, 0, 0, 1472870.3, 0, 0, 9666916.1, 0, 0, 1730300,
0, 0), `652` = c(0, 0, 476910, 476910, 0, 0, 1149078.8, 1149078.8,
0, 1082468.7, 0, 0, 882769.7, 0, 0, 1370449.4, 1370449.4, 0,
1529049, 1529049, 0, 0, 943632.2, 0, 0, 916587.8, 0, 0, 0, 988261.1,
0, 0, 1778007.1, 1778007.1, 0, 0, 0, 3087304.8, 3087304.8, 0,
782860, 782860, 0, 510158.5, 510158.5, 0, 0, 0, 0, 1503750, 0,
1100677.5, 1100677.5, 0, 1669260, 1669260, 0, 0, 770733.2, 0,
0, 4939242.8, 4939242.8, 0, 643564.4, 643564.4, 0), `733` = c(0,
0, 0, 1095060, 0, 0, 0, 1674089.3, 0, 1252101.3, 0, 0, 1259111,
0, 0, 0, 2429293.3, 0, 0, 2326928.3, 0, 0, 1259216.5, 0, 0, 1238837.5,
0, 0, 0, 1224858.3, 0, 0, 0, 2952529.9, 0, 0, 0, 0, 4626414.7,
0, 0, 1121440, 0, 0, 1025386.2, 0, 0, 0, 0, 1917900, 0, 0, 2197533.3,
0, 0, 2840155.5, 0, 0, 1054285.7, 0, 0, 0, 7516814.2, 0, 0, 1329434.4,
0), `818` = c(0, 0, 0, 720551.1, 0, 0, 0, 714662.7, 0, 617012.9,
0, 0, 549850.8, 0, 0, 0, 1197460, 0, 0, 771979.2, 0, 0, 585847.5,
585847.5, 0, 875475.4, 0, 0, 0, 576774, 0, 0, 0, 1147389.8, 0,
0, 0, 0, 2292421.7, 0, 0, 755258.3, 0, 0, 0, 0, 0, 0, 0, 858930,
0, 0, 1242668.3, 0, 0, 1580088.3, 0, 0, 641938.6, 641938.6, 0,
0, 3838660.4, 0, 0, 733140.8, 733140.8), `896` = c(0, 0, 0, 590480,
0, 0, 0, 817087.6, 0, 569869.5, 0, 0, 650822.5, 650822.5, 0,
0, 1624052.5, 0, 0, 682570.8, 0, 0, 0, 1538800, 0, 690488.6,
690488.6, 0, 0, 797923.9, 0, 0, 0, 1204889.3, 0, 0, 0, 0, 2184432.2,
0, 0, 676654.7, 0, 0, 0, 210680, 0, 0, 0, 791152.5, 0, 0, 1599855.8,
0, 0, 1358543.8, 0, 0, 0, 931288, 0, 0, 4683895.2, 0, 0, 0, 1202806
), `972` = c(0, 0, 0, 799116.4, 0, 0, 0, 759169.9, 0, 408845,
0, 0, 0, 948980, 0, 0, 968766.7, 0, 0, 675349.7, 0, 0, 0, 0,
0, 0, 1811117.6, 0, 0, 609098.5, 0, 0, 0, 1073749.1, 0, 0, 0,
0, 2392258.9, 0, 0, 743580, 0, 0, 0, 1020485, 0, 0, 0, 446596.7,
0, 0, 1178583, 0, 0, 1438261.7, 0, 0, 0, 1133057.9, 0, 0, 4445814.7,
0, 0, 0, 1057776.9), `1039` = c(0, 0, 0, 447255.3, 0, 0, 0, 609409.1,
0, 304340, 0, 0, 0, 0, 0, 0, 694232.8, 0, 0, 473015.3, 0, 0,
0, 0, 0, 0, 419524.9, 0, 0, 447760.6, 0, 0, 0, 932513.5, 0, 0,
0, 0, 1251960.5, 0, 0, 276560, 0, 0, 0, 259640, 0, 0, 0, 354995,
0, 0, 1570222.5, 0, 0, 1021822, 0, 0, 0, 811614, 0, 0, 2941698.2,
0, 0, 0, 1199942.5), Gene = 1:67), .Names = c("10", "34", "59",
"84", "110", "134", "165", "199", "234", "257", "362", "433",
"506", "581", "652", "733", "818", "896", "972", "1039", "Gene"
), row.names = c(NA, 67L), class = "data.frame")
I have tried something like that so far...:
# Attempt from the question: for each row of Tra_decon, melt the row to long
# format (Gene as id) and build a log10 line plot intended for Test_plot.pdf.
# NOTE(review): pdf() is opened and closed inside the loop with the same file
# name, so every iteration starts the file again.
# NOTE(review): the ggplot object is neither assigned nor passed to print();
# values are not auto-printed inside a for loop, so no page is drawn.
for(i in 1:nrow(Tra_decon)){
Tra_decon_melt <- melt(Tra_decon[i,], id = "Gene")
pdf("Test_plot.pdf", onefile = TRUE)
ggplot(Tra_decon_melt, aes(variable, log10(value), group=factor(Gene))) +
theme(legend.title=element_blank()) +
ylab("XXX") +
xlab("XXX") +
geom_line(aes(color=factor(Gene)), size = 1.2) +
ggtitle("XXXX") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
dev.off()
}
A solution without a for loop (and so faster) is this one:
# Build one line plot per row of Tra_decon; nothing is drawn at this point.
# seq_len() also copes with a data frame that has zero rows.
plots <- lapply(seq_len(nrow(Tra_decon)), function(i) {
  # Long format for row i, keeping Gene as the id column.
  Tra_decon_melt <- melt(Tra_decon[i, ], id.vars = "Gene")
  ggplot(Tra_decon_melt, aes(variable, log10(value), group = factor(Gene))) +
    theme(legend.title = element_blank()) +
    ylab("XXX") +
    xlab("XXX") +
    geom_line(aes(color = factor(Gene)), size = 1.2) +
    ggtitle("XXXX") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
})
pdf("Test_plot.pdf", onefile = TRUE)
# Print each plot explicitly (one page per plot); a bare `plots` is only
# auto-printed at the interactive top level.
invisible(lapply(plots, print))
dev.off()
This works for me:
pdf("Test_plot.pdf", onefile = TRUE)
# Allocate the list once, before the loop: re-creating it inside the loop
# would throw away the plots stored by earlier iterations.
plot <- vector("list", nrow(Tra_decon))
for (i in seq_len(nrow(Tra_decon))) {
  # Long format for row i, keeping Gene as the id column.
  Tra_decon_melt <- melt(Tra_decon[i, ], id.vars = "Gene")
  plot[[i]] <- ggplot(Tra_decon_melt, aes(variable, log10(value), group = factor(Gene))) +
    theme(legend.title = element_blank()) +
    ylab("XXX") +
    xlab("XXX") +
    geom_line(aes(color = factor(Gene)), size = 1.2) +
    ggtitle("XXXX") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
  # Inside a for loop a ggplot must be printed explicitly to be drawn.
  print(plot[[i]])
}
dev.off()

Find the smallest distance between the profiles

I would like to find the smallest distance between the profiles stored in a data frame. I am interested especially in one row in comparison to the rest of the rows stored in the data frame.
That's a data frame:
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 393090, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6718400,
0, 311350, 0), `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2164949.7,
4834137.6, 0, 0, 0, 1187816.7, 0, 0, 0, 0, 0, 0, 1340912.5, 0
), `84` = c(0, 0, 0, 0, 0, 0, 0, 0, 8607100, 0, 0, 17586713.2,
22629743.6, 0, 0, 0, 2808791.7, 0, 0, 4026222.5, 0, 0, 0, 1981900,
0), `110` = c(2296000, 0, 0, 0, 0, 2140221.7, 0, 0, 5809230.6,
0, 0, 37134898.5, 3861828.7, 2553100, 0, 12075845.8, 0, 0, 1272950,
8695273, 0, 0, 2657180, 2710080, 0), `134` = c(0, 0, 0, 1176150,
0, 1329596.7, 1471000, 0, 6511934, 6511934, 0, 18709227.3, 0,
1041211.2, 0, 6544176.9, 0, 0, 2412651.7, 7724956.9, 2878418.3,
0, 8620131.7, 2386972.8, 0), `165` = c(0, 1226610, 0, 1345098.7,
2083771.9, 0, 1808231.4, 0, 0, 10742997.7, 0, 13060798.9, 0,
538340, 538340, 2791649.5, 0, 0, 6217622, 1316097.1, 4716931.8,
0, 6615816.9, 1510532, 0), `199` = c(0, 1571525, 0, 1903038.3,
1676700, 0, 888832.2, 0, 0, 9084418.6, 0, 11189460.1, 0, 0, 1807662.5,
2564275, 0, 0, 18080359.7, 0, 0, 0, 2397710.2, 1717949.2, 0),
`234` = c(0, 1314900, 2482696, 1325684, 0, 0, 0, 0, 0, 7321432.7,
0, 9843409.2, 0, 0, 1073341.7, 2762775, 0, 0, 9335312.8,
0, 0, 0, 1950788.2, 1509100, 0), `257` = c(0, 1568700, 14604298.7,
940162.2, 0, 0, 0, 0, 0, 4779505.9, 0, 9691692.4, 0, 0, 735290,
2650165, 0, 2311383.7, 5193383.4, 0, 0, 0, 1341998.7, 1225325.6,
0), `362` = c(0, 0, 4190740.5, 288800, 0, 0, 0, 0, 0, 4846634.8,
0, 9574498.7, 0, 0, 0, 1425600, 0, 8339312.1, 3877892.5,
0, 0, 0, 1752866.7, 0, 0), `433` = c(0, 0, 773280, 0, 0,
0, 0, 0, 0, 3926582.8, 3926582.8, 5962586.5, 0, 0, 0, 1041400,
0, 1972909.3, 1895439.4, 0, 0, 0, 963891.2, 0, 1109800),
`506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9332272, 0, 0, 0,
0, 0, 0, 2219100, 0, 0, 0, 0, 0, 0, 0), `581` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 4371537.1, 0, 0, 0, 0, 0, 0, 2428800,
0, 0, 0, 0, 0, 0, 0), `652` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1689871.4, 0, 0, 0, 0, 0, 0, 988399.7, 0, 0, 0, 0, 0,
0, 0), `733` = c(0, 0, 0, 0, 0, 0, 0, 1250100, 0, 0, 1754205.3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `818` = c(0, 0,
0, 0, 0, 0, 0, 517340, 0, 0, 1149227.6, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), `896` = c(0, 0, 0, 0, 0, 0, 0, 579846.7,
0, 0, 985931.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`972` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 858255.5, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1039` = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 848993.3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0)), .Names = c("10", "34", "59", "84", "110", "134",
"165", "199", "234", "257", "362", "433", "506", "581", "652",
"733", "818", "896", "972", "1039"), row.names = c("Mark_1",
"Mark_2", "Alex_1", "Katrin_1", "Georg_1", "Martin_1",
"Tim_1", "Tom_1", "Mike_1", "Mike_2", "Mike_3",
"Hare_1", "Dea_1", "Monty_1", "Monty_2", "Niko_1",
"Lee_1", "Marq_1", "Otto_1", "Priaq_1", "Surkta_1",
"Norsa_1", "Norsa_2", "Quer_1", "Quer_2"), class = "data.frame")
So the row named Katrin_1 is the one that interests me. I would like to find which rows have the smallest Euclidean distance to Katrin_1. Let's say 3-5 rows.
Let's get rid of the Katrin_1 row with df[!rownames(df) %in% "Katrin_1", ], subtract df["Katrin_1", ] from each of the remaining rows with sweep, find the squared Euclidean distances by squaring the resulting matrix element-wise and using rowSums (the square root is not needed for ranking), and use which.min to get the final result:
# Squared Euclidean distance from every other row to "Katrin_1"; the square
# root is skipped because it does not change the ordering.
sq_dist <- rowSums(
  sweep(df[!rownames(df) %in% "Katrin_1", ], 2, as.numeric(df["Katrin_1", ]), `-`)^2
)
# Single closest row:
names(which.min(sq_dist))
# [1] "Mark_2"
# The five closest rows, nearest first (change 5 to the number wanted):
names(head(sort(sq_dist), 5))
This should be much more efficient than using dist, as dist would compute all possible pairwise distances, while we need only a few.

Resources