Linear mixed model estimator output incorrect - r

I want to perform a linear mixed model with a fixed effect for time plus a random intercept & slope for time per subject. I do this with the following code:
lme.rik<-lmer(formula = AI ~ Time + (1+Time|ID), data=df6, control = lmerControl(check.nobs.vs.nRE = "ignore"))
I use control = lmerControl(check.nobs.vs.nRE = "ignore") because there is probably something wrong with my Time variable. Nonetheless, the model gives some output in which the fixed effects include a separate dummy variable for each week (i.e., each level of the time variable): first the intercept, then Time[2], Time[..], Time[7], excluding only Time[1]. The same applies to the random effects. Is there a way to change the Time variable so that the model includes only one fixed and one random estimate for Time?
The following output is my dataset:
structure(list(ID = c("ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10", "ID10",
"ID11", "ID11", "ID11", "ID11", "ID11", "ID11", "ID11", "ID12",
"ID12", "ID12", "ID12", "ID12", "ID12", "ID12", "ID13", "ID13",
"ID13", "ID13", "ID13", "ID13", "ID13", "ID14", "ID14", "ID14",
"ID14", "ID14"), Time = c("1", "2", "3", "4", "5", "6", "7",
"1", "2", "3", "4", "5", "6", "7", "1", "2", "3", "4", "5", "6",
"7", "1", "2", "3", "4", "5", "6", "7", "1", "2", "3", "4", "5",
"6", "7", "1", "2", "3", "4", "5"), AI = c(0.393672183448241,
0.4876954603533, 0.411717908455957, 0.309769862660288, 0.149826889496538,
0.2448558592586, 0.123606753324621, 0.296109333767922, 0.309960002123076,
0.445886231347992, 0.370013553008003, 0.393414429902431, 0.318940511323733,
0.131112361225666, 0.31961673567578, 0.227268892979164, 0.433471105477564,
0.207184572401005, 0.144257239122978, NA, NA, 0.520204263001733,
0.194175420670027, 0.507417309543222, 0.1934679395598, 0.0831932654836405,
0.115391861884329, 0.141420940969022, 0.361215896677733, 0.256393554215028,
0.429431082438377, NA, NA, NA, NA, 0.239250372076152, 0.219099984707727,
NA, 0.289692898163938, 0.287732972580083), AI_VAR = c(0.154977788020905,
0.237846862049217, 0.169511636143347, 0.0959573678125739, 0.0224480968162077,
0.0599543918132674, 0.0152786294674538, 0.0876807375444826, 0.0960752029161373,
0.198814531305715, 0.136910029409606, 0.154774913655455, 0.101723049763444,
0.0171904512661696, 0.102154857724042, 0.0516511497159746, 0.187897199283942,
0.0429254470409874, 0.020810151039384, NA, NA, 0.270612475245176,
0.0377040939923819, 0.257472326024082, 0.0374298436375145, 0.00692111942183149,
0.0133152817891321, 0.0199998825445637, 0.130476924012699, 0.0657376546430145,
0.184411054564196, NA, NA, NA, NA, 0.0572407405385771, 0.0480048032989263,
NA, 0.0839219752466215, 0.0827902635097706), activity = c(0,
0.303472222222222, 0.232638888888889, 0.228472222222222, 0.348611111111111,
0.215972222222222, 0.123611111111111, 0.357638888888889, 0.235416666666667,
0.233333333333333, 0.2875, 0.353472222222222, 0.356944444444444,
0.149305555555556, 0.448611111111111, 0.213888888888889, 0.248611111111111,
0.288888888888889, 0.25625, NA, NA, 0.238888888888889, 0.263888888888889,
0.247916666666667, 0.315277777777778, 0.298611111111111, 0.173611111111111,
0.185416666666667, 0.45625, 0.239583333333333, 0.335416666666667,
NA, NA, NA, NA, 0.36875, 0.251388888888889, NA, 0.266666666666667,
0.309722222222222)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))

Related

How do I create a line graph using multiple variables when the multiple variables are all in the same column?

structure(list(Sample.Id = c(NA, "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3", "3"
), Sampling..Date = c(NA, "08-Sep-14", "14-Oct-14", "02-Nov-14",
"21-Nov-14", "03-Dec-14", "15-Dec-14", "11-Jan-15", "08-Feb-15",
"01-Mar-15", "06-Apr-15", "03-Sep-14", "08-Sep-14", "14-Oct-14",
"02-Nov-14", "21-Nov-14", "03-Dec-14", "15-Dec-14", "11-Jan-15",
"26-Jan-15"), Tot.P = c("µg/ml", "0.002", "0.017", "0.035",
"0.04", "0.059", "0.155", "0.021", "0.022", "0.025", "<0.009",
"0.021", "0.003", "0.036", "0.141", "0.041", "0.044", "0.01",
"0.023", "0.016"), DOC = c("µg/ml", NA, "12.3", "13.4", "12.5",
"9.9", "14.7", "8.8", "8.3", "0.026", "7.5", "13.4", NA, "14.6",
"16.6", "14.7", "12.6", "12.6", "10.6", "11.4"), Tot.N = c("µg/ml",
NA, "3.63", "4.12", "3.98", "4.08", "3.38", "3.63", "4.88", "8.3",
"2.74", "2.48", NA, "3.07", "3.38", "3.3", "3.43", "2.19", "2.77",
"4.25"), DOC.1 = c("µg/ml", "13.6", NA, NA, NA, NA, NA, NA,
NA, NA, NA, "14.44", "16.85", NA, NA, NA, NA, NA, NA, NA), Tot.P.1 = c("µg/ml",
"0.053", NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.08", "0.071",
NA, NA, NA, NA, NA, NA, NA), Total.N = c("µg/ml", "3.363", NA,
NA, NA, NA, NA, NA, NA, NA, NA, "2.645", "2.637", NA, NA, NA,
NA, NA, NA, NA)), row.names = c(NA, 20L), class = "data.frame")
I have a set of water quality data from 2014-2022 over different sites and different time periods. Each site has a different monitoring period and the data was analysed using two different devices of which there are only two periods of overlap where the samples were analysed using both machines. I am trying to plot a time series showing the P, N and DOC across each site over time and shade in the areas where one machine was used instead of another. This is all a bit complicated and I am so new to R so have been running in circles for a week. My problem is I am unsure how to select the section of a column I need to create the variable I want so it makes sense.
I have tried to look it up on blogs but can't seem to mash the different pieces of advice together to make it work. Any tips would be much appreciated. Here is the data that I'm on about.
You will definitely need to clean up your data to fit this solution, but your basic way about this is pivoting from wide to long form.
Then you need to ensure that your dates are in the proper POSIXct format.
Then it is just a matter of grouping by your relevant variables and plotting with geom_line()
I added the facet_grid to separate by Sample.Id.
library(tidyverse)
#> Warning: pakke 'ggplot2' blev bygget under R version 4.2.2
#> Warning: pakke 'tidyr' blev bygget under R version 4.2.2
#> Warning: pakke 'purrr' blev bygget under R version 4.2.2
#> Warning: pakke 'dplyr' blev bygget under R version 4.2.2
#> Warning: pakke 'stringr' blev bygget under R version 4.2.2
#> Warning: pakke 'forcats' blev bygget under R version 4.2.2
df <- structure(list(Sample.Id = c("2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "3", "3"),
Sampling..Date = c("08-Sep-14", "14-Oct-14", "02-Nov-14",
"21-Nov-14", "03-Dec-14", "15-Dec-14", "11-Jan-15", "08-Feb-15",
"01-Mar-15", "06-Apr-15", "03-Sep-14", "08-Sep-14", "14-Oct-14",
"02-Nov-14", "21-Nov-14", "03-Dec-14", "15-Dec-14", "11-Jan-15",
"26-Jan-15"), Tot.P = c("0.002", "0.017", "0.035", "0.04",
"0.059", "0.155", "0.021", "0.022", "0.025", "<0.009", "0.021",
"0.003", "0.036", "0.141", "0.041", "0.044", "0.01", "0.023",
"0.016"), DOC = c(NA, "12.3", "13.4", "12.5", "9.9", "14.7",
"8.8", "8.3", "0.026", "7.5", "13.4", NA, "14.6", "16.6",
"14.7", "12.6", "12.6", "10.6", "11.4"), Tot.N = c(NA, "3.63",
"4.12", "3.98", "4.08", "3.38", "3.63", "4.88", "8.3", "2.74",
"2.48", NA, "3.07", "3.38", "3.3", "3.43", "2.19", "2.77",
"4.25"), DOC.1 = c("13.6", NA, NA, NA, NA, NA, NA, NA, NA,
NA, "14.44", "16.85", NA, NA, NA, NA, NA, NA, NA)), row.names = 2:20, class = "data.frame")
# Reshape the water-quality measurements to long form and draw one line per
# analyte (Tot.P, DOC, Tot.N), with one facet per Sample.Id.
df |>
  # DOC.1 is not plotted, so drop it before any type conversion.
  select(-DOC.1) |>
  mutate(
    # Readings flagged as below the detection limit (e.g. "<0.009") are kept
    # at the limit value by stripping the "<" marker before conversion.
    Tot.P = str_replace(Tot.P, "<", ""),
    across(Tot.P:Tot.N, as.numeric),
    # Parse "08-Sep-14"-style dates with an explicit English locale.
    # Base R's "%b" uses the session's LC_TIME, so under a non-English
    # locale month abbreviations such as "Oct" fail to parse and those
    # samples silently become NA. readr's parser does not depend on the
    # session locale. The values carry no time of day, so a Date is enough.
    Sampling..Date = readr::parse_date(
      Sampling..Date,
      format = "%d-%b-%y",
      locale = readr::locale("en")
    )
  ) |>
  pivot_longer(cols = c(Tot.P, DOC, Tot.N)) |>
  ggplot(aes(x = Sampling..Date, y = value, group = name, col = name)) +
  geom_line() +
  facet_grid(~Sample.Id)
#> Warning: Removed 5 rows containing missing values (`geom_line()`).
Created on 2023-02-14 with reprex v2.0.2

Make ggplot connect datapoints in a scatterplot chronologically

This seems very simple but for some reason I can't make it work.
I have a dataset with 3 variables. The first variable is a measurement which is taken several times per day across two months (it can take the values 1, 2, 3, 4, 5 and 6 - these are not groups, it is values that have been measured). The second variable are the dates. The third variable are the times the measurement was taken. I want to plot how this measurement changes across time so I need the datapoints to be connected chronologically.
Things I have tried:
I have tried to plot using just the date, making sure it is in a date format and ordered correctly, and then adding + geom_path(), which should tell R that I want it to go row by row, connecting the points in that order:
DF$Date <- as.Date(DF$Date)
DF <- DF[order(DF$Date),]
ggplot(DF, aes(x = Date, y = Measurement)) +
geom_line(linewidth=1, colour="green") +
geom_path()
I created a DateTime variable:
DF$DateTime <- as.POSIXct(paste(DF$Date, DF$Time, format="%y/%m/%d %H:%M:%S"))
ggplot(DF, aes(x = DateTime, y = Measurement)) +
geom_line(linewidth=1, colour="green")
In both cases R just connects all the responses of value 1 to each other, all the responses of value 2 to each other, and so on, instead of connecting them chronologically.
Thank you!
structure(list(Measurement = c("1", "1", "1", "1", "2", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "2", "2", "1", "2", "2", "2", "1", "2", "1", "1",
"1", "2", "1", "2", "2", "2", "1", "2", "3", "2", "2", "4", "3",
"2", "2", "2", "2", "3", "3", "3", "3", "3", "4", "3", "3", "2",
"2", "4", "4", "3", "1", "1", "2", "1", "1", "4", "3", "3", "3",
"3", "3", "3", "4", "3", "3", "3", "3", "3", "3", "3", "3", "3",
"3", "5", "4", "5", "3", "5", "5", "5", "4", "4", "4", "4", "4",
"4", "3", "5", "4", "4", "4", "4", "4", "4", "4", "4", "4", "5",
"4", "4", "5", "5", "5", "5", "4", "3", "4", "4", "4", "4", "3",
"4", "3", "4", "4", "4", "4", "3", "5", "4", "4", "5", "4", "4",
"4", "4", "4", "4", "4", "4", "3", "3", "3", "3", "4", "3", "4",
"3", "3", "2", "3", "4", "4", "4", "4", "4", "5", "4", "4", "4",
"4", "3", "4", "5", "4", "3", "4", "4", "4", "4", "4", "3", "4",
"1", "4", "4", "3", "4", "4", "3", "4", "4", "4", "3", "4", "4",
"4", "4", "6", "4", "4", "4", "4", "3", "3", "4", "4", "3", "3",
"4", "3", "3", "3", "4", "5", "4", "4", "4", "4", "1", "3", "4",
"3", "4", "4", "4", "1", "3", "4", "5", "5", "5", "5", "5", "5",
"5", "4", "4", "4", "4", "4", "5", "4", "3", "4", "4", "4", "4",
"5", "4", "4", "3", "4", "4", "4", "4", "4", "5", "4", "4", "4",
"4", "4", "3", "5", "4", "4", "4", "4", "3", "3", "3", "4", "4",
"3", "4", "4", "5"), Date = structure(c(19333, 19333,
19334, 19334, 19334, 19334, 19334, 19334, 19334, 19335, 19335,
19335, 19335, 19335, 19335, 19335, 19335, 19336, 19336, 19336,
19336, 19336, 19336, 19337, 19337, 19337, 19337, 19337, 19337,
19338, 19338, 19338, 19338, 19338, 19338, 19338, 19338, 19338,
19339, 19339, 19339, 19339, 19339, 19339, 19339, 19339, 19339,
19340, 19340, 19340, 19340, 19341, 19341, 19341, 19341, 19342,
19342, 19342, 19342, 19342, 19342, 19342, 19343, 19343, 19343,
19343, 19343, 19344, 19344, 19344, 19344, 19344, 19345, 19345,
19345, 19345, 19346, 19346, 19346, 19346, 19347, 19347, 19347,
19347, 19348, 19348, 19349, 19350, 19350, 19350, 19350, 19350,
19350, 19351, 19351, 19351, 19352, 19353, 19353, 19353, 19353,
19353, 19354, 19355, 19355, 19355, 19356, 19356, 19356, 19356,
19357, 19357, 19357, 19357, 19358, 19358, 19358, 19358, 19359,
19359, 19359, 19359, 19360, 19360, 19360, 19360, 19360, 19361,
19361, 19362, 19362, 19362, 19362, 19363, 19363, 19363, 19363,
19364, 19364, 19364, 19364, 19364, 19365, 19365, 19365, 19365,
19365, 19366, 19366, 19366, 19366, 19366, 19367, 19367, 19367,
19367, 19367, 19368, 19368, 19368, 19368, 19369, 19369, 19369,
19370, 19370, 19370, 19371, 19371, 19371, 19372, 19372, 19372,
19372, 19372, 19373, 19373, 19373, 19373, 19374, 19374, 19374,
19374, 19374, 19374, 19375, 19375, 19375, 19375, 19376, 19376,
19376, 19377, 19377, 19377, 19377, 19377, 19378, 19378, 19378,
19378, 19379, 19379, 19379, 19379, 19379, 19380, 19380, 19380,
19380, 19380, 19381, 19381, 19381, 19381, 19382, 19382, 19382,
19382, 19383, 19383, 19385, 19385, 19385, 19385, 19385, 19385,
19385, 19385, 19385, 19385, 19386, 19386, 19386, 19386, 19386,
19387, 19387, 19387, 19387, 19387, 19387, 19387, 19387, 19387,
19388, 19388, 19388, 19388, 19388, 19388, 19388, 19388, 19388,
19389, 19389, 19389, 19389, 19389, 19389, 19389, 19389, 19390,
19390, 19390, 19390, 19390, 19390, 19390, 19390, 19390), class = "Date"),
Time = structure(c(43810, 44174, 49104, 49343, 50921,
54029, 59443, 65767, 70544, 40647, 40731, 43219, 50506, 54044,
58687, 68571, 71016, 36049, 38921, 44148, 55413, 66503, 70310,
34796, 48468, 48770, 56701, 67069, 73131, 32103, 37937, 43270,
43941, 49507, 57796, 59420, 65187, 70669, 28787, 33612, 38807,
43900, 49607, 54026, 60525, 65861, 76855, 29833, 43197, 45349,
67928, 34018, 44887, 54024, 65491, 34687, 45029, 45029, 45096,
45096, 56881, 70503, 30726, 49625, 54871, 76945, 76990, 30348,
51899, 58286, 65893, 76301, 34075, 54033, 54075, 66322, 34158,
47973, 69070, 69113, 29971, 43838, 43891, 68344, 58512, 64840,
74134, 42286, 48249, 53712, 75669, 75669, 75669, 34484, 67922,
67922, 63298, 30761, 30814, 52835, 67936, 78132, 69679, 44485,
61309, 65893, 32443, 46595, 55031, 65701, 40995, 43257, 78737,
78783, 35103, 58260, 65353, 78583, 36651, 44588, 53857, 74257,
34262, 44172, 50954, 56508, 68744, 32577, 54241, 32233, 45405,
59002, 68596, 33529, 44235, 56676, 65104, 35378, 43263, 59195,
70423, 76305, 34704, 40350, 43769, 54069, 65163, 32335, 43220,
52463, 64829, 64883, 33312, 47326, 56974, 78210, 78249, 37710,
47664, 51668, 67281, 39815, 57103, 67451, 52368, 54111, 66853,
45038, 45079, 64861, 35856, 45970, 54136, 54174, 67102, 32497,
49309, 56959, 68312, 33326, 44280, 53945, 54763, 65275, 65313,
32958, 52099, 57512, 65378, 27223, 58171, 64993, 32862, 44507,
44547, 54631, 76109, 33983, 49720, 58810, 66231, 29886, 53075,
54592, 64904, 64942, 29982, 40303, 43288, 54319, 65762, 28881,
36993, 44716, 65239, 34587, 43395, 64886, 66650, 41670, 53480,
29252, 38412, 38477, 38477, 44963, 48648, 56521, 59572, 65410,
70232, 32517, 38681, 43273, 50715, 74179, 33337, 39419, 39419,
40341, 49560, 59123, 60091, 65164, 70217, 37318, 37822, 43242,
43287, 49346, 55187, 59908, 64815, 72710, 32553, 33678, 37864,
43283, 54029, 59412, 68693, 78965, 34730, 38193, 43936, 51483,
54039, 54134, 59417, 66687, 72937), class = c("hms", "difftime"
), units = "secs"), DateTime = structure(c(1670415010, 1670415374,
1670506704, 1670506943, 1670508521, 1670511629, 1670517043,
1670523367, 1670528144, 1670584647, 1670584731, 1670587219,
1670594506, 1670598044, 1670602687, 1670612571, 1670615016,
1670666449, 1670669321, 1670674548, 1670685813, 1670696903,
1670700710, 1670751596, 1670765268, 1670765570, 1670773501,
1670783869, 1670789931, 1670835303, 1670841137, 1670846470,
1670847141, 1670852707, 1670860996, 1670862620, 1670868387,
1670873869, 1670918387, 1670923212, 1670928407, 1670933500,
1670939207, 1670943626, 1670950125, 1670955461, 1670966455,
1671005833, 1671019197, 1671021349, 1671043928, 1671096418,
1671107287, 1671116424, 1671127891, 1671183487, 1671193829,
1671193829, 1671193896, 1671193896, 1671205681, 1671219303,
1671265926, 1671284825, 1671290071, 1671312145, 1671312190,
1671351948, 1671373499, 1671379886, 1671387493, 1671397901,
1671442075, 1671462033, 1671462075, 1671474322, 1671528558,
1671542373, 1671563470, 1671563513, 1671610771, 1671624638,
1671624691, 1671649144, 1671725712, 1671732040, 1671827734,
1671882286, 1671888249, 1671893712, 1671915669, 1671915669,
1671915669, 1671960884, 1671994322, 1671994322, 1672076098,
1672129961, 1672130014, 1672152035, 1672167136, 1672177332,
1672255279, 1672316485, 1672333309, 1672337893, 1672390843,
1672404995, 1672413431, 1672424101, 1672485795, 1672488057,
1672523537, 1672523583, 1672566303, 1672589460, 1672596553,
1672609783, 1672654251, 1672662188, 1672671457, 1672691857,
1672738262, 1672748172, 1672754954, 1672760508, 1672772744,
1672822977, 1672844641, 1672909033, 1672922205, 1672935802,
1672945396, 1672996729, 1673007435, 1673019876, 1673028304,
1673084978, 1673092863, 1673108795, 1673120023, 1673125905,
1673170704, 1673176350, 1673179769, 1673190069, 1673201163,
1673254735, 1673265620, 1673274863, 1673287229, 1673287283,
1673342112, 1673356126, 1673365774, 1673387010, 1673387049,
1673432910, 1673442864, 1673446868, 1673462481, 1673521415,
1673538703, 1673549051, 1673620368, 1673622111, 1673634853,
1673699438, 1673699479, 1673719261, 1673776656, 1673786770,
1673794936, 1673794974, 1673807902, 1673859697, 1673876509,
1673884159, 1673895512, 1673946926, 1673957880, 1673967545,
1673968363, 1673978875, 1673978913, 1674032958, 1674052099,
1674057512, 1674065378, 1674113623, 1674144571, 1674151393,
1674205662, 1674217307, 1674217347, 1674227431, 1674248909,
1674293183, 1674308920, 1674318010, 1674325431, 1674375486,
1674398675, 1674400192, 1674410504, 1674410542, 1674461982,
1674472303, 1674475288, 1674486319, 1674497762, 1674547281,
1674555393, 1674563116, 1674583639, 1674639387, 1674648195,
1674669686, 1674671450, 1674732870, 1674744680, 1674893252,
1674902412, 1674902477, 1674902477, 1674908963, 1674912648,
1674920521, 1674923572, 1674929410, 1674934232, 1674982917,
1674989081, 1674993673, 1675001115, 1675024579, 1675070137,
1675076219, 1675076219, 1675077141, 1675086360, 1675095923,
1675096891, 1675101964, 1675107017, 1675160518, 1675161022,
1675166442, 1675166487, 1675172546, 1675178387, 1675183108,
1675188015, 1675195910, 1675242153, 1675243278, 1675247464,
1675252883, 1675263629, 1675269012, 1675278293, 1675288565,
1675330730, 1675334193, 1675339936, 1675347483, 1675350039,
1675350134, 1675355417, 1675362687, 1675368937), class = c("POSIXct",
"POSIXt"), tzone = "")), row.names = c(NA, -271L), class = c("tbl_df",
"tbl", "data.frame"))
You can connect the points chronologically by specifying group = 1. Additionally, you can use scale_x_datetime to control the breaks on your x-axis. For the way the x-axis is labelled, see this r-bloggers post for more information about date formats.
I've included one example that best recreates what you were trying to do and another example where the points are colored by Measurement.
library(tidyverse)
# Build a full timestamp from the separate Date and Time columns, then plot
# the measurements as a single line running through the points in time order.
DF$Date <- as.Date(DF$Date)

# `format` must be an argument of as.POSIXct(), not of paste(): inside paste()
# the format string is simply appended to every timestamp. A Date pastes as
# "YYYY-MM-DD", so the matching format is "%Y-%m-%d", not "%y/%m/%d".
DF$DateTime <- as.POSIXct(
  paste(DF$Date, DF$Time),
  format = "%Y-%m-%d %H:%M:%S"
)

# Order by the full timestamp so rows within the same day are chronological.
DF <- DF[order(DF$DateTime), ]

# what I think you want
# group = 1 puts every row in one group; without it ggplot draws a separate
# line per Measurement value because Measurement is a discrete variable.
ggplot(DF, aes(x = DateTime, y = Measurement, group = 1)) +
  geom_line(linewidth = 1, color = "green") +
  geom_point() +
  scale_x_datetime(date_breaks = "10 days", date_labels = "%b %d %y")

# another option
# Same single line, drawn in gray, with the points colored by Measurement.
ggplot(
  DF,
  aes(x = DateTime, y = Measurement, color = Measurement, group = 1)
) +
  geom_line(linewidth = 1, color = "gray") +
  geom_point() +
  scale_x_datetime(date_breaks = "10 days", date_labels = "%b %d %y")

Create (many) columns conditional on similarly named columns

I want to create a new column that take the value of one of two similarly named columns, depending on a third column. There are many such columns to create. Here's my data.
dt <- structure(list(malvol_left_1_w1 = c("1", "1", "4", "3", "4",
"4", "1", "4", "4", "3", "1", "4", "4", "3", "4", "4", "5", "2",
"4", "2"), malvol_left_2_w1 = c("1", "1", "4", "3", "4", "4",
"1", "3", "4", "2", "2", "2", "4", "1", "5", "4", "5", "2", "4",
"2"), malvol_right_1_w1 = c("1", "1", "4", "3", "4", "4", "1",
"3", "4", "2", "1", "4", "4", "5", "5", "4", "2", "6", "4", "1"
), malvol_right_2_w1 = c("1", "1", "4", "3", "4", "4", "1", "3",
"4", "2", "1", "2", "4", "5", "5", "4", "5", "5", "4", "5"),
malvol_left_1_w2 = c("1", "1", "3", "3", "4", "4", "1", "5",
"4", "4", "4", "2", "1", "4", "5", "4", "3", "2", "4", "4"
), malvol_left_2_w2 = c("1", "1", "3", "3", "4", "4", "7",
"5", "4", "2", "3", "1", "1", "4", "4", "4", "3", "4", "4",
"4"), malvol_right_1_w2 = c("1", "3", "3", "3", "4", "4",
"1", "4", "4", "3", "2", "2", "4", "1", "4", "4", "5", "5",
"4", "4"), malvol_right_2_w2 = c("1", "2", "3", "3", "4",
"4", "1", "2", "4", "2", "3", "2", "4", "1", "4", "4", "5",
"4", "4", "3"), leftright_w1 = c("right", "right", "left",
"right", "right", "right", "left", "right", "right", "left",
"left", "left", "left", "right", "left", "left", "right",
"right", "right", "left"), leftright_w2 = c("right", "right",
"left", "left", "right", "left", "left", "right", "right",
"left", "left", "left", "left", "right", "left", "left",
"right", "right", "left", "left")), class = "data.frame", row.names = c("12",
"15", "69", "77", "95", "96", "112", "122", "150", "163", "184",
"216", "221", "226", "240", "298", "305", "354", "370", "379"
))
Now I can do this in dplyr like:
dt <- dt %>%
mutate(
malvol_1_w1 = case_when(
leftright_w1 == "left" ~ malvol_right_1_w1,
leftright_w1 == "right" ~ malvol_left_1_w1),
malvol_2_w1 = case_when(
leftright_w1 == "left" ~ malvol_right_2_w1,
leftright_w1 == "right" ~ malvol_left_2_w1),
malvol_1_w2 = case_when(
leftright_w2 == "left" ~ malvol_right_1_w2,
leftright_w2 == "right" ~ malvol_left_1_w2),
malvol_2_w2 = case_when(
leftright_w2 == "left" ~ malvol_right_2_w2,
leftright_w2 == "right" ~ malvol_left_2_w2))
However, it's not really a feasible solution, because there will be more of both numbers defining a variable (e.g. both malvol_3_w1 and malvol_1_w3 will need to be created).
One solution is to do this with a loop:
for (wave in 1:2) {
for (var in 1:2) {
dt[, paste0("malvol_", var, "_w", wave)] <- dt[, paste0("malvol_right_", var, "_w", wave)]
dt[dt[[paste0("leftright_w", wave)]] == "right", paste0("malvol_", var, "_w", wave)] <-
dt[dt[[paste0("leftright_w", wave)]] == "right", paste0("malvol_left_", var, "_w", wave)]
}
}
However, what is a tidyverse solution?
UPDATE:
I came up with a tidyverse solution myself; however, it is not very elegant. I am still looking for more canonical solutions.
dt <- dt %>%
mutate(
malvol_1_w1 = NA, malvol_2_w1 = NA,
malvol_1_w2 = NA, malvol_2_w2 = NA) %>%
mutate(
across(matches("malvol_\\d"),
~ case_when(
eval(parse(text = paste0("leftright_", str_extract(cur_column(), "w.")))) == "left" ~
eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
"_right", str_split(cur_column(), "malvol", simplify = T)[2]))),
eval(parse(text = paste0("leftright_", str_extract(cur_column(), "w.")))) == "right" ~
eval(parse(text = paste0(str_split(cur_column(), "_\\d", simplify = T)[1],
"_left", str_split(cur_column(), "malvol", simplify = T)[2]))))))
What makes your problem difficult is that a lot of information is hidden in variable names rather than data cells. Hence, you need some steps to transform your data into "tidy" format. In the code below, the crucial part is (1) to turn the variables [malvol]_[lr]_[num]_[w] into four separate columns malvol, lr, num, w (all prefixed with m_), and (2) from the variables leftright_[w] extract variable w (prefixed with l_) using the functions pivot_longer and then separate.
# Add a row id so the reshaped result can be joined back onto the original
# rows afterwards.
dt <- dt %>% mutate(id = row_number())

# Build one malvol_<num>_<wave> column per item and wave, taking the value
# from the side opposite to the one recorded in leftright_<wave>.
df <- dt %>%
  # Tidy the "malvol" columns: one row per id x side x item x wave
  pivot_longer(
    cols = starts_with("malvol"),
    names_to = "m_var",
    values_to = "m_val"
  ) %>%
  separate(m_var, into = c("m_malvol", "m_lr", "m_num", "m_w")) %>%
  # Tidy the "leftright" columns: one row per id x wave
  pivot_longer(
    cols = starts_with("leftright"),
    names_to = "l_var",
    values_to = "l_lr"
  ) %>%
  separate(l_var, into = c(NA, "l_w")) %>%
  # Implement the logic: keep rows where the two waves match and the malvol
  # side differs from the recorded leftright side
  filter(l_w == m_w, l_lr != m_lr) %>%
  # Pivot into original wide format
  select(-c(l_w, l_lr, m_lr)) %>%
  pivot_wider(names_from = c(m_malvol, m_num, m_w), values_from = m_val)

# Merge the results back onto the original data. A left join keeps rows that
# the filter removed entirely (e.g. a missing leftright value); their new
# columns are NA instead of the row disappearing.
dt <- dt %>% left_join(df, by = "id")
Although I pivoted the data back into your desired format in the end (to check whether the results are in line with your desired results), I would suggest you leave the data in the long format, which is "tidy" and easier to work with than your "wide" format. So maybe skip the last pivot_wider operation.

color code based on presence/absence of factor in a list with ggplot

I have some genomic data that consists of various statistics ("BHTD", "BHNS") calculated for each gene. There is also a column ("func.cat") containing functional classification for each of the genes; these categories are not mutually exclusive and are represented by the numbers 1-34, ie. a gene may belong to multiple functional categories such as 1,5, and 7. I am trying to write some code that will allow me to explore where the genes in each functional category land in the distribution of the whole dataset. The problem I am having pertains to how to iterate over the list in the $func.cat column (the functional classifications which are labeled as 1-34 separated by ",").
What I have tried so far is this:
library(ggplot2)
dat$cat <- with(dat, factor(ifelse(func.cat == "4", 1, 0)))
(note I would change the "4" depending on which category I wish to look at)
I realize that "==" is causing my problem as something with "1,4,5" will not equal 4, but I am not sure how to iterate over the items in that list - this is the root of my problem.
p1 <- ggplot(dat, aes(x=BHNS, y=BHTD)) +
geom_point(aes(colour = cat))
p1
This produces a plot that will highlight all the dots corresponding to genes classified as category 4, but only category 4. What I desire is to have any gene that has been classified as category 4 to be colored, regardless of whether it has also been classified as another category.
Thank you in advance for any suggestions!
Here is a subset of the data:
dput(dat)
structure(list(BHTD = c(-2.407361658, -1.796755011, -2.033328407,
-1.999300651, -1.705683372, -2.07316744, -2.14888815, -1.238415232,
-1.964081372, -0.946236, -1.804456645, -1.117478076, -0.865610729,
-2.014708065, -1.700170785, -2.066551154, -2.321719704, -1.382856082,
-1.860020378, -1.938464139, -1.772410788, -1.98276263, -1.474897562,
-2.341638838, -1.477241173, -1.997347068, -0.951587062, -1.472879561,
-2.205905395, -1.573116753, NA, -1.793176812, -1.508295823, -1.411124368,
-1.659159007, NA, -2.184113573, -1.753603939, -0.946572775, -2.230161692,
-2.257687027, -2.052893551, -1.684633689, -1.723560773, -1.311466597,
-0.878316233, -1.760067015, -0.991159868, -1.597358958, -1.68808286,
-2.24992473, -1.384269004, -1.245007137, -0.733919882, -0.951587062,
-1.716349956, -1.857569436, -2.03690476, -1.742918492, -1.652319766,
-1.018325037, -1.237294825, -1.705582368, -2.251182699, -1.44607638,
-1.86373038, -1.238915153, -1.999390778, -2.258813941, -2.307483301,
-1.824001963, -2.046894346, -1.973018785, -1.822540934, -2.098850332,
-1.622573473, -1.385826692, -0.13581749, -1.473072964, -1.912742974,
-1.331192083, -1.35995547, -1.414638574, -2.3080906, -1.928952143,
-1.73843242, -1.78263942, -2.045782556, -1.83695726, -1.782816678,
-1.384301082, -1.738544771, -2.122562601, -1.008751008, NA, 0.210106499,
NA, -1.497540674, -1.175421047, -2.077668436, 0, -2.392002104,
-2.172277005, -2.248237481, -1.754777666, -1.205907566, -2.157860036,
-2.372725867, -1.776418131, NA, -1.981646655, -1.83215468, -1.632970565,
-1.796441664, -1.16749959, -2.097760427, -1.688111791, -1.091669998,
-2.031922436, -1.609781944, -1.701092173, -1.741641383, -1.648047931,
0, NA, -2.155091718, -2.19686492, -2.043064889, -1.900228157,
-1.033211891, -1.741600124, -1.952787018, -1.635681652, -1.414146075,
-1.59652266, -0.67250785, -1.741490676, -1.668132969, -1.547999618,
-2.096369244, -1.585198776, -0.893064463, -1.720571973, -2.22165509,
-0.87006502, -2.026357729, -1.903054158, -2.336779135, -1.804456645,
-2.187397942, -1.738544771, NA, NA, -2.244769328, -1.443026239,
-2.1075961, -1.284505684, -2.171199234, 0, -1.832601732, NA,
NA, -1.92361227, -1.404739557, -1.673782332, -1.630260976, -1.949121617,
-1.535783949, -0.93437506, -1.682586839, -1.211290477, -1.837644381,
-2.006840092, -2.094965703, -1.501569366, -1.686096428, -1.766893171,
-1.025055976, -1.445292041, -1.804456645, -1.295447345, -2.333513471,
-2.024633218, -1.945932896, -2.138973359, -1.192594283, -1.032891761,
-2.274612038, -1.572404387, -1.157314923, -1.821517283, -0.951587062,
-1.491063082, -1.262268477, -1.285825224, -2.125036752, -2.254193395,
-2.025063005, -1.781927799, -1.817341356, -2.363964155, -1.324016132,
-1.807867509, -1.28536312, -2.304611837, -1.805333968, -1.981011267,
-1.148360279, -2.254965576, -1.832997391, -1.284505684, -1.36694829,
-1.511236509, -2.199333133, -2.162070519, -1.613952632, -1.98074573,
-1.937973938, -1.775626812, -1.110822415, -2.350956471, -0.798852174,
-1.570167789, -1.205143435, -1.227387321, -1.870071641, -1.804119234,
-1.852186861, -1.980559594, -1.910654458, -1.846696172, -0.851873121,
-2.093777922, -2.009089876, -1.341030926, -1.714790638, -2.035487818,
-1.655358562, -1.24566631, -2.268812805, -2.033792651, -1.964347513,
-1.177587982, -1.014825692, -1.543347759, -1.982771035, -2.095046073,
-2.396210518, -1.190624287, -1.980840471, -1.629812596, -2.075443039,
-1.117986711, -1.284505684, -1.737984295, -1.909068715, -1.44607638,
-0.13581749, -2.26199378, NA, -1.13387384, -1.775626812, -1.796501476,
-1.98917241, -1.980551021, -1.804456645, -2.030319284, -1.450348529,
-2.094406662, -2.473686253, -2.076634731, -1.507574766, -1.964339138,
-1.459205458, -2.408551092, -2.150651491, -2.058411824, -2.073951488,
-1.674678062, -2.066920278, -2.322477648, NA, NA, NA, -2.066255854,
-1.589735284, -1.607268225, -2.548035167, NA, NA, -0.951587062,
-1.700335988, -2.189638552, -1.659055578, -2.205270213, -1.472855439,
-1.785739256, -1.627265507, -1.910645503, -2.050893789, NA, -0.062843564,
-0.673444373, -1.091002703, -1.326825703, -1.050001441, -1.891635725,
NA, NA, -1.494850186, -1.156519132, -1.999300651, -0.092286396,
-0.183330333, -1.845843023, -1.640896562, -1.572404387, -1.860224553,
-1.648638668, -1.872150192, -2.245024819, -1.737840368, -1.385826692,
-1.981735368, -1.284505684, -2.14125006, -1.804456645, -0.973371896,
-0.314443643, -1.715455449, -1.551622633, -1.673782332, -1.467606151,
-1.910546588, -1.275308274, -1.78203764, -0.835133, -0.951587062,
NA, NA, -1.467647481, -2.351522017, -1.63428397, -1.355853241,
-2.110040041, -1.960573835, -2.230550217, -1.919743893, -1.064911257,
-1.999122333, -1.717022526, -1.284505684, -2.195620712, -1.678699875,
-0.951587062, -0.985647431, -1.760291205, NA, NA, -1.980847743,
-1.546669986, -1.571405894, -1.210776173, -0.981913114, -1.507574766,
-1.736694039, -1.540253602, -1.407640256, -1.697004898, -1.425404963,
-0.952113105, -1.504651095, -0.664885437, -0.988878576, -1.963728301,
-1.873625986, -2.375331349, -0.951587062, -1.714055933, -1.835483999,
0.531735976, -1.247617445, -1.597358958, -0.870980998, -1.910955616,
-1.472767466, -1.909412827, -1.989851075, -1.928950806, -2.125207581,
-0.702179857, NA, -1.258758453, -0.951587062, -1.910744418, -1.977408625,
-2.185569218, -1.649306639, -1.67542209, -1.859870782, -1.688905215,
-1.014897763, -2.098942661, -1.50114744, -1.507574766, -2.401004247,
-1.630373071, -1.704144046, -2.341487878, -1.648352839, -1.449448077,
-1.568042202, -1.056046556, -1.955734929, -1.326772791, -1.125210141,
-1.263721543, -1.787741424, -1.631502453, -1.507574766, -1.832997391,
-2.074270271, -1.720214873, -1.832601732, -1.981646655, -1.538923716,
-2.074197994, -1.804456645, -2.219273959, -1.630260976, -2.095269031,
-1.058597445, -1.103308726, -0.222433651, 0.363131214, -1.316124286,
-1.428135774, -1.13088371, -2.237049349, -1.212032461, -1.651632249,
-1.321079056, -2.058330473, -1.908682716, -1.285163648, NA, -2.033320494,
-1.561765422, -0.650534061, -1.54891411, -1.775629164, -1.16720697,
-1.67466461, -2.11038659, -1.622183558, -0.805494207, NA, -1.284505684,
-1.629812596, 0, -1.733554458, -1.471842151, -2.325564799, -1.931011616,
-0.951587062, -1.674650606, -2.084007693, -0.13581749, -0.728334229,
-0.697339499, -1.717022526, -1.84336553, -2.102771894, -1.613099192,
-1.373005006, -1.404053215, -1.563983261, -1.240665527, -1.169474104,
-0.499894742, -1.477392537, 0, -1.35617011, -1.552690092, -1.266997489,
-1.800049126, -1.284505684, -1.738096391, -1.081560759, -0.353725082,
-1.833482909, -1.820326999, -1.929571801, -1.445764205, -1.507261023,
-1.029306695, -1.507418679, -1.28536312, -1.888950799, -0.925858686
), BHNS = c(-0.536445534500279, -2.51590975211276, 0.975176838838268,
1.16762127648105, 0.306137539516978, -1.74214520673759, -1.64717485268251,
-3.95922066144353, -3.39635293660797, -0.970330946311565, 0.172619995428375,
2.48298883472192, 0.793974082613881, 0.501039140250149, 0.804337655982506,
-3.30714604156556, -1.20707193264363, 2.45797739273923, -1.59701672552781,
-1.66914695085005, 0.0674012955390476, 1.62417842412305, -2.58203639235661,
-2.12135486007817, NA, 1.90106289648472, NA, -4.03524619467747,
-1.22854600858282, -1.07477486644834, NA, -2.28606696929486,
-2.66907956622894, -2.72803842459239, 1.85426238301574, NA, -1.21282898286863,
NA, 2.2855102029862, 0.277515630354603, -2.22927291562984, -0.408924692322956,
1.28800115719665, -3.38950127542429, 0.674326255003232, 0.100856586743738,
-1.09361761185504, 4.48642120653207, NA, -0.778285568839945,
-1.61559599922626, 1.11437504404848, NA, 2.36961492996114, NA,
-3.31657121257007, -1.26574933393628, -1.65527597153758, -1.43219956248087,
-4.48337471597694, -0.38392378868904, 2.18997939435775, -1.45440502892558,
-0.498677781445362, NA, 0.263936976427928, 2.13996790558608,
2.02108542548667, 2.04917825749504, 0.0491580369030717, 3.50533020548612,
0.691750405572648, -2.95240608774007, -3.02575598315416, -0.8637227975704,
0.759542644984083, -0.144270618649459, NA, -0.357833850766232,
-2.35061857677948, 0.846721856058998, -4.30457112149904, NA,
0.521384795227345, -2.68755642840854, 0.301655700405515, -2.73328355695093,
1.81192054331534, -2.93795656056712, -4.54923476572377, -0.0705314386206745,
1.33535189698769, -0.814805548174206, 1.37244958099412, 1.06210539305522,
-0.31272164063358, NA, NA, NA, -0.974460560578356, NA, -1.09691503978493,
-1.16129610027415, 0.807081820419685, 0.219769515667387, NA,
-2.11671131861236, -0.00825953442660159, -3.34135267869532, NA,
-2.14077271733041, -1.53984154690997, -2.55464025816218, 1.87699362206363,
NA, 1.21608554754864, 2.37418777663669, 2.55877670413167, 0.802426328525387,
-2.46322401518915, -2.68656498120426, -1.82904543820427, NA,
NA, NA, -2.60297624909947, -3.67353291535057, -0.495574112777514,
2.03017007695609, -0.0124338742030449, 1.12209795456402, NA,
0.866863701855014, 3.7209135742554, 0.375208084011011, -1.45849683298558,
-0.19123665984119, -0.188018140361373, 1.19734815284619, 0.00853725978565168,
-0.730240497597164, NA, -4.96375079708713, 1.19107712360479,
-0.633787160803514, 0.499368742064575, 0.664122184333449, -0.103947741244196,
-2.14739584713273, 1.74822462363828, 1.32687851322831, NA, NA,
-0.616579186144451, -4.44411553936161, -2.84734557522817, -1.54029271130266,
-0.292331969757492, NA, -1.66741119163565, NA, NA, -4.34294861371069,
NA, NA, 1.84578349226819, -2.17212862641572, -0.548751753048712,
2.42407222784798, 1.98051589623148, 0.857017231589848, 1.02923101633178,
0.603889368418457, -0.155950277329299, -1.34170207835036, -3.12632714628269,
3.17556103654373, 2.20650266094645, -5.13428281112186, -0.921178268637226,
-2.38181625620856, 0.160972414602853, 1.77091538996229, 4.63247625224926,
-1.86579167976827, -4.69436622845745, 1.63801459948559, -2.43515279935704,
NA, NA, -3.78670320087694, NA, -0.288033414912556, NA, NA, 0.0313451485705829,
0.966671153586328, -1.45301303679307, 0.865396954575461, -1.04732750646681,
-1.13862805823273, -3.33182170877955, -0.716789934481586, -1.46422431089161,
1.80608017491006, -0.819565581598858, -3.31489269127239, 2.30213502005916,
-1.19546412608738, 1.69987803022498, -1.50379312132888, 1.12150667071641,
3.29476953236869, -0.174091501660731, 0.0355055954410258, 1.68302476193471,
-0.933232270264013, -1.20837727976455, -5.00113669120977, 0.549514565656909,
-1.22088165343573, -4.9131413352054, 0.249406010650101, -0.308297840121518,
NA, 2.04846718184171, 0.560020743571995, -0.463120853229649,
1.40062718046321, -3.79755640951251, -1.88533639543057, 1.57110593873126,
-0.0299794333846566, 1.22247764726521, NA, -2.03497022023155,
-0.407010083622181, -2.37498324523997, NA, -0.349381507825823,
-3.11946075987499, NA, -2.76573219788431, 3.29139667042278, 2.21696800111447,
-0.086332960380396, 0.836753244947099, -1.30865978206261, NA,
-0.192919803126557, -5.35233490542491, -1.18731608332232, -3.69045479061667,
-1.57841486746488, 1.18664265800922, -2.19679997569897, -2.0452403212891,
NA, -1.45283826875268, NA, 4.56092709451823, -1.33156364802532,
-4.39414824444583, -3.50663975811275, -0.994204002807067, 0.465461528013715,
0.449957137212169, NA, -2.20473150634823, -2.43893778416633,
-0.21859079543478, -2.49654949099443, -0.101281326767697, -1.83129379417679,
-0.240462905250426, -1.0882958290627, -1.39034392349669, -1.14066661465865,
3.78358608669209, 1.02135372275077, -0.740754000333138, NA, NA,
NA, -0.256965993799418, -2.49360159097253, -2.67415175736433,
-1.75805187150358, NA, NA, NA, 2.56779851815765, -1.43656049387778,
2.29452798322785, 0.862942019010368, 2.09421244256466, -2.00177427820928,
1.38403406309234, 0.740884397517659, 0.521458258281686, NA, NA,
3.45339851475839, NA, -3.26999440857194, 3.22431706477516, 1.12146738985331,
NA, NA, -3.35074461633528, -4.77834384105443, 0.861820227100293,
-1.2056462223418, NA, -0.764953366530192, 2.15999357109029, -3.15904866930723,
-2.02972340363139, 1.59312313935804, -0.690697193933604, -2.29058736067177,
-5.13147313144091, 1.07236945986908, 0.108280351864803, -1.52505465628159,
-1.88392986532853, -0.913313498148003, 3.45065659884515, -1.51081560834924,
NA, 3.00567498412257, NA, 2.29831624955477, -2.39939672938326,
NA, NA, NA, NA, NA, NA, -5.3996538732268, -1.42752873210144,
0.0724852922370845, -0.0705972311145999, -1.61242654109663, 1.29582434107304,
1.25077987465511, -5.98015252408187, -3.61932820726323, -1.07781775258866,
0.286505399786564, -1.56602611718645, 0.399192492042174, 1.85548824455123,
NA, -0.327392012404141, -1.97227490719439, NA, NA, -3.35557605502864,
-3.290672107048, NA, 3.48660286422453, -2.86599190336319, -0.581371472493803,
-2.4331708832036, -2.51410746360557, 0.288914981358372, -0.505196655524221,
1.05716585109166, NA, 0.989822924708553, NA, 0.0225311655108685,
1.62443349472587, -2.10875387227572, -0.587500068529866, NA,
0.425562050427503, 1.22298892433114, -0.927706126888752, -0.409816745039093,
-4.06011308919161, -1.03787150354729, -0.526565521964921, 1.80298858884609,
-0.448432216080462, -3.45358148541365, -2.91502694941587, -0.991517726403897,
3.4514878901496, NA, -1.12635167429918, NA, -1.50038036462404,
2.32725535516867, 0.111782581861886, 0.470383741015466, -0.031317322019304,
-0.718043104468153, -1.12920706646041, NA, 2.01061929939003,
3.17413595098028, NA, 0.190603542356822, -2.22133728253356, -1.06894473164686,
-0.4330706698996, 1.01987682109617, -2.03059414210894, -1.7628873247019,
-2.53482602530358, -1.21416018667071, -3.67031160485705, -1.52082991320074,
1.02860643265223, 0.500315605421826, -0.716811146495161, NA,
-0.195815384833931, -0.493307414686167, 3.23812969301253, NA,
-2.09109040458043, -3.1164773497449, -1.6343505259607, 0.379609686590297,
-0.0343891823066718, 1.10485260681212, -1.13046916214953, -0.69700197840047,
-5.4193074181233, NA, NA, -4.42064283033986, NA, 1.3717981277516,
0.727970491803614, -0.105635233546526, -2.39685477369571, 5.1151037437021,
-1.83992182746077, NA, -1.40244592017941, NA, 0.486864923336782,
2.60269396800354, -3.362465559503, NA, -3.98399191993587, NA,
-1.4572757861452, -1.19771254081594, -3.95432863178781, 3.21854033879276,
NA, NA, 1.80796548853591, NA, -4.04344709825764, 2.05959593320529,
-0.161804833493671, -0.0552164788513592, NA, -2.86741460844131,
0.31984524711282, NA, NA, 4.74172559718353, 0.165786351005475,
-4.36875963238208, -2.31924247294842, -0.935833349775849, NA,
-3.28217665426608, 1.32131433363562, 1.71193300136779, -0.306511172657601,
-3.558816156159, -1.55714569371858, NA, 1.18540625153828, -4.1986425447059,
4.05642691202438, -3.75551752685627, NA, 1.39774357580828, -7.41748774382377,
0.490138847007415, -1.32879211731965, 0.529098504821919, -0.798177526937817,
-0.845196892300538, -2.53754126606442, -2.63918687041961, NA,
NA, 3.90070676740233, -1.36316117657554), func.cat = c("5,11,22",
"5,22", "5,9,22", "1,9,28", "5,11,22", "5,8,22", "4", "4,9",
"5,9,25", "4,9", "4,9", "4,9,29", "7,15,18", "6,11,21,22,28,29,30",
"6,11,21,28,30", "4,9,23", "4,9", "6,9,30", "6,9,30", "6,9,30",
"1,9,28", "6,9", "6,9,21", "3,9,23", "1,9", "1,9", "1,9", "1",
"1,9", "1,9", "9", "7,9,18", "2,9,19,27", "1,9,28", "2,9,19,27",
"1,9", "4,9", "1,9,21", "4,9", "4,9", "5,11,20", "6,21", "6,9,21",
"7,9,13", "2,9,28", "7,9", "1,9,21", "4,9", "1,9", "4,8,9,23",
"4,9,29", "1,9,28", "5,20", "5,22", "5", "5,20", "1", "5,11,22",
"1,9", "1,11", "1,9", "7,9,17", "7,9,13", "3,4,8,9", "3,9,28",
"7,9", "6,21", "7,9,19,27,28", "7,9,15", "7,9,15", "5,9,22",
"4,9", "4,9,32", "1,27", "7,9,15", "4,9", "7,9,28", "1,1,6,21",
"1", "1,9", "1,9", "6,9", "7,9,13", "7,9,13,26", "7,9,13", "7,11,13",
"7,11,13,26", "7,13", "2", "7,27,28", "4,9,29", "7,9,16", "4,9,26",
"4,29", "9", "9", "", "7,9,27", "2,9,10", "2,9,10,19,27", "1,9,10",
"2,9,10,27", "4,11,29", "4,9,26", "1,9,30", "5,9,20", "1,28",
"4,9,26", "1,9", "8", "4,9,28", "7,9,19", "4,11,17,23", "4,9,17",
"4,9,15", "4,9,28", "4,9,29", "6,9,21", "7,11,15,18", "2,19,27",
"5,20", "1,9", "1,9", "1,9", "", "7,25", "3,9", "3,11,17", "4,9",
"2,9,28,33", "7,9,19", "2,9,19", "7,9,13", "7,9", "3,9,28", "6,9,21",
"7,9,27", "7,9", "1,9", "7,9,17,23", "1,9", "1,9,28", "1,9,22",
"4,9,26", "6,9,21", "2,9,27", "2,9,27", "7,9,13", "7,9,19,27,28",
"7,9,13,28", "1", "9", "9", "6,30", "2,9,19", "7,13", "7,9,13",
"1,7,9", "1", "6,9,21", "9", "9", "7,13,33", "7,9,13", "1,9,28",
"1,19", "6,9,21", "2,9,19,27", "3", "3,9", "3,9,10,27", "3,9,10,27",
"3,9,10,27", "3,27", "4,9,10,27", "3,9,27", "4,9,10", "4,9,10",
"1,9", "4,9", "4,9", "4,29", "1,9,28", "5,9,21", "7,9,19", "1,9",
"1,9", "3,7", "7,9,28", "4,9", "7,11", "1,9,29", "4,9,17", "1,4,9,29",
"4", "1,9", "4,9,32", "6,9,21,30", "6,9", "7,9,13", "7,9,25",
"4,10", "4,9", "1,9", "4,9,28", "4", "4,9,10,29", "4,11,28",
"4,9,28", "1,9", "7,11,28", "1,9", "1,9", "7,13", "6,9,18", "7,9,13",
"2,9,19,27", "2,9,19", "7,9,10,19", "7,9,19", "4,8,10,28", "4,9",
"7,9,19", "2", "2,19", "7,13", "7,11,27,28", "4,23", "4,11",
"4,11", "7,11,19", "4,9,28", "2,9,28", "2,9,19", "6,9,21", "5,9,16",
"7,13", "4,9", "4", "4,4,11", "4,9", "6,9,21", "3,9", "3,9,28",
"7,9,19", "2,9,19,27,28,33", "2,9,19", "2,19", "7,9,28", "4,9",
"7,13", "7,13", "7,9,10", "1,9", "3,9,25", "7,9,13", "7,9,26,28",
"7,9,18", "7,9,18", "9", "1,9", "1,9", "1,9,29", "6,9,18,21,30",
"4,9,26", "3,9", "1,9", "1,9", "4,9,26", "7,9,15,27", "4,9,26",
"1,9,14", "1,9,22", "2,9,19,27", "2,9,19", "1,9", "6,9,21", "1,9,15",
"6", "1,9,29", "3,3,9", "8", "8", "8", "2,9,27", "4,11,25", "4,11",
"4,11", "11", "11", "4", "4,9,29", "4,11", "4,11", "7,11,25",
"4,11", "1,9", "7,9,28", "1,9,29", "7,9,26", "9", "1,9", "1,9",
"3,9", "3,9,28", "6,9,21", "7,9,19,27,28", "9", "9,24", "7,9,13",
"1,9", "4,19", "4,29", "1,9", "1,11", "1,11,25", "1,9", "4,9",
"7,9,17", "7,9,27", "7,9,13", "4,9,26", "7,9,25", "4,9", "7,9,16",
"7,9,23", "1,9,29", "6,9,21,26", "9", "9,10", "7,27", "6,9,21",
"1,9", "1,9,21", "7,9,28", "1,9", "1,9", "7,11,23", "11", "9",
"7,11,15", "7,11,13", "6,21,30", "1,9", "4,9", "4,9,28", "4,28",
"4,9", "1,9,28", "4,9,15", "4,11", "6,9", "1,9", "3,11,25", "3,11,25",
"3,11,25", "3,9,21", "9", "24", "1,9,27", "7,11", "1", "4,9,28",
"1,9", "4,9", "4,9,26", "7,17", "4,29", "1,9,17", "1,9,29", "1,9",
"1,9,28", "7,9,29", "7,28", "1,9,28", "1,11,25", "7,9,13", "7,13",
"7,9", "1,9,25", "6,9,21", "1", "4,9,29", "7,9,20", "1,9", "7,9,16",
"4,11", "3,11,25", "7,9,13,18", "6,9,21,28,30", "1,9", "8,9",
"7,9,16", "1,9,26", "7,9,15", "7,9,13", "9", "4,9", "1,9", "1,9",
"1,9", "4", "4,11,32", "2,11,19", "4,9", "4,9,28", "4,9", "2,11,19,27",
"2,9,27,33", "1,9,28", "7,9,13", "7,13,28", "7,9,13", "6,11,21,28,30",
"4,11,15,30", "4,11", "5,9,22,28", "7,10,18", "7,11,15", "7,11,18",
"7,11,18", "4,9,28", "4,9", "4,9", "1,9,28", "7,11,18", "7,11",
"1,9", "4,11,26", "4", "5,10,22", "7,9", "5,11", "1,11", "4,9",
"3,9", "1,9,29", "1,9,28", "4,9", "2,19", "2,9,19", "7,9,18",
"7,9,19,27,28", "3,11,25", "1,9", "9", "1,9", "5,9", "5,9,21",
"4,9,29", "2,9,23", "1,9", "1,28", "4,11,28", "4,9", "6,9,21",
"9", "1,9", "1,9", "3", "2,3,3,9,19", "7,9,15", "7,9,13", "1,9",
"1", "4,9", "7,11,13", "4,9", "1,9,29", "6,21,28", "1,9,19",
"7,13", "2,9,19", "2,9,23,33", "1,18", "1,2,9,10,18,23,33", "1,9",
"6,9,21", "4,9,29", "6,9,21", "4,9", "4,9", "4,9", "7,9,16",
"4,11", "7,9,28", "1,9", "4,9", "4,9,29", "7,9,28", "6,9,17,21",
"7,23", "1,9", "4,28", "7,17", "6,9,10", "6,9", "1", "1,7,9,15",
"1,9")), .Names = c("BHTD", "BHNS", "func.cat"), class = "data.frame", row.names = c(NA,
500L))
So here's a slightly different approach, using the first ten rows of your dat.
# Split each comma-separated func.cat string into a character vector, stored
# as a list column: one vector of category codes per row.
dat$cat <- strsplit(dat$func.cat,",")
library(ggplot2)
ggplot(dat) +
# The colour aesthetic is a logical per row: TRUE when that row's vector of
# category codes contains "4", FALSE otherwise.
geom_point(aes(x=BHTD,y=BHNS,color=sapply(cat,function(cat) "4" %in% cat)),
size=3)+
# "Fun.Cat=4" is passed as the title of the colour legend.
scale_color_discrete("Fun.Cat=4")
This code creates a column, $cat as a list of vectors, each containing all the functional categories for the gene represented in that row. Using
color=sapply(cat,function(cat) "4" %in% cat)
tests each element of cat (each row of dat). If the vector in that row contains "4", color=TRUE. This may have the slight advantage that you don't have to grep the whole table for each category you want to highlight.
I think this is what you mean. I use a for-loop and grep to create the category that is used in ggplot. As I understood it, you want to match one substring of func.cat per iteration (e.g., "1").
I created a minimal example based on yours:
library(ggplot2)

# Minimal example data: func.cat holds comma-separated category codes.
c1 <- c("1", "2", "2")
c2 <- c("2", "", "3")
dat <- data.frame(
  BHNS = 1:3,
  BHTD = 1:3,
  func.cat = paste(c1, c2, sep = ",")
)

# Write one PNG per category of interest, highlighting the rows whose
# func.cat contains that category.
for (ii in c("1", "3")) {
  # Start from all zeros and set 1 where grep finds the category. The pattern
  # template matches the code only as a complete comma-delimited entry (alone,
  # first, last, or in the middle), so "1" does not match "11" or "21".
  dat$new.cat <- replace(
    numeric(nrow(dat)),
    grep(gsub("i", ii, "^i$|^i,|,i$|,i,"), dat$func.cat),
    1
  )

  p1 <- ggplot(dat, aes(x = BHNS, y = BHTD)) +
    geom_point(aes(colour = factor(new.cat)))

  # ggplot objects are not auto-printed inside a loop, so print explicitly
  # while the PNG device is open.
  png(file = paste0("plot_", ii, ".png"))
  print(p1)
  dev.off()
}
Hope this helps.
Cheers!
EDIT: Updated the call to grep to not match unwanted categories.

Two Color Scales for geom_line in ggplot2

I have a chart (code to replicate will be below) that has two lines (and points) of data that need to be color coded, then three sets of confidence intervals (lines) which need to have their own color coding.
Unfortunately, ggplot sees the two calls to geom_line() and fits them all in the same scale.
Is there a way to have the central lines and dots have one scale (and legend entry) while the outer lines have a separate scale (and legend entry)?
I've seen (complex) answers like ggplot2: Multiple color scales or shift colors systematically on different layers? but that relies on the old proto system which I believe has been phased out by now(?).
Thanks for any help.
Code to produce data and graphs. Sorry for the length:
exShapedMayGroup <- structure(list(Date = structure(c(14730, 14730, 14730, 14731,
14731, 14731, 14734, 14734, 14734, 14735, 14735, 14735, 14736,
14736, 14736, 14737, 14737, 14737, 14740, 14740, 14740, 14741,
14741, 14741, 14742, 14742, 14742, 14743, 14743, 14743, 14744,
14744, 14744, 14745, 14745, 14745, 14746, 14746, 14746, 14748,
14748, 14748, 14749, 14749, 14749, 14750, 14750, 14750, 14750,
14750, 14750, 14751, 14751, 14751, 14752, 14752, 14752, 14752,
14752, 14752, 14754, 14754, 14754, 14756, 14756, 14756, 14757,
14757, 14757, 14758, 14758, 14758, 14758, 14758, 14758, 14759,
14759, 14759, 14760, 14760, 14760), class = "Date"), Score = c(0.028,
0.028, 0.028, 0.03289, 0.03289, 0.03289, 0.034512, 0.034512,
0.034512, 0.0373496, 0.0373496, 0.0373496, 0.03201968, 0.03201968,
0.03201968, 0.040805744, 0.040805744, 0.040805744, 0.0344045952,
0.0344045952, 0.0344045952, 0.04017367616, 0.04017367616, 0.04017367616,
0.035998940928, 0.035998940928, 0.035998940928, 0.0342191527424,
0.0342191527424, 0.0342191527424, 0.09799532219392, 0.09799532219392,
0.09799532219392, 0.122746257755136, 0.122746257755136, 0.122746257755136,
0.0999570062041088, 0.0999570062041088, 0.0999570062041088, 0.0950656049632871,
0.0950656049632871, 0.0950656049632871, 0.0837224839706296, 0.0837224839706296,
0.0837224839706296, 0.00418, 0.00418, 0.00418, 0.0806379871765037,
0.0806379871765037, 0.0806379871765037, 0.009624, 0.009624, 0.009624,
0.0099792, 0.0099792, 0.0099792, 0.090740389741203, 0.090740389741203,
0.090740389741203, 0.0905523117929624, 0.0905523117929624, 0.0905523117929624,
0.0761218494343699, 0.0761218494343699, 0.0761218494343699, 0.0707874795474959,
0.0707874795474959, 0.0707874795474959, 0.02132336, 0.02132336,
0.02132336, 0.0636099836379967, 0.0636099836379967, 0.0636099836379967,
0.0550479869103974, 0.0550479869103974, 0.0550479869103974, 0.0466883895283179,
0.0466883895283179, 0.0466883895283179), Right = c("1", "2",
"3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3",
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1",
"2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2",
"3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3",
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1",
"2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2",
"3"), .id = c("0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0",
"0", "0", "1", "1", "1", "1", "1", "1", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", "0", "0",
"0", "0", "0", "0", "0", "0"), Lower = c(0.0452301816389807,
0.0299531343622987, 0.0146760870856168, 0.0409430625769167, 0.0213788962381707,
0.00181472989942479, 0.0386359600820249, 0.0167646912483872,
-0.00510657758525054, 0.037279363974053, 0.0140514990324434,
-0.00917636590916623, 0.0364512577706185, 0.0123952866255743,
-0.0116606845194698, 0.0359359120595814, 0.0113645952035002,
-0.0132067216525811, 0.0356116886483614, 0.0107161483810601,
-0.0141793918862411, 0.035406383399575, 0.0103055378834873, -0.0147953076326005,
0.0352758647295475, 0.0100445005434323, -0.0151868636426829,
0.0351926859362388, 0.00987814295681498, -0.0154364000226088,
0.035139594640892, 0.00977196036612139, -0.0155956739086492,
0.0351056744462797, 0.00970411997689682, -0.0156974344924861,
0.0350839892725913, 0.00966074962952, -0.0157624900135513, 0.0350701204632195,
0.00963301201077625, -0.0158040964416669, 0.035061248392137,
0.00961526786861143, -0.0158307126549142, NA, NA, NA, 0.0350555718896789,
0.00960391486369513, -0.0158477421622886, NA, NA, NA, NA, NA,
NA, 0.0350519395924259, 0.00959665026918906, -0.0158586390540477,
0.0350496151941651, 0.00959200147266757, -0.01586561224883, 0.0350481276906492,
0.00958902646563569, -0.0158700747593778, 0.035047175734008,
0.00958712255235328, -0.0158729306293014, NA, NA, NA, 0.0350465665004368,
0.00958590408521094, -0.0158747583300149, 0.0350461765986017,
0.00958512428154069, -0.0158759280355203, 0.0350459270645606,
0.00958462521345864, -0.0158766766376434), Upper = c(0.0757842761923446,
0.0910613234690266, 0.106338370745709, 0.0800713952544086, 0.0996355615931546,
0.119199727931901, 0.0823784977493004, 0.104249766582938, 0.126121035416576,
0.0837350938572723, 0.106962958798882, 0.130190823740492, 0.0845632000607068,
0.108619171205751, 0.132675142350795, 0.0850785457717439, 0.109649862627825,
0.134221179483906, 0.0854027691829639, 0.110298309450265, 0.135193849717566,
0.0856080744317504, 0.110708919947838, 0.135809765463926, 0.0857385931017778,
0.110969957287893, 0.136201321474008, 0.0858217718950865, 0.11113631487451,
0.136450857853934, 0.0858748631904333, 0.111242497465204, 0.136610131739975,
0.0859087833850456, 0.111310337854428, 0.136711892323811, 0.085930468558734,
0.111353708201805, 0.136776947844877, 0.0859443373681059, 0.111381445820549,
0.136818554272992, 0.0859532094391883, 0.111399189962714, 0.136845170486239,
NA, NA, NA, 0.0859588859416464, 0.11141054296763, 0.136862199993614,
NA, NA, NA, NA, NA, NA, 0.0859625182388994, 0.111417807562136,
0.136873096885373, 0.0859648426371602, 0.111422456358658, 0.136880070080155,
0.0859663301406761, 0.11142543136569, 0.136884532590703, 0.0859672820973173,
0.111427335278972, 0.136887388460627, NA, NA, NA, 0.0859678913308885,
0.111428553746114, 0.13688921616134, 0.0859682812327236, 0.111429333549785,
0.136890385866846, 0.0859685307667647, 0.111429832617867, 0.136891134468969
)), .Names = c("Date", "Score", "Right", ".id", "Lower", "Upper"
), row.names = c(NA, 81L), class = "data.frame")
# Score is drawn as lines and points coloured by .id; the Lower and Upper
# limits are drawn as extra lines coloured by Right. Every layer maps to the
# colour aesthetic, so all of them end up on the single "Limits" colour scale.
ggplot(exShapedMayGroup, aes_string(x="Date", y="Score")) + geom_line(aes_string(group=".id", colour=".id")) +
geom_point(aes_string(colour=".id")) + geom_line(aes_string(y="Lower", colour="Right")) +
geom_line(aes_string(y="Upper", colour="Right")) + scale_color_discrete(name="Limits")
P.S. Only using aes_string because this is called in a function which allows the user to input columns as a character.
The ggplot2 way to do this is to not use geom_line for your confidence bands. There's a geom built specifically for that: geom_ribbon.
# Draw each Lower/Upper band as a translucent ribbon keyed on Right. The bands
# then use the fill scale, leaving the colour scale to the .id series alone.
ggplot(exShapedMayGroup, aes(x = Date, y = Score)) +
  geom_ribbon(
    aes(ymin = Lower, ymax = Upper, fill = Right, group = Right),
    alpha = 0.25
  ) +
  geom_line(aes(group = .id, colour = .id)) +
  # Points are coloured by .id, matching the lines. Mapping them to Right
  # would add the limit levels back onto the colour scale and merge the two
  # legends again.
  geom_point(aes(colour = .id)) +
  scale_color_discrete(name = "Limits")
Obviously, I can't be sure that you'll be happy with how this looks, but this is generally how you approach this kind of graph in ggplot2. Note that I removed the aes_string uses in your code.
Unfortunately Hadley confirmed this is not possible. So I decided to use linetype instead (and not have anything distinguishing the points). This made everyone happy.
# Workaround with a single colour scale: the .id series are told apart by
# linetype (legend "Group"), the points carry no group mapping, and only the
# Lower/Upper limit lines use colour (legend "Limits").
ggplot(exShapedMayGroup, aes_string(x="Date", y="Score")) +
geom_line(aes_string(group=".id", linetype=".id")) +
geom_point() +
geom_line(aes_string(y="Lower", colour="Right")) +
geom_line(aes_string(y="Upper", colour="Right")) +
scale_linetype_discrete(name="Group") + scale_color_discrete(name="Limits")
This is much simpler now, thanks to Elio Campitelli's ggnewscale package:
# install.packages("ggnewscale")
library(ggplot2)
library(ggnewscale)

ggplot(exShapedMayGroup, aes(x = Date, y = Score)) +
  # First colour scale: the Score lines and points, coloured by .id.
  geom_line(aes(colour = .id)) +
  geom_point(aes(colour = .id)) +
  # The "Group" title belongs on this colour scale. No layer maps linetype,
  # so a linetype scale would never be drawn.
  scale_color_brewer(name = "Group", palette = "Dark2") +
  # Layers added after this call get a fresh, independent colour scale.
  ggnewscale::new_scale_color() +
  # Second colour scale: the Lower/Upper limit lines, coloured by Right.
  geom_line(aes(y = Lower, colour = Right)) +
  geom_line(aes(y = Upper, colour = Right)) +
  scale_color_discrete(name = "Limits")
Created on 2021-06-25 by the reprex package (v1.0.0)

Resources