Related
I need to loop different functions in dataframes allocated in my Global Environment and save the output of each "run" of the loop in a new dataframe that includes the initial name.
For this end, I'm using assign() with for() loop. It works well, except if I use the dplyr pipe %>%. The function itself works, but there is some error with the name assigned to the output dataframe. How can I fix this issue with %>% ? If not possible to fix, can I change assign() for another function?
This works well:
code1:
for(i in unique(table$V1)){
assign(paste0(i, "_target"),table[grepl(i,table$V1),])
}
Explanation: Selects unique entries in column 1 of the "table" and subset the rows with these entries to a new dataframe per entry. Output: the new dataframe name is "entry name" + "_target"
This doesn't work well (and I would like to know why):
code2:
for(i in mget(ls(pattern = "_target"))){
assign(paste0(i, "_slim"),data.frame(i %>% group_by(Sample.Name) %>% summarise(Mean_dC=mean(C__))))
}
Explanation: Selects all dataframes in the Global Env that name contains "_target". In each dataframe: it does the mean of the values "(C__)" associated to entries with same characters "(Sample.Name)". Should be output: the new dataframe name is "entry name_target" + "_slim". Real output: the new dataframe presents the mean of the same characters, but is named "c(aleatory numbers)_slim".
code2 input:
STA_target <- structure(list(Well = structure(c(8L, 9L, 10L, 21L, 22L, 23L,
33L, 34L, 35L, 46L, 47L, 48L, 58L, 59L, 60L, 73L, 74L, 75L, 85L,
86L, 87L, 97L, 98L, 99L), .Label = c("", "A1", "A10", "A11",
"A12", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "Analysis Type",
"B1", "B10", "B11", "B12", "B2", "B3", "B4", "B5", "B6", "B7",
"B8", "B9", "C1", "C10", "C11", "C12", "C2", "C3", "C4", "C5",
"C6", "C7", "C8", "C9", "Chemistry", "D1", "D10", "D11", "D12",
"D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "E1", "E10",
"E11", "E12", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9",
"Endogenous Control", "Experiment File Name", "Experiment Run End Time",
"F1", "F10", "F11", "F12", "F2", "F3", "F4", "F5", "F6", "F7",
"F8", "F9", "G1", "G10", "G11", "G12", "G2", "G3", "G4", "G5",
"G6", "G7", "G8", "G9", "H1", "H10", "H11", "H12", "H2", "H3",
"H4", "H5", "H6", "H7", "H8", "H9", "Instrument Type", "Passive Reference",
"Reference Sample", "RQ Min/Max Confidence Level", "Well"), class = "factor"),
Sample.Name = c("Control_in", "Control_in", "Control_in",
"Sample2_in", "Sample2_in", "Sample2_in", "Sample5_in", "Sample5_in",
"Sample5_in", "Sample3_in", "Sample3_in", "Sample3_in", "Control_c",
"Control_c", "Control_c", "Sample2_c", "Sample2_c", "Sample2_c",
"Sample3_c", "Sample3_c", "Sample3_c", "Sample5_c", "Sample5_c",
"Sample5_c"), Target.Name = c("STA", "STA", "STA", "STA",
"STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA",
"STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA",
"STA", "STA"), Task = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("", "Task", "UNKNOWN"), class = "factor"),
Reporter = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("", "Reporter", "SYBR"), class = "factor"),
Quencher = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("", "None", "Quencher"), class = "factor"),
RQ = structure(c(12L, 12L, 12L, 8L, 8L, 8L, 6L, 6L, 6L, 11L,
11L, 11L, 1L, 1L, 1L, 5L, 5L, 5L, 14L, 14L, 14L, 18L, 18L,
18L), .Label = c("", "0.706286132", "0.714652956", "0.724364996",
"0.7665869", "0.828774512", "0.838611245", "0.846661508",
"0.863589227", "0.896049678", "0.929288268", "1", "1.829339266",
"15.57538891", "17.64183807", "27.67574501", "3.064466953",
"34.78881073", "41.82569504", "8.117406845", "8.884188652",
"RQ"), class = "factor"), RQ.Min = structure(c(9L, 9L, 9L,
7L, 7L, 7L, 8L, 8L, 8L, 10L, 10L, 10L, 1L, 1L, 1L, 2L, 2L,
2L, 21L, 21L, 21L, 17L, 17L, 17L), .Label = c("", "0.032458056",
"0.429091513", "0.460811675", "0.541289926", "0.611138761",
"0.674698055", "0.71383971", "0.742018044", "0.753834546",
"0.772591949", "0.7868222", "0.803419232", "0.820919514",
"0.826185584", "0.989573121", "22.58564949", "27.2142868",
"4.501103401", "4.745172024", "4.843928814", "4.979007244",
"9.076541901", "RQ Min"), class = "factor"), RQ.Max = structure(c(13L,
13L, 13L, 8L, 8L, 8L, 6L, 6L, 6L, 9L, 9L, 9L, 1L, 1L, 1L,
16L, 16L, 16L, 19L, 19L, 19L, 20L, 20L, 20L), .Label = c("",
"0.858568788", "0.910271943", "0.943540215", "0.947846115",
"0.962214947", "0.971821666", "1.062453985", "1.145578504",
"1.162549496", "1.218146205", "1.244680166", "1.347676158",
"14.63914394", "15.85231876", "18.10507202", "20.37916756",
"3.381742954", "50.08181381", "53.58541107", "64.28199768",
"65.58969879", "84.38751984", "RQ Max"), class = "factor"),
C_ = c(25.48042297, 25.4738903, 25.83390617, 25.7304306,
25.78297043, 25.41260529, 25.49670792, 25.52298164, 25.6956234,
25.34812355, 25.51462555, 25.15455437, 0, 0, 0, 32.29237366,
37.10370636, 32.22016525, 29.50172043, 30.18544579, 29.91492081,
25.14842796, 24.89806747, 24.99397278), C_.Mean = c(25.59607506,
25.59607506, 25.59607506, 25.64200401, 25.64200401, 25.64200401,
25.57177162, 25.57177162, 25.57177162, 25.33910179, 25.33910179,
25.33910179, NA, NA, NA, 33.87208176, 33.87208176, 33.87208176,
29.86736107, 29.86736107, 29.86736107, 25.01348877, 25.01348877,
25.01348877), C_.SD = structure(c(21L, 21L, 21L, 20L, 20L,
20L, 12L, 12L, 12L, 19L, 19L, 19L, 1L, 1L, 1L, 31L, 31L,
31L, 23L, 23L, 23L, 14L, 14L, 14L), .Label = c("", "0.039937571",
"0.043110434", "0.049541138", "0.05469643", "0.061177365",
"0.066671595", "0.07365533", "0.079849631", "0.082057081",
"0.095515646", "0.108060829", "0.120047837", "0.126316145",
"0.129658803", "0.130481929", "0.142733917", "0.172286868",
"0.180205062", "0.200392827", "0.205995336", "0.236968249",
"0.344334781", "0.36769405", "0.413046211", "0.445171326",
"0.514641941", "0.640576839", "0.895943522", "0.993181109",
"2.798901796", "C_ SD"), class = "factor"), `_C_` = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "_C_"), class = "factor"),
`_C_.Mean` = structure(c(8L, 8L, 8L, 5L, 5L, 5L, 4L, 4L,
4L, 7L, 7L, 7L, 1L, 1L, 1L, 3L, 3L, 3L, 13L, 13L, 13L, 14L,
14L, 14L), .Label = c("", "_C_ Mean", "-0.577166259", "-0.68969661",
"-0.720502198", "-0.776381195", "-0.85484314", "-0.96064502",
"-1.058534026", "-2.04822278", "-2.545912504", "-3.293611526",
"-4.921841145", "-6.081196308", "0.477069855", "1.373315215",
"2.092705965", "2.244637728", "2.251055479", "2.346632004",
"2.456220627", "2.557917356", "2.729323149", "2.746313095"
), class = "factor"), `_C_.SE` = structure(c(13L, 13L, 13L,
11L, 11L, 11L, 6L, 6L, 6L, 9L, 9L, 9L, 1L, 1L, 1L, 24L, 24L,
24L, 21L, 21L, 21L, 15L, 15L, 15L), .Label = c("", "_C_ SE",
"0.042180877", "0.042606823", "0.048373949", "0.077573851",
"0.088320434", "0.102536619", "0.108728357", "0.113733612",
"0.117972165", "0.144372106", "0.155044988", "0.223316222",
"0.224465802", "0.258952528", "0.300881863", "0.306413502",
"0.319273174", "0.579304695", "0.606897891", "0.635279417",
"0.682336032", "1.643036604"), class = "factor"), HK.Control._C_.Mean = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "HK Control _C_ Mean"
), class = "factor"), HK.Control._C_.SE = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "HK Control _C_ SE"
), class = "factor"), `__C_` = structure(c(12L, 12L, 12L,
16L, 16L, 16L, 18L, 18L, 18L, 13L, 13L, 13L, 1L, 1L, 1L,
19L, 19L, 19L, 7L, 7L, 7L, 10L, 10L, 10L), .Label = c("",
"__C_", "-0.871322632", "-1.61563623", "-3.021018982", "-3.15124011",
"-3.961196184", "-4.140928745", "-4.790550232", "-5.120551586",
"-5.38631773", "0", "0.105801903", "0.15834935", "0.211582825",
"0.240142822", "0.253925949", "0.27094841", "0.383478791",
"0.465211242", "0.484685272", "0.501675308"), class = "factor"),
Automatic.Ct.Threshold = structure(c(3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("", "Automatic Ct Threshold",
"TRUE"), class = "factor"), Ct.Threshold = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "0.056211855",
"0.208910329", "0.693888608", "0.704941193", "Ct Threshold"
), class = "factor"), Automatic.Baseline = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("", "Automatic Baseline",
"TRUE"), class = "factor"), Baseline.Start = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "3", "Baseline Start"
), class = "factor"), Baseline.End = structure(c(3L, 3L,
4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 13L, 14L, 14L, 8L,
12L, 8L, 6L, 7L, 7L, 3L, 3L, 3L), .Label = c("", "21", "22",
"23", "25", "26", "27", "29", "30", "31", "32", "34", "35",
"39", "Baseline End"), class = "factor"), Efficiency = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "1", "Efficiency"
), class = "factor"), Comments = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "Comments"), class = "factor"),
HIGHSD = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("", "HIGHSD", "N", "Y"), class = "factor"),
NOAMP = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"N", "NOAMP", "Y"), class = "factor"), OUTLIERRG = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "N", "OUTLIERRG",
"Y"), class = "factor"), EXPFAIL = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("", "EXPFAIL", "N", "Y"
), class = "factor")), .Names = c("Well", "Sample.Name",
"Target.Name", "Task", "Reporter", "Quencher", "RQ", "RQ.Min",
"RQ.Max", "C_", "C_.Mean", "C_.SD", "_C_", "_C_.Mean", "_C_.SE",
"HK.Control._C_.Mean", "HK.Control._C_.SE", "__C_", "Automatic.Ct.Threshold",
"Ct.Threshold", "Automatic.Baseline", "Baseline.Start", "Baseline.End",
"Efficiency", "Comments", "HIGHSD", "NOAMP", "OUTLIERRG", "EXPFAIL"
), row.names = c(12L, 13L, 14L, 24L, 25L, 26L, 36L, 37L, 38L,
48L, 49L, 50L, 60L, 61L, 62L, 72L, 73L, 74L, 84L, 85L, 86L, 96L,
97L, 98L), class = "data.frame")
code2 "output":
> dput(`c(8, 9, 10, 21, 22, 23, 33, 34, 35, 46, 47, 48, 58, 59, 60, 73, 74, 75, 85, 86, 87, 97, 98, 99)_slim`)
structure(list(Group.1 = c("Sample2_c", "Sample2_in", "Sample3_c",
"Sample5_in", "Control_c", "Control_in", "Sample5_c", "Sample3_in"
), x = c(33.8720817566667, 25.6420021066667, 29.8673623433333,
25.5717709866667, 0, 25.5960731466667, 25.0134894033333, 25.3391011566667
)), .Names = c("Group.1", "x"), row.names = c(NA, -8L), class = "data.frame")
I don't know if this is really the output because of the given name. But the expected output should be something like that with the correct name: STA_slim
Thank you for your time
First of all, I strongly suggest you avoid assign() in your R code. It's much better to use one of the many mapping/apply function in R to build related data in lists. Using get/assign is sign that you are not doing things in a very R-like way.
Your problem has nothing to do with dplyr really, it's what you are looping over in your loop. When you do
for(i in mget(ls(pattern = "_target"))){
assign(paste0(i, "_slim"),data.frame(i %>% group_by(Sample.Name) %>% summarise(Mean_dC=mean(C__))))
}
that i isn't the name of the data.frame, because you did mget() it's the data frame itself. It doesn't make sense to paste that into a new name.
To "fix" this, you could do
for(i in ls(pattern = "_target")){
assign(paste0(i, "_slim"),data.frame(get(i) %>% group_by(Sample.Name) %>% summarise(Mean_dC=mean(C__))))
}
But even then you don't have a column named C__ in your example data set. You have C_ or _C_ or __C_ (what do these names even mean??). So you'd need to fix that.
The better list way would be
slim <- lapply(mget(ls(pattern = "_target$")) , function(x) {
x %>% group_by(Sample.Name) %>% summarise(Mean_dC=mean(C_))
})
Following this guide I have plotted the following graph using the following code. I did split my dataset into one that contains the data that goes in all plots 'control', and the rest 'dfnocontrol'.
ggplot(dfnocontrol,aes(y=value,x=year)) + geom_line(data=dfnocontrol,
aes(color=survivorship),size=1.5) + facet_wrap(~density,nrow=2) +
geom_line(data=dfcontrol,aes(linetype=simulname),color='grey',size=1.5)
I have tried many ways to have only one legend, or to edit the existing two legend but nothing seems to work. scale_fill_manual() seems to be ignored, even though I don't get any error message. I was forced to use linetype to make the 'control' appear in the legend. How can I merge these two legends?
edit: these are the data for control
structure(list(year = 1:2, psize = structure(c(6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(392.884450281975,
392.76842677951), simulname = structure(c(1L, 1L), .Label = c("control",
"d02s70", "d02s80", "d02s90", "d05s70", "d05s80", "d05s90", "d07s70",
"d07s80", "d07s90", "d1s70", "d1s80", "d1s90", "d2s70", "d2s80",
"d2s90", "d3s70", "d3s80", "d3s90", "d4s70", "d4s80", "d4s90",
"d5s70", "d5s80", "d5s90"), class = "factor"), survivorship = structure(c(1L,
1L), .Label = c("control", "s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "survivorship"), row.names = 2501:2502, class = "data.frame")
and data for the rest
structure(list(year = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L), psize = structure(c(6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(391.933827876557,
390.784233661738, 391.931768654094, 390.777949423224, 391.930831801103,
390.775125884957, 391.904131913644, 390.671681105517, 391.903377880798,
390.669377819171, 391.902842713777, 390.667498067697, 391.874743014214,
390.557893743236, 391.874006362415, 390.555639401299, 391.8735511448,
390.554149478021, 391.84367266143, 390.443618794749, 391.843064602404,
390.442149462261, 391.842594963982, 390.440725187945, 391.72267802326,
388.998242801555, 391.722309813432, 388.996838950063, 391.721745089041,
388.995715149179, 384.967818982887, 383.215849576989, 384.967407490871,
383.214728664341, 384.96689031843, 383.213390281481, 391.897592532656,
389.445606459513, 391.897234485415, 389.444632515097, 391.89681267375,
389.443358475326, 391.402389493961, 388.987279260992, 391.401979078947,
388.985920091544, 391.401583421483, 388.984891027315), simulname = structure(c(2L,
2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L,
10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L,
17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L,
23L, 24L, 24L, 25L, 25L), .Label = c("control", "d02s70", "d02s80",
"d02s90", "d05s70", "d05s80", "d05s90", "d07s70", "d07s80", "d07s90",
"d1s70", "d1s80", "d1s90", "d2s70", "d2s80", "d2s90", "d3s70",
"d3s80", "d3s90", "d4s70", "d4s80", "d4s90", "d5s70", "d5s80",
"d5s90"), class = "factor"), density = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("control",
"d02", "d05", "d07", "d1", "d2", "d3", "d4", "d5"), class = "factor"),
survivorship = structure(c(2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L,
4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L), .Label = c("control",
"s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "density", "survivorship"), row.names = c(6081L,
6082L, 9845L, 9846L, 14345L, 14346L, 17985L, 17986L, 21797L,
21798L, 26297L, 26298L, 30567L, 30568L, 34528L, 34529L, 38744L,
38745L, 43144L, 43145L, 47519L, 47520L, 51983L, 51984L, 56483L,
56484L, 60983L, 60984L, 65483L, 65484L, 69983L, 69984L, 74483L,
74484L, 78983L, 78984L, 83483L, 83484L, 87983L, 87984L, 92483L,
92484L, 96983L, 96984L, 101483L, 101484L, 105983L, 105984L), class = "data.frame")
Since you provided no data, I will give you an example using the economics data set.
library(wesanderson) # for the colours
library(tidyverse)
data("economics")
We will need two data sets for this task. Variable unemploy will serve as our 'control' (6th column). All variables will be scaled.
First data set:
economics_gathered <- economics[, 1:5] %>% # exclude unemploy
modify_if(is.numeric, scale) %>%
gather(key, value, -date)
Second data set:
economics_control <- economics[, c(1, 6)] %>%
dplyr::rename(control = unemploy) %>%
gather(some_other_key, value, 2) %>%
mutate(value = scale(value))
Now we can plot:
ggplot() +
geom_line(data = economics_control, aes(x = date, y = value, col = some_other_key)) +
geom_line(data = economics_gathered, aes(date, value, col = key)) +
scale_colour_manual(values = c("grey", wes_palette("GrandBudapest"))) +
facet_wrap(~key, scales = "free_y")
To which the result is the plot below.
EDIT
With the data provided by the OP the following code
ggplot() +
geom_line(data = dfcontrol, aes(year, value, col = survivorship), size = 1.5) +
geom_line(data = dfnocontrol, aes(year, value, col = survivorship), size = 1.5) +
facet_wrap( ~ density, nrow = 2) +
scale_colour_manual(values = c("grey", "forestgreen", "red", "blue"))
gives this plot:
DATA
1)
dfcontrol <- structure(list(year = 1:2, psize = structure(c(6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(392.884450281975,
392.76842677951), simulname = structure(c(1L, 1L), .Label = c("control",
"d02s70", "d02s80", "d02s90", "d05s70", "d05s80", "d05s90", "d07s70",
"d07s80", "d07s90", "d1s70", "d1s80", "d1s90", "d2s70", "d2s80",
"d2s90", "d3s70", "d3s80", "d3s90", "d4s70", "d4s80", "d4s90",
"d5s70", "d5s80", "d5s90"), class = "factor"), survivorship = structure(c(1L,
1L), .Label = c("control", "s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "survivorship"), row.names = 2501:2502, class = "data.frame")
2)
dfnocontrol <- structure(list(year = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L), psize = structure(c(6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(391.933827876557,
390.784233661738, 391.931768654094, 390.777949423224, 391.930831801103,
390.775125884957, 391.904131913644, 390.671681105517, 391.903377880798,
390.669377819171, 391.902842713777, 390.667498067697, 391.874743014214,
390.557893743236, 391.874006362415, 390.555639401299, 391.8735511448,
390.554149478021, 391.84367266143, 390.443618794749, 391.843064602404,
390.442149462261, 391.842594963982, 390.440725187945, 391.72267802326,
388.998242801555, 391.722309813432, 388.996838950063, 391.721745089041,
388.995715149179, 384.967818982887, 383.215849576989, 384.967407490871,
383.214728664341, 384.96689031843, 383.213390281481, 391.897592532656,
389.445606459513, 391.897234485415, 389.444632515097, 391.89681267375,
389.443358475326, 391.402389493961, 388.987279260992, 391.401979078947,
388.985920091544, 391.401583421483, 388.984891027315), simulname = structure(c(2L,
2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L,
10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L,
17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L,
23L, 24L, 24L, 25L, 25L), .Label = c("control", "d02s70", "d02s80",
"d02s90", "d05s70", "d05s80", "d05s90", "d07s70", "d07s80", "d07s90",
"d1s70", "d1s80", "d1s90", "d2s70", "d2s80", "d2s90", "d3s70",
"d3s80", "d3s90", "d4s70", "d4s80", "d4s90", "d5s70", "d5s80",
"d5s90"), class = "factor"), density = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("control",
"d02", "d05", "d07", "d1", "d2", "d3", "d4", "d5"), class = "factor"),
survivorship = structure(c(2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L,
4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L), .Label = c("control",
"s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "density", "survivorship"), row.names = c(6081L,
6082L, 9845L, 9846L, 14345L, 14346L, 17985L, 17986L, 21797L,
21798L, 26297L, 26298L, 30567L, 30568L, 34528L, 34529L, 38744L,
38745L, 43144L, 43145L, 47519L, 47520L, 51983L, 51984L, 56483L,
56484L, 60983L, 60984L, 65483L, 65484L, 69983L, 69984L, 74483L,
74484L, 78983L, 78984L, 83483L, 83484L, 87983L, 87984L, 92483L,
92484L, 96983L, 96984L, 101483L, 101484L, 105983L, 105984L), class = "data.frame")
This question already has answers here:
Convert data from long format to wide format with multiple measure columns
(6 answers)
Closed 4 years ago.
I need to reorganize a large dataset into a specific format for further analysis. Right now the data are in long format, with multiple records through time for each point. I need to reshape the data so that each point has a single record, but it will add many new columns of the time-specific data. I’ve looked at previous similar posts but I need to ultimately convert several of the current variables into columns, and I can’t find an example of such. Is there a way to accomplish this in a single reshape, or will I have to do several and then concatenate the new columns back together? Another wrinkle before I post the example is that not all points were sampled at each time-step, so I need those values to show up as NA. For example, (see data below) SitePoint A1 was not sampled at all in 2012, SitePoint A10 was not sampled during the first round in 2012, but K83 was sampled all nine times.
mydatain <- structure(list(SitePoint = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L), .Label = c("A1", "A10", "K145", "K83", "T15",
"T213"), class = "factor"), Year_Rotation = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 1L, 2L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 1L, 7L), .Label = c("2010_1", "2010_2",
"2010_3", "2011_1", "2011_2", "2011_3", "2012_1", "2012_2", "2012_3"
), class = "factor"), MR_Fire = structure(c(5L, 6L, 6L, 2L, 9L,
9L, 5L, 6L, 6L, 2L, 9L, 9L, 7L, 8L, 16L, 17L, 21L, 22L, 23L,
25L, 3L, 4L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 1L,
2L, 2L, 5L, 6L, 6L, 11L, 11L, 12L, 7L, 24L), .Label = c("0",
"1", "10", "11", "12", "13", "14", "15", "2", "23", "24", "25",
"35", "36", "37", "39", "40", "47", "48", "49", "51", "52", "53",
"8", "9"), class = "factor"), fire_seas = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L), .Label = c("dry", "fire", "wet"
), class = "factor"), OptTSF = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 1L, 1L)), .Names = c("SitePoint", "Year_Rotation", "MR_Fire",
"fire_seas", "OptTSF"), row.names = c(31L, 32L, 33L, 34L, 35L,
36L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 10543L, 10544L,
10545L, 10546L, 10547L, 10548L, 10549L, 10550L, 14988L, 14989L,
14990L, 14991L, 14992L, 14993L, 14994L, 14995L, 14996L, 17370L,
17371L, 17372L, 17373L, 17374L, 17375L, 17376L, 17377L, 17378L,
19353L, 19354L), class = "data.frame")
Ultimately I need something like this:
myfinal <- structure(list(SitePoint = structure(1:6, .Label = c("A1", "A10",
"K145", "K83", "T15", "T213"), class = "factor"), MR_Fire_2010_1 = c(12L,
12L, 39L, 23L, 0L, 14L), MR_Fire_2010_2 = c(13L, 13L, 40L, 24L,
1L, NA), MR_Fire_2010_3 = c(13L, 13L, NA, 25L, 1L, NA), MR_Fire_2011_1 = c(1L,
1L, 51L, 35L, 12L, NA), MR_Fire_2011_2 = c(2L, 2L, 52L, 36L,
13L, NA), MR_Fire_2011_3 = c(2L, 2L, 53L, 37L, 13L, NA), MR_Fire_2012_1 = c(NA,
NA, 9L, 47L, 24L, 8L), MR_Fire_2012_2 = c(NA, 14L, 10L, 48L,
24L, NA), MR_Fire_2012_3 = c(NA, 15L, 11L, 49L, 25L, NA), season_2010_1 = structure(c(2L,
2L, 1L, 2L, 2L, 1L), .Label = c("dry", "fire"), class = "factor"),
season_2010_2 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2010_3 = structure(c(1L,
1L, NA, 1L, 1L, NA), .Label = "fire", class = "factor"),
season_2011_1 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2011_2 = structure(c(2L,
2L, 1L, 2L, 2L, NA), .Label = c("dry", "fire"), class = "factor"),
season_2011_3 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2012_1 = structure(c(NA,
NA, 2L, 1L, 1L, 2L), .Label = c("fire", "wet"), class = "factor"),
season_2012_2 = structure(c(NA, 1L, 2L, 1L, 1L, NA), .Label = c("fire",
"wet"), class = "factor"), season_2012_3 = structure(c(NA,
1L, 2L, 1L, 1L, NA), .Label = c("fire", "wet"), class = "factor"),
OptTSF_2010_1 = c(1L, 1L, 0L, 1L, 1L, 1L), OptTSF_2010_2 = c(1L,
1L, 0L, 1L, 1L, NA), OptTSF_2010_3 = c(1L, 1L, NA, 1L, 1L,
NA), OptTSF_2011_1 = c(1L, 1L, 0L, 0L, 1L, NA), OptTSF_2011_2 = c(1L,
1L, 0L, 0L, 1L, NA), OptTSF_2011_3 = c(1L, 1L, 0L, 0L, 1L,
NA), OptTSF_2012_1 = c(NA, NA, 1L, 0L, 0L, 1L), OptTSF_2012_2 = c(NA,
1L, 1L, 0L, 0L, NA), OptTSF_2012_3 = c(NA, 1L, 1L, 0L, 0L,
NA)), .Names = c("SitePoint", "MR_Fire_2010_1", "MR_Fire_2010_2",
"MR_Fire_2010_3", "MR_Fire_2011_1", "MR_Fire_2011_2", "MR_Fire_2011_3",
"MR_Fire_2012_1", "MR_Fire_2012_2", "MR_Fire_2012_3", "season_2010_1",
"season_2010_2", "season_2010_3", "season_2011_1", "season_2011_2",
"season_2011_3", "season_2012_1", "season_2012_2", "season_2012_3",
"OptTSF_2010_1", "OptTSF_2010_2", "OptTSF_2010_3", "OptTSF_2011_1",
"OptTSF_2011_2", "OptTSF_2011_3", "OptTSF_2012_1", "OptTSF_2012_2",
"OptTSF_2012_3"), class = "data.frame", row.names = c(NA, -6L
))
The actual dataset is about 23656 records X 15 variables, so doing it by hand is likely to cause major headaches and potential for mistakes. Any help or suggestions are appreciated. If this has been answered elsewhere, apologies. I couldn’t find anything directly applicable; everything seemed to related to three columns and only one of those being extracted as new variables. Thanks.
SP
dcast from the devel version of data.table i.e., v1.9.5 can cast multiple columns simultaneously. It can be installed from here.
library(data.table) ## v1.9.5+
dcast(setDT(mydatain), SitePoint~Year_Rotation,
value.var=c('MR_Fire', 'fire_seas', 'OptTSF'))
You can use reshape to change the structure of your dataframe from long to wide using the following code:
reshape(mydatain,timevar="Year_Rotation",idvar="SitePoint",direction="wide")
I have the following code working:
library(rCharts)
library(shiny)
X <- data.frame(Var1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L),
Var2 = structure(c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("control","treatment1", "treatment2"), class = "factor"),
Freq = c(0L,0L, 3L, 2L, 6L, 9L, 13L, 36L, 50L, 497L, 0L, 2L, 1L, 3L, 6L, 4L, 11L, 29L, 50L, 499L, 1L, 2L, 0L, 2L, 5L, 6L, 12L, 22L, 63L,490L)
)
runApp(
list(ui = fluidPage(
titlePanel("Quiz 3 grades distribution"),
fluidRow(
column(3,
#helpText("Select grade in Quiz 1 before the treatment:"),
selectInput("select", label = h3("Grade Quiz 1 before the treatment:"),
choices = list("All" = 0, "Not Perfect" = 1, "Perfect" = 2),
selected = 0)
),
column(9, div(showOutput("histogram","nvd3")), style = 'align:center;')
, tags$head(tags$style(HTML(".nv-axislabel {font: 22px Arial;}"))) # to style labels
)
),
server = shinyServer(
function(input, output, session) {
output$histogram <- renderChart2({
n2 <- nPlot(Freq ~ Var1, group = 'Var2', data = X, type = 'multiBarChart')
n2$params$width <- 500
n2$params$height <- 400
n2$xAxis(axisLabel = "my x axis label")
n2$yAxis(axisLabel = "my y axis label", width = 50)
n2
})
}
)
)
)
How can I change the marks in the y axis to appear, for example, every 25 instead of every 50 counts?
Thanks a lot!
Use tickValues passed as an option to yAxis
library(rCharts)
library(shiny)
X <- data.frame(Var1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L),
Var2 = structure(c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("control","treatment1", "treatment2"), class = "factor"),
Freq = c(0L,0L, 3L, 2L, 6L, 9L, 13L, 36L, 50L, 497L, 0L, 2L, 1L, 3L, 6L, 4L, 11L, 29L, 50L, 499L, 1L, 2L, 0L, 2L, 5L, 6L, 12L, 22L, 63L,490L)
)
runApp(
list(ui = fluidPage(
titlePanel("Quiz 3 grades distribution"),
fluidRow(
column(3,
#helpText("Select grade in Quiz 1 before the treatment:"),
selectInput("select", label = h3("Grade Quiz 1 before the treatment:"),
choices = list("All" = 0, "Not Perfect" = 1, "Perfect" = 2),
selected = 0)
),
column(9, div(showOutput("histogram","nvd3")), style = 'align:center;')
,tags$head(tags$style(HTML(".nv-axislabel {font: 22px Arial;}")))
)
),
server = shinyServer(
function(input, output, session) {
output$histogram <- renderChart2({
n2 <- nPlot(Freq ~ Var1, group = 'Var2', data = X, type = 'multiBarChart')
n2$params$width <- 500
n2$params$height <- 400
n2$xAxis(axisLabel = "my x axis label")
n2$yAxis(axisLabel = "my y axis label", width = 50)
n2$yAxis(tickValues = do.call(seq, c(as.list(range(X$Freq)), 25)))
n2
})
}
)
)
)
I'm almost done creating the shiny app that I want.
How can I add labels to the x and y axis of my graph?
This is what I have right now:
library(rCharts)
library(shiny)
X <- data.frame(Var1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L),
Var2 = structure(c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("control","treatment1", "treatment2"), class = "factor"),
Freq = c(0L,0L, 3L, 2L, 6L, 9L, 13L, 36L, 50L, 497L, 0L, 2L, 1L, 3L, 6L, 4L, 11L, 29L, 50L, 499L, 1L, 2L, 0L, 2L, 5L, 6L, 12L, 22L, 63L,490L)
)
runApp(
list(ui = fluidPage(
titlePanel("Quiz 3 grades distribution"),
fluidRow(
column(3,
#helpText("Select grade in Quiz 1 before the treatment:"),
selectInput("select", label = h3("Grade Quiz 1 before the treatment:"),
choices = list("All" = 0, "Not Perfect" = 1, "Perfect" = 2),
selected = 0)
),
column(9, div(showOutput("histogram","nvd3")), style = 'align:center;')
)
),
server = shinyServer(
function(input, output, session) {
output$histogram <- renderChart2({
n2 <- nPlot(Freq ~ Var1, group = 'Var2', data = X, type = 'multiBarChart')
n2$params$width <- 500
n2$params$height <- 400
n2
})
}
)
)
)
Thanks!
The nPlot object has a xAxis and yAxis method which take an option axisLabel. You may need to adjust the width on the yaxis.
library(rCharts)
library(shiny)
X <- data.frame(Var1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L),
Var2 = structure(c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("control","treatment1", "treatment2"), class = "factor"),
Freq = c(0L,0L, 3L, 2L, 6L, 9L, 13L, 36L, 50L, 497L, 0L, 2L, 1L, 3L, 6L, 4L, 11L, 29L, 50L, 499L, 1L, 2L, 0L, 2L, 5L, 6L, 12L, 22L, 63L,490L)
)
runApp(
list(ui = fluidPage(
titlePanel("Quiz 3 grades distribution"),
fluidRow(
column(3,
#helpText("Select grade in Quiz 1 before the treatment:"),
selectInput("select", label = h3("Grade Quiz 1 before the treatment:"),
choices = list("All" = 0, "Not Perfect" = 1, "Perfect" = 2),
selected = 0)
),
column(9, div(showOutput("histogram","nvd3")), style = 'align:center;')
, tags$head(tags$style(HTML(".nv-axislabel {font: 22px Arial;}"))) # to style labels
)
),
server = shinyServer(
function(input, output, session) {
output$histogram <- renderChart2({
n2 <- nPlot(Freq ~ Var1, group = 'Var2', data = X, type = 'multiBarChart')
n2$params$width <- 500
n2$params$height <- 400
n2$xAxis(axisLabel = "my x axis label")
n2$yAxis(axisLabel = "my y axis label", width = 50)
n2
})
}
)
)
)