Related
I want to create a graph of geom_line() coloured by a variable (Var1) then plot geom_point() with shapes according to a different variable (Var2) with the same colours as geom_line().
After reading a lot about this but not being able to find anything that I could interpret as being the same issue I have attempted the following:
ggplot(data, aes(X, Y)) +
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2, colour = Var1), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()
Which results in the above. The issue with this graph is that the second legend shows both IDs as circles, when in the plot one is a circle and one is a triangle. I would ideally like it to just be a coloured line with no shapes at all.
I've also tried this:
ggplot(data, aes(X, Y)) +
geom_line(aes(color = Var1)) +
geom_point(data = subset(data, Var2 != 0), aes(shape = Var2), size = 3) +
scale_color_manual(values=c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
scale_x_continuous(breaks=seq(0,30,5)) +
theme_bw()
The issue with this graph is that the shapes are not filled in by colour in the graph.
This is my data.
dput(data)
structure(list(X = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L), Y = c(1L, 1L, 1L, 2L, 4L, 13L, 18L, 19L,
21L, 24L, 34L, 43L, 70L, 90L, 129L, 169L, 1L, 3L, 3L, 3L, 3L,
4L, 21L, 79L, 157L, 229L, 323L, 470L, 655L, 889L, 1128L, 1701L,
2036L, 2502L, 3089L, 3858L, 4636L, 5883L, 7375L, 9172L, 10149L
), Var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("",
"ID1", "ID2"), class = "factor"), Var2 = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 2L, 2L), .Label = c("", "0", "Point1", "Point2"
), class = "factor")), row.names = c(NA, -41L), class = "data.frame")
How about this
# Lines are coloured by Var1. Points are drawn only for rows where Var2 is
# not "0"; they reuse the line colours and take their shape from Var2.
ggplot(data, aes(x = X, y = Y)) +
  geom_line(aes(colour = Var1)) +
  geom_point(
    aes(colour = Var1, shape = Var2),
    data = subset(data, Var2 != 0),
    size = 3
  ) +
  scale_x_continuous(breaks = seq(0, 30, by = 5)) +
  scale_colour_manual(values = c("#7CAE00", "#00BFC4", "#000000", "#C77CFF")) +
  theme_bw() +
  # Blank out the point glyph in the colour legend so its keys are plain lines.
  guides(colour = guide_legend(override.aes = list(shape = NA)))
I want to add regression lines to my graph. geom_smooth only adds lines to my figure legend however.
Below is the current code I've tried (geom_smooth gives exactly the same result), along with various attempts at tinkering myself. DGRP.Line and Diet are factors, while Weighted.average is an integer.
RENA <- read.csv('RENA.csv')
RENAVG <- aggregate(Weighted.average~Diet+DGRP.Line, data = RENA, FUN = sum)
ggplot(RENAVG, aes(x=DGRP.Line, y=Weighted.average, colour=Diet))+
geom_point()+
stat_smooth(method='lm')
I'm not sure if the failure to properly regress is a consequence of DGRP.Line being a factor or not. But I'd expect geom_smooth to just form regression lines from my .csv file anyway (RENAVG)
On another attempt using the main RENA.CSV I get this error
"mapping must be created by aes()"
but I'm not sure if that's relevant to the RENAVG I made on R.
My graph is included below. As you can see the figure legend is lined, but no regression lines are on the actual data set.
Edit:
I tried converting my original ggplot (excluding smooth) to its own 'RENAVGPLOT'. Then added smooth in as RENAVGPLOT _ Geom_Smooth, resulting in: Error: Don't know how to add RENAVGPLOT to a plot
dput(RENAVG)
structure(list(Diet = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Control",
"Rena"), class = "factor"), DGRP.Line = structure(c(1L, 1L, 3L,
3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L,
11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L,
17L, 18L, 18L, 19L, 19L, 20L, 20L), .Label = c("105a", "105b",
"348", "354", "362a", "362b", "391a", "391b", "392", "397", "405",
"486a", "486b", "712", "721", "737", "757a", "757b", "853", "879"
), class = "factor"), Weighted.average = c(3.618181818, 7.516666667,
7.5, 10.464285714, 5.830882353, 7.0625, 6.411392405, 7.413953488,
6.079053054, 7.0375, 6.373640273, 10.406521739, 6.948020792,
9.851458886, 9.176727909, 10.164712153, 6.23826291, 11.023310023,
7.908730159, 9.537815126, 5.314323607, 6.655822854, 5.669226044,
7.818181818, 4.761481935, 9.468873129, 6.577764637, 12.170588235,
5.742087177, 10.529411765, 8.891608391, 2, 11.036572623, 3, 9.739878543,
9.782051282, 7.741384687, 8.739583333)), row.names = c(NA, -38L
), class = "data.frame")
>
Example of what I'd like
mtcars
ggplot(mtcars, aes(x=mpg, y=wt, colour=cyl)) +
geom_point()+
geom_smooth()
I need to loop different functions in dataframes allocated in my Global Environment and save the output of each "run" of the loop in a new dataframe that includes the initial name.
For this end, I'm using assign() with for() loop. It works well, except if I use the dplyr pipe %>%. The function itself works, but there is some error with the name assigned to the output dataframe. How can I fix this issue with %>% ? If not possible to fix, can I change assign() for another function?
This works well:
code1:
for(i in unique(table$V1)){
assign(paste0(i, "_target"),table[grepl(i,table$V1),])
}
Explanation: Selects unique entries in column 1 of the "table" and subset the rows with these entries to a new dataframe per entry. Output: the new dataframe name is "entry name" + "_target"
This doesn't work well (and I would like to know why):
code2:
for(i in mget(ls(pattern = "_target"))){
assign(paste0(i, "_slim"),data.frame(i %>% group_by(Sample.Name) %>% summarise(Mean_dC=mean(C__))))
}
Explanation: Selects all dataframes in the Global Env whose name contains "_target". In each dataframe, it computes the mean of the values "(C__)" for entries that share the same "(Sample.Name)". Expected output: the new dataframe name is "entry name_target" + "_slim". Real output: the new dataframe contains the per-sample means, but is named "c(seemingly random numbers)_slim".
code2 input:
STA_target <- structure(list(Well = structure(c(8L, 9L, 10L, 21L, 22L, 23L,
33L, 34L, 35L, 46L, 47L, 48L, 58L, 59L, 60L, 73L, 74L, 75L, 85L,
86L, 87L, 97L, 98L, 99L), .Label = c("", "A1", "A10", "A11",
"A12", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "Analysis Type",
"B1", "B10", "B11", "B12", "B2", "B3", "B4", "B5", "B6", "B7",
"B8", "B9", "C1", "C10", "C11", "C12", "C2", "C3", "C4", "C5",
"C6", "C7", "C8", "C9", "Chemistry", "D1", "D10", "D11", "D12",
"D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "E1", "E10",
"E11", "E12", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9",
"Endogenous Control", "Experiment File Name", "Experiment Run End Time",
"F1", "F10", "F11", "F12", "F2", "F3", "F4", "F5", "F6", "F7",
"F8", "F9", "G1", "G10", "G11", "G12", "G2", "G3", "G4", "G5",
"G6", "G7", "G8", "G9", "H1", "H10", "H11", "H12", "H2", "H3",
"H4", "H5", "H6", "H7", "H8", "H9", "Instrument Type", "Passive Reference",
"Reference Sample", "RQ Min/Max Confidence Level", "Well"), class = "factor"),
Sample.Name = c("Control_in", "Control_in", "Control_in",
"Sample2_in", "Sample2_in", "Sample2_in", "Sample5_in", "Sample5_in",
"Sample5_in", "Sample3_in", "Sample3_in", "Sample3_in", "Control_c",
"Control_c", "Control_c", "Sample2_c", "Sample2_c", "Sample2_c",
"Sample3_c", "Sample3_c", "Sample3_c", "Sample5_c", "Sample5_c",
"Sample5_c"), Target.Name = c("STA", "STA", "STA", "STA",
"STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA",
"STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA", "STA",
"STA", "STA"), Task = structure(c(3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L), .Label = c("", "Task", "UNKNOWN"), class = "factor"),
Reporter = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("", "Reporter", "SYBR"), class = "factor"),
Quencher = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("", "None", "Quencher"), class = "factor"),
RQ = structure(c(12L, 12L, 12L, 8L, 8L, 8L, 6L, 6L, 6L, 11L,
11L, 11L, 1L, 1L, 1L, 5L, 5L, 5L, 14L, 14L, 14L, 18L, 18L,
18L), .Label = c("", "0.706286132", "0.714652956", "0.724364996",
"0.7665869", "0.828774512", "0.838611245", "0.846661508",
"0.863589227", "0.896049678", "0.929288268", "1", "1.829339266",
"15.57538891", "17.64183807", "27.67574501", "3.064466953",
"34.78881073", "41.82569504", "8.117406845", "8.884188652",
"RQ"), class = "factor"), RQ.Min = structure(c(9L, 9L, 9L,
7L, 7L, 7L, 8L, 8L, 8L, 10L, 10L, 10L, 1L, 1L, 1L, 2L, 2L,
2L, 21L, 21L, 21L, 17L, 17L, 17L), .Label = c("", "0.032458056",
"0.429091513", "0.460811675", "0.541289926", "0.611138761",
"0.674698055", "0.71383971", "0.742018044", "0.753834546",
"0.772591949", "0.7868222", "0.803419232", "0.820919514",
"0.826185584", "0.989573121", "22.58564949", "27.2142868",
"4.501103401", "4.745172024", "4.843928814", "4.979007244",
"9.076541901", "RQ Min"), class = "factor"), RQ.Max = structure(c(13L,
13L, 13L, 8L, 8L, 8L, 6L, 6L, 6L, 9L, 9L, 9L, 1L, 1L, 1L,
16L, 16L, 16L, 19L, 19L, 19L, 20L, 20L, 20L), .Label = c("",
"0.858568788", "0.910271943", "0.943540215", "0.947846115",
"0.962214947", "0.971821666", "1.062453985", "1.145578504",
"1.162549496", "1.218146205", "1.244680166", "1.347676158",
"14.63914394", "15.85231876", "18.10507202", "20.37916756",
"3.381742954", "50.08181381", "53.58541107", "64.28199768",
"65.58969879", "84.38751984", "RQ Max"), class = "factor"),
C_ = c(25.48042297, 25.4738903, 25.83390617, 25.7304306,
25.78297043, 25.41260529, 25.49670792, 25.52298164, 25.6956234,
25.34812355, 25.51462555, 25.15455437, 0, 0, 0, 32.29237366,
37.10370636, 32.22016525, 29.50172043, 30.18544579, 29.91492081,
25.14842796, 24.89806747, 24.99397278), C_.Mean = c(25.59607506,
25.59607506, 25.59607506, 25.64200401, 25.64200401, 25.64200401,
25.57177162, 25.57177162, 25.57177162, 25.33910179, 25.33910179,
25.33910179, NA, NA, NA, 33.87208176, 33.87208176, 33.87208176,
29.86736107, 29.86736107, 29.86736107, 25.01348877, 25.01348877,
25.01348877), C_.SD = structure(c(21L, 21L, 21L, 20L, 20L,
20L, 12L, 12L, 12L, 19L, 19L, 19L, 1L, 1L, 1L, 31L, 31L,
31L, 23L, 23L, 23L, 14L, 14L, 14L), .Label = c("", "0.039937571",
"0.043110434", "0.049541138", "0.05469643", "0.061177365",
"0.066671595", "0.07365533", "0.079849631", "0.082057081",
"0.095515646", "0.108060829", "0.120047837", "0.126316145",
"0.129658803", "0.130481929", "0.142733917", "0.172286868",
"0.180205062", "0.200392827", "0.205995336", "0.236968249",
"0.344334781", "0.36769405", "0.413046211", "0.445171326",
"0.514641941", "0.640576839", "0.895943522", "0.993181109",
"2.798901796", "C_ SD"), class = "factor"), `_C_` = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "_C_"), class = "factor"),
`_C_.Mean` = structure(c(8L, 8L, 8L, 5L, 5L, 5L, 4L, 4L,
4L, 7L, 7L, 7L, 1L, 1L, 1L, 3L, 3L, 3L, 13L, 13L, 13L, 14L,
14L, 14L), .Label = c("", "_C_ Mean", "-0.577166259", "-0.68969661",
"-0.720502198", "-0.776381195", "-0.85484314", "-0.96064502",
"-1.058534026", "-2.04822278", "-2.545912504", "-3.293611526",
"-4.921841145", "-6.081196308", "0.477069855", "1.373315215",
"2.092705965", "2.244637728", "2.251055479", "2.346632004",
"2.456220627", "2.557917356", "2.729323149", "2.746313095"
), class = "factor"), `_C_.SE` = structure(c(13L, 13L, 13L,
11L, 11L, 11L, 6L, 6L, 6L, 9L, 9L, 9L, 1L, 1L, 1L, 24L, 24L,
24L, 21L, 21L, 21L, 15L, 15L, 15L), .Label = c("", "_C_ SE",
"0.042180877", "0.042606823", "0.048373949", "0.077573851",
"0.088320434", "0.102536619", "0.108728357", "0.113733612",
"0.117972165", "0.144372106", "0.155044988", "0.223316222",
"0.224465802", "0.258952528", "0.300881863", "0.306413502",
"0.319273174", "0.579304695", "0.606897891", "0.635279417",
"0.682336032", "1.643036604"), class = "factor"), HK.Control._C_.Mean = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "HK Control _C_ Mean"
), class = "factor"), HK.Control._C_.SE = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "HK Control _C_ SE"
), class = "factor"), `__C_` = structure(c(12L, 12L, 12L,
16L, 16L, 16L, 18L, 18L, 18L, 13L, 13L, 13L, 1L, 1L, 1L,
19L, 19L, 19L, 7L, 7L, 7L, 10L, 10L, 10L), .Label = c("",
"__C_", "-0.871322632", "-1.61563623", "-3.021018982", "-3.15124011",
"-3.961196184", "-4.140928745", "-4.790550232", "-5.120551586",
"-5.38631773", "0", "0.105801903", "0.15834935", "0.211582825",
"0.240142822", "0.253925949", "0.27094841", "0.383478791",
"0.465211242", "0.484685272", "0.501675308"), class = "factor"),
Automatic.Ct.Threshold = structure(c(3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("", "Automatic Ct Threshold",
"TRUE"), class = "factor"), Ct.Threshold = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "0.056211855",
"0.208910329", "0.693888608", "0.704941193", "Ct Threshold"
), class = "factor"), Automatic.Baseline = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("", "Automatic Baseline",
"TRUE"), class = "factor"), Baseline.Start = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "3", "Baseline Start"
), class = "factor"), Baseline.End = structure(c(3L, 3L,
4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 13L, 14L, 14L, 8L,
12L, 8L, 6L, 7L, 7L, 3L, 3L, 3L), .Label = c("", "21", "22",
"23", "25", "26", "27", "29", "30", "31", "32", "34", "35",
"39", "Baseline End"), class = "factor"), Efficiency = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "1", "Efficiency"
), class = "factor"), Comments = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "Comments"), class = "factor"),
HIGHSD = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("", "HIGHSD", "N", "Y"), class = "factor"),
NOAMP = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"N", "NOAMP", "Y"), class = "factor"), OUTLIERRG = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", "N", "OUTLIERRG",
"Y"), class = "factor"), EXPFAIL = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("", "EXPFAIL", "N", "Y"
), class = "factor")), .Names = c("Well", "Sample.Name",
"Target.Name", "Task", "Reporter", "Quencher", "RQ", "RQ.Min",
"RQ.Max", "C_", "C_.Mean", "C_.SD", "_C_", "_C_.Mean", "_C_.SE",
"HK.Control._C_.Mean", "HK.Control._C_.SE", "__C_", "Automatic.Ct.Threshold",
"Ct.Threshold", "Automatic.Baseline", "Baseline.Start", "Baseline.End",
"Efficiency", "Comments", "HIGHSD", "NOAMP", "OUTLIERRG", "EXPFAIL"
), row.names = c(12L, 13L, 14L, 24L, 25L, 26L, 36L, 37L, 38L,
48L, 49L, 50L, 60L, 61L, 62L, 72L, 73L, 74L, 84L, 85L, 86L, 96L,
97L, 98L), class = "data.frame")
code2 "output":
> dput(`c(8, 9, 10, 21, 22, 23, 33, 34, 35, 46, 47, 48, 58, 59, 60, 73, 74, 75, 85, 86, 87, 97, 98, 99)_slim`)
structure(list(Group.1 = c("Sample2_c", "Sample2_in", "Sample3_c",
"Sample5_in", "Control_c", "Control_in", "Sample5_c", "Sample3_in"
), x = c(33.8720817566667, 25.6420021066667, 29.8673623433333,
25.5717709866667, 0, 25.5960731466667, 25.0134894033333, 25.3391011566667
)), .Names = c("Group.1", "x"), row.names = c(NA, -8L), class = "data.frame")
I don't know if this is really the output because of the given name. But the expected output should be something like that with the correct name: STA_slim
Thank you for your time
First of all, I strongly suggest you avoid assign() in your R code. It's much better to use one of the many mapping/apply functions in R to build related data in lists. Using get/assign is a sign that you are not doing things in a very R-like way.
Your problem has nothing to do with dplyr really, it's what you are looping over in your loop. When you do
for(i in mget(ls(pattern = "_target"))){
assign(paste0(i, "_slim"),data.frame(i %>% group_by(Sample.Name) %>% summarise(Mean_dC=mean(C__))))
}
that i isn't the name of the data.frame; because you used mget(), it's the data frame itself. It doesn't make sense to paste that into a new name.
To "fix" this, you could do
# Loop over the *names* of the data frames (ls() returns character strings),
# so paste0() builds a sensible new name and get() fetches the data itself.
# The pattern is anchored with `$` so that "*_target_slim" objects created by
# an earlier run of this loop are not picked up and summarised again.
# NOTE: C__ is kept from the question; the example data has no such column
# (it has C_, _C_ and __C_), so substitute the intended column name.
for (i in ls(pattern = "_target$")) {
  assign(
    paste0(i, "_slim"),
    data.frame(get(i) %>% group_by(Sample.Name) %>% summarise(Mean_dC = mean(C__)))
  )
}
But even then you don't have a column named C__ in your example data set. You have C_ or _C_ or __C_ (what do these names even mean??). So you'd need to fix that.
The better list way would be
# Gather every object whose name ends in "_target" into a named list and
# reduce each data frame to the mean of C_ per Sample.Name.
slim <- lapply(
  mget(ls(pattern = "_target$")),
  function(target_df) {
    summarise(group_by(target_df, Sample.Name), Mean_dC = mean(C_))
  }
)
Data description:
I have a data set that is in long format with multiple different grouping variables (in data example: StandID and simID)
What I am trying to do:
I need to create simple scatter plots (x=predicted, y=observed) from this dataset for multiple columns based on a unique grouping variable.
An example of what I am trying to do using just standard plot is
obs=subset(example,simID=="OBS_OBS_OBS")
csfnw=example[example$simID== "CS_F_NW",]
plot(obs$X1HR,csfnw$X1HR)
I would need to do this for all simID and columns 9-14. (12 graphs total from data example)
What I have tried:
The problem I am running into is the y axis needs to remain the same, while cycling through the different subsets for the x axis.
I will admit up front, I have no idea what would be the best approach for this... I thought this would be easy for a split second because the data is already in long format and I would just be pointing to a subset of the data.
1) My original approach was to try and just splice up the data so that each simID had its own data frame, and compare it against the observation dataframe but I don't know how I would then pass it to ggplot.
2) My second idea was to make some kind of makeGraph function containing all the aesthetics I wanted essentially and use some kind of apply on it to pass everything through the function, but I could get neither to work.
# Attempted plotting helper: builds a ggplot scatter plot from `dat` with the
# black-and-white theme.
#   dat  - data frame handed to ggplot()
#   x, y - intended x/y variables
# NOTE(review): aes(x=x,y=y) refers to the symbols `x` and `y`; presumably
# these resolve to columns of `dat` named x/y if present, otherwise to the
# function arguments themselves - confirm against how the helper is called.
makePlot=function(dat,x,y) {
# NOTE(review): shape=Treat sits outside aes(), so `Treat` is evaluated as an
# ordinary R variable rather than looked up as a column of `dat`.
ggplot(data=dat,aes(x=x,y=y))+geom_point(shape=Treat)+theme_bw()
}
What I could get to work was just breaking down the dataframe into the vectors of the variables I would then pass to some kind of loop/apply
sims=levels(example$simID)
sims2=sims[sims != "OBS_OBS_OBS"]
fuel_classes=colnames(example)[9:14]
Thank you
Data example:
example=structure(list(Year = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L), .Label = c("2001", "2002", "2003", "2004", "2005",
"2013", "2014", "2015"), class = "factor"), StandID = structure(c(10L,
2L, 6L, 22L, 14L, 18L, 34L, 26L, 30L, 10L, 2L, 6L, 22L, 14L,
18L, 34L, 26L, 30L, 10L, 2L, 6L, 22L, 14L, 18L, 34L, 26L, 30L
), .Label = c("1NB", "1NC", "1NT", "1NTB", "1RB", "1RC", "1RT",
"1RTB", "1SB", "1SC", "1ST", "1STB", "2NB", "2NC", "2NT", "2NTB",
"2RB", "2RC", "2RT", "2RTB", "2SB", "2SC", "2ST", "2STB", "3NB",
"3NC", "3NT", "3NTB", "3RB", "3RC", "3RT", "3RTB", "3SB", "3SC",
"3ST", "3STB"), class = "factor"), Block = structure(c(1L, 1L,
1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L,
1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("1", "2", "3"
), class = "factor"), Aspect = structure(c(3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L), .Label = c("N", "R", "S"), class = "factor"),
Treat = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("B", "C", "T", "TB"), class = "factor"),
Variant = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("CS", "OBS", "SN"), class = "factor"),
Fuels = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("F", "NF", "OBS"), class = "factor"),
Weather = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("NW", "OBS", "W"), class = "factor"),
X1HR = c(0.321666667, 0.177777778, 0.216111111, 0.280555556,
0.255555556, 0.251666667, 0.296666667, 0.231111111, 0.22,
0.27556628, 0.298042506, 0.440185249, 0.36150676, 0.398630172,
0.367523015, 0.345717251, 0.349305987, 0.412227929, 0.242860824,
0.258737177, 0.394024998, 0.287317872, 0.321927488, 0.281322986,
0.313588411, 0.303123146, 0.383658946), X10HR = c(0.440555556,
0.32, 0.266666667, 0.292222222, 0.496666667, 0.334444444,
0.564444444, 0.424444444, 0.432777778, 0.775042951, 0.832148314,
1.08174026, 1.023838878, 0.976997674, 0.844206274, 0.929837704,
1.0527215, 1.089246511, 0.88642776, 0.920596302, 1.209707737,
1.083737493, 1.077612877, 0.92481339, 1.041637182, 1.149550319,
1.229776621), X100HR = c(0.953888889, 1.379444444, 0.881666667,
1.640555556, 2.321666667, 1.122222222, 1.907777778, 1.633888889,
1.208333333, 1.832724094, 2.149356842, 2.364475727, 2.493232965,
2.262988567, 1.903909683, 2.135747433, 2.256677628, 2.288722038,
1.997704744, 2.087135553, 2.524872541, 2.34671092, 2.338253498,
2.06796217, 2.176314831, 2.580271006, 2.857197046), X1000HR = c(4.766666667,
8.342222222, 3.803333333, 8.057777778, 10.11444444, 6.931111111,
6.980555556, 13.20611111, 1.853333333, 3.389177084, 4.915714741,
2.795267582, 2.48227787, 2.218413353, 1.64684248, 2.716156483,
2.913746119, 2.238629341, 3.449863434, 3.432626724, 3.617531776,
3.641639471, 3.453454971, 3.176793337, 3.459602833, 3.871166945,
2.683447838), LITTER = c(2.4, 2.219444444, 2.772222222, 2.596666667,
2.693888889, 2.226111111, 2.552222222, 3.109444444, 2.963333333,
2.882233381, 3.025934696, 3.174396992, 3.291081667, 2.897673607,
2.737119675, 2.987895727, 3.679605484, 2.769756079, 2.882241249,
3.02594161, 3.174404144, 3.291091681, 2.897681713, 2.737129688,
2.987901449, 3.679611444, 2.769766569), DUFF = c(1.483333333,
1.723888889, 0.901666667, 1.520555556, 1.49, 1.366111111,
0.551666667, 1.056111111, 0.786111111, 2.034614563, 2.349547148,
1.685223818, 2.301301956, 2.609308243, 2.21895647, 2.043699026,
2.142618418, 0.953421116, 4.968493462, 4.990526676, 5.012362003,
5.023665905, 4.974074364, 4.947199821, 4.976779461, 5.082509995,
3.55211544), simID = structure(c(5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("CS_F_NW", "CS_F_W",
"CS_NF_NW", "CS_NF_W", "OBS_OBS_OBS", "SN_F_NW", "SN_F_W",
"SN_NF_NW", "SN_NF_W"), class = "factor")), .Names = c("Year",
"StandID", "Block", "Aspect", "Treat", "Variant", "Fuels", "Weather",
"X1HR", "X10HR", "X100HR", "X1000HR", "LITTER", "DUFF", "simID"
), row.names = c(37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L,
82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 127L, 128L, 129L,
130L, 131L, 132L, 133L, 134L, 135L), class = "data.frame")
You were actually on the right track. If all plots are the same, just make one function and then use loops to loop over the subsets. For your example this can be done like this:
library(ggplot2)
# Scatter-plot helper.
#   dat   - data frame with columns `x` and `y`
#   title - plot title
# Returns a ggplot object: points drawn with shape 18, black-and-white theme.
plotFun <- function(dat, title) {
  scatter <- ggplot(data = dat) + geom_point(aes(x = x, y = y), shape = 18)
  scatter + ggtitle(title) + theme_bw()
}
# columns of interest (X1HR ... DUFF in the example data)
colIdx <- 9:14
# split on all values of simID
dfList <- split(example, example$simID)
# simID has factor levels that never appear in the data; drop those empty
# groups. vapply() yields a plain integer vector to compare against 0.
dfList <- dfList[vapply(dfList, nrow, integer(1)) != 0]
# make an empty list-array for saving plots, indexed [x group, y group, column]
plotList <- array(
  list(),
  dim = c(length(dfList), length(dfList), length(colIdx)),
  dimnames = list(names(dfList), names(dfList), names(example)[colIdx])
)
# the first two loops cover all unique pairs of groups (i > j).
# seq_along()/seq_len() are used instead of 2:length(dfList) and 1:(i-1) so
# the loops simply do nothing (instead of counting backwards) when fewer than
# two groups remain.
for (i in seq_along(dfList)[-1]) {
  for (j in seq_len(i - 1)) {
    # loop over target variables
    for (k in seq_along(colIdx)) {
      # store variables to plot in a temporary data frame.
      # NOTE(review): rows are paired by position, so this assumes both
      # groups list their stands in the same order - confirm for real data.
      tempDf <- data.frame(
        x = dfList[[i]][, colIdx[k]],
        y = dfList[[j]][, colIdx[k]]
      )
      # add a title so we can see in the plot what is plotted vs what
      title <- paste0(
        names(dfList)[i], ":", names(dfList[[i]])[colIdx[k]], " VS ",
        names(dfList)[j], ":", names(dfList[[j]])[colIdx[k]]
      )
      # make and save plot
      plotList[[i, j, k]] <- plotFun(tempDf, title)
    }
  }
}
# call the plots like this
plotList[[2, 1, 4]]
# Note that we only filled the lower triangle of combinations
# therefore indexing with [[1, 1, 1]] just returns NULL
plotList[, , 1]
This process could probably be optimized further, but when creating graphs I would go for clarity over speed, since speed usually isn't an issue.
I'm able to create this graph with rCharts:
library(rCharts)
X <- structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10"), class = "factor"), Var2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("control",
"treatment1", "treatment2"), class = "factor"), Freq = c(0L,
0L, 3L, 2L, 6L, 9L, 13L, 36L, 50L, 497L, 0L, 2L, 1L, 3L, 6L,
4L, 11L, 29L, 50L, 499L, 1L, 2L, 0L, 2L, 5L, 6L, 12L, 22L, 63L,
490L)), .Names = c("Var1", "Var2", "Freq"), row.names = c(NA,
-30L), class = "data.frame")
n1<-nPlot(Freq ~ Var1, group = 'Var2', data = X, type = 'multiBarChart')
print(n1)
Now I'm trying to embed it in a Shiny app. I can build a Shiny app with ggplot2, but I'm not sure how to print the rCharts graph.
This is the shiny code that I have right now:
#server.R
library(rCharts)
X <- structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10"), class = "factor"), Var2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("control",
"treatment1", "treatment2"), class = "factor"), Freq = c(0L,
0L, 3L, 2L, 6L, 9L, 13L, 36L, 50L, 497L, 0L, 2L, 1L, 3L, 6L,
4L, 11L, 29L, 50L, 499L, 1L, 2L, 0L, 2L, 5L, 6L, 12L, 22L, 63L,
490L)), .Names = c("Var1", "Var2", "Freq"), row.names = c(NA,
-30L), class = "data.frame")
shinyServer(
function(input, output) {
output$histogram <- renderPlot({
# You can access the value of the widget with input$select, e.g.
output$value <- renderPrint({ input$select })
n2 <- nPlot(Freq ~ Var1, group = 'Var2', data = X, type = 'multiBarChart')
n2$set(dom = "histogram")
return(n2)
})
}
)
#ui.R
shinyUI(fluidPage(
titlePanel("Quiz 3 grades distribution"),
sidebarLayout(
sidebarPanel(
helpText("Quiz 3 grade distribution by treatment group"),
selectInput("select", label = h3("Select box"),
choices = list("All" = 0, "Not Perfect" = 1, "Perfect" = 2),
selected = 0)
),
mainPanel(plotOutput("histogram"))
)
))
What am I doing wrong? Thanks!
Use renderChart2 and showOutput to display nvd3 plots in Shiny. With renderChart2 you do not need to call $set(dom = ...).
library(rCharts)
library(shiny)
X <- data.frame(Var1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L,8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L),
Var2 = structure(c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("control","treatment1", "treatment2"), class = "factor"),
Freq = c(0L,0L, 3L, 2L, 6L, 9L, 13L, 36L, 50L, 497L, 0L, 2L, 1L, 3L, 6L, 4L, 11L, 29L, 50L, 499L, 1L, 2L, 0L, 2L, 5L, 6L, 12L, 22L, 63L,490L)
)
# Self-contained app: the UI and the server function are handed to runApp()
# together as a list. The chart data comes from the data frame `X`.
runApp(
  list(
    ui = fluidPage(
      titlePanel("Quiz 3 grades distribution"),
      sidebarLayout(
        sidebarPanel(
          helpText("Quiz 3 grade distribution by treatment group"),
          selectInput("select", label = h3("Select box"),
                      choices = list("All" = 0, "Not Perfect" = 1, "Perfect" = 2),
                      selected = 0)
        ),
        mainPanel(
          # showOutput() is the rCharts output placeholder used here in place
          # of plotOutput(). Its second argument names the JavaScript library
          # folder bundled with rCharts; that folder is lower-case "nvd3",
          # which matters on case-sensitive file systems.
          showOutput("histogram", "nvd3")
        )
      )
    ),
    server = shinyServer(
      function(input, output, session) {
        # renderChart2() takes the chart object as the value of the
        # expression, so no n2$set(dom = ...) call is made here.
        output$histogram <- renderChart2({
          n2 <- nPlot(Freq ~ Var1, group = 'Var2', data = X, type = 'multiBarChart')
          n2
        })
      }
    )
  )
)