Related
I am facing the problem that I am not able to specify the shape of the line symbols (without this specification the code works fine):
Below the data in reproducible format (it is the effects data put into a data frame):
structure(list(varL = c(0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07, 0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07, 0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07, 0, 1e+07, 2e+07, 3e+07, 4e+07,
0, 1e+07, 2e+07, 3e+07, 4e+07), varP = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L), .Label = c("(0,0.1]", "(0.1,0.2]",
"(0.2,0.3]", "(0.3,0.4]", "(0.4,0.5]", "(0.5,0.6]", "(0.6,0.7]",
"(0.7,0.8]", NA), class = "factor"), fit = c(0.0496509727291671,
0.0889644199210129, 0.147763911240627, 0.228140612498209, 0.328558663864939,
0.0137066329240178, 0.0170188110490053, 0.0209924787528359, 0.0257246732663005,
0.0313187292462082, 0.0289376730565942, 0.0324367840687503, 0.036277818691311,
0.0404834466212193, 0.0450765434401318, 0.0377500587733006, 0.0506605267612627,
0.0668653640284829, 0.0868169793966305, 0.110912824327041, 0.0461062991171287,
0.0536136421990573, 0.0620580975149222, 0.071506100162885, 0.0820206662867591,
0.0271688764980807, 0.0310122602430318, 0.0352949603875076, 0.0400511628245002,
0.0453154762467586, 0.0593111130006543, 0.0777425439930874, 0.100226912943776,
0.127122712337706, 0.158670602546708, 0.02092268966042, 0.0481738946672621,
0.0984225581163725, 0.179214944179607, 0.292488347088707), se = c(0.0259513690928884,
0.0478802966619357, 0.0959400030912549, 0.146319368888539, 0.197248937550513,
0.033511891943933, 0.0649738808934063, 0.13283528902344, 0.203454843482363,
0.274713638499851, 0.0399137666412373, 0.0836182332502119, 0.170994872374127,
0.261409298049175, 0.352531889503407, 0.0128068165036135, 0.0265824058594164,
0.054035051049317, 0.0824833429902055, 0.111165505837411, 0.00821998219695643,
0.0204628357910751, 0.0416140898624852, 0.0632975717285407, 0.08510744963605,
0.0111710559049469, 0.0241847618850518, 0.0491238092261353, 0.0748967974373985,
0.100866484066391, 0.0158269724358688, 0.0376131484048352, 0.0769417704226139,
0.117330108518709, 0.157967414110193, 0.041410334660995, 0.0756439112597116,
0.154046905957391, 0.236093539915582, 0.318984533128398), lower = c(0.0446491361632188,
0.0747918828643712, 0.108580794230823, 0.151091116521613, 0.203128798877193,
0.0115654911703096, 0.0123209025069961, 0.0108946289492482, 0.00947592068351736,
0.00819339231583828, 0.0241414249675257, 0.022214689650845, 0.0165544754495112,
0.0119897952488384, 0.00852702139073778, 0.0357322013203882,
0.0454582184069632, 0.05419654291372, 0.0639689796722408, 0.0749947791208939,
0.0445700854939895, 0.0493806984121361, 0.0526928942349234, 0.0560610804290682,
0.0595674652939625, 0.0258256186623322, 0.0278406506196297, 0.0284298721928641,
0.0289213813577344, 0.0293941320590341, 0.0557369600349524, 0.0675693013920541,
0.0762061129774737, 0.0853339447169982, 0.0951745618665348, 0.0171631624694616,
0.0350640216006218, 0.0556341130925104, 0.0836247441564837, 0.120733474483254
), upper = c(0.0550901244832386, 0.10504537627394, 0.195437125694758,
0.323403092731095, 0.477154780078397, 0.0161814201791539, 0.0231702136215256,
0.0380881102321762, 0.0606970430990062, 0.0928632628051039, 0.0345006405904498,
0.046261189541596, 0.0720801941481896, 0.10883443823233, 0.1577758610065,
0.0398599758225597, 0.0563263318177106, 0.0817125182150079, 0.115272694473414,
0.157735685375171, 0.0476847568332783, 0.0581290329012679, 0.072673983374196,
0.0900244403895002, 0.110325327955545, 0.0285699305757627, 0.0344771441605397,
0.0434644025927379, 0.0544344789594513, 0.0675376260500437, 0.0630625316437756,
0.0890383062732732, 0.129352639664315, 0.181375571131999, 0.244994560270329,
0.0253570051034752, 0.06494141884307, 0.161413333266735, 0.324329403439929,
0.531510876579345)), .Names = c("varL", "varP",
"fit", "se", "lower", "upper"), row.names = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40"), class = "data.frame") -> effectdat
The following code yields an error:
library(ggplot2)
# geom_point() is given only a shape mapping; x and y are mapped inside
# geom_line()'s own aes(), not in ggplot(), hence the error reported below.
ggplot(effectdat) + geom_line(aes(varL,fit,linetype=varP)) + theme_bw() + geom_point(aes(shape = varP))
Error: geom_point requires the following missing aesthetics: x, y
I read in the question "ggplot2_Error: geom_point requires the following missing aesthetics: y" that the unlist function should be used. However, this produces another error:
# unlist() flattens the data frame into a plain vector, which ggplot() does not
# accept as data (see the error below).
ggplot(unlist(effectdat)) + geom_line(aes(varL,fit,linetype=varP)) + theme_bw() + geom_point(aes(shape = varP))
Error: ggplot2 doesn't know how to deal with data of class numeric
Any ideas what is wrong? What surprises me is that the function without geom_point() seems to work fine.
No need to unlist the data.frame. Code below works:
# x and y are mapped once in ggplot() so that both layers inherit them;
# each layer then adds only its own aesthetic (linetype or shape).
ggplot(effectdat, aes(x = varL, y = fit)) +
  geom_line(aes(linetype = varP)) +
  geom_point(aes(shape = varP)) +
  theme_bw()
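Explanation: geom_point() was missing the x and y aesthetics, because an aes() given inside geom_line() applies only to that layer; they have to be supplied to geom_point() as well (or once in ggplot()).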
I am trying to make a ggplot. When I had shape in aesthetics, the code was working just fine. However, I need to put shape in geom_point() because I'm trying to reproduce a figure. And when I added shape to geom_point() it gave me the following error:
Aesthetics must be either length 1 or the same as the data (6): shape
I've looked for other answers here, but nothing seems to work for me. Above I've provided an image of what my data looks like. There are 17000 entries.
Below is my code:
# Group by chromosome, species and chromosome marker and compute the per-group
# medians of RPKM and dNdS. The grouping variables are passed as strings that
# contain '$', so the resulting columns are literally named `mammals$chr`,
# `mammals$Species` and `mammals$chrMark` (see the backticked names below).
summarised_data <-ddply(mammals,c('mammals$chr','mammals$Species','mammals$chrMark'),
function (x) c(median_rpkm = median(x$RPKM), median = median(x$dNdS)))
# shape is passed to geom_point() outside aes() here; this is the call that
# produces the "Aesthetics must be either length 1 ..." error quoted above.
ggplot(summarised_data,aes(x = summarised_data$median_rpkm, y = summarised_data$median,
color = summarised_data$`mammals$Species`)) + geom_smooth(se = FALSE, method = "lm") +
geom_point(shape = summarised_data$`mammals$chrMark`) + xlab("median RPKM") + ylab("dNdS")
"ENSG00000213221", "ENSG00000213341", "ENSG00000213380", "ENSG00000213424",
"ENSG00000213533", "ENSG00000213551", "ENSG00000213619", "ENSG00000213626",
"ENSG00000213699", "ENSG00000213782", "ENSG00000213949", "ENSG00000214013",
"ENSG00000214338", "ENSG00000214357", "ENSG00000214367", "ENSG00000214517",
"ENSG00000214814", "ENSG00000215203", "ENSG00000215305", "ENSG00000215367",
"ENSG00000215440", "ENSG00000215897", "ENSG00000221947", "ENSG00000222011",
"ENSG00000224051", "ENSG00000225830", "ENSG00000225921", "ENSG00000239305",
"ENSG00000239474", "ENSG00000239900", "ENSG00000241058", "ENSG00000242247",
"ENSG00000242612", "ENSG00000243646", "ENSG00000244038", "ENSG00000244045"),
class = "factor"), Species = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Chimp", "Gori", "Human", "Maca",
"Mouse", "Oran"), class = "factor"), labs = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Chimp-A", "Chimp-X",
"Gori-A", "Gori-X", "Human-A", "Human-X", "Maca-A", "Maca-X",
"Mouse-A", "Mouse-X", "Oran-A", "Oran-X"), class = "factor"),
chrMark = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("A", "X"), class = "factor"), chr = structure(c(27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), .Label = c("1",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"2", "20", "21", "22", "2a", "2A", "2b", "2B", "3", "4",
"5", "6", "7", "8", "9", "X"), class = "factor"), dN = c(3.00669,
3.27182, 7.02044, 1.01784, 3.0363, 2.32786, 4.92959, 3.03753,
3.0776, 1.02147), dS = c(3.15631, 5.87147, 3.13716, 2.05438,
4.10205, 5.24764, 4.2014, 3.18086, 5.4942, 3.02169), dNdS = c(0.9525965447,
0.5572403504, 2.2378329444, 0.4954487485, 0.7401908802, 0.4436013141,
1.1733207978, 0.954939859, 0.5601543446, 0.3380459279), RPKM = c(31.6,
13.9, 26.3, 9.02, 11.3, 137, 242, 1.05, 59.4, 10.1), Tau = c(0.7113820598,
0.8391023102, 0.3185943152, 0.6887167806, 0.9120531859, 0.6254200542,
0.7165302682, 0.7257435312, 0.2586613298, 0.6493567251),
GC3 = c(0.615502, 0.622543, 0.393064, 0.490141, 0.461592,
0.626407, 0.490305, 0.482853, 0.346424, 0.466484)), .Names = c("gene",
"Species", "labs", "chrMark", "chr", "dN", "dS", "dNdS", "RPKM",
"Tau", "GC3"), row.names = c(NA, 10L), class = "data.frame")
There are a few things wrong with your code and with how it uses ggplot's non-standard evaluation; I'd recommend reading a ggplot tutorial or the docs. Having columns within summarised_data called 'mammals$Species' and 'mammals$chrMark' is going to cause lots of problems.
If we change these to something more sensible...
# The grouping columns are literally named "mammals$Species" and
# "mammals$chrMark" (note the capital S). Matching on the wrong case would
# silently rename nothing. Give them syntactic names that can be used unquoted
# inside aes().
names(summarised_data)[names(summarised_data) == "mammals$Species"] <- "mammals_species"
names(summarised_data)[names(summarised_data) == "mammals$chrMark"] <- "mammals_chrMark"
We can make the ggplot code more friendly. Note that shape has to be within aes, as you're mapping it to your data.
# Scatter of the per-group medians with a single overall linear fit.
# Shape and colour vary with the data, so they are mapped inside aes().
ggplot(summarised_data, aes(x = median_rpkm, y = median)) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(aes(shape = mammals_chrMark, color = mammals_species)) +
  labs(x = "median RPKM", y = "dNdS")
Hopefully this should work, or at least get you somewhere closer to an answer.
I have a plot of lines with colors from black to green. However, I want to color the lines gradual by their y-value at "Value2" on the x-axis. The line with the highest y-value at "Value2" should be green, the one with the lowest y-value at "Value2" should be black.
How can I assign the color to the lines by their y values at a specific point of the x-axis?
My code:
library(ggplot2)
x <- structure(list(ID = c("1998-06-05_area2", "1999-07-11_area2",
"1998-05-13_area1", "1998-05-20_area1", "1998-06-05_area2", "1999-07-11_area2",
"1998-05-13_area1", "1998-05-20_area1", "1998-06-05_area2", "1999-07-11_area2",
"1998-05-13_area1", "1998-05-20_area1"), variable = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("Value1",
"Value2", "Value3"), class = "factor"), value = c(322, 280, 210,
416, 384, 252, 329, 601, 83, 66, 100, 147)), .Names = c("ID",
"variable", "value"), na.action = structure(c(1L, 2L, 3L, 4L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 25L, 26L, 27L, 28L), .Names = c("1",
"2", "3", "4", "13", "14", "15", "16", "17", "18", "19", "20",
"25", "26", "27", "28"), class = "omit"), row.names = c(5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 21L, 22L, 23L, 24L), class = "data.frame")
# Build a black-to-green colour ramp with one colour per unique ID.
pal <- colorRampPalette(c("black","green"))
colorlist <- pal(length(unique(x$ID)))
# Colour is mapped to ID and the manual scale supplies the ramp, so the colour
# order does not depend on any y-value.
ggplot(data = x , aes(x = variable, y = value, color = ID)) +
geom_line(aes(group =ID),size=1) + geom_point(size = 2) +
scale_colour_manual(values=colorlist)
We can use dplyr to create an extra column inside your data for the appropriate colour mapping, and then pipe the result into the ggplot() call to generate the plot.
library(dplyr)
library(ggplot2)
# For every ID, store that line's value at "Value2" in `col`. Mapping colour to
# factor(col) orders the colour levels by that value, so the manual palette in
# `colorlist` is assigned from the lowest to the highest "Value2" value.
x %>%
  group_by(ID) %>%
  mutate(col = value[variable == "Value2"]) %>%
  ggplot(aes(x = variable, y = value, color = factor(col))) +
  geom_line(aes(group = ID), size = 1) +
  geom_point(size = 2) +
  scale_colour_manual(values = colorlist)
I have measurements of CH4 concentration with depth:
df <- structure(list(Depth = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 15L, 16L, 17L), .Label = c("0", "10",
"12", "14", "16", "18", "2", "20", "22", "24", "26", "28", "30",
"32", "4", "6", "8", "AR"), class = "factor"), Conc_CH4 = c(4.30769230769231,
23.1846153846154, 14.5615384615385, 21.1769230769231, 16.2615384615385,
132.007692307692, 5.86923076923077, 389.353846153846, 823.023076923077,
948.684615384615, 1436.56923076923, 1939.88461538462, 26.2769230769231,
27.5538461538462, 19.6461538461538)), .Names = c("Depth", "Conc_CH4"
), row.names = c(NA, -15L), class = "data.frame")
And I need to create a plot like this:
But I have some problems: the factors in my data are in the wrong order, and I don't know how to plot this kind of data using ggplot2.
Any ideas?
Here's a solution with base plotting functions (you reverse the limits of ylim):
# Depth is a factor: convert via character to recover the numeric depths
# (as.numeric() on the factor itself would return the level codes).
df$Depth <- as.numeric(as.character(df$Depth))
# Sort by depth so that type = "l" joins the points from top to bottom.
df <- df[order(df$Depth), ]
# Reversing the y limits puts the shallowest depth at the top of the plot.
plot(Depth ~ Conc_CH4, data = df, type = "l", ylim = rev(range(df$Depth)))
Why not convert Depth to a number and plot?
# Convert the factor Depth to numbers (via character, so the labels rather than
# the level codes are used) and sort the rows by depth.
depth_profile <- transform(df, Depth = as.numeric(as.character(Depth)))
depth_profile <- depth_profile[order(depth_profile$Depth), ]
# geom_path() joins the points in row order, i.e. by depth. geom_line() would
# join them in order of x (Conc_CH4) and scramble the profile.
ggplot(depth_profile, aes(x = Conc_CH4, y = Depth)) +
  geom_path() +
  scale_y_reverse()
The as.numeric(as.character(...)) is needed because your Depth is a factor: calling as.numeric directly on a factor returns its internal integer level codes, not the numbers shown in its labels.
The scale_y_reverse reverses the y scale.
If your actual data has a depth of "AR" in it, you'll have to omit them or otherwise handle them.
I want to plot a "fitted" mixed models regression by plotting predicted/fitted values and a regression line for each condition with SE's included. Unfortunately, the SE is not plotted (although I do think that the grey shading of the legend indicates that it is processed).
newdat contains the data to plot the predicted values (geom_point); nd contains the data needed to plot the lines via geom_smooth().
newdat <- structure(list(v0 = c(55L, 90L, 30L, 23L, 74L, 48L, 25L, 25L,
60L, 69L, 55L, 41L, 34L, 41L, 53L, 76L, 72L, 64L, 34L, 37L, 75L,
21L, 26L, 14L, 24L, 19L), treatment = structure(c(2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L), .Label = c("hc", "nhc"), class = "factor"),
cse = c(2, 2, 6, 6, -4, -4, 5, 5, -4, -4, -3, -3, -2, -2,
3, 3, 2, 2, -4, -4, -7, -7, 4, 4, 2, 2), dv280 = structure(c(28.5954553607209,
29.0010807407473, 0.820231380215773, 3.35865456461513, 49.8359456217717,
24.461804847022, 6.23032836368822, 0.772936154511909, 41.8150506885472,
31.9089377911506, 25.2183508293096, 29.203718756273, 23.4674396239055,
18.5277638674685, 14.154110078194, 38.9009660948022, 22.6178239314942,
33.7517449606509, 26.9191029554161, 20.5609256858118, 55.5863616856965,
20.0644146304084, 2.85339319855906, 1.65402829619576, 10.8349022942953,
3.82267888202684), .Dim = c(26L, 1L), .Dimnames = list(c("1",
"2", "3", "4", "5", "6", "7", "8", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28"), NULL)), plo = c(18.2940632968672,
8.70682874092615, -9.57004073754051, -7.05295432875793, 35.2691733515267,
14.2687966060566, -3.12208622604343, -8.52627071371677, 30.1788256344375,
18.2506947724591, 14.8705702665525, 20.3644901882128, 15.3980231727933,
10.4235410902273, 3.52894178176158, 22.3750340692014, 7.67201979003711,
21.2004011925819, 16.8945364920955, 10.6654316626679, 39.1117560188314,
4.71896161593837, -5.54649636719771, -8.03839072475669, 3.25706574634023,
-4.38303434571468), phi = c(38.8968474245745, 49.2953327405684,
11.2105034979721, 13.7702634579882, 64.4027178920167, 34.6548130879875,
15.5827429534199, 10.0721430227406, 53.4512757426569, 45.5671808098421,
35.5661313920666, 38.0429473243332, 31.5368560750176, 26.6319866447097,
24.7792783746264, 55.4268981204031, 37.5636280729513, 46.3030887287198,
36.9436694187367, 30.4564197089558, 72.0609673525617, 35.4098676448784,
11.2532827643158, 11.3464473171482, 18.4127388422504, 12.0283921097684
), tlo = c(18.2877068225676, 8.70360144639113, -9.57634287064189,
-7.05924355454202, 35.2646774598802, 14.2623725847359, -3.12908722334489,
-8.53331173874155, 30.1731979587424, 18.2458999214011, 14.8642422705033,
20.3570830595245, 15.3899100922942, 10.4154628193239, 3.52277889155111,
22.371071031997, 7.6676378822382, 21.1951836536363, 16.8880045983016,
10.6588146263129, 39.1077806378248, 4.71469379607788, -5.55429056032973,
-8.04514630529966, 3.24842694535383, -4.39101280006747),
thi = c(38.9032038988741, 49.2985600351034, 11.2168056310734,
13.7765526837723, 64.4072137836632, 34.6612371093081, 15.5897439507213,
10.0791840477654, 53.456903418352, 45.5719756609001, 35.5724593881158,
38.0503544530215, 31.5449691555168, 26.6400649156131, 24.7854412648369,
55.4308611576074, 37.5680099807502, 46.3083062676655, 36.9502013125306,
30.4630367453107, 72.0649427335683, 35.4141354647389, 11.2610769574479,
11.3532028976912, 18.4213776432368, 12.0363705641212)), .Names = c("v0",
"treatment", "cse", "dv280", "plo", "phi", "tlo", "thi"), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L), class =
"data.frame")
nd <- structure(list(v0 = c(55L, 90L, 30L, 23L, 74L, 48L, 25L, 25L,
60L, 69L, 55L, 41L, 34L, 41L, 53L, 76L, 72L, 64L, 34L, 37L, 75L,
21L, 26L, 14L, 24L, 19L), treatment = structure(c(2L, 1L, 1L,
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
1L, 2L, 1L, 1L, 2L, 2L, 1L), .Label = c("hc", "nhc"), class = "factor"),
cse = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), dv280 = structure(c(32.0471186922315,
32.4527440722579, 11.1752213747477, 13.713644559147, 42.9326189587504,
17.5584781840008, 14.8594866924648, 9.4020944832885, 34.911724025526,
25.0056111281293, 20.0408558320436, 24.0262237590071, 20.0157762923948,
15.0761005359579, 19.3316050754599, 44.0784610920682, 26.0694872630048,
37.2034082921615, 20.0157762923948, 13.6575990227905, 43.5055400254093,
7.98359297012116, 9.75671986158034, 8.55735495921703, 14.2865656258059,
7.27434221353748), .Dim = c(26L, 1L), .Dimnames = list(c("1",
"2", "3", "4", "5", "6", "7", "8", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28"), NULL)), plo = c(22.5072031474275,
13.0500664185171, 4.05863186267882, 5.77259649335957, 28.5121051842211,
9.29736790581986, 7.2048329083037, 1.78971324244184, 24.2096188947274,
11.7540646048434, 10.363946079095, 16.7677263682142, 13.0339097497873,
7.77962797988299, 10.0943827426394, 29.0914605330986, 11.9778881852231,
25.4951576099086, 13.0339097497873, 6.63425372645495, 28.8025761975293,
-0.238742065354621, 2.26914358668319, -1.1076157441286, 6.49404176281806,
-1.31100367364568), phi = c(41.5870342370355, 51.8554217259987,
18.2918108868166, 21.6546926249345, 57.3531327332797, 25.8195884621816,
22.5141404766259, 17.0144757241352, 45.6138291563245, 38.2571576514153,
29.7177655849923, 31.2847211497999, 26.9976428350024, 22.3725730920328,
28.5688274082805, 59.0654616510377, 40.1610863407866, 48.9116589744144,
26.9976428350024, 20.6809443191261, 58.2085038532893, 16.2059280055969,
17.2442961364775, 18.2223256625627, 22.0790894887938, 15.8596881007206
), tlo = c(22.500339650347, 13.046690851483, 4.0494338564546,
5.76435239847513, 28.5075637077657, 9.28944290164532, 7.19628070358881,
1.78111359966419, 24.2035002837825, 11.7491226514857, 10.3571796788833,
16.7587079630338, 13.024534493447, 7.77065645403329, 10.0872945063297,
29.087090666816, 11.9732407398515, 25.4895645373964, 13.024534493447,
6.62493376528452, 28.7981219380453, -0.246704406534527, 2.26040076852036,
-1.1143904969611, 6.4856407045028, -1.3186296516059), thi = c(41.593897734116,
51.8587972930328, 18.3010088930408, 21.6629367198189, 57.3576742097352,
25.8275134663562, 22.5226926813408, 17.0230753669128, 45.6199477672694,
38.2620996047729, 29.7245319852039, 31.2937395549803, 27.0070180913427,
22.3815446178825, 28.5759156445901, 59.0698315173204, 40.1657337861582,
48.9172520469266, 27.0070180913427, 20.6902642802966, 58.2129581127733,
16.2138903467768, 17.2530389546403, 18.2291004153952, 22.0874905471091,
15.8673140786809)), .Names = c("v0", "treatment", "cse",
"dv280", "plo", "phi", "tlo", "thi"), row.names = c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L), class = "data.frame")
# Points are drawn from newdat; the regression lines are fitted by
# geom_smooth() to nd, one per treatment (colour is mapped to treatment).
p <- ggplot(data=newdat, mapping=aes(x=v0, y=dv280, colour=treatment)) +
geom_point() +
geom_smooth(data=nd, method='lm', se=TRUE) +
scale_colour_discrete(guide=guide_legend(title.position='left', title.hjust=1))
# Add the theme, widen the visible x range and mark both zero axes with dashed
# lines. NOTE: .mytheme is not defined in this snippet.
p + .mytheme + coord_cartesian(xlim=c(-20,100)) +
geom_hline(yintercept=0, colour='gray35', linetype='dashed') +
geom_vline(xintercept=0, colour='gray35', linetype='dashed')
This is all nice and dandy but unfortunately, the SE is not displayed:
I don't get why the SE is dropped (or perhaps overwritten by/with something else, given that the legend appears to indicate that the SE parameter is seen and recognized).
The code to generate newdat and nd is as follows with the main difference that for nd the value for cse is set to zero.
# Mixed model: fixed effects for the v0-by-treatment slopes and cse, plus a
# by-pp random slope for v0 (no random intercept). `dat` is not shown here.
m <- lmer(dv280 ~ 1 + v0:treatment + cse + (0 + v0 | pp), data=dat, REML=TRUE)
# Prediction frame using the observed v0, treatment and cse; dv280 is a
# placeholder that is overwritten with the prediction below.
newdat <- data.frame(
v0=dat$v0,
treatment=dat$treatment,
cse=dat$cse,
dv280=0)
# Drop rows 9 and 10.
newdat <- newdat[-c(9,10),]
# Fixed-effects prediction: model matrix times the fixed-effect estimates.
# The result of %*% is a one-column matrix, which is stored as dv280.
mm <- model.matrix(terms(m), newdat)
newdat$dv280 <- mm %*% fixef(m)
# pvar1: variance of the prediction from the fixed effects only;
# tvar1: the same plus the first variance component reported for pp.
pvar1 <- diag(mm %*% tcrossprod(vcov(m), mm))
tvar1 <- pvar1 + VarCorr(m)$pp[1]
# Intervals of +/- 2 standard deviations: plo/phi from pvar1, tlo/thi from tvar1.
newdat <- data.frame(newdat, plo=newdat$dv280 - 2 * sqrt(pvar1), phi=newdat$dv280 + 2 * sqrt(pvar1),
tlo=newdat$dv280 - 2 * sqrt(tvar1), thi=newdat$dv280 + 2 * sqrt(tvar1))
# Same procedure for nd; the only difference is that cse is fixed at 0.
nd <- data.frame(
v0=dat$v0,
treatment=dat$treatment,
cse=0,
dv280=0)
nd <- nd[-c(9,10),]
mm <- model.matrix(terms(m), nd)
nd$dv280 <- mm %*% fixef(m)
pvar1 <- diag(mm %*% tcrossprod(vcov(m), mm))
tvar1 <- pvar1 + VarCorr(m)$pp[1]
nd <- data.frame(nd, plo=nd$dv280 - 2 * sqrt(pvar1), phi=nd$dv280 + 2 * sqrt(pvar1),
tlo=nd$dv280 - 2 * sqrt(tvar1), thi=nd$dv280 + 2 * sqrt(tvar1))
In your code,
# The two layers use different data: geom_point() inherits newdat from
# ggplot(), while geom_smooth() is given nd explicitly.
p <- ggplot(data=newdat, mapping=aes(x=v0, y=dv280, colour=treatment)) +
geom_point() +
geom_smooth(data=nd, method='lm', se=TRUE) +
scale_colour_discrete(guide=guide_legend(title.position='left', title.hjust=1))
You use newdat as the dataset in geom_point(...), and nd in geom_smooth(...). The problem is that nd apparently contains the fitted values (i.e. the predictions). So the se around the "data" from nd is 0. You can see this by typing:
# Same plot, but with nd used for the points as well as for the smoother
# (both layers inherit nd from ggplot()).
p <- ggplot(nd, aes(x = v0, y = dv280, colour = treatment)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  scale_colour_discrete(
    guide = guide_legend(title.position = "left", title.hjust = 1)
  )
The points in nd lie perfectly on a line, so se=0. If the point of using nd is to calculate the lm using some subset of your data, then provide that as the dataset to geom_smooth(...). For example, the code below plots all the points, but lm is done on the subset where dv280 > 5:
# All points of newdat are drawn, but the linear fit is computed only on the
# rows where dv280 > 5.
p <- ggplot(newdat, aes(x = v0, y = dv280, colour = treatment)) +
  geom_point() +
  geom_smooth(data = subset(newdat, dv280 > 5), method = "lm", se = TRUE) +
  scale_colour_discrete(
    guide = guide_legend(title.position = "left", title.hjust = 1)
  )
Edit: In response to OP's comment.
It looks like you are fitting an lme model to dat$dv280. So the data is in dat. On the other hand, newdat$dv280 is the prediction based on the model parameters and a model matrix containing v0, treatment, and cse. In addition, nd$dv280 is the prediction based on v0 and treatment, with cse=0. So it is not surprising that a plot of nd$dv280 vs. v0, grouped by treatment, will be perfectly linear. Nowhere in your ggplot code are you plotting the actual data (e.g., dat$dv280). The apparent scatter is solely due to the effect of cse. So at this point I'm not really sure what you are trying to demonstrate with this plot (??).
If I understand the question correctly, I think the following code might put you on the right track. You can explicitly state the standard errors:
# Fit a plain linear model of dv280 on v0 and request the standard errors of
# the fitted values (argument spelled out as se.fit rather than relying on
# partial matching of `se`).
model <- lm(dv280 ~ v0, data = newdat)
err <- stats::predict(model, newdata = newdat, se.fit = TRUE)
# Approximate 95% limits: fit +/- 1.96 standard errors.
newdat$ucl <- err$fit + 1.96 * err$se.fit
newdat$lcl <- err$fit - 1.96 * err$se.fit
# NOTE(review): with method = "lm" the smoother presumably fits its own model
# and band per colour group, so the lcl/ucl mapped here may be ignored --
# confirm; stat = "identity" is the usual way to draw precomputed limits.
qplot(v0, dv280, data = newdat, colour = treatment) +
  geom_smooth(aes(ymin = lcl, ymax = ucl), data = newdat, method = "lm")
This will give you the following figure: