How can I shift the regression line? - r

# Draw 2-D density contours with a linear fit, one panel per `id`.
#
# data: data frame; must contain an `id` column, which is used for faceting.
# x, y: strings naming the x and y aesthetics. Expressions such as
#       "log(area)" are accepted because they are handed to aes_string().
# Returns the ggplot object so callers can add layers or save it.
pfacet <- function(data, x, y) {
  ggplot(data, aes_string(x = x, y = y)) +
    # `contour` is a parameter of the stat, not an aesthetic, so it is set
    # outside aes().
    stat_density2d(aes(fill = ..level..), geom = "density2d",
                   contour = TRUE, color = "black") +
    geom_smooth(method = "lm", se = FALSE, color = "black") +
    facet_wrap(~id) +
    theme_bw() +
    theme(panel.grid.major = element_line(colour = "#808080")) +
    # guides(fill = FALSE) is deprecated; "none" suppresses the legend.
    guides(fill = "none")
}
Plotting
# Build the faceted plot first, then save it explicitly: ggsave() writes a
# file and is not a plot component, so it must not be chained with `+`.
p <- pfacet(data = df, x = "log(area)", y = "log(fd)")
p
ggsave("test.png", plot = p, width = 6, height = 4, dpi = 300)
This gives me this figure.
BUT, I want to shift these regression lines to the highest value of each facet. i.e. make it an envelope curve. Eg. http://specialpapers.gsapubs.org/content/401/63/F11.large.jpg
Ordinarily, I would simply make a regression line and shift it, but I don't know how to do this inside a facet. Could you please give some pointers?
DATA
dput(droplevels(head(df, 100)))
structure(list(fd = c(11, 7.75, 55.25, 45.25, 9.5, 89, 14.5,
8, 84.25, 5.25, 79.5, 7.75, 71, 38.5, 242.25, 33, 32, 19, 58.5,
249.25, 19, 72.5, 6.25, 27.0333333333333, 26.5, 81, 30, 29, 39.75,
18.5, 64.25, 91, 4.5, 30.5, 74, 256.75, 9, 81, 27, 7.5, 107,
26.75, 47.25, 16, 57, 37, 48.25, 48, 36, 147.25), area = c(20168.2374,
432.528, 5780.8535, 1411.5435, 543.8975, 660.447, 24995.9752,
543.8975, 2659.9178, 277.1287, 147.8883, 153.0683, 2217.0298,
1188.8045, 4237.2205, 489.5078, 1051.5352, 1362.3337, 1401.1836,
169.3852, 1333.8439, 1051.5352, 29.5259, 1429.6734, 668.2169,
17068.0216, 660.447, 16860.8226, 116.5495, 3820.2325, 784.7664,
841.7461, 1696.4422, 85.9876, 2066.8105, 2090.1204, 121.7294,
9999.9441, 127.4274, 1152.5447, 934.9857, 1424.4935, 1774.1419,
543.8975, 784.7664, 237.5019, 3486.124, 1080.025, 6164.1717,
3348.8546), id = structure(c(3L, 2L, 2L, 4L, 4L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 4L, 3L, 1L, 3L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 3L,
3L, 4L, 2L, 4L, 3L, 4L, 4L, 3L, 4L, 3L), .Label = c("Csb", "Dfb(E)",
"Dfa", "Cfa"), class = "factor")), .Names = c("fd", "area", "id"
), row.names = c("65775", "61848", "11286", "22467", "34601",
"53841", "14661", "8941", "88607", "47681", "7898", "9598", "40615",
"35550", "70985", "64634", "43864", "88065", "37520", "100794",
"74916", "76759", "48928", "17575", "7424", "30848", "53970",
"65662", "27392", "82248", "48004", "93560", "51147", "42321",
"30070", "22096", "25575", "49067", "23702", "65877", "53617",
"90727", "59449", "34568", "48155", "96101", "31358", "40150",
"75458", "71566"), class = "data.frame")
> dput(droplevels(head(ddf2, 100)))
structure(list(fd = c(11, 7.75, 55.25, 45.25, 9.5, 89, 14.5,
8, 84.25, 5.25, 79.5, 7.75, 71, 38.5, 242.25, 33, 32, 19, 58.5,
249.25, 19, 72.5, 6.25, 27.0333333333333, 26.5, 81, 30, 29, 39.75,
18.5, 64.25, 91, 4.5, 30.5, 74, 256.75, 9, 81, 27, 7.5, 107,
26.75, 47.25, 16, 57, 37, 48.25, 48, 36, 147.25, 23.5, 42, 1.08333333333333,
21.5, 6.51666666666667, 198, 47.5, 8.75, 16, 43.5, 34.75, 30.25,
132.25, 2.25, 12.5, 225, 37, 17.25, 63.5, 48, 19.75, 12, 62.5,
64, 27, 11, 72.25, 246, 27.75, 15.5, 178, 93.75, 3.75, 3, 46.25,
4, 6.25, 5.25, 20, 44, 44.5, 1, 33, 18.25, 14.5, 29.25, 9, 33,
133, 67), area = c(20168.2374, 432.528, 5780.8535, 1411.5435,
543.8975, 660.447, 24995.9752, 543.8975, 2659.9178, 277.1287,
147.8883, 153.0683, 2217.0298, 1188.8045, 4237.2205, 489.5078,
1051.5352, 1362.3337, 1401.1836, 169.3852, 1333.8439, 1051.5352,
29.5259, 1429.6734, 668.2169, 17068.0216, 660.447, 16860.8226,
116.5495, 3820.2325, 784.7664, 841.7461, 1696.4422, 85.9876,
2066.8105, 2090.1204, 121.7294, 9999.9441, 127.4274, 1152.5447,
934.9857, 1424.4935, 1774.1419, 543.8975, 784.7664, 237.5019,
3486.124, 1080.025, 6164.1717, 3348.8546, 236.2069, 510.2277,
31.5979, 916.8558, 1994.2908, 3501.6639, 1152.5447, 486.9178,
533.5376, 271.9488, 336.6985, 1103.3349, 9764.2552, 36.7778,
2375.0191, 512.8176, 422.1681, 859.8761, 1118.8749, 237.5019,
133.6434, 453.2479, 1787.0918, 6063.1622, 237.5019, 1383.0537,
668.2169, 1424.4935, 621.5971, 1486.6532, 7355.5662, 984.1955,
68.8937, 154.6223, 116.5495, 1665.3624, 43.5118, 450.6579, 510.2277,
1494.4231, 404.0381, 598.2873, 339.2884, 12406.043, 274.5387,
151.2553, 753.6865, 29059.6666, 3470.5841, 1885.5113), id = structure(c(3L,
2L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L,
4L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 4L,
3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 3L, 4L, 4L, 3L, 4L,
3L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L,
4L, 4L, 2L, 3L, 4L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 2L,
1L, 4L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L,
2L, 2L, 4L), .Label = c("Csb", "Dfb(E)", "Dfa", "Cfa"), class = "factor")), .Names = c("fd",
"area", "id"), row.names = c("65775", "61848", "11286", "22467",
"34601", "53841", "14661", "8941", "88607", "47681", "7898",
"9598", "40615", "35550", "70985", "64634", "43864", "88065",
"37520", "100794", "74916", "76759", "48928", "17575", "7424",
"30848", "53970", "65662", "27392", "82248", "48004", "93560",
"51147", "42321", "30070", "22096", "25575", "49067", "23702",
"65877", "53617", "90727", "59449", "34568", "48155", "96101",
"31358", "40150", "75458", "71566", "1687", "86603", "77451",
"68075", "14478", "69151", "66028", "77394", "68580", "16328",
"26945", "73615", "24179", "19497", "82448", "25098", "23565",
"8113", "66312", "96062", "60313", "34767", "97877", "48598",
"96135", "36877", "7309", "71065", "49622", "65051", "55340",
"105109", "26480", "4550", "18935", "61714", "31667", "55035",
"27563", "17332", "35694", "20092", "44334", "21344", "22303",
"64816", "12814", "12391", "52770", "84979"), class = "data.frame")

Related

Plotting multiple variables in time series with greyscale and shapes [duplicate]

This question already has answers here:
Changing the line type in the ggplot legend
(2 answers)
ggplot2 for grayscale printouts
(3 answers)
Closed 7 months ago.
I am trying to make a time-series graph with multiple y values. I would like to change the shape of the different variables so some are solid, some are dashed etc. I would also like all the colors to be on greyscale.
Does anyone know how I can accomplish this?
I know how to melt my data so that I can plot them all together by the value of the variables but right now I cannot get the shapes to change or the greyscale. Thank you in advance.
# One line per melted variable, distinguished by colour.
ggplot(
  data = melted_data,
  mapping = aes(x = Distance, y = value, color = variable)
) +
  geom_line()
data <- structure(list(Distance = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Mg",
"Mn", "Zn", "Ba"), class = "factor"), value = c(0.903247645,
0.912560748, 0.896003508, 0.909572697, 0.883631829, 0.905722594,
0.892465355, 0.909271173, 0.880506202, 0.889278401, 0.878534542,
0.959209459, 0.913303825, 0.929893977, 0.97778374, 0.9885554,
0.929716333, 1.028422583, 1.025638955, 1.011352651, 1.041343955,
1.092562951, 1.129761801, 1.088857171, 1.107257284, 1.116728405,
1.103053734, 1.041662037, 1.134182243, 1.104550315, 1.086952767,
1.106004784, 1.057688595, 1.034347579, 1.04641385, 1.139270945,
1.048446018, 1.033827731, 1.075554754, 1.029893202, 1.074749532,
1.001626205, 0.977053541, 0.987467665, 0.999540478, 0.945184816,
0.959677178, 0.962807712, 0.967023936, 1.024286493, 0.881264816,
0.967181342, 1.000316876, 0.956168258, 1.003214572, 1.00047837,
0.940103474, 0.929875987, 0.928227112, 0.982410241, 0.983035162,
0.976666772, 1.019755049, 1.075189042, 0.975380543, 0.981316782,
0.986876269, 1.026690916, 1.052379934, 1.001547298, 0.979888683,
1.008209647, 0.976098272, 0.944479556, 0.996767684, 1.018077758,
1.028862706, 1.08510417, 1.08963868, 1.048481179, 1.139954126,
1.107066353, 1.122920581, 1.23904326, 1.19449336, 1.179971969,
1.165865352, 1.068804094, 1.099436469, 1.073307737, 1.07045113,
1.101007051, 1.011962649, 1.11202545, 1.097883672, 1.05361424,
0.993283703, 1.046635444, 1.04951188, 0.086720869, 0.113119382,
0.088197332, 0.081547788, 0.079373211, 0.07888827, 0.072865285,
0.079637996, 0.066314774, 0.097585729, 0.185034982, 0.214466904,
0.294317625, 0.481389256, 0.531196058, 0.715842439, 0.865098887,
0.987242052, 1.081028291, 1.240920518, 1.313524957, 1.543771699,
1.78495042, 1.746572555, 2.048760527, 2.101438775, 1.967474033,
2.000286925, 2.014020838, 1.924470659, 1.75696549, 1.786681246,
1.633290961, 1.455799758, 1.315346538, 1.435348984, 1.27887702,
1.152818928, 1.095127218, 0.987502349, 1.062278922, 0.898540082,
0.83617998, 0.889057689, 0.825563648, 0.788347646, 0.790973555,
0.775541228, 0.815063004, 0.848723108, 0.66783059, 0.672629631,
0.747809615, 0.72338158, 0.666220438, 0.664051795, 0.597260657,
0.689282162, 0.663808452, 0.678551141, 0.672917354, 0.686199986,
0.724202364, 0.746195474, 0.686135659, 0.654148537, 0.713488795,
0.72446665, 0.699529989, 0.630120423, 0.661767463, 0.663290351,
0.705879842, 0.709399338, 0.76228353, 0.714368918, 0.720561695,
0.837036666, 0.923882149, 1.014163852, 1.221410703, 1.315825246,
1.368054705, 1.641746627, 1.630198312, 1.698589629, 1.562956393,
1.427322658, 1.53964983, 1.574583495, 1.527101216, 1.380123116,
1.28649445, 1.29251968, 1.330565441, 1.317758525, 1.19292313,
1.217953538, 1.218591815, 0.746612627, 0.818368055, 0.696689824,
0.748702805, 0.717457681, 0.766243608, 0.805305259, 0.855909762,
0.803357905, 0.889646097, 0.854456208, 1.067795473, 1.051422575,
1.17061972, 1.138440648, 1.052796919, 1.040998633, 1.161739158,
1.025956799, 0.971567748, 1.072911493, 0.952121155, 1.040392714,
1.069745522, 1.068549198, 1.090194087, 1.214584829, 1.157485471,
1.245813376, 1.336359991, 1.204038397, 1.126255292, 1.131057736,
0.922042386, 1.037566449, 1.100852394, 1.121842367, 0.998657748,
1.006938923, 1.002800377, 0.897387497, 0.93902937, 0.889327622,
0.802133735, 0.855245047, 0.860702407, 0.704324249, 0.905827093,
0.760155095, 0.760247698, 0.655991619, 0.677006743, 0.668001976,
0.623410532, 0.569302474, 0.523713794, 0.690042836, 0.539115342,
0.528696218, 0.57851915, 0.60294784, 0.581392042, 0.65277069,
0.65620614, 0.625397246, 0.697647782, 0.6180657, 0.632326126,
0.684659215, 0.606197513, 0.630134281, 0.637151517, 0.574538208,
0.605993607, 0.533522181, 0.544522236, 0.577535469, 0.573427383,
0.672984155, 0.735286828, 0.7532343, 0.881292245, 0.801132661,
1.122761046, 1.137397845, 1.173190388, 1.138033979, 1.126494557,
1.144871399, 1.087042815, 0.981750792, 0.992888445, 0.955352455,
1.074357698, 1.027127808, 1.083248059, 1.010304962, 1.037776316,
1.052809984, 0.742734852, 0.839492568, 0.743899849, 0.817080816,
0.773569657, 0.735728339, 0.715168283, 0.78077814, 0.694280484,
0.773303425, 0.768041196, 0.883401699, 0.818274274, 0.715927964,
0.696938222, 0.832246446, 0.73089346, 0.790965216, 0.799717389,
0.865896893, 0.946771069, 0.954212275, 1.023740345, 1.027036123,
1.086336263, 1.064542815, 0.9463809, 0.924081609, 0.999832641,
0.911277648, 0.922871168, 0.953134033, 0.786732115, 0.802026729,
0.832863371, 0.863952475, 0.817833153, 0.748586924, 0.72095701,
0.738213943, 0.672736744, 0.704947698, 0.531743532, 0.634123809,
0.683548549, 0.733277161, 0.608993729, 0.752162246, 0.568705823,
0.643172511, 0.597251486, 0.655514695, 0.583437677, 0.557676441,
0.646713866, 0.527005047, 0.578023512, 0.576281064, 0.600923204,
0.578475648, 0.551957027, 0.585007991, 0.623858699, 0.630936819,
0.636198589, 0.565476603, 0.658861425, 0.577557604, 0.629178306,
0.646092809, 0.566079299, 0.60953767, 0.680135261, 0.500802233,
0.704656678, 0.61109605, 0.645344144, 0.667139888, 0.734969576,
0.780062983, 0.783090234, 0.83005691, 0.905356723, 0.933746319,
0.947613375, 0.923115827, 0.873482691, 0.746883952, 0.850273618,
0.795256154, 0.800825928, 0.772630039, 0.749567395, 0.7823457,
0.772609842, 0.736269985, 0.699705666, 0.716860238, 0.65909369
)), row.names = c(NA, -396L), class = "data.frame")
You can use the linetype parameter within the aesthetics:
# Map `variable` to both colour and line type, then replace the default hues
# with a greyscale palette so the series remain distinguishable in print.
ggplot(data) +
  geom_line(
    aes(x = Distance, y = value, color = variable, linetype = variable)
  ) +
  scale_colour_grey()

Boxplot colour mismatch

I would like to color the dots of the plot according to the Episode values shown in the legend. What am I missing?
I replaced fill with color, but it is still not the plot that I would like to have.
Sample code:
# Box plot of Value per Type with the individual observations overlaid as dots.
# fill = Episode and group = Type are both set in the top-level aes(), so every
# layer below inherits them, including the explicit grouping by Type.
# The surrounding parentheses make the assignment print the plot as well.
(p <- ggplot(df, aes(x=Type, y=Value, fill=Episode, group=Type)) +
geom_boxplot()+
geom_line()+
geom_dotplot(binaxis='y', stackdir='center',
position=position_dodge(0.8))+
theme_bw())
Sample data:
df<-structure(list(Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L), .Label = c("A", "B", "C", "D", "E", "F", "G"), class = "factor"),
Episode = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L,
1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L), .Label = c("t1", "t2", "t3", "t4", "t5", "t6"
), class = "factor"), Value = c(32, 36, 57, 83, 88, 40, 40,
44, 67, 77, 66, 45, 88, 46, 56, 99, 65, 0, 66, 46, 59, 77,
74, 79, 38, 45, 60, 78, 66, 75, 45, 55, 68, 77, 88, 35, 36,
118, 80, 73, 71, 0)), row.names = c(NA, -42L), class = "data.frame")
I am not entirely sure if I understand your question correctly, but for me, the cleanest plot would be:
library(ggplot2)
df<-structure(list(Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L), .Label = c("A", "B", "C", "D", "E", "F", "G"), class = "factor"),
Episode = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L,
1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L,
4L, 5L, 6L), .Label = c("t1", "t2", "t3", "t4", "t5", "t6"
), class = "factor"), Value = c(32, 36, 57, 83, 88, 40, 40,
44, 67, 77, 66, 45, 88, 46, 56, 99, 65, 0, 66, 46, 59, 77,
74, 79, 38, 45, 60, 78, 66, 75, 45, 55, 68, 77, 88, 35, 36,
118, 80, 73, 71, 0)), row.names = c(NA, -42L), class = "data.frame")
# Box plot per Type; the raw observations are drawn as points whose colour
# encodes the Episode.
p <- ggplot(data = df, mapping = aes(x = Type, y = Value, group = Type)) +
  geom_boxplot() +
  geom_line() +
  geom_point(mapping = aes(colour = Episode))
print(p)
Created on 2021-04-15 by the reprex package (v0.3.0)

R get linear regression equation for boxplots

I haven't found a sufficient answer in this forum yet, so I decided to ask my own question.
I want to get the linear regression equation of a linear fit from a boxplot. I have this data:
library(ggplot2)
data <- structure(list(x = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L
), .Label = c("1", "2", "3", "4", "5", "6"), class = "factor"),
y = c(169, 79.5, 78.5, 75, 99.5, 68, 14, 30.5, 107.5, 51,
43, 33, 21.5, 35, 11, 1, 38, 54.5, 26.5, 143, 158, 171, 31.5,
67.5, 1, 57.5, 12, 36.5, 1, 23.5, 22.5, 71, 141, 218, 7.5,
1, 129, 144.5, 76, 46.5, 75.5, 45, 12, 24, 67, 65.5, 44.5,
37.5, 25.5, 19, 15, 1, 17.5, 50, 22.5, 90, 226, 220, 32,
69.5, 1, 79.5, 7, 44, 1, 15.5, 22, 75.5, 178, 153, 4.5, 1,
159, 89, 57, 71, 98.5, 47.5, 18.5, 30, 119, 57.5, 41, 33.5,
30, 31, 10, 1, 12, 43.5, 20.5, 98, 146.5, 145, 34, 64.5,
1, 40.5, 17, 41, 1, 14.5, 16.5, 71, 181, 168, 2, 1, 159,
103, 69, 65.5, 97.5, 37.5, 21, 15.5, 120.5, 46, 27, 29.5,
16.5, 20, 7.5, 1, 15.5, 42.5, 21.5, 111, 102.5, 124, 20.5,
51.5, 1, 22.5, 15, 42, 1, 13, 13.5, 64.5, 138, 155, 4.9,
1, 190, 89.5, 74.5, 79, 78, 59.5, 19.5, 21, 88.5, 44, 18,
19, 10, 13, 4, 1, 9.5, 44, 17, 140.5, 98, 112.5, 29.5, 62.56,
1, 31, 11.5, 49.5, 1, 10, 8.5, 40.5, 121, 141, 2.5, 1, 170,
87.5, 92, 77, 65, 34, 8, 26, 98, 51.5, 26, 19, 9, 8.5, 7.5,
1, 4.5, 0, 15.5, 80, 69, 59, 28, 44.5, 1, 38.5, 10, 51.5,
1, 3, 5, 65, 107, 152, 5, 1)), row.names = c(NA, -216L), class = "data.frame")
# Box plot of y for each level of x, with jittered raw points and a single
# linear smooth drawn across all levels (group = 1).
p <- ggplot(data, aes(x = x, y = y)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(shape = 1, position = position_jitter(0.1)) +
  ylim(0, NA) +
  theme_light() +
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    formula = y ~ x,
    se = TRUE
  )
print(p)

# x is a factor in this data set, so lm() treats it as categorical.
fit <- lm(y ~ x, data = data)
fit
which results in this output:
How can I extract the regression equation for this dataset? The function fit <- lm(y ~ x, data = data) just gives me one intercept and 5 coefficients, which is not my desired output. I want a simple regression equation in the form of y = a + bx.
How can I put this equation into the diagram? I've already looked into ggpmisc::stat_poly_eq(), but this doesn't seem to work with a linear regression on a boxplot.
Can you guys help me out?

Agglomerative hierarchical clustering using R

Plotting a dendrogram from an agglomerative hierarchical clustering does not yield the expected results. I have attached an example of the expected output in the image here. The y axis shows the treatment groups.
My MWE is
library(cluster)
# Gower dissimilarity between the rows of `cluster`. The result is stored
# under its own name so that stats::dist() is not masked.
gower_dist <- daisy(cluster, metric = "gower")
# Agglomerative clustering using the "ward.D2" linkage.
kaari <- hclust(gower_dist, method = "ward.D2")
# hang = -1 makes the leaf labels hang down from height 0.
plot(kaari, cex = 0.6, hang = -1)
Here is the data frame:
structure(list(Variety = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Cal J",
"Pesa F1", "Rambo F1", "Riograde"), class = "factor"), Sample.Part = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("% fruit damage",
"Intermediate", "Lower", "Upper"), class = "factor"), overall = c(8.33,
15.83, 18.33, 18.33, 16.67, 15.83, 17.5, 15, 14.17, 16.67, 15,
18.33, 6.67, 14.17, 6.67, 15.83, 10, 12.5, 10, 15, 35, 55, 50,
25, 12.5, 11.67, 12.5, 13.33, 15.83, 13.33, 14.17, 10, 11.67,
15.83, 8.33, 10.83, 7.5, 7.5, 10.83, 9.17, 5.83, 5.83, 10, 17.5,
20, 12.5, 20, 5, 18.33, 15, 15, 12.5, 10, 15.83, 20.83, 15.83,
18.33, 10, 11.67, 18.33, 10.83, 6.67, 7.5, 14.17, 6.67, 10.83,
37.5, 17.5, 25, 15, 30, 20, 24.17, 22.5, 16.67, 19.17, 14.17,
24.17, 26.67, 20.83, 16.67, 17.5, 14.17, 20, 12.5, 20.83, 11.67,
6.67, 12.5, 11.67, 55, 55, 55, 60, 55, 57.5, 24.17, 28.33, 19.17,
21.67, 20, 18.33, 24.17, 20.83, 17.5, 15, 16.67, 15, 15, 10.83,
11.67, 16.67, 14.17, 10, 30, 45, 55, 42.5, 55, 37.5, 33.33, 20.83,
20, 17.5, 18.33, 20, 28.33, 13.33, 17.5, 13.33, 20.83, 11.67,
11.67, 10.83, 13.33, 8.33, 8.33, 13.33, 55, 40, 55, 52.5, 45,
45, 12.5, 17.5, 15, 21.67, 17.5, 17.5, 14.17, 14.17, 16.67, 14.17,
19.17, 15, 10.83, 13.33, 6.67, 9.17, 8.33, 13.33, 45, 50, 40,
35, 55, 45, 10.83, 9.17, 23.33, 22.5, 15.83, 11.67, 26.67, 8.33,
20, 12.5, 10.83, 18.33, 9.17, 7.5, 9.17, 7.5, 5.83, 13.33, 37.5,
35, 45, 22.5, 30, 25, 15, 13.33, 20, 13.33, 20, 20, 9.17, 21.67,
12.5, 10, 14.17, 24.17, 10.83, 10, 13.33, 9.17, 11.67, 10.83,
45, 45, 42.5, 30, 55, 40, 11.67, 21.67, 18.33, 16.67, 16.67,
16.67, 14.17, 15, 15.83, 20.83, 12.5, 16.67, 10, 12.5, 9.17,
10, 7.5, 6.67, 27.5, 30, 32.5, 45, 17.5, 25, 15.83, 15.83, 17.5,
13.33, 12.5, 13.33, 13.33, 10.83, 19.17, 12.5, 13.33, 12.5, 7.5,
8.33, 9.17, 5.83, 10.83, 10.83, 47.5, 15, 20, 20, 30, 30, 10,
18.33, 12.5, 11.67, 10.83, 13.33, 13.33, 12.5, 10, 10, 13.33,
15, 6.67, 14.17, 7.5, 7.5, 10.83, 7.5, 22.5, 15, 22.5, 20, 25,
15)), .Names = c("Variety", "Sample.Part", "overall"), class = "data.frame", row.names = c(NA,
-288L))
The first and second columns in my data set are categorical, while the third is numeric. I have attached the data here.
Variety Sample.Part overall
Cal J Lower 8.33
Cal J Lower 15.83
Cal J Lower 18.33
Cal J Lower 18.33
Cal J Lower 16.67
Cal J Lower 15.83
Cal J Intermediate 17.50
Cal J Intermediate 15.00
Cal J Intermediate 14.17
Cal J Intermediate 16.67
Cal J Intermediate 15.00
Cal J Intermediate 18.33
Cal J Upper 6.67
Cal J Upper 14.17
Cal J Upper 6.67
Cal J Upper 15.83
Cal J Upper 10.00
Cal J Upper 12.50
Cal J % fruit damage 10.00
Cal J % fruit damage 15.00
Cal J % fruit damage 35.00
Cal J % fruit damage 55.00
Cal J % fruit damage 50.00
I would like to have the factor levels in the first column appear as leaf nodes in the y axis. Any help?

How can I write a function to shift regression line for each Facet?

# Fit one log-log regression on the whole data set, then raise the line
# (same slope) until no point lies above it, giving an upper envelope.
fit <- lm(log(fd) ~ log(area), data = all)
fintercept <- fit$coefficients[[1]] # intercept
fslope <- fit$coefficients[[2]] # slope
interceptmax <- max(log(all$fd) - fslope * log(all$area))
p <- ggplot(all, aes(x = log(area), y = log(fd))) +
  geom_point() +
  # The constants are passed as parameters rather than inside aes(), so a
  # single line is drawn instead of one identical line per data row.
  geom_abline(intercept = interceptmax, slope = fslope) + # shifted regression line
  # facet_wrap(~id) +
  theme_bw() +
  theme(panel.grid.major = element_line(colour = "#808080"))
p
# ggsave() is not a plot layer; call it separately on the finished plot.
ggsave("test.png", plot = p, width = 6, height = 4, dpi = 300)
This ggplot2 gives me an envelope curve (a shifted regression line). If I remove facet_wrap(~id), it works as intended. But I want the lines to be computed for every facet separately. Any suggestions how I can do this for every facet? (Similar to how using 'lm' inside ggplot2 works)
Here subset of 'all' is:
structure(list(fd = c(11, 7.75, 55.25, 45.25, 9.5, 89, 14.5,
8, 84.25, 5.25, 79.5, 7.75, 71, 38.5, 242.25, 33, 32, 19, 58.5,
249.25, 19, 72.5, 6.25, 27.0333333333333, 26.5, 81, 30, 29, 39.75,
18.5, 64.25, 91, 4.5, 30.5, 74, 256.75, 9, 81, 27, 7.5, 107,
26.75, 47.25, 16, 57, 37, 48.25, 48, 36, 147.25, 23.5, 42, 1.08333333333333,
21.5, 6.51666666666667, 198, 47.5, 8.75, 16, 43.5, 34.75, 30.25,
132.25, 2.25, 12.5, 225, 37, 17.25, 63.5, 48, 19.75, 12, 62.5,
64, 27, 11, 72.25, 246, 27.75, 15.5, 178, 93.75, 3.75, 3, 46.25,
4, 6.25, 5.25, 20, 44, 44.5, 1, 33, 18.25, 14.5, 29.25, 9, 33,
133, 67), area = c(20168.2374, 432.528, 5780.8535, 1411.5435,
543.8975, 660.447, 24995.9752, 543.8975, 2659.9178, 277.1287,
147.8883, 153.0683, 2217.0298, 1188.8045, 4237.2205, 489.5078,
1051.5352, 1362.3337, 1401.1836, 169.3852, 1333.8439, 1051.5352,
29.5259, 1429.6734, 668.2169, 17068.0216, 660.447, 16860.8226,
116.5495, 3820.2325, 784.7664, 841.7461, 1696.4422, 85.9876,
2066.8105, 2090.1204, 121.7294, 9999.9441, 127.4274, 1152.5447,
934.9857, 1424.4935, 1774.1419, 543.8975, 784.7664, 237.5019,
3486.124, 1080.025, 6164.1717, 3348.8546, 236.2069, 510.2277,
31.5979, 916.8558, 1994.2908, 3501.6639, 1152.5447, 486.9178,
533.5376, 271.9488, 336.6985, 1103.3349, 9764.2552, 36.7778,
2375.0191, 512.8176, 422.1681, 859.8761, 1118.8749, 237.5019,
133.6434, 453.2479, 1787.0918, 6063.1622, 237.5019, 1383.0537,
668.2169, 1424.4935, 621.5971, 1486.6532, 7355.5662, 984.1955,
68.8937, 154.6223, 116.5495, 1665.3624, 43.5118, 450.6579, 510.2277,
1494.4231, 404.0381, 598.2873, 339.2884, 12406.043, 274.5387,
151.2553, 753.6865, 29059.6666, 3470.5841, 1885.5113), id = structure(c(3L,
2L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L,
4L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 4L,
3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 3L, 4L, 4L, 3L, 4L,
3L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L,
4L, 4L, 2L, 3L, 4L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 2L,
1L, 4L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L,
2L, 2L, 4L), .Label = c("Csb", "Dfb(E)", "Dfa", "Cfa"), class = "factor")), .Names = c("fd",
"area", "id"), row.names = c("65775", "61848", "11286", "22467",
"34601", "53841", "14661", "8941", "88607", "47681", "7898",
"9598", "40615", "35550", "70985", "64634", "43864", "88065",
"37520", "100794", "74916", "76759", "48928", "17575", "7424",
"30848", "53970", "65662", "27392", "82248", "48004", "93560",
"51147", "42321", "30070", "22096", "25575", "49067", "23702",
"65877", "53617", "90727", "59449", "34568", "48155", "96101",
"31358", "40150", "75458", "71566", "1687", "86603", "77451",
"68075", "14478", "69151", "66028", "77394", "68580", "16328",
"26945", "73615", "24179", "19497", "82448", "25098", "23565",
"8113", "66312", "96062", "60313", "34767", "97877", "48598",
"96135", "36877", "7309", "71065", "49622", "65051", "55340",
"105109", "26480", "4550", "18935", "61714", "31667", "55035",
"27563", "17332", "35694", "20092", "44334", "21344", "22303",
"64816", "12814", "12391", "52770", "84979"), class = "data.frame")
Follow up to: How can I shift the regression line?
In order to have a different line per facet, you can calculate the slope and desired intercept separately for each id. It's often easiest to simply make a new dataset of the desired values and use it in geom_abline. This new dataset needs to contain an id column as well as the values to plot so you can get separate info per facet.
There are lots of ways that you could go about creating this dataset, including fitting the regressions one by one and collecting the values "by hand". I'll use functions from package dplyr to calculate the regression per id, save the slope, and calculate the new intercept using your formula.
library(dplyr)
# For every id: fit the log-log regression, keep its slope, and compute the
# intercept that lifts the line to the highest point of that group.
all2 <- all %>%
  group_by(id) %>%
  do({
    group_fit <- lm(log(fd) ~ log(area), data = .)
    group_slope <- coef(group_fit)[2]
    data.frame(
      fslope = group_slope,
      interceptmax = max(log(.$fd) - group_slope * log(.$area))
    )
  })
all2
Source: local data frame [4 x 3]
Groups: id [4]
id fslope interceptmax
(fctr) (dbl) (dbl)
1 Csb -0.5556930 8.3703705
2 Dfb(E) 0.5378457 0.5057893
3 Dfa 0.1227013 4.6143276
4 Cfa 0.3247770 3.3895178
Now use the new dataset in geom_abline.
# Scatter plot per id; each facet gets its own shifted regression line, read
# from the per-id table all2.
ggplot(data = all, mapping = aes(x = log(area), y = log(fd))) +
  geom_point() +
  geom_abline(
    mapping = aes(intercept = interceptmax, slope = fslope),
    data = all2
  ) + # shifted regression line
  facet_wrap(~id) +
  theme_bw() +
  theme(panel.grid.major = element_line(colour = "#808080"))
The resulting plot has different lines for each facet.

Resources