GG plot legend stacking all shapes / deleting one item from legend - r

There is a problem with my ggplot, I want to have different shapes for certain values. But in the legend all thing stack on top of each other and therefore the legend is not clear anymore. Moreover, I would like to delete Japan from the legend or find another way to make it red and shape=17. Maybe add to the filter Japan=='FALSE'? I tried but did not succeed...
Here is sample of my data:
structure(list(Country = c("Albania", "Aruba", "Austria", "Barbados",
"Bosnia and Herzegovina", "Canada", "China, Hong Kong SAR", "China, Macao SAR",
"Croatia", "CuraƧao", "Denmark", "Finland", "France", "Germany",
"Iceland", "Italy", "Japan", "Latvia", "Lithuania", "Malta",
"Mauritius", "Montenegro", "Netherlands", "New Zealand", "Poland",
"Portugal", "Republic of Korea", "Serbia", "Singapore", "Slovenia",
"Sri Lanka", "Taiwan", "Thailand", "Trinidad and Tobago", "United States of America"
), `Dependency Ratio 1990` = c(0.371731839842905, 0.42945960478559,
0.698167620530499, 0.444513116903726, 0.511357742868368, 0.519783119456753,
0.444426949479237, 0.30306654331295, 0.723691486939267, 0.424414908111054,
0.68769508504734, 0.641530173960242, 0.690189226564259, 0.755969184286434,
0.520917100019657, 0.763735128335739, 0.692461922514607, 0.728970209495916,
0.655093765838824, 0.556158238426314, 0.308439455191019, 0.551893405455789,
0.582543266573117, 0.548269437314668, 0.592240027149362, 0.744368260326749,
0.33818760118961, 0.653157768845158, 0.294237762460344, 0.611402526341597,
0.354595845574429, 0.391092962761626, 0.331304119150256, 0.35111793456609,
0.562804979721953), `Average Age 1990` = c(40.3688042387203,
42.5004114258846, 46.6904752788518, 42.5683625031078, 42.4530074518545,
44.2409448871874, 42.0677766503007, 39.7173235436725, 46.2329924328207,
42.2501753565583, 47.1375106133558, 46.3380103826365, 46.6915593676301,
46.9202073747455, 44.129974503284, 47.1071528898825, 46.6077408054755,
46.4664135824761, 45.460688263743, 44.9450928096016, 39.2332051727974,
43.66848, 45.1863467813393, 44.5466909246095, 44.9318462263063,
46.8407998745322, 39.6873706785703, 45.3128111624097, 39.2982502106955,
45.1205082490539, 40.2124158913374, 40.9051762916043, 39.4534335710941,
40.4173693037492, 45.0904477728946), ...4 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
`Dependency Ratio 2000` = c(0.457221087782508, 0.45940989018547,
0.742565467519652, 0.541204030550029, 0.597208883500012,
0.622668782217446, 0.473107405821069, 0.344264744501091,
0.780230513979289, 0.633789601501269, 0.790840886898238,
0.816743961984905, 0.766496601277572, 0.821136017787255,
0.572251283384235, 0.849390138872188, 0.927739183233871,
0.791488299481733, 0.704545225683664, 0.702609326498817,
0.35385418751795, 0.612564625368555, 0.69550083971213, 0.617845149047375,
0.611138887992547, 0.758176723785889, 0.399735388267277,
0.715467873467691, 0.383896159972764, 0.671137540638121,
0.407831309113246, 0.419443507121452, 0.374126385687095,
0.409593048372564, 0.615930392620661), `Average Age 2000` = c(42.9309383891972,
43.8674007980144, 47.739334648896, 45.3560289004102, 45.2990249348384,
46.3058678455289, 44.3575197674921, 42.5567755821042, 47.8706196243093,
46.6926342578517, 47.9056748231027, 48.2912968951969, 48.1201704908476,
48.7598382100637, 45.3475147626354, 48.7908038019529, 48.7739160208226,
47.837540150878, 46.9150297452015, 46.5384376276976, 41.1677637838199,
45.6830176554619, 46.9484767952653, 46.0162750047118, 45.6870193241911,
47.5083105450284, 41.9472090972845, 46.7580169116961, 42.3830413567395,
46.6274879755993, 41.6098644987726, 42.77485916275, 41.5907978667698,
41.9719792296039, 46.2781534087236), ...7 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA), `Dependency Ratio 2015` = c(0.769855001031037, 0.896573728647162,
0.952116351996821, 0.847635802309437, 0.907658891504387,
0.913339561634508, 0.877436210055064, 0.597279210234922,
1.04631340359464, 1.01174318826707, 0.977891738355926, 1.08472890326446,
1.01372962185931, 1.12997699958302, 0.77882885539859, 1.08347132170333,
1.20679093156161, 0.990147667477283, 0.994357961860711, 0.950194923573131,
0.666562280517562, 0.828817278052088, 0.989460150334804,
0.831814166715077, 0.847326002560978, 1.03326726133893, 0.749863481391909,
0.917215066264046, 0.671774028057953, 0.971441198307662,
0.611354032233621, 0.748057645284422, 0.665957813686028,
0.602213503073687, 0.855245238291093), `Average Age 2015` = c(46.3902100558352,
47.9818955923079, 49.878498965043, 48.4578763127188, 48.3870694416244,
48.7063314226308, 48.5131007402609, 44.1281261495054, 50.522561636728,
49.6455293947711, 49.9810732770387, 50.7119476819108, 50.6805196046482,
51.5236122201751, 47.3079677856577, 51.8838669025279, 53.2344169277342,
50.0674941000466, 49.9057070057583, 49.1748722211516, 45.3413745873924,
47.6617051653597, 49.9107746561504, 48.0022465682781, 48.1684244717051,
51.1526322354916, 47.160655712273, 49.0269050604693, 45.5481140676913,
50.0264456515826, 44.8882173741791, 47.0138589294768, 46.131374630996,
44.5204789350954, 48.0998439723386), `rgdpe 1990` = c(12005.7568359375,
2575.25561523438, 208007.234375, 4099.8515625, 6946.330078125,
915724.6875, 151044.28125, 9127.78125, 64448.71484375, NA,
144018.203125, 136194.359375, 1581529.625, 2204488.5, 8510.248046875,
1560881.5, 3552613.25, 44957.03515625, 53623.6875, 5354.54541015625,
11257.8095703125, 6702.1552734375, 427072.25, 76859.65625,
335254.875, 157535.140625, 565140.75, 113435.8046875, 64860.5703125,
42714.70703125, 55257.37109375, 430917.25, 308367.4375, 15085.6611328125,
9847675), `pop 1990` = c(3.286073, 0.062149, 7.723949, 0.260936,
4.463423, 27.541319, 5.727938, 0.343808, 4.776374, NA, 5.141115,
4.996222, 58.235697, 79.053984, 0.255043, 57.048236, 124.50524,
2.664439, 3.696035, 0.362015, 1.055868, 0.615002, 14.965448,
3.398172, 37.960193, 9.895364, 42.918419, 9.517675, 3.012966,
2.006405, 17.325773, 20.278946, 56.558186, 1.221116, 252.120309
), `emp 1990` = c(1.32407820224762, NA, 3.56034135818481,
0.105200000107288, 1.68987882137299, 13.2902002334595, 2.73075985908508,
0.16329999268055, 2.17813229560852, NA, 2.63417220115662,
2.47324681282043, 23.6595039367676, 39.5477294921875, 0.138074412941933,
22.8031978607178, 65.1040191650391, 1.25425291061401, 1.70560574531555,
0.132750615477562, 0.403737008571625, 0.174824863672256,
6.80782461166382, 1.52131986618042, 15.0829668045044, 4.46721506118774,
18.2060832977295, 4.61394643783569, 1.52955627441406, 1.1292530298233,
5.04270553588867, 8.64918994903564, 28.7045097351074, 0.374099999666214,
123.046020507812), ...5 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `rgdpe 2000` = c(15180.880859375,
4031.13427734375, 314579.6875, 6460.9755859375, 21482.595703125,
1276875.5, 256729.140625, 13199.31640625, 58379.2734375,
NA, 203091.515625, 186055.46875, 2135621.25, 3030253, 11653.5791015625,
2081385.625, 4696670.5, 26473.306640625, 42346.78125, 9706.8212890625,
18593.318359375, 4850.50634765625, 691869.625, 114914.1171875,
563679.1875, 259576.953125, 1150272, 60002.0703125, 166273.53125,
48606.53125, 90442.875, 789527.25, 504829.28125, 18303.63671875,
14110581), `pop 2000` = c(3.129243, 0.090853, 8.069276, 0.271515,
3.751176, 30.588383, 6.606327, 0.427782, 4.428075, NA, 5.341194,
5.187954, 60.874357, 81.400882, 0.280435, 56.692178, 127.524174,
2.384164, 3.501839, 0.393645, 1.185145, 0.613559, 15.926188,
3.858999, 38.556693, 10.297112, 47.379241, 7.516346, 4.028871,
1.987717, 18.777601, 22.18453, 62.952642, 1.267153, 281.710909
), `emp 2000` = c(0.962967455387115, 0.0419000014662743,
3.7599310874939, 0.12899999320507, 0.643303751945496, 14.952766418457,
3.20262169837952, 0.195299997925758, 1.67029082775116, NA,
2.75595617294312, 2.30501818656921, 25.6252250671387, 39.6031150817871,
0.1570855230093, 22.91796875, 65.9155044555664, 0.930018603801727,
1.40124833583832, 0.146938025951385, 0.464872002601624, 0.176752656698227,
8.20334815979004, 1.81842231750488, 14.4786930084229, 5.076171875,
21.4411239624023, 3.0847954750061, 2.08465480804443, 0.917375922203064,
6.30462980270386, 9.59665679931641, 31.47385597229, 0.503100037574768,
138.636108398438), ...9 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), `rgdpe 2015` = c(32037.935546875,
3959.59252929688, 448794.71875, 4856.21044921875, 40599.22265625,
1659691.75, 411350.65625, 62493.578125, 99181.7265625, 4041.00463867188,
278112.53125, 237412.921875, 2772463.25, 3915258.25, 16865.345703125,
2296760.75, 5094436, 51517.390625, 87529.2890625, 17455.76171875,
25619.560546875, 10165.248046875, 872643.75, 174613.65625,
1069768.375, 314019.625, 1928056.875, 108470.875, 451476.4375,
68875.1796875, 242116.15625, 1125999, 1108115.875, 38140.46484375,
18905122), `pop 2015` = c(2.890513, 0.104341, 8.67866, 0.285324,
3.429361, 36.026676, 7.185996, 0.602085, 4.232874, 0.159847,
5.688695, 5.481122, 66.596315, 81.787411, 0.330243, 60.578494,
127.985133, 1.997674, 2.93188, 0.433559, 1.259456, 0.626956,
16.938499, 4.614532, 38.034079, 10.368351, 50.823093, 7.095383,
5.592152, 2.071199, 20.908027, 23.462914, 68.714511, 1.370328,
320.87831), `emp 2015` = c(0.926395297050476, 0.0467174984514713,
4.27823972702026, 0.128199994564056, 0.616872131824493, 18.3558368682861,
3.77715754508972, 0.396699994802475, 1.69973313808441, 0.0617999993264675,
2.83158588409424, 2.52453279495239, 27.3850765228271, 42.5355796813965,
0.181162342429161, 24.4446144104004, 66.9830322265625, 0.897909104824066,
1.35488307476044, 0.191138163208961, 0.563370883464813, 0.221699982881546,
8.80725860595703, 2.36527323722839, 15.8249969482422, 4.60829973220825,
26.079252243042, 2.56693267822266, 3.65548992156982, 0.950674414634705,
7.83100032806396, 11.1978015899658, 37.9529876708984, 0.623300015926361,
150.248474121094), GDP_per_capita_1990 = c(3653.52712369369,
41436.7989064084, 26930.1667288326, 15712.0963090566, 1556.27868524337,
33249.1224367286, 26369.74793547, 26549.0659030622, 13493.2303968973,
NA, 28013.0289100711, 27259.4691298745, 27157.3915394195,
27885.8621470614, 33367.895009371, 27360.7320654051, 28533.8452421762,
16872.9834521451, 14508.4360672991, 14790.949021881, 10662.1372845019,
10897.7780128154, 28537.2178634412, 22617.9417198423, 8831.74843183753,
15920.0955745539, 13167.790500391, 11918.4364550691, 21527.1497628915,
21289.1749329024, 3189.31634933402, 21249.4894951641, 5452.2158383934,
12353.995142814, 39059.4277750151), GDP_per_capita_2015 = c(11083.8233721402,
37948.5775418759, 51712.4439429589, 17019.9858729681, 11838.7135843237,
46068.41191788, 57243.3739526156, 103795.274961177, 23431.2966940429,
25280.4534252872, 48888.6346077615, 43314.6574506096, 41630.8807777127,
47871.1601471283, 51069.5024667442, 37913.7974278463, 39804.9045274657,
25788.6875561278, 29854.3218216639, 40261.5600616064, 20341.7670382094,
16213.6546214966, 51518.3635810942, 37839.9491541071, 28126.5749855544,
30286.3613509998, 37936.6300079375, 15287.5292285138, 80733.9352542635,
33253.7721809927, 11580.0575659291, 47990.5863355251, 16126.3735835943,
27833.0916713006, 58916.7962147395), change_log_GDP_per_cap_1990_2015 = c(1.10979365523243,
-0.0879373277889428, 0.652451349776046, 0.0799574118764497,
2.02907746023446, 0.326099133412088, 0.775094455358737, 1.36342588458029,
0.551884486717116, NA, 0.556875228192604, 0.46309013522626,
0.427192960710386, 0.540393411165555, 0.425593287986558,
0.32620624489414, 0.332899198982464, 0.424222198470275, 0.721599336011078,
1.00138172800831, 0.645977367508866, 0.39729484866726, 0.590729193215088,
0.514621927745218, 1.15836185302803, 0.643115306926695, 1.05814340225474,
0.248950929776939, 1.32184407992377, 0.44596948530371, 1.28947786000817,
0.814672002339478, 1.08443394063506, 0.812246156576203, 0.411041939497887
), change_dependency_ratio_1990_2015 = c(0.398123161188132,
0.467114123861573, 0.253948731466323, 0.403122685405711,
0.396301148636019, 0.393556442177755, 0.433009260575827,
0.294212666921972, 0.322621916655375, 0.587328280156014,
0.290196653308586, 0.44319872930422, 0.323540395295048, 0.374007815296586,
0.257911755378933, 0.319736193367588, 0.514329009047002,
0.261177457981367, 0.339264196021887, 0.394036685146817,
0.358122825326543, 0.2769238725963, 0.406916883761686, 0.283544729400408,
0.255085975411616, 0.288899001012185, 0.411675880202299,
0.264057297418888, 0.377536265597609, 0.360038671966065,
0.256758186659192, 0.356964682522796, 0.334653694535773,
0.251095568507597, 0.29244025856914)), class = "data.frame", row.names = c(NA,
-35L))
And this is my code:
#Adding OECD
#Exclude japan for fig
OECD <- c("Dem. People's Republic of Korea",'Mexico','Chile',
'New Zealand','Czechia','Hungary','Slovakia','Denmark','Estonia',
'Finland','Iceland','Ireland','Latvia','Lithuania','Norway',
'Sweden','United Kingdom','Greece','Italy','Portugal','Slovenia',
'Spain','Austria','Belgium','France','Germany','Luxembourg',
'Netherlands','Switzerland','Australia','Canada','United States of America',
'Poland','Turkey','Israel')
#Figure 2
library(tidyverse)
df %>%
mutate(OECD = factor(Country %in% OECD, labels = c("NonOECD","OECD"))) %>% mutate(Japan = factor(Country == 'Japan' , labels=c('FALSE','TRUE')))-> newdata
ggplot() +
geom_point(data = filter(newdata, OECD == 'NonOECD'),aes(x = change_dependency_ratio_1990_2015, y = change_log_GDP_per_cap_1990_2015, colour='NonOECD'),shape = 16, size=3) +
geom_point(data = filter(newdata, Japan == 'TRUE'),aes(x = change_dependency_ratio_1990_2015, y = change_log_GDP_per_cap_1990_2015,colour='Japan'),shape = 17, size=4) +
geom_point(data = filter(newdata, OECD == 'OECD'),aes(x = change_dependency_ratio_1990_2015, y = change_log_GDP_per_cap_1990_2015, colour='OECD' ),shape = 18, size=4) +
scale_color_manual(values = c(NonOECD = "cyan4", OECD = "orange",Japan='red'),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries")) + geom_abline(size=1, col='grey')+
theme_classic()+ theme( panel.grid.major.y =element_line(color='grey', size=0.7),legend.title = element_blank(),
panel.grid.minor.y =element_blank(),
legend.background = element_blank(), legend.box.background = element_rect(colour = "black"),
legend.spacing.y = unit(0, "mm"),legend.direction = 'horizontal',
legend.position = "bottom",aspect.ratio = 0.7, axis.text = element_text(colour = 1, size = 13),)
Example of what I got:
And what it should look like:
Thanking you in advance!!

The code below produces a plot equivalent to the expected output.
The two main differences are:
There is no data for "Japan" in the question so I have substituted "Portugal" (my country) for it;
There is no GDP data, logged or not so I have created a new column with random uniform numbers, runif.
The plot is in fact simple, to create the factor OECD start by creating a logical vector, then use an ifelse to assign an integer value to the special country, in this case "Portugal" and add 2 to the other logical vector's elements, giving FALSE/TRUE + 2 == 0/1 + 2.
In order not to mix the plot with the theme, I have also created a custom theme, with code at the end.
library(tidyverse)
set.seed(2021)
df %>%
mutate(OECD = Location %in% OECD,
OECD = ifelse(Location == "Portugal", 1L, OECD + 2L),
OECD = factor(OECD, labels = c("Portugal", "NonOECD","OECD"))) %>%
mutate(GDP = runif(n(), -2, 2)) %>%
ggplot(aes(x = `Dependency Ratio`, y = GDP, color = OECD, shape = OECD, size = OECD)) +
geom_point() +
scale_color_manual(
values = c(NonOECD = "cyan4", OECD = "orange", Portugal = 'red'),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
scale_shape_manual(
values = c(NonOECD = 16, OECD = 18, Portugal = 17),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
scale_size_manual(
values = c(NonOECD = 4, OECD = 3, Portugal = 4),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
geom_abline(size = 1, col = 'grey') +
theme_custom_Cas()
In order to remove the special country from the legend, subset the data. The code below will output the %>% pipe to a new data set and used it in the plot.
set.seed(2021)
df %>%
mutate(OECD = Location %in% OECD,
OECD = ifelse(Location == "Portugal", 1L, OECD + 2L),
OECD = factor(OECD, labels = c("Portugal", "NonOECD","OECD"))) %>%
mutate(GDP = runif(n(), -2, 2)) -> newdata
ggplot(newdata, aes(x = `Dependency Ratio`, y = GDP, color = OECD, shape = OECD, size = OECD)) +
geom_point(data = subset(newdata, OECD != "Portugal")) +
# In the special country's layer the color, shape and size must be
# outside aes() and show.legend = FALSE
geom_point(
data = subset(newdata, OECD == "Portugal"),
color = "red", shape = 17, size = 4,
show.legend = FALSE
) +
scale_color_manual(
values = c(NonOECD = "cyan4", OECD = "orange", Portugal = 'red'),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
scale_shape_manual(
values = c(NonOECD = 16, OECD = 18, Portugal = 17),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
scale_size_manual(
values = c(NonOECD = 4, OECD = 3, Portugal = 4),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
geom_abline(size = 1, col = 'grey') +
theme_custom_Cas()
In order to have a fill color, the points shapes must be changed. See in help("points") the rightmost points, filled in grey. Those shapes allow for a border (ggplot aesthetic color) and a fill color (ggplot aesthetic fill).
ggplot(newdata, aes(x = `Dependency Ratio`, y = GDP, fill = OECD, shape = OECD, size = OECD)) +
geom_point(data = subset(newdata, OECD != "Portugal")) +
# In the special country's layer the color, shape and size must be
# outside aes() and show.legend = FALSE
geom_point(
data = subset(newdata, OECD == "Portugal"),
fill = "red", shape = 24, size = 4,
show.legend = FALSE
) +
scale_fill_manual(
values = c(NonOECD = "cyan4", OECD = "orange", Portugal = 'red'),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
scale_shape_manual(
values = c(NonOECD = 21, OECD = 23, Portugal = 24),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
scale_size_manual(
values = c(NonOECD = 4, OECD = 3, Portugal = 4),
labels = c(NonOECD = "All Countries except OECD", OECD = "OECD countries", Portugal = "Portugal")
) +
geom_abline(size = 1, col = 'grey') +
theme_custom_Cas()
Custom theme code.
theme_custom_Cas <- function(){
theme_classic() %+replace% #replace elements we want to change
theme(
panel.grid.major.y = element_line(color = 'grey', size = 0.7),
legend.title = element_blank(),
panel.grid.minor.y = element_blank(),
legend.background = element_blank(),
legend.box.background = element_rect(colour = "black"),
legend.spacing.y = unit(0, "mm"),
legend.direction = 'horizontal',
legend.position = "bottom",
aspect.ratio = 0.7,
axis.text = element_text(colour = 1, size = 13)
)
}

Related

R Matrix Row.names adding a number at the end of every repeated string

im uing the row names function to track the production capacity of power producing facilities based on the fuel they use. when i go to create a barplot of the data, instead of creating a nice bar plot of the 6 types of fuel im interested in, i instead get a plot that looks like this
bad bar plot
when i reviewed my matrix, i found that my data looks like this enter image description here
does anyone know how i can effectively group this dataset to fix my barplot?
code used
install.packages('ggplot2', 'tidyverse')
install.packages('tidyverse')
library('tidyverse')
Power_Facilities<- read.csv('powerplants (global) - global_power_plants.csv')
drop<-c("secondary.fuel", "other_fuel2", "other_fuel3", "geolocation_source")
PF<-Power_Facilities[,!(names(Power_Facilities) %in% drop)]
PF<-subset(PF,PF$capacity.in.MW>2000)
PF$generated <-(ifelse(is.na (PF$generation_gwh_2021), paste(PF$estimated_generation_gwh_2021), PF$generation_gwh_2021))
PF$generated <-as.numeric(PF$generated)
#PF<- PF [!((PF$generated == "NA") | PF$generated==""), ]
#PF<- PF [!((PF$generated >1)),]
#PF<- PF [!((PF$capacity.in.MW<20)), ]
head(sort(PF$capacity.in.MW, decreasing = TRUE))
tail(sort(PF$capacity.in.MW, decreasing = TRUE))
head(sort(PF$generated, decreasing = TRUE))
tail(sort(PF$generated, decreasing = TRUE))
pf2<-PF%>%group_by(primary_fuel)summarize
barplot((PF2$capacity.in.MW), names.arg =pf2$primary_fuel)
barplot(t(power_matrix), beside = T, las=2, legend.text =T, col = c("blue", "grey"), ylim=c(0, 1000000))
summary(power_matrix)
structure(list(country.code = c("AUS", "AUS", "AUS", "AZE", "BHR",
"BLR", "BEL", "BEL", "BRA", "BRA"), country_long = c("Australia",
"Australia", "Australia", "Azerbaijan", "Bahrain", "Belarus",
"Belgium", "Belgium", "Brazil", "Brazil"), name.of.powerplant = c("Bayswater",
"Liddell", "Loy Yang A", "Azerbaijan TPP", "Alba Power Station",
"Lukoml Thermal Power Plant Belarus", "DOEL 4", "TIHANGE 3",
"Belo Monte", "Ilha Solteira"), capacity.in.MW = c(2640, 2200,
2180, 2400, 2204, 2460, 2910, 2053.8, 3327.45544, 3444), latitude = c(-32.3953,
-32.3713, -38.2536, 40.78, 26.0945, 54.6803, 51.3254, 50.5342,
-3.1264, -20.3822), longitude = c(150.9491, 150.9776, 146.5746,
46.9901, 50.6008, 29.1341, 4.2597, 5.2751, -51.775, -51.3636),
primary_fuel = c("Coal", "Coal", "Coal", "Oil", "Gas", "Gas",
"Nuclear", "Nuclear", "Hydro", "Hydro"), start.date = c(NA,
NA, NA, NA, NA, NA, 1985, 1985, 2016, 1973), owner.of.plant = c("Macquarie Generation",
"Macquarie Generation", "GEAC Great Energy Alliance Corporation",
"AzerEnerji", "Aluminum Bahrain", "", "", "", "", ""), generation_gwh_2021 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), estimated_generation_gwh_2021 = c(NA,
NA, NA, NA, NA, NA, NA, NA, 17396.84, 6318.07), generated = c(NA,
NA, NA, NA, NA, NA, NA, NA, 17396.84, 6318.07)), row.names = c(356L,
565L, 573L, 927L, 942L, 1017L, 1044L, 1083L, 1386L, 2164L), class = "data.frame")```
I'd pivot your data to long format and use ggplot2:
library(tidyr)
library(ggplot2)
PF2_long = PF2 %>%
pivot_longer(cols = c(generated, capacity.in.MW), names_to = "measure")
ggplot(PF2_long, aes(x = primary_fuel, y = value, fill = measure)) +
geom_col(position = "dodge") +
scale_fill_manual(values = c("blue", "grey60")) +
labs(
x = "Primary fuel",
y = "MW",
fill = ""
) +
theme_bw()

ggplot2 | How to customize the order of string values in the legend?

In continuation of my earlier question, I am facing issues w.r.t. to ordering the legends. The initially posted question had ordinal (ordered) values and hence worked perfectly. In real-time, the data rendered in the legend is being ordered alphabetically.
library(ggplot2)
library(tidyverse)
library(reshape2)
#Creating a dataframe with use-case specific variables.
df = data.frame(
Year = 2006:2025,
Survey = c(40.5, 39.0, NA, NA, NA, NA, 29.9, NA, NA, NA, 21.6,
NA, NA, NA, NA, NA, NA, NA, NA, NA),
Projected1 = c(NA, NA, NA, NA, NA, NA, 29.9, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 14.9),
WhatIf= c(NA, NA, NA, NA, NA, NA, 29.9, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 13.0),
Projected2 = c(NA, NA, NA, NA, NA, NA, 29.9, 27.6, 25.4, 23.4, 21.6,
19.9, 18.4, 16.9, 15.6, 14.4, 13.3, NA, 12.2, 11.3)
)
#Transforming data
df <- melt(df,id.vars = "Year")
ggplot(data = NULL, aes(x=factor(Year), y=value, group=variable)) +
geom_line(data = df[!is.na(df$value) & df$variable != "Survey",],
aes(linetype=variable, color = variable), size = 1, linetype = "dashed")+
geom_point(data = df[!is.na(df$value) & df$variable == "Survey",],
aes(color = variable), size = 4) +
scale_color_manual(values=c('#999999', 'orange2','turquoise2','blue2'))+
guides(color = guide_legend(override.aes = list(linetype = c("blank", "dashed", "dashed", "dashed"),
shape = c(16, NA, NA, NA)))) +
scale_y_continuous(
breaks=seq(0,100, 10), labels = seq(0, 100, 10), limits=c(0,70),
sec.axis = dup_axis()) +
theme(
legend.position = 'bottom', legend.direction = 'horizontal',
panel.grid.major.y = element_line(color='gray85'),
axis.title = element_text(face='bold')) +
labs(x='Year', y='measure (%)')
Created on 2020-07-11 by the reprex package (v0.3.0)
Output
Objective: Sequence in the legend and respective plots must be as follows: c("Survey", "WhatIf", "Projected1", "Projected2" )
I have tried the following methods alternatively but there's no difference in the output.
df$variable <- factor(df$variable, levels = c("Survey", "WhatIf", "Projected1", "Projected2" ))
scale_fill_discrete(breaks = c("Survey", "WhatIf", "Projected1", "Projected2" ))
I might be missing out on a trivial step and any suggestions would be greatly helpful.
You just need to add a breaks = argument to scale_color_manual and change the order of values = to match because you have the guide argument set to color =:
scale_color_manual(breaks = c("Survey", "WhatIf", "Projected1", "Projected2" ),
values=c('turquoise2','blue2','#999999', 'orange2'))+

Geom_txt does not display correctly in animation

Sorry for my English.
I'm creating a feature to display the NBA season moving average. I'm doing an animation, part of which is displaying the current rating. I do this with geom_text. The problem is that instead of displaying a single value, you get a jumble.
Code:
library(httr)
library(jsonlite)
library(tidyverse)
##Getting data via NBA API.
##Required link
adv_box_team <- "https://stats.nba.com/stats/teamgamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlusMinus=N&Rank=N&Season=2018-19&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&VsConference=&VsDivision="
##Adding headers
request_headers <- c(
"accept-encoding" = "gzip, deflate, sdch",
"accept-language" = "en-US,en;q=0.8",
"cache-control" = "no-cache",
"connection" = "keep-alive",
"host" = "stats.nba.com",
"pragma" = "no-cache",
"upgrade-insecure-requests" = "1",
"user-agent" = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9"
)
#Getting a response
request <- GET(adv_box_team, add_headers(request_headers))
#Convert to js.file to list
boxscore_data <- fromJSON(content(request, as = "text"))
#Convert to tibble data and assigning column names
table <- tbl_df(data.frame(boxscore_data$resultSets$rowSet[[1]], stringsAsFactors = FALSE))
names(table) <- toupper(boxscore_data$resultSets$headers[[1]])
library(tidyverse)
library(lubridate)
library(zoo)
library(ggthemes)
library(gganimate)
library(rlang)
library(data.table)
##Cleaning data
rating <- table %>%
select(TEAM_ID,
TEAM_ABBREVIATION,
TEAM_NAME,
GAME_ID,
GAME_DATE,
MATCHUP,
WL,
E_OFF_RATING,
E_DEF_RATING,
E_NET_RATING)
rating1 <- rating %>%
rename_at(vars(starts_with("E_")),
funs(str_c(str_sub(., start = 3, end = 3),
str_sub(., start = 7, end = 7),
str_sub(., start = 9, end = 9),
str_sub(., start = 12, end = 12))))
rolling_offnet_rating_nba <- function(table, variable, name, col1 = col1, col2 = col2){
quo_rating <- enquo(variable)
quo_col1 <- enquo(col1)
quo_col2 <- enquo(col2)
test1 <- rating1 %>%
mutate(GAME_DATE = as.Date(ymd_hms(GAME_DATE))) %>%
mutate_at(vars(ORTG:NRTG), list(~as.numeric))
team <- test1 %>%
filter(TEAM_ABBREVIATION == name) %>%
mutate(RATING = rollmeanr(!! quo_rating, k = 10, fill= NA)) %>%
na.omit(test1)
league <- test1 %>%
group_by(TEAM_NAME) %>%
summarise(ORTG = mean(ORTG),
DRTG = mean(DRTG),
NTRG = mean(NRTG))
average <- league %>%
mutate(average = mean(!! quo_rating)) %>%
select(average) %>%
unique() %>%
.$average
top10 <- league %>%
arrange(desc(!! quo_rating)) %>%
select(!! quo_rating) %>%
slice(10)
top10 <- top10[[1]]
bottom10 <- league %>%
arrange(desc(!! quo_rating)) %>%
select(!! quo_rating) %>%
slice(21)
bottom10 <- bottom10[[1]]
data <- team %>%
select(GAME_DATE) %>%
unique() %>%
arrange(GAME_DATE)
data <- data[[1,1]]
table_color <- data.table(TEAM_ID = c(1610612737, 1610612738, 1610612751, 1610612766, 1610612741, 1610612739, 1610612742,
1610612743, 1610612765, 1610612744, 1610612745, 1610612754, 1610612746, 1610612747,
1610612763, 1610612748, 1610612749, 1610612750, 1610612740, 1610612752, 1610612760,
1610612753, 1610612755, 1610612756, 1610612757, 1610612758, 1610612759, 1610612761,
1610612762, 1610612764),
TEAM_NAME = c("Atlanta Hawks", "Boston Celtics", "Brooklyn Nets",
"Charlotte Hornets", "Chicago Bulls", "Cleveland Cavaliers",
"Dallas Mavericks", "Denver Nuggets", "Detroit Pistons",
"Golden State Warriors", "Houston Rockets", "Indiana Pacers",
"LA Clippers", "Los Angeles Lakers", "Memphis Grizzlies",
"Miami Heat", "Milwaukee Bucks", "Minnesota Timberwolves",
"New Orleans Pelicans", "New York Knicks", "Oklahoma City Thunder",
"Orlando Magic", "Philadelphia 76ers", "Phoenix Suns",
"Portland Trail Blazers", "Sacramento Kings", "San Antonio Spurs",
"Toronto Raptors", "Utah Jazz", "Washington Wizards"),
TEAM_ABBREVIATION = c("ATL", "BOS", "BKN", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GSW", "HOU", "IND", "LAC", "LAL",
"MEM", "MIA", "MIL", "MIN", "NOP", "NYK", "OKC", "ORL", "PHI", "PHX", "POR", "SAC", "SAS", "TOR",
"UTA", "WAS"),
col1 = c("#E03A3E", "#007A33", "#000000", "#1D1160", "#CE1141", "#6F263D", "#00538C", "#0E2240",
"#C8102E", "#006BB6", "#CE1141", "#002D62", "#C8102E", "#552583", "#5D76A9", "#98002E",
"#00471B", "#0C2340", "#0C2340", "#006BB6", "#007AC1", "#0077C0", "#006BB6", "#1D1160",
"#E03A3E", "#5A2D81", "#C4CED4", "#CE1141", "#002B5C", "#002B5C"),
name_col1 = c("HAWKS RED", "CELTICS GREEN", "BLACK",
"HORNETS PURPLE", "BULLS RED", "CAVALIERS WINE",
"ROYAL BLUE", "MIDNIGHT BLUE", "RED",
"WARRIORS ROYAL BLUE", "RED", "PACERS BLUE",
"RED", "LAKERS PURPLE", "BLUE",
"RED", "GOOD LAND GREEN", "MIDNIGHT BLUE",
"PELICANS NAVY", "KNICKS BLUE", "THUNDER BLUE",
"MAGIC BLUE", "BLUE", "PURPLE",
"RED", "PURPLE", "SILVER",
"RED", "NAVY", "NAVY BLUE"),
col2 = c("#C1D32F", "#BA9653", "#FFFFFF", "#00788C", "#000000", "#041E42", "#002B5E", "#FEC524",
"#006BB6", "#FDB927", "#000000", "#FDBB30", "#1D428A", "#FDB927", "#12173F", "#F9A01B",
"#EEE1C6", "#236192", "#C8102E", "#F58426", "#EF3B24", "#C4CED4", "#ED174C", "#E56020",
"#000000", "#63727A", "#000000", "#000000", "#00471B", "#E31837"),
name_col2 = c("VOLT GREEN", "CELTICS GOLD", "WHITE", "TEAL",
"BLACK", "CAVALIERS NAVY", "NAVY BLUE", "SUNSHINE YELLOW",
"ROYAL", "GOLDEN YELLOW", "BLACK", "YELLOW",
"BLUE", "GOLD", "NAVY", "YELLOW",
"CREAM CITY CREAM", "LAKE BLUE", "PELICANS RED", "KNICKS ORANGE",
"SUNSET", "SILVER", "RED", "ORANGE",
"BLACK", "GRAY", "BLACK", "BLACK",
"GREEN", "RED"),
col3 = c("#26282A", "#963821", NA, "#A1A1A4", NA, "#FFB81C", "#B8C4CA", "#8B2131",
"#BEC0C2", "#26282A", "#C4CED4", "#BEC0C2", "#BEC0C2", "#000000", "#F5B112", "#000000",
"#0077C0", "#9EA2A2", "#85714D", "#BEC0C2", "#002D62", "#000000", "#002B5C", "#000000",
NA, "#000000", NA, "#A1A1A4", "#F9A01B", "#C4CED4"),
name_col3 = c("HAWKS CHARCOAL", "CELTICS BROWN", NA, "GRAY",
NA, "CAVALIERS NAVY", "SILVER", "FLATIRONS RED",
"GRAY", "SLATE", "SILVER", "SILVER",
"SILVER", "BLACK", "YELLOW", "BLACK",
"GREAT LAKES BLUE", "MOONLIGHT GREY", "PELICANS GOLD", "KNICKS SILVER",
"BLUE", "BLACK", "NAVY", "BLACK",
NA, "BLACK", NA, "SILVER",
"YELLOW", "SILVER" ),
col4 = c( NA, "#E59E6D", NA, NA, NA, "#000000", "#000000", "#1D428A",
"#002D62", NA, NA, NA, "#000000", NA, "#707271", NA,
"#000000", "#78BE20", NA, "#000000", "#FDBB30", NA, "#C4CED4", "#63727A",
NA, NA, NA, "#B4975A", NA, NA ),
name_col4 = c( NA, "CELTICS BEIGE", NA, NA,
NA, "CAVALIERS BLACK", "BLACK", "SKYLINE BLUE",
"NAVY", NA, NA, NA,
"BLACK", NA, "GRAY", NA,
"BLACK", "AURORA GREEN", NA, "KNICKS BLACK",
"YELLOW", NA, "SILVER", "GRAY",
NA, NA, NA, "GOLD",
NA, NA ),
col5 = c(NA, "#000000", NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "#F9AD1B",
NA, NA, NA, NA, NA, NA ),
name_col5 = c(NA, "CELTICS BLACK", NA, NA, NA,
NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA,
NA, NA, NA, "YELLOW", NA,
NA, NA, NA, NA, NA ),
col6 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "#B95915",
NA, NA, NA, NA, NA, NA ),
name_col6 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, "DARK ORANGE",
NA, NA, NA, NA, NA, NA ),
col7 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "#BEC0C2",
NA, NA, NA, NA, NA, NA ),
name_col7 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, "LIGHT GRAY",
NA, NA, NA, NA, NA, NA))
color1 <- table_color %>%
filter(TEAM_ABBREVIATION == name) %>%
select(!! quo_col1)
color1 <- color1[[1]]
color2 <- table_color %>%
filter(TEAM_ABBREVIATION == name) %>%
select(!! quo_col2)
color2 <- color2[[1]]
name1 <- paste0("name_", quo_name(quo_col1))
name2 <- paste0("name_", quo_name(quo_col2))
name_color1 <- table_color %>%
filter(TEAM_ABBREVIATION == name) %>%
select(name1)
name_color1 <- name_color1[[1]]
name_color2 <- table_color %>%
filter(TEAM_ABBREVIATION == name) %>%
select(name2)
name_color2 <- name_color2[[1]]
max <- team %>%
filter(RATING == max(RATING)) %>%
select(RATING)
max <- max[[1]]
Sys.setlocale("LC_ALL", "C")
gg <- ggplot(team, aes(GAME_DATE, RATING)) +
geom_hline(yintercept = c(top10, bottom10), col = c("red", "blue")) +
annotate(geom = "text", x = as.Date(data) + 2, y = top10 - 0.2,
label = "TOP 10", col = "red") +
annotate(geom = "text", x = as.Date(data) + 2, y = bottom10 + 0.2,
label = "BOTTOM 10", col = "blue") +
geom_line(size = 2, col = if_else(team$RATING > average, color1, color2)) +
theme_tufte() +
labs(title = paste0(team$TEAM_NAME, " 10-Game Rolling Luck-Adjusted ", quo_name(quo_rating)),
subtitle = paste0(paste0(name_color1, " - above average ", quo_name(quo_rating)),
"\n", paste0(name_color2, " - below average ",quo_name(quo_rating))),
caption = "Source: BBall Index Data & Tools\nTelegram: #NBAatlantic, twitter: #vshufinskiy")
theme(plot.title = element_text(size = 12, hjust = 0.5),
plot.caption = element_text(size = 10),
plot.subtitle = element_text(size = 9))
ggsave(paste0(unique(team$TEAM_NAME), quo_name(quo_rating), ".jpeg"), gg, width = 8, units = "in")
anim <- gg +
theme(plot.title = element_text(hjust = 0.5, size = 25),
plot.subtitle = element_text(size = 15),
plot.caption = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title = element_text(size = 18)) +
geom_text(aes(x = as.Date(data), y = max + 0.5),
label = paste0(quo_name(quo_rating)," ", round(team$RATING, digits = 1)), size = 6,
col = if_else(team$RATING > average, color1, color2)) +
transition_reveal(GAME_DATE) +
labs(title = paste0(team$TEAM_NAME, " 10-Game Rolling Luck-Adjusted ", quo_name(quo_rating)),
subtitle = paste0(paste0(name_color1, " - above average ",quo_name(quo_rating)),
"\n", paste0(name_color2, " - below average ",quo_name(quo_rating)),
"\n", "Date: {frame_along}"),
caption = paste0("Source: BBall Index Data & Tools\nTelegram: #NBAatlantic, twitter: #vshufinskiy"))
animate(anim, fps = 5, duration = 5, width = 1280, height = 720,
renderer = gifski_renderer(paste0(unique(team$TEAM_NAME), quo_name(quo_rating), ".gif")))
}
rolling_offnet_rating_nba(rating1, ORTG, "GSW")
Result: https://c.radikal.ru/c40/1907/c8/37e210e3f31b.gif

Ylim max to change dynamically with a variable, while min is set to 0 in R

I would like my graphs to start at y= 0, but I would like the maximum to change with a multiple of the data, or somehow otherwise zoom out dynamically. I have 34 charts in this set with various ymax.
I have tried scale_y_continuous and coord_cartesian but when I try to put in the expand = expand_scale(mult = 2) that works for getting my maximum to change dynamically, but then the graphs start to start at negative numbers, and I want them to start at 0.
title<- c(
"Carangidae",
"Atlantic cutlassfish",
"Lizardfish",
"Sharks",
"Mackerel")
#DATA#
biomass<- structure(list(timestep = structure(c(10957, 10988, 11017, 11048,
11078, 11109, 11139, 11170, 11201, 11231, 11262, 11292), class = "Date"),
bio_pre_Carangidae = c(0.01105, 0.0199, 0.017,
0.01018, 0.0119, 0.0101, 0.009874, 0.009507,
0.009019, 0.00843, 0.00841, 0.00805), bio_obs_Carangidae = c(NA,
NA, NA, NA, NA, 0.00239, NA, NA, NA, NA, NA, NA), bio_pre_Atl_cutlassfish = c(0.078,
0.069, 0.067, 0.06872, 0.0729, 0.0769,
0.0775, 0.075, 0.0743, 0.072, 0.071,
0.069), bio_obs_Atl_cutlassfish = c(NA, NA, NA, NA, NA,
0.0325, NA, NA, NA, NA, NA, NA), bio_pre_lizardfish = c(0.0635,
0.062, 0.057, 0.0536, 0.0505, 0.0604,
0.0627, 0.068, 0.0695, 0.066, 0.0623,
0.0598), bio_obs_lizardfish = c(NA, NA, NA, NA, NA, 0.037,
NA, NA, NA, NA, NA, NA), bio_pre_sharks = c(0.025, 0.0155,
0.0148, 0.0135, 0.01379, 0.01398, 0.014,
0.0139, 0.0136, 0.0132, 0.0126, 0.011),
bio_obs_sharks = c(NA, NA, NA, NA, NA, 0.003, NA, NA,
NA, NA, NA, NA), bio_pre_mackerel = c(0.0567, 0.0459,
0.0384, 0.03, 0.0328, 0.0336, 0.0299,
0.0296, 0.02343, 0.02713, 0.0239, 0.019
), bio_obs_mackerel = c(NA, NA, NA, NA, NA, 0.055, NA,
NA, NA, NA, NA, NA)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))
This is my function:
function (biomass, .var1, .var2, .var3) {
p <- ggplot(biomass, aes(x = timestep)) +
geom_line(aes(y = .data[[.var1]], linetype = "Predicted")) + geom_point(size = 3, aes(y = .data[[.var2]], shape = "Observed")) +
ggtitle(paste0(.var3)) +
ylab(expression("biomass" ~ (t/km^2))) +
theme_classic() +
scale_y_continuous(limits = c(0, NA), expand = expand_scale(mult = 2))+
###This is the portion where I cannot figure out how to set ymin = 0 and then ymax to 2* the maximum value of a dataset.##
theme(legend.position = "right") +
theme(axis.ticks = element_line(size = 1), axis.ticks.length = unit(0.25, "cm"))
return(p)
}
## create two separate name vectors
var1_names <- colnames(biomass)[grepl("^bio_pre", colnames(biomass))]
var2_names <- colnames(biomass)[grepl("^bio_obs", colnames(biomass))]
var3_names <- title
## loop through two vectors simultaneously and save result in a list
# ..1 = var1_names, ..2 = var2_names
my_plot_b <- pmap(list(var1_names, var2_names, var3_names), ~ my_bio_plot(biomass, ..1, ..2, ..3))
## merge plots together
# https://cran.r-project.org/web/packages/cowplot/
# install.packages("cowplot", dependencies = TRUE)
dev.new(title = "Model Fit Biomass",
width = 12,
height = 6,
noRStudioGD = TRUE
)
print(my_plot_b)
I can manage to get EITHER a set ymin=0 (a) OR a dynamic ymax (b) but cannot manage to get both.
a
b
How about this? Seems to work on your data.
Define the max for each chart at the top of your function:
my_bio_plot <- function (biomass, .var1, .var2, .var3) {
max_y = 2.0 * max(biomass[[.var1]])
...
scale_y_continuous(limits = c(0, max_y)) +
...
This seems to create the requested output, with min y = 0 and max y = 2 * max y in data.
Updated to add a substantially different approach from yours:
biomass %>%
gather(species, bio, -timestep) %>%
mutate(type = ifelse(stringr::str_detect(species, 'pre'), 'predicted', 'observed'),
species = gsub(".*_", "", species)) %>%
group_by(species) %>%
mutate(ul = max(bio, na.rm = TRUE) * 2) %>%
filter(species == "sharks") -> df
df %>%
ggplot(aes(timestep, bio, group = type)) +
geom_point(aes(shape = type)) +
geom_line(aes(linetype = type)) +
# facet_wrap(~species) +
scale_linetype_manual(name = "",
values = c("blank", 'solid')) +
scale_shape_manual(name = "",
values = c(19, NA))+
scale_y_continuous(limits = c(0, max(df$ul)))
You could remove the filter(species == "sharks") and uncomment thefacet_wrap(~species) and you will get all the species plotted at the same time.

R ggplot - using geom_box plot and geom_ribbon together

I have an R dataframe called wSubset. I give the dput of this at the bottom of the question. I can generate a candlestick plot with the below line of code without problems:
if(!require("ggplot2")) { install.packages("ggplot2"); require("ggplot2") }
g <- ggplot(data=wSubset, aes(x=1:nrow(wSubset), lower=wSubset$candleLower, middle=wSubset$candleMiddle,
upper=wSubset$candleUpper, ymin=wSubset$low, ymax=wSubset$high)) +
geom_boxplot(stat='identity', aes(group=datetime, fill=fill))
Now, within wSubset$cluster I also store class information ranging from 1 to 5. I want to super-impose a geom_ribbon on top of the above g to colour-label different regions.
# the dataframe to be used by the geom_ribbon for colour-labelling
df_bg2 <- data.frame(x = c(0, rep(which(as.logical(diff(wSubset$cluster))), each=2), length(wSubset$cluster)),
ymin = min(scale(wSubset$low), na.rm = TRUE),
ymax = 1.1*max(scale(wSubset$high), na.rm = TRUE),
fill = factor(rep(wSubset$cluster[c(which(as.logical(diff(wSubset$cluster))),
length(wSubset$cluster) )], each=2)),
grp = factor(rep(seq(sum(as.logical(diff(wSubset$cluster)), na.rm=TRUE)+1), each=2))
)
# new plot which has candlesticks and the geom_ribbon colour-labelling
g2 <- ggplot(data=wSubset, aes(x=1:nrow(wSubset), lower=wSubset$candleLower, middle=wSubset$candleMiddle,
upper=wSubset$candleUpper, ymin=wSubset$low, ymax=wSubset$high)) +
geom_boxplot(stat='identity', aes(group=datetime, fill=fill)) +
geom_ribbon(data = df_bg2, aes(x = x, ymin=ymin, ymax=ymax, fill=fill, group=grp), alpha=.2) +
xlab("Date-Time") +
ylab("Levels") +
labs(title = "States in Temporal Display")
Here df_bg2 is used for filling the geom ribbons and works ok when I use geom_line, however I get the below error when I try to mix geom_boxplot with geom_ribbon.
Error: Aesthetics must either be length one, or the same length as the
dataProblems:wSubset$candleLower, wSubset$candleMiddle,
wSubset$candleUpper
Any idea where I am going wrong? This question is linked to an earlier question of mine on how to use geom_ribbon.
> dput(wSubset)
structure(list(date = structure(c(16342, 16342, 16342, 16342,
16342, 16342, 16343, 16343, 16343, 16343, 16343, 16343, 16344,
16344, 16344, 16344, 16344, 16344, 16345, 16345, 16345, 16345,
16345, 16345, 16346, 16346, 16346, 16346, 16346, 16346), class = "Date"),
datetime = structure(c(1411945200, 1411959600, 1411974000,
1411988400, 1412002800, 1412017200, 1412031600, 1412046000,
1412060400, 1412074800, 1412089200, 1412103600, 1412118000,
1412132400, 1412146800, 1412161200, 1412175600, 1412190000,
1412204400, 1412218800, 1412233200, 1412247600, 1412262000,
1412276400, 1412290800, 1412305200, 1412319600, 1412334000,
1412348400, 1412362800), class = c("POSIXct", "POSIXt"), tzone = ""),
open = c(1.62383, 1.62398, 1.62182, 1.62289, 1.62408, 1.62449,
1.6242, 1.62363, 1.62573, 1.62001, 1.62161, 1.62084, 1.62149,
1.61949, 1.61978, 1.62036, 1.62253, 1.61767, 1.61825, 1.61978,
1.62157, 1.61697, 1.61439, 1.6143, 1.61496, 1.61294, 1.61403,
1.60669, 1.59768, 1.59739), high = c(1.62517, 1.62426, 1.62618,
1.62559, 1.62745, 1.62522, 1.6245, 1.62678, 1.62873, 1.62253,
1.62297, 1.62175, 1.62194, 1.62018, 1.6214, 1.62261, 1.62521,
1.61873, 1.61999, 1.625, 1.62248, 1.61756, 1.615, 1.61624,
1.61591, 1.61405, 1.61403, 1.60844, 1.598, 1.5978), low = c(1.62258,
1.62166, 1.62143, 1.62162, 1.6233, 1.62375, 1.62247, 1.62361,
1.61936, 1.61664, 1.62026, 1.61998, 1.61886, 1.61792, 1.61618,
1.61798, 1.61739, 1.61724, 1.61817, 1.61962, 1.61561, 1.61283,
1.61126, 1.61358, 1.6128, 1.61248, 1.60665, 1.59769, 1.59518,
1.59594), close = c(1.62401, 1.62184, 1.6229, 1.62412, 1.62448,
1.62422, 1.62365, 1.62575, 1.62002, 1.6216, 1.62085, 1.62149,
1.61948, 1.61979, 1.62036, 1.62256, 1.61769, 1.61826, 1.61978,
1.62153, 1.61698, 1.61435, 1.61429, 1.61496, 1.61294, 1.61405,
1.60669, 1.59769, 1.5974, 1.59672), candleLower = c(1.62383,
1.62184, 1.62182, 1.62289, 1.62408, 1.62422, 1.62365, 1.62363,
1.62002, 1.62001, 1.62085, 1.62084, 1.61948, 1.61949, 1.61978,
1.62036, 1.61769, 1.61767, 1.61825, 1.61978, 1.61698, 1.61435,
1.61429, 1.6143, 1.61294, 1.61294, 1.60669, 1.59769, 1.5974,
1.59672), candleMiddle = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), candleUpper = c(1.62401, 1.62398,
1.6229, 1.62412, 1.62448, 1.62449, 1.6242, 1.62575, 1.62573,
1.6216, 1.62161, 1.62149, 1.62149, 1.61979, 1.62036, 1.62256,
1.62253, 1.61826, 1.61978, 1.62153, 1.62157, 1.61697, 1.61439,
1.61496, 1.61496, 1.61405, 1.61403, 1.60669, 1.59768, 1.59739
), fill = c("white", "red", "white", "white", "white", "red",
"red", "white", "red", "white", "red", "white", "red", "white",
"white", "white", "red", "white", "white", "white", "red",
"red", "red", "white", "red", "white", "red", "red", "red",
"red"), cluster = c(5, 4, 2, 2, 2, 5, 5, 2, 4, 2, 5, 5, 4,
5, 4, 2, 4, 1, 1, 2, 4, 4, 4, 5, 4, 5, 3, 3, 5, 5)), .Names = c("date",
"datetime", "open", "high", "low", "close", "candleLower", "candleMiddle",
"candleUpper", "fill", "cluster"), row.names = c(NA, 30L), class = "data.frame")
You want to do something like this:
wSubset <- within(wSubset, x<-1:nrow(wSubset))
g2 <- ggplot() +
geom_boxplot(stat='identity', data=wSubset, aes(x=x, lower=candleLower, middle=candleMiddle, upper=candleUpper,
group=datetime, fill=fill, ymin=low, ymax=high)) +
geom_ribbon(data = df_bg2, aes(x = x, ymin=ymin, ymax=ymax, fill=fill, group=grp), alpha=.2) +
xlab("Date-Time") +
ylab("Levels") +
labs(title = "States in Temporal Display")
Namely, separate out the global aesthetics to the geom_boxplot layer. Although, after looking at the plot, it would appear you need to adjust your values in df_bg2.

Resources