Plot multiple lines and error bars

Plot multiple lines and error bars - r

I am working with both observed and modeled soil moisture measurements at multiple sites:
DF <- structure(list(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L), .Label = c("CA-Oas", "CA-Ojp", "CA-Qfo",
"US-Ho1", "US-UMB"), class = "factor"), month = c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), satellite = c(NA,
NA, NA, 0.246855412760089, 0.239430871664309, 0.247785585430952,
0.254201344766859, 0.228525727473456, 0.220153005451572, 0.248914102315903,
0.226286688271691, NA, NA, NA, NA, 0.289740440845489, 0.377737456677007,
0.349513851965849, 0.369372068320291, 0.33528384697019, 0.316710417976185,
0.353778275847435, 0.366419460285794, NA, NA, NA, NA, NA, 0.523234443318459,
0.541902482509613, 0.541902482509613, 0.541902482509613, 0.541902482509613,
0.541902482509613, 0.541902482509613, NA, NA, NA, 0.490694537758827,
0.592309034864108, 0.636846342572459, 0.645659983158112, 0.642242492328991,
0.644422933720706, 0.634390437856634, 0.604341197472352, 0.601287194034632,
0.586221873760223, NA, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923), satellite.low = c(NA, NA, NA, 0.208491480949955,
0.208809739158129, 0.190487245023279, 0.206581580485592, 0.190563366897309,
0.180458581035219, 0.229389992708553, 0.200156716900049, NA,
NA, NA, NA, 0.197556973794879, 0.303169270710883, 0.255839831380852,
0.308712828188052, 0.272073699535891, 0.262898007889838, 0.279825783579647,
0.257326671080677, NA, NA, NA, NA, NA, 0.477209513195344, 0.541902482509613,
0.541902482509613, 0.541902482509613, 0.541902482509613, 0.541902482509613,
0.541902482509613, NA, NA, NA, 0.364323639893309, 0.534208357528997,
0.611485343460275, 0.643659507474862, 0.627827219126354, 0.637549292123253,
0.608606893522788, 0.5526295760826, 0.539322500377704, 0.519807807424512,
NA, NA, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923), satellite.high = c(NA,
NA, NA, 0.285219344570222, 0.270052004170489, 0.305083925838626,
0.301821109048126, 0.266488088049604, 0.259847429867925, 0.268438211923252,
0.252416659643333, NA, NA, NA, NA, 0.3819239078961, 0.45230564264313,
0.443187872550847, 0.430031308452529, 0.398493994404489, 0.370522828062531,
0.427730768115223, 0.47551224949091, NA, NA, NA, NA, NA, 0.569259373441575,
0.541902482509613, 0.541902482509613, 0.541902482509613, 0.541902482509613,
0.541902482509613, 0.541902482509613, NA, NA, NA, 0.617065435624345,
0.650409712199219, 0.662207341684644, 0.647660458841361, 0.656657765531627,
0.651296575318159, 0.660173982190479, 0.656052818862104, 0.663251887691561,
0.652635940095934, NA, NA, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923,
0.624189734458923, 0.624189734458923, 0.624189734458923, 0.624189734458923
), observed = c(0.140086734851409, 0.137745990685859, 0.146660019201229,
0.275950971628449, 0.298260250896057, 0.26870029739777, 0.227566661823465,
0.197824137311287, 0.195409734063355, 0.229745648248465, 0.226546607074933,
0.158508782420749, 0.0809095246636771, 0.0804010923965351, 0.0845644708882278,
0.136702248824284, 0.121883242349049, 0.108167424836601, 0.0970784232538687,
0.0860934461299105, 0.0910916878172589, 0.10747642248062, 0.102700195758564,
0.0811833903700756, 0.115733715437788, 0.0631616319005478, 0.0631265153446416,
0.171535848109378, 0.18694684173028, 0.142807562821677, 0.145926108701425,
0.154393702185792, 0.171436382382201, 0.188897212829005, 0.186402403754978,
0.165098945598251, 0.0713685071127924, 0.0436531172429078, 0.0624862109235555,
0.127141665482761, 0.134542260869565, 0.124414092512545, 0.100807230998223,
0.0765214392215714, 0.0798724029741452, 0.103098854664915, 0.116568256944444,
0.1105108739241, 0.108650005144474, 0.0976296689160692, 0.105006219572287,
0.122777662914972, 0.102765292125318, 0.0851933017211099, 0.0566760862577016,
0.056282148272957, 0.0718264626865672, 0.0909327257326783, 0.10461694624978,
0.103895834299474), observed.low = c(0.123032811442984, 0.126127332034484,
0.118118534835286, 0.198817004012519, 0.254140718534211, 0.208009439993492,
0.164189152182023, 0.138753714747272, 0.124966620815314, 0.161415523471958,
0.159972232752574, 0.121607685043651, 0.0591772062927612, 0.05892126834644,
0.0577332139485083, 0.0979872715014624, 0.0939120895219702, 0.0785008888345453,
0.0635909823809719, 0.0516671874880994, 0.0513847048326183, 0.0716629121839855,
0.0686069735468453, 0.0605781104970885, 0.0725611238034912, 0.0287878644886994,
0.0312612891352408, 0.117719361193379, 0.158601178554096, 0.103448327552773,
0.108434377599704, 0.123534447310382, 0.150300870070247, 0.171762349088762,
0.173223674947214, 0.143153513926194, 0.0176750483828094, -0.00825194618307156,
0.00841588326444485, 0.0851767193470053, 0.114292404939469, 0.104180435677072,
0.0730616681527658, 0.03446185464827, 0.0417092732525248, 0.0672335656317878,
0.091673056919691, 0.0718925232829272, 0.0765541880959607, 0.0759211571229279,
0.0803948566686958, 0.0858717319021568, 0.0760499923996711, 0.0506548126690479,
0.0369099617804679, 0.0337436690922423, 0.0466183548891693, 0.0663011553037621,
0.0900575679369071, 0.0899421880715561), observed.high = c(0.157140658259833,
0.149364649337235, 0.175201503567172, 0.353084939244379, 0.342379783257904,
0.329391154802047, 0.290944171464907, 0.256894559875301, 0.265852847311397,
0.298075773024972, 0.293120981397293, 0.195409879797847, 0.102641843034593,
0.10188091644663, 0.111395727827947, 0.175417226147105, 0.149854395176127,
0.137833960838657, 0.130565864126765, 0.120519704771722, 0.130798670801899,
0.143289932777255, 0.136793417970284, 0.101788670243063, 0.158906307072085,
0.0975353993123963, 0.0949917415540424, 0.225352335025378, 0.215292504906464,
0.182166798090582, 0.183417839803146, 0.185252957061203, 0.192571894694156,
0.206032076569248, 0.199581132562743, 0.187044377270308, 0.125061965842775,
0.0955581806688872, 0.116556538582666, 0.169106611618516, 0.154792116799661,
0.144647749348019, 0.128552793843681, 0.118581023794873, 0.118035532695766,
0.138964143698041, 0.141463456969198, 0.149129224565273, 0.140745822192987,
0.11933818070921, 0.129617582475879, 0.159683593927787, 0.129480591850964,
0.119731790773172, 0.0764422107349353, 0.0788206274536718, 0.097034570483965,
0.115564296161594, 0.119176324562654, 0.117849480527392)), .Names = c("site",
"month", "modeled", "modeled.low", "modeled.high", "observed",
"observed.low", "observed.high"), row.names = c(NA, -60L), class = "data.frame")
What I need to do is to create line plots of both "modeled" and "observed" soil moisture against months, and to add corresponding error bars modeled.low and modeled.high and observed.low and observed.high to those lines.
Also, I need to create facets based on the site column.
My first approach would be melting that data frame and starting from there, but the error bars might complicate that approach:
library(reshape2)
library(ggplot2)
DF.m <- melt(DF, id=c('site','month'))
ggplot(data=DF.m) +
geom_line(aes(x=month, y=value, colour=variable, group=variable)) +
facet_wrap(~site) +
theme_bw(base_size = 18) +
scale_x_discrete(limits=month.abb) +
ylab('Soil water content (%)') + xlab('')
Which obviously does not work because modeled.low and modeled.high and observed.low and observed.high are interpreted as lines to plot, whereas I need them as error bars.
I know that I should use geom_errorbar() in this code in order to achieve what I need, but I am not sure how to use it with the molten data frame.
Any tips?

library(dplyr)

# Build one long table with a common set of columns (val, low, high) for the
# modeled and the observed series; `var` records which series a row belongs to.
df_m <- DF %>%
  select(site, month, val = modeled, low = modeled.low, high = modeled.high) %>%
  mutate(var = "modeled")
df_ob <- DF %>%
  select(site, month, val = observed, low = observed.low, high = observed.high) %>%
  mutate(var = "observed")
df <- rbind(df_m, df_ob)

# Points with error bars spanning low..high, coloured by series, one panel
# per site.
ggplot(df, aes(x = month, y = val, colour = var)) +
  geom_errorbar(aes(ymin = low, ymax = high)) +
  geom_point() +
  facet_wrap(~site)
Line plot with ribbon illustrating high and low value ranges:
# Line plot with a shaded band between `low` and `high` for each series.
# `fill` is mapped to `var` so the two bands can be told apart (without it
# both bands use the same default fill). The band outline is switched off
# with a constant `colour = NA` instead of mapping `linetype = NA` in aes().
ggplot(df, aes(x = month, y = val, colour = var)) +
  geom_ribbon(aes(ymin = low, ymax = high, fill = var), colour = NA, alpha = 0.2) +
  geom_line() +
  facet_wrap(~site)

A data.table solution:
library(data.table)

# Melt three sets of measure columns at once: the central values
# (modeled/observed), the lower bounds and the upper bounds. Arguments are
# spelled out in full (no partial matching) and the value columns get
# explicit names instead of the default value1/value2/value3.
plt <- melt(
  as.data.table(DF),
  id.vars = c("site", "month"),
  measure.vars = patterns("modeled$|observed$", "low", "high"),
  value.name = c("val", "low", "high")
)

# After a multi-column melt `variable` is just the index of the matched column
# within each pattern (1 = modeled, 2 = observed); give it readable labels.
plt[, variable := factor(variable, levels = 1:2, labels = c("modeled", "observed"))]

ggplot(plt, aes(x = month, y = val, ymin = low, ymax = high, colour = variable)) +
  geom_line() +
  geom_errorbar() +
  facet_wrap(~site)

Related

Adding labels with 3 elements in GGplot coord_polar

I am building a pie / donut chart with 2 levels and want to label them using the Name, Value and Percentage. For example:
Tiger Block
3596 (20%)
Here is my code so far:
# Stacked bars per Level drawn in polar coordinates (pie / donut layout).
# The text layer overrides the global `label` mapping and shows Value only.
ggplot(usage.may, aes(x = Level, y = Percent, fill = Subcategory, label = Label)) +
  geom_bar(stat = "identity", color = "white", show.legend = FALSE) +
  geom_text(aes(label = Value),
    size = 3,
    colour = "white",
    check_overlap = TRUE,
    position = position_stack(vjust = 0.5)) +
  coord_polar("y") + theme_minimal()
And some sample data:
structure(list(Level = structure(c(2L, 3L, 3L, 3L, 3L, 2L, 3L,
2L, 3L, 3L, 3L, 2L, 3L, 3L, 1L), levels = c("0", "1", "2"), class = "factor"),
Category = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 1L), levels = c("C00", "C01", "C02",
"C03", "C04"), class = "factor"), Subcategory = structure(c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
1L), levels = c("C00", "C01", "C011", "C012", "C013", "C014",
"C02", "C021", "C03", "C031", "C032", "C033", "C04", "C041",
"C042"), class = "factor"), Colour = structure(c(2L, 3L,
3L, 3L, 3L, 4L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 1L), levels = c("0",
"1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"),
Label = c("Cafe (R1 & R2) ", "Non-checked ", "Spider Monkey ",
"Signing-in Cabin", "Solar (cafe)", "Vet Room", "Non-checked",
"Butchery", "Non-checked", "Solar (lynx)", "Solar (butchery)",
"Tiger Block", "Farm", "Non-checked", ""), Value = c(5323L,
921L, 2611L, 34L, 1791L, 534L, 534L, 8479L, 6689L, 1371L,
419L, 3596L, 87L, 3247L, 0L), Percent = c(30L, 5L, 15L, 0L,
10L, 3L, 3L, 47L, 37L, 8L, 2L, 20L, 2L, 18L, 0L), X = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
X.1 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA)), row.names = c(NA, -15L), class = "data.frame")
Thanks in advance

With paste0 you could do:
library(ggplot2)

# Each label is built as "<Label>\n<Value> (<Percent>%)", e.g.
# "Tiger Block\n3596 (20%)". trimws() drops the trailing spaces that some
# Label entries carry in the sample data.
ggplot(df, aes(x = Level, y = Percent, fill = Subcategory)) +
  geom_bar(stat = "identity", color = "white", show.legend = FALSE) +
  geom_text(aes(label = paste0(trimws(Label), "\n", Value, " (", Percent, "%)")),
    size = 3,
    colour = "white",
    check_overlap = TRUE,
    position = position_stack(vjust = 0.5)
  ) +
  coord_polar("y") +
  theme_minimal()

Part 2: How to correctly order segments by value, within an individual bar, on a bar chart in ggplot

So I thought I had my question answered with my prior question, but alas something is still not working. I am wondering if there is something in the structure of my data that I am missing because my prior question with fake data worked.
Here is a new reproducible example, with dummy data that more closely replicates my data set and my problem. My question is how do I sort the segments within each bar of the bar chart by value (largest value within a bar on the bottom, smallest on top).
library(dplyr)
repro_df <- structure(list(Grp = structure(c(5L, 7L, 2L, 3L, 8L, 7L, 10L,
4L, 4L, 3L, 2L, 2L, 3L, 8L, 9L, 3L, 3L, 6L, 6L, 5L, 6L, 8L, 4L,
11L, 5L, 1L, 10L, 8L, 1L, 6L, 3L, 1L, 1L, 9L, 5L, 3L, 5L, 4L,
5L, 5L, 2L, 1L, 9L, 4L, 5L, 10L, 6L, 8L, 3L, 6L, 2L, 6L, 4L,
7L, 2L, 8L, 9L, 9L, 10L, 5L, 1L, 9L, 1L, 5L, 2L, 8L, 8L, 3L,
3L, 10L, 7L, 6L, 9L, 2L, 9L, 7L, 1L, 1L, 9L, 1L, 11L, 10L, 9L,
3L, 7L, 2L, 4L, 7L, 6L, 6L, 4L, 8L, 5L, 5L, 7L, 10L, 8L, 3L,
6L, 3L, 10L, 10L, 7L, 8L, 9L, 8L, 5L, 7L, 3L, 10L, 11L, 7L, 4L,
10L, 3L, 8L, 5L, 3L, 5L, 4L, 3L, 10L, 7L, 3L, 4L, 9L, 2L, 3L,
2L, 1L, 8L, 11L, 2L, 1L, 7L), .Label = c("0", "1", "2", "3",
"4", "5", "6", "7", "8", "9", "10"), class = "factor"), Segment = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("A", "B", "C"), class = "factor"),
Value = c(914, NA, NA, 228, NA, NA, NA, 207, NA, 179, NA,
NA, 149, NA, NA, 135, NA, NA, NA, 109, NA, NA, 105, NA, NA,
101, NA, 100, NA, NA, NA, 98, NA, 96, NA, NA, 87, NA, NA,
77, NA, NA, 74, NA, NA, 57, NA, NA, 49, NA, NA, 35, NA, NA,
31, NA, NA, 25, NA, NA, NA, 25, NA, NA, 21, NA, 18, NA, NA,
16, NA, NA, 8, NA, NA, 7, NA, NA, 7, NA, NA, 5, NA, NA, NA,
NA, 4, NA, NA, 0, 0, NA, NA, 0, NA, NA, 0, NA, NA, NA, NA,
0, 0, NA, NA, NA, NA, 0, 0, NA, NA, 0, NA, NA, NA, NA, 0,
NA, NA, 0, NA, NA, 0, 0, NA, NA, NA, 0, NA, NA, NA, 0, NA,
NA, 0)), class = "data.frame", row.names = c(NA, -135L))
# Reorder the data frame
repro_order_df <- repro_df %>%
group_by(Segment) %>%
mutate(Grp = fct_reorder(Grp, Value))
head(repro_order_df, 10)
# A tibble: 10 x 3
# Groups: Segment [3]
Grp Segment Value
<fct> <fct> <dbl>
1 4 A 914
2 6 B NA
3 1 C NA
4 2 A 228
5 7 B NA
6 6 C NA
7 9 A NA
8 3 B 207
9 3 C NA
10 2 A 179
# Plot
ggplot(repro_order_df, aes(x=Segment, y=Value, fill=Grp)) +
geom_col(color = "black")
When I graph this data after reordering, each bar is not ordered by Value as I would have expected. A bit more oddly, in my real data set the first bar is ordered correctly but the following bars are not. Any thoughts as to why this is not working?
Thanks!

I think Peter is definitely on the right track. However, I understand the OP to be asking for the individual Values to be ordered by Value within each Segment. I've made Grp a factor whose levels are ordered by decreasing size of the largest Value in each Grp. The code would look like:
# Sort rows from largest to smallest Value (NAs last). Value_ord records that
# order, and Grp is rebuilt as a factor whose levels follow first appearance
# in the sorted data.
repro_ord <- repro_df %>%
  arrange(desc(Value)) %>%
  mutate(Value_ord = row_number(), Grp = as_factor(as.character(Grp)))

# Fill by Grp, but control stacking order through the group aesthetic.
p <- ggplot(repro_ord, aes(x = Segment, y = Value)) +
  geom_col(aes(fill = Grp, group = rev(Value_ord)), color = "black")

# Per-Segment totals, rounded to whole numbers, used as labels above each bar.
bar_tot <- repro_ord %>%
  group_by(Segment) %>%
  summarize(Total = sum(Value, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(Total = round(Total, 0))

p1 <- p + geom_text(
  data = bar_tot, aes(x = Segment, y = Total, label = Total),
  vjust = -0.5, size = 3, hjust = 0.5, fontface = "bold"
)
which gives.
This answer should be regarded as a long comment on Peter's answer rather than a new answer.

How about this, which I think is what you are after...
The trick is to utilise the group aesthetic combined with an additional grouping variable to control the plotting order and use the Grp variable to control the fill colours.
library(dplyr)
library(ggplot2)
library(forcats)
Option 1) show merged groups in value order
create a new grouping variable to order the groups by segment and group size
# Total Value per Segment/Grp, then a per-Segment index `g` that numbers the
# groups from smallest to largest total; `g` drives the stacking order while
# Grp drives the fill colour.
repro_order_df <-
  repro_df %>%
  group_by(Segment, Grp) %>%
  summarise(Value = sum(Value, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(Segment) %>%
  # arrange() sorts the whole table by Value; row_number() is then evaluated
  # within each Segment because the data is still grouped.
  arrange(Value) %>%
  mutate(g = row_number()) %>%
  # Drop the grouping so the result does not stay grouped for later use.
  ungroup()

p1 <-
  ggplot(repro_order_df, aes(x = Segment, y = Value, group = g, fill = Grp)) +
  geom_col(color = "black") +
  ggtitle("p1 grouped by Grp") +
  theme(legend.position = "bottom")
Option 2) show groups in value order with individual group values stacked largest first
create a new grouping variable to order the groups by segment and group size and value within group
# Keep the individual rows, attach each Segment/Grp total as Value_g, and
# number the rows per Segment in order of (group total, individual value).
# `g` drives the stacking order while Grp drives the fill colour.
repro_order_df1 <-
  repro_df %>%
  group_by(Segment, Grp) %>%
  mutate(Value_g = sum(Value, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(Segment) %>%
  arrange(Value_g, Value) %>%
  mutate(g = row_number()) %>%
  # Drop the grouping so the result does not stay grouped for later use.
  ungroup()

p2 <-
  ggplot(repro_order_df1, aes(x = Segment, y = Value, group = g, fill = Grp)) +
  geom_col(color = "black") +
  ggtitle("p2 grouped by Grp and Value") +
  theme(legend.position = "bottom")
Which give you:
Created on 2020-05-16 by the reprex package (v0.3.0)

Duplicated variables in one legend (not common problem as far as I can see!)

I have the following problem, which seems common, but is not. I have made a ggplot graph with linetype and colour set manually, both legends have the same name and the same variable labels, df in long format. One legend is produced, but each variable is shown twice. In order for you to understand what I want to achieve, I need to back up a little.
I am working on a function which permits me to update a dataframe with monthly spending for this year and to then generate different plots to follow up on my budgeting. My variables have two "properties", so to speak. They are of a particular item, and each item is either a projection (i.e. planned) or actual spending. What I wanted originally was to have each item possess one colour and two linetypes (solid for projected, solid-dashed for actual spending). So, for example, green for saving, projected savings with a solid line, actual savings with a dashed line. I wanted two legends with that, one legend showing only colours (i.e. items) and the other showing only the two kinds of linetypes (solid, dashed) so that it is left to the reader to put the two together (and thus also have less legend items in total). If anyone has a solution for this problem, I'd be very happy to find out. However, the following is what I am trying to solve now:
I have by now given up on this original intention and settled for a legend with each kind of line getting one legend entry. This is what the intro (above) was about. Despite having the same legend name and variable labels and correct number thereof, each variable appears twice now. I would like to know why I am getting these double entries and find a solution. I have tried all sorts of things over many hours and have found nobody with a similar problem (since I get the more "normal" problems with my keyword search).
One strange thing I have also noted is that the variable "Add. income" does not behave like the other variables, since it only appears once.
The reason why there are many NA values in the dataframe (below) is because these are figures to be filled into the df and then plotted as the year progresses.
Code:
ggplot(fin2019Long, aes(x=month, y=value, colour=variable)) + geom_line(aes(linetype=variable)) + geom_point() +
labs(title = "Projected expenditure and saving", y = "Euros", x = "Month") +
scale_x_continuous("Month", breaks= c(1:12)) +
scale_colour_manual(name = "Items",
values=c("green","green", "yellow", "yellow", "blue", "blue", "red", "red", "orange"),
labels=c(rep("Living expend.", 2), rep("Debt repay.", 2), rep("Saving", 2), rep("Furn. fund", 2), "Extra pay")) +
scale_linetype_manual(name = "Items",
values=c(rep(c("solid", "twodash"), 4), "twodash"),
labels=c(rep("Living expend.", 2), rep("Debt repay.", 2), rep("Saving", 2), rep("Furn. fund", 2), "Extra pay"))
Data:
structure(list(month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("livingExpProj",
"livingExp", "debtRepayProj", "debtRepay", "savingProj", "saving",
"furnFundProj", "furnFund", "addIncome"), class = "factor"),
value = c(1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
1000, 1000, 1000, 1000, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 600, 600, 600, 600, 600, 600, 600, 600, 600,
600, 600, 600, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500,
500, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -108L
), class = "data.frame")

Separating the variable column into two columns makes it much easier to control:
# Split `variable` into two columns:
#   type      - "Planned" when the name ends in "Proj", otherwise "Spending"
#   variable2 - the item name with the "Proj" suffix removed
fin2019Long$type <- ifelse(
  grepl("Proj$", fin2019Long$variable),
  "Planned",
  "Spending"
)
fin2019Long$variable2 <- sub("Proj$", "", fin2019Long$variable)

# Colour identifies the item, linetype identifies planned vs. spending.
ggplot(fin2019Long, aes(x = month, y = value, colour = variable2)) +
  geom_line(aes(linetype = type)) +
  geom_point() +
  labs(title = "Projected expenditure and saving", y = "Euros", x = "Month") +
  scale_x_continuous("Month", breaks = 1:12)

change the order of a common legend, in a superimposed graph

I would like to change the order of my legend, and not to display them in alphabetical order as you can see below. I would like to have
"NONE","LIGHT","MEDIUM","HEAVY","V_COLD","COLD","MEDIUM","HOT".
Is it possible? I tried with several arguments but without success.
Below, my table :
structure(list(SOUNAME = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "BALLYSHANNON (CATHLEENS FALL)", class = "factor"),
year_month = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L), .Label = c("2013-03",
"2013-04", "2013-05", "2013-06", "2013-07", "2013-08", "2013-09",
"2013-10", "2013-12"), class = "factor"), pre_type = structure(c(4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L), .Label = c("HEAVY", "LIGHT", "MEDIUM",
"NONE"), class = "factor"), pre_value = c(13L, 2L, 11L, 5L,
9L, 3L, 10L, 7L, 2L, 6L, 13L, 10L, 10L, 1L, 15L, 4L, 16L,
2L, 7L, 5L, 2L, 2L, 17L, 9L, 7L, 3L, 13L, 6L, 5L, 2L, 10L,
14L, 1L, 5L, 19L, 6L), tem_type = structure(c(4L, 3L, 2L,
1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L,
2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L, 3L, 2L, 1L, 4L,
3L, 2L, 1L), .Label = c("COLD", "HOT", "MEDIUM", "V_COLD"
), class = "factor"), tem_value = c(0L, 7L, 0L, 23L, 0L,
29L, 0L, 1L, 0L, 29L, 2L, 0L, 0L, 21L, 9L, 0L, 0L, 5L, 25L,
0L, 0L, 18L, 13L, 0L, 0L, 21L, 9L, 0L, 0L, 26L, 5L, 0L, 0L,
24L, 0L, 7L), cnt_vehicle = c(NA, 2754406, NA, NA, NA, 2846039,
NA, NA, NA, 3149377, NA, NA, NA, 3058810, NA, NA, NA, 3362614,
NA, NA, NA, 3415716, NA, NA, NA, 3020812, NA, NA, NA, 3076665,
NA, NA, NA, 2775306, NA, NA), x = c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L
)), .Names = c("SOUNAME", "year_month", "pre_type", "pre_value",
"tem_type", "tem_value", "cnt_vehicle", "x"), row.names = c(NA,
-36L), class = "data.frame")
Below my graph:
# Precipitation and temperature day counts as side-by-side bars, with the
# vehicle count drawn as a line on the same panel.
# - Day counts are multiplied by 1e5 so they share the primary axis with the
#   vehicle counts; the secondary axis divides by 1e5 to show days again.
# - fill = pre_type stays in the global aes(): the line layer inherits it and
#   is grouped by it.
# - scale_fill_discrete(breaks = ...) sets the legend order explicitly
#   ("MEDIUM" is used by both pre_type and tem_type, so it is listed once).
ggplot(data = b_complet_2013, aes(x = x, y = pre_value * 100000, fill = pre_type)) +
  scale_x_continuous(breaks = (1:9) + 0.2, labels = unique(b_complet_2013$year_month)) +
  geom_bar(stat = "identity", width = 0.3) +
  geom_bar(aes(x = x + 0.4, y = tem_value * 100000, fill = tem_type), width = 0.3, stat = "identity") +
  geom_line(mapping = aes(x = x + 0.2, y = as.numeric(cnt_vehicle)), colour = "blue", size = 0.8) +
  geom_point(aes(x = x + 0.2, y = as.numeric(cnt_vehicle)), colour = "blue", show.legend = FALSE) +
  scale_y_continuous(sec.axis = sec_axis(~ . / 100000, name = "Number of days of precipitation (left) and temperature (right)")) +
  scale_fill_discrete(breaks = c("NONE", "LIGHT", "MEDIUM", "HEAVY", "V_COLD", "COLD", "HOT")) +
  labs(title = "Impact of weather on road traffics", x = "date", y = "Number of vehicles") +
  theme(
    plot.title = element_text(size = 17),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(color = "blue", face = "bold", size = 12),
    axis.text.y = element_text(color = "blue", face = "bold", size = 9),
    axis.title.y.right = element_text(color = "black", face = "bold"),
    axis.text.y.right = element_text(color = "black", size = 9, face = "bold"),
    panel.background = element_rect(linetype = "dashed", fill = "white"),
    plot.background = element_rect(linetype = "dashed", fill = "grey90")
  )

Cannot concatenate more than 3 elements in an expression for ggplot2's geom_text

I have a data frame for which I'm computing a linear model and would like to include the correlation coefficient and its significance using geom_text.
structure(list(ppno = c(1L, 1L, 1L, 10L, 10L, 10L, 2L, 2L, 2L,
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L), light.color = structure(c(1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("B", "IR",
"IR+B"), class = "factor"), session = c(2L, 1L, 3L, 2L, 3L, 1L,
1L, 3L, 2L, 3L, 2L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 3L, 2L,
1L, 3L, 1L, 3L, 2L, 3L, 2L, 1L), time = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("pre",
"post"), class = "factor"), pre.pri.s = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), pre.pri.r = c(8L, 4L, 6L,
2L, 2L, 4L, 10L, 12L, 9L, 24L, 16L, 15L, 15L, 15L, 15L, 3L, 5L,
7L, 13L, 11L, 12L, 16L, 15L, 14L, 21L, 5L, 8L, 1L, 0L, 0L), pre.nwc = c(5L,
2L, 4L, 2L, 2L, 4L, 10L, 10L, 9L, 11L, 10L, 11L, 12L, 11L, 11L,
3L, 5L, 6L, 9L, 11L, 12L, 12L, 11L, 10L, 11L, 5L, 8L, 1L, 0L,
0L), pre.ppi = structure(c(3L, 2L, 2L, 1L, 1L, 2L, 2L, 3L, 2L,
3L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, NA, 2L, 2L, 3L, 3L, 3L, 4L,
2L, 3L, 1L, 1L, 1L), .Label = c("1", "2", "3", "4", "NULL"), class = "factor"),
pre.pri.nwc = c(1.6, 2, 1.5, 1, 1, 1, 1, 1.2, 1, 2.18181818181818,
1.6, 1.36363636363636, 1.25, 1.36363636363636, 1.36363636363636,
1, 1, 1.16666666666667, 1.44444444444444, 1, 1, 1.33333333333333,
1.36363636363636, 1.4, 1.90909090909091, 1, 1, 1, NaN, NaN
), post.pri.s = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA), post.pri.r = c(4L, 4L, 7L, 0L, 0L, 4L,
3L, 8L, 7L, 16L, 12L, 19L, 6L, 10L, 4L, 1L, 3L, 0L, 3L, 11L,
15L, 8L, 9L, 9L, 8L, 4L, 3L, 0L, 0L, 0L), post.nwc = c(4L,
3L, 4L, 0L, 0L, 3L, 3L, 8L, 7L, 10L, 9L, 15L, 5L, 9L, 4L,
1L, 3L, 0L, 3L, 8L, 13L, 8L, 9L, 9L, 8L, 4L, 3L, 0L, 0L,
0L), post.ppi = structure(c(2L, 2L, 3L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 2L, 5L, 1L, 1L, NA, 3L, 2L, 1L, 1L,
2L, 3L, 2L, 2L, 1L, 1L, 1L), .Label = c("1", "2", "3", "4",
"NULL"), class = "factor"), post.pri.nwc = c(1, 1.33333333333333,
1.75, NaN, NaN, 1.33333333333333, 1, 1, 1, 1.6, 1.33333333333333,
1.26666666666667, 1.2, 1.11111111111111, 1, 1, 1, NaN, 1,
1.375, 1.15384615384615, 1, 1, 1, 1, 1, 1, NaN, NaN, NaN),
delta.pri.r = c(4, 0.1, -1, 2, 2, 0.1, 7, 4, 2, 8, 4, -4,
9, 5, 11, 2, 2, 7, 10, 0.1, -3, 8, 6, 5, 13, 1, 5, 1, 0.1,
0.1), delta.nwc = c(1, -1, 0.1, 2, 2, 1, 7, 2, 2, 1, 1, -4,
7, 2, 7, 2, 2, 6, 6, 3, -1, 4, 2, 1, 3, 1, 5, 1, 0.1, 0.1
), delta.pri.nwc = c(-0.6, -0.666666666666667, 0.25, NaN,
NaN, 0.333333333333333, 0.1, -0.2, 0.1, -0.581818181818182,
-0.266666666666667, -0.0969696969696969, -0.05, -0.252525252525252,
-0.363636363636364, 0.1, 0.1, NaN, -0.444444444444444, 0.375,
0.153846153846154, -0.333333333333333, -0.363636363636364,
-0.4, -0.90909090909091, 0.1, 0.1, NaN, NaN, NaN), delta.vas = c(4.081632,
-43.877544, -8.163264, -2.040816, 0.510204, 9.183672, 8.163264,
8.163264, 11.224488, 0, -14.285712, -11.224488, 19.387752,
0, 26.530608, 2.040816, 10.20408, 11.224488, 42.346932, -10.20408,
-28.06122, 11.224488, 5.612244, 21.428568, 22.448976, 0,
23.469384, 0.510204, -1.020408, 0)), .Names = c("ppno", "light.color",
"session", "time", "pre.pri.s", "pre.pri.r", "pre.nwc", "pre.ppi",
"pre.pri.nwc", "post.pri.s", "post.pri.r", "post.nwc", "post.ppi",
"post.pri.nwc", "delta.pri.r", "delta.nwc", "delta.pri.nwc",
"delta.vas"), row.names = c(NA, -30L), class = "data.frame")
Using this code for the plot.
p <- ggplot(data=mpq.vas, mapping=aes(x=delta.vas, y=delta.pri.r,
colour=light.color)) +
geom_point() +
geom_smooth(aes(group=1), method="lm", size=1, colour="black")
#
# Clean up the basics.
pp <- p + geom_hline(yintercept=0, colour="grey60") +
geom_vline(xintercept=0, colour="grey60") +
scale_colour_manual(name="Treatment\ncolor", values=cols) +
scale_x_continuous(name=
expression(paste(Delta, " VAS pain [t(0) - t(60)]")))+
scale_y_continuous(name=expression(paste(Delta, "PRI(r) [pre - post]")))
#
# Add correlation info.
val <- cor.test(mpq.vas$delta.vas, mpq.vas$delta.pri.r)
When I then try to add the correlation coefficient somewhere in the text, I get an error about an unexpected symbol at the location of the Q in the label.
pp + geom_text(aes(x=20, y=-5, label=paste("italic(r) ==", 3, "Q", sep=" ")),
parse=TRUE, colour="black")
(yes, I know a correlation of 3 is impossible, just an example).
I would like to do:
pp + geom_text(aes(x=20, y=-5, label=paste("italic(r) ==", round(val$estimate, digits=2), "\np < 0.0001", sep=" ")), parse=TRUE, colour="black")
But this generates the same error, now at the \n thingy. What am I doing wrong?

# annotate() draws the label once; geom_text() with constant x/y/label would
# draw one copy per data row, stacked on top of each other.
# Because parse = TRUE, the label string must be a valid plotmath expression,
# so the two parts are wrapped in list(...).
pp + annotate("text",
  x = 20, y = -5,
  label = paste("list(italic(r) ==", round(val$estimate, digits = 2), ", p < 0.0001)"),
  parse = TRUE, colour = "black"
)
The key is that the label argument is parsed if parse == TRUE, which means the text needs to follow the format described in ?plotmath.
What geom_text does is essentially this:
expr <- parse(text=label)
and then it draws the text using expr as the label. So the label argument needs to be a valid expression. In your example,
paste("italic(r) ==", 3, "Q", sep=" ")
is not a valid expression, so
parse(text=paste("italic(r) ==", 3, "Q", sep=" "))
induces an error.
In plotmath, if you want to concatenate symbols, you need to use one of the following (with the arguments separated by commas):
paste(x, y, z)
list(x, y, z)
So if you simply want to concatenate, then
# The generated label is: paste(italic(r) == 3 , ' Q')
# The arguments of the inner (plotmath) paste must be comma-separated and the
# literal text quoted; otherwise parsing fails on the bare symbol.
geom_text(foobar, label = paste("paste(italic(r) ==", 3, ", ' Q')"))
The first (outer) paste concatenates the pieces of text into a single string.
The second (inner) paste is interpreted by plotmath when the label is parsed.
In my example above, I used list (see ?plotmath) instead of paste, because the statistic and the p-value are separated by a comma.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Plot multiple lines and error bars - r

Related

Adding labels with 3 elements in GGplot coord_polar

Part 2: How to correctly order segments by value, within an individual bar, on a bar chart in ggplot

Duplicated variables in one legend (not common problem as far as I can see!)

change the order of a common legend, in a superimposed graph

Cannot concatenate more than 3 elements in an expression for ggplot2's geom_text

Categories

Resources