Adding 95% confidence interval of prediction using ggplot2 - r

I am using your facet_grid2 function from ggh4x to make both x and y-axis scales to be free like
ggplot(data_calibration, aes(Observed,Predicted))+
geom_point(color="black",alpha = 1/3) +
facet_grid2(Station ~ Method, scales="free", independent = "all")+
xlab("Measured") +
ylab("Predicted") +
theme_bw()+
geom_smooth(method="lm") +
theme(panel.grid.minor = element_blank())
Now how can I add the 95% confidence interval of prediction to this plot like the following plot
Data
data_calibration = structure(list(Observed = c(17229L, 15964L, 13373L, 17749L, 12457L,
7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L, 7197L, 7220L,
7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L, 11465L,
11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L, 10759L,
9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L, 3183L,
9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L, 17749L,
12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L, 7197L,
7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L), Station = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Raigad",
"Ratnagiri", "Thane "), class = "factor"), Method = structure(c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("ANN",
"ELNET", "LASSO", "PCA-ANN", "PCA-MLR", "SMLR"), class = "factor"),
Predicted = c(14463L, 14285L, 14452L, 12765L, 11917L, 8143L,
11251L, 8611L, 6789L, 2059L, 2787L, 2201L, 3062L, 4508L,
4975L, 15357L, 15605L, 12326L, 10377L, 9113L, 13926L, 13142L,
11407L, 8711L, 7801L, 2064L, 4563L, 4725L, 6247L, 7170L,
9492L, 8857L, 10323L, 7389L, 6776L, 7842L, 8261L, 6156L,
8627L, 4326L, 8094L, 8897L, 10370L, 10214L, 8548L, 16043L,
16671L, 15831L, 13463L, 11921L, 10239L, 9110L, 8090L, 10794L,
5826L, 3621L, 5639L, 7364L, 8152L, 5515L, 15182L, 14370L,
13559L, 12748L, 11936L, 11125L, 10313L, 9502L, 8691L, 7879L,
7068L, 6257L, 5445L, 4634L, 3822L, 10045L, 9911L, 11038L,
9255L, 8736L, 8848L, 8063L, 7847L, 8538L, 6744L, 9583L, 10474L,
8343L, 10353L, 8791L, 13185L, 13331L, 13099L, 12557L, 11898L,
10474L, 11199L, 10255L, 9251L, 6148L, 6795L, 6166L, 7775L,
8157L, 7990L, 14843L, 15086L, 12585L, 10987L, 10193L, 13663L,
11317L, 11071L, 9392L, 6991L, 4484L, 4667L, 4846L, 5830L,
6577L, 9085L, 8802L, 9570L, 7770L, 7652L, 8006L, 7995L, 6599L,
9050L, 4876L, 8360L, 8981L, 9931L, 9479L, 8009L, 13775L,
13890L, 13416L, 12851L, 12141L, 10693L, 10834L, 10372L, 9585L,
5914L, 5930L, 5922L, 7854L, 7407L, 7697L, 14941L, 15174L,
12572L, 10817L, 10412L, 13705L, 11154L, 10886L, 9448L, 7215L,
4389L, 4875L, 4809L, 5747L, 6385L, 9034L, 8749L, 9410L, 7820L,
7798L, 7940L, 7957L, 6803L, 8844L, 5227L, 8369L, 8972L, 9789L,
9514L, 7940L, 15309L, 14477L, 14219L, 18581L, 12084L, 10550L,
8666L, 8812L, 11415L, 5566L, 3928L, 4592L, 7861L, 7489L,
6903L, 12509L, 13366L, 11956L, 11880L, 8711L, 12768L, 11690L,
10922L, 4101L, 10106L, 2811L, 2979L, 4785L, 5944L, 5901L,
10007L, 8710L, 8688L, 7383L, 7575L, 8047L, 7938L, 6585L,
9517L, 3729L, 8816L, 8704L, 10847L, 8812L, 8493L, 18115L,
15670L, 15931L, 16804L, 12450L, 7701L, 7588L, 8450L, 9205L,
5477L, 4666L, 4948L, 8262L, 7095L, 6798L, 12902L, 12883L,
12864L, 12788L, 12690L, 12896L, 12491L, 12199L, 11982L, 5213L,
5357L, 5053L, 5013L, 5321L, 5596L, 9467L, 8931L, 9305L, 7867L,
8427L, 8282L, 7291L, 6396L, 9725L, 5509L, 8545L, 8997L, 10171L,
10389L, 8700L)), class = "data.frame", row.names = c(NA,
-270L))

In short, the geom_smooth function only calculates confidence intervals. To get prediction intervals as well, these should be calculated outside of ggplot and passed in. This is a bit of a long way of coding, but hopefully you can see that predict is called twice on the lm model, once to produce two columns of confidence intervals, once to produce two columns of prediction intervals. These are passed on to geom_ribbons:
library(ggh4x)
library(tidyverse)
data_calibration |>
group_by(Station, Method) |>
nest() |>
mutate(model = map(data, ~ lm(Predicted ~ Observed, data = .x))) |>
mutate(fit = map2(model, data, ~ as.tibble(
predict(.x, interval = "conf"), new_data = tibble(Observed = seq(min(
data$Observed, max(data$Observed), 100
)))
)),
pred = map2(model, data, ~ as.tibble(
predict(.x, interval = "pred", new_data = tibble(Observed = seq(
min(data$Observed, max(data$Observed), 100)
)))
))) |>
unnest(c(data, fit, pred), names_sep = "_") |>
ggplot(aes(data_Observed, data_Predicted)) +
geom_point(color = "black", alpha = 1 / 3) +
facet_grid2(Station ~ Method, scales = "free", independent = "all") +
xlab("Measured") +
ylab("Predicted") +
theme_bw() +
geom_smooth(method = "lm", se = FALSE) +
geom_ribbon(aes(ymax = fit_upr, ymin = fit_lwr),
colour = "green",
fill = NA) +
geom_ribbon(aes(ymax = pred_upr, ymin = pred_lwr),
colour = "red",
fill = NA) +
theme(panel.grid.minor = element_blank())
I would welcome a tidier answer! One would be to create a new stat_predict layer function, which is a little tricky but not impossible.
Edit - that thing I said was perhaps a good idea, maybe it is!
Out of curiosity, I thought worth making a stat_predict function. Source the code from this gist and then the simple code will work with above data:
# To source new function, either...
source("https://gist.githubusercontent.com/andrewbaxter439/b508a60786f8af3c0be7b381a667ae07/raw/f7f4672222f0b1024cf6bf536ed7f6059867b4f2/stat_predict.R")
# or devtools::source_gist("b508a60786f8af3c0be7b381a667ae07")
ggplot(data_calibration, aes(Observed,Predicted))+
geom_point(color="black",alpha = 1/3) +
facet_grid2(Station ~ Method, scales="free", independent = "all")+
xlab("Measured") +
ylab("Predicted") +
theme_bw()+
geom_smooth(method="lm", se = FALSE) +
stat_smooth(method = "lm", geom = "ribbon", fill = NA, colour = "green") +
stat_predict(method = "lm", geom = "ribbon", fill = NA, colour = "red") +
theme(panel.grid.minor = element_blank())
Footnote: here's an old discussion on whether a prediction interval function should be a part of ggplot2 or not

Related

Plot linear regression analysis with error bar for variability

I wanted to make plots that look like figure 1 (source: link)
In figure 1, they have plotted the regression analysis with one-year yield variability. In my case, I would like to plot variability between two locations and 4 blocks for each treatment group. So the plot I wanted would have three facets for factors B.glucosidase, Protein, POX.C of variable and four colors for treatments factors. Also, in my current plot I have legend for block and treatment. I should only have treatment because the block should be used for making error bar for variability.
I tried with this code, which obviously doesn't work for what I want. (Data for df.melted included below.)
ggplot(df.melted, aes(x = value, y = yield, color = as.factor(treatment))) +
geom_point(aes(shape= as.factor(block))) +
stat_smooth(method = "lm", formula = y ~ x, col = "darkslategrey", se=F) +
stat_poly_eq(formula = y~x,
# aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
aes(label = ..rr.label..),
parse = TRUE) +
theme_classic() +
geom_errorbar(aes(ymax = df.melted$yield+sd(df.melted$yield), ymin = df.melted$yield-sd(df.melted$yield)), width = 0.05)+
facet_wrap(~variable)
Data:
df.melted <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("M", "U"), class = "factor"),
treatment = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("CC",
"CCS", "CS", "SCS"), class = "factor"), block = c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L), yield = c(5156L, 5157L, 5551L, 5156L, 4804L,
4720L, 4757L, 5021L, 4826L, 4807L, 4475L, 4596L, 4669L, 4588L,
4542L, 4592L, 5583L, 5442L, 5693L, 5739L, 5045L, 4902L, 5006L,
5086L, 4639L, 4781L, 4934L, 4857L, 4537L, 4890L, 4842L, 4608L,
5156L, 5157L, 5551L, 5156L, 4804L, 4720L, 4757L, 5021L, 4826L,
4807L, 4475L, 4596L, 4669L, 4588L, 4542L, 4592L, 5583L, 5442L,
5693L, 5739L, 5045L, 4902L, 5006L, 5086L, 4639L, 4781L, 4934L,
4857L, 4537L, 4890L, 4842L, 4608L, 5156L, 5157L, 5551L, 5156L,
4804L, 4720L, 4757L, 5021L, 4826L, 4807L, 4475L, 4596L, 4669L,
4588L, 4542L, 4592L, 5583L, 5442L, 5693L, 5739L, 5045L, 4902L,
5006L, 5086L, 4639L, 4781L, 4934L, 4857L, 4537L, 4890L, 4842L,
4608L), variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("B.glucosidase",
"Protein", "POX.C"), class = "factor"), value = c(1.600946,
1.474084, 1.433078, 1.532492, 1.198667, 1.193193, 1.214941,
1.360981, 1.853056, 1.690117, 1.544357, 1.825132, 1.695409,
1.764123, 1.903743, 1.538684, 0.845077, 1.011463, 0.857032,
0.989803, 0.859022, 0.919467, 1.01717, 0.861689, 0.972332,
0.952922, 0.804431, 0.742634, 1.195837, 1.267285, 1.08571,
1.20097, 6212.631579, 5641.403509, 4392.280702, 7120.701754,
5305.964912, 4936.842105, 5383.157895, 6077.894737, 5769.122807,
5016.842105, 5060.350877, 5967.017544, 5576.842105, 5174.035088,
5655.438596, 5468.77193, 7933.333333, 7000, 6352.982456,
8153.684211, 6077.894737, 4939.649123, 5002.807018, 6489.122807,
4694.035088, 5901.052632, 4303.859649, 6768.421053, 6159.298246,
6090.526316, 4939.649123, 5262.45614, 810.3024, 835.5242,
856.206, 759.8589, 726.2298, 792.6472, 724.7165, 699.3266,
500.9153, 634.8698, 637.9536, 648.8814, 641.0357, 623.3822,
555.2834, 520.8119, 683.3528, 595.9173, 635.4315, 672.4234,
847.2944, 745.5665, 778.3548, 735.8141, 395.2647, 570.4148,
458.0383, 535.3851, 678.0293, 670.7419, 335.2923, 562.5674
)), row.names = c(NA, -96L), class = "data.frame")
library(dplyr)
library(ggplot2)
library(ggpmisc)
Summarize data frame (this could also be done with stat_summary(), but it's often clearer/more transparent to do it explicitly up front). (I think that because your data set is balanced you could collapse/average over the block structure first, and then do your whole plot with the reduced data set - it shouldn't change the outcome of the linear regressions at all, at least not the mean values ... and any statistical comparisons should probably done on block-level summaries anyway ...)
df.sum <- (df.melted
%>% group_by(Location,treatment,variable)
%>% summarise(value=mean(value),yield_sd=sd(yield),
## collapse yield to mean *after* computing sd!
yield=mean(yield))
)
Plot:
(ggplot(df.melted,
aes(x = value, y = yield, color = treatment))
+ stat_smooth(method = "lm", col = "darkslategrey", se=FALSE)
+ stat_poly_eq(
formula = y ~ x,
## aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
aes(group=1, label = ..rr.label..),
parse = TRUE)
+ theme_classic()
+ scale_shape(guide=FALSE)
+ geom_point(data=df.sum)
+ geom_errorbar(data=df.sum,
aes(ymax = yield+yield_sd, ymin = yield-yield_sd),
width = 0.05)
+ facet_wrap(~variable,scale="free_x")
)
(adding group=1 to the stat_poly_eq() aesthetics means we only plot a single R^2 value per facet)
Since you're no longer using the shape aesthetic for anything, you could consider using it to show the Location variable ...

How to model the residual variance using varFunc from nlme?

To see whether a linear trend exists between age and quartiles of some variable, I fitted a linear model using lm. Plots of the residuals against fitted values as well as residuals against the quartiles indicate heterogeneity of variance.
This image was created through:
m1 <- lm(age ~ quartile, data = DF) #DF = dataframe
op <- par(mfrow = c(1,3))
plot(resid(m1) ~ fitted(m1)) #Homogeneity of variances: graphical
plot(resid(m1) ~ DF$quartile)
qqnorm(resid(m1));qqline(resid(m1))
par(op)
Within the GLS framework, I would like to have the residual variance to depend on the quartiles using one of the classes from the varFunc from the nlme package. I tried multiple functions, though without success.
The sample data below roughly yield the same pattern:
reconstruct <- structure(list(quartile = structure(c(2L, 1L, 4L, 3L, 1L, 1L,
3L, 4L, 3L, 2L, 2L, 3L, 3L, 1L, 2L, 4L, 2L, 2L, 2L, 1L, 1L, 3L,
1L, 1L, 1L, 3L, 3L, 1L, 4L, 3L, 3L, 3L, 2L, 4L, 1L, 1L, 3L, 1L,
3L, 2L, 2L, 4L, 3L, 4L, 1L, 4L, 1L, 4L, 3L, 1L, 1L, 2L, 4L, 2L,
2L, 2L, 1L, 1L, 4L, 1L, 4L, 4L, 3L, 3L, 4L, 4L, 1L, 1L, 2L, 1L,
4L, 3L, 4L, 2L, 3L, 3L, 3L, 1L, 1L, 4L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 4L, 1L, 3L, 4L, 2L, 4L, 1L, 4L, 4L, 1L, 3L, 4L, 2L, 2L, 1L,
1L, 4L, 2L, 4L, 3L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 2L, 4L, 2L,
4L, 1L, 4L, 3L, 4L, 1L, 2L, 1L, 4L, 2L, 1L, 3L, 1L, 4L, 1L, 4L,
4L, 4L, 1L, 1L, 4L, 2L, 4L, 3L, 2L, 2L, 1L, 3L, 1L, 4L, 2L, 3L,
4L, 3L, 4L, 1L, 1L, 2L, 2L, 4L, 1L, 2L, 4L, 2L, 1L, 2L, 1L, 1L,
4L, 3L, 2L, 3L, 2L, 4L, 3L, 4L, 1L, 4L, 1L, 3L, 4L, 4L, 4L, 1L,
4L, 3L, 2L, 4L, 3L, 3L, 2L, 1L, 1L, 4L, 1L, 4L, 2L, 2L, 2L, 4L,
2L, 3L), .Label = c("1", "2", "3", "4"), class = c("ordered",
"factor")), age = c(40.45, 33.49, 41.02, 53.06, 63.46, 47.17,
39.45, 60.71, 67.13, 53.12, 62.78, 70.39, 56.14, 50.55, 35.64,
38.5, 68.53, 53.69, 50.84, 38.66, 35.31, 57.03, 37.84, 35.82,
50.68, 56.44, 65.36, 58.64, 55.98, 56.13, 42.09, 54.91, 35.16,
63.68, 44.5, 51.79, 69.56, 59.11, 55.39, 43.87, 58.12, 65.59,
52.58, 60.17, 48.57, 52.09, 40.04, 35.61, 77.14, 43.82, 48.98,
36.26, 44.63, 62.13, 69.59, 41.22, 47.85, 53.5, 42.08, 49.08,
75.49, 52.39, 41.21, 58.25, 74.37, 64.28, 34.01, 42.99, 34.05,
60.99, 68.82, 41.3, 71.07, 55.21, 52.01, 37.76, 64.54, 57.43,
45.78, 62.9, 67.73, 49.25, 69.68, 51.85, 37.32, 47.37, 53.41,
68.55, 35.31, 63.59, 69.04, 48.03, 50.74, 42.93, 79.23, 72.22,
35.42, 43.26, 45.81, 37.92, 39.26, 60.97, 47.36, 50.19, 43.52,
41.82, 40.42, 54.87, 55.32, 75.74, 69.54, 56.44, 59.85, 50.02,
49.23, 48.38, 34.07, 38.57, 46.57, 35.29, 42.04, 63.35, 34.68,
50.34, 72.5, 40.27, 58.41, 37.79, 34.62, 75.47, 38.91, 46.21,
49.72, 40.55, 66.98, 59.07, 55.8, 38.86, 47.76, 59.16, 74.79,
57.87, 54.82, 43.58, 66.15, 34.55, 50.12, 67.68, 61.1, 40.29,
54.1, 69.8, 60.68, 36.7, 38.31, 46.15, 34.68, 41.92, 38.97, 50.67,
68.53, 40.06, 46.5, 44.38, 47.6, 37.95, 78.39, 54.73, 79.07,
40.05, 48.67, 58.71, 73.07, 75.65, 43.07, 48.25, 44.03, 51.37,
62.16, 54.78, 66.27, 50.25, 60.56, 32.77, 68.41, 37.74, 38.46,
46.33, 41.59, 64.52, 53.66, 71.04, 64.55, 53.25, 40.58, 52.33,
39.64, 52.76, 43.52, 48.45)), row.names = c(1:200), class = "data.frame")
To obtain the image:
m2 <- lm(age ~ quartile, data = reconstruct)
op <- par(mfrow = c(1,3))
plot(resid(m2) ~ fitted(m2))
plot(resid(m2) ~ reconstruct$quartile)
qqnorm(resid(m2));qqline(resid(m2))
par(op)
Any suggestions?

error bars should not be very long in barplots in r

I am plotting grouped barplots with error bars, but my error bars are very long as in this image
[![https://i.stack.imgur.com/VUByO.png][1]][1].
I would like shorter error bars as in this image
[![https://i.stack.imgur.com/JhaUJ.png][2]][2]
The code used
per$Leaf_Location <- factor(per$Leaf_Location, levels = unique(per$Leaf_Location))
per$Time <- factor(per$Time, levels = unique(per$Time))
ggplot(per, aes(x=Leaf_Location, y=Damage, fill=as.factor(Time))) +
stat_summary(fun.y=mean,
geom="bar",position=position_dodge(),colour="black",width=.7,size=.7) +
stat_summary(fun.ymin=min,fun.ymax=max,geom="errorbar",
color="black",position=position_dodge(.7), width=.2) +
stat_summary(geom = 'text', fun.y = max, position = position_dodge(.7),
label = c("a","b","c","d","d","a","b","c","d","d","a","b","c","d","d"), vjust = -0.5) +
scale_fill_manual("Legend", values = c("grey36","grey46","grey56","grey76","grey86","grey96")) +
xlab("Leaf Location") +
ylab("Damage ") +
theme_bw()
data:
per =
structure(list(Site = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Defathers",
"Kariithi", "Kimbimbi"), class = "factor"), Field = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("F1", "F2", "F3", "F4", "F5"), class = "factor"),
Leaf_Location = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("Lower", "Intermediate",
"Upper"), class = "factor"), Time = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L), .Label = c("20_days",
"40_days", "60_days", "80_days", "100_days"), class = "factor"),
Damage = c(25.25, 26.07, 24.43, 20.73, 17.8, 6.9, 45.05,
33.47, 24.43, 51.67, 41.72, 34.17, 81.67, 73.33, 55.83, 34.28,
26.08, 13.28, 26.27, 14.1, 6.93, 37.55, 29.33, 23.62, 49.17,
38.45, 31.38, 70.83, 60.83, 44.2, 31.03, 25.2, 14.97, 14.38,
6.5, 4.33, 52.2, 39.17, 30.97, 75, 62.5, 38.33, 87.5, 62.5,
57.5, 45.02, 31.02, 26.07, 46.72, 34.32, 21.5, 50.83, 34.23,
25.25, 45.83, 33.47, 27.7, 67.67, 57.5, 52.67, 30.98, 23.62,
9.1, 18.17, 18.57, 10.15, 46.67, 34.27, 23.62, 54.17, 40.05,
29.37, 70.83, 59.17, 47.53, 8.67, 5.63, 0.87, 9.87, 3.03,
0, 17.75, 6.88, 0, 62.5, 37.5, 27.7, 70.83, 57.5, 50.83,
6.5, 2.17, 1.3, 6.93, 3.03, 0.53, 14.82, 5.2, 0, 37.5, 28.52,
13, 75, 37.5, 37.5, 15.3, 9.53, 5.63, 9.43, 3.03, 0.43, 16.4,
6.07, 0, 57.5, 34.23, 21.98, 78.33, 62.5, 37.5, 12.08, 6.5,
1.3, 10.73, 3.03, 0, 15.2, 3.9, 0.43, 62.5, 37.5, 21.98,
64.17, 55.83, 41.73, 8.73, 3.57, 0, 8.57, 2.17, 0, 16.5,
7.7, 0.43, 42.58, 36.68, 13, 65.83, 47.5, 37.5, 8.03, 5.07,
0.43, 10.68, 7.27, 3.5, 48.38, 38.42, 24.83, 45.03, 38.4,
30.8, 73.33, 63.33, 50.83, 3.37, 2.17, 0.9, 9, 6.02, 5.2,
21.07, 12.37, 6.02, 45.02, 32.65, 21.67, 68.78, 56.68, 50,
0, 0, 0, 7.8, 4.33, 4.33, 25.17, 20.65, 13.15, 48.37, 39.23,
27.17, 75.83, 62.5, 49, 11.78, 12.72, 3.8, 20.18, 14.87,
8.95, 46.7, 39.32, 33.03, 49.18, 40.05, 24.43, 69.17, 60,
48.33, 0, 0, 0, 15.25, 9.82, 7.75, 45.9, 38.47, 35.52, 50.88,
37.61, 33.47, 79.17, 71.67, 58.33)), .Names = c("Site", "Field",
"Leaf_Location", "Time", "Damage"), row.names = c(NA, -225L), class = "data.frame")
Here's a simplified reproducible example to explain
first, some dummy data:
per = data.frame(x=rep(c('a','b'), each=100), y=c(2+rnorm(100), 3+rnorm(100,0,2)))
Now you are plotting the error bars, using fun.ymin=min, fun.ymax=max, which will cause them to extend the full range of the data, as in the following graph:
ggplot(per, aes(x, y)) +
stat_summary(fun.y = mean, geom="bar") +
geom_point(position = position_jitter(0.1)) +
stat_summary(fun.ymin=min, fun.ymax=max, geom="errorbar", width=0.4) +
theme_bw()
Whereas, it is more conventional to use error bars that extend either +/- one standard deviation, as in the following:
ggplot(per, aes(x, y)) +
stat_summary(fun.y = mean, geom="bar") +
stat_summary(
fun.ymin=function(y) {mean(y) - sd(y)},
fun.ymax=function(y) {mean(y) + sd(y)},
geom="errorbar", width=0.2) +
theme_bw()
Or one standard error, like this:
ggplot(per, aes(x, y)) +
stat_summary(fun.y = mean, geom="bar") +
stat_summary(
fun.ymin=function(y) {mean(y) - sqrt(var(y)/length(y))},
fun.ymax=function(y) {mean(y) + sqrt(var(y)/length(y))},
geom="errorbar", width=0.2) +
theme_bw()
EDIT - example data were added to question, after this answer was originally posted
We can applying exactly the same approach as above to your example data:
ggplot(per, aes(x=Leaf_Location, y=Damage, fill=as.factor(Time))) +
stat_summary(fun.y=mean, geom="bar",position=position_dodge(),colour="black",width=.7,size=.7) +
stat_summary(
fun.ymin=function(y) {mean(y) - sqrt(var(y)/length(y))},
fun.ymax=function(y) {mean(y) + sqrt(var(y)/length(y))},
geom="errorbar",
position=position_dodge(.7), width=.2)

Negative valued factors in stacked barplot

I am trying to figure out a way of introducing negative values of factors in a stacked barplot in ggplot2. The data is level of support for basic income among Finnish MPs. It is at the bottom of the post.
I can get a plot that is like the one I want (minus the negatively valued factors) with the following code:
library(forcats)
library(ggplot2)
support.plot <- ggplot(mpsupport.df, aes(fct_infreq(Party))) +
geom_bar (aes(fill=Support)) +
coord_flip() +
theme(legend.position = "bottom")+
ylab("Party") +
xlab("Number of MPs")
This gives the following:
What I would like is for the graph to be centred on the green-turquoise border, so that support for basic income was to the right, while opposition was to the left. Does this make sense?
Data:
> dput(mpsupport.df)
structure(list(Party = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 5L, 6L, 7L, 7L, 7L), .Label = c("National Coalition",
"Centre Party", "Social Democratic Party", "Left Alliance", "Christian Democrats",
"True Finns", "Swedish People's Party", "Greens"), class = "factor"),
Support = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("fully.agree", "partially.agree",
"partially.disagree", "fully.disagree"), class = "factor")), .Names = c("Party",
"Support"), row.names = c("1", "2", "2.1", "2.2", "2.3", "2.4",
"4", "4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7", "6", "8",
"8.1", "8.2", "8.3", "8.4", "8.5", "8.6", "8.7", "8.8", "8.9",
"8.10", "8.11", "8.12", "8.13", "8.14", "9", "9.1", "9.2", "9.3",
"9.4", "9.5", "9.6", "9.7", "10", "10.1", "10.2", "10.3", "10.4",
"10.5", "10.6", "10.7", "10.8", "10.9", "10.10", "10.11", "10.12",
"10.13", "10.14", "10.15", "10.16", "10.17", "10.18", "10.19",
"10.20", "10.21", "10.22", "10.23", "10.24", "10.25", "10.26",
"10.27", "10.28", "10.29", "10.30", "10.31", "10.32", "10.33",
"11", "11.1", "11.2", "11.3", "12", "12.1", "12.2", "12.3", "13",
"14", "14.1", "14.2", "14.3", "14.4", "14.5", "14.6", "14.7",
"14.8", "14.9", "14.10", "14.11", "14.12", "14.13", "14.14",
"14.15", "14.16", "14.17", "14.18", "14.19", "14.20", "15", "15.1",
"17", "17.1", "17.2", "17.3", "17.4", "17.5", "17.6", "17.7",
"17.8", "17.9", "17.10", "17.11", "17.12", "17.13", "17.14",
"17.15", "17.16", "17.17", "17.18", "17.19", "18", "18.1", "18.2",
"18.3", "18.4", "18.5", "18.6", "18.7", "19", "19.1", "19.2",
"19.3", "19.4", "19.5", "19.6", "19.7", "19.8", "19.9", "19.10",
"19.11", "19.12", "19.13", "19.14", "19.15", "19.16", "19.17",
"19.18", "19.19", "19.20", "19.21", "19.22", "19.23", "21", "21.1",
"22", "22.1", "22.2", "22.3", "22.4", "22.5", "22.6", "22.7",
"22.8", "22.9", "22.10", "22.11", "22.12", "23", "23.1", "23.2",
"23.3", "25", "25.1", "25.2", "25.3", "25.4", "25.5", "25.6",
"27", "27.1", "27.2", "27.3", "27.4", "27.5", "29", "30", "31",
"31.1", "31.2"), class = "data.frame")
Try something along these lines:
library(ggplot)
library(forcats)
mpsupport.df$dummy = ifelse(mpsupport.df$Support %in% c("fully.agree", "partially.agree"), 1, -1)
agg = aggregate(dummy ~ Support + Party, data = mpsupport.df, FUN = sum)
ggplot(data = agg)+
geom_bar (aes(y = dummy, x= fct_infreq(Party), fill = factor(Support, levels = c("fully.agree", "partially.agree", "fully.disagree" ,"partially.disagree"))), stat= "identity") +
coord_flip()+
theme(legend.position = "bottom", legend.title = element_blank())

Different x and y axis scales in multifaceted scatter ggplot2

I have used lemon package with ggplot2 for plotting multifaceted scatter plot with regression and confidence interval line using the following code
library(tidyverse)
library(lemon)
#Plotting
ggplot(data_calibration, aes(Observed,Predicted))+
geom_point(color="black",alpha = 1/3) +
facet_rep_grid(Station ~ Method, scales="free",
repeat.tick.labels = "all")+
xlab("Measured") +
ylab("Predicted")+ theme_bw()+
geom_smooth(method="lm")
I want to have both x and y-axis scales to be free. But I am only getting free y-axis scale.
Also, I want to add the prediction interval to the plots.
Here is the dataset in dput() format.
data_calibration = structure(list(Observed = c(17229L, 15964L, 13373L, 17749L, 12457L,
7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L, 7197L, 7220L,
7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L, 11465L,
11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L, 10759L,
9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L, 3183L,
9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L, 17749L,
12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L, 7197L,
7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L, 17229L, 15964L, 13373L,
17749L, 12457L, 7166L, 7842L, 8675L, 11718L, 6049L, 4232L, 4126L,
7197L, 7220L, 7284L, 16410L, 15772L, 12166L, 11997L, 7827L, 13034L,
11465L, 11409L, 10165L, 9702L, 2942L, 2940L, 4361L, 6197L, 6144L,
10759L, 9720L, 8631L, 7354L, 7640L, 6653L, 7551L, 6791L, 9093L,
3183L, 9078L, 8688L, 11023L, 9000L, 9001L), Station = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Raigad",
"Ratnagiri", "Thane "), class = "factor"), Method = structure(c(6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("ANN",
"ELNET", "LASSO", "PCA-ANN", "PCA-MLR", "SMLR"), class = "factor"),
Predicted = c(14463L, 14285L, 14452L, 12765L, 11917L, 8143L,
11251L, 8611L, 6789L, 2059L, 2787L, 2201L, 3062L, 4508L,
4975L, 15357L, 15605L, 12326L, 10377L, 9113L, 13926L, 13142L,
11407L, 8711L, 7801L, 2064L, 4563L, 4725L, 6247L, 7170L,
9492L, 8857L, 10323L, 7389L, 6776L, 7842L, 8261L, 6156L,
8627L, 4326L, 8094L, 8897L, 10370L, 10214L, 8548L, 16043L,
16671L, 15831L, 13463L, 11921L, 10239L, 9110L, 8090L, 10794L,
5826L, 3621L, 5639L, 7364L, 8152L, 5515L, 15182L, 14370L,
13559L, 12748L, 11936L, 11125L, 10313L, 9502L, 8691L, 7879L,
7068L, 6257L, 5445L, 4634L, 3822L, 10045L, 9911L, 11038L,
9255L, 8736L, 8848L, 8063L, 7847L, 8538L, 6744L, 9583L, 10474L,
8343L, 10353L, 8791L, 13185L, 13331L, 13099L, 12557L, 11898L,
10474L, 11199L, 10255L, 9251L, 6148L, 6795L, 6166L, 7775L,
8157L, 7990L, 14843L, 15086L, 12585L, 10987L, 10193L, 13663L,
11317L, 11071L, 9392L, 6991L, 4484L, 4667L, 4846L, 5830L,
6577L, 9085L, 8802L, 9570L, 7770L, 7652L, 8006L, 7995L, 6599L,
9050L, 4876L, 8360L, 8981L, 9931L, 9479L, 8009L, 13775L,
13890L, 13416L, 12851L, 12141L, 10693L, 10834L, 10372L, 9585L,
5914L, 5930L, 5922L, 7854L, 7407L, 7697L, 14941L, 15174L,
12572L, 10817L, 10412L, 13705L, 11154L, 10886L, 9448L, 7215L,
4389L, 4875L, 4809L, 5747L, 6385L, 9034L, 8749L, 9410L, 7820L,
7798L, 7940L, 7957L, 6803L, 8844L, 5227L, 8369L, 8972L, 9789L,
9514L, 7940L, 15309L, 14477L, 14219L, 18581L, 12084L, 10550L,
8666L, 8812L, 11415L, 5566L, 3928L, 4592L, 7861L, 7489L,
6903L, 12509L, 13366L, 11956L, 11880L, 8711L, 12768L, 11690L,
10922L, 4101L, 10106L, 2811L, 2979L, 4785L, 5944L, 5901L,
10007L, 8710L, 8688L, 7383L, 7575L, 8047L, 7938L, 6585L,
9517L, 3729L, 8816L, 8704L, 10847L, 8812L, 8493L, 18115L,
15670L, 15931L, 16804L, 12450L, 7701L, 7588L, 8450L, 9205L,
5477L, 4666L, 4948L, 8262L, 7095L, 6798L, 12902L, 12883L,
12864L, 12788L, 12690L, 12896L, 12491L, 12199L, 11982L, 5213L,
5357L, 5053L, 5013L, 5321L, 5596L, 9467L, 8931L, 9305L, 7867L,
8427L, 8282L, 7291L, 6396L, 9725L, 5509L, 8545L, 8997L, 10171L,
10389L, 8700L)), class = "data.frame", row.names = c(NA,
-270L))
Thanks in advance for the help.
I have solved this issue after taking help from this post.
First, create two plots using facet_grid and facet_wrap.
g1 = ggplot(data_calibration, aes(Observed,Predicted))+
geom_point(color="black",alpha = 1/3) +
facet_wrap(Station ~ Method, scales="free", ncol=6)+
xlab("Measured") +
ylab("Predicted")+ theme_bw()+
geom_smooth(method="lm")+
theme(strip.background = element_blank(),
strip.text = element_blank())
g2 = ggplot(data_calibration, aes(Observed,Predicted))+
geom_point(color="black",alpha = 1/3) +
facet_grid(Station ~ Method, scales="free")+
xlab("Measured") +
ylab("Predicted")+ theme_bw()+
geom_smooth(method="lm")
Now replace the top facet strips of g1 with those from g2
library(grid)
library(gtable)
gt1 = ggplot_gtable(ggplot_build(g1))
gt2 = ggplot_gtable(ggplot_build(g2))
gt1$grobs[grep('strip-t.+1$', gt1$layout$name)] = gt2$grobs[grep('strip-t', gt2$layout$name)]
grid.draw(gt1)
Add the right-hand panel strips
gt1 = gtable_add_cols(gt1, widths=gt1$widths[1], pos = -1)
panel_id <- gt1$layout[grep('panel-.+1$', gt1$layout$name),]
gt.side1 = gtable_filter(gt2, 'strip-r-1')
gt.side2 = gtable_filter(gt2, 'strip-r-2')
gt.side3 = gtable_filter(gt2, 'strip-r-3')
gt1 = gtable_add_grob(gt1, zeroGrob(), t = 1, l = ncol(gt1), b=nrow(gt1))
gt1 = gtable_add_grob(gt1, gt.side1, t = panel_id$t[1], l = ncol(gt1))
gt1 = gtable_add_grob(gt1, gt.side2, t = panel_id$t[2], l = ncol(gt1))
gt1 = gtable_add_grob(gt1, gt.side3, t = panel_id$t[3], l = ncol(gt1))
grid.newpage()
grid.draw(gt1)
Update
I got a very nice solution using facet_grid2 function of ggh4x package. With just one line the task can be achieved like
ggplot(data_calibration, aes(Observed,Predicted))+
geom_point(color="black",alpha = 1/3) +
facet_grid2(Station ~ Method, scales="free", independent = "all")+
xlab("Measured") +
ylab("Predicted")+ theme_bw()+
geom_smooth(method="lm")

Resources