ggplot2: issues with dual y-axes and Loess smoothing - r

I'm a novice with R and ggplot. I recognize the power of R and elegance of ggplot and am trying to learn. Normally, I can find a solution online but have had no luck this time.
I am trying to generate a chart in ggplot comparing Economic Freedom scores with Life Expectancy and Infant mortality using World Bank data (the csv data is included at the bottom of the post). I have had some success using this code (using the example at https://rpubs.com/MarkusLoew/226759):
p <- ggplot(mydata, aes(x = Score))
p <- p + geom_point(aes(y = Longevity, colour = "Life Expectancy"))
p <- p + geom_point(aes(y = Infant/1, colour = "Infant mortality (per
capita)"))
p <- p + scale_y_continuous(sec.axis = sec_axis(~.*1, name = "Infant
mortality (per capita)"))
p <- p + scale_colour_manual(values = c("blue", "red"))
p <- p + labs(y = "Life Expectancy (years)",
x = "Score",
colour = " ")
p
This has produced the following:
my messed up chart
I can't manage to properly scale the primary y-axis. Scaling the graphs as in the example (link above) doesn't work: I just expand out or squash the Longevity data. I tried loading the Longevity data on the secondary y but it still didn't work.
The other issue is that I would like to add LOESS smooth trendlines to each set of data. I have tried following various examples but nothing works.
If anyone has a solution it will be much appreciated!
Thanks
Data:
Country Name,Score,GDP,Infant,Longevity,,,,,,,,,
Afghanistan,48.9,585.850064,53.2,63.673,,,,,,,,,
Albania,64.4,4537.86249,8.1,78.345,,,,,,,,,
Algeria,46.5,4.12E+03,21,76.078,,,,,,,,,
Angola,48.5,4.17E+03,55.8,61.547,,,,,,,,,
Argentina,50.4,1.44E+04,9.7,76.577,,,,,,,,,
Armenia,70.3,3936.79832,11.9,74.618,,,,,,,,,
Australia,81,5.38E+04,3.1,82.5,,,,,,,,,
Austria,72.3,4.73E+04,3,80.8902439,,,,,,,,,
Azerbaijan,63.6,4131.61831,21.9,72.026,,,,,,,,,
Bahrain,68.5,23655.0356,6.4,76.9,,,,,,,,,
Bangladesh,55,1.52E+03,28.3,72.489,,,,,,,,,
Barbados,54.5,16788.6839,11.9,75.906,,,,,,,,,
Belarus,58.6,5726.02967,2.9,73.82682927,,,,,,,,,
Belgium,67.8,4.33E+04,3.1,80.99268293,,,,,,,,,
Belize,58.6,4905.50628,12.8,70.384,,,,,,,,,
Benin,59.2,829.797231,65.1,60.907,,,,,,,,,
Bhutan,58.4,3110.23011,26.5,70.197,,,,,,,,,
Bolivia,47.7,3393.95582,29,69.125,,,,,,,,,
Bosnia and Herzegovina,60.2,5180.6363,5.1,76.911,,,,,,,,,
Botswana,70.1,7595.59585,32.3,66.797,,,,,,,,,
Brazil,52.9,9.82E+03,14.6,75.509,,,,,,,,,
Brunei Darussalam,69.8,28290.5852,9,77.203,,,,,,,,,
Bulgaria,67.9,8031.59844,6.7,74.61463415,,,,,,,,,
Burkina Faso,59.6,670.705913,52.6,60.361,,,,,,,,,
Burundi,53.2,320.08687,44.1,57.481,,,,,,,,,
Cabo Verde,56.9,3209.69112,15.9,72.798,,,,,,,,,
Cambodia,59.5,1384.42319,26.3,68.981,,,,,,,,,
Cameroon,51.8,1446.70289,56.6,58.073,,,,,,,,,
Canada,78.5,4.50E+04,4.6,82.3005122,,,,,,,,,
Central African Republic,51.8,418.411287,89.2,52.171,,,,,,,,,
Chad,49,669.886426,75,52.903,,,,,,,,,
Chile,76.5,1.53E+04,6.6,79.522,,,,,,,,,
China,57.4,8.83E+03,8.6,76.252,,,,,,,,,
Colombia,69.7,6.30E+03,13.1,74.381,,,,,,,,,
Comoros,55.8,797.286368,53.6,63.701,,,,,,,,,
Costa Rica,65,11630.6684,8,79.831,,,,,,,,,
Cote d'Ivoire,63,1662.44247,66,53.582,,,,,,,,,
Croatia,59.4,13294.5149,4,78.02195122,,,,,,,,,
Cyprus,67.9,25233.571,2.2,80.508,,,,,,,,,
Czech Republic,73.3,2.04E+04,2.6,78.33170732,,,,,,,,,
Denmark,75.1,5.63E+04,3.7,80.70487805,,,,,,,,,
Djibouti,46.7,1927.58971,53,62.465,,,,,,,,,
Dominica,63.7,7609.61435,30.4,,,,,,,,,,
Dominican Republic,62.9,7052.25884,25.6,73.861,,,,,,,,,
Ecuador,49.3,6.20E+03,12.7,76.327,,,,,,,,,
"Egypt, Arab Rep.",52.6,2.41E+03,19.4,71.484,,,,,,,,,
El Salvador,64.1,3889.30877,12.9,73.512,,,,,,,,,
Equatorial Guinea,45,9850.01358,67.4,57.681,,,,,,,,,
Estonia,79.1,19704.655,2.3,77.73658537,,,,,,,,,
Ethiopia,52.7,767.563478,42.5,65.475,,,,,,,,,
Fiji,63.4,5589.38883,21.1,70.269,,,,,,,,,
Finland,74,4.57E+04,1.9,81.7804878,,,,,,,,,
France,63.3,3.85E+04,3.5,82.27317073,,,,,,,,,
Gabon,58.6,7220.68724,36.1,66.105,,,,,,,,,
Georgia,76,4078.25488,10.2,73.261,,,,,,,,,
Germany,73.8,4.45E+04,3.2,80.64146341,,,,,,,,,
Ghana,56.2,1641.48662,37.2,62.742,,,,,,,,,
Greece,55,1.86E+04,4.2,81.03658537,,,,,,,,,
Guatemala,63,4470.98957,23.9,73.409,,,,,,,,,
Guinea,47.6,825.34493,58.1,60.015,,,,,,,,,
Guinea-Bissau,56.1,723.658622,57.4,57.403,,,,,,,,,
Guyana,58.5,4725.31906,26.7,66.65,,,,,,,,,
Haiti,49.6,765.683925,55,63.33,,,,,,,,,
Honduras,58.8,2480.12593,16.2,73.575,,,,,,,,,
"Hong Kong SAR, China",88.6,4.62E+04,,84.22682927,,,,,,,,,
Hungary,65.8,1.42E+04,4.1,75.56829268,,,,,,,,,
Iceland,74.4,70056.8734,1.7,82.46829268,,,,,,,,,

This should give you a good start. You can play around with scale_ratio & dif if you want to
library(tidyverse)
mydata <- read_csv(text, col_types = paste0(c("c", rep("d", 4), rep("_", 9)), collapse = ""))
mydata
#> # A tibble: 67 x 5
#> `Country Name` Score GDP Infant Longevity
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Afghanistan 48.9 586. 53.2 63.7
#> 2 Albania 64.4 4538. 8.1 78.3
#> 3 Algeria 46.5 4120 21 76.1
#> 4 Angola 48.5 4170 55.8 61.5
#> 5 Argentina 50.4 14400 9.7 76.6
#> 6 Armenia 70.3 3937. 11.9 74.6
#> 7 Australia 81 53800 3.1 82.5
#> 8 Austria 72.3 47300 3 80.9
#> 9 Azerbaijan 63.6 4132. 21.9 72.0
#> 10 Bahrain 68.5 23655. 6.4 76.9
#> # ... with 57 more rows
Calculate ratios needed to scale the two y-axes
scale_ratio <- (max(mydata$Infant, na.rm = TRUE) - min(mydata$Infant, na.rm = TRUE)) /
(max(mydata$Longevity, na.rm = TRUE) - min(mydata$Longevity, na.rm = TRUE))
dif <- min(mydata$Longevity, na.rm = TRUE) - min(mydata$Infant, na.rm = TRUE)
myColor <- c("#d95f02", "#1b9e77")
p <- ggplot(mydata, aes(x = Score, y = Longevity)) +
geom_point(aes(colour = "Life Expectancy"),
shape = "triangle",
alpha = 0.7, size = 2) +
geom_point(aes(y = Infant/scale_ratio + dif,
colour = "Infant mortality (per capita)"),
alpha = 0.7, size = 2) +
scale_y_continuous(sec.axis = sec_axis(~ (. - dif) * scale_ratio,
name = "Infant mortality (per capita)")) +
scale_colour_manual(values = myColor) +
theme_bw(base_size = 14) +
labs(y = "Life Expectancy (years)",
x = "Score",
colour = " ") +
guides(colour = guide_legend(title = "",
override.aes = list(shape = c("circle", "triangle")))) +
theme(legend.position = 'bottom') +
NULL
p
Add fitted lines and their corresponding equations/R2
### https://docs.r4photobiology.info/ggpmisc/articles/user-guide.html
library(ggpmisc)
formula <- y ~ poly(x, 2, raw = TRUE)
p +
stat_smooth(aes(y = Longevity),
method = "lm", formula = formula, se = FALSE, size = 1, color = myColor[2]) +
stat_smooth(aes(y = Infant/scale_ratio + dif),
method = "lm", formula = formula, se = FALSE, size = 1, color = myColor[1]) +
stat_poly_eq(aes(y = Longevity,
label = paste(..eq.label.., ..adj.rr.label..,
sep = "~~italic(\"with\")~~")),
geom = "text", alpha = 0.7,
formula = formula, parse = TRUE,
color = myColor[2],
label.x.npc = 0.5,
label.y.npc = 0.95) +
stat_poly_eq(aes(y = Infant/scale_ratio + dif,
label = paste(..eq.label.., ..adj.rr.label..,
sep = "~~italic(\"with\")~~")),
geom = "text", alpha = 0.7,
color = myColor[1],
formula = formula, parse = TRUE,
label.x.npc = 0.75,
label.y.npc = 0.15) +
NULL
Created on 2018-10-07 by the reprex package (v0.2.1.9000)

Related

label mean lines in ggplot that are mapped in a group

I have density plots for each shift and year. The means are plotted by grouping in a df called mu. I also add vertical reference lines which I can label without issue but I cannot seem to get the labels on the grouped vertical lines. You will see my latest attempt which throws an error "Aesthetics must be either length 1 or the same as the data (134): x"
My code
library(ggplot2)
library(dplyr)
df <- read.csv("f4_bna_no_cup.csv")
head(df)
ï..n yr s ys x
1 1 2021 1 2021-1 116.83
2 2 2021 1 2021-1 114.83
3 3 2021 1 2021-1 115.50
4 4 2021 1 2021-1 115.42
5 5 2021 1 2021-1 115.58
6 6 2021 1 2021-1 115.58
#summarize means by ys (year-shift)
mu <- df %>%
group_by(ys,s) %>%
summarise(grp.mean = mean(x))
mu
ys s grp.mean
<chr> <int> <dbl>
1 2021-1 1 116.
2 2021-2 2 117.
3 2022-1 1 114.
4 2022-2 2 115.
llab<-mu
shift <- c("Shift 1", "Shift 2")
#density charts on df
ggplot(data=df, aes(x=x,group =ys, fill = yr, color = yr)) +
geom_density(alpha = 0.4) +
scale_x_continuous(limits=c(112,120))+
geom_vline(aes(xintercept = grp.mean), data = mu, linetype = "dashed", size = 0.5) +
geom_text(aes(x=llab$grp.mean, y=.6), label = llab$ys) + #this throws the error
geom_vline(aes(xintercept=114.8), linetype="dashed", size=0.5, color = 'green3') +
geom_text(aes(x=114.8, y=.6), label = "Target", angle = 90, color="black",size=3) +
geom_vline(aes(xintercept=114.1), linetype="solid", size=0.5, color = 'limegreen') +
geom_text(aes(x=114.1, y=.55), label = "Potential", angle = 90, color="black",size=3 ) +
geom_vline(aes(xintercept=113.4), linetype="solid", size=0.5, color = 'firebrick3') +
geom_text(aes(x=113.4, y=.62), label = "Label wt", angle = 90,
color="black",size=3, family = "Times New Roman", vjust=0) +
facet_grid(
.~s,
labeller = labeller(
s = c(`1` = "Shift 1", `2` = "Shift 2")
))+
theme_light()+
theme(legend.position = "none")
Output so far...I'm so close.
Persistence pays off. I figured it out and thought I would share it in case someone else has a similar problem:
All code remains the same as in my question except a slight change to grouping for the mu df, AND replace the line that I noted as throwing the error as follows:
#small change to group_by, retaining yr
mu <- df %>%
group_by(yr,s,ys) %>%
summarise(grp.mean = mean(x))
Replace: geom_text(aes(x=llab$grp.mean, y=.6), label = llab$ys), with
geom_text(data = mu, aes(label = yr), x = mu$grp.mean, y = .60, color = "black", angle = 90, vjust = 0)

How to overlay a barplot on top of other plot with a different geom, by mapping the barplot positions to the original plot's scale

I'm trying to overlay a barplot on top of an existing plot, by mapping the positions of the new barplot to the values of the original plot.
Example
Using the mpg dataset from the {ggplot2} package, I want to assess the relationship between the variables cty and manufacturer. To keep this simple, I'll focus on 3 manufacturers only.
Step 1 -- data for analysis
I'm going to add another column that categorizes the cty values into 3 categories "low", "medium", "high".
library(ggplot2)
library(dplyr)
my_manuf <- c("audi", "ford", "dodge")
my_df <-
mpg %>%
filter(manufacturer %in% my_manuf) %>%
select(manufacturer, cty) %>%
mutate(cty_range = case_when(between(cty, 9, 13) ~ "low",
between(cty, 13, 16) ~ "medium",
between(cty, 16, 21) ~ "high"))
my_df
#> # A tibble: 80 x 3
#> manufacturer cty cty_range
#> <chr> <int> <chr>
#> 1 audi 18 high
#> 2 audi 21 high
#> 3 audi 20 high
#> 4 audi 21 high
#> 5 audi 16 medium
#> 6 audi 18 high
#> 7 audi 18 high
#> 8 audi 18 high
#> 9 audi 16 medium
#> 10 audi 20 high
#> # ... with 70 more rows
Step 2 -- fit a (very) simple model
2.1) Going to fit the model lm(cty ~ manufacturer)
library(emmeans)
library(ggeffects)
model_results <-
my_df %>%
lm(cty ~ manufacturer, data = .) %>%
ggeffects::ggemmeans(terms = "manufacturer") %>%
as_tibble() %>%
rename(manufacturer = x)
model_results
#> # A tibble: 3 x 6
#> manufacturer predicted std.error conf.low conf.high group
#> <fct> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 audi 17.6 0.521 16.6 18.6 1
#> 2 dodge 13.1 0.364 12.4 13.9 1
#> 3 ford 14 0.442 13.1 14.9 1
2.2) Going to merge model_results with my_df to be able to add raw data points to the plot.
data_for_plot <- left_join(my_df, model_results)
Step 3 -- visualize
p <-
data_for_plot %>%
ggplot(aes(x = manufacturer, y = predicted)) +
geom_label(aes(label = round(predicted, 2))) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
geom_jitter(aes(x = manufacturer, y = predicted, color = cty_range)) +
expand_limits(y = c(12, 21)) +
theme_minimal()
p
Desired output
I want to add a barplot aligned to each manufacturer, to account for the proportion of the cty_range variable in each of its categories ("low"/"medium"/"high").
In my mind I imagine it as something like the following (not drawn to scale but values are real):
I know there are tools from packages such as patchwork and cowplot. However, here I want to map the barplots to the x-axis scale, such that the barplot relevant to audi will be positioned respectively, etc.
Second, I wish there's an easy way to achieve this by simply adding another "barplot prop" geom to the structure that constructs p. This is because p already originates from data_for_plot, which has all the info we need to compute the proportions for the barplots (i.e., via dplyr::count() or alike).
Making use of patchwork::inset_elementyou could do:
library(ggplot2)
library(dplyr)
my_df_bar <- my_df %>%
count(manufacturer, cty_range) %>%
group_by(manufacturer) %>%
mutate(pct = n / sum(n),
cty_range = factor(cty_range, levels = c("low", "medium", "high")))
p_bar <- ggplot(my_df_bar, aes(manufacturer, pct, fill = cty_range)) +
geom_col(position = position_dodge2(width = .9)) +
geom_text(aes(y = pct - .01, label =scales::percent(pct)),
position = position_dodge2(width = .9),
size = 8 / .pt, vjust = 1) +
theme_void() +
guides(fill = "none")
library(patchwork)
p + inset_element(p_bar, 0, .8, 1, 1)
EDIT Personally I would go for patchwork. (; But as an alternative approach you could achieve your result like so.
Most tricky part is to put the bars on top of the error bars and jitters which requires some transformation of the data similar to the ones necessary in case of a second-axis. Not sure whether it is easier to generalize this approach.
trans <- 21
scale <- 5
breaks_fun <- function(x) {
scales::breaks_extended()(x + trans) - trans
}
p <-
data_for_plot %>%
ggplot(aes(x = manufacturer, y = predicted - trans)) +
geom_label(aes(label = round(predicted, 2))) +
geom_errorbar(aes(ymin = conf.low - trans,
ymax = conf.high - trans), width = 0.2) +
geom_jitter(aes(x = manufacturer, y = predicted - trans, color = cty_range)) +
scale_y_continuous(breaks = breaks_fun, labels = ~ .x + trans) +
theme_minimal()
pct <- function(count, group) {
count / tapply(count, group, sum)[group]
}
p +
geom_bar(aes(fill = cty_range,
y = scale * after_stat(pct(count, x))),
stat = "count", position = position_dodge2(width = .9)) +
geom_text(aes(group = cty_range,
y = scale * after_stat(pct(count, x)) - .1,
label = scales::percent(after_stat(pct(count, x)))),
stat = "count", position = position_dodge2(width = .9),
size = 8 /.pt, vjust = 1) +
guides(fill = "none") +
coord_flip()

How can I add NSE and PBIAS results to ggplot using facet_wrap?

I'm a beginner and I created a function (down below) to calculate the Percent Bias (PBIAS) and Nash-Sutcliffe Efficiency (NSE) of simulated vs observed data. However I can calculate these tests only using my whole data set.
model.assess <- function(Sim, Obs) {
rmse = sqrt( mean( (Sim - Obs)^2, na.rm = TRUE) ) #Formula to calculate RMSE
RSR <- rmse / sd(Obs) #object producing RSR test from the RMSE formula
PBIAS <- 100 *(sum((Sim - Obs)/sum(Obs), na.rm =TRUE)) #object producing PBIAS test
NSE <- 1 - sum((Obs - Sim)^2)/sum((Obs - mean(Obs))^2, na.rm =TRUE) #object producing NSE test
stats <- print(paste0("RSR = ", sprintf("%.3f", round(RSR, digits=3)), " PBIAS = ", sprintf("%.3f",round(PBIAS, digits=3))," NSE = ", sprintf("%.3f",round(NSE, digits=3))))
return(stats) #returns the results of the tests with 3 decimals and spacing in between
This is my data set, monthly streamflow, of four different stations (SNS, MRC, TLG, SJF):
StationID Date Obs_flow Sim_flow Month Year
SNS 1950-10-01 0.010170 0.030687967 October 1950-01-01
SNS 1950-11-01 0.366260 0.416466741 November 1950-01-01
SNS 1950-12-01 0.412210 0.496136731 December 1950-01-01
SNS 1951-01-01 0.119520 0.182072570 January 1951-01-01
SNS 1951-02-01 0.113480 0.142611192 February 1951-01-01
SNS 1951-03-01 0.127090 0.176350274 March 1951-01-01
SNS 1951-04-01 0.175120 0.193221389 April 1951-01-01
SNS 1951-05-01 0.208940 0.275980903 May 1951-01-01
SNS 1951-06-01 0.114420 0.144675317 June 1951-01-01
SNS 1951-07-01 0.032280 0.018057796 July 1951-01-01
To plot a scatter plot of Obs vs Sim with the equation and R squared I used:
dataset %>%
filter(StationID == "SNS") %>%
ggplot(aes(x = Obs_flow, y = Sim_flow)) +
geom_point(aes(Obs_flow, Sim_flow), alpha = 0.3)+
stat_smooth(aes(x = Obs_flow, y = Sim_flow),
method = "lm", se = TRUE, colour="#FC4E07", fullrange = TRUE) +
stat_poly_eq(formula = "y~x",
aes(label = paste0(..eq.label..)), #adding the equation on the top
parse = TRUE, label.x.npc = "center", label.y.npc = 0.97, size = 3.45, family= "Times New Roman")+
stat_poly_eq(formula = "y~x",
aes(label = paste0(..rr.label..)), #adding the Rsquared at the bottom
parse = TRUE, label.x.npc = 0.95, label.y.npc = 0.05, size = 3.45, family= "Times New Roman")+
annotate("text", x = 0, y = 1.3,, label = paste0(model.assess(dataset$Sim_flow, dataset$Obs_flow)), collapse = "\n", hjust = 0, size=2.4, family= "Times New Roman") +
facet_wrap(~ Month, ncol=4, labeller = labeller(StationID = c("MRC" = "Merced River", "SJF"= "Upper San Joaquin River", "SNS" = "Stanislaus River", "TLG" = "Tuolumne River")), scales = "fixed")
stat_poly_eq added an equation and Rsquared for each facet, but the annotate adds the same number for all facets. Is there a way to add NSE and PBIAS for each facet separately? I tried the package HydroGOF, but I got the same result. Excuse the aesthetics.
A sample data would be helpful for others to assist you. Please take a look at this link for future queries.
You have a few problems. Your model.assess() function gives one record, while you need values for each facet. So, I created a dummy using the code
ll <- data.frame(Month=c(),label=c())
nM <- length(Month)
lapply(1:nM, function(i){
a <- Sim_flow*i*i*0.5
b <- Obs_flow*i
m <- model.assess(a,b)
ll <<- rbind(ll,data.frame(Month=Month[i],label=m))
})
labels <- ll
Next, you need to use geom_label instead of annotate as mentioned here. The code below
ggplot(data=dataset, aes(x = Obs_flow, y = Sim_flow)) +
geom_point(aes(Obs_flow, Sim_flow), alpha = 0.3)+
stat_smooth(aes(x = Obs_flow, y = Sim_flow),
method = "lm", se = TRUE, colour="#FC4E07", fullrange = TRUE) +
stat_poly_eq(formula = "y~x",
aes(label = paste0(..eq.label..)), #adding the equation on the top
parse = TRUE, label.x.npc = "center", label.y.npc = 0.97, size = 3.45, family= "Times New Roman") +
stat_poly_eq(formula = "y~x",
aes(label = paste0(..rr.label..)), #adding the Rsquared at the bottom
parse = TRUE, label.x.npc = 0.95, label.y.npc = 0.05, size = 3.45, family= "Times New Roman") +
facet_wrap(~Month, ncol=4, labeller = labeller(StationID = c("MRC" = "Merced River", "SJF"= "Upper San Joaquin River", "SNS" = "Stanislaus River", "TLG" = "Tuolumne River")), scales = "fixed") +
geom_label(data = labels, aes(label=label, x = Inf, y = -Inf),
hjust=1, vjust=0, size=1.8,
inherit.aes = FALSE)
gives the following

Small ggplots on a ggmap - a purrr map version

Based on Small ggplot2 plots placed on coordinates on a ggmap
I would like to have the same solution, but with ggplot function outside the pipeline, applied with purrr::map().
The data for small bar subplots indicating 2 values, may contain
lon, lat, id, valueA, valueB,
After tidyr::gather operation it may look like:
Town, Potential_Sum, lon, lat, component , sales
Aaa, 9.00, 20.80, 54.25, A, 5.000
Aaa, 9.00, 20.80, 54.25, B, 4.000
Bbb, 5.00, 19.60, 50.50, A, 3.000
Bbb, 5.00, 19.60, 50.50, B, 2.000
Current working solution is to use do() to generate sublopts and then ggplotGrob to generate a column with objects "grobs" to be placed at lon,lat locations on a ggmap.
maxSales <- max(df$sales)
df.grobs <- df %>%
do(subplots = ggplot(., aes(1, sales, fill = component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, maxSales)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(sales>0,round(sales), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
df.grobs %>%
{p + geom_label(aes(x = 15, y = 49.8, label = "A"), colour = c("black"),fill = "green", size=3)+
geom_label(aes(x = 15, y = 5.01, label = "B"), colour = c("black"),fill = "red", size=3)+
.$subgrobs +
geom_text(data=df, aes(label = Miasto), vjust = 3.5,nudge_x = 0.05, size=2.5) +
geom_col(data = df,
aes(0,0, fill = component),
colour = "white")}
p is a ggmap object, map of Poland, on which I would like to place small plots:
# p <-
# get_googlemap(
# "Poland",
# maptype = "roadmap",
# zoom = 6,
# color = "bw",
# crop = T,
# style = "feature:all|element:labels|visibility:off" # 'feature:administrative.country|element:labels|visibility:off'
# ) %>% # or 'feature:all|element:labels|visibility:off'
# ggmap() + coord_cartesian() +
# scale_x_continuous(limits = c(14, 24.3), expand = c(0, 0)) +
# scale_y_continuous(limits = c(48.8, 55.5), expand = c(0, 0))
#
How to translate this solution to the syntax nest - apply -unnest so that the ggplot part should be outside of the piped expression as a function.
In other words. How to replace do() with map(parameters, GGPlot_function) and then plot grobs on a ggmap .
What I did so far was I tried to write a ggplot function
#----barplots----
maxSales <- max(df$sales)
fn_ggplot <- function (df, x, component, maxX) {
x <- enquo(x)
component <-enquo(component)
maxX <-enquo(maxX)
p <- ggplot(df, aes(1, !!x, fill = !!component)) +
geom_col(position = "dodge", alpha = 0.50, colour = "white") +
coord_cartesian(ylim = c(0, !!maxX)) +
scale_fill_manual(values = c("green", "red"))+
geom_text(aes(label=if_else(x>0,round(!!x), NULL)), vjust=0.35,hjust=1.1, colour="black",
position=position_dodge(.9), size=2.5, angle=90)+
theme_void()+ guides(fill = F)
return(p)
}
And got totaly confused trying to apply it like this (I am a constant beginner unfortunately)... this is not working, showing
df.grobs <- df %>%
mutate(subplots = pmap(list(.,sales,component,Potential_Sum),fn_ggplot)) %>%
mutate(subgrobs = list(annotation_custom(ggplotGrob(subplots),
x = lon-0.14, y = lat-0.20,
xmax = lon+0.14, ymax = lat+1.2)))
I get errors indicating I do not know what I am doing, ie lengths of arguments are incorrect and something else is expected.
message: Element 2 of `.l` must have length 1 or 7, not 2
class: `purrr_error_bad_element_length`
backtrace:
1. dplyr::mutate(...)
12. purrr:::stop_bad_length(...)
13. dplyr::mutate(...)
Call `rlang::last_trace()` to see the full backtrace
> rlang::last_trace()
x
1. +-`%>%`(...)
2. | +-base::withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
3. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
4. | \-base::eval(quote(`_fseq`(`_lhs`)), env, env)
5. | \-global::`_fseq`(`_lhs`)
6. | \-magrittr::freduce(value, `_function_list`)
7. | \-function_list[[i]](value)
8. | +-dplyr::mutate(...)
9. | \-dplyr:::mutate.tbl_df(...)
10. | \-dplyr:::mutate_impl(.data, dots, caller_env())
11. +-purrr::pmap(list(., sales, component, Potential_Sum), fn_ggplot)
12. \-purrr:::stop_bad_element_length(...)
13. \-purrr:::stop_bad_length(...)
data
First let's build some sample data close to yours but reproducible without the need for an api key.
As a starting point we have a plot of a country map stored in p, and some data in long form to build the charts stored in plot_data.
library(maps)
library(tidyverse)
p <- ggplot(map_data("france"), aes(long,lat,group=group)) +
geom_polygon(fill = "lightgrey") +
theme_void()
set.seed(1)
plot_data <- tibble(lon = c(0,2,5), lat = c(44,48,46)) %>%
group_by(lon, lat) %>%
do(tibble(component = LETTERS[1:3], value = runif(3,min=1,max=5))) %>%
mutate(total = sum(value)) %>%
ungroup()
plot_data
# # A tibble: 9 x 5
# lon lat component value total
# <dbl> <dbl> <chr> <dbl> <dbl>
# 1 0 44 A 2.06 7.84
# 2 0 44 B 2.49 7.84
# 3 0 44 C 3.29 7.84
# 4 2 48 A 4.63 11.0
# 5 2 48 B 1.81 11.0
# 6 2 48 C 4.59 11.0
# 7 5 46 A 4.78 11.9
# 8 5 46 B 3.64 11.9
# 9 5 46 C 3.52 11.9
define a plotting function
we isolate the plotting code in a separate function
my_plot_fun <- function(data){
ggplot(data, aes(1, value, fill = component)) +
geom_col(position = position_dodge(width = 1),
alpha = 0.75, colour = "white") +
geom_text(aes(label = round(value, 1), group = component),
position = position_dodge(width = 1),
size = 3) +
theme_void()+ guides(fill = F)
}
build a wrapper
This function takes a data set, some coordinates and the plotting function as parameters, to annotate at the right spot.
annotation_fun <- function(data, lat,lon, plot_fun) {
subplot = plot_fun(data)
sub_grob <- annotation_custom(ggplotGrob(subplot),
x = lon-0.5, y = lat-0.5,
xmax = lon+0.5, ymax = lat+0.5)
}
The final code
The the code becomes simple, using nest and pmap
subgrobs <- plot_data %>%
nest(-lon,-lat) %>%
pmap(annotation_fun,plot_fun = my_plot_fun)
p + subgrobs

Errorbar duplicated for ggplot barplot

I'm new to ggplot and have a problem with plotting errorbars in a barplot.
A minimal working example looks like this:
abun_all <- data.frame("Tree.genus" = c(rep("Acer", 5), rep("Betula", 5), rep("Larix", 5), rep("Picea", 5), rep("Pinus", 5), rep("Quercus", 5)),
"P.sampled" = c(sample(c(seq(from = 0.001, to = 0.06, by = 0.0005)), 30)),
"Insects.sampled" = c(sample(c(seq(from = 1.667, to = 533, by = 1.335)), 30)),
"Category" = as.factor(c(sample(c(seq(from = 1, to = 3, by = 1)), 30, replace = T))),
"P.sampled_mean" = c(sample(c(seq(from = 0.006, to = 0.178, by = 0.0005)), 30)),
"P.sampled_sd" = c(sample(c(seq(from = 0.004, to = 0.2137, by = 0.0005)), 30)))
ggplot(data = abun_all, aes(x = as.factor(Tree.genus), y = P.sampled , fill = Category)) +
geom_bar(stat = "identity", position = position_dodge(1)) +
geom_errorbar(aes(ymin = P.sampled - (P.sampled_mean+P.sampled_sd), ymax = P.sampled + (P.sampled_mean+P.sampled_sd)), width = 0.1, position = position_dodge(1)) + scale_fill_discrete(name = "Category",
breaks = c(1, 2, 3),
labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")) +
xlab("Genus") + ylab("No. of Focus sp. per total insect abundance")
NOTE : The values are just random and do not represent the actual data but should suffice to demonstrate the problem !
The problem seems to be that errorbars are plotted for the number of entires of each Tree.genus per Category. How can I get this to work ?
Edit: I created another Df by hand with just the max values of each P.sampled combination and now the plot looks the way I want it (except for the two missing errorbars).
abun_plot <- data.frame("Tree.genus" = rep(genera, each = 3),
"P.sampled" = c(0.400000000, 0.100000000, 0.500000000, 0.200000000, 0.100000000, 0.042857143, 0.016666667, 0.0285714286, 0.0222222222, 0.020000000, 0, 0.010000000, 0.060000000, 0.025000000, 0.040000000, 0.250000000, 0.150000000, 0.600000000),
"Category" = as.factor(rep(c(1,2,3), 3)),
"P.sampled_SD" = as.numeric(c(0.08493057, 0.02804758, 0.19476489, 0.04533747, 0.02447665, 0.01308939, 0.004200168, "NA", 0.015356359, 0.005724859, "NA", "NA", 0.01633612, 0.01013794, 0.02045931, 0.07584737, 0.05760980, 0.21374053)),
"P.sampled_Mean" = as.numeric(c(0.07837134, 0.05133333, 0.14089286, 0.04537983, 0.02686200, 0.01680721, 0.005833333, 0.028571429, 0.011363636, 0.01101331, "NA", 0.01000000, 0.02162986, 0.01333333, 0.01668582, 0.08705221, 0.04733333, 0.17870370)))
ggplot(data = abun_plot, aes(x = as.factor(Tree.genus), y = P.sampled , fill = Category)) +
geom_bar(stat = "identity", position = position_dodge(1)) +
geom_errorbar(aes(ymin = P.sampled - P.sampled_SD, ymax = P.sampled + P.sampled_SD), width = 0.1, position = position_dodge(1)) +
scale_fill_discrete(name = "Category",
breaks = c(1, 2, 3),
labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")) +
xlab("Genus") + ylab("No. of Focus sp. per total insect abundance")
Since doing this by hand takes a lot of time and several other plots have the same problem, I would prefer working with the original df (abun_all). Can I just subset my df in the ggplot() function to get the desired output ?
Since you want to just show the maximum value for each combination of genus and category, you can use a couple of dplyr functions (in the tidyverse alongside ggplot2) to group by both genus and category, then take the top value for each. That way, you aren't building abun_plot by hand the way you did in the second block.
library(dplyr)
library(ggplot2)
abun_plot <- abun_all %>%
group_by(Tree.genus, Category) %>%
top_n(1, P.sampled_mean)
head(abun_plot)
#> # A tibble: 6 x 6
#> # Groups: Tree.genus, Category [6]
#> Tree.genus P.sampled Insects.sampled Category P.sampled_mean P.sampled_sd
#> <fct> <dbl> <dbl> <fct> <dbl> <dbl>
#> 1 Acer 0.041 295. 3 0.0125 0.044
#> 2 Acer 0.044 81.8 1 0.166 0.037
#> 3 Acer 0.0085 379. 2 0.155 0.134
#> 4 Betula 0.0505 183. 2 0.170 0.0805
#> 5 Betula 0.0325 61.7 3 0.0405 0.0995
#> 6 Betula 0.0465 326. 1 0.0985 0.188
After that, the plotting works as you initially expected:
ggplot(data = abun_plot, aes(x = as.factor(Tree.genus), y = P.sampled , fill = Category)) +
geom_col(position = position_dodge(1)) +
geom_errorbar(aes(ymin = P.sampled - P.sampled_sd, ymax = P.sampled + P.sampled_sd), width = 0.1, position = position_dodge(1)) +
scale_fill_discrete(name = "Category",
breaks = c(1, 2, 3),
labels = c("NrAm in SSM", "NrAm in FR", "Eurp in FR")) +
xlab("Genus") + ylab("No. of Focus sp. per total insect abundance")
It's also worth noting that as of a few releases back of ggplot2, you can use geom_col() in place of geom_bar(stat = "identity").
Created on 2018-10-03 by the reprex package (v0.2.1)

Resources