ggplot: scale second axis with error bars - r

I am attempting to graph two scatter plots with associated error bars using ggplot2. I would like the top of left axis to be ~0 and range down to -2000 and for the bottom of the right axis to be 0 and range up to about 0.15.
I can re-scale the axes for the points themselves following the instructions here but I am having trouble generalizing the instructions to include error bars.
Here is my current plot
Here is my code
library(tidyverse)
library(ggplot2)
deciles <- data.frame(count = 1:10,
coef_maroon = c(0.005, 0.015, 0.015, 0.02, 0.03, 0.07, 0.09, 0.12, 0.12, 0.13),
ci_upper_maroon = c(0.008, 0.02, 0.025, 0.03, 0.04, 0.09, 0.11, 0.14, 0.13, 0.15),
ci_lower_maroon = c(0.001, 0.01, 0.005, 0, 0.01, 0.05, 0.07, 0.11, 0.11, 0.12),
coef_navy= c(0, -200, -400, -600, -800, -700, -900, -900, -1100, -1700),
ci_upper_navy = c(100, -100, -300, -500, -700, -600, -800, -700, -900, -1600),
ci_lower_navy = c(-100, -500, -700, -900, -850, -800, -950, -1000, -1200, -1900))
scl = with(deciles, max(abs(coef_navy))/max(abs(coef_maroon)))
ggplot(deciles) +
geom_point(aes(x = count, y = coef_navy, color = 'navy')) +
geom_point(aes(x = count, y = coef_maroon*scl-1200, color = 'maroon')) +
geom_errorbar(aes(x = count, ymin = ci_lower_navy, ymax = ci_upper_navy, color = 'navy'), width = 0) +
geom_errorbar(aes(x = count, ymin = ci_lower_maroon*scl-1200, ymax = ci_upper_maroon*scl-1200, color = 'maroon', width = 0)) +
labs(x = "Group", y = "") +
scale_color_manual(values = c('maroon', 'navy')) +
scale_y_continuous(sec.axis = sec_axis(~(.+1200)/scl, name = "2nd axis"))
My question is how can I scale my axes such that the downward trending plot (left axis) "starts" in the top left corner and the upward trending plot (right axis) starts in the bottom left?
As you can see, there is much dead space in the top of the left axis and the right axis need not range as low as -0.05 because the smallest maroon value is just above 0.
Thank you

Does adjusting the scaling factor (1200 to 1900) work for you?
library(tidyverse)
deciles <- data.frame(count = 1:10,
coef_maroon = c(0.005, 0.015, 0.015, 0.02, 0.03, 0.07, 0.09, 0.12, 0.12, 0.13),
ci_upper_maroon = c(0.008, 0.02, 0.025, 0.03, 0.04, 0.09, 0.11, 0.14, 0.13, 0.15),
ci_lower_maroon = c(0.001, 0.01, 0.005, 0, 0.01, 0.05, 0.07, 0.11, 0.11, 0.12),
coef_navy= c(0, -200, -400, -600, -800, -700, -900, -900, -1100, -1700),
ci_upper_navy = c(100, -100, -300, -500, -700, -600, -800, -700, -900, -1600),
ci_lower_navy = c(-100, -500, -700, -900, -850, -800, -950, -1000, -1200, -1900))
scl = with(deciles, max(abs(coef_navy))/max(abs(coef_maroon)))
ggplot(deciles) +
geom_point(aes(x = count, y = coef_navy, color = 'navy')) +
geom_point(aes(x = count, y = coef_maroon*scl-1900, color = 'maroon')) +
geom_errorbar(aes(x = count, ymin = ci_lower_navy, ymax = ci_upper_navy, color = 'navy'), width = 0) +
geom_errorbar(aes(x = count, ymin = ci_lower_maroon*scl-1900, ymax = ci_upper_maroon*scl-1900, color = 'maroon', width = 0)) +
labs(x = "Group", y = "") +
scale_color_manual(values = c('maroon', 'navy')) +
scale_y_continuous(sec.axis = sec_axis(~(.+1900)/scl, name = "2nd axis"))
Created on 2021-09-02 by the reprex package (v2.0.1)
Also, depending on how relevant the x-axis values are, you could consider nudging the values so they don't overlap, e.g.
library(tidyverse)
deciles <- data.frame(count = 1:10,
coef_maroon = c(0.005, 0.015, 0.015, 0.02, 0.03, 0.07, 0.09, 0.12, 0.12, 0.13),
ci_upper_maroon = c(0.008, 0.02, 0.025, 0.03, 0.04, 0.09, 0.11, 0.14, 0.13, 0.15),
ci_lower_maroon = c(0.001, 0.01, 0.005, 0, 0.01, 0.05, 0.07, 0.11, 0.11, 0.12),
coef_navy= c(0, -200, -400, -600, -800, -700, -900, -900, -1100, -1700),
ci_upper_navy = c(100, -100, -300, -500, -700, -600, -800, -700, -900, -1600),
ci_lower_navy = c(-100, -500, -700, -900, -850, -800, -950, -1000, -1200, -1900))
scl = with(deciles, max(abs(coef_navy))/max(abs(coef_maroon)))
ggplot(deciles) +
geom_point(aes(x = count, y = coef_navy, color = 'navy'),
position = position_nudge(x = -0.05)) +
geom_point(aes(x = count, y = coef_maroon*scl-1900, color = 'maroon'),
position = position_nudge(x = 0.05)) +
geom_errorbar(aes(x = count, ymin = ci_lower_navy,
ymax = ci_upper_navy, color = 'navy', width = 0),
position = position_nudge(x = -0.05)) +
geom_errorbar(aes(x = count, ymin = ci_lower_maroon*scl-1900,
ymax = ci_upper_maroon*scl-1900,
color = 'maroon', width = 0),
position = position_nudge(x = 0.05)) +
labs(x = "Group", y = "") +
scale_color_manual(values = c('maroon', 'navy')) +
scale_y_continuous(sec.axis = sec_axis(~(.+1900)/scl, name = "2nd axis"))
Created on 2021-09-02 by the reprex package (v2.0.1)
You can further trim the 'blank space' at the top if you adjust the expand() option in scale_y_continuous:
library(tidyverse)
deciles <- data.frame(count = 1:10,
coef_maroon = c(0.005, 0.015, 0.015, 0.02, 0.03, 0.07, 0.09, 0.12, 0.12, 0.13),
ci_upper_maroon = c(0.008, 0.02, 0.025, 0.03, 0.04, 0.09, 0.11, 0.14, 0.13, 0.15),
ci_lower_maroon = c(0.001, 0.01, 0.005, 0, 0.01, 0.05, 0.07, 0.11, 0.11, 0.12),
coef_navy= c(0, -200, -400, -600, -800, -700, -900, -900, -1100, -1700),
ci_upper_navy = c(100, -100, -300, -500, -700, -600, -800, -700, -900, -1600),
ci_lower_navy = c(-100, -500, -700, -900, -850, -800, -950, -1000, -1200, -1900))
scl = with(deciles, max(abs(coef_navy))/max(abs(coef_maroon)))
ggplot(deciles) +
geom_point(aes(x = count, y = coef_navy, color = 'navy'),
position = position_nudge(x = -0.05)) +
geom_point(aes(x = count, y = coef_maroon*scl-1900, color = 'maroon'),
position = position_nudge(x = 0.05)) +
geom_errorbar(aes(x = count, ymin = ci_lower_navy,
ymax = ci_upper_navy, color = 'navy', width = 0),
position = position_nudge(x = -0.05)) +
geom_errorbar(aes(x = count, ymin = ci_lower_maroon*scl-1900,
ymax = ci_upper_maroon*scl-1900,
color = 'maroon', width = 0),
position = position_nudge(x = 0.05)) +
labs(x = "Group", y = "") +
scale_color_manual(values = c('maroon', 'navy')) +
scale_y_continuous(sec.axis = sec_axis(~(.+1900)/scl, name = "2nd axis"),
expand = c(0.01,0.01))
Created on 2021-09-02 by the reprex package (v2.0.1)

Related

How fill geom_ribbon with different colour in R?

I am trying to use different fill for geom_ribbon according to the x-values (For Temp = 0-20 one fill, 20-30.1 another fill and > 30.1 another fill). I am using the following code
library(tidyverse)
bounds2 <- df %>%
mutate(ymax = pmax(Growth.rate, slope),
ymin = pmin(Growth.rate, slope),
x_bins = cut(Temp, breaks = c(0,20,30.1,max(Temp)+5)))
ggplot(df, aes(x = Temp, y = Growth.rate)) +
geom_line(colour = "blue") +
geom_line(aes(y = slope), colour = "red") +
scale_y_continuous(sec.axis = sec_axis(~ .^1, name = "slope")) +
geom_ribbon(data = bounds2, aes(Temp, ymin = ymin, ymax = ymax, fill = x_bins),
alpha = 0.4)
It is returning me following output
As you can see from the output some regions are remaining empty. Now how can I fill those parts in the curve?
Here is the data
df = structure(list(Temp = c(10, 13, 17, 20, 25, 28, 30, 32, 35, 38
), Growth.rate = c(0, 0.02, 0.19, 0.39, 0.79, 0.96, 1, 0.95,
0.65, 0), slope = c(0, 0.02, 0.16, 0.2, 0.39, 0.1, 0.03, -0.04,
-0.29, -0.65)), row.names = c(NA, 10L), class = "data.frame")
Here's a solution that involves interpolating new points at the boundaries between the areas. I used approx to get the values of ymin and ymax at Temp=30.1 and added this to the plotting dataset.
Then, instead of using cut just once as you did I use it twice, once with lower bounds included in each set then once with upper bounds included. Then I reshape the data long, and de-duplicate the rows I don't need.
If you zoom in enough you can see that the boundary is at 30.1 not at 30.
bounds2 <- df %>%
mutate(ymax = pmax(Growth.rate, slope),
ymin = pmin(Growth.rate, slope))
bounds2 <- bounds2 |>
add_case(Temp=30.1,
ymax=approx(bounds2$Temp,bounds2$ymax,xout = 30.1)$y,
ymin=approx(bounds2$Temp,bounds2$ymin,xout = 30.1)$y) |>
mutate(x_bins2 = cut(Temp, breaks = c(0,20,30.1,max(Temp)+5),right=FALSE, labels=c("0-20","20-30.1","30.1-max")),
x_bins = cut(Temp, breaks = c(0,20,30.1,max(Temp)+5), labels=c("0-20","20-30.1","30.1-max"))) |>
tidyr::pivot_longer(cols=c(x_bins2, x_bins), names_to = NULL, values_to = "xb") |>
distinct()
ggplot(df, aes(x = Temp, y = Growth.rate)) +
geom_line(colour = "blue") +
geom_line(aes(y = slope), colour = "red") +
scale_y_continuous(sec.axis = sec_axis(~ .^1, name = "slope")) +
geom_ribbon(data = bounds2, aes(Temp, ymin = ymin, ymax = ymax, fill = xb),
alpha = 0.4)
The idea is here but the code I show can be much improved at the step ### Dupplicate the 2 last x_bins from each category and move them into the next
### Libraries
library(tidyverse)
df <- structure(list(Temp = c(10, 13, 17, 20, 25, 28, 30, 32, 35, 38
), Growth.rate = c(0, 0.02, 0.19, 0.39, 0.79, 0.96, 1, 0.95,
0.65, 0), slope = c(0, 0.02, 0.16, 0.2, 0.39, 0.1, 0.03, -0.04,
-0.29, -0.65)), row.names = c(NA, 10L), class = "data.frame")
### Preprocessing
bounds2 <- df %>%
mutate(ymax = pmax(Growth.rate, slope),
ymin = pmin(Growth.rate, slope),
x_bins = cut(Temp, breaks = c(0, 20, 30.1, max(Temp)+5)))
### Dupplicate the 2 last x_bins from each category and move them into the next category
bounds2 <- rbind(bounds2, bounds2[c(4, 7), ])
bounds2$x_bins[c(11, 12)] <- bounds2[c(5, 8), ]$x_bins
### Plot
ggplot(df, aes(x = Temp, y = Growth.rate)) +
geom_line(colour = "blue") +
geom_line(aes(y = slope), colour = "red") +
scale_y_continuous(sec.axis = sec_axis(~ .^1, name = "slope")) +
geom_ribbon(data = bounds2, aes(Temp, ymin = ymin, ymax = ymax, fill = x_bins),
alpha = 0.4)

Need help to display legend and in similar color code to the data

I am visualizing a time-series plot using ggplot2 and trying to combine the legend. I have tried many options but in not yet gotten my desired output. In one plot the lines are missing the color coding and in the other, the chart is missing the legend. My desired output is to have a chart with the legend and the color scheme being the same.
Here is the script where the lines are missing the color-coding;
library(tidyverse)
deviation <- read_csv("C:/Users/JohnWaweru/Documents/Thesis/Data/yearly_CSVs/Turkana_new/2018_new.csv")
deviation %>% ggplot() +
geom_line(aes(x = as.Date(Month), y = Upper_curve, col = 'red'), linetype = 2) +
geom_line(aes(x = as.Date(Month), y = Lower_curve, col = 'red'), linetype = 2) +
geom_line(aes(x = as.Date(Month), y = Mean_NDVI, col = 'red'), linetype = 1) +
geom_line(aes(x = as.Date(Month), y = NDVI_2018, col = 'green'), linetype = 1) +
scale_color_manual(name = 'Legend',
values = c('Mean_NDVI'= 'red', 'NDVI_2018' = 'green', 'Upper_curve' = 'red', 'Lower_curve' = 'red'),
labels = c('Mean_NDVI', 'NDVI_2018', 'Upper_curve','Lower_curve')) +
ylim(0.2, 0.6) +
scale_x_date(date_labels = "%b", date_breaks = "1 month") +
ylab(label = "NDVI") +
xlab(label = "Month") +
ggtitle("NDVI Deviation 2018") ```
Here is the Sample data I am working with;
structure(list(Month = structure(c(18262, 18293, 18322, 18353, 18383, 18414), class = "Date"),
Mean_NDVI = c(0.26, 0.23, 0.25, 0.34, 0.36, 0.32),
NDVI_2018 = c(0.22, 0.23, 0.23, 0.41, 0.46, 0.32),
Mean_Std = c(0.01, 0.01, 0.01, 0.02, 0.02, 0.02),
Std_2018 = c(0.01, 0.01, 0.03, 0.03, 0.04, 0.03),
Upper_curve = c(0.27, 0.24, 0.26, 0.36, 0.38, 0.34),
Lower_curve = c(0.25, 0.22, 0.24, 0.32, 0.34, 0.3)),
row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
Setting literal colours only works outside the aes() function or when you use scale_colour_identity(). Most of the time when you want to label individual line layers, you can set aes(..., colour = "My legend label").
library(ggplot2)
deviation <- structure(list(
Month = structure(c(18262, 18293, 18322, 18353, 18383, 18414), class = "Date"),
Mean_NDVI = c(0.26, 0.23, 0.25, 0.34, 0.36, 0.32),
NDVI_2018 = c(0.22, 0.23, 0.23, 0.41, 0.46, 0.32),
Mean_Std = c(0.01, 0.01, 0.01, 0.02, 0.02, 0.02),
Std_2018 = c(0.01, 0.01, 0.03, 0.03, 0.04, 0.03),
Upper_curve = c(0.27, 0.24, 0.26, 0.36, 0.38, 0.34),
Lower_curve = c(0.25, 0.22, 0.24, 0.32, 0.34, 0.3)),
row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame")
)
ggplot(deviation) +
geom_line(aes(x = Month, y = Upper_curve, colour = 'Upper_curve'), linetype = 2) +
geom_line(aes(x = Month, y = Lower_curve, colour = 'Lower_curve'), linetype = 2) +
geom_line(aes(x = Month, y = Mean_NDVI, colour = 'Mean_NDVI'), linetype = 1) +
geom_line(aes(x = Month, y = NDVI_2018, colour = 'NDVI_2018'), linetype = 1) +
scale_color_manual(
name = 'Legend',
values = c('Mean_NDVI'= 'red', 'NDVI_2018' = 'green',
'Upper_curve' = 'red', 'Lower_curve' = 'red'),
# Setting appropriate linetypes
guide = guide_legend(
override.aes = list(linetype = c(2,1,1,2))
)
) +
ylim(0.2, 0.6) +
scale_x_date(date_labels = "%b", date_breaks = "1 month") +
ylab(label = "NDVI") +
xlab(label = "Month") +
ggtitle("NDVI Deviation 2018")
Created on 2021-08-05 by the reprex package (v1.0.0)

Scale fill gradient using absolute values

In the following chart, I would like a gradient to be applied at an absolute value level, rather than relative values. For example, rows I and G should be the same color of red as their values are -75 and 75, respectively. By the same token, rows F and E should be the same shade of green as their values are -15 and 15, respectively. Can anyone tell me how I would do this?
library(dplyr)
library(ggplot2)
data.frame(grp = LETTERS[1:10],
vals = c(0.11, 0.39, -0.06, 0.42, 0.15, -0.15, 0.75, -0.02, -0.75, 0.00)) %>%
ggplot(aes(x = vals, y = grp, fill = vals)) +
geom_col() +
scale_fill_gradient(low = "green", high = "red")
You could simply use fill = abs(vals)
data.frame(grp = LETTERS[1:10],
vals = c(0.11, 0.39, -0.06, 0.42, 0.15, -0.15, 0.75, -0.02, -0.75, 0.00)) %>%
ggplot(aes(x = vals, y = grp, fill = abs(vals))) +
geom_col() +
scale_fill_gradient(low = "green", high = "red")

In R , how to draw a scatter plot by ggplot2 with different scale but equal length

My data is below.
X <- c(0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 1, 2, 3)
Y <- c(0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 1, 2, 3)
I would like to draw a scatter plot for X vs Y by R package ggplot2. The X axis should be 0.01, 0.02, 0.03, 0.04, ..., 0.09, 0.1, 0.2, 0.3, 0.4, ..., 0.9, 1, 2, 3, 4, 5. The Y axis should also be 0.01, 0.02, 0.03, 0.04, ..., 0.09, 0.1, 0.2, 0.3, 0.4, ..., 0.9, 1, 2, 3, 4, 5. My question is that how can I make the length between any two adjacent points in X axis(also Y axis) the same? For example, the length between 0.01 and 0.02 are the same as the length between 0.1 and 0.2 in X axis.
d <- tibble(X= c(0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 1, 2, 3) ,
Y = c(0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 1, 2, 3))
d %>% ggplot(aes(x=X, y=Y)) +
geom_point() +
scale_x_log10() +
scale_y_log10()
Is this what you want?
Here is a more elaborative solution, where you cut your data into small bins, create a faceted plot by each bin, and then paste the facets back together..
X <- c(0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 1, 2, 3)
Y <- c(0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 1, 2, 3)
library( data.table )
library( ggplot2 )
plotdata <- data.table( x = X, y = Y )
#create bins to plot
limits <- data.table( from = c(0,0.01,0.1,1,10) )
limits[, to := shift( from, type="lead", fill = Inf ) ]
limits[, bin_name := paste(from, to, sep = "-") ]
# from to bin_name
# 1: 0.00 0.01 0-0.01
# 2: 0.01 0.10 0.01-0.1
# 3: 0.10 1.00 0.1-1
# 4: 1.00 10.00 1-10
# 5: 10.00 Inf 10-Inf
#bin plotdata for x and y values by joining
plotdata[ limits, bin_x := i.bin_name, on = .(x >= from, x < to ) ][]
plotdata[ limits, bin_y := i.bin_name, on = .(y >= from, y < to ) ][]
#get factorder right for ordering facets
plotdata[, bin_x.f := forcats::fct_rev( factor( bin_x ) ) ]
#almost what we want, but scales are not right
ggplot( data = plotdata ) +
geom_point( aes(x = x, y = y ) ) +
facet_grid( bin_x.f ~ bin_y )
#get facetscales-package from github
## devtools::install_github("zeehio/facetscales") # <-- run once !
library(facetscales)
#build y scales
scales_y <- list(
"0.01-0.1" = scale_y_continuous(expand = c(0,0), limits = c(0,0.1), breaks = seq(0,0.1,0.01), labels = c( seq(0,0.09,0.01), "" ) ),
"0.1-1" = scale_y_continuous(expand = c(0,0), limits = c(0.1,1), breaks = seq(0.1,1,0.1), labels = c( seq(0.1,0.9,0.1), "" ) ),
"1-10" = scale_y_continuous(expand = c(0,0), limits = c(1,10), breaks = seq(1,10,1), labels = c( seq(1,9,1), "" ) )
)
#build x scales
scales_x <- list(
"0.01-0.1" = scale_x_continuous(expand = c(0,0), limits = c(0,0.1), breaks = seq(0,0.1,0.01), labels = c( seq(0,0.09,0.01), "" ) ),
"0.1-1" = scale_x_continuous(expand = c(0,0), limits = c(0.1,1), breaks = seq(0.1,1,0.1), labels = c( seq(0.1,0.9,0.1), "" ) ),
"1-10" = scale_x_continuous(expand = c(0,0), limits = c(1,10), breaks = seq(1,10,1), labels = c( seq(1,9,1), "" ) )
)
#now we get
ggplot( data = plotdata ) +
geom_point( aes(x = x, y = y ) ) +
facet_grid_sc( rows = vars(bin_x.f), cols = vars(bin_y), scales = list( x = scales_x, y = scales_y ) )+
theme(
#remove the facet descriptions completely
strip.background = element_blank(),
strip.text = element_blank(),
#drop space bewteen panels
panel.spacing = unit(0, "lines"),
#rotate x_axis labels
axis.text.x = element_text( angle = 90, hjust = 1, vjust = 0.5 )
)
As you can see, this works, but points that are exact lyon the min/max of a bin get cut off a bit due to the expand = c(0,0) when setting the x and y axis.
log transform can be a close solution to what you are looking, but it does not provide the same distance:
df <- data.frame(X=X,Y=Y)
p = ggplot(df,aes(x=X,y=Y))+
geom_line() +
geom_point(size=4)
p+
scale_x_continuous(trans = "log10") +
scale_y_continuous(trans = "log10")
Please check the below website: It is the same problem for Animals data from MASS:
http://www.sthda.com/english/wiki/ggplot2-axis-scales-and-transformations

stacked bar ggplot in log scale

I'm trying to do a stacked barplot with ggplot in R,using log_10 scale for the y-axis, but I'm getting the wrong plot.
This is my code:
library(reshape2)
library(ggplot2)
DF <- structure(list(Worker = 1:18, Seed = c(300, 40, 200, 0.1, 0.1,
0.1, 1000, 0.1, 0.1, 30, 45, 0.1, 13, 0.1, 50, 0.1, 0.1, 30),
Boosted = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
35, 0.1, 0.1, 13, 0.1, 0.1, 0.1, 0.1, 0.1), Online = c(0.1,
0.1, 0.1, 15000, 60, 500, 0.1, 100, 450, 0.1, 200, 500, 0.1,
5000, 0.1, 70, 1000, 500)), class = "data.frame", row.names = c(NA,
-18L))
DF1 <- melt(DF, id.var="Worker")
ggplot(DF1, aes(x = Worker, y = value, fill = variable)) +
geom_bar(stat = "identity") +
xlab("id") +
ylab("# of x") +
labs(fill = "") +
scale_fill_manual(values = cols,
labels = c("Mobile", "Laptop","Online User")) +
scale_x_continuous(breaks = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18)) +
expand_limits(x = 1, y = 1) +
scale_y_log10(limits = c(1,1e5),
breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x)))
Below you see the image where the bars for some ids are wrong: see for instance ids 10 and 13.
What am I missing? I also tried just using log(value) in ggplot(DF1, aes(x = Worker, y = log(value), fill = variable)), but I'm getting the same.

Resources