show range in a log scale - r

I'd like to include the range of possible values for y_val where the upper and lower bands are available. I tried using geom_errorbar which works for the continuous scale but not in log scale. How can I fix this?
# Example data: a central value with lower and upper bounds for each x.
x_val <- c(2, 3, 6, 12, 24)
y_val<-c(1,3,15,25,30)
y_upper<- c(1.2,3.2, 16, 28,40)
y_lower <-c(0.8,2.9, 12, 22,25)
df <- data.frame(x_val=x_val,y_val=y_val,y_upper=y_upper,
y_lower=y_lower)
# Line and points for y_val against x_val, with error bars for the bounds,
# drawn on log10-scaled x and y axes.
ggplot(data=df,aes(x=x_val,y=y_val))+
geom_line()+
geom_point()+
# NOTE(review): ymin/ymax are passed as fixed parameters taken from the global
# vectors rather than mapped with aes() - possibly why the log scale does not
# apply to them; confirm.
geom_errorbar(ymin = y_lower, ymax = y_upper)+
scale_y_log10()+
scale_x_log10()

Per this answer, you need coord_trans(y = "log10") rather than scale_y_log10()
library(ggplot2)

# Example data: a central value with lower and upper bounds for each x.
x_val <- c(2, 3, 6, 12, 24)
y_val <- c(1, 3, 15, 25, 30)
y_upper <- c(1.2, 3.2, 16, 28, 40)
y_lower <- c(0.8, 2.9, 12, 22, 25)
df <- data.frame(
  x_val = x_val,
  y_val = y_val,
  y_upper = y_upper,
  y_lower = y_lower
)

ggplot(data = df, aes(x = x_val, y = y_val)) +
  geom_line() +
  geom_point() +
  # Map the bounds inside aes() so they are read from the columns of df
  # instead of being passed as fixed parameters from the global vectors.
  geom_errorbar(aes(ymin = y_lower, ymax = y_upper)) +
  # Log-transform both axes at the coordinate level; ylim spans the full
  # range of the lower and upper bounds.
  coord_trans(y = "log10", x = "log10", ylim = range(c(y_upper, y_lower)))
Created on 2021-03-16 by the reprex package (v1.0.0)

A trick could be to use geom_segment with arrow settings produced by a call to arrow().
library(ggplot2)

# Draw each range as a vertical segment from y_lower to y_upper. Flat
# (90-degree) arrow heads on both ends make the segment look like an error bar.
ggplot(df, aes(x_val, y_val)) +
  geom_line() +
  geom_point() +
  geom_segment(
    aes(xend = x_val, y = y_lower, yend = y_upper),
    arrow = arrow(angle = 90, ends = "both")
  ) +
  scale_y_log10() +
  scale_x_log10()

Related

How can I customise a palette in ggplot with values that are not in the data?

Let's say I have the following:
# The plotting package is named ggplot2; library(ggplot) fails to load.
library(ggplot2)
library(viridis)

# Example data; z drives the fill colour of the tiles.
df <- data.frame(x = c(10:15), y = c(20:25), z = c(8, 14, 15, 17, 18, 9))

# Tiles coloured by z on the continuous viridis scale, with both axes fixed
# to the range 0-60.
# NOTE(review): y is mapped to z, so column y is unused - confirm intended.
ggplot(df, aes(x = x, y = z)) +
  geom_tile(aes(fill = z)) +
  scale_fill_viridis(discrete = FALSE) +
  scale_y_continuous(limits = c(0, 60)) +
  scale_x_continuous(limits = c(0, 60)) +
  theme_classic()
I have other data frames with the same structure, with the constraint that values in z are between 0 and 20, inclusive. How can I create a palette where values below 15 have the same colour and values 15, 16, 17, 18, 19 and 20 have different colours, regardless of whether they are in the data frame? For instance, the max value in df is 18, but the maximum of the palette should still be 20.
If you are looking for a continuous scale of colors rather than a discrete scale, you can use the limits argument to set fixed limits of 0-20 in your color mapping. If you want a single fixed color between zero and 15, then a gradient between 15 and 20 (as your question implies), you can use scale_fill_gradientn
library(ggplot2)

# Example data; z drives the fill colour of the tiles.
df <- data.frame(x = 10:15, y = 20:25, z = c(8, 14, 15, 17, 18, 9))

ggplot(df, aes(x, z)) +
  geom_tile(aes(fill = z)) +
  # Fixed limits of 0-20 for the colour mapping. `values` are positions
  # within those limits rescaled to 0-1, so 0.75 corresponds to z = 15:
  # a single colour up to 15, then a gradient from 15 to 20.
  scale_fill_gradientn(
    limits = c(0, 20),
    values = c(0, 0.749999, 0.75, 0.75001, 1),
    colors = c("lightblue", "lightblue", "green3", "green3", "red")
  ) +
  scale_x_continuous(limits = c(0, 60)) +
  scale_y_continuous(limits = c(0, 60)) +
  theme_classic()

ggplot time series: messed up x axis for data with missing values

I am creating time series plot for the following data:
# Creating data set
year <- c(rep(2018, 4), rep(2019, 4), rep(2020, 4))
month_1 <- c(2, 3, 7, 8, 6, 10, 11, 12, 5, 7, 8, 12)
avg_dlt_calc <- c(10, 20, 11, 21, 13, 7, 10, 15, 9, 14, 16, 32)
data_to_plot <- data.frame(cbind(year, month_1, avg_dlt_calc))

# One line of avg_dlt_calc per year, faceted by year.
ggplot(data_to_plot, aes(x = month_1)) +
  geom_line(aes(y = avg_dlt_calc), size = 0.5) +
  # The limits are taken from the data frame built above, so the snippet
  # does not depend on an object defined elsewhere.
  scale_x_discrete(name = "months", limits = data_to_plot$month_1) +
  facet_grid(~year, scales = "free")
I am ok with the plot itself, but x-axis labels are messed up:
How can I fix it?
It is ok not to have labels for missing months (for example, for 2018 it will be only 2,3,7,8 - so it will be clear, that there is data only for those months).
A remedy is to coerce month_1 to a factor and group the observations by year like so:
# Treat month as a categorical axis and connect the observations of each year.
ggplot(
  data_to_plot,
  aes(x = factor(month_1), y = avg_dlt_calc, group = year)
) +
  geom_line(size = 0.5) +
  scale_x_discrete(name = "months") +
  facet_grid(. ~ year, scales = "free")
Note that I've moved y = avg_dlt_calc inside aes() in ggplot() which is more idiomatic than your approach. You may use the breaks argument in scale_x_discrete() to set breaks manually, see ?scale_x_discrete.
I think a fixed x-axis and adding points is more suitable for conveying the information that data is only available for some periods:
# Same categorical month axis, shared across the year panels, with a point
# on every observation so the months that have data are visible.
ggplot(
  data_to_plot,
  aes(x = factor(month_1), y = avg_dlt_calc, group = year)
) +
  geom_line(size = 0.5) +
  geom_point() +
  scale_x_discrete(name = "months") +
  facet_grid(. ~ year, scales = "free_y")

ggplot time series: messed up x axis - 2

This is modified version of this question.
I need to create time series plot for 2 lines for the following data:
# Creating data set
year <- c(rep(2018, 4), rep(2019, 4), rep(2020, 4))
month_1 <- c(2, 3, 7, 8, 6, 10, 11, 12, 5, 7, 8, 12)
avg_dlt_calc <- c(10, 20, 11, 21, 13, 7, 10, 15, 9, 14, 16, 32)
avg_dlt_standard <- c(rep(9, 12))
data_to_plot <- data.frame(cbind(year, month_1, avg_dlt_calc, avg_dlt_standard))
# Convert month to a factor whose levels follow the order of first appearance.
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels=unique(data_to_plot$month_1))

# Two lines per year (calculated and standard DLT) plus points on the
# calculated values, faceted by year, with a manual colour legend on top.
ggplot(data_to_plot, aes(x = as.factor(month_1))) +
  geom_line(
    aes(y = avg_dlt_calc, group = year, colour = "DLT Calculated"),
    size = 0.5
  ) +
  geom_line(
    aes(y = avg_dlt_standard, group = year, colour = "DLT standard"),
    size = 0.5
  ) +
  geom_point(aes(y = avg_dlt_calc, colour = "DLT Calculated")) +
  scale_x_discrete(name = "months", limits = data_to_plot$month_1) +
  facet_grid(~year, scales = "free") +
  scale_color_manual(
    name = "",
    labels = c("DLT Calculated", "DLT standard"),
    values = c("blue", "red")
  ) +
  theme(
    legend.position = "top",
    legend.text = element_text(size = 8)
  )
But x-axis looks wrong:
If I plot the data without this line:
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels=unique(data_to_plot$month_1))
Then x-axis will still be messy:
I am setting limits for x-axis, but looks like it is not working.
How can I fix it?
I've skipped some lines and features of your plot, but in essence, this is what needs to be changed:
ggplot(data_to_plot, aes(x = month_1)) + # no as.factor
  geom_point(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_standard), colour = "red") +
  # do *not* use scale_x_discrete, your x-axis is *continuous*; use the
  # breaks argument to set the ticks.
  # note, limits should only have 2 values - lower and upper limit. They span
  # months 1 to 12 here so that no observation falls outside the axis.
  scale_x_continuous(breaks = 1:12, limits = c(1, 12)) +
  facet_grid(~year)
In your code, you used limits = data_to_plot$month_1, but ggplot2 only used the first two elements of month_1 - it did not interpret the vector as a set of acceptable values.

Add a legend in ggplot2 with R

I have a problem in adding the legend in R with ggplots when plotting the figure of sample sizes vs power. I have tried guide_legend() but it failed. Thanks so much for your help.
# Load the library and input the data
library(ggplot2)
# Sample sizes to evaluate
n <- 2:10

# Input values for the control and infected groups, pooled into one vector
control <- rep(150, times = 4)
infected <- c(150, 170, 200, 250)
all <- c(control, infected)

# Candidate values of sigma
sigma <- c(35, 40, 45)

# Population mean of the pooled values
mu <- mean(all)

# Sum of the squared deviations from the population mean (tau squared)
tau2 <- sum((all - mu)^2)

# Gamma for each sigma; vectorised over n, so one value per sample size
gamma.1 <- n * tau2 / sigma[1]^2
gamma.2 <- n * tau2 / sigma[2]^2
gamma.3 <- n * tau2 / sigma[3]^2

# Power: probability that an F(7, 16) variate with non-centrality gamma
# exceeds the 95% quantile of the central F(7, 16) distribution.
# NOTE(review): the second df is fixed at 16 although n varies - confirm.
power.1 <- 1 - pf(qf(0.95, df1 = 7, df2 = 16), df1 = 7, df2 = 16, ncp = gamma.1)
power.2 <- 1 - pf(qf(0.95, df1 = 7, df2 = 16), df1 = 7, df2 = 16, ncp = gamma.2)
power.3 <- 1 - pf(qf(0.95, df1 = 7, df2 = 16), df1 = 7, df2 = 16, ncp = gamma.3)
# Plot the power vs the sample size: one point series and one line per
# power column, each point series drawn in its own fixed colour.
data <- data.frame(n, power.1, power.2, power.3)
ggplot(data, aes(x = n)) +
  geom_point(aes(y = power.1), color = "blue", size = 3.5) +
  geom_line(aes(y = power.1), size = 0.5) +
  geom_point(aes(y = power.2), color = "red", size = 3.5) +
  geom_line(aes(y = power.2), size = 0.5) +
  geom_point(aes(y = power.3), color = "black", size = 3.5) +
  geom_line(aes(y = power.3), size = 0.5) +
  labs(
    x = "Sample Sizes",
    y = "Power",
    title = "Power versus Sample size"
  )
You can transform your data to long format which is nice for plotting because then you have grouping variable (names of your columns are groups):
# tidyr provides pivot_longer(), the contains() selector and the %>% pipe
# used below; without it the snippet fails on the first line.
library(tidyr)

data %>%
  # Long format: the power.* columns become a `group` / `power` column pair.
  pivot_longer(
    cols = contains("power"),
    names_to = "group",
    values_to = "power"
  ) %>%
  ggplot(aes(n, power)) +
  # Lines are added first so the coloured points are drawn on top of them.
  geom_line(aes(group = group)) +
  # Mapping colour to `group` inside aes() is what produces the legend.
  geom_point(aes(color = group), size = 4) +
  scale_color_manual(values = c("blue", "red", "black"))
I've also added the line layer before the points.

relabel x-axis to be consistent with reorder function ggplot2

I am creating a grouped bar chart in ggplot2, where I have the x-axis as direction of gaze, y-axis percentage of time, and grouped by condition (reliability of robot). I have created a reproducible example of the dataset below.
# NOTE(review): this installs tidyverse every time the script is run.
install.packages("tidyverse")
library(tidyverse)
library(ggplot2)
library(reshape2)
# Example data: robot reliability condition and three percentage columns.
robot_reliability <- c("reliable", "reliable", "reliable", "reliable", "unreliable", "unreliable", "unreliable", "unreliable")
percent_robot <- c(5, 10, 15, 20, 25, 30, 35, 40)
percent_game <- c(20, 30, 40, 50, 15, 25, 35, 45)
percent_others <- c(6, 8, 10, 12, 11, 9, 7, 5)
data <- data.frame(robot_reliability, percent_robot, percent_game, percent_others)
# Reshape to long format, keeping robot_reliability as the id column; the
# plot below uses the resulting `variable` and `value` columns.
gg <- melt(data, id = "robot_reliability")
# Grouped bar chart: x categories ordered via reorder(variable, -value),
# bars show the mean, error bars run from the min to the max.
ggplot(gg, aes(x = reorder(variable, -value), y = value, fill = factor(robot_reliability))) +
stat_summary(fun = mean, geom = "bar", position = position_dodge(1)) +
stat_summary(fun.min = min, fun.max = max, geom = "errorbar",
colour="grey40", position=position_dodge(1), width=.2) +
scale_fill_discrete(name = "Robot Reliability", labels = c("Reliable", "Unreliable")) +
xlab("Direction of Gaze") +
ylab("Percentage of Overall Interaction Time") +
ggtitle("Percentage of Time Spent Gazing") +
# The labels are an unnamed vector, so they are tied to axis positions rather
# than to the categories.
scale_x_discrete(labels = c("Game", "Robot","Others"))
I have ordered my graph from high-low values on the y-axis (percentage of overall interaction time) using the reorder function
ggplot(gg, aes(x = reorder(variable, -value), y = value, fill = factor(robot_reliability)))
Later on, I have relabelled the axis using scale_x_discrete:
scale_x_discrete(labels = c("Game", "Robot", "Others"))
However, this appears to fix the labels to those positions (so, for example, if you remove the '-' from '-value' in reorder, the bar graph would rearrange to go from low to high, but the labels on the x-axis would stay in the same positions, meaning the labels are incorrectly matched to the data). Is there a way to combine the labels on the x-axis with the reorder function so that they are permanently attached to the correct data columns?
There are two options to achieve this. First, use a named vector to map the categories of your variable to labels, as I did in the code below. Second, simply rename the categories in your df or add a column with the labels. This way you will get the correct labels automatically.
library(tidyverse)
library(ggplot2)
library(reshape2)

# Example data: robot reliability condition and three percentage columns.
robot_reliability <- rep(c("reliable", "unreliable"), each = 4)
percent_robot <- c(5, 10, 15, 20, 25, 30, 35, 40)
percent_game <- c(20, 30, 40, 50, 15, 25, 35, 45)
percent_others <- c(6, 8, 10, 12, 11, 9, 7, 5)
data <- data.frame(
  robot_reliability,
  percent_robot,
  percent_game,
  percent_others
)

# Long format: one `variable` / `value` pair per original percentage column.
gg <- melt(data, id.vars = "robot_reliability")

ggplot(
  gg,
  aes(
    x = reorder(variable, value),
    y = value,
    fill = factor(robot_reliability)
  )
) +
  # Bars show the mean of each group; error bars run from the min to the max.
  stat_summary(fun = mean, geom = "bar", position = position_dodge(1)) +
  stat_summary(
    fun.min = min,
    fun.max = max,
    geom = "errorbar",
    colour = "grey40",
    position = position_dodge(1),
    width = .2
  ) +
  scale_fill_discrete(
    name = "Robot Reliability",
    labels = c("Reliable", "Unreliable")
  ) +
  # The named vector ties each label to its category, so the labels follow
  # the bars whichever way reorder() sorts them.
  scale_x_discrete(
    labels = c(
      percent_game = "Game",
      percent_robot = "Robot",
      percent_others = "Others"
    )
  ) +
  labs(
    x = "Direction of Gaze",
    y = "Percentage of Overall Interaction Time",
    title = "Percentage of Time Spent Gazing"
  )
Created on 2020-04-24 by the reprex package (v0.3.0)

Resources