How to add a legend in ggplot to my graph - r

Hi, I have some code to simulate a Gaussian process. Could someone please help me add a legend in the top-right corner of my plot? I want it to state the parameter value for each of the line styles/colours, e.g. l=1, l=5, l=10. Thanks.
# Draw one sample path of a Gaussian process.
#
# K: covariance matrix; it is factorised with chol().
# Returns a plain numeric vector with one value per row of K.
simGP <- function(K) {
  # chol() returns the upper-triangular factor, so its transpose is what
  # gets applied to the standard-normal draws.
  upper <- chol(K)
  noise <- rnorm(nrow(K))
  as.vector(t(upper) %*% noise)
}
# Input grid: the points at which the covariance is evaluated.
x <- seq(from = -1, to = 1, length.out = 500)
# Exponential kernel ------------------------------------------------------
# Build the exponential covariance matrix exp(-d / l), where d holds the
# pairwise distances between the points of x as computed by dist().
#
# x:      input locations (passed to dist()).
# l:      length-scale; a single positive number.
# jitter: non-negative value added to the diagonal so that a later chol()
#         is less likely to fail on a numerically singular matrix.
#         Defaults to the previously hard-coded 1e-8.
# Returns a square matrix with one row/column per point of x.
kernel_exp <- function(x, l = 1, jitter = 1e-8) {
  stopifnot(
    is.numeric(l), length(l) == 1L, is.finite(l), l > 0,
    is.numeric(jitter), length(jitter) == 1L, is.finite(jitter), jitter >= 0
  )
  K <- exp(-as.matrix(dist(x)) / l)
  diag(K) <- diag(K) + jitter
  K
}
{
  # Simulate one path per length-scale l.
  y1 <- simGP(kernel_exp(x, l = 10))
  y2 <- simGP(kernel_exp(x, l = 1))
  y3 <- simGP(kernel_exp(x, l = 0.1))

  # Use data.frame() rather than as.data.frame(x, y1): the second argument of
  # as.data.frame() is row.names, so the simulated path never became a column.
  data1 <- data.frame(x = x, y1 = y1)
  data2 <- data.frame(x = x, y2 = y2)
  data3 <- data.frame(x = x, y3 = y3)

  # Colour and linetype are set per layer, outside aes(), so no colour or
  # linetype aesthetic is mapped and ggplot2 draws no legend for them.
  ggplot() +
    geom_line(data = data1, aes(x = x, y = y1),
              color = "green4", linetype = "twodash", size = 0.5) +
    geom_line(data = data2, aes(x = x, y = y2),
              color = "red", linetype = "longdash", size = 0.5) +
    geom_line(data = data3, aes(x = x, y = y3), color = "blue") +
    theme_classic() +
    labs(x = "input, x",
         y = "output, f(x)") +
    theme(axis.text = element_text(size = 16),
          axis.title = element_text(size = 14))
}

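You can do this by adding a grouping variable to the data frame and mapping both linetype and colour to it.
If you want to customise the colours and linetypes, use scale_color_discrete and scale_linetype_discrete (or scale_color_manual and scale_linetype_manual to pick the exact values).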
# One simulated path per length-scale.
y1 <- simGP(kernel_exp(x, l = 10))
y2 <- simGP(kernel_exp(x, l = 1))
y3 <- simGP(kernel_exp(x, l = 0.1))

# Long format: `value` records which length-scale each row belongs to.
data1 <- data.frame(x, y = y1, value = "10")
data2 <- data.frame(x, y = y2, value = "1")
data3 <- data.frame(x, y = y3, value = "0.1")
df <- rbind(data1, data2, data3)

# Colour and linetype are mapped to `value` inside aes(), which is what
# makes ggplot2 draw a legend for them.
ggplot(
  df,
  aes(x = x, y = y, color = value, linetype = value, group = value)
) +
  geom_line(size = 0.5) +
  labs(x = "input, x", y = "output, f(x)") +
  theme_classic() +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 16)
  )

Related

Two different color keys in ggplot

I would like to plot points twice using two different color scales:
In the example here 5 points are drawn and color is mapped to two covariates (cov1 and cov2): cov1 and cov2 are on different scales, 1 to 5 and 0.01 to 0.05 respectively.
I wish to have 2 independent color keys, one for cov1 and one for cov2,
a bit like in the graph below. However, in the graph below I used 'color = cov1' and 'fill = cov2' in order to bring in another color key...
Any help would be appreciated.
# gg1: both point layers map to colour (cov1 on the lower row, cov2 on the
# upper row).
gg1 <- ggplot(df1, aes(x = x, y = y)) +
  geom_point(aes(y = y - 1, color = cov1)) +
  geom_point(aes(y = y + 1, color = cov2)) +
  scale_y_continuous(limits = c(-3, 3))

# gg2: the upper row maps cov2 to fill instead (shape 21 has a fill).
gg2 <- ggplot(df1, aes(x = x, y = y)) +
  geom_point(aes(y = y - 1, color = cov1)) +
  geom_point(aes(y = y + 1, fill = cov2), shape = 21) +
  scale_y_continuous(limits = c(-3, 3))

# Show the two variants side by side.
grid.arrange(gg1, gg2, ncol = 2)
In basic ggplot2 it is impossible if I remember correctly. But this repository may be your answer:
https://github.com/eliocamp/ggnewscale
or this (mentioned in description of the previous one):
https://github.com/clauswilke/relayer
I haven't been using ggplot2 for quite a long time so I'm not familiar with these two, but I remember that I used one of them at least once.
I've just written a quick example to check if it works:
library(ggplot2)
library(ggnewscale)

# Two rows of five points; each row is coloured by x.
d1 <- data.frame(x = 1:5, y = 1)
d2 <- data.frame(x = 1:5, y = 2)

ggplot() +
  # First layer and the colour scale that applies to it.
  geom_point(aes(x = x, y = y, color = x), data = d1) +
  scale_color_continuous(low = "#0000aa", high = "#ffffff") +
  # new_scale_color() comes between the two layer/scale pairs.
  new_scale_color() +
  geom_point(aes(x = x, y = y, color = x), data = d2) +
  scale_color_continuous(low = "#aa0000", high = "#00aa00")
And it seems to work as you want.
I used your idea of combining col and fill, plus a small hack to use different shapes for cov1 and cov2:
# sample data: cov1 runs from 1 to 5, cov2 from 0.01 to 0.05
my_data <- data.frame(
  x = 1:5,
  cov1 = 1:5,
  cov2 = seq(from = 0.01, to = 0.05, by = 0.01)
)

library(ggplot2)

ggplot() +
  # Upper row: cov1 on the colour scale.
  geom_point(aes(x = x, y = 0.5, col = cov1), data = my_data, shape = 16) +
  scale_color_continuous(low = "red1", high = "red4") +
  # Lower row: cov2 on the fill scale (shape 21), with a white outline.
  geom_point(aes(x = x, y = -0.5, fill = cov2), data = my_data,
             shape = 21, col = "white", size = 2) +
  ylim(-1, 1)
Hope it helps.

Add an exponential line in ggplot that goes through pre-specified points

The code below plots step functions of increasing accuracy toward the underlying polynomial or exponential curve. I am trying to add a curve to my plot that goes through the bottom of each step that I have added.
The commented lines are different attempts that I have tried, but nothing goes exactly through all of the bottom corners of each step down. Is anyone able to help me achieve this? Any help would be greatly appreciated.
library(ggplot2)
# Quarter-year grid and the underlying curve evaluated on it.
X1 <- seq(1, 5, by = 0.25)
# Evaluate the curve on X1 (the original referenced an undefined `X`).
Y1 <- (0.74 * X1^(-2) + 0.25) * 100
sm <- data.frame(X1, Y1)

# Medium steps: every second value of Y1, each repeated twice, so the value
# changes once per half year when plotted against X1.
# NOTE(review): X2 is computed but not used by `med` (which pairs X1 with
# Y2); confirm it can be dropped.
X2 <- sort(rep(seq(1, 5, by = 0.5), 2))[-18]
Y2 <- sort(rep(Y1[1:17 %% 2 == 1], 2), decreasing = TRUE)[-18]
med <- data.frame(X1, Y2)

# Large steps: every fourth value of Y1, each repeated four times, so the
# value changes once per year when plotted against X1.
# NOTE(review): X3 is computed but not used by `lg`; confirm it can be dropped.
X3 <- sort(rep(seq(1, 5), 4))[1:17]
Y3 <- sort(rep(Y1[c(1, 5, 9, 13, 17)], 4), decreasing = TRUE)[1:17]
lg <- data.frame(X1, Y3)
# Three step series on the same axes; the commented lines are earlier
# attempts at adding a smooth curve.
ggplot() +
#stat_function(data=sm, mapping = aes(x = X), fun = function(x) {exp(-1*x)*100+28}) +
#geom_curve(aes(x=1, xend=5, y=99, yend=28), ncp = 17) +
#geom_smooth(data = sm, aes(x=X1, y=Y1), method="lm", formula = y ~ poly(x,2), se=F, color= "black", fullrange=T) +
#geom_smooth(data = sm, aes(x=X1, y=Y1), method="lm", formula = (y ~ exp(-1.9*x)), se=F, color= "black", fullrange=T) +
  # Layers are added in the order lg, med, sm.
  geom_step(data = lg, aes(x = X1, y = Y3), colour = "red", size = 1) +
  geom_step(data = med, aes(x = X1, y = Y2), colour = "purple", size = 1) +
  geom_step(data = sm, aes(x = X1, y = Y1), colour = "orange", size = 1) +
  scale_x_continuous(
    name = "Survival Time (years)",
    breaks = seq(from = 1, to = 5, by = 0.25)
  ) +
  scale_y_continuous(name = "Overall Survival (%)", limits = c(0, 100)) +
  theme_classic()
I ended up re-doing the initial functions and made it all match up:
# Exponential curve 100 * exp(-x / 4), used both for the smooth line and to
# generate every step series below.
MyFunction <- function(x) {
  100 * exp(-(1 / 4) * x)
}

# Each series evaluates MyFunction on a grid from 0 to 5 years.
# Yearly grid.
Xyearly <- 0:5
Yyearly <- MyFunction(Xyearly)
Yearly <- data.frame(x = Xyearly, y = Yyearly)

# 6-monthly grid (steps of 1/2 year).
X6monthly <- (0:10) / 2
Y6monthly <- MyFunction(X6monthly)
Month6 <- data.frame(x = X6monthly, y = Y6monthly)

# 3-monthly grid (steps of 1/4 year). The original used 0:15/3, which is a
# 4-month spacing and did not match the "3monthly" naming.
X3monthly <- (0:20) / 4
Y3monthly <- MyFunction(X3monthly)
Month3 <- data.frame(x = X3monthly, y = Y3monthly)

# Monthly grid (steps of 1/12 year).
X1monthly <- (0:60) / 12
Y1monthly <- MyFunction(X1monthly)
Month1 <- data.frame(x = X1monthly, y = Y1monthly)
# Smooth curve first, then the step series from the coarsest grid (yearly)
# to the finest (monthly).
ggplot() +
  stat_function(
    data = data.frame(x = 0),
    mapping = aes(x = x),
    fun = MyFunction,
    size = 1.2
  ) +
  geom_step(data = Yearly, aes(x = x, y = y), colour = "red", size = 1) +
  geom_step(data = Month6, aes(x = x, y = y), colour = "purple", size = 1) +
  geom_step(data = Month3, aes(x = x, y = y), colour = "orange", size = 1) +
  geom_step(data = Month1, aes(x = x, y = y), colour = "limegreen", size = 1) +
  scale_x_continuous(
    name = "Survival Time (years)",
    breaks = seq(from = 0, to = 5, by = 0.5)
  ) +
  scale_y_continuous(name = "Overall Survival (%)", limits = c(0, 100))

ggplot: how to get legend linetype group-dependent while errorbar linetype solid

I am trying to plot lines representing observations from two groups, y1 and y2 in a way that:
the two groups have different line color (marked on legend)
the two groups have different linetype (marked on legend)
the plot has errorbar, and error bar is solid line in both groups
Code generating some data:
## generate data
x.grid <- seq(0, 1, length.out = 6)
y1.func <- function(x) 1 / (x + 1)
y2.func <- function(x) 2 / (x + 3)
n.rep <- 10L  # noisy replicates per group at each grid point

set.seed(1)
# Row layout: for every grid point, n.rep rows of "y1" followed by n.rep rows
# of "y2". Building the columns with rep() avoids growing vectors in a loop
# and avoids overwriting a global `x` with the loop variable.
x.vec <- rep(x.grid, each = 2L * n.rep)
group.vec <- rep(rep(c("y1", "y2"), each = n.rep), times = length(x.grid))
y.mean <- ifelse(group.vec == "y1", y1.func(x.vec), y2.func(x.vec))
# One rnorm() call consumes the random stream in row order, which matches
# drawing n.rep values for "y1" and then for "y2" at each grid point in turn.
y.vec <- y.mean + rnorm(length(y.mean), sd = 0.1)
plt.df <- data.frame(x = x.vec, y = y.vec, group = group.vec)
## summarize data
# Summarise y within each (x, group) combination; the plotting code reads
# the `y` and `se` columns of the result.
plt.df.se <- Rmisc::summarySE(
  plt.df,
  measurevar = "y",
  groupvars = c("x", "group")
)
Approach 1:
# linetype is mapped at plot level but overridden to solid on the error bars.
# NOTE(review): `pd` is not defined in this snippet; presumably a
# position_dodge() object. Confirm.
ggplot2::ggplot(
  plt.df.se,
  aes(x = x, y = y, color = group, linetype = group)
) +
  geom_line(position = pd, size = 0.5) +
  geom_errorbar(
    aes(ymin = y - se, ymax = y + se),
    width = 0.05,
    linetype = 1,
    position = position_dodge(0.05)
  )
bad: legend blue not dashed
Approach 2:
# linetype is mapped at plot level and inherited by both the lines and the
# error bars.
# NOTE(review): `pd` is not defined in this snippet; presumably a
# position_dodge() object. Confirm.
ggplot2::ggplot(
  plt.df.se,
  aes(x = x, y = y, color = group, linetype = group)
) +
  geom_line(position = pd, size = 0.5) +
  geom_errorbar(
    aes(ymin = y - se, ymax = y + se),
    width = 0.05,
    position = position_dodge(0.05)
  )
bad: blue error bars are dashed (I want them solid)
First off, you only want the linetype aesthetic to apply to your lines, so don't include it in the top-level aesthetic mapping, only in geom_line(). Then use show.legend = FALSE in geom_errorbar() so it won't affect the legends:
# Only colour is mapped at plot level; linetype is mapped on the line layer
# alone, and the error-bar layer is kept out of the legends.
ggplot(plt.df.se, aes(x = x, y = y, color = group)) +
  geom_line(
    aes(linetype = group),
    position = position_dodge(0.05),
    size = 0.5
  ) +
  geom_errorbar(
    aes(ymin = y - se, ymax = y + se),
    width = 0.05,
    position = position_dodge(0.05),
    show.legend = FALSE
  )
Result:

include data from different dataframes, combining different geoms and with a legend which identifies the data source

I have three dataframes, containing data for the same variables (x and y, grouped by variable case) but each dataframe contains data from a different source (test, sim and model). The levels of case are identical for test and model, but they are different for sim. For each value of case, I want all xy curves from different sources but with the same case to have the same color. I need to have a legend which clearly identifies the data source, but I would also like to use different geoms for different data sources. This is what I've been able to do:
# The workspace is deliberately not cleared here: rm(list = ls()) would
# delete every object in the caller's global environment, and the script
# defines everything it uses.
graphics.off()  # close any open graphics devices
library(ggplot2)
# build the dataframes
nx <- 10

# Four x grids, each shifted 0.1 to the right of the previous one.
x1 <- seq(0, 1, length.out = nx)
x2 <- x1 + 0.1
x3 <- x2 + 0.1
x4 <- x3 + 0.1
x <- c(x1, x2, x3, x4)

# Four y curves, each 1.1 times the previous one.
y1 <- 1 - x1
y2 <- 1.1 * y1
y3 <- 1.1 * y2
y4 <- 1.1 * y3
y <- c(y1, y2, y3, y4)

# z curves: average of each y curve and the next one up.
z1 <- (y1 + y2) / 2
z2 <- (y2 + y3) / 2
z3 <- (y3 + y4) / 2
z4 <- (y4 + 1.1 * y4) / 2
z <- c(z1, z2, z3, z4)

# w is y scaled by 1.01.
w <- y * 1.01

# One case label per curve, repeated for each of its nx points.
case_y <- rep(c("I-26_1", "I00", "I20_5", "I40_9"), each = nx)
case_z <- rep(c("I-23_6", "I00", "I22_4", "I42_3"), each = nx)

foo <- data.frame(x = x, y = z, case = case_z, type = "test")
bar <- data.frame(x = x, y = y, case = case_y, type = "sim")
mod <- data.frame(x = x, y = w, case = case_z, type = "model")

# different data frames have different factor levels: to avoid this,
# I bind all dataframes and I reorder the levels of case
foobar <- rbind(foo, bar, mod)
case_levels <- c(
  "I-26_1", "I-23_6", "I00", "I20_5", "I22_4", "I40_9", "I42_3"
)
foobar[["case"]] <- factor(foobar[["case"]], levels = case_levels)
# now I can plot the resulting dataframe:
# colour identifies the case, linetype identifies the data source
p <- ggplot(foobar, aes(x = x, y = y, color = case)) +
  geom_line(aes(linetype = type), size = 1)
p
The problem here is that it's difficult to discern sim and model. In order to make a more readable plot, I switch to geom_point for the model data:
# Only test and sim go into the line data; model is plotted from `mod`.
foobar <- rbind(foo, bar)
case_levels <- c(
  "I-26_1", "I-23_6", "I00", "I20_5", "I22_4", "I40_9", "I42_3"
)
# Give both data frames the same, complete set of case levels.
foobar[["case"]] <- factor(foobar[["case"]], levels = case_levels)
mod[["case"]] <- factor(mod[["case"]], levels = case_levels)
# now I can plot the resulting dataframe
p <- ggplot(foobar, aes(x = x, y = y, color = case)) +
  geom_line(aes(linetype = type), size = 1) +
  geom_point(data = mod)
However, now I don't have a model label in the legend. How can I make sure that the model curves are clearly labeled in the legend, but they are also easy to discern visually from the sim and test curves?
EDIT Procrastinatus Maximus suggests an edit to Pierre Lafortune's code which should eliminate the space between the model label and the type legend, but apparently it eliminates the space between model and the case legend instead:
# Lines for foobar, points for mod; shape is mapped to `type` so the point
# layer gets its own (untitled) legend.
ggplot(foobar, aes(x = x, y = y, color = case)) +
  geom_line(aes(linetype = type), size = 1) +
  geom_point(data = mod, aes(shape = type)) +
  scale_shape_discrete(name = "") +
  # Draw the colour key as solid lines without point symbols.
  guides(
    colour = guide_legend(override.aes = list(linetype = 1, shape = NA))
  ) +
  # Zero legend margin and zero spacing between legends.
  theme(
    legend.spacing = unit(0, "lines"),
    legend.margin = margin(0, 0, 0, 0)
  )
The result is
This will get you closer to your goal. I will look to see if we can close the gap between the two legends.
# Lines for foobar, points for mod; shape is mapped to `type` so the point
# layer gets its own (untitled) legend.
ggplot(foobar, aes(x = x, y = y, color = case)) +
  geom_line(aes(linetype = type), size = 1) +
  geom_point(data = mod, aes(shape = type)) +
  scale_shape_discrete(name = "") +
  # Draw the colour key as solid lines without point symbols.
  guides(
    colour = guide_legend(override.aes = list(linetype = 1, shape = NA))
  )
Edit
##ProcrastinatusMaximus
# Same plot, with an explicit order for the three legends:
# colour first, then linetype, then the untitled shape legend.
ggplot(foobar, aes(x = x, y = y, color = case)) +
  geom_line(aes(linetype = type), size = 1) +
  geom_point(data = mod, aes(shape = type)) +
  guides(
    color = guide_legend(
      override.aes = list(linetype = 1, shape = NA),
      order = 1
    ),
    linetype = guide_legend(order = 2),
    shape = guide_legend(title = NULL, order = 3)
  ) +
  # Zero legend margin and zero spacing between legends.
  theme(
    legend.spacing = unit(0, "lines"),
    legend.margin = margin(0, 0, 0, 0)
  )
Personally, I think all you need to do is change the order of the type, so that the solid line is in the middle. If you make the background white and the line colors a bit brighter, I think your figure is clear:
# The outer parentheses print the plot as well as assigning it to p.
# NOTE(review): rev(type) reverses the whole column, so each row gets the
# type of its mirrored row rather than its own. Confirm this is intended;
# reordering the levels of `type` may be what was meant.
(p <- ggplot(foobar, aes(x = x, y = y, color = case)) +
  geom_line(aes(linetype = rev(type)), size = 1) +
  scale_color_manual(
    values = c("black", "green", "blue", "purple", "pink", "red", "brown")
  ) +
  theme_bw())

Add a specific value of x-axis on ggplot

I am using the ggplot function to plot this kind of graph
image
I want to add the specific value of the x-axis as shown in the picture
this is my code :
# NOTE(review): `mat` and `quant` are not defined in this snippet; presumably
# numeric data and the probability (or probabilities) to mark. Confirm.
quantiles <- quantile(mat, probs = quant)
x <- as.vector(mat)
d <- data.frame(x = x)

# Histogram on the density scale with a kernel density estimate on top.
p <- ggplot(d, aes(x = x)) +
  theme_bw() +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.001, color = "black", fill = "white"
  ) +
  geom_density(
    aes(x = x, y = ..density..),
    fill = "blue", alpha = 0.5, color = "black"
  )

# Recompute the density so the part at or below the quantile can be shaded.
x.dens <- density(x)
df.dens <- data.frame(x = x.dens$x, y = x.dens$y)
# NOTE(review): if `quantiles` has more than one element, `x <= quantiles`
# recycles it along the rows. Confirm a single quantile is intended.
p <- p +
  geom_area(
    data = subset(df.dens, x <= quantiles),
    aes(x = x, y = y),
    fill = "green", alpha = 0.6
  )
print(p)

Resources