Add an exponential line in ggplot that goes through pre-specified points - r

The code below plots step functions of increasing accuracy toward the underlying polynomial or exponential curve. I am trying to add a curve to my plot that goes through the bottom of each step that I have added.
The commented lines are different attempts that I have tried, but nothing goes exactly through all of the bottom corners of each step down. Is anyone able to help me achieve this? Any help would be greatly appreciated.
library(ggplot2)
# Reference curve sampled every quarter year; the finest step plot uses it
# directly.
X1 <- seq(1, 5, by = 0.25)
# Fixed: the original evaluated the curve on an undefined `X`; the grid that
# is actually defined (and paired with Y1 in `sm`) is X1.
Y1 <- (0.74 * X1^(-2) + 0.25) * 100
sm <- data.frame(X1, Y1)
# Half-yearly steps: every second Y1 value, each repeated twice and trimmed
# to the 17 points of X1.
X2 <- sort(rep(seq(1, 5, by = 0.5), 2))[-18]
Y2 <- sort(rep(Y1[1:17 %% 2 == 1], 2), decreasing = TRUE)[-18]
med <- data.frame(X1, Y2)
# Yearly steps: every fourth Y1 value, each repeated four times and trimmed
# to the 17 points of X1.
X3 <- sort(rep(seq(1, 5), 4))[1:17]
Y3 <- sort(rep(Y1[c(1, 5, 9, 13, 17)], 4), decreasing = TRUE)[1:17]
lg <- data.frame(X1, Y3)
ggplot() +
#stat_function(data=sm, mapping = aes(x = X), fun = function(x) {exp(-1*x)*100+28}) +
#geom_curve(aes(x=1, xend=5, y=99, yend=28), ncp = 17) +
#geom_smooth(data = sm, aes(x=X1, y=Y1), method="lm", formula = y ~ poly(x,2), se=F, color= "black", fullrange=T) +
#geom_smooth(data = sm, aes(x=X1, y=Y1), method="lm", formula = (y ~ exp(-1.9*x)), se=F, color= "black", fullrange=T) +
scale_y_continuous(name="Overall Survival (%)", limits=c(0, 100)) +
scale_x_continuous(breaks = seq(from=1, to=5, by=0.25), name = "Survival Time (years)") +
geom_step(colour = "red", size = 1, data = lg, aes(x=X1, y=Y3)) +
geom_step(colour = "purple", size = 1, data = med, aes(x=X1, y=Y2)) +
geom_step(colour = "orange", size = 1, data = sm, aes(x=X1, y=Y1)) +
theme_classic()

I ended up re-doing the initial functions and made it all match up:
# Exponential curve, in percent, evaluated at time x (years).
MyFunction <- function(x) {
  100 * exp(-(1 / 4) * x)
}

# Sampling grids from 0 to 5 years at progressively finer spacing.
Xyearly <- 0:5
X6monthly <- (0:10) / 2
# Fixed: the original used 0:15/3, which is spaced 4 months apart; a
# 3-monthly grid over 0-5 years needs quarter-year steps.
X3monthly <- (0:20) / 4
X1monthly <- (0:60) / 12

# Curve values on each grid.
Yyearly <- MyFunction(Xyearly)
Y6monthly <- MyFunction(X6monthly)
Y3monthly <- MyFunction(X3monthly)
Y1monthly <- MyFunction(X1monthly)

# One data frame per grid; each is drawn as its own step layer.
Yearly <- data.frame(x = Xyearly, y = Yyearly)
Month6 <- data.frame(x = X6monthly, y = Y6monthly)
Month3 <- data.frame(x = X3monthly, y = Y3monthly)
Month1 <- data.frame(x = X1monthly, y = Y1monthly)
# Overlay the continuous curve with step plots sampled at four resolutions.
ggplot() +
# Smooth reference curve drawn from MyFunction; the one-row data frame is
# presumably a placeholder so the layer has an x aesthetic to map.
stat_function(data=data.frame(x = 0), mapping = aes(x = x), fun = MyFunction, size=1.2) +
scale_y_continuous(name="Overall Survival (%)", limits=c(0, 100)) +
scale_x_continuous(breaks = seq(from=0, to=5, by=0.5), name = "Survival Time (years)") +
# One step layer per sampling grid, each with its own data frame and colour.
geom_step(colour = "red", size = 1, data = Yearly, aes(x=x, y=y)) +
geom_step(colour = "purple", size = 1, data = Month6, aes(x=x, y=y)) +
geom_step(colour = "orange", size = 1, data = Month3, aes(x=x, y=y)) +
geom_step(colour = "limegreen", size = 1, data = Month1, aes(x=x, y=y))

Related

Add legend for line plot with confidence interval (geom_ribbon) and density plot

I have a plot containing a line plot with confidence interval (geom_ribbon()) and an additional density plot of the underlying data (from a different data frame).
I would like to add a legend with three entries: for the line, the confidence interval, and the density.
Consider the following MWE (in t1, ylo and yup mark the CI and y the line; t2 contains the data for the density plot).
(I tried incorporating code from this question to no avail.)
library(tidyverse)
t1 <- tibble(x = c(0:22)) %>%
mutate(ylo = c(seq(from = .25, to = 1.35, length.out=10), seq(from = 1.35, to = -1.22, length.out=13)),
y = .25 * x + 1,
yup = c(seq(from = 2.75, to = 4.5, length.out=10), seq(from = 4.75, to = 12, length.out=13)))
t2 <- tibble(x = rnorm(100000, 10, 1))
ggplot() +
geom_line(data = t1, aes(x=x, y=y)) +
geom_ribbon(data = t1, aes(x=x, y=y, ymin=ylo, ymax=yup), linetype=2, alpha=.15) +
geom_hline(linetype='dotted', yintercept = 0) + labs(y = "Y", x = "X") + theme_bw() +
geom_density(data = t2, aes(x = x), color="darkblue", fill="lightblue", linetype="dashed") +
scale_color_manual(values = c("#000000", "grey60", "lightblue"), name = "Title") + # from other answer; couldn't get it to work
coord_cartesian(ylim = c(-0.0125, 12.5), xlim = c(-0.5, 22))
Many thanks for any pointers! :)
Legends appear only for aesthetics. You need to pass color, fill, linetype, alpha, etc. into your call to aes.
You can use either columns programmatically, i.e. pass unquoted column names (usually preferred), but you can also just pass a character string, which will then create a discrete aesthetic, which you can then scale as usual with scale_?aes_...
Below I just added title and label to the character vectors just to make it clear where the names come from.
library(tidyverse)
t1 <- tibble(x = c(0:22)) %>%
mutate(ylo = c(seq(from = .25, to = 1.35, length.out=10), seq(from = 1.35, to = -1.22, length.out=13)),
y = .25 * x + 1,
yup = c(seq(from = 2.75, to = 4.5, length.out=10), seq(from = 4.75, to = 12, length.out=13)))
t2 <- tibble(x = rnorm(100000, 10, 1))
ggplot() +
geom_line(data = t1, aes(x=x, y=y, lty = "myline-label")) +
geom_ribbon(data = t1, aes(x=x, y=y, ymin=ylo, ymax=yup, fill = "MyCI-label"), linetype=2, alpha=.15) +
geom_hline(linetype='dotted', yintercept = 0) + labs(y = "Y", x = "X") +
geom_density(data = t2, aes(x = x, color="MyDens-label"), fill="lightblue", linetype="dashed") +
scale_linetype("Myline-title") +
scale_fill_discrete("MyCI-title") +
scale_color_manual("MyDens-title", values = c("Darkblue"))
Created on 2021-04-07 by the reprex package (v1.0.0)

How to add a legend in ggplot to my graph

Hi, I have some code to simulate a Gaussian process. Could someone please help me add a legend in the top right corner of my plot? I want to state the different parameter values for each of the line styles/colours, e.g. l=1, l=5, l=10. Thanks.
# Simulate one draw from a zero-mean Gaussian process with covariance K.
#
# K: square covariance matrix accepted by chol().
# Returns a plain numeric vector with one value per row of K.
simGP <- function(K) {
  # Upper-triangular Cholesky factor returned by chol().
  chol_upper <- chol(K)
  # One standard-normal draw per row of K.
  std_normal <- rnorm(nrow(K))
  # Multiply the draws by the transposed factor; as.vector() drops the
  # n x 1 matrix shape.
  as.vector(t(chol_upper) %*% std_normal)
}
# choose points to simulate the covariance.
x = seq(-1, 1, length.out = 500)
# Exponential kernel ------------------------------------------------------
# Build the matrix exp(-d / l), where d holds the pairwise distances between
# the points in x (as computed by dist()) and l scales those distances.
#
# x: points at which to evaluate the kernel.
# l: distance divisor (presumably the kernel length-scale); defaults to 1.
# Returns a square matrix with one row and column per point in x.
kernel_exp <- function(x, l = 1) {
  cov_mat <- exp(-(as.matrix(dist(x)) / l))
  # Add a tiny constant to the diagonal (presumably jitter to keep a later
  # chol() call numerically stable).
  diag(cov_mat) <- diag(cov_mat) + 1e-8
  cov_mat
}
{
  # One simulated path per value of l.
  y1 <- simGP(kernel_exp(x, l = 10))
  y2 <- simGP(kernel_exp(x, l = 1))
  y3 <- simGP(kernel_exp(x, l = 0.1))
  # Fixed: as.data.frame(x, y1) passes y1 as `row.names`, so the simulated
  # values never became a column; build explicit two-column data frames.
  data1 <- data.frame(x = x, y1 = y1)
  data2 <- data.frame(x = x, y2 = y2)
  data3 <- data.frame(x = x, y3 = y3)
  df <- data.frame(data1, data2, data3)
  # Colours and line types are constants set outside aes(), so no legend is
  # produced. The original scale_color_manual(values = colors) call was
  # dropped: `colors` is not defined in this script and no colour aesthetic
  # is mapped for the scale to act on.
  ggplot() +
    geom_line(data = data1, aes(x = x, y = y1), color = "green4",
              linetype = "twodash", size = 0.5) +
    geom_line(data = data2, aes(x = x, y = y2), color = "red",
              linetype = "longdash", size = 0.5) +
    geom_line(data = data3, aes(x = x, y = y3), color = "blue") +
    theme_classic() +
    labs(x = "input, x",
         y = "output, f(x)") +
    theme(axis.text = element_text(size = 16),
          axis.title = element_text(size = 14))
}
You can do it using a dataframe variable to group the linetype and colour.
If you want to specify color and linetype, use scale_color_discrete and scale_linetype_discrete
y1 = simGP(kernel_exp(x,l=10))
y2 = simGP(kernel_exp(x,l=1))
y3 = simGP(kernel_exp(x,l=0.1))
data1 <- data.frame(x, y = y1, value = "10")
data2 <- data.frame(x, y = y2, value = "1")
data3 <- data.frame(x, y = y3, value = "0.1")
df=rbind(data1,data2,data3)
# Map colour, linetype and group to `value` inside aes() so that ggplot
# builds a legend entry for each distinct `value`.
ggplot(data = df, aes(x=x, y=y, color = value, linetype = value, group = value)) +
# A single geom_line() layer draws one line per group.
geom_line(size=0.5) +
theme_classic() +
labs(x='input, x',
y='output, f(x)')+
theme(axis.text=element_text(size=16),
axis.title=element_text(size=14))

Generate two plots with same x axis breaks

I have two plots, and I want their x axes to be broken in the same way.
This is the code for plot 1:
m <- read.csv('Finalfor1lowergreaterthan1.csv', header=T, row.names=1)
m <- m[m$SVM.Count >= 40,]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Then I have another plot:
m <- read.csv('Finalfor20lowergreaterthan1.csv', header=T, row.names=1)
#m <- m[m$SVM.Count >= 40, ]
boxOdds = m$Odd
df <- data.frame(
yAxis = length(boxOdds):1,
boxnucleotide = m$Position,
boxCILow = m$lower,
boxCIHigh = m$upper,
Mutation = m$Resistance)
ticksy <- c(seq(0,0.3,by=.1), seq(0, 1, by =.5), seq(0, 20, by =5), seq(0, 150, by =50))
ticksx <- c(seq(0,300,by=25))
p <- ggplot(df,
aes(x = boxnucleotide, y = boxOdds, colour=Mutation, label=rownames(m)))
p1 <- p +
geom_errorbar(aes(ymax = boxCIHigh, ymin = boxCILow), size = .5, height = .01) +
geom_point(size = 1) +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_y_continuous(breaks=ticksy, labels = ticksy) +
scale_x_continuous(breaks=ticksx, labels = ticksx) +
coord_trans(y = "log10") +
ylab("Odds ratio (log scale)") +
scale_color_manual(values=c("#00BFC4","#F8766D","#619CFF")) +
xlab("Integrase nucleotide position") +
geom_text(size=2,hjust=0, vjust=0)
Why does plot 1 start at 75 on the x axis while plot 2 starts at 100? How can I make plot 2 start at 75 as well and be scaled like plot 1?
Both pieces of code share the same line: ticksx <- c(seq(0, 300, by=25))
A good technique to align the axis range on different plots is to use expand_limits.
You can simply use p1 + expand_limits(x=c(0, 300)). This will ensure the x-axis contains at least 0 and 300 on all your plots. You can also control the y-axis range by using the y argument.
From ?expand_limits:
Sometimes you may want to ensure limits include a single value, for all panels or all plots. This function is a thin wrapper around geom_blank() that makes it easy to add such values.

How to make all plots in gridarrange the same size. Some with labels on axes and others none?

I have 4 plots that I want to display in the same figure. I can do this with ggplot2 and grid.arrange. However to make it neater I want to remove some of the redundant labels and axes values as essentially it is the same graph repeated 4 times.
This is my original code:
x <- 1:100
y1 <- sample(x = c(1,2, 3, 4, 5, 6, 7, 8, 9, 10), size = 100, replace = TRUE)
y2 <- sample(x = c(1,2, 3, 4, 5, 6, 7, 8, 9, 10), size = 100, replace = TRUE)
y3 <- sample(x = c(1,2, 3, 4, 5, 6, 7, 8, 9, 10), size = 100, replace = TRUE)
y4 <- sample(x = c(1,2, 3, 4, 5, 6, 7, 8, 9, 10), size = 100, replace = TRUE)
df <- data.frame(x, y1, y2, y3, y4)
library(ggplot2)
library(grid)
library(gridExtra)
#Create 4 simple plots
plot1 <- ggplot(df, aes(x = x, y = y1)) + geom_line() +
labs(y = "Temperature") + labs(x = "Hours") + labs(title = "Y1") +
theme_bw()
plot(plot1)
plot2 <- ggplot(df, aes(x = x, y = y2)) + geom_line() +
labs(y = "Temperature") + labs(x = "Hours") + labs(title = "Y2") +
theme_bw()
plot(plot2)
plot3 <- ggplot(df, aes(x = x, y = y3)) + geom_line() +
labs(y = "Temperature") + labs(x = "Hours") + labs(title = "Y3") +
theme_bw()
plot(plot3)
plot4 <- ggplot(df, aes(x = x, y = y4)) + geom_line() +
labs(y = "Temperature") + labs(x = "Hours") + labs(title = "Y4") +
theme_bw()
plot(plot4)
# combine plots
All_plot <- grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
These are the original plots and look fine but I want to remove the x axis of Y1 and Y2 and the Y axis of Y2 and Y4.
I do this in ggplot:
plot1 <- ggplot(df, aes(x = x, y = y1)) + geom_line() +
labs(y = "Temperature") + labs(title = "Y1") +
theme_bw() +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.ticks.x=element_blank())
plot(plot1)
plot2 <- ggplot(df, aes(x = x, y = y2)) + geom_line() +
labs(title = "Y2") +
theme_bw() +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(),axis.ticks.x=element_blank()) +
theme(axis.title.y=element_blank(), axis.text.y=element_blank(),axis.ticks.y=element_blank())
plot(plot2)
plot3 <- ggplot(df, aes(x = x, y = y3)) + geom_line() +
labs(y = "Temperature") + labs(x = "Hours") + labs(title = "Y3") +
theme_bw()
plot(plot3)
plot4 <- ggplot(df, aes(x = x, y = y4)) + geom_line() +
labs(x = "Hours") + labs(title = "Y4") +
theme_bw() +
theme(axis.title.y=element_blank(), axis.text.y=element_blank(),axis.ticks.y=element_blank())
plot(plot4)
All_plot <- grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
But the plots are different sizes? Y1 and Y2 are taller and Y2 and Y4 are wider than the others. How can I make it so the square boxes are all the same size?
try the patchwork library:
https://github.com/thomasp85/patchwork
library(patchwork)
plot1 + plot2 + plot3 + plot4
It is a magical library.....have fun with it
I'd recommend faceting by Y1, Y2, etc. To do that, you first have to gather the y's.
library(tidyverse)
df %>%
  # Reshape the y columns to long format: the column names go to `Variable`
  # and their values to `Value`. pivot_longer() replaces the superseded
  # gather().
  pivot_longer(
    cols = contains("y"),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  ggplot(aes(x, Value)) +
  geom_line() +
  # One panel per original y column.
  facet_wrap(~ Variable) +
  theme_bw()

plotting modified point and line plot - variability as "spike" plot in r

Before explaining details, here is my data:
set.seed (1234)
datas <- data.frame (Indv = 1:20, Xvar = rnorm (20, 50, 10),
Yvar = rnorm (20, 30,5), Yvar1 = rnorm (20, 10, 2),
Yvar2 = rnorm (20, 5, 1), Yvar3 = rnorm (20, 100, 20),
Yvar4 = rnorm (20, 15, 3))
I want to prepare a graph (a metroglyph), which is essentially a point plot in which each point (of Xvar and Yvar) has spikes (lines) originating from it, scaled to the rest of the variables (Yvar1, Yvar2, Yvar3, Yvar4).
Each spike is ordered and preferably color coded.
require(ggplot2)
ggplot(datas, aes(x=Xvar, y=Yvar)) +
geom_point(shape=1, size = 10) + theme_bw()
Here is one possible approach that may be helpful to you. It uses stat_spoke() from ggplot2. Each of your y-variables is mapped to the spoke radius in 4 separate calls to stat_spoke.
plot_1 = ggplot(datas, aes(x=Xvar, y=Yvar)) +
stat_spoke(aes(angle=(1/8)*pi, radius=Yvar1), colour="#E41A1C",size=1) +
stat_spoke(aes(angle=(3/8)*pi, radius=Yvar2), colour="#377EB8",size=1) +
stat_spoke(aes(angle=(5/8)*pi, radius=Yvar3), colour="#4DAF4A",size=1) +
stat_spoke(aes(angle=(7/8)*pi, radius=Yvar4), colour="#984EA3",size=1) +
geom_point(shape=1, size = 10)
ggsave("plot_1.png", plot_1)
Depending on your data and specific needs, it may make sense to transform the variables so they fit better on the plot.
# Standardise x and shift it so the values are non-negative.
#
# x: numeric vector.
# Returns a numeric vector the same length as x; for non-constant input the
# smallest element becomes 0.
normalize <- function(x) {
  # z-scores: centre on the mean and divide by the standard deviation.
  z_scores <- (x - mean(x)) / sd(x)
  # Shift everything up by the magnitude of the smallest z-score.
  z_scores + abs(min(z_scores))
}
plot_2 = ggplot(datas, aes(x=Xvar, y=Yvar)) +
stat_spoke(aes(angle=(1/8)*pi, radius=normalize(Yvar1)), colour="#E41A1C", size=1) +
stat_spoke(aes(angle=(3/8)*pi, radius=normalize(Yvar2)), colour="#377EB8", size=1) +
stat_spoke(aes(angle=(5/8)*pi, radius=normalize(Yvar3)), colour="#4DAF4A", size=1) +
stat_spoke(aes(angle=(7/8)*pi, radius=normalize(Yvar4)), colour="#984EA3", size=1) +
geom_point(shape=1, size = 10)
ggsave("plot_2.png", plot_2)
Important caveat: For the same spoke radius value, the magnitude of the plotted line will be greater if the line is more vertical, and less if the line is more horizontal. This is because the range of x is around twice the range of y for your data set. The plotted angles also become distorted as the x-to-y axis ratio changes. Adding coord_equal(ratio=1) solves this issue, but may introduce other problems.
Edit: Plotting without a loop
This was fun and educational to figure out. Possibly it would have been more time-efficient to type the repetitive code! If anyone can offer advice to improve this code, please comment.
library(reshape2)
dat2 = melt(datas, id.vars=c("Indv", "Xvar", "Yvar"),
variable.name="spoke_var", value.name="spoke_value")
# Apply normalization in a loop. Can plyr do this more gracefully?.
for (var_name in levels(dat2$spoke_var)) {
select_rows = dat2$spoke_var == var_name
norm_dat = normalize(dat2[select_rows, "spoke_value"])
dat2[select_rows, "spoke_value"] = norm_dat
}
# Pick an angle for each Yvar, then add angle column to dat2.
tmp = data.frame(spoke_var=unique(dat2$spoke_var))
tmp$spoke_angle = seq(from=pi/8, by=pi/4, length.out=nrow(tmp))
dat2 = merge(dat2, tmp)
plot_4 = ggplot(dat2, aes(x=Xvar, y=Yvar)) +
stat_spoke(data=dat2, size=1,
aes(colour=spoke_var, angle=spoke_angle, radius=spoke_value)) +
geom_point(data=datas, aes(x=Xvar, y=Yvar), shape=1, size=7) +
coord_equal(ratio=1) +
scale_colour_brewer(palette="Set1")
Here is a more manual approach:
set.seed (1234)
datas <- data.frame (Indv = 1:20, Xvar = rnorm (20, 50, 10),
Yvar = rnorm (20, 30,5), Yvar1 = rnorm (20, 10, 2),
Yvar2 = rnorm (20, 5, 1), Yvar3 = rnorm (20, 100, 20),
Yvar4 = rnorm (20, 15, 3))
datas$SYvar1 <- 2 + scale (datas$Yvar1)
datas$SYvar2 <- 2 + scale (datas$Yvar2)
datas$SYvar3 <- 2 + scale (datas$Yvar3)
datas$SYvar4 <- 2 + scale (datas$Yvar4)
require(ggplot2)
p <- ggplot(datas, aes(x=Xvar, y=Yvar)) +
geom_point(size = 10, pch = 19, col = "yellow2")
p + geom_segment(aes(x = Xvar, y = Yvar, xend = Xvar + SYvar1,
yend = Yvar), col = "red4", size = 1) +
geom_segment(aes(x = Xvar, y = Yvar, xend = Xvar,
yend = Yvar + SYvar2), col = "green4", size = 1) +
geom_segment(aes(x = Xvar, y = Yvar, xend = Xvar-2.5,
yend = Yvar + SYvar3), col = "darkblue", size = 1) +
geom_segment(aes(x = Xvar, y = Yvar, xend =
Xvar - SYvar4, yend = Yvar ), col = "red", size = 1) +
theme_bw()

Resources