ggplot for dose-response curve using drc package - r

I'm trying to plot dose-response curve in ggplot using drc package using below code and have two questions as follows.
First: I need to include 0, 10, 100 etc and omit 4000 label on the x axis, how it can be done?. Second: Is it possible to squeeze the graph towards y-axis as the first data point is at 100, much space is taken up before that. I need to arrange several plots side by side so if the plot can start from 100 and how we can avoid the overlap of labels (for example 2000 and 3000 in the image below). Please guide me with this, thanks!
gi <- as.numeric(c("0", "5.24", "24.2",
"37.2", "71.9", "80",
"100", "100", "0",
"0", "15.1", "42.8", "61.8", "73.5", "97.3", "100"))
conc <- as.numeric(c("0", "100", "167", "278.89", "465.74", "777.79", "1298.91", "2169.19", "0", "100", "167", "278.89", "465.74", "777.79", "1298.91", "2169.19" ))
df <- data.frame(conc, gi)
library("drc")
library(ggplot2)
Pyr <- drm(gi ~ conc, data = df, fct = LL.4(fixed = c(NA, 0, 100, NA)))
newdata <- expand.grid(conc=exp(seq(log(0.5), log(3000), length=500)))
# predictions and confidence intervals
pm <- predict(Pyr, newdata=newdata, interval="confidence")
# new data with predictions
newdata$p <- pm[,1]
newdata$pmin <- pm[,2]
newdata$pmax <- pm[,3]
# need to shift conc == 0 a bit up, otherwise there are problems with coord_trans
df$conc0 <- df$conc
df$conc0[df$conc0 == 0] <- 0.5
# plotting the curve
ggplot(df, aes(x = conc0, y = gi)) +
geom_point() +
geom_ribbon(data=newdata, aes(x=conc, y=p, ymin=pmin, ymax=pmax), alpha=0.2) +
geom_line(data=newdata, aes(x=conc, y=p)) +
coord_trans(x="log") +
ggtitle("Pyridine") + xlab("Concentration (mg/l)") + ylab("Growth inhibition")

you can define the X-axis limits within the scale_x_continuous() function:
ggplot(df, aes(x = conc0, y = gi)) +
geom_point() +
geom_ribbon(data=newdata, aes(x=conc, y=p, ymin=pmin, ymax=pmax), alpha=0.2) +
geom_line(data=newdata, aes(x=conc, y=p)) +
coord_trans(x="log") +
# here you can decide the limits of the x-axis
scale_x_continuous(limits = c(100,3000)) +
ggtitle("Pyridine") + xlab("Concentration (mg/l)") + ylab("Growth inhibition")
acording to your comment:
ggplot(df, aes(x = conc0, y = gi)) +
geom_point() +
geom_ribbon(data=newdata, aes(x=conc, y=p, ymin=pmin, ymax=pmax), alpha=0.2) +
geom_line(data=newdata, aes(x=conc, y=p)) +
coord_trans(x="log") +
# here you can decide the limits of the x-axis, breaks and labels
scale_x_log10(limits = c(10, 3000), breaks = c(10, 100, 1000, 2000, 3000), labels = c(10, 100, 1000, 2000, 3000)) +
ggtitle("Pyridine") + xlab("Concentration (mg/l)") + ylab("Growth inhibition") + theme(axis.text.x = element_text(angle = 90))

Related

using ggplot to create pareto chart

I am using this code to create a Pareto chart, but it does not work well on this dataset, the x-axis becomes unsorted in the graph
cds <- catsvdogs
newdata <- cds[order(-cds$`Number of Households (in 1000)`),]
newdata <- data.frame(newdata$Location,newdata$`Number of Households (in 1000)`)
newdata <- newdata[c(1:10),c(1:2)]
newdata$cumulative <- cumsum(newdata$newdata..Number.of.Households..in.1000..)
ggplot(newdata, aes(x=newdata[,1])) +
geom_bar(aes(y=newdata[,2]), fill='blue', stat="identity") +
geom_point(aes(y=cumulative), color = rgb(0, 1, 0), pch=16, size=1) +
geom_path(aes(y=cumulative, group=1), colour="slateblue1", lty=3, size=0.9) +
theme(axis.text.x = element_text(angle=90, vjust=0.6)) +
labs(title = "Pareto Plot", x = 'Cities', y =
'Count')

How to highlight a point along a curve in ggplot

I have the below code to plot a probit model comparing the chance of success based on a maximum temperature value. Seems to work well, I'm happy with the plot. But I'm hoping to highlight the point along the curve where the probability is 50%, and then draw a line down to the x-axis to determine (and show) this value as well. Also hoping to include confidence intervals for this estimate. Any help would be greatly appreciated!
data <- data.frame(MaxTemp = c(53.2402, 59.01004,51.42602,41.53883,44.70763,53.90285,51.130318,54.5929,43.697559,49.772446,54.902222,52.720528,58.782608,47.680374,48.30313,56.10921,57.660324,46.387924,60.503147,53.803177,52.27771,58.58555,55.74136,49.04505,46.816269,52.58295,52.751373,56.209747,51.733894,51.424305,50.74564,47.046513,53.030407,56.68752,56.639351,53.526585,51.562313),
Success=c(1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1))
TempProbitModel <- glm(Success ~ MaxTemp, data=data, family=binomial(link="logit"))
temp.data <- data.frame(MaxTemp = seq(40, 62, 0.5))
predicted.data <- as.data.frame(predict(TempProbitModel, newdata = temp.data, type="link", se=TRUE))
new.data <- cbind(temp.data, predicted.data)
std <- qnorm(0.95 / 2 + 0.5)
new.data$ymin <- TempProbitModel$family$linkinv(new.data$fit - std * new.data$se)
new.data$ymax <- TempProbitModel$family$linkinv(new.data$fit + std * new.data$se)
new.data$fit <- TempProbitModel$family$linkinv(new.data$fit)
(TempProb <- ggplot(data, aes(x=MaxTemp, y=Success)) +
geom_point() +
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") )
Find the closest value to y = 0.5:
closest_value <- which(abs(new.data$fit - 0.5) == min(abs(new.data$fit - 0.5)))
Calculate slope at this point:
slope_at_closest_value <- (new.data[closest_value, "MaxTemp"] - new.data[closest_value - 1, "MaxTemp"]) /( new.data[closest_value, "fit"] - new.data[closest_value - 1, "fit"])
x_value <- new.data[closest_value - 1, "MaxTemp"] + slope_at_closest_value * (0.5 - new.data[closest_value - 1, "fit"])
Use this x_value to draw a vertical line:
ggplot(data, aes(x=MaxTemp, y=Success)) +
geom_point() +
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") +
geom_vline(xintercept = x_value, color="red")
This draws the following plot:
The confidence interval can be drawn accordingly.
An another way of getting this point is to use approxfun function.
f <- approxfun(new.data$fit,new.data$MaxTemp, rule = 2)
f(0.5)
[1] 49.39391
So now, if you are plotting it:
library(ggplot2)
ggplot(data, aes(x = MaxTemp, y = Success))+
geom_point()+
geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.5) +
geom_line(data=new.data, aes(y=fit)) +
labs(x="Peak Temperature", y="Probability of Success") +
geom_point(x = f(0.5), y = 0.5, size = 3, color = "red")+
geom_vline(xintercept = f(0.5), linetype = "dashed", color = "red")+
geom_hline(yintercept = 0.5, linetype = "dashed", color = "red")

ggplot2 - how to limit panel and axis?

I want to know how to turn this plot:
Into this plot:
As you can see the panel and axis on the 2nd plot are limited to the data extent. I made the second graph using design software but want to know the code.
Ive already limited the x and y axis using
xlim and ylim but no difference.
Please see my code below, sorry its so messy, first time using r studio. Thanks!
ggplot() +
geom_errorbar(data = U1483_Coiling_B_M_Removed_R, mapping = aes(x = `Age (Ma) Linear Age Model`, ymin = `Lower interval*100`, ymax = `Upper interval*100`), width = 0.025, colour = 'grey') +
geom_line(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Percent Dextral`)) +
geom_point(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Percent Dextral`), colour = 'red') +
geom_point(data = U1483_Coiling_B_M_Removed_R, aes(x = `Age (Ma) Linear Age Model`, y = `Lab?`)) +
theme(axis.text.x=element_text(angle=90, size=10, vjust=0.5)) +
theme(axis.text.y=element_text(angle=90, size=10, vjust=0.5)) +
theme_classic() +
theme(panel.background = element_rect(colour = 'black', size = 1)) +
xlim(0, 2.85) +
ylim(0, 100)
You can use expand when specifying axis scales, like so:
# Load library
library(ggplot2)
# Set RNG
set.seed(0)
# Create dummy data
df <- data.frame(x = seq(0, 3, by = 0.1))
df$y <- 100 - abs(rnorm(nrow(df), 0, 10))
# Plot results
# Original
ggplot(df, aes(x, y)) +
geom_line() +
geom_point(colour = "#FF3300", size = 5)
# With expand
ggplot(df, aes(x, y)) +
geom_line() +
geom_point(colour = "#FF3300", size = 5) +
scale_y_continuous(expand = c(0, 0))

Extrapolation of non-linear relationships in R (ggplot2)

Assuming this dataset (df):
Year<- c(1900, 1920,1940,1960,1980,2000, 2016)
Percent<-(0, 2, 4, 8, 10, 15, 18)
df<-cbind (Year, Percent)
df<-as.data.frame (df)
How would it be possible to extrapolate this plotted loess relationship to the years 2040, 2060, 2080, 2100. Using three different scenarios with different slopes to get to a y value (Percent) of 50%?
ggplot(data=df, aes(x=Year, y=Percent)) +
geom_smooth(method="loess", color="#bdc9e1") +
geom_point(color="#2b8cbe", size=0.5) + theme_bw() +
scale_y_continuous (limits=c(0,60), "Percent of Area") +
scale_x_continuous (limits=c(1900,2100), "Year") +
geom_hline(aes(yintercept=50)) + geom_vline(xintercept = 2016)
This should work:
library(ggplot2)
p <- ggplot(data=df, aes(x=Year, y=Percent)) +
geom_smooth(method="loess", color="#bdc9e1") +
geom_point(color="#2b8cbe", size=0.5) + theme_bw() +
scale_y_continuous (limits=c(0,60), "Percent of Area") +
scale_x_continuous (limits=c(1900,2100), "Year") +
geom_hline(aes(yintercept=50)) + geom_vline(xintercept = 2016)
p
model <- loess(Percent~Year,df, control=loess.control(surface="direct"))
newdf <- data.frame(Year=seq(2017,2100,1))
predictions <- predict(model, newdata=seq(2017,2100,1), se=TRUE)
newdf$fit <- predictions$fit
newdf$upper <- predictions$fit + qt(0.975,predictions$df)*predictions$se
newdf$lower <- predictions$fit - qt(0.975,predictions$df)*predictions$se
head(newdf)
# Year fit upper lower
#1 2017 18.42822 32.18557 4.6708718
#2 2018 18.67072 33.36952 3.9719107
#3 2019 18.91375 34.63008 3.1974295
#4 2020 19.15729 35.96444 2.3501436
#5 2021 19.40129 37.37006 1.4325124
#6 2022 19.64571 38.84471 0.4467122
p +
geom_ribbon(data=newdf, aes(x=Year, y=fit, ymax=upper, ymin=lower), fill="grey90") +
geom_line(data=newdf, aes(x=Year, y=fit), color='steelblue', lwd=1.2, lty=2)
A colleague from work offered this solution: Thanks ADAM!
loess_mod <- loess(Perc_area~Estab_Yr, data = marine_sub, control=loess.control(surface="direct"))
prd <- data.frame(Estab_Yr = seq(2017, 2100, by = 1))
loess_df <- data.frame(Estab_Yr = prd, Perc_area = predict(loess_mod, newdata = prd))
#Then, we can use geom_line and geom_point, but we need to tweak the scale on the y-axis to allow for where the predictions in 2017 start (just above 60):
ggplot(data=marine_sub, aes(x=Estab_Yr, y=Perc_area)) +
geom_smooth(method="loess", color="#bdc9e1") +
geom_point(color="#2b8cbe", size=0.5) + theme_bw() +
scale_y_continuous (limits=c(0,100), "Percent of Protected Area") +
scale_x_continuous (limits=c(1900,2100), "Year Protected") +
geom_hline(aes(yintercept=50)) + geom_vline(xintercept = 2017) +
geom_line(data= loess_df, color = "orange", size = 1) +
geom_point(data = loess_df, aes(x = Estab_Yr, y = Perc_area), size=.25)

R running average for non-time data

This is the plot I'm having now.
It's generated from this code:
ggplot(data1, aes(x=POS,y=DIFF,colour=GT)) +
geom_point() +
facet_grid(~ CHROM,scales="free_x",space="free_x") +
theme(strip.text.x = element_text(size=40),
strip.background = element_rect(color='lightblue',fill='lightblue'),
legend.position="top",
legend.title = element_text(size=40,colour="lightblue"),
legend.text = element_text(size=40),
legend.key.size = unit(2.5, "cm")) +
guides(fill = guide_legend(title.position="top",
title = "Legend:GT='REF'+'ALT'"),
shape = guide_legend(override.aes=list(size=10))) +
scale_y_log10(breaks=trans_breaks("log10", function(x) 10^x, n=10)) +
scale_x_continuous(breaks = pretty_breaks(n=3)) +
geom_line(stat = "hline",
yintercept = "mean",
size = 1)
The last line, geom_line creates the mean line for each panel.
But now I want to have the more specific running average inside each panel.
i.e. If panel1('chr01') has x-axis range from 0 to 100,000,000, I would want to have the mean value for each 1,000,000 range.
mean1 = mean(x=0 to x=1,000,000)
mean2 = mean(x=1,000,001 to x=2,000,000)
One way to provide a running mean is with geom_smooth() using the loess local regression method. In order to demonstrate my proposed solution, I created a fake genomic dataset using R functions. You can adjust the span parameter of geom_smooth to make the running mean smoother (closer to 1.0) or rougher (closer to 1/number of data points).
# Create example data.
set.seed(27182)
y1 = rnorm(10000) +
c(rep(0, 1000), dnorm(seq(-2, 5, length.out=8000)) * 3, rep(0, 1000))
y2 = c(rnorm(2000), rnorm(1000, mean=1.5), rnorm(1000, mean=-1, sd=2),
rnorm(2000, sd=2))
y3 = rnorm(4000)
pos = c(sort(runif(10000, min=0, max=1e8)),
sort(runif(6000, min=0, max=6e7)),
sort(runif(4000, min=0, max=4e7)))
chr = rep(c("chr01", "chr02", "chr03"), c(10000, 6000, 4000))
data1 = data.frame(CHROM=chr, POS=pos, DIFF=c(y1, y2, y3))
# Plot.
p = ggplot(data1, aes(x=POS, y=DIFF)) +
geom_point(alpha=0.1, size=1.5) +
geom_smooth(colour="darkgoldenrod1", size=1.5, method="loess", degree=0,
span=0.1, se=FALSE) +
scale_x_continuous(breaks=seq(1e7, 3e8, 1e7),
labels=paste(seq(10, 300, 10)), expand=c(0, 0)) +
xlab("Position, Megabases") +
theme(axis.text.x=element_text(size=8)) +
facet_grid(. ~ CHROM, scales="free", space="free")
ggsave(filename="plot_1.png", plot=p, width=10, height=5, dpi=150)

Resources