Extrapolation of non-linear relationships in R (ggplot2) - r

Assuming this dataset (df):
# Example data: percent of area recorded at selected years.
# The original `Percent<-(0, 2, ...)` was missing `c` and did not parse.
Year <- c(1900, 1920, 1940, 1960, 1980, 2000, 2016)
Percent <- c(0, 2, 4, 8, 10, 15, 18)
# Build the data frame directly instead of going through a cbind() matrix.
df <- data.frame(Year, Percent)
How would it be possible to extrapolate this plotted loess relationship to the years 2040, 2060, 2080 and 2100, using three different scenarios with different slopes to reach a y value (Percent) of 50%?
# Observed data with a loess smooth, plus reference lines at the 50% level
# and at the year 2016.
ggplot(df, mapping = aes(x = Year, y = Percent)) +
  geom_smooth(method = "loess", color = "#bdc9e1") +
  geom_point(color = "#2b8cbe", size = 0.5) +
  geom_hline(aes(yintercept = 50)) +
  geom_vline(xintercept = 2016) +
  scale_x_continuous(name = "Year", limits = c(1900, 2100)) +
  scale_y_continuous(name = "Percent of Area", limits = c(0, 60)) +
  theme_bw()

This should work:
library(ggplot2)

# Base plot: loess smooth of the observed data with reference lines at the
# 50% level and at 2016.
p <- ggplot(data = df, aes(x = Year, y = Percent)) +
  geom_smooth(method = "loess", color = "#bdc9e1") +
  geom_point(color = "#2b8cbe", size = 0.5) +
  theme_bw() +
  scale_y_continuous(limits = c(0, 60), "Percent of Area") +
  scale_x_continuous(limits = c(1900, 2100), "Year") +
  geom_hline(aes(yintercept = 50)) +
  geom_vline(xintercept = 2016)
p

# Fit the loess model with surface = "direct" so that predict() can be
# evaluated at years beyond the observed range.
model <- loess(Percent ~ Year, data = df,
               control = loess.control(surface = "direct"))

# Years to extrapolate to; the same data frame is used for prediction and
# for plotting so the two cannot drift apart.
newdf <- data.frame(Year = seq(2017, 2100, by = 1))
predictions <- predict(model, newdata = newdf, se = TRUE)
newdf$fit <- predictions$fit
# The standard errors are returned as `se.fit`; spell the name out rather
# than relying on `$` partial matching of `se`.
newdf$upper <- predictions$fit + qt(0.975, predictions$df) * predictions$se.fit
newdf$lower <- predictions$fit - qt(0.975, predictions$df) * predictions$se.fit
head(newdf)
# Year fit upper lower
#1 2017 18.42822 32.18557 4.6708718
#2 2018 18.67072 33.36952 3.9719107
#3 2019 18.91375 34.63008 3.1974295
#4 2020 19.15729 35.96444 2.3501436
#5 2021 19.40129 37.37006 1.4325124
#6 2022 19.64571 38.84471 0.4467122

# Overlay the extrapolated fit and its 95% band. `y = fit` is kept in the
# ribbon mapping because the layer would otherwise inherit `y = Percent`,
# which does not exist in newdf.
p +
  geom_ribbon(data = newdf, aes(x = Year, y = fit, ymax = upper, ymin = lower),
              fill = "grey90") +
  geom_line(data = newdf, aes(x = Year, y = fit), color = "steelblue",
            lwd = 1.2, lty = 2)

A colleague from work offered this solution: Thanks ADAM!
# Fit loess on the observed data and predict one value per year, 2017-2100.
loess_mod <- loess(
  Perc_area ~ Estab_Yr,
  data = marine_sub,
  control = loess.control(surface = "direct")
)
prd <- data.frame(Estab_Yr = seq(2017, 2100, by = 1))
loess_df <- data.frame(
  Estab_Yr = prd$Estab_Yr,
  Perc_area = predict(loess_mod, newdata = prd)
)
# Draw the predictions with geom_line and geom_point; the y-axis limits are
# widened to allow for where the predictions in 2017 start (just above 60).
ggplot(marine_sub, mapping = aes(x = Estab_Yr, y = Perc_area)) +
  geom_smooth(method = "loess", color = "#bdc9e1") +
  geom_point(color = "#2b8cbe", size = 0.5) +
  geom_hline(aes(yintercept = 50)) +
  geom_vline(xintercept = 2017) +
  geom_line(data = loess_df, color = "orange", size = 1) +
  geom_point(data = loess_df, aes(x = Estab_Yr, y = Perc_area), size = .25) +
  scale_x_continuous(name = "Year Protected", limits = c(1900, 2100)) +
  scale_y_continuous(name = "Percent of Protected Area", limits = c(0, 100)) +
  theme_bw()

Related

ggplot for dose-response curve using drc package

I'm trying to plot dose-response curve in ggplot using drc package using below code and have two questions as follows.
First: I need to include 0, 10, 100 etc. and omit the 4000 label on the x axis; how can this be done? Second: Is it possible to squeeze the graph towards the y-axis? As the first data point is at 100, much space is taken up before that. I need to arrange several plots side by side, so ideally the plot would start from 100. Also, how can we avoid the overlap of labels (for example 2000 and 3000 in the image below)? Please guide me with this, thanks!
# Growth inhibition (gi) measured at each concentration (conc); two runs of
# eight concentrations each.
gi <- c(
  0, 5.24, 24.2, 37.2, 71.9, 80, 100, 100,
  0, 0, 15.1, 42.8, 61.8, 73.5, 97.3, 100
)
conc <- c(
  0, 100, 167, 278.89, 465.74, 777.79, 1298.91, 2169.19,
  0, 100, 167, 278.89, 465.74, 777.79, 1298.91, 2169.19
)
df <- data.frame(conc, gi)

library("drc")
library(ggplot2)

# Four-parameter log-logistic model with the second and third parameters
# fixed at 0 and 100.
Pyr <- drm(gi ~ conc, data = df, fct = LL.4(fixed = c(NA, 0, 100, NA)))

# Prediction grid: 500 concentrations evenly spaced on the log scale.
newdata <- expand.grid(
  conc = exp(seq(log(0.5), log(3000), length.out = 500))
)

# Predictions with confidence intervals, stored next to the grid.
pm <- predict(Pyr, newdata = newdata, interval = "confidence")
newdata$p <- pm[, 1]
newdata$pmin <- pm[, 2]
newdata$pmax <- pm[, 3]

# conc == 0 is moved up to 0.5, otherwise there are problems with coord_trans.
df$conc0 <- ifelse(df$conc == 0, 0.5, df$conc)

# Observed points, fitted curve and confidence band on a log x axis.
ggplot(df, aes(x = conc0, y = gi)) +
  geom_point() +
  geom_ribbon(
    data = newdata,
    aes(x = conc, y = p, ymin = pmin, ymax = pmax),
    alpha = 0.2
  ) +
  geom_line(data = newdata, aes(x = conc, y = p)) +
  coord_trans(x = "log") +
  labs(
    title = "Pyridine",
    x = "Concentration (mg/l)",
    y = "Growth inhibition"
  )
you can define the X-axis limits within the scale_x_continuous() function:
# Same dose-response plot, with the x-axis restricted to 100-3000.
ggplot(df, aes(x = conc0, y = gi)) +
  geom_point() +
  geom_ribbon(
    data = newdata,
    aes(x = conc, y = p, ymin = pmin, ymax = pmax),
    alpha = 0.2
  ) +
  geom_line(data = newdata, aes(x = conc, y = p)) +
  coord_trans(x = "log") +
  # the limits of the x-axis are chosen here
  scale_x_continuous(limits = c(100, 3000)) +
  labs(
    title = "Pyridine",
    x = "Concentration (mg/l)",
    y = "Growth inhibition"
  )
According to your comment:
# Dose-response plot with custom x-axis limits, breaks and labels, and the
# x tick labels rotated so that neighbouring labels do not overlap.
ggplot(df, aes(x = conc0, y = gi)) +
  geom_point() +
  geom_ribbon(data = newdata, aes(x = conc, y = p, ymin = pmin, ymax = pmax),
              alpha = 0.2) +
  geom_line(data = newdata, aes(x = conc, y = p)) +
  # coord_trans() already puts the x axis on a log scale. The scale below is
  # therefore a plain continuous one: adding scale_x_log10() on top would
  # log-transform the axis a second time.
  coord_trans(x = "log") +
  # here you can decide the limits of the x-axis, breaks and labels
  scale_x_continuous(limits = c(10, 3000),
                     breaks = c(10, 100, 1000, 2000, 3000),
                     labels = c(10, 100, 1000, 2000, 3000)) +
  ggtitle("Pyridine") +
  xlab("Concentration (mg/l)") +
  ylab("Growth inhibition") +
  theme(axis.text.x = element_text(angle = 90))

Displaying the number of counts per bin in a ggplot2 hexbin graph

I'm working on a project to simulate the movement of missing ships. I've made a distribution map using this code:
library("ggplot2")

# Simulated positions. `x` was used without ever being defined in the
# original snippet; it is drawn here so the example runs on its own.
a <- rnorm(1000, 30.2, 2)
b <- rnorm(1000, 10, 5)
x <- rnorm(1000)
y <- (x + a + b) * 0.6
df <- data.frame(x, y)

# Base plot with titles and fill palette; the hexbin layer is added below.
p <- ggplot(df, aes(x = x, y = y)) +
  ggtitle("A Priori Map") + xlab("Longtitude") + ylab("Latitude") +
  scale_fill_gradientn(colors = topo.colors(10))
p + stat_binhex(show.legend = TRUE, bins = 20)
This produces a map like this:
A hexbin map
However, instead of showing the number of counts using a color, I would like to show the actual count in a point. So if the program 'landed' on a certain point 3 times, it would display '3'.
How can this be done in R?
Here's how to add counts to the existing graph:
library(ggplot2)
theme_set(theme_bw())

# Reproducible simulated positions.
set.seed(2)
a <- rnorm(1000, 30.2, 2)
b <- rnorm(1000, 10, 5)
x <- rnorm(1000)
y <- (x + a + b) * 0.6
df <- data.frame(x, y)

# Hexbin map coloured by the number of points per bin.
p <- ggplot(df, aes(x = x, y = y)) +
  ggtitle("A Priori Map") +
  xlab("Longtitude") + ylab("Latitude") +
  scale_fill_gradientn(colors = topo.colors(10)) +
  stat_binhex(show.legend = TRUE, bins = 20)

# The text layer uses the same binhex stat and bin count as the hexagons,
# and labels each bin with its count.
p + geom_text(stat = "binhex", bins = 20, aes(label = ..count..),
              show.legend = FALSE,
              colour = hcl(15, 100, 60), fontface = "bold", size = 3.5)
To remove the fill colours, you could do:
# Outline-only hexagons with the per-bin count printed in red.
ggplot(df, mapping = aes(x = x, y = y)) +
  stat_binhex(bins = 20, fill = NA, colour = "black") +
  geom_text(
    stat = "binhex",
    bins = 20,
    mapping = aes(label = ..count..),
    colour = "red"
  ) +
  labs(title = "A Priori Map", x = "Longtitude", y = "Latitude")
You could also use text size to highlight the regions of highest density:
# Light hex grid with per-bin counts whose text size grows with the count.
ggplot(df, aes(x = x, y = y)) +
  ggtitle("A Priori Map") +
  xlab("Longtitude") + ylab("Latitude") +
  stat_binhex(show.legend = TRUE, bins = 20, fill = NA, colour = "grey70") +
  geom_text(stat = "binhex", bins = 20,
            aes(label = ..count.., size = ..count..), colour = "red") +
  scale_size_continuous(range = c(3, 6)) +
  # "none" hides the size legend; passing FALSE here is deprecated.
  guides(size = "none")
Which also works without the hex-grid:
# Per-bin counts only (no hexagons); text size grows with the count.
ggplot(df, aes(x = x, y = y)) +
  ggtitle("A Priori Map") +
  xlab("Longtitude") + ylab("Latitude") +
  geom_text(stat = "binhex", bins = 20,
            aes(label = ..count.., size = ..count..), colour = "red") +
  scale_size_continuous(range = c(3, 6)) +
  # "none" hides the size legend; passing FALSE here is deprecated.
  guides(size = "none")

How can I fill the space between values(geom_line) and an intercept with ggplot2? Different Colors for values over and under intercept [duplicate]

This question already has answers here:
How to fill with different colors between two lines? (originally: fill geom_polygon with different colors above and below y = 0 (or any other value)?)
(4 answers)
Closed 3 months ago.
I want to do a graph with ggplot2, where I need the space/area between the intercept (=1) and the values (which I connected through geom_line) to be red (if the values are lower than 1) or green (if the values are bigger than 1). The data is from microsoft (price performance since 1999).
Data:
# library() stops with an error when a package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(quantmod)
library(dplyr)
library(ggplot2)

# Download MSFT prices from 1999 onwards; getSymbols() creates `MSFT`.
getSymbols("MSFT", from = "1999-01-01")

# Keep the time index and the sixth column of MSFT (referred to below as
# MSFT.Adjusted).
microsoft <- data.frame(time(MSFT), MSFT[, 6])
microsoft$time <- as.Date(microsoft$time.MSFT., "%Y-%m-%d")

# change:    absolute difference to the first adjusted price
# change.pc: price relative to the first adjusted price (1 = unchanged)
microsoft <- microsoft %>%
  mutate(change = MSFT.Adjusted - first(MSFT.Adjusted),
         change.pc = change / first(MSFT.Adjusted) + 1)
that is the ggplot I have so far:
# Relative price over time, with a black reference line at 1.
ggplot(microsoft, mapping = aes(x = time, y = change.pc)) +
  geom_line(stat = "identity") +
  geom_hline(mapping = aes(yintercept = 1), color = "black") +
  labs(
    title = "Microsoft Kursentwicklung seit Januar 1999",
    x = "Jahr",
    y = ""
  ) +
  theme_bw()
I want to fill the space between y = 1 and the values above it in green, and the space between y = 1 and the values below it in red. I tried geom_ribbon, geom_area and geom_polygon, but nothing worked. The biggest problem is that it fills the space green not only above y = 1 but also below it, and the red you can't even see...
here what I tried:
# Attempted fill layers: each one keeps only the rows on one side of 1
# (green for change.pc > 1, red for change.pc < 1).
# NOTE(review): geom_area() presumably fills down to y = 0 rather than to the
# y = 1 reference line - confirm against the ggplot2 documentation.
geom_area(data = subset(microsoft, change.pc > 1), fill = "green", alpha =0.5)
geom_area(data = subset(microsoft, change.pc < 1), fill = "red", alpha = 0.5)
I put these two lines in my plot, and then the problem I described above appeared.
Among other things I also tried this (found here on stackoverflow.com):
# Attempt to insert extra rows where the line crosses a threshold, so that
# the areas on each side can be filled separately.
# Tag the existing rows and sort them by time.
microsoft$grp <- "orig"
microsoft <- microsoft[order(microsoft$time),]
# For every pair of consecutive rows, fit a line time ~ change.pc through the
# two points and predict the time at which change.pc equals 0. The per-pair
# results are row-bound into one data frame.
# NOTE(review): the crossing is evaluated at change.pc = 0 (here and in the
# returned row), while the question asks about the level 1 - confirm which
# threshold is intended.
microsoft_new <- do.call("rbind",
sapply(1:(nrow(microsoft) -1), function(i){
f <- lm(time ~ change.pc, microsoft[i:(i+1), ])
# Skip pairs for which the fit is rank-deficient.
if (f$qr$rank < 2) return(NULL)
r <- predict(f, newdata = data.frame(change.pc = 0))
# Keep the predicted point only if it lies strictly between the two rows.
if(microsoft[i, ]$time < r & r < microsoft[i+1, ]$time)
return(data.frame(time = r, change.pc = 0))
else return(NULL)
})
)
# NOTE(review): microsoft_new only has the columns time and change.pc, while
# microsoft has additional ones (e.g. grp) - verify that this rbind() works.
microsoft_2 <- rbind(microsoft, microsoft_new)
# Red area for rows with change.pc <= 1, blue area for rows with change.pc >= 1.
# NOTE(review): the x breaks are plain years (1999, 2002, ...) on a continuous
# scale, while x is mapped to `time` - confirm the two are compatible.
ggplot(microsoft_2, aes(x = time, y = change.pc)) +
geom_area(data = subset(microsoft_2, change.pc <= 1), fill = "red") +
geom_area(data = subset(microsoft_2, change.pc >= 1), fill = "blue") +
scale_x_continuous("", expand = c(0,0), breaks = seq(1999, 2017, 3)) +
theme_bw()
That didn't work either.
Does anyone have an idea how I could achieve what I need?
This is how it should look
I couldn't get your data to work, but using some made up data, the following approach looks like your example:
library(ggplot2)

# Made-up data: a random walk around 0.
set.seed(0)
microsoft <- data.frame(date = 1:1000, y = cumsum(runif(1000) - 0.5))

# Two ribbons split at 0: red between min(y, 0) and 0, green between 0 and
# max(y, 0). Columns are referenced by bare name inside aes() so the
# mappings are evaluated against the layer's data rather than against the
# global `microsoft` object.
ggplot(microsoft, aes(x = date, y = y)) +
  geom_ribbon(aes(ymin = pmin(y, 0), ymax = 0),
              fill = "red", col = "red", alpha = 0.5) +
  geom_ribbon(aes(ymin = 0, ymax = pmax(y, 0)),
              fill = "green", col = "green", alpha = 0.5) +
  geom_line(aes(y = 0))
I found a very clean solution using ggh4x package. Here it is
library(ggh4x)

# Made-up data: a random walk around 0.
set.seed(0)
steps <- runif(1000) - 0.5
microsoft <- data.frame(date = 1:1000, y = cumsum(steps))

# stat_difference() fills the area between ymin = 0 and ymax = y; the fill
# legend is left without a title.
ggplot(microsoft, mapping = aes(x = date, y = y)) +
  ggh4x::stat_difference(mapping = aes(ymin = 0, ymax = y)) +
  geom_line(mapping = aes(y = y)) +
  theme_bw() +
  labs(fill = NULL)
You can use geom_ribbon for this. The following solution is similar to @Miff's solution, but with the intersection at 1. In addition, I have added the desired scales.
# Two ribbons split at 1: red between min(change.pc, 1) and 1, green between
# 1 and max(change.pc, 1). Both layers reference the column by bare name so
# they are evaluated against the plot data in the same way.
ggplot(microsoft, aes(x = time, y = change.pc)) +
  geom_ribbon(aes(ymin = pmin(change.pc, 1), ymax = 1),
              fill = "red", col = "red", alpha = 0.5) +
  geom_ribbon(aes(ymin = 1, ymax = pmax(change.pc, 1)),
              fill = "green", col = "green", alpha = 0.5) +
  geom_hline(aes(yintercept = 1), color = "black") +
  theme_bw(base_size = 16) +
  # Date axis: a labelled break every 3 years, minor breaks every year.
  scale_x_date(name = "Jahr",
               date_breaks = "3 years",
               date_minor_breaks = "1 year",
               date_labels = "%Y") +
  # Show the relative price as a percentage (0.8 -> "80%", ..., 2.8 -> "280%").
  scale_y_continuous(name = "",
                     breaks = seq(.8, 2.8, by = .4),
                     labels = paste0(seq(80, 280, by = 40), "%")) +
  ggtitle("Microsoft Kursentwicklung seit Januar 1999")

Add multiple geom_line to ggplot

The geom_line CUR_MTH_UNEARN_REV_EUR is plotted correctly as a numeric. My goal is to add a second numeric geom_line (i.e., CUR_MTH_EARN_REV_EUR). Here's the code:
# ggplot2 is attached explicitly because the code below calls ggplot(),
# aes() and the geoms directly.
library("ggplot2")
library("ggthemes")
library("gridExtra")
library("grid")

# Line chart of CUR_MTH_UNEARN_REV_EUR by report date. Points below 8.0 are
# drawn larger and in red; a loess smooth and value labels are added on top.
# Each `+` must END a line: a line that starts with `+` is parsed as a new
# statement, so the layers would never be added to `p`.
p <- ggplot(f, aes(DTE_OF_REPORT_EUR, CUR_MTH_UNEARN_REV_EUR,
                   label = (CUR_MTH_UNEARN_REV_EUR))) +
  geom_point(size = ifelse(f$CUR_MTH_UNEARN_REV_EUR < 8.0, 11, 5),
             color = ifelse(f$CUR_MTH_UNEARN_REV_EUR < 8.0, '#CC0000', 'black')) +
  geom_line(size = 2, aes(group = 1)) + geom_rangeframe() + theme_wsj() +
  theme(axis.text.x = element_text(angle = 50, size = 20, vjust = 0.7)) +
  geom_smooth(aes(group = 1), method = "loess", colour = "#CC0000", lwd = 2) +
  geom_text(aes(label = CUR_MTH_UNEARN_REV_EUR), hjust = -0.5, vjust = 0.5,
            fontface = "bold") +
  ggtitle("Unearned Revenue by Service Code 'BS', in CSG Months, Jul. 2014-Aug. 2015") +
  theme(plot.title = element_text(lineheight = .8, face = "bold"))
p

# Source note added to the plot as a custom text grob.
Text1 <- textGrob("Source: Revenue Assurance and Quality Control", gp = gpar(fontsize = 7))
p2 <- p + annotation_custom(grob = Text1, ymin = -0.2, ymax = -30)
p2

format(round(f$CUR_MTH_UNEARN_REV_EUR, 2), nsmall = 2)
f$ScoreRounded <- round(f$CUR_MTH_UNEARN_REV_EUR, 1)
# Turn the report date into a factor whose levels follow the order in which
# the values first appear in the data.
f$DTE_OF_REPORT_EUR <- factor(f$DTE_OF_REPORT_EUR, levels = unique(as.character(f$DTE_OF_REPORT_EUR)))
Hope this helps as a start. You can just add things, but you need to have the correct aes.
# data with x and two y-variables
set.seed(123)
f <- data.frame(x = 1:10,
                var1 = sample(7:10, 10, replace = TRUE),
                var2 = sample(5:7, 10, replace = TRUE))
# as you want sizing by a measure, make a flag
f$var1_threshold <- f$var1 < 9
# example with adding different geoms
# note that it's unnecessary to refer to my data (f)
# in the call to aes, as everything I need is already
# inside f
p <- ggplot(f, aes(x = x)) +
  geom_point(aes(y = var1, size = var1_threshold, color = var1_threshold)) +
  # colors and size in aes allows for legend generation.
  scale_size_manual(values = c("FALSE" = 5, "TRUE" = 8)) +
  scale_color_manual(values = c("FALSE" = '#CC0000', "TRUE" = 'black')) +
  # one line and one smooth per y-variable, each with its own aes(y = ...)
  geom_line(aes(y = var1), size = 1) +
  geom_line(aes(y = var2), size = 1) +
  geom_smooth(aes(y = var1), colour = "#CC0000") +
  geom_smooth(aes(y = var2), colour = "black")

R - Shading part of a ggplot2 histogram

So I have this data:
# 1000 binomial draws (16 trials, p = 0.5).
dataset <- rbinom(1000, 16, 0.5)
# The summary statistics get their own names so that the base functions
# mean() and sd() are not hidden by variables of the same name.
dataset_mean <- mean(dataset)
dataset_sd <- sd(dataset)
# Keep the draws within two standard deviations of the mean.
data_subset <- dataset[dataset >= (dataset_mean - 2 * dataset_sd) &
                         dataset <= (dataset_mean + 2 * dataset_sd)]
# Both vectors become one-column data frames (column X) for plotting.
dataset <- data.frame(X = dataset)
data_subset <- data.frame(X = data_subset)
And here's how I'm drawing my histogram for dataset:
# Density histogram of X with unit-wide bins, drawn as white bars with a
# black outline.
ggplot(dataset, mapping = aes(x = X)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    binwidth = 1,
    fill = "white",
    colour = "black"
  ) +
  theme_bw()
How can I shade the data_subset portion of the histogram, like so?
My solution is very similar to joran's -- I think they're both worth looking at for the slight differences:
# Count histogram of all values in white, with the bars for 6 < X < 10
# drawn again in grey on top.
ggplot(dataset, mapping = aes(x = X)) +
  geom_histogram(binwidth = 1, color = "black", fill = "white") +
  geom_histogram(
    data = subset(dataset, X > 6 & X < 10),
    binwidth = 1,
    fill = "grey",
    colour = "black"
  ) +
  theme_bw()
Just add another geom_histogram line using that data subset (although you may have to tinker with the binwidth a bit, I'm not sure):
# Density histogram of all values in white, with the data_subset portion
# shaded grey on top.
ggplot(dataset, aes(x = X)) +
  geom_histogram(aes(y = ..density..), binwidth = 1,
                 colour = "black", fill = "white") +
  # ..density.. is normalised within each layer's own data, so using it on
  # the subset would make the grey bars taller than the white ones. Dividing
  # the subset counts by the size of the FULL data set (bin width is 1) puts
  # both layers on the same density scale.
  geom_histogram(data = data_subset, aes(y = ..count.. / nrow(dataset)),
                 binwidth = 1, colour = "black", fill = "grey") +
  theme_bw()

Resources