I have a dataset with actual and modelled values. I have split the dataset into two periods Jan 2003- Dec 2006 and Jan 2007- Dec 2012 and plotted trend lines - there are two trends lines for actual values (for 2 time periods) and 2 for modelled. I have attached a picture - I want to control the length of the lines so that they start and stop at the right time - but I cannot figure this out! I have attached the code that I have managed so far - I'm still fairly new.Also attached a picture in case the above is not clear. Thanks!
I'm not sure if there is a way I could put a vertical line at 2007 (Jan) and use this line as the reference to start and stop the respective lines?
plot(NULL, type="n", xlim=x.limit, ylim=c(-30, 30), xlab="Year", ylab="Equivalent Water Depth (cm)", axes=F, cex.lab=0.9)
box(lwd=1.5)
abline(h=0, col="gray50", lty=1, lwd=1)
axis(1, seq(2003, 2013, 1), cex.axis=0.7)
axis(2, seq(-40, 40, 10), las=1, cex.axis=0.7, tck=-0.03)
minor.tick(nx=4, ny=0, tick.ratio=0.5)
lines(tws.avg.VNB[,2] ~ tws.avg.VNB[,1], type="l", lwd=2, col=1)
tws.slope1 <- round(as.vector(coef(lm(SPLIT.1.ALL.VNB[,2] ~ SPLIT.1.ALL.VNB[,1]))[2]), 2)
tws.sdev1 <- round(as.vector(coef(summary(lm(SPLIT.1.ALL.VNB[,2] ~ SPLIT.1.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("GRACE Trend: 2003-2007 (cm/yr): ", tws.slope1, "±", tws.sdev1, sep=""), cex=0.5, side=1, line=-1.8, adj=0.15)
abline(lm(SPLIT.1.ALL.VNB[,2] ~ SPLIT.1.ALL.VNB[,1]), lwd=2, lty=2, col="deepskyblue")
tws.slope2 <- round(as.vector(coef(lm(SPLIT.2.ALL.VNB[,2] ~ SPLIT.2.ALL.VNB[,1]))[2]), 2)
tws.sdev2 <- round(as.vector(coef(summary(lm(SPLIT.2.ALL.VNB[,2] ~ SPLIT.2.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("GRACE Trend: 2007-2012 (cm/yr): ", tws.slope2, "±", tws.sdev2, sep=""), cex=0.5, side=1, line=-1.1, adj=0.15)
abline(lm(SPLIT.2.ALL.VNB[,2] ~ SPLIT.2.ALL.VNB[,1]), lwd=2, lty=2, col="deepskyblue")
lines(VNB.OBS.TWS[,1] ~ tws.avg.VNB[,1], type="l", lwd=2, col="red")
tws.slope3 <- round(as.vector(coef(lm(SPLIT.1.ALL.VNB[,6] ~ SPLIT.1.ALL.VNB[,1]))[2]), 2)
tws.sdev3 <- round(as.vector(coef(summary(lm(SPLIT.1.ALL.VNB[,6] ~ SPLIT.1.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("OBSERVED Trend: 2003-2007 (cm/yr): ", tws.slope3, "±", tws.sdev3, sep=""), cex=0.5, side=1, line=-1.8, adj=0.85)
abline(lm(SPLIT.1.ALL.VNB[,6] ~ SPLIT.1.ALL.VNB[,1]), lwd=2, lty=2, col="forestgreen")
tws.slope4 <- round(as.vector(coef(lm(SPLIT.2.ALL.VNB[,6] ~ SPLIT.2.ALL.VNB[,1]))[2]), 2)
tws.sdev4 <- round(as.vector(coef(summary(lm(SPLIT.2.ALL.VNB[,6] ~ SPLIT.2.ALL.VNB[,1])))[, "Std. Error"][2]), 2)
mtext(paste("OBSERVED Trend: 2007-2012 (cm/yr): ", tws.slope4, "±", tws.sdev4, sep=""), cex=0.5, side=1, line=-1.1, adj=0.85)
abline(lm(SPLIT.2.ALL.VNB[,6] ~ SPLIT.2.ALL.VNB[,1]), lwd=2, lty=2, col="forestgreen")
legend("bottomright", "(a)", bty="n", cex=0.8)
legend("top", legend=expression(Delta~TWS~(GRACE), GRACE~TREND, Delta~TWS~(OBSERVED), OBSERVED~TREND),
lty=c(1,4,1,4), lwd=c(2,2,2,2), col=c(1,"deepskyblue","red","forestgreen"),
bty="n", horiz=T, cex=0.6)
Look into the package zoo. Among many other features, it implements a new class specifically for time series that keeps track of the time base, and the plot.zoo method makes use of it. As a very, very minimal example, you can try something like a following:
a <- zoo(rnorm(5), 1:5)
b <- zoo(rpois(5, 1), 1:5)
plot(cbind(a, b))
A base R solution is also pretty simple:
a <- rnorm(5)
b <- rpois(5, 1)
plot(a ~ 1:5, xlim = c(0, 10))
points(b ~ 6:10)
Related
I have this line-and-dots plot:
#generate fake data
xLab <- seq(0, 50, by=5);
yLab <- c(0, sort(runif(10, 0, 1)));
#this value is fixed
fixedVal <- 27.3
#new window
dev.new();
#generate the plot
paste0(plot(xLab, yLab, col=rgb(50/255, 205/255, 50/255, 1), type="o", lwd=3,
main="a line-and-dots plot", xlab="some values", ylab="a percentage",
pch=20, xlim=c(0, 50), ylim=c(0, 1), xaxt="n", cex.lab=1.5, cex.axis=1.5,
cex.main=1.5, cex.sub=1.5));
#set axis
axis(side = 1, at=c(seq(min(xLab), max(xLab), by=5)))
#plot line
abline(v=fixedVal, col="firebrick", lwd=3, lty=1);
now, I would like to find the y coordinate of the intersection point between the green and the red lines.
Can I achieve the goal without the need of a regression line? Is there a simple way of getting the coordinates of that unknown point?
You can use approxfun to do the interpolation:
> approxfun(xLab,yLab)(fixedVal)
[1] 0.3924427
Alternatively, just use approx:
> approx(xLab,yLab,fixedVal)
$x
[1] 27.3
$y
[1] 0.3924427
Quick fix like #JohnColeman said:
# find the two points flanking your value
idx <- findInterval(fixedVal,xLab)
# calculate the deltas
y_delta <- diff(yLab[idx:(idx+1)])
x_delta <- diff(xLab[idx:(idx+1)])
# interpolate...
ycut = (y_delta/x_delta) * (fixedVal-xLab[idx]) + yLab[idx]
ycut
[1] 0.4046399
So we try it on the plot..
paste0(plot(xLab, yLab, col=rgb(50/255, 205/255, 50/255, 1), type="o", lwd=3,
main="a line-and-dots plot", xlab="some values", ylab="a percentage",
pch=20, xlim=c(0, 50), ylim=c(0, 1), xaxt="n", cex.lab=1.5, cex.axis=1.5,
cex.main=1.5, cex.sub=1.5));
#set axis
axis(side = 1, at=c(seq(min(xLab), max(xLab), by=5)))
#plot line
abline(v=fixedVal, col="firebrick", lwd=3, lty=1);
abline(h=ycut, col="lightblue", lwd=3, lty=1);
Given two variables, x and y, I run a dynlm regression on the variables and would like to plot the fitted model against one of the variables and the residual on the bottom showing how the actual data line differs from the predicting line. I've seen it done before and I've done it before, but for the life of me I can't remember how to do it or find anything that explains it.
This gets me into the ballpark where I have a model and two variables, but I can't get the type of graph I want.
library(dynlm)
x <- rnorm(100)
y <- rnorm(100)
model <- dynlm(x ~ y)
plot(x, type="l", col="red")
lines(y, type="l", col="blue")
I want to generate a graph that looks like this where you see the model and the real data overlaying each other and the residual plotted as a separate graph on the bottom showing how the real data and the model deviate.
This should do the trick:
library(dynlm)
set.seed(771104)
x <- 5 + seq(1, 10, len=100) + rnorm(100)
y <- x + rnorm(100)
model <- dynlm(x ~ y)
par(oma=c(1,1,1,2))
plotModel(x, model) # works with models which accept 'predict' and 'residuals'
and this is the code for plotModel,
plotModel = function(x, model) {
ymodel1 = range(x, fitted(model), na.rm=TRUE)
ymodel2 = c(2*ymodel1[1]-ymodel1[2], ymodel1[2])
yres1 = range(residuals(model), na.rm=TRUE)
yres2 = c(yres1[1], 2*yres1[2]-yres1[1])
plot(x, type="l", col="red", lwd=2, ylim=ymodel2, axes=FALSE,
ylab="", xlab="")
axis(1)
mtext("residuals", 1, adj=0.5, line=2.5)
axis(2, at=pretty(ymodel1))
mtext("observed/modeled", 2, adj=0.75, line=2.5)
lines(fitted(model), col="green", lwd=2)
par(new=TRUE)
plot(residuals(model), col="blue", type="l", ylim=yres2, axes=FALSE,
ylab="", xlab="")
axis(4, at=pretty(yres1))
mtext("residuals", 4, adj=0.25, line=2.5)
abline(h=quantile(residuals(model), probs=c(0.1,0.9)), lty=2, col="gray")
abline(h=0)
box()
}
what you're looking for is resid(model). Try this:
library(dynlm)
x <- 10+rnorm(100)
y <- 10+rnorm(100)
model <- dynlm(x ~ y)
plot(x, type="l", col="red", ylim=c(min(c(x,y,resid(model))), max(c(x,y,resid(model)))))
lines(y, type="l", col="green")
lines(resid(model), type="l", col="blue")
I found, that axes were rescaled during multiple plotting using par(new=T) parameter.
An example to demonstrate this:
a <- seq(1,10, by = 0.25)
b <- sin(a)
c <- sin(2*a)+1
d <- sin(0.5*a)+2
df <- data.frame(a,b,c,d)
plot(df$a, df$b, type="l")
par(new=T)
plot(df$a, df$c, type="l", col="blue")
par(new=T)
plot(df$a, df$d, type="l", col="red")
This is the result.
Instead of real scales, I have a transformed curves.
And this is the real result:
I used parameters axes=F, xlab="", ylab="" and did not see this "rescaling".
I find it very dangerous, that it is so easy to transform the data during plotting if you do not control y-limits.
Are there better ways to control y-limits than looking for min and max values in all plotted data to avoid this "rescaling" effect?
I have several quite big files and each of them gives only one line from 10 in one plot and I have several plots on one page to compare my data.
The code for the last "correct" image:
plot(df$a, df$b, type="l", ylim=c(-1.5,3.5))
par(new=T)
plot(df$a, df$c, type="l", ylim=c(-1.5,3.5), col="blue", axes=F, xlab="", ylab="")
par(new=T)
plot(df$a, df$d, type="l", ylim=c(-1.5,3.5), col="red", axes=F, xlab="", ylab="")
#Create an empty plot with enough xlim and ylim to accomodate all data
plot(1, 1, xlim = range(df[,1]), ylim = range(df[,-1]), type = "n", ann = FALSE)
#Draw the three lines
lines(df$a, df$b)
lines(df$a, df$c, col="blue")
lines(df$a, df$d, col="red")
I have the following data and code in R:
x <- runif(1000, -9.99, 9.99)
mx <- mean(x)
stdevs_3 <- mx + c(-3, +3) * sd(x/5) # Statndard Deviation 3-sigma
And I plotted as line (alongwith 3 standard deviation and mean lines) in R:
plot(x, t="l", main="Plot of Data", ylab="X", xlab="")
abline(h=mx, col="red", lwd=2)
abline(h=stdevs_3, lwd=2, col="blue")
What I want to do:
Anywhere on the plot, whenever line is crossing 3 sigma thresholds (blue lines), above or below it, line should be in different color than black.
I tried this, but did not work:
plot(x, type="l", col= ifelse(x < stdevs_3[[1]],"red", "black"))
abline(h=mx, col="red", lwd=2)
abline(h=stdevs_3, lwd=2, col="blue")
Is there any other way?
This is what is requested, but it appears meaningless to me because of the arbitrary division of x by 5:
png( )
plot(NA, xlim=c(0,length(x)), ylim=range(x), main="Plot of Data", ylab="X", xlab="", )
stdevs_3 <- mx + c(-3, +3) * sd(x/5)
abline(h=mx, col="red", lwd=2)
abline(h=stdevs_3, lwd=2, col="blue")
segments( 0:999, head(x,-1), 1:1000, tail(x,-1) , col=c("black", "red")[
1+(abs(tail(x,-1)) > mx+3*sd(x/5))] )
dev.off()
Can someone help me. I have a dataset that had NA values that I have interpolated with zoo. I have added a 'colour column' in the hope that I could create a line plot (time series) with the interpolated values plotted in a different colour to the rest of the line. That is, the segment of the line defined by the point immediately before and immediately after the interpolated point should be red, and not black.
I've attached an example of my table here (where the colour is 'red' defines the values that have been interpolated). I've also put an image of the graph so far and the desired output here too:
https://drive.google.com/folderview?id=0B_eJi0urUAzFM0JBS1ZIbUdGck0&usp=drive_web
This is my code thus far. The 'lines' part of the code is where I hoped to define the colour as the column in the data frame:
par(mfrow=c(2,1), mar=c(4,4.5,2,2), mgp=c(2,0.6,0))
x.limit <- round(range(UN.GRACE.Int$DecimDate), 2)
plot(NULL, type="n", xlim=x.limit, ylim=c(-20, 25), xlab="Year", ylab="GRACE-TWS (cm)", axes=F)
box(lwd=1.5)
abline(h=0, col="gray50", lty=1)
axis(1, seq(2003, 2012, 1), cex.axis=0.8)
axis(2, seq(-20, 25, 5), las=1, cex.axis=0.8)
minor.tick(nx=4, ny=0, tick.ratio=0.5)
lines(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1], type="l", lwd=3, col=UN.GRACE.Int[,3])
tws.slope <- round(as.vector(coef(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]))[2]), 2)
tws.sdev <- round(as.vector(coef(summary(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1])))[, "Std. Error"][2]), 2)
abline(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]), lwd=2.5, lty=2, col=2)
mtext(paste("Trend (cm/year): ", tws.slope, "±", tws.sdev, sep=""), cex=0.8, side=1, line=-1.1)
Any help would be appreciated - Thanks
If I understand this correctly, you want the interpolated points to show up with a different color. You can accomplish this using the type="o" option in R, which gives over-plotted lines. Here's some adjusted code that produces the following plot. I took the minor.tick command out because it must have been from a package I don't have, but otherwise it works fine (using R 2.15.3 on my local machine).
You'll notice that I just plot the item directly, rather than calling plot to NULL and then adding in lines. This simplifies the code substantially. You can play with the pch parameter in the plot call to change the symbols used, and also alter the lwd parameters as needed. In fact, you could easily give a different value to pch for the interpolated values, like you did color - it accepts a vector as an argument.
par(mar=c(4,4.5,2,2), mgp=c(2,0.6,0))
x.limit <- round(range(UN.GRACE.Int$DecimDate), 2)
plot(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1],
type="o",
pch=18,
col=UN.GRACE.Int[,3],
xlim=x.limit,
ylim=c(-20, 25),
xlab="Year",
ylab="GRACE-TWS (cm)",
axes=F)
box(lwd=1.5)
abline(h=0, col="gray50", lty=1)
axis(1, seq(2003, 2012, 1), cex.axis=0.8)
axis(2, seq(-20, 25, 5), las=1, cex.axis=0.8)
tws.slope <- round(as.vector(coef(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]))[2]), 2)
tws.sdev <- round(as.vector(coef(summary(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1])))[, "Std. Error"][1]), 2)
abline(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]), lwd=2.5, lty=2, col=2)
mtext(paste("Trend (cm/year): ", tws.slope, "±", tws.sdev, sep=""), cex=0.8, side=1, line=-1.1)
You could also add the points later if you JUST want to see the points where the data was interpolated. This could be done as follows:
par(mar=c(4,4.5,2,2), mgp=c(2,0.6,0))
x.limit <- round(range(UN.GRACE.Int$DecimDate), 2)
plot(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1],
type="l",
pch=18,
col="black",
xlim=x.limit,
ylim=c(-20, 25),
xlab="Year",
ylab="GRACE-TWS (cm)",
axes=F)
box(lwd=1.5)
abline(h=0, col="gray50", lty=1)
axis(1, seq(2003, 2012, 1), cex.axis=0.8)
axis(2, seq(-20, 25, 5), las=1, cex.axis=0.8)
tws.slope <- round(as.vector(coef(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]))[2]), 2)
tws.sdev <- round(as.vector(coef(summary(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1])))[, "Std. Error"][3]), 2)
abline(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]), lwd=2.5, lty=2, col=2)
mtext(paste("Trend (cm/year): ", tws.slope, "±", tws.sdev, sep=""), cex=0.8, side=1, line=-1.1)
points(x=UN.GRACE.Int[UN.GRACE.Int$Col.CSR=="red",1],
y=UN.GRACE.Int[UN.GRACE.Int$Col.CSR=="red",2],
pch=16,
col="red")
EDITED TO ADD: This is a way to color the line segments themselves by overplotting the original plot, assuming the distance to be colored is always of length one. It uses a quick'n'dirty for() loop, but it could be made into a function if you wanted.
par(mar=c(4,4.5,2,2), mgp=c(2,0.6,0))
x.limit <- round(range(UN.GRACE.Int$DecimDate), 2)
plot(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1],
type="l",
pch=18,
col="black",
xlim=x.limit,
ylim=c(-20, 25),
xlab="Year",
ylab="GRACE-TWS (cm)",
axes=F)
box(lwd=1.5)
abline(h=0, col="gray50", lty=1)
axis(1, seq(2003, 2012, 1), cex.axis=0.8)
axis(2, seq(-20, 25, 5), las=1, cex.axis=0.8)
tws.slope <- round(as.vector(coef(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]))[2]), 2)
tws.sdev <- round(as.vector(coef(summary(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1])))[, "Std. Error"][5]), 2)
abline(lm(UN.GRACE.Int[,2] ~ UN.GRACE.Int[,1]), lwd=2.5, lty=2, col=2)
mtext(paste("Trend (cm/year): ", tws.slope, "±", tws.sdev, sep=""), cex=0.8, side=1, line=-1.1)
line_segs <- cbind(lstart=UN.GRACE.Int[which(UN.GRACE.Int$Col.CSR=="red")-1,c("DecimDate","CSR")],
lend=UN.GRACE.Int[which(UN.GRACE.Int$Col.CSR=="red")+1,c("DecimDate","CSR")])
for(x in 1:nrow(line_segs)) {
lines(x=c(line_segs[x,1],line_segs[x,3]),
y=c(line_segs[x,2],line_segs[x,4]),
lwd=3,
col="red")
}