Why Forest plot is not showing the confidence interval bars? - r

Hi I am generating a forest plot by following code. but my visual graph doesnot show the confidence interval on boxes. How can i improve this graphical representation.
mydf <- data.frame(
Variables=c('Variables','Neuroticism_2','Neuroticism_3','Neuroticism_4'),
HazardRatio=c(NA,1.109,1.296,1.363),
HazardLower=c(NA,1.041,1.206,1.274),
HazardUpper=c(NA,1.182,1.393,1.458),
Pvalue=c(NA,"0.001","<0.001","<0.001"),
stringsAsFactors=FALSE
)
#png('temp.png', width=8, height=4, units='in', res=400)
rowseq <- seq(nrow(mydf),1)
par(mai=c(1,0,0,0))
plot(mydf$HazardRatio, rowseq, pch=15,
xlim=c(-10,12), ylim=c(0,7),
xlab='', ylab='', yaxt='n', xaxt='n',
bty='n')
axis(1, seq(0,5,by=.5), cex.axis=.5)
segments(1,-1,1,6.25, lty=3)
segments(mydf$HazardLower, rowseq, mydf$HazardUpper, rowseq)
text(-8,6.5, "Variables", cex=.75, font=2, pos=4)
t1h <- ifelse(!is.na(mydf$Variables), mydf$Variables, '')
text(-8,rowseq, t1h, cex=.75, pos=4, font=3)
text(-1,6.5, "Hazard Ratio (95%)", cex=.75, font=2, pos=4)
t3 <- ifelse(!is.na(mydf$HazardRatio), with(mydf, paste(HazardRatio,' (',HazardLower,'-',HazardUpper,')',sep='')), '')
text(3,rowseq, t3, cex=.75, pos=4)
text(7.5,6.5, "P Value", cex=.75, font=2, pos=4)
t4 <- ifelse(!is.na(mydf$Pvalue), mydf$Pvalue, '')
text(7.5,rowseq, t4, cex=.75, pos=4)
#dev.off()
Edit
I even tried to do this by forestplot package. But i am not getting Confidence interval on grpah as well as i want presentation as above graph.
test_data <- data.frame(coef=c(1.109,1.296,1.363),
low=c(1.041,1.206,1.274),
high=c(1.182,1.393,1.458),
boxsize=c(0.1, 0.1, 0.1))
row_names <- cbind(c("Variable", "N_Quartile 1", "N_Quartile 2", "N_Quartile 3"),
c("HR", test_data$coef), c("CI -95%", test_data$low), c("CI +95%", test_data$high) )
test_data <- rbind(NA, test_data)
forestplot(labeltext = row_names,
mean = test_data$coef, upper = test_data$high,
lower = test_data$low,
clip =c(0.1, 25),
is.summary=c(TRUE, FALSE, FALSE, FALSE),
boxsize = test_data$boxsize,
zero = 1,colgap = unit(3, "mm"), txt_gp=fpTxtGp(label= gpar(cex = 0.7),
title = gpar(cex = 1) ),
xlog = TRUE,
xlab = "HR (95% CI)",
col = fpColors(lines="black", box="black"),
ci.vertices = TRUE,
xticks = c(0.1, 1, 2.5,5,7.5))

Your intervals are quite small, so if you do it manually on plot it will take a while to refine the correct settings, and putting text together with it is not trivial. Right now your first code is not even 50% there.
My suggestion is to build up the plot slowly using forestplot, and identify the problem, for example if you just plot your data.frame, you see it works, that is the c.i is there, just that it's very narrow, and that's your problem at hand, adjusting the size using lwd.ci so that it is visible:
forestplot(test_data[,1:3],lwd.ci=3)
Now if we add in the text:
forestplot(
labeltext =row_names,
mean = test_data$coef, upper = test_data$high,
lower = test_data$low,
txt_gp=fpTxtGp(cex=0.8),
is.summary=c(TRUE, FALSE, FALSE, FALSE),
boxsize = test_data$boxsize,lwd.ci=3)
So the text is taking up a bit too much space, i think one way is to use the conventional est[ll - ul] way of representing estimate and confidence interval, you can see examples here. One way I can try below is to wrap the values for the CI into 1 string, and have just two columns for text:
library(stringr)
test_data <- data.frame(coef=c(1.109,1.296,1.363),
low=c(1.041,1.206,1.274),
high=c(1.182,1.393,1.458),
boxsize=c(0.1, 0.1, 0.1))
column1 = c("Variable", "N_Quartile 1", "N_Quartile 2", "N_Quartile 3")
column2 = cbind(c("HR", test_data$coef),
c("CI -95%", test_data$low),
c("CI +95%", test_data$high))
L = max(nchar(column2))
padded_text =apply(column2,1,
function(i)paste(str_pad(i,L),collapse=" "))
test_data <- rbind(NA, test_data)
pdf("test.pdf",width=8,height=4)
forestplot(
labeltext =cbind(column1,padded_text),
mean = test_data$coef, upper = test_data$high,
lower = test_data$low,
txt_gp=fpTxtGp(cex=0.8),align="c",
is.summary=c(TRUE, FALSE, FALSE, FALSE),
boxsize = test_data$boxsize,lwd.ci=3,
graphwidth=unit(100,'mm'))
dev.off()

Related

How can I fit two semivariograms in one plot in R?

I tried to plot two semivariogram in one plot but unfortunately it does not work.
I guess the solution is pretty easy but I am at the end of my latin.
This ist the code of the variograms I want to put in one plot together:
variog_iso_a1 <- fit.variogram( emp_variog_iso_a1,
vgm( psill = 2000,
model = "Sph",
range = 200,
nugget = 500))
plot(emp_variog_iso_a1, variog_iso_a1, as.table=TRUE, main = "Acker H1 C_org", plot.numbers=T)
variog_iso_a2 <- fit.variogram( emp_variog_iso_a2,
vgm( psill = 2000,
model = "Sph",
range = 200,
nugget = 500))
plot(emp_variog_iso_a2, variog_iso_a2, as.table=TRUE, main = "Acker H2 C_org", plot.numbers=T)
Secondly I would also like to plot two semivariogram in one plot but with a second y-axis on the right side because of the different values.
variog_iso_a1 <- fit.variogram( emp_variog_iso_a1,
vgm( psill = 2000,
model = "Sph",
range = 200,
nugget = 500))
plot(emp_variog_iso_a1, variog_iso_a1, as.table=TRUE, main = "Acker H1 C_org", plot.numbers=T)
variog_iso_l1 <- fit.variogram( emp_variog_iso_l1,
vgm( psill = 2000,
model = "Sph",
range = 200,
nugget = 500))
plot(emp_variog_iso_l1, variog_iso_l1, as.table=TRUE, main = "Acker H1 Lichtwert", plot.numbers=T)
I was only able two show the points of each variogram but not with the model fitted inside. This is the code I tried but does not work!
plot(emp_variog_iso_a1$dist, emp_variog_iso_a1$gamma, ylim=c(0,2500))
points(emp_variog_iso_a2$dist, emp_variog_iso_a2$gamma, col = "red", add=T, labels=emp_variog_iso_a2$np)
plot(emp_variog_iso_a1$dist, emp_variog_iso_a1$gamma, ylim=c(0,2500),main="Semivarianz Lichtwert und organische Substanz Horizont 1 Ackerland", ylab = "Semivarianz", xlab = "Distanz" )
par(new = TRUE)
plot(emp_variog_iso_l1$dist, emp_variog_iso_l1$gamma, col = "red", labels=emp_variog_iso_l1$np )
points(emp_variog_iso_l1$dist, emp_variog_iso_l1$gamma, col = "red", add=T, labels=emp_variog_iso_l1$np, yaxt = "n")
plot(emp_variog_iso_a2$dist, emp_variog_iso_a2$gamma, ylim=c(0,2500),main="Semivarianz Lichtwert und organische Substanz Horizont 2 Ackerland", ylab = "Semivarianz", xlab = "Distanz" )
points(emp_variog_iso_l2$dist, emp_variog_iso_l2$gamma, col = "red", add=T, labels=emp_variog_iso_l2$np)
Thanks for any help!!!
You can use variogramLine() for this:
VL=variogramLine(YourVariogramFit, maxdist = ...)
next, add it to your plot with lines()
lines(VL)

R: How to plot multiple ARIMA forecasts on the same time-series

I would like to plot several forecasts on the same plot in different colours, however, the scale is off.
I'm open to any other methods.
reproducible example:
require(forecast)
# MAKING DATA
data <- c(3.86000, 19.55810, 19.51091, 20.74048, 20.71333, 29.04191, 30.28864, 25.64300, 23.33368, 23.70870 , 26.16600 ,27.61286 , 27.88409 , 28.41400 , 24.81957 , 24.60952, 27.49857, 32.08000 , 29.98000, 27.49000 , 237.26150, 266.35478, 338.30000, 377.69476, 528.65905, 780.00000 )
a.ts <- ts(data,start=c(2005,1),frequency=12)
# FORECASTS
arima011_css =stats::arima(x = a.ts, order = c(0, 1, 1), method = "CSS") # css estimate
arima011_forecast = forecast(arima011_css, h=10, level=c(99.5))
arima321_css =stats::arima(x = a.ts, order = c(3, 2, 1), method = "CSS") # css estimate
arima321_forecast = forecast(arima321_css, h=10, level=c(99.5))
# MY ATTEMPT AT PLOTS
plot(arima321_forecast)
par(new=T)
plot(arima011_forecast)
Here is something similar to #jay.sf but using ggplot2.
library(ggplot2)
autoplot(a.ts) +
autolayer(arima011_forecast, series = "ARIMA(0,1,1)", alpha = 0.5) +
autolayer(arima321_forecast, series = "ARIMA(3,2,1)", alpha = 0.5) +
guides(colour = guide_legend("Model"))
Created on 2020-05-19 by the reprex package (v0.3.0)
You could do a manual plot using a sequence of dates.
rn <- format(seq.Date(as.Date("2005-01-01"), by="months", length.out=12*3), "%Y.%m")
Your ARIMAs you'll need as.matrix form.
arima321_mat <- as.matrix(as.data.frame(arima321_forecast))
arima011_mat <- as.matrix(as.data.frame(arima011_forecast))
Some colors with different alpha=.
col.1 <- rainbow(2, ,.7)
col.2 <- rainbow(2, ,.7, alpha=.2)
For the CIs use polygon.
plot(data, type="l", xlim=c(1, length(rn)), ylim=c(0, 3500), xaxt="n", main="Forecasts")
axis(1, axTicks(1), labels=F)
mtext(rn[(seq(rn)-1) %% 5 == 0], 1, 1, at=axTicks(1))
lines((length(data)+1):length(rn), arima321_mat[,1], col=col.1[1], lwd=2)
polygon(c(27:36, 36:27), c(arima321_mat[,2], rev(arima321_mat[,3])), col=col.2[1],
border=NA)
lines((length(data)+1):length(rn), arima011_mat[,1], col=col.1[2], lwd=3)
polygon(c(27:36, 36:27), c(arima011_mat[,2], rev(arima011_mat[,3])), col=col.2[2],
border=NA)
legend("topleft", legend=c("ARIMA(3,2,1)", "ARIMA(0,1,1)"), col=col.1, lwd=2, cex=.9)
Edit: To avoid the repetition of lines and polygon calls, you may unite them using Map.
mats <- list(arima321_mat, arima011_mat) ## put matrices into list
plot(.)
axis(.)
mtext(.)
Map(function(i) {
lines((length(data)+1):length(rn), mats[[i]][,1], col=col.1[i], lwd=2)
polygon(c(27:36, 36:27), c(mats[[i]][,2], rev(mats[[i]][,3])), col=col.2[i], border=NA)
}, 1:2)
legend(.)
require(forecast)
data <- c(3.86000, 19.55810, 19.51091, 20.74048, 20.71333, 29.04191, 30.28864, 25.64300, 23.33368, 23.70870 , 26.16600 ,27.61286 , 27.88409 , 28.41400 , 24.81957 , 24.60952, 27.49857, 32.08000 , 29.98000, 27.49000 , 237.26150, 266.35478, 338.30000, 377.69476, 528.65905, 780.00000 )
a.ts <- ts(data,start=c(2005,1),frequency=12)
arima011_css =stats::arima(x = a.ts, order = c(0, 1, 1), method = "CSS") # css estimate
arima011_forecast = predict(arima011_css, n.ahead = 2)$pred
arima321_css =stats::arima(x = a.ts, order = c(3, 2, 1), method = "CSS") # css estimate
arima321_forecast = predict(arima321_css, n.ahead = 2)$pred
plot(a.ts, type = "o", xlim = c(2005, 2007.5) , ylim = c(-1, 1200) , ylab = "price" ,main = "2 month Forecast")
range = c(2007+(3/12), 2007+(4/12)) # adding the dates for the prediction
lines(y = arima011_forecast , x = range , type = "o", col = "red")
lines(y = arima321_forecast, x = range , type = "o", col = "blue")

Forestplot R - expanding plot, edit a variable name

I prepared a code to visualize my data:
library(forestplot)
test_data <- data.frame(coef=c(1.14, 0.31, 10.70),
low=c(1.01, 0.12, 1.14),
high=c(1.30, 0.83, 100.16),
boxsize=c(0.2, 0.2, 0.2))
row_names <- cbind(c("Variable", "Variable 1", "Variable 2", "So looooooong and nasty name of the variable"),
c("OR", test_data$coef), c("CI -95%", test_data$low), c("CI +95%", test_data$high) )
test_data <- rbind(rep(NA, 4), test_data)
forestplot(labeltext = row_names,
mean = test_data$coef, upper = test_data$high,
lower = test_data$low,
is.summary=c(TRUE, FALSE, FALSE, FALSE),
boxsize = test_data$boxsize,
zero = 1,
xlog = TRUE,
xlab = "OR (95% CI)",
col = fpColors(lines="black", box="black"),
title="My Happy Happy Title \n o happy happy title...\n",
ci.vertices = TRUE,
xticks = c(0.1, 1, 10, 100))
It gives a following forestplot:
I would like to:
1) expand the plot and diminish font of the plot details on the left for better visualization
2) edit "So looooooong and nasty name of the variable" to move part "name..." below the row like:
"
So looooooong and nasty
name of the variable
"
However, when I write as "/nSo.../n" it gives another row of number from columns "OR" and "CIs".
How correct it?
Three possibilities (one more than you asked for):
1) change text of row labels with txt_gp.
2) cut column spacing from 6 mm default to half that value by passing colgap a grid call to unit. Fully understanding the options for forestplot requires understanding the grid system of plotting.
3) add a "\n" to the loooong label. (I'm puzzled you didn't see that possibility, since you already had a "\n" in the title.)
row_names <- cbind(c("Variable", "Variable 1", "Variable 2", "So looooooong and \nnasty name of the variable"),
c("OR", test_data$coef), c("CI -95%", test_data$low), c("CI +95%", test_data$high) )
forestplot(labeltext = row_names,
mean = test_data$coef, upper = test_data$high,
lower = test_data$low,
is.summary=c(TRUE, FALSE, FALSE, FALSE),
boxsize = test_data$boxsize,
zero = 1, colgap = unit(3, "mm"), txt_gp=fpTxtGp(label= gpar(cex = 0.7),
title = gpar(cex = 1) ),
xlog = TRUE,
xlab = "OR (95% CI)",
col = fpColors(lines="black", box="black"),
title="My Happy Happy Title \n o happy happy title...\n",
ci.vertices = TRUE,
xticks = c(0.1, 1, 10, 100))
If I only used a cex of 0.7 in the call to gpar passed to 'label', it also affected the size of the title, so I needed to "reset" the 'cex' of the 'title' back to 1.

How to set the y range in boxplot graph?

I'm using boxplot() in R. My code is:
#rm(list=ls())
#B2
fps_error <- c(0.058404273, 0.028957446, 0.026276044, 0.07084294, 0.078438563, 0.024000178, 0.120678965, 0.081774358, 0.025644741, 0.02931614)
fps_error = fps_error *100
fps_qp_error <-c(1.833333333, 1.69047619, 1.666666667, 3.095238095, 2.738095238, 1.714285714, 3.634146341, 5.142857143, 1.238095238, 2.30952381)
bit_error <- c(0.141691737, 0.136173785, 0.073808209, 0.025057931, 0.165722097, 0.004276999, 0.365353752, 0.164757488, 0.003362543, 0.022423845)
bit_error = bit_error *100
bit_qp_error <-c(0.666666667, 0.785714286, 0.428571429, 0.142857143, 0.785714286, 0.023809524, 1.523809524, 0.976190476, 0.023809524, 0.142857143)
ssim_error <-c(0.01193773, 0.015151569, 0.003144532, 0.003182908, 0.008125274, 0.013796366, 0.00359078, 0.019002591, 0.005031524, 0.004370175)
ssim_error = ssim_error * 100
ssim_qp_error <-c(3.833333333, 1.80952381, 0.69047619, 0.571428571, 2, 1.904761905, 0.761904762, 2.119047619, 0.857142857, 0.976190476)
all_errors = cbind(fps_error, bit_error, ssim_error)
all_qp_errors = cbind(fps_qp_error, bit_qp_error, ssim_qp_error)
modes = cbind(rep("FPS error",10), rep("Bitrate error",10), rep("SSIM error",10))
journal_linear_data <-data.frame(fps_error, fps_qp_error,bit_error,bit_qp_error,ssim_error,ssim_qp_error )
yvars <- c("fps_error","bit_error","ssim_error")
yvars_qp <-c("fps_qp_error","bit_qp_error","ssim_qp_error")
xvars <- c("FPS", "Bitrate", "SSIM")
graphics.off()
bmp(filename="boxplot_B2_error.bmp")
op <- par(mfrow = c(1, 3), #matrix of plots
oma = c(0,0,2,0),mar=c(5.1, 7.1, 2.1, 2.1),mgp=c(4,1,0)) #outer margins
par(cex.lab=3)
par(cex.axis=3)
for (i in 1:3) {boxplot(journal_linear_data[,yvars[i]], xlab=xvars[i], ylab="Percentage error", outcex = 2)}
par(op)
mtext(text="Percentage error per mode for B2",side=3, line=1.5, font=2, cex=2,adj=0.95, col='black')
dev.off()
The image output is shown below. As you can see the y-axis does not have the same range for all graphs. How can I fix this? For example starting in 0.5 or 0.
You can simply put an ylim = c(0, 5) in all your boxplot() call. This sets y-axis range (roughly) between 0 and 5.
Perhaps you did not see ylim argument in ?boxplot; the "Arguments" section also does not mention it. But ylim is just a trivial graphical parameter passed via "...". You can also find such example in the "Examples" session of ?boxplot:
boxplot(len ~ dose, data = ToothGrowth,
boxwex = 0.25, at = 1:3 - 0.2,
subset = supp == "VC", col = "yellow",
main = "Guinea Pigs' Tooth Growth",
xlab = "Vitamin C dose mg",
ylab = "tooth length",
xlim = c(0.5, 3.5), ylim = c(0, 35), yaxs = "i")

R - explore the graphs stocked in a variable

I there a way to see in R how a graph was built into a variable: the code behind the graph. I have tried the str(), deparse(), and replayPlot() functions but these don't give the answer I am searching for.
Precisely I am looking at the result returned by the MackChainLadder() function from the "ChainLadder" package. When I plot the the variable, say plot(MCL), it returns me 6 different graphs. Is it a way to find out how these graphs were built and saved into the variable?
library("ChainLadder")
MCL <- MackChainLadder(ABC)
plot(MCL)
One way to do this is to look at the package source code directly (download it from this page):
http://cran.r-project.org/web/packages/ChainLadder/index.html
The trick is finding the right method that plot() calls. I think it might be this one in MackChainLadderFunctions.R
################################################################################
## plot
##
plot.MackChainLadder <- function(x, mfrow=c(3,2), title=NULL,lattice=FALSE,...){
.myResult <- summary(x)$ByOrigin
.FullTriangle <- x[["FullTriangle"]]
.Triangle <- x[["Triangle"]]
if(!lattice){
if(is.null(title)) myoma <- c(0,0,0,0) else myoma <- c(0,0,2,0)
op=par(mfrow=mfrow, oma=myoma, mar=c(4.5,4.5,2,2))
plotdata <- t(as.matrix(.myResult[,c("Latest","IBNR")]))
n <- ncol(plotdata)
if(getRversion() < "2.9.0") { ## work around missing feature
bp <- barplot(plotdata,
legend.text=c("Latest","Forecast"),
## args.legend=list(x="topleft"), only avilable from R version >= 2.9.0
names.arg=rownames(.myResult),
main="Mack Chain Ladder Results",
xlab="Origin period",
ylab="Amount",#paste(Currency,myUnit),
ylim=c(0, max(apply(.myResult[c("Ultimate", "Mack.S.E")],1,sum),na.rm=TRUE)))
}else{
bp <- barplot(plotdata,
legend.text=c("Latest","Forecast"),
args.legend=list(x="topleft"),
names.arg=rownames(.myResult),
main="Mack Chain Ladder Results",
xlab="Origin period",
ylab="Amount",#paste(Currency,myUnit),
ylim=c(0, max(apply(.myResult[c("Ultimate", "Mack.S.E")],1,sum),na.rm=TRUE)))
}
## add error ticks
## require("Hmisc")
errbar(x=bp, y=.myResult$Ultimate,
yplus=(.myResult$Ultimate + .myResult$Mack.S.E),
yminus=(.myResult$Ultimate - .myResult$Mack.S.E),
cap=0.05,
add=TRUE)
matplot(t(.FullTriangle), type="l",
main="Chain ladder developments by origin period",
xlab="Development period", ylab="Amount", #paste(Currency, myUnit)
)
matplot(t(.Triangle), add=TRUE)
Residuals=residuals(x)
plot(standard.residuals ~ fitted.value, data=Residuals,
ylab="Standardised residuals", xlab="Fitted")
lines(lowess(Residuals$fitted.value, Residuals$standard.residuals), col="red")
abline(h=0, col="grey")
plot(standard.residuals ~ origin.period, data=Residuals,
ylab="Standardised residuals", xlab="Origin period")
lines(lowess(Residuals$origin.period, Residuals$standard.residuals), col="red")
abline(h=0, col="grey")
plot(standard.residuals ~ cal.period, data=Residuals,
ylab="Standardised residuals", xlab="Calendar period")
lines(lowess(Residuals$cal.period, Residuals$standard.residuals), col="red")
abline(h=0, col="grey")
plot(standard.residuals ~ dev.period, data=Residuals,
ylab="Standardised residuals", xlab="Development period")
lines(lowess(Residuals$dev.period, Residuals$standard.residuals), col="red")
abline(h=0, col="grey")
title( title , outer=TRUE)
par(op)
}else{
## require(grid)
## Set legend
fl <-
grid.layout(nrow = 2, ncol = 4,
heights = unit(rep(1, 2), "lines"),
widths =
unit(c(2, 1, 2, 1),
c("cm", "strwidth", "cm",
"strwidth"),
data = list(NULL, "Chain ladder dev.", NULL,
"Mack's S.E.")))
foo <- frameGrob(layout = fl)
foo <- placeGrob(foo,
linesGrob(c(0.2, 0.8), c(.5, .5),
gp = gpar(col=1, lty=1)),
row = 1, col = 1)
foo <- placeGrob(foo,
linesGrob(c(0.2, 0.8), c(.5, .5),
gp = gpar(col=1, lty=3)),
row = 1, col = 3)
foo <- placeGrob(foo,
textGrob(label = "Chain ladder dev."),
row = 1, col = 2)
foo <- placeGrob(foo,
textGrob(label = "Mack's S.E."),
row = 1, col = 4)
long <- expand.grid(origin=as.numeric(dimnames(.FullTriangle)$origin),
dev=as.numeric(dimnames(.FullTriangle)$dev))
long$value <- as.vector(.FullTriangle)
long$valuePlusMack.S.E <- long$value + as.vector(x$Mack.S.E)
long$valueMinusMack.S.E <- long$value - as.vector(x$Mack.S.E)
sublong <- long[!is.na(long$value),]
xyplot(valuePlusMack.S.E + valueMinusMack.S.E + value ~ dev |
factor(origin), data=sublong, t="l", lty=c(3,3,1), as.table=TRUE,
main="Chain ladder developments by origin period",
xlab="Development period",
ylab="Amount",col=1,
legend = list(top = list(fun = foo)),...)
}
}

Resources