How to plot absolute values and differences including confidence intervals - r

In the followup of the discussion on stackexchange I tried to implement the following plot
from
Cumming, G., & Finch, S. (2005). [Inference by Eye: Confidence Intervals and How to Read Pictures of Data][5]. American Psychologist, 60(2), 170–180. doi:10.1037/0003-066X.60.2.170
I share some people's dislike of double axis, but I think this is a fair use.
Below is my partial attempt; the second axis is still missing. I am looking for more elegant alternatives, and intelligent variations are welcome.
library(lattice)
library(latticeExtra)
# Example data: two group means, their difference, and the interval bounds.
d <- data.frame(what = c("A", "B", "Difference"),
                mean = c(75, 105, 30),
                lower = c(50, 80, -3),
                upper = c(100, 130, 63))
# Convert the Difference row to the left-hand scale by adding the mean of A,
# so that "difference = 0" lines up with A's mean.
d1 <- d
is_diff <- d1$what == "Difference"
value_cols <- c("mean", "lower", "upper")
# Select row A through the `what` column; comparing the whole data frame with
# "A" only happened to pick the right row.
d1[is_diff, value_cols] <- d1[is_diff, value_cols] + d1[d1$what == "A", "mean"]
segplot(what ~ lower + upper, centers = mean, data = d1,
        horizontal = FALSE, draw.bands = FALSE,
        lwd = 3, cex = 3, ylim = c(0, NA), pch = c(16, 16, 17),
        panel = function(x, y, z, ...) {
          # `centers` arrives through the dots; draw the segments first, then
          # dotted reference lines at the first two centers (A and B).
          centers <- list(...)$centers
          panel.segplot(x, y, z, ...)
          panel.abline(h = centers[1:2], lty = 3)
        })
## How to add the right scale, close to the last bar?

# Base-graphics version: A and B are read on the left axis; the difference is
# read on a right-hand axis whose zero is aligned with the mean of A.
diff_offset <- d$mean[1]                             # left-scale position of "difference = 0"
diff_ci <- diff_offset + c(d$lower[3], d$upper[3])   # difference interval on the left scale
y_top <- max(d$upper[1:2], diff_ci)                  # leave room for the shifted interval
par(mar = c(3, 5, 3, 5))
plot(NA, xlim = c(.5, 3.5), ylim = c(0, y_top), bty = "l", xaxt = "n",
     xlab = "", ylab = "Mean")
points(d$mean[1:2], pch = 19)
# Dashed reference lines from each group mean towards the difference axis.
segments(1, d$mean[1], 5, d$mean[1], lty = 2)
segments(2, d$mean[2], 5, d$mean[2], lty = 2)
axis(1, 1:3, d$what)
segments(1:2, d$lower[1:2], 1:2, d$upper[1:2])
# Right-hand axis labelled in difference units, shifted by the mean of A.
diff_ticks <- seq(-30, 60, by = 10)
axis(4, diff_offset + diff_ticks, diff_ticks, las = 1)
points(3, diff_offset + d$mean[3], pch = 17, cex = 1.5)
# The interval of the difference uses the difference's own bounds (row 3),
# shifted by the same offset as its point estimate.
segments(3, diff_ci[1], 3, diff_ci[2], lwd = 2)
mtext("Difference", side = 4, at = diff_offset, line = 3)

As a starting point another base R solution with Hmisc:
library(Hmisc)
# Plot all three intervals with errbar(). `what` is a character column (the
# data.frame() default since R 4.0), so as.integer(what) would give NA; use
# the row positions 1..3 as x coordinates instead.
with(d1,
  errbar(seq_along(what), mean, upper, lower, xlim = c(0, 4), xaxt = "n",
         xlab = "", ylim = c(0, 150))
)
points(3, d1[d1$what == "Difference", "mean"], pch = 15)
axis(1, at = 1:3, labels = d1$what)
# Right-hand axis in difference units: ticks every 10 covering the unshifted
# interval in `d`, drawn at positions shifted by the mean of A.
diff_row <- d$what == "Difference"
atics <- seq(floor(d[diff_row, "lower"] / 10) * 10,
             ceiling(d[diff_row, "upper"] / 10) * 10, by = 10)
axis(4, at = atics + d1[d1$what == "A", "mean"], labels = atics, pos = 3.5)

I would also go with base graph, as it includes the possibility to actually have two y-axis, see the answer here:
Here is my solution that uses only d:
# Left-hand scale: plot the means of A and B at x = 1 and x = 2.
xlim <- c(0.5, 3.5)
plot(1:2, d[d$what %in% LETTERS[1:2], "mean"], xlim = xlim, ylim = c(0, 140),
xlab = "", ylab = "", xaxt = "n", bty = "l", yaxs = "i")
# Vertical bars from `lower` to `upper` (columns 3:4 of d) for A and B.
lines(c(1,1), d[1, 3:4])
lines(c(2,2), d[2, 3:4])
# Overlay a second plot on the same device with its own y range, so the
# difference is drawn against a separate scale.
par(new = TRUE)
plot(3, d[d$what == "Difference", "mean"], ylim = c(-80, 130), xlim = xlim,
yaxt = "n", xaxt = "n", xlab = "", ylab = "", bty = "n")
lines(c(3,3), d[3, 3:4])
# Right-hand axis for the difference scale, then the shared x-axis labels.
Axis(x = c(-20, 60), at = c(-20, 0, 20, 40, 60), side = 4)
axis(1, at = c(1:3), labels = c("A", "B", "Difference"))
Which gives:
To make it clearer that the difference is something different, you can increase the distance from the other two points:
# Same two-scale plot, but with a wider x range so the difference can sit at
# x = 3.5, further away from A and B.
xlim <- c(0.5, 4)
plot(1:2, d[d$what %in% LETTERS[1:2], "mean"], xlim = xlim, ylim = c(0, 140),
xlab = "", ylab = "", xaxt = "n", bty = "l", yaxs = "i")
# Vertical bars from `lower` to `upper` (columns 3:4 of d) for A and B.
lines(c(1,1), d[1, 3:4])
lines(c(2,2), d[2, 3:4])
# Overlay a second plot with its own y range for the difference.
par(new = TRUE)
plot(3.5, d[d$what == "Difference", "mean"], ylim = c(-80, 130), xlim = xlim,
yaxt = "n", xaxt = "n", xlab = "", ylab = "", bty = "n")
lines(c(3.5,3.5), d[3, 3:4])
# Right-hand axis for the difference scale; x labels at the actual positions.
Axis(x = c(-20, 60), at = c(-20, 0, 20, 40, 60), side = 4)
axis(1, at = c(1,2,3.5), labels = c("A", "B", "Difference"))

I think you can do that also with base R, what about:
# Example data: two group means, their difference, and the interval bounds.
d = data.frame(what=c("A","B","Difference"),
mean=c(75,105,30),
lower=c(50,80,-3),
upper = c(100,130,63))
# Empty canvas: the single point (-1, -1) lies outside xlim/ylim, so nothing is drawn.
plot(-1,-1,xlim=c(1,3),ylim=c(0,140),xaxt="n")
# For each row: a vertical line from lower to upper, plus crosses at mean,
# lower and upper. A is at x = 1, B at x = 1.5, the difference at x = 2.
lines(c(1,1),c(d[1,3],d[1,4]))
points(rep(1,3),d[1,2:4],pch=4)
lines(c(1.5,1.5),c(d[2,3],d[2,4]))
points(rep(1.5,3),d[2,2:4],pch=4)
lines(c(2,2),c(d[3,3],d[3,4]))
points(rep(2,3),d[3,2:4],pch=4)
# Dotted horizontal line at the mean of B, extended to the second axis at x = 2.2.
lines(c(1.5,2.2),c(d[2,2],d[2,2]),lty="dotted")
axis(1, at=c(1,1.5,2), labels=c("A","B","Difference"))
# NOTE(review): the labels -1/0/1 at 40/80/120 do not correspond to the plotted
# difference values (which are drawn unshifted on the left scale); presumably
# placeholders for a properly computed second axis -- confirm before reuse.
axis(4,at=c(40,80,120),labels=c(-1,0,1),pos=2.2)
I simplified some things and didn't write it as a function, but I think the idea is clear and it could easily be extended to a function.

Related

How can I make the y axis on a r plot display numbers in decimal format with commas?

I am running the following R code:
plot(datereal, casesimm8lock7,
type = "l", lwd = 1, col = "red",
main = "a Sensitivity to time delays: 8 month immunity",
xaxt = "n", xlab = "Month",
ylab = "Daily new cases", ylim = c(0,250000))
And I want the y-axis, which currently displays 50000 100000 150000 etc. to display 50,000 100,000 150,000 etc.
I am trying to use the scales package but haven't figured it out yet.
Start by making an empty plot with no y axis. Then plot the data.
Then plot the y axis with axis(2, ...). In order to have the labels formatted like the question asks for, use help("formatC").
# test data
datereal <- Sys.Date() - 10:0
casesimm8lock7 <- seq(0, 250000, length.out = 11)
# Tick positions for the y axis and their comma-separated labels.
y_ticks <- seq(0, 250000, by = 50e3)
y_labels <- formatC(y_ticks, format = "d", big.mark = ",")
# the plot: set up the frame (title, axis labels, y range) without a y axis.
# These arguments belong to plot(); lines() does not use them and would only
# emit "not a graphical parameter" warnings.
plot(datereal, casesimm8lock7, type = "n", yaxt = "n",
     main = "a Sensitivity to time delays: 8 month immunity",
     xlab = "Month", ylab = "Daily new cases", ylim = c(0, 250000))
lines(datereal, casesimm8lock7, lwd = 1, col = "red")
# Draw the y axis by hand with the formatted labels.
axis(2, at = y_ticks, labels = y_labels)
With package scales, this can be done with either label_comma, as below, or label_number. These functions return a labeller function, to be applied to the vector of axis marks.
# label_comma() returns a function that formats numbers with thousands separators.
labeller <- scales::label_comma()
y_ticks <- seq(0, 250000, by = 50e3)
# Title, axis labels and y range go to plot(); lines() does not use them and
# would only emit "not a graphical parameter" warnings.
plot(datereal, casesimm8lock7, type = "n", yaxt = "n",
     main = "a Sensitivity to time delays: 8 month immunity",
     xlab = "Month", ylab = "Daily new cases", ylim = c(0, 250000))
lines(datereal, casesimm8lock7, lwd = 1, col = "red")
# Draw the y axis by hand, applying the labeller to the tick positions.
axis(2, at = y_ticks, labels = labeller(y_ticks))

Remove margin on inside of R plot

I am trying to plot return against date, and I would like the line to start and end at the line border. The plot code I am using -
minX = min(hf_instl$date)
maxX = max(hf_instl$date)
plot(df$date, cumsum(df$return), type = "l", col = rgb(0, 0.447, 0.741), xlim = c(minX, maxX), ylim = c(minY, maxY), yaxt = "n", xlab = NA, ylab = NA)
What I get is this -
Any solution to this in the base plot package? ggplot2 solutions are also okay.
You can use the xaxs property for that:
# create some fake data for the example
hf_instl <- data.frame(date = seq(Sys.Date() - 200, Sys.Date(), by = "day"),
                       return = runif(201, -1, 1.5))
df <- hf_instl
cum_return <- cumsum(df$return)
minX <- min(hf_instl$date)
maxX <- max(hf_instl$date)
# Take the y limits from the plotted series itself: the running sum can dip
# below 0 and can peak above its final value, so fixed 0 / sum()+10 limits
# could clip the line.
minY <- min(0, cum_return)
maxY <- max(cum_return) + 10
# your plot; xaxs = "i" removes the default padding of the x range, so the
# line starts and ends at the plot border
plot(df$date, cum_return, type = "l", col = rgb(0, 0.447, 0.741),
     xlim = c(minX, maxX), ylim = c(minY, maxY), yaxt = "n",
     xlab = NA, ylab = NA, xaxs = "i")
Hope this helps!

Adjust distance between x-axis and text in R plot

In the plot below, you will probably find that the distance between the x-axis and the date labels (Jan-01-60 to Mar-31-16) is large.
Is there any way I could make the distance smaller?
Here is my example code:
plot(dates, data, type = "l", lwd = 3, ylab = " ",
xlab = " ", col = "gray35", xaxt='n', ann = FALSE)
axis(side=1, at=dates_ten, labels=format(dates_ten, "%b-%d-%y"), las =
1, cex.axis=0.5, las = 1, font = 2, tcl = -0.2)
Many thanks.
You can use the padj argument to axis
# make a reproducible example: daily dates, every-10-days tick positions, and
# one seeded random value per date
dates <- seq(as.Date("2016/1/1"), as.Date("2016/4/1"), by = "days")
dates_ten <- seq(as.Date("2016/1/1"), as.Date("2016/4/1"), by = "10 days")
set.seed(42)
# Ask for length(dates) draws explicitly rather than passing a vector as `n`.
data <- rnorm(length(dates))
Vary padj as needed:
# Draw the series without the default x axis or annotations.
plot(dates, data, type = "l", lwd = 3, ylab = " ",
     xlab = " ", col = "gray35", xaxt = "n", ann = FALSE)
# Custom date axis. padj shifts the tick labels perpendicular to the axis;
# vary it to move the labels closer to or further from the axis line.
# (`las = 1` is given once; the original passed it twice.)
axis(side = 1, at = dates_ten, labels = format(dates_ten, "%b-%d-%y"),
     las = 1, cex.axis = 0.5, font = 2, tcl = -0.2, padj = -2)

How to plot (almost) the same function at both sides of the "y" axis in R?

I have a function that depends on distance and behaves different according to the direction (east or west) to where you evaluate it. Now I have two plots side by side, but I need to have them as one, where the labels of the axis are shared and the x axis label is centered. Here is an example of how the code looks right now:
x = (0:300)
par(mfrow=c(2,2))
Idriss70 = function(x){
exp(5.6315-0.4104*7-(2.9832-0.2339*7)*log(x+10)+0.00047*x+0.12)
}
plot(Idriss70(x), log = "x", type="l",xlim = c(300,1), xlab="Distancia [km]",ylab="PGA [g]", main="Aroma y Humayani extendida, Mw 7,0", col="green", panel.first = grid(equilogs = TRUE))
Idriss70 = function(x){
ifelse (x >= 0 & x<=3, exp(5.6315-0.4104*7-(2.9832-0.2339*7)*log(0+10)+0.00047*0+0.12),
exp(5.6315-0.4104*7-(2.9832-0.2339*7)*log(x+10)+0.00047*x+0.12))
}
plot(Idriss70(x), log = "x", type="l", xlab="Distancia [km]",ylab="PGA [g]", main="Aroma y Humayani extendida, Mw 7,0", col="green", panel.first = grid(equilogs = TRUE))
As you can see, the log scale of the plots doesn't allow "negative" values to be evaluated, so I haven't been able to use only one plot.
How can I get this as one plot without using Illustrator or other graphics software, as I have to create lots of these for different areas?
I haven't used ggplot in the past but I am willing to learn if necessary.
You can basically make one plot and mess around with fig so that you restrict the first plot to the left half of the device and the second to the right half
x <- 0:300
# Idriss70: evaluate exp(a - b*m - (c - d*m) * log(x + 10) + e*x + f) for each
# element of x, with the fixed coefficients below. Vectorised over x.
Idriss70 <- function(x) {
  # presumably the magnitude (the plot title says "Mw 7,0") -- confirm
  mag <- 7
  log_value <- 5.6315 - 0.4104 * mag -
    (2.9832 - 0.2339 * mag) * log(x + 10) +
    0.00047 * x + 0.12
  exp(log_value)
}
# Idriss71: evaluates exp(5.6315 - 0.4104*7 - (2.9832 - 0.2339*7) * log(x + 10)
# + 0.00047*x + 0.12) element-wise, except that every x in [0, 3] gets the
# value the expression takes at x = 0 (a flat segment at short distances).
Idriss71 <- function(x) {
  value_at <- function(dist) {
    exp(5.6315 - 0.4104 * 7 - (2.9832 - 0.2339 * 7) * log(dist + 10) +
          0.00047 * dist + 0.12)
  }
  on_flat_segment <- x >= 0 & x <= 3
  ifelse(on_flat_segment, value_at(0), value_at(x))
}
# Left panel: restrict the figure region to the left half of the device and
# drop the right margin so the two panels touch.
par(fig = c(0, .5, 0, 1), mar = c(5, 4, 4, 0), xaxs = 'i', ann = FALSE)
# Only y values are supplied, so they are plotted against their index; the
# reversed xlim makes the log x axis run from 300 down to 1.
plot(Idriss70(x), log = "x", type="l", xlim = c(300,1),
col="green", axes = FALSE, panel.first = grid(equilogs = TRUE))
# Keep the tick positions returned by axis() to reuse them on the right panel.
xx <- axis(1)
axis(2)
# Right panel: right half of the device; new = TRUE keeps the left panel on
# the page instead of starting a fresh one.
par(fig = c(.5, 1, 0, 1), new = TRUE, mar = c(5, 0, 4, 2))
plot(Idriss71(x), log = "x", type="l", col="green",
panel.first = grid(equilogs = TRUE), axes = FALSE)
# Same tick positions as on the left, with the first label blanked
# (presumably to avoid a doubled label where the panels meet -- confirm).
axis(1, at = xx, labels = c('', xx[-1]))
# Shared title and axis labels, placed relative to the outer margins.
title(xlab = "Distancia [km]", main = 'Aroma y Humayani extendida, Mw 7,0',
ylab = 'PGA [g]', outer = TRUE, line = -1.5)
good luck with ggplot. you'd probably have to summon @baptiste
well you could still use mfrow I suppose
# Variant using mfrow: close all open devices, then lay out two panels side
# by side; the left panel has no right margin.
graphics.off()
par(mfrow = c(1, 2), mar = c(5, 4, 4, 0), xaxs = 'i', ann = FALSE)
# Left panel: y values plotted against their index on a reversed log x axis.
plot(Idriss70(x), log = "x", type="l", xlim = c(300,1),
col="green", axes = FALSE, panel.first = grid(equilogs = TRUE))
# Keep the tick positions returned by axis() to reuse them on the right panel.
xx <- axis(1)
axis(2)
# Right panel: no left margin, so it sits directly against the left panel.
par(mar = c(5, 0, 4, 2))
plot(Idriss71(x), log = "x", type="l", col="green",
panel.first = grid(equilogs = TRUE), axes = FALSE)
# Same tick positions as on the left, with the first label blanked.
axis(1, at = xx, labels = c('', xx[-1]))
# Shared title and axis labels, placed relative to the outer margins.
title(xlab = "Distancia [km]", main = 'Aroma y Humayani extendida, Mw 7,0',
ylab = 'PGA [g]', outer = TRUE, line = -1.5)

Why do cat and text handle spaces differently?

I am trying to plot some summary information as text on a plot, using the text function. What I do not understand is why text interprets spaces differently than cat.
Simple example:
spaces <- function(x) paste(rep(" ", x), collapse = "")
vals <- c(1000, 5)
e <- paste0(spaces(3), "val1", spaces(8), "val2\n",
"v: ", vals[1], spaces(12 - nchar(vals[1])), vals[2])
> cat(e) # Gives exactly the output I want
val1 val2
v: 1000 5
plot(0, type = "n", bty = "n", xaxt = "n", yaxt = "n", ylab = "",
xlab = "", xlim = c(0, 5), ylim = c(0, 5))
text(y = 4, x = 1, labels = e, adj = c(0, 1))
As you can see, text does not handle the spaces the same as the console output. I want things to line up nicely, like they do in the console output. How can I modify the object, or the call to text so that the plotted version mirrors the console output?
I also tried using:
spaces <- function(x) paste(rep("\t", x), collapse = "")
Based on the very helpful comments from @Jongware, setting the font family with par(family = "mono") works well for my purposes:
# Switch the device font family to "mono" so that text() draws the padded
# string in a fixed-width font and the columns line up as they do with cat().
par(family = "mono")
# Empty plot with no box or axes, used only as a canvas for the text.
plot(0, type = "n", bty = "n", xaxt = "n", yaxt = "n", ylab = "",
xlab = "", xlim = c(0, 5), ylim = c(0, 5))
# adj = c(0, 1) anchors the text block by its top-left corner at (1, 4).
text(y = 4, x = 1, labels = e, adj = c(0, 1))

Resources