log probability plot in R? - r

Does anyone know how to create log probability plot like this one in R where the x-axis is probability and y-axis is in log-scale. I read and downloaded the package heR.Misc package but I don't know how to use it.
!

#create log probablity plot
#MPM 131201
#Make some dummy data
set.seed(21)
Dt<-as.data.frame(rlnorm(625, log(10), log(2.5)))
names(Dt)<-"Au_ppm"
#Create probablity scale lines and associated labels -
PrbGrd <- qnorm(c(0.001,0.01, 0.05, 0.10,0.20,0.30,0.40, 0.50, 0.60, 0.70,0.80,0.90,0.95,0.99,0.999))
PrbGrdL<-c("0.1","1","5","10","20","30","40","50","60","70","80","90","95","99","99.9")
#create some value grid lines then convert to logs
ValGrd<-c(seq(0.001,0.01,0.001),seq(0.01,0.1,0.01),seq(0.1,1,0.1),seq(1,10,1),seq(10,100,10))
ValGrd<-log10(ValGrd)
#load up lattice packages - latticeExtra for nice log scale
require(lattice)
require(latticeExtra)
#Use qqmath to make the plot (note lattice does not work for weighted data - shame about that)
qqmath(~ Au_ppm,
data= Dt,
distribution = function(p) qnorm(p),
main = "Normal probablity / log (base 10) plot",
pch=20,
cex=0.5,
xlab="Normal distribution scale (%)",
scales=list(y=list(log=10,alternating=1),x = list(at = PrbGrd, labels = PrbGrdL, cex = 0.8)),
yscale.components=yscale.components.log10ticks,
panel=function(x,...){
panel.abline(v=PrbGrd ,col="grey",lty=3)
panel.abline(h=ValGrd,col="grey",lty=3)
panel.qqmath(x,distribution=qnorm)
}
)

Here is an example using base R, simplified a bit from this post: (https://stat.ethz.ch/pipermail/r-help/2004-February/045370.html).
## Make some data
y <- rnorm(n=175, mean=100, sd=75)
y <- sort(y)
pct <- 1:length(y)/(length(y)+1)
## For x ticks
probs <- c(0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7,
0.9, 0.99, 0.999, 0.9999)
x.vals <- qnorm(probs)
## Plotting area
xs <- c(-4, 4)
ys <- seq(-2,4)
par(xaxs="i", yaxs="i")
plot(NA, NA, xlim=c(xs[1], xs[2]), ylim=c(min(ys), max(ys)),
axes=FALSE, xlab=NA, ylab=NA)
## X Axis
axis(side=1, at=x.vals, labels=FALSE, tck=-0.01)
text(x=x.vals, y=rep(min(ys)-0.35, length(x.vals)),
labels=probs*100, xpd=TRUE, srt=325, font=2)
## Y Axis
axis(side=2, at=ys, labels=FALSE)
text(y=ys, x=rep(xs[1]-.08, length(ys)),
labels= as.character(10^ys), xpd = NA, font=2,
pos=2)
for (i in ys){
axis(side=2, at=log10(seq(2,9))+ i,
labels=NA, tck = -0.01)
}
## Grid lines and box
abline(h=ys, col="grey80", lty=2)
abline(v=qnorm(probs), col="grey80", lty=2)
box()
## Plot Data
lines(x=qnorm(pct), y=log10(y), col="blue")

Related

Remove blank space in raster plot

I'm plotting a map of hydropower as a function of the distance to the turbine and distance to the uptake. However, when plotting I got an ugly blank space on the sides of my contour plot. I already tried the xlim = c(0,2500) but it did not work. Here you have the code:
# read data
data <- read.csv("ArchivoPotencia.csv", header = TRUE)
# make d into a SpatialPointsDataFrame object
library(sp)
coords <- data[, c("DTurbinas", "DIntake")]
s <- SpatialPointsDataFrame(coords = coords, data = data)
# interpolate with a thin plate spline
# (or another interpolation method: kriging, inverse distance weighting).
library(raster)
library(fields)
tps <- Tps(coordinates(s), as.vector(data$Potencia))
p <- raster(s)
p <- interpolate(p, tps)
# plot raster, points, and contour lines
plot(p, xlab= "Distance to Turbine", ylab = "Distance to Intake", cex=1.3, cex.lab = 1.3, cex.axis = 1.3,
legend.width=1, legend.args=list(text='HP (KW)',side=3, line=1, cex=1.0), xlim = c(0,2500))
plot(s, add=T, cex=1.5, cex.lab = 1.3, cex.axis = 1.3)
contour(p, add=T, labcex = 0.8, lwd = 1.5)
Could anyone help me to remove the space on the plot?

R plot remove dashed lines

I was trying to follow this tutorial (https://popgen.nescent.org/2018-03-27_RDA_GEA.html) and plot the RDA, but I would like to remove the two dashed lines (x=0 and y=0). Does anyone know how to get rid of them?
This is the graph I'm talking about
According to this post you can alter a plot.rda() to remove the dotted lines if you build the plot up yourself from scratch, but it's a complicated/challenging task. The easiest/best solution in my opinion is to draw white lines over the dotted lines with abline(h = 0, v = 0, col = "white", lwd = 2) and redraw the plot borders with box() before you plot the points/lines. See the ## PLOTTING ## section below for an example:
## OBTAIN & LOAD THE DATA ##
#install.packages(c("psych","vegan"), dependencies=TRUE)
library(psych) # Used to investigate correlations among predictors
library(vegan) # Used to run RDA
temp <- tempfile()
download.file("https://github.com/NESCent/popgenInfo/blob/master/data/wolf_geno_samp_10000.zip?raw=true",
temp)
gen <- read.csv(unzip(temp, "wolf_geno_samp_10000.csv"), row.names=1)
dim(gen)
sum(is.na(gen))
gen.imp <- apply(gen,
2,
function(x) replace(x,
is.na(x),
as.numeric(names(which.max(table(x))))))
sum(is.na(gen.imp)) # No NAs
env <- read.csv(url("https://raw.githubusercontent.com/NESCent/popgenInfo/master/data/wolf_env.csv"))
str(env)
env$individual <- as.character(env$individual)
env$land_cover <- as.factor(env$land_cover)
identical(rownames(gen.imp), env[,1])
pairs.panels(env[,5:16], scale=T)
pred <- subset(env, select=-c(precip_coldest_quarter, max_temp_warmest_month, min_temp_coldest_month))
table(pred$land_cover)
pred <- subset(pred, select=-c(land_cover))
pred <- pred[,5:12]
colnames(pred) <- c("AMT","MDR","sdT","AP","cvP","NDVI","Elev","Tree")
pairs.panels(pred, scale=T)
wolf.rda <- rda(gen.imp ~ ., data=pred, scale=T)
wolf.rda
RsquareAdj(wolf.rda)
summary(eigenvals(wolf.rda, model = "constrained"))
screeplot(wolf.rda)
signif.full <- anova.cca(wolf.rda, parallel=getOption("mc.cores"))
signif.full
signif.axis <- anova.cca(wolf.rda, by="axis", parallel=getOption("mc.cores"))
signif.axis
vif.cca(wolf.rda)
plot(wolf.rda, scaling=3)
plot(wolf.rda, choices = c(1, 3), scaling=3)
levels(env$ecotype) <- c("Western Forest","Boreal Forest","Arctic","High Arctic","British Columbia","Atlantic Forest")
eco <- env$ecotype
bg <- c("#ff7f00","#1f78b4","#ffff33","#a6cee3","#33a02c","#e31a1c")
## PLOTTING ##
plot(wolf.rda, type="n", scaling=3)
abline(h = 0, v = 0, col = "white", lwd = 2)
box()
points(wolf.rda, display="species", pch=20, cex=0.7, col="gray32", scaling=3) # the SNPs
points(wolf.rda, display="sites", pch=21, cex=1.3, col="gray32", scaling=3, bg=bg[eco]) # the wolves
text(wolf.rda, scaling=3, display="bp", col="#0868ac", cex=1) # the predictors
legend("bottomright", legend=levels(eco), bty="n", col="gray32", pch=21, cex=1, pt.bg=bg)
plot(wolf.rda, type="n", scaling=3, choices=c(1,3))
abline(h = 0, v = 0, col = "white", lwd = 2)
box()
points(wolf.rda, display="species", pch=20, cex=0.7, col="gray32", scaling=3, choices=c(1,3))
points(wolf.rda, display="sites", pch=21, cex=1.3, col="gray32", scaling=3, bg=bg[eco], choices=c(1,3))
text(wolf.rda, scaling=3, display="bp", col="#0868ac", cex=1, choices=c(1,3))
legend("topleft", legend=levels(eco), bty="n", col="gray32", pch=21, cex=1, pt.bg=bg)
Also, in future, if you could please post the code required to obtain and load the data or a minimal, reproducible example, it would have made this question a lot easier to answer; see How to make a great R reproducible example

R - how two have two y-axes with zeroes aligned in the middle of the plot

I am plotting two graphs on the same plot. Each one has a different ylim, and I would like to have the zeroes aligned in the middle of the plot.
This is my code:
# data
time <- seq(0.1, 10, by = 0.1)
det_rot <- runif(100, min=-100, max=100)
vel_mag <- runif(100, min=0, max=5)
# first plot
smoothingSpline = smooth.spline(time, det_rot, spar=0.20)
plot(time, det_rot,lwd=2,
ann=FALSE, las=2, pch="", ylim=c(-100,250)) # , pch=""
lines(smoothingSpline, lwd=2, col="red")
par(new=TRUE)
# second plot
smoothingSpline2 = smooth.spline(time, vel_mag, spar=0.20)
plot(time, vel_mag,
xaxt="n",yaxt="n",xlab="",ylab="",pch="", ylim=c(0,6))
lines(smoothingSpline2, lwd=2, col="blue",)
axis(4)
See the plot:
Simple fix: change ylims to c(-250, 250) and c(-6,6) respectively.

R- Break axis matplot function

I need to break an axis from 0.5 to 1.5. My code is:
matplot( wxyz$days_until_last_pay, wxyz[,c(2,3,4,5)], type=c("b"), pch=1, col=1:4,
main="x![enter image description here][1]", cex.main=0.8)
legend("bottomright", inset=c(0,-0.57), fill=NULL,
legend = c("mean","median","max", "min"), col=1:4, pch=1, cex=0.8)
library("plotrix")
axis.break(axis=2,1,,2,style="zigzag", brw=0.03)
But I only get a line in it. This is not breaking the axis.
How can I solve this?
Thanks!
axis.break puts a break into an existing plot, so if the axis is not "broken" it will not work.
One suggestion is to make two plots on top of each other and set their ylim be so that there is a gap between 0.5 and 1.5, e.g.
## Some data, set.seed(1)
dat <- matrix(c(rnorm(50, 2, 0.1),
rnorm(50, 0.2, 0.05),
rnorm(50, 0.3, 0.05)),
byrow=FALSE, ncol=3)
## Split the device into two subplots
par(mfrow = c(2,1))
## Set the bottom margin of the top plot to 0.1
par(mar=c(0.1,4.1,4.1,2))
## Top plot (first column of the matrix)
plot(dat[,1], add=T, type="l", xaxt="n", ylab="", ylim=c(1.5, 2.5))
## Set the top margin of the bottom plot to 0.1
par(mar=c(5.1,4.1,0.1,2))
## Bottom plot
matplot(dat[,2:3], type="l", col=2:3, ylab="", ylim=c(0, 0.5))
This gives you something like:

Rotate histogram in R or overlay a density in a barplot

I would like to rotate a histogram in R, plotted by hist(). The question is not new, and in several forums I have found that it is not possible. However, all these answers date back to 2010 or even later.
Has anyone found a solution meanwhile?
One way to get around the problem is to plot the histogram via barplot() that offers the option "horiz=TRUE". The plot works fine but I fail to overlay a density in the barplots. The problem probably lies in the x-axis since in the vertical plot, the density is centered in the first bin, while in the horizontal plot the density curve is messed up.
Any help is very much appreciated!
Thanks,
Niels
Code:
require(MASS)
Sigma <- matrix(c(2.25, 0.8, 0.8, 1), 2, 2)
mvnorm <- mvrnorm(1000, c(0,0), Sigma)
scatterHist.Norm <- function(x,y) {
zones <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(zones, widths=c(2/3,1/3), heights=c(1/3,2/3))
xrange <- range(x) ; yrange <- range(y)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="", cex=0.5)
xhist <- hist(x, plot=FALSE, breaks=seq(from=min(x), to=max(x), length.out=20))
yhist <- hist(y, plot=FALSE, breaks=seq(from=min(y), to=max(y), length.out=20))
top <- max(c(xhist$counts, yhist$counts))
par(mar=c(0,3,1,1))
plot(xhist, axes=FALSE, ylim=c(0,top), main="", col="grey")
x.xfit <- seq(min(x),max(x),length.out=40)
x.yfit <- dnorm(x.xfit,mean=mean(x),sd=sd(x))
x.yfit <- x.yfit*diff(xhist$mids[1:2])*length(x)
lines(x.xfit, x.yfit, col="red")
par(mar=c(0,3,1,1))
plot(yhist, axes=FALSE, ylim=c(0,top), main="", col="grey", horiz=TRUE)
y.xfit <- seq(min(x),max(x),length.out=40)
y.yfit <- dnorm(y.xfit,mean=mean(x),sd=sd(x))
y.yfit <- y.yfit*diff(yhist$mids[1:2])*length(x)
lines(y.xfit, y.yfit, col="red")
}
scatterHist.Norm(mvnorm[,1], mvnorm[,2])
scatterBar.Norm <- function(x,y) {
zones <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(zones, widths=c(2/3,1/3), heights=c(1/3,2/3))
xrange <- range(x) ; yrange <- range(y)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="", cex=0.5)
xhist <- hist(x, plot=FALSE, breaks=seq(from=min(x), to=max(x), length.out=20))
yhist <- hist(y, plot=FALSE, breaks=seq(from=min(y), to=max(y), length.out=20))
top <- max(c(xhist$counts, yhist$counts))
par(mar=c(0,3,1,1))
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0)
x.xfit <- seq(min(x),max(x),length.out=40)
x.yfit <- dnorm(x.xfit,mean=mean(x),sd=sd(x))
x.yfit <- x.yfit*diff(xhist$mids[1:2])*length(x)
lines(x.xfit, x.yfit, col="red")
par(mar=c(3,0,1,1))
barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
y.xfit <- seq(min(x),max(x),length.out=40)
y.yfit <- dnorm(y.xfit,mean=mean(x),sd=sd(x))
y.yfit <- y.yfit*diff(yhist$mids[1:2])*length(x)
lines(y.xfit, y.yfit, col="red")
}
scatterBar.Norm(mvnorm[,1], mvnorm[,2])
#
Source of scatter plot with marginal histograms (click first link after "adapted from..."):
http://r.789695.n4.nabble.com/newbie-scatterplot-with-marginal-histograms-done-and-axes-labels-td872589.html
Source of density in a scatter plot:
http://www.statmethods.net/graphs/density.html
scatterBarNorm <- function(x, dcol="blue", lhist=20, num.dnorm=5*lhist, ...){
## check input
stopifnot(ncol(x)==2)
## set up layout and graphical parameters
layMat <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(layMat, widths=c(5/7, 2/7), heights=c(2/7, 5/7))
ospc <- 0.5 # outer space
pext <- 4 # par extension down and to the left
bspc <- 1 # space between scatter plot and bar plots
par. <- par(mar=c(pext, pext, bspc, bspc),
oma=rep(ospc, 4)) # plot parameters
## scatter plot
plot(x, xlim=range(x[,1]), ylim=range(x[,2]), ...)
## 3) determine barplot and height parameter
## histogram (for barplot-ting the density)
xhist <- hist(x[,1], plot=FALSE, breaks=seq(from=min(x[,1]), to=max(x[,1]),
length.out=lhist))
yhist <- hist(x[,2], plot=FALSE, breaks=seq(from=min(x[,2]), to=max(x[,2]),
length.out=lhist)) # note: this uses probability=TRUE
## determine the plot range and all the things needed for the barplots and lines
xx <- seq(min(x[,1]), max(x[,1]), length.out=num.dnorm) # evaluation points for the overlaid density
xy <- dnorm(xx, mean=mean(x[,1]), sd=sd(x[,1])) # density points
yx <- seq(min(x[,2]), max(x[,2]), length.out=num.dnorm)
yy <- dnorm(yx, mean=mean(x[,2]), sd=sd(x[,2]))
## barplot and line for x (top)
par(mar=c(0, pext, 0, 0))
barplot(xhist$density, axes=FALSE, ylim=c(0, max(xhist$density, xy)),
space=0) # barplot
lines(seq(from=0, to=lhist-1, length.out=num.dnorm), xy, col=dcol) # line
## barplot and line for y (right)
par(mar=c(pext, 0, 0, 0))
barplot(yhist$density, axes=FALSE, xlim=c(0, max(yhist$density, yy)),
space=0, horiz=TRUE) # barplot
lines(yy, seq(from=0, to=lhist-1, length.out=num.dnorm), col=dcol) # line
## restore parameters
par(par.)
}
require(mvtnorm)
X <- rmvnorm(1000, c(0,0), matrix(c(1, 0.8, 0.8, 1), 2, 2))
scatterBarNorm(X, xlab=expression(italic(X[1])), ylab=expression(italic(X[2])))
It may be helpful to know that the hist() function invisibly returns all the information that you need to reproduce what it does using simpler plotting functions, like rect().
vals <- rnorm(10)
A <- hist(vals)
A
$breaks
[1] -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5
$counts
[1] 1 3 3 1 1 1
$intensities
[1] 0.2 0.6 0.6 0.2 0.2 0.2
$density
[1] 0.2 0.6 0.6 0.2 0.2 0.2
$mids
[1] -1.25 -0.75 -0.25 0.25 0.75 1.25
$xname
[1] "vals"
$equidist
[1] TRUE
attr(,"class")
[1] "histogram"
You can create the same histogram manually like this:
plot(NULL, type = "n", ylim = c(0,max(A$counts)), xlim = c(range(A$breaks)))
rect(A$breaks[1:(length(A$breaks) - 1)], 0, A$breaks[2:length(A$breaks)], A$counts)
With those parts, you can flip the axes however you like:
plot(NULL, type = "n", xlim = c(0, max(A$counts)), ylim = c(range(A$breaks)))
rect(0, A$breaks[1:(length(A$breaks) - 1)], A$counts, A$breaks[2:length(A$breaks)])
For similar do-it-yourselfing with density(), see:
Axis-labeling in R histogram and density plots; multiple overlays of density plots
I'm not sure whether it is of interest, but I sometimes want to use horizontal histograms without any packages and be able to write or draw at any position of the graphic.
That's why I wrote the following function, with examples provided below. If anyone knows a package to which this would fit well, please write me: berry-b at gmx.de
Please be sure not to have a variable hpos in your workspace, as it will be overwritten with a function. (Yes, for a package I would need to insert some safety parts in the function).
horiz.hist <- function(Data, breaks="Sturges", col="transparent", las=1,
ylim=range(HBreaks), labelat=pretty(ylim), labels=labelat, border=par("fg"), ... )
{a <- hist(Data, plot=FALSE, breaks=breaks)
HBreaks <- a$breaks
HBreak1 <- a$breaks[1]
hpos <<- function(Pos) (Pos-HBreak1)*(length(HBreaks)-1)/ diff(range(HBreaks))
barplot(a$counts, space=0, horiz=T, ylim=hpos(ylim), col=col, border=border,...)
axis(2, at=hpos(labelat), labels=labels, las=las, ...)
print("use hpos() to address y-coordinates") }
For examples
# Data and basic concept
set.seed(8); ExampleData <- rnorm(50,8,5)+5
hist(ExampleData)
horiz.hist(ExampleData, xlab="absolute frequency")
# Caution: the labels at the y-axis are not the real coordinates!
# abline(h=2) will draw above the second bar, not at the label value 2. Use hpos:
abline(h=hpos(11), col=2)
# Further arguments
horiz.hist(ExampleData, xlim=c(-8,20))
horiz.hist(ExampleData, main="the ... argument worked!", col.axis=3)
hist(ExampleData, xlim=c(-10,40)) # with xlim
horiz.hist(ExampleData, ylim=c(-10,40), border="red") # with ylim
horiz.hist(ExampleData, breaks=20, col="orange")
axis(2, hpos(0:10), labels=F, col=2) # another use of hpos()
One shortcoming: the function doesn't work with breakpoints provided as a vector with different widths of the bars.
Thank you, Tim and Paul. You made me think harder and use what hist() actually provides.
This is my solution now (with great help from Alex Pl.):
scatterBar.Norm <- function(x,y) {
zones <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(zones, widths=c(5/7,2/7), heights=c(2/7,5/7))
xrange <- range(x)
yrange <- range(y)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="", cex=0.5)
xhist <- hist(x, plot=FALSE, breaks=seq(from=min(x), to=max(x), length.out=20))
yhist <- hist(y, plot=FALSE, breaks=seq(from=min(y), to=max(y), length.out=20))
top <- max(c(xhist$density, yhist$density))
par(mar=c(0,3,1,1))
barplot(xhist$density, axes=FALSE, ylim=c(0, top), space=0)
x.xfit <- seq(min(x),max(x),length.out=40)
x.yfit <- dnorm(x.xfit, mean=mean(x), sd=sd(x))
x.xscalefactor <- x.xfit / seq(from=0, to=19, length.out=40)
lines(x.xfit/x.xscalefactor, x.yfit, col="red")
par(mar=c(3,0,1,1))
barplot(yhist$density, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
y.xfit <- seq(min(y),max(y),length.out=40)
y.yfit <- dnorm(y.xfit, mean=mean(y), sd=sd(y))
y.xscalefactor <- y.xfit / seq(from=0, to=19, length.out=40)
lines(y.yfit, y.xfit/y.xscalefactor, col="red")
}
For examples:
require(MASS)
#Sigma <- matrix(c(2.25, 0.8, 0.8, 1), 2, 2)
Sigma <- matrix(c(1, 0.8, 0.8, 1), 2, 2)
mvnorm <- mvrnorm(1000, c(0,0), Sigma) ; scatterBar.Norm(mvnorm[,1], mvnorm[,2])
An asymmetric Sigma leads to a somewhat bulkier histogram of the respective axis.
The code is left deliberately "unelegant" in order to increase comprehensibility (for myself when I revisit it later...).
Niels
When using ggplot, flipping axes works very well. See for example this example which shows how to do this for a boxplot, but it works equally well for a histogram I assume. In ggplot one can quite easily overlay different plot types, or geometries in ggplot2 jargon. So combining a density plot and a histogram should be easy.

Resources