Rotate histogram in R or overlay a density in a barplot - r

I would like to rotate a histogram in R, plotted by hist(). The question is not new, and in several forums I have found that it is not possible. However, all these answers date back to 2010 or even later.
Has anyone found a solution meanwhile?
One way to get around the problem is to plot the histogram via barplot() that offers the option "horiz=TRUE". The plot works fine but I fail to overlay a density in the barplots. The problem probably lies in the x-axis since in the vertical plot, the density is centered in the first bin, while in the horizontal plot the density curve is messed up.
Any help is very much appreciated!
Thanks,
Niels
Code:
require(MASS)
Sigma <- matrix(c(2.25, 0.8, 0.8, 1), 2, 2)
mvnorm <- mvrnorm(1000, c(0,0), Sigma)
scatterHist.Norm <- function(x,y) {
zones <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(zones, widths=c(2/3,1/3), heights=c(1/3,2/3))
xrange <- range(x) ; yrange <- range(y)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="", cex=0.5)
xhist <- hist(x, plot=FALSE, breaks=seq(from=min(x), to=max(x), length.out=20))
yhist <- hist(y, plot=FALSE, breaks=seq(from=min(y), to=max(y), length.out=20))
top <- max(c(xhist$counts, yhist$counts))
par(mar=c(0,3,1,1))
plot(xhist, axes=FALSE, ylim=c(0,top), main="", col="grey")
x.xfit <- seq(min(x),max(x),length.out=40)
x.yfit <- dnorm(x.xfit,mean=mean(x),sd=sd(x))
x.yfit <- x.yfit*diff(xhist$mids[1:2])*length(x)
lines(x.xfit, x.yfit, col="red")
par(mar=c(0,3,1,1))
plot(yhist, axes=FALSE, ylim=c(0,top), main="", col="grey", horiz=TRUE)
y.xfit <- seq(min(x),max(x),length.out=40)
y.yfit <- dnorm(y.xfit,mean=mean(x),sd=sd(x))
y.yfit <- y.yfit*diff(yhist$mids[1:2])*length(x)
lines(y.xfit, y.yfit, col="red")
}
scatterHist.Norm(mvnorm[,1], mvnorm[,2])
scatterBar.Norm <- function(x,y) {
zones <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(zones, widths=c(2/3,1/3), heights=c(1/3,2/3))
xrange <- range(x) ; yrange <- range(y)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="", cex=0.5)
xhist <- hist(x, plot=FALSE, breaks=seq(from=min(x), to=max(x), length.out=20))
yhist <- hist(y, plot=FALSE, breaks=seq(from=min(y), to=max(y), length.out=20))
top <- max(c(xhist$counts, yhist$counts))
par(mar=c(0,3,1,1))
barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0)
x.xfit <- seq(min(x),max(x),length.out=40)
x.yfit <- dnorm(x.xfit,mean=mean(x),sd=sd(x))
x.yfit <- x.yfit*diff(xhist$mids[1:2])*length(x)
lines(x.xfit, x.yfit, col="red")
par(mar=c(3,0,1,1))
barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
y.xfit <- seq(min(x),max(x),length.out=40)
y.yfit <- dnorm(y.xfit,mean=mean(x),sd=sd(x))
y.yfit <- y.yfit*diff(yhist$mids[1:2])*length(x)
lines(y.xfit, y.yfit, col="red")
}
scatterBar.Norm(mvnorm[,1], mvnorm[,2])
#
Source of scatter plot with marginal histograms (click first link after "adapted from..."):
http://r.789695.n4.nabble.com/newbie-scatterplot-with-marginal-histograms-done-and-axes-labels-td872589.html
Source of density in a scatter plot:
http://www.statmethods.net/graphs/density.html

scatterBarNorm <- function(x, dcol="blue", lhist=20, num.dnorm=5*lhist, ...){
## check input
stopifnot(ncol(x)==2)
## set up layout and graphical parameters
layMat <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(layMat, widths=c(5/7, 2/7), heights=c(2/7, 5/7))
ospc <- 0.5 # outer space
pext <- 4 # par extension down and to the left
bspc <- 1 # space between scatter plot and bar plots
par. <- par(mar=c(pext, pext, bspc, bspc),
oma=rep(ospc, 4)) # plot parameters
## scatter plot
plot(x, xlim=range(x[,1]), ylim=range(x[,2]), ...)
## 3) determine barplot and height parameter
## histogram (for barplot-ting the density)
xhist <- hist(x[,1], plot=FALSE, breaks=seq(from=min(x[,1]), to=max(x[,1]),
length.out=lhist))
yhist <- hist(x[,2], plot=FALSE, breaks=seq(from=min(x[,2]), to=max(x[,2]),
length.out=lhist)) # note: this uses probability=TRUE
## determine the plot range and all the things needed for the barplots and lines
xx <- seq(min(x[,1]), max(x[,1]), length.out=num.dnorm) # evaluation points for the overlaid density
xy <- dnorm(xx, mean=mean(x[,1]), sd=sd(x[,1])) # density points
yx <- seq(min(x[,2]), max(x[,2]), length.out=num.dnorm)
yy <- dnorm(yx, mean=mean(x[,2]), sd=sd(x[,2]))
## barplot and line for x (top)
par(mar=c(0, pext, 0, 0))
barplot(xhist$density, axes=FALSE, ylim=c(0, max(xhist$density, xy)),
space=0) # barplot
lines(seq(from=0, to=lhist-1, length.out=num.dnorm), xy, col=dcol) # line
## barplot and line for y (right)
par(mar=c(pext, 0, 0, 0))
barplot(yhist$density, axes=FALSE, xlim=c(0, max(yhist$density, yy)),
space=0, horiz=TRUE) # barplot
lines(yy, seq(from=0, to=lhist-1, length.out=num.dnorm), col=dcol) # line
## restore parameters
par(par.)
}
require(mvtnorm)
X <- rmvnorm(1000, c(0,0), matrix(c(1, 0.8, 0.8, 1), 2, 2))
scatterBarNorm(X, xlab=expression(italic(X[1])), ylab=expression(italic(X[2])))

It may be helpful to know that the hist() function invisibly returns all the information that you need to reproduce what it does using simpler plotting functions, like rect().
vals <- rnorm(10)
A <- hist(vals)
A
$breaks
[1] -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5
$counts
[1] 1 3 3 1 1 1
$intensities
[1] 0.2 0.6 0.6 0.2 0.2 0.2
$density
[1] 0.2 0.6 0.6 0.2 0.2 0.2
$mids
[1] -1.25 -0.75 -0.25 0.25 0.75 1.25
$xname
[1] "vals"
$equidist
[1] TRUE
attr(,"class")
[1] "histogram"
You can create the same histogram manually like this:
plot(NULL, type = "n", ylim = c(0,max(A$counts)), xlim = c(range(A$breaks)))
rect(A$breaks[1:(length(A$breaks) - 1)], 0, A$breaks[2:length(A$breaks)], A$counts)
With those parts, you can flip the axes however you like:
plot(NULL, type = "n", xlim = c(0, max(A$counts)), ylim = c(range(A$breaks)))
rect(0, A$breaks[1:(length(A$breaks) - 1)], A$counts, A$breaks[2:length(A$breaks)])
For similar do-it-yourselfing with density(), see:
Axis-labeling in R histogram and density plots; multiple overlays of density plots

I'm not sure whether it is of interest, but I sometimes want to use horizontal histograms without any packages and be able to write or draw at any position of the graphic.
That's why I wrote the following function, with examples provided below. If anyone knows a package to which this would fit well, please write me: berry-b at gmx.de
Please be sure not to have a variable hpos in your workspace, as it will be overwritten with a function. (Yes, for a package I would need to insert some safety parts in the function).
horiz.hist <- function(Data, breaks="Sturges", col="transparent", las=1,
ylim=range(HBreaks), labelat=pretty(ylim), labels=labelat, border=par("fg"), ... )
{a <- hist(Data, plot=FALSE, breaks=breaks)
HBreaks <- a$breaks
HBreak1 <- a$breaks[1]
hpos <<- function(Pos) (Pos-HBreak1)*(length(HBreaks)-1)/ diff(range(HBreaks))
barplot(a$counts, space=0, horiz=T, ylim=hpos(ylim), col=col, border=border,...)
axis(2, at=hpos(labelat), labels=labels, las=las, ...)
print("use hpos() to address y-coordinates") }
For examples
# Data and basic concept
set.seed(8); ExampleData <- rnorm(50,8,5)+5
hist(ExampleData)
horiz.hist(ExampleData, xlab="absolute frequency")
# Caution: the labels at the y-axis are not the real coordinates!
# abline(h=2) will draw above the second bar, not at the label value 2. Use hpos:
abline(h=hpos(11), col=2)
# Further arguments
horiz.hist(ExampleData, xlim=c(-8,20))
horiz.hist(ExampleData, main="the ... argument worked!", col.axis=3)
hist(ExampleData, xlim=c(-10,40)) # with xlim
horiz.hist(ExampleData, ylim=c(-10,40), border="red") # with ylim
horiz.hist(ExampleData, breaks=20, col="orange")
axis(2, hpos(0:10), labels=F, col=2) # another use of hpos()
One shortcoming: the function doesn't work with breakpoints provided as a vector with different widths of the bars.

Thank you, Tim and Paul. You made me think harder and use what hist() actually provides.
This is my solution now (with great help from Alex Pl.):
scatterBar.Norm <- function(x,y) {
zones <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(zones, widths=c(5/7,2/7), heights=c(2/7,5/7))
xrange <- range(x)
yrange <- range(y)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="", cex=0.5)
xhist <- hist(x, plot=FALSE, breaks=seq(from=min(x), to=max(x), length.out=20))
yhist <- hist(y, plot=FALSE, breaks=seq(from=min(y), to=max(y), length.out=20))
top <- max(c(xhist$density, yhist$density))
par(mar=c(0,3,1,1))
barplot(xhist$density, axes=FALSE, ylim=c(0, top), space=0)
x.xfit <- seq(min(x),max(x),length.out=40)
x.yfit <- dnorm(x.xfit, mean=mean(x), sd=sd(x))
x.xscalefactor <- x.xfit / seq(from=0, to=19, length.out=40)
lines(x.xfit/x.xscalefactor, x.yfit, col="red")
par(mar=c(3,0,1,1))
barplot(yhist$density, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
y.xfit <- seq(min(y),max(y),length.out=40)
y.yfit <- dnorm(y.xfit, mean=mean(y), sd=sd(y))
y.xscalefactor <- y.xfit / seq(from=0, to=19, length.out=40)
lines(y.yfit, y.xfit/y.xscalefactor, col="red")
}
For examples:
require(MASS)
#Sigma <- matrix(c(2.25, 0.8, 0.8, 1), 2, 2)
Sigma <- matrix(c(1, 0.8, 0.8, 1), 2, 2)
mvnorm <- mvrnorm(1000, c(0,0), Sigma) ; scatterBar.Norm(mvnorm[,1], mvnorm[,2])
An asymmetric Sigma leads to a somewhat bulkier histogram of the respective axis.
The code is left deliberately "unelegant" in order to increase comprehensibility (for myself when I revisit it later...).
Niels

When using ggplot, flipping axes works very well. See for example this example which shows how to do this for a boxplot, but it works equally well for a histogram I assume. In ggplot one can quite easily overlay different plot types, or geometries in ggplot2 jargon. So combining a density plot and a histogram should be easy.

Related

R - how two have two y-axes with zeroes aligned in the middle of the plot

I am plotting two graphs on the same plot. Each one has a different ylim, and I would like to have the zeroes aligned in the middle of the plot.
This is my code:
# data
time <- seq(0.1, 10, by = 0.1)
det_rot <- runif(100, min=-100, max=100)
vel_mag <- runif(100, min=0, max=5)
# first plot
smoothingSpline = smooth.spline(time, det_rot, spar=0.20)
plot(time, det_rot,lwd=2,
ann=FALSE, las=2, pch="", ylim=c(-100,250)) # , pch=""
lines(smoothingSpline, lwd=2, col="red")
par(new=TRUE)
# second plot
smoothingSpline2 = smooth.spline(time, vel_mag, spar=0.20)
plot(time, vel_mag,
xaxt="n",yaxt="n",xlab="",ylab="",pch="", ylim=c(0,6))
lines(smoothingSpline2, lwd=2, col="blue",)
axis(4)
See the plot:
Simple fix: change ylims to c(-250, 250) and c(-6,6) respectively.

Icons as x-axis labels in R

I would like to plot something like this (from this paper) where icons, in this case small graphs, are used as tick labels.
I get this far, where icons are more or less properly placed:
This is the code:
library(igraph)
npoints <- 15
y <- rexp(npoints)
x <- seq(npoints)
par(fig=c(0.05,1,0.3,1), new=FALSE)
plot(y, xlab=NA, xaxt='n', pch=15, cex=2, col="red")
lines(y, col='red', lwd=2)
xspan <- 0.9
xoffset <- (0.07+0.5/npoints)*xspan
for(i in 1:npoints){
x1 <- (xoffset+(i-1)/npoints)*xspan
x2 <- min(xspan*(xoffset+(i)/npoints),1)
par(fig=c(x1,x2,0,0.5), new=TRUE)
plot(graph.ring(i), vertex.label=NA)
}
However, if the number of points grows (e.g. npoints <- 15) it complains because there is no place for the icons:
Error in plot.new() : figure margins too large
I wonder wether there is a more natural way to do this so that it works for any (reasonable) number of points.
Any advice is welcome.
library(igraph)
npoints <- 15
y <- rexp(npoints)
x <- seq(npoints)
# reserve some extra space on bottom margin (outer margin)
par(oma=c(3,0,0,0))
plot(y, xlab=NA, xaxt='n', pch=15, cex=2, col="red")
lines(y, col='red', lwd=2)
# graph numbers
x = 1:npoints
# add offset to first graph for centering
x[1] = x[1] + 0.4
x1 = grconvertX(x=x-0.4, from = 'user', to = 'ndc')
x2 = grconvertX(x=x+0.4, from = 'user', to = 'ndc')
# abline(v=1:npoints, xpd=NA)
for(i in x){
print(paste(i, x1[i], x2[i], sep='; '))
# remove plot margins (mar) around igraphs, so they appear bigger and
# `figure margins too large' error is avoided
par(fig=c(x1[i],x2[i],0,0.2), new=TRUE, mar=c(0,0,0,0))
plot(graph.ring(i), vertex.label=NA)
# uncomment to draw box around plot to verify proper alignment:
# box()
}

Setting an image as the colour of a polygon

I borrowed this code from statmethods[dot]net. The result is a coloured area under a normal distribution.
mean=100; sd=15
lb=80; ub=120
x <- seq(-4,4,length=100)*sd + mean
hx <- dnorm(x,mean,sd)
plot(x, hx, type="n", xlab="IQ Values", ylab="Density",
main="Normal Distribution", axes=FALSE)
i <- x >= lb & x <= ub
lines(x, hx)
polygon(c(lb,x[i],ub), c(0,hx[i],0), col="red")
area <- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
result <- paste("P(",lb,"< IQ <",ub,") =",
signif(area, digits=3))
mtext(result,2)
I was wondering if it is possible to select an image as the colour of the red polygon?
Many thanks!
Working with your code, I basically suggest plotting an image (I used a random image below) using the png package (based on advice from In R, how to plot with a png as background?) and then plot your lines over that.
mean=100; sd=15
lb=80; ub=120
x <- seq(-4,4,length=100)*sd + mean
hx <- dnorm(x,mean,sd)
# load package and an image
library(png)
ima <- readPNG("Red_Hot_Sun.PNG")
# plot an empty plot with your labels, etc.
plot(1,xlim=c(min(x),max(x)), type="n", xlab="IQ Values", ylab="Density",
main="Normal Distribution", axes=FALSE)
# put in the image
lim <- par()
rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4])
# add your plot
par(new=TRUE)
plot(x, hx, xlim=c(min(x),max(x)), type="l", xlab="", ylab="", axes=FALSE)
i <- x >= lb & x <= ub
lines(x, hx)
# add a polygon to cover the background above plot
polygon(c(x,180,180,20,20), c(hx,0,1,1,0), col="white")
# add polygons to cover the areas under the plot you don't want visible
polygon(c(-20,-20,x[x<=lb],lb), c(-10,min(hx),hx[x<=lb],-10), col="white")
polygon(c(ub,x[x>=ub],200,200), c(-1,hx[x>=ub],min(hx),-1), col="white")
# add your extra text
area <- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
result <- paste("P(",lb,"< IQ <",ub,") =",
signif(area, digits=3))
mtext(result,2)
Gives you:

Overlay histogram on top and right of a plot

I am trying to create a plot similar to the one asked here:
Rotate histogram in R or overlay a density in a barplot
I am replacing the scatterplot with heatmap keeping everything else same without any luck. I tried to modify different parameters but it is not helping.
scatterBarNorm <- function(x, dcol="blue", lhist=20, num.dnorm=5*lhist, ...){
## check input
stopifnot(ncol(x)==2)
## set up layout and graphical parameters
layMat <- matrix(c(2,0,1,3), ncol=2, byrow=TRUE)
layout(layMat, widths=c(5/7, 2/7), heights=c(2/7, 5/7))
ospc <- 0.5 # outer space
pext <- 4 # par extension down and to the left
bspc <- 1 # space between scatter plot and bar plots
par. <- par(mar=c(pext, pext, bspc, bspc),
oma=rep(ospc, 4)) # plot parameters
## scatter plot (removed this)
#plot(x, xlim=range(x[,1]), ylim=range(x[,2]), ...)
##added this
heatmap(x, Rowv=NA, Colv=NA)
## 3) determine barplot and height parameter
## histogram (for barplot-ting the density)
xhist <- hist(x[,1], plot=FALSE, breaks=seq(from=min(x[,1]), to=max(x[,1]),
length.out=lhist))
yhist <- hist(x[,2], plot=FALSE, breaks=seq(from=min(x[,2]), to=max(x[,2]),
length.out=lhist)) # note: this uses probability=TRUE
## determine the plot range and all the things needed for the barplots and lines
xx <- seq(min(x[,1]), max(x[,1]), length.out=num.dnorm) # evaluation points for the overlaid density
xy <- dnorm(xx, mean=mean(x[,1]), sd=sd(x[,1])) # density points
yx <- seq(min(x[,2]), max(x[,2]), length.out=num.dnorm)
yy <- dnorm(yx, mean=mean(x[,2]), sd=sd(x[,2]))
## barplot and line for x (top)
par(mar=c(0, pext, 0, 0))
barplot(xhist$density, axes=FALSE, ylim=c(0, max(xhist$density, xy)),
space=0) # barplot
lines(seq(from=0, to=lhist-1, length.out=num.dnorm), xy, col=dcol) # line
## barplot and line for y (right)
par(mar=c(pext, 0, 0, 0))
barplot(yhist$density, axes=FALSE, xlim=c(0, max(yhist$density, yy)),
space=0, horiz=TRUE) # barplot
lines(yy, seq(from=0, to=lhist-1, length.out=num.dnorm), col=dcol) # line
## restore parameters
par(par.)
}
require(mvtnorm)
X <- abs(rmvnorm(1000, c(0,0), matrix(c(1, 0.8, 0.8, 1), 2, 2)))
scatterBarNorm(X, xlab=expression(italic(X[1])), ylab=expression(italic(X[2])))

layout add line R

In the 4th example of layout function, which can be generated by example(layout),
I want to overlay the line plot on the barplot of samples from a normal distriubtion.
I tried lines(), plot( , add=TRUE), but with no luck.
How can I do that? Or do I have to take a different route from using layout?
Here is one approach:
library(TeachingDemos)
x <- pmin(3, pmax(-3, stats::rnorm(50)))
y <- pmin(3, pmax(-3, stats::rnorm(50)))
xhist <- hist(x, breaks=seq(-3,3,0.5), plot=FALSE)
yhist <- hist(y, breaks=seq(-3,3,0.5), plot=FALSE)
top <- max(c(xhist$density, yhist$density))
xrange <- c(-3,3)
yrange <- c(-3,3)
nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE)
layout.show(nf)
par(mar=c(3,3,1,1))
plot(x, y, xlim=xrange, ylim=yrange, xlab="", ylab="")
par(mar=c(0,3,1,1))
bx.out <- barplot(xhist$density, axes=FALSE, ylim=c(0, top), space=0)
updateusr( bx.out[1:2], 0:1, xhist$mids[1:2], 0:1 )
xdens <- density(x)
lines(xdens$x, xdens$y, col='blue')
par(mar=c(3,0,1,1))
by.out <- barplot(yhist$density, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE)
updateusr( 0:1, by.out[1:2], 0:1, yhist$mids[1:2] )
ydens <- density(y)
lines(ydens$y, ydens$x, col='blue')
Note the change from counts to density so that the "heights" of the bars will match with the density and the use of updateusr from the TeachingDemos package to match the coordinate systems. Instead of updateusr you could also specify width and xlim to the barplot function.

Resources