I am trying to create this stratified histogram on R, however I am not getting the right plot. I would like to use the rect function as well if possible.
How would I write an R function to create this stratified histogram using the iris dataset in R?
This is the code so far:
strathist = function(x, y, ylab = "Frequency", xlab = "", main = ""){
cols = hcl(h=seq(0, 360, by = 120))
h = hist(x, breaks = 24, plot = FALSE)
tb = table(y, cut(x, h$breaks))
plot.new()
barplot(tb, ylim = c(0, max(h$count)), col = cols,
ylab = ylab, xlab = xlab, main = main, axisnames = FALSE)
box()
axis(1, 0:(length(h$breaks)-1), h$breaks)
axis(2)
legend("topright", c(rownames(tb)), fill = cols)
}
with(iris, strathist(Sepal.Width, Species, xlab = "Sepal.Width", main = "Stratified Histogram"))
There are a few different ways to do this, if you insist on using R base, you could use two barplots, one to add the colors and one to add the boxes:
strathist = function(x,
y,
ylab = "Frequency",
xlab = "",
main = "") {
cols = hcl(h = seq(0, 360, by = 120))
h = hist(x,
breaks = 24,
plot = F)
tb = table(y, cut(x, h$breaks))
ylim <- c(-.05 * max(colSums(tb)),
1.25 * max(colSums(tb)))
barplot(
tb,
col = cols,
ylim = ylim,
ylab = ylab,
xlab = xlab,
main = main,
axisnames = FALSE,
border = NA,
space = 0
)
barplot(
colSums(tb),
ylim = ylim,
col = NA,
ylab = ylab,
xlab = xlab,
main = main,
axisnames = FALSE,
add = T,
space = 0
)
box()
axis(1, seq(0,
length(h$breaks) - 1,
by = 5),
seq(min(x),max(x), by = .5))
axis(2)
legend("topright", c(rownames(tb)), fill = cols,border = NA,)
}
with(
iris,
strathist(Sepal.Width, Species, xlab = "Sepal.Width", main = "Stratified Histogram")
)
Or you could go the somewhat easier route to use ggplot2:
library(ggplot2)
ggplot(iris,aes(x = Sepal.Width)) +
geom_histogram(bins = 25,aes(fill = Species)) +
geom_histogram(bins = 25, fill = NA, color = 'black') +
theme_minimal()
Created on 2020-09-13 by the reprex package (v0.3.0)
Related
I have a 2 x 2 figure where the columns represent two different variables and the rows represent two different locations. How can I add the names of the two locations to the two rows?
Example Data
library(biwavelet)
par(mfrow = c(2,2),
oma = c(3,3,0,0) + 0.1,
mar = c(1,1,1,1) + 0.1)
dat <- as.data.frame(matrix(nrow = 500, ncol = 2))
dat[1] <- seq(1,500)
dat[2] <- sin(dat[1])
# top-left figure
plot(wt(dat),
xaxt = 'n',
xlab = "",
cex.axis = 1.5,
cex.lab = 1.5)
axis(1, at = seq(0, 500, by = 100), cex.axis = 0.1, col.axis = 'NA')
title("Variable 1", line = 0.1, cex.main = 1.5)
# top-right figure
dat[2] <- sin(dat[1]*.5)
plot(wt(dat),
xaxt = 'n',
col.axis = 'NA',
xlab = "",
ylab = "")
axis(1, at = seq(0, 500, by = 100), cex.axis = 0.1, col.axis = 'NA')
title("Variable 2", line = 0.1, cex.main = 1.5)
# bottom-left figure
dat[2] <- sin(dat[1]*.25)
plot(wt(dat),
cex.axis = 1.5)
# bottom-right figure
dat[2] <- sin(dat[1]*.125)
plot(wt(dat),
col.axis = 'NA',
ylab = "",
xlab = "")
axis(1, at = seq(0, 500, by = 100), cex.axis = 1.5)
title(xlab = "Time (hours)",
ylab = "Period",
outer = TRUE,
line = 1.5,
cex.lab = 1.5)
The ideal figure would look like this
Neither of the suggested solutions so far have put the right-hand labels in the orientation requested. You cannot do so with mtext, but rather need to use text, first allowing text to display outside the plot region with par(xpd=NA). (See ?text and ?par, where one reads that the srt,rotation, parameter only applies to text):
par(mfrow=c(2,2), xpd=FALSE)
par(mar = c(1,1,1,1) + 2)
plot(disp ~ mpg, data = mtcars)
mtext("disp 1", side=3)
plot(I(2*disp) ~ mpg, data = mtcars)
mtext("disp 2", side=3); par(xpd=NA)
text("mpg 1", x=36, y=500, srt=270)
plot(disp ~ I(2*mpg), data = mtcars); par(xpd=NA)
plot(I(2*disp) ~ I(2*mpg), data = mtcars)
text("mpg 2", x=72, y=500, srt=270)
It is not as automatic as mtext in the sense that one needs to look at each plotting figure separately to derive a estimate for the x and y positions in the plotting coordinates.
Here's one method:
par(mfrow=c(2,2))
par(mar = c(1,1,1,1) + 0.1)
plot(disp ~ mpg, data = mtcars)
mtext("disp 1", side=3)
plot(I(2*disp) ~ mpg, data = mtcars)
mtext("disp 2", side=3)
mtext("mpg 1", side=4)
plot(disp ~ I(2*mpg), data = mtcars)
plot(I(2*disp) ~ I(2*mpg), data = mtcars)
mtext("mpg 2", side=4)
Unfortunately, mtext does not support rotating text, so you're stuck with the right labels being oriented as they are.
In case anyone stumbles across this page, the code I ended up using was based on #IRTFM answer, see below. It was challenging finding the appropriate y = in the text() function. I used an iterative approach and found the y = to be much lower than I anticipated.
library(biwavelet)
par(mfrow = c(2,2), xpd=F)
par(mar = c(1,1,1.1,1.1) + 0.1,
oma = c(3,3,0.5,0.5) + 0.1)
dat <- as.data.frame(matrix(nrow = 500, ncol = 2))
dat[1] <- seq(1,500)
dat[2] <- sin(dat[1])
# top-left figure
plot(wt(dat),
xaxt = 'n',
xlab = "",
cex.axis = 1.5,
cex.lab = 1.5)
axis(1, at = seq(0, 500, by = 100), cex.axis = 1.5, col.axis = 'NA')
mtext("Variable 1", side = 3, cex = 1.5, line = 0.1)
box(lty = "solid", col = 'black')
# top-right figure
dat[2] <- sin(dat[1]*.5)
plot(wt(dat),
xaxt = 'n',
col.axis = 'NA',
xlab = "",
ylab = "")
axis(1, at = seq(100, 500, by = 100), cex.axis = 0.1, col.axis = 'NA')
mtext("Variable 2", side = 3, cex = 1.5, line = 0.1)
text("Location 1", x = 520, y = 4.1, srt = 270, cex = 1.5, xpd=NA)
box(lty = "solid", col = 'black')
# bottom-left figure
dat[2] <- sin(dat[1]*.25)
plot(wt(dat),
cex.axis = 1.5,
xlab = "",
ylab = "")
axis(1, at = seq(100, 500, by = 100), cex.axis = 1.5)
box(lty = "solid", col = 'black')
# bottom-right figure
dat[2] <- sin(dat[1]*.125)
plot(wt(dat),
col.axis = 'NA',
ylab = "",
xlab = "")
axis(1, at = seq(100, 500, by = 100), cex.axis = 1.5)
text("Location 2", x = 520, y = 4.5, srt = 270, cex = 1.5, xpd=NA)
box(lty = "solid", col = 'black')
title(xlab = "Time (hours)",
ylab = "Period",
outer = TRUE,
line = 1.5,
cex.lab = 1.5)
I have a plot that I made using qqmath in the Lattice package (I subset it to only 3 points to make it easier for an example).
table <- data.table(Col1=c(12,3,4), Col2 = c(54,4,6), Col3 = c("Pink", "Pink", "Red"))
PrbGrd <- qnorm(c(0.00001,0.0001,0.001,0.01, 0.05, 0.10,0.20,0.30,0.40,
0.50, 0.60, 0.70,0.80,0.90,0.95,0.99,0.999,0.9999,0.99999))
PrbGrdL<-c("0.001","0.01","0.1","1","5","10","20","30","40","50","60","70","80","90","95","99","99.9","99.99","99.999")
PrbGrdL2<- c("99.999","99.99","99.9","99","95","90","80","70","60","50","40","30","20","10","5","1","0.1","0.01","0.001")
ValGrd<- c(seq(0.001,0.01,0.001),seq(0.01,0.1,0.01),seq(0.1,1,0.1),seq(1,10,1),seq(10,100,10),seq(100,1000,100),seq(1000,10000,1000))
ValGrd<- log10(ValGrd)
ValGrd2 <- c(-2:20)
ProbPlot <- qqmath(~ Col1,
data= table,
distribution = function(p) qnorm(p),
main = "Normal probability plot",
pch=20,
cex=0.5,
xlab="Probability of Lower",
ylab = "Pb",
#xlim = c(max(PrbGrd),min(PrbGrd)),
xlim = c(min(PrbGrd),max(PrbGrd)),
scales=list(y=list(alternating=1),x = list(at = PrbGrd, labels = PrbGrdL, cex = 0.8)),
#yscale.components=yscale.components.log10ticks,
panel=function(x,...){
panel.abline(v=PrbGrd ,col="grey",lty=3)
panel.abline(h=ValGrd2,col="grey",lty=3)
panel.qqmath(x,distribution=qnorm)
}
)
I would like to use the colors that are in the 3rd column of my table (Col3) to change the colors of the respective point on the plot. I can't figure out how to do it within qqmath, this would be simple with the regular plot function... but it doesn't seem as simple with qqmath.
Thank you!
Updated: based on OP's comment.
You can add the col argument, like:
ProbPlot <- qqmath(~ Col1,
data = table,
distribution = function(p) qnorm(p),
main = "Normal probability plot",
pch = 20,
cex = 0.5,
xlab = "Probability of Lower",
ylab = "Pb",
#xlim = c(max(PrbGrd),min(PrbGrd)),
xlim = c(min(PrbGrd),max(PrbGrd)),
scales = list(y = list(alternating = 1),
x = list(at = PrbGrd,
labels = PrbGrdL,
cex = 0.8)),
#yscale.components=yscale.components.log10ticks,
panel = function(x, ...){
panel.abline(v = PrbGrd,
col = "grey",
lty = 3)
panel.abline(h = ValGrd2,
col = "grey",
lty = 3)
panel.qqmath(x,
distribution=qnorm,
col = table$Col3) # add colors for each point
}
)
Any tips to remove the zero labels in between the histogram bars?
hist(links$Survey_Duration, breaks = seq(0,50,5), main = "Survey Duration",
labels = TRUE, border = "black",
xlab = "Survey", ylim = c(0, 15), col = "gray", las = 1, xaxt='n')
axis(side=1, at=seq(0,50,5), labels=seq(0,50,5))
abline(v = mean(links$Survey_Duration), col = "royalblue", lwd = 1.5)
abline(v = median(links$Survey_Duration), col = "red", lwd = 1.5)
legend(x = "topright", c("Mean", "Median"), col = c("royalblue","red"),
lwd = c(1.5,1.5))
How about this?
# modify data so there's zero in one of the bins
mtcars$mpg <- ifelse(mtcars$mpg >= 25 & mtcars$mpg <= 30, NA, mtcars$mpg)
# save plot parameters
h <- hist(mtcars$mpg, plot = FALSE)
# produce plot
plot(h, ylim = c(0, 14))
# add labels manually, recoding zeros to nothing
text(h$mids, h$counts + 1, ifelse(h$counts == 0, "", h$counts))
A slightly different answer using the labeling in hist instead of adding text afterwards.
You do not provide your data, so I will use some data that is handy to illustrate.
The labels argument can specify the individual labels
H1 = hist(iris$Sepal.Length, breaks = 3:8, plot=FALSE)
BarLabels = H1$counts
BarLabels[BarLabels == 0] = ""
hist(iris$Sepal.Length, breaks = 3:8, labels = BarLabels)
Thanks #Daniel Anderson, it Ok now (Thumbs Up)
links$Survey_Duration <- ifelse(links$Survey_Duration > 15 &
links$Survey_Duration <= 25,
NA,
links$Survey_Duration)
h <- hist(links$Survey_Duration, breaks = seq(0,50,5), plot = FALSE)
plot(h, ylim = c(0, 14), main = "Survey Duration", xlab = "Time", col = "gray", las = 1)
text(h$mids, h$counts + 1, ifelse(h$counts == 0, "", h$counts))
axis(side=1, at=seq(0,50,5), labels=seq(0,50,5))
abline(v = mean(links$Survey_Duration), col = "royalblue", lwd = 1.5)
abline(v = median(links$Survey_Duration), col = "red", lwd = 1.5)
legend(x = "topright",
c("Mean", "Median"),
col = c("royalblue","red"),
lwd = c(1.5,1.5))
i am writing up a function to generate a stratified histogram.
Here is my code.
i would like to remove the inside border (so those stack counts are separated by colour only, not colour and border). any ideas?
data("iris")
strathist = function(x, y, ylab = "Frequency", xlab = "", main = ""){
cols = hcl(h=seq(0, 300, by = 50), fixup = FALSE)
h = hist(x, breaks = 20, plot = F)
tb = table(y, cut(x, h$breaks))
par(mar = rep(4, 4))
plot.new()
barplot(tb, space = 0, ylim = c(-0.4, 2 + max(h$count)), col = cols,
ylab = ylab, xlab = xlab, main = main, axisnames = F)
axis(1, 0:(length(h$breaks)-1), h$breaks)
box()
legend("topright", c(rownames(tb)), cex = 0.8, fill = cols)
}
with(iris, strathist(Sepal.Length, Species, xlab = "Sepal.Length", main = "Stratified Histogram of Iris Species"))
I would like to draw a level plot with x and y scales as a log10 ticks.
For example, I have a normal level plot like this.
x <- 10*1:nrow(volcano)
y <- 10*1:ncol(volcano)
filled.contour(x, y, volcano, color = terrain.colors, plot.title = title(main = "Volcano topolgy", xlab = "Meters North", ylab = "Meters West"), plot.axes = { axis(1, seq(100, 800, by = 100)); axis(2, seq(100, 600, by = 100)) }, key.title = title(main = "Height\n(meters)"), key.axes = axis(4, seq(90, 190, by = 10)))
But, the x and y scales are not log tick scales. I found the other library "latticeExtra" with log tick scale function. For example, using the same x and y from above I can draw the log ticks, but cannot fill the contour data.
library(lattice)
library(latticeExtra)
xyplot(y ~ x, scales = list(x = list(log = 10), y = list(log = 10)), xscale.components = xscale.components.log10ticks, yscale.components = yscale.components.log10ticks)
How can I draw a level plot with log tick scales? I would like to plot scatters on the level plot later on as a log location.
Thanks in advance.
Here's an alternative using lattice and latticeExtra
library(lattice)
library(latticeExtra)
xx <- 1:nrow(volcano)
yy <- 1:ncol(volcano)
levelplot(
x = volcano,
xlim = range(xx),
ylim = range(yy),
scales = list(x = list(log = 10), y = list(log = 10)),
xscale.components = xscale.components.log10ticks,
yscale.components = yscale.components.log10ticks
)
if you want to keep using filled.contour you can logtransform the x and y data directly and adjust the axes accordingly with a custom axis statement, but it's not very elegant (the base::plot log = "xy" parameter sadly doesn't do anything in filled.contour):
x <- log(10*1:nrow(volcano))
y <- log(10*1:ncol(volcano))
filled.contour(x, y, volcano, color = terrain.colors,
plot.title = title(main = "Volcano topolgy",
xlab = "Meters North",
ylab = "Meters West"),
plot.axes = { axis(1, at = log(seq(100, 800, by = 100)), labels = seq(100, 800, by = 100));
axis(2, at = log(seq(100, 600, by = 100)), labels = seq(100, 600, by = 100)) },
key.title = title(main = "Height\n(meters)"),
key.axes = axis(4, seq(90, 190, by = 10)))
you can also try if ggplot2 with scale_y_log10() and scale_x_log10() would work for you, see this question and answer.