combine histogram with scatter plot in R - r

I am trying to produce a plot with histogram and scatter plot in just one plot using a secondary axis. In detail, here is an example data:
#generate example data
set.seed(1)
a <- rnorm(200,mean=500,sd=35)
data <- data.frame(a = a,
b = rnorm(200, mean=10, sd=2),
c = c(rep(1,100), rep(0,100)))
# produce a histogram of data$a
hist(a, prob=TRUE, col="grey")
#add a density line
lines(density(a), col="blue", lwd=2)
#scatter plot
plot(data$a,data$b,col=ifelse(data$c==1,"red","black"))
What I want to do is to combine the histogram and scatter plot together. This implies my x-axis will be data$a, my primary y-axis is the frequency/density for the histogram and my secondary y-axis is data$b.

Maybe something like this...
# produce a histogram of data$a
hist(a, prob=TRUE, col="grey")
#add a density line
lines(density(a), col="blue", lwd=2)
par(new = TRUE)
#scatter plot
plot(data$a,data$b,col=ifelse(data$c==1,"red","black"),
axes = FALSE, ylab = "", xlab = "")
axis(side = 4, at = seq(4, 14, by = 2))

There's a good blog on this here http://www.r-bloggers.com/r-single-plot-with-two-different-y-axes/.
Basically, as the blog describes you need to do:
par(new = TRUE)
plot(data$a,data$b,col=ifelse(data$c==1,"red","black"), axes = F, xlab = NA, ylab = NA)
axis(side = 4)

Related

"col" argument in plot function not working when a factor value is used for x - axis

I am doing quarterly analysis, for which I want to plot a graph. To maintain continuity on x axis I have turned quarters into factors. But then when I am using plot function and trying to color it red, the col argument is not working.
An example:
quarterly_analysis <- data.frame(Quarter = as.factor(c(2020.1,2020.2,2020.3,2020.4,2021.1,2021.2,2021.3,2021.4)),
AvgDefault = as.numeric(c(0.24,0.27,0.17,0.35,0.32,0.42,0.38,0.40)))
plot(quarterly_analysis, col="red")
But I am getting the graph in black color as shown below:
Converting it to a factor is not ideal to plot unless you have multiple values for each factor - it tries to plot a box plot-style plot. For example, with 10 observations in the same factor, the col = "red" color shows up as the fill:
set.seed(123)
fact_example <- data.frame(factvar = as.factor(rep(LETTERS[1:3], 10)),
numvar = runif(30))
plot(fact_example$factvar, fact_example$numvar,
col = "red")
With only one observation for each factor, this is not ideal because it is just showing you the line that the box plot would make.
You could use border = "red:
plot(quarterly_analysis$Quarter,
quarterly_analysis$AvgDefault, border="red")
Or if you want more flexibility, you can plot it numerically and do a little tweaking for more control (i.e., can change the pch, or make it a line graph):
# make numeric x values to plot
x_vals <- as.numeric(substr(quarterly_analysis$Quarter,1,4)) + rep(seq(0, 1, length.out = 4))
par(mfrow=c(1,3))
plot(x_vals,
quarterly_analysis$AvgDefault, col="red",
pch = 7, main = "Square Symbol", axes = FALSE)
axis(1, at = x_vals,
labels = quarterly_analysis$Quarter)
axis(2)
plot(x_vals,
quarterly_analysis$AvgDefault, col="red",
type = "l", main = "Line graph", axes = FALSE)
axis(1, at = x_vals,
labels = quarterly_analysis$Quarter)
axis(2)
plot(x_vals,
quarterly_analysis$AvgDefault, col="red",
type = "b", pch = 7, main = "Both", axes = FALSE)
axis(1, at = x_vals,
labels = quarterly_analysis$Quarter)
axis(2)
Data
set.seed(123)
quarterly_analysis <- data.frame(Quarter = as.factor(paste0(2019:2022,
rep(c(".1", ".2", ".3", ".4"),
each = 4))),
AvgDefault = runif(16))
quarterly_analysis <- quarterly_analysis[order(quarterly_analysis$Quarter),]

Reverse the scale of the x axis in a plot

I have created a plot in R and my own custom x and y axes. I would like the x axis to be displayed in a reverse order (1-0 by -.02). I have read numerous posts and threads that suggest using xlim and reverse range but I just can't seem to make it work. Once plotted I am also converting the axes labels to percentages by multiplying by 100 (as you will see in the code). Here is what I have so far;
plot(roc.val, xlab = "Specificity (%)", ylab = "Sensitivity (%)", axes = FALSE)
axis(2, at = seq(0,1,by=.2), labels = paste(100*seq(0,1, by=.2)), tick = TRUE)
axis(1, at = seq(0,1,by=.2), labels = paste(100*seq(0,1, by=.2)), tick = TRUE)
How can I reverse the x axis scale so that the values begin at 100 and end at 0 with increments of 20?
I think this creates a plot in which the y-axis is in reverse order:
x <- seq(-4, 4, length = 10)
y <- exp(x) / (1 + exp(x))
plot(x,y, ylim = rev(range(y)))
This removes the axis values:
x <- seq(-4, 4, length = 10)
y <- exp(x) / (1 + exp(x))
plot(x,y, ylim = rev(range(y)), labels = FALSE)
I guess you can assign the axis values you want then with a variation of your lines:
axis(2, at = seq(0,1,by=.2), labels = paste(100*seq(0,1, by=.2)), tick = TRUE)
axis(1, at = seq(0,1,by=.2), labels = paste(100*seq(0,1, by=.2)), tick = TRUE)
df <- data.frame(x=seq(0,1, length.out=50), y=seq(0, 1, length.out=50))
plot(df)
df$x1 <- (max(df$x) - df$x)/ (max(df$x) - min(df$x))
plot(df$x1, df$y, axes=F, xlab = "Specificity (%)", ylab = "Sensitivity (%)")
axis(2, at = seq(0,1,by=.2), labels = paste(100*seq(0,1, by=.2)), tick = TRUE)
axis(1, at = seq(0,1,by=.2), labels = paste(100*seq(1,0, by=-.2)), tick = TRUE)
Adapting Mark Miller's answer to solve a similar problem (I found this topic by looking for the solution) and I found a variation of his solution in https://tolstoy.newcastle.edu.au/R/help/05/03/0342.html.
Basically if you want to reverse the X-axis values in the plot, instead of using ylim=rev(range(y)), you can use xlim=rev(c(-4,4)).
x <- seq(-4, 4, length = 10)
y <- exp(x) / (1 + exp(x))
par(mfrow=c(1,2))
plot(x, y, ylim=range(y), xlim=c(-4, 4))
plot(x, y, ylim=range(y), xlim=rev(c(-4, 4)))
plot1
And if you want to keep the x-axis values in the true order, you can use this:
par(mfrow=c(1,1))
plot(x, y, ylim=range(y), xlim=c(-4, 4), axes=FALSE)
par(new=TRUE)
plot(-100, -100, ylim=range(y), xlim=c(-4, 4), axes=FALSE, xlab="", ylab="", main="")
axis(1, at = seq(-4,4,by=1), labels = seq(-4,4,by=1), tick = TRUE)
axis(2, at = seq(0,1,by=.2), labels = paste(100*seq(0,1, by=.2)), tick = TRUE)
plot2
I'm posting this solution because I needed something very straightforward to solve my problem. And the solution for it needed the plot with the X-axis value in the correct order (and not reversed).
First, check out the ggplot2 library for making beautiful and extendable graphics. It is part of the Tidyverse approach to R and a gamechanger if you have not been exposed to it.
For example, to solve your issue using ggplot, you simply add the term scale_x_reverse() to your graphic.
See: http://ggplot.yhathq.com/docs/scale_x_reverse.html

create a secondary y-axis in R

I had a question regarding creating secondary y-axis in R. Here is an example dataset
#generate some artifical data
per_cur <- runif(1171, 0.1, 7.62)
obs<-runif(1171,100,1000)
#create a density histogram of per_cur
par(mfrow=c(2,1))
op <- par(mar = c(5,4,4,4) + 0.5)
hist(per_cur, prob=TRUE, border="white",main=NULL,las=1,cex.axis=0.8,ann = FALSE)
lines(density(per_cur), col="blue",lwd=2)
#add obs with a secondary y-axis
par(new = TRUE)
plot(per_cur,obs, cex=.5, pch=16, col=adjustcolor("black",alpha=0.2), axes = FALSE, ylab="Density")
axis(4,cex.axis=0.5)
It produces a plot which tells me the distribution of per_cur and also shows my the relationship
between per_cur and obs through the secondary y-axis. However, when I run the following code with the only
difference that I set the limit of primary y-axis using ylim=c(0,0.3) you can see the plot completely changes.
It gives the impression that relationship between the obs and pre_cur is different in both plots (more obs points
come under the curve in first plot compared to the second plot).
op <- par(mar = c(5,4,4,4) + 0.5)
hist(per_cur, prob=TRUE,ylim=c(0,0.3), border="white",main=NULL,las=1,cex.axis=0.8,ann = FALSE)
lines(density(per_cur), col="blue",lwd=2)
par(new = TRUE)
plot(per_cur,obs, cex=.5, pch=16, col=adjustcolor("black",alpha=0.2), axes = FALSE, ylab="Density")
axis(4,cex.axis=0.5)
I wanted to ask is there any way my secondary y-axis also get adjusted as I adjust the primary y-axis limit so that
equal number of obs points are under the curve in both plots. Hope this is clear.
This can be accomplished by manipulating the ylim on each plot. For example:
#generate some artifical data
per_cur <- runif(1171, 0.1, 7.62)
obs<-runif(1171,100,1000)
#create a density histogram of per_cur
# use these variables to set the limits on all plots
y1max = max(density(per_cur)$y)
y2max = max(obs)
par(mfrow=c(2,1))
op <- par(mar = c(5,4,4,4) + 0.5)
hist(per_cur, prob=TRUE, ylim = c(0, y1max), border="white",main=NULL,las=1,cex.axis=0.8,ann = FALSE)
lines(density(per_cur), col="blue",lwd=2)
#add obs with a secondary y-axis
par(new = TRUE)
plot(per_cur,obs, cex=.5, ylim = c(0, y2max), pch=16, col=adjustcolor("black",alpha=0.2), axes = FALSE, ylab="Density")
axis(4,cex.axis=0.5)
Then you scale each axis by the same factor:
# used to scale the axes
factor <- 2
op <- par(mar = c(5,4,4,4) + 0.5)
hist(per_cur, prob=TRUE, ylim = c(0, y1max * factor), border="white",main=NULL,las=1,cex.axis=0.8,ann = FALSE)
lines(density(per_cur), col="blue",lwd=2)
par(new = TRUE)
plot(per_cur,obs, cex=.5, ylim = c(0, y2max * factor),pch=16, col=adjustcolor("black",alpha=0.2), axes = FALSE, ylab="Density")
axis(4,cex.axis=0.5)

How to do 3D bar plot in R

I would like to produce this kind of graph:
However, I don't know how to do it using R. I was wondering if someone knew a solution to do it in R?
I would use the package rgl.
library(rgl)
# load your data
X= c(1:6)
Y=seq(10,70, 10)
Z=c(-70, -50, -30, -20, -10, 10)
# create an empty plot with the good dimensions
plot3d(1,1,1, type='n', xlim=c(min(X),max(X)),
ylim=c(min(Y),max(Y)),
zlim=c(min(Z),max(Z)),
xlab="", ylab="", zlab="", axe=F )
# draw your Y bars
for(i in X){ segments3d(x = rep(X[i],2), y = c(0,Y[i]), z=0, lwd=6, col="purple")}
# do the same for the Z bars
plot3d(X,0,Z, add=T, axe=F, typ="n")
for(i in X){segments3d(x = rep(X[i],2), y = 0, z= c(0,Z[i]), lwd=6, col="blue" )}
# draw your axis
axes3d()
mtext3d(text = "Time (days)", edge = "y+", line =3, col=1 )
mtext3d(text = "Change %", edge = "z++", line = 5, col=1 )
However I have found the width of the bars restricted to 6. That could be a limit. Better looking when you have more data.
Hope it could help.

Can anybody help figure out why my labels for the y-axis and x-axis are not appearing?

As part of my code to have a 4 rows by 2 columns panel with 8 plots I was suggested to use the code below as an example but when doing so I cannot get the text on the y and x axis. Please see the code below.
#This is the code to have the plots as 4 x 2 in the page
m <- rbind(c(1,2,3,4), c(5,6,7,8) )
layout(m)
par(oma = c(6, 6, 1, 1)) # manipulate the room for the overall x and y axis titles
par(mar = c(.1, .1, .8, .8)) # manipulate the plots be closer together or further apart
###this is the code to insert for instance one of my linear regression plots as part of this panel (imagine I have other 7 identical replicates of this)
####ASF 356 standard curve
asf_356<-read.table("asf356.csv", head=TRUE, sep=',')
asf_356
# Linear Regression
fit <- lm( ct ~ count, data=asf_356)
summary(fit) # show results
predict.lm(fit, interval = c("confidence"), level = 0.95, add=TRUE)
newx <- seq(min(asf_356$count), max(asf_356$count), 0.1)
a <- predict(fit, newdata=data.frame(count=newx), interval="confidence")
plot(x = asf_356$count, y = asf_356$ct, xlab="Log(10) for total ASF 356 genome copies", ylab="Cycle threshold value", xlim=c(0,10), ylim=c(0,35), lty=1, family="serif")
curve(expr=fit$coefficients[1]+fit$coefficients[2]*x,xlim=c(min(asf_356$count), max(asf_356$count)),col="black", add=TRUE, lwd=2)
lines(newx,a[,2], lty=3)
lines(newx,a[,3], lty=3)
legend(x = 0.5, y = 20, legend = c("Logistic regression model", "95% individual confindence interval"), lty = c("solid", "dotdash"), col = c("black", "black"), enter code herebty = "n")
mod.fit=summary(fit)
r2 = mod.fit$r.squared
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(x = 8.2, y = 25, labels = mylabel)
legend(x = 7, y = 35, legend =c("y= -3.774*x + 41.21"), bty="n")
I have been able to find a similar post here and the argument that I was missing was :
title(xlab="xx", ylab="xx", outer=TRUE, line=3, family="serif")
Thanks
Finally I have my work..thanks to whom helped me before as well

Resources