Beeswarm plot in R: weird gaps? - r

I've made a beeswarm plot in R by basically looping through groups in my data and creating a beeswarm for each group and combining into one plot:
#Build color field
color.By.Category <- function(x) {
if (x == "Polydrug") {
return("red")
} else if (x == "Opioid") {
return("blue")
} else if (x == "Cocaine") {
return("green")
} else if (x == "Gabapentin") {
return("orange")
} else if (x == "Meth") {
return("blue")
} else if (x == "Benzo") {
return("pink")
} else {
return("black")
}
}
# Make 5 rows, the first is the legend
par(mfrow=c(5,1), mar=c(0,10,0,0), oma=c(10,0,5,2), bg="black")
# Build legend
drug.categories <- unique(death.data.2$drug)
colr <- sapply(drug.categories, color.By.Category)
xleg <- seq(-10,110, by=23)
beeswarm(xleg, pwcol=colr,
horizontal = TRUE,
pch=15,
method="center",
cex=1,
xlim=c(-10,110),
ylim=c(-10,4))
text(x=xleg+0, y=.5, labels=toupper(drug.categories), col="white", pos=4, cex=1.1)
races <- unique(death.data.2$race)
for (i in 1:length(races)) {
# Subset to race/ethnicity
current <- death.data.2[death.data.2$race == races[i],]
# Draw the symbols
beeswarm(current$age, pwcol=current$color,
pch=15, method="center",
cex=1, horizontal=TRUE,
ylim=c(-10,10), xlim=c(0,100), axes=FALSE)
# label
mtext(races[i], side = 2, las=1, col="white", cex=.9)
}
x <- seq(0, 100, 5)
axis(side = 1, labels = x, at=x, col="black", col.ticks = "white")
mtext(x, side = 1, outer=FALSE, at=x, line=.8, col="white", cex=.8)
mtext("Age", side=1, at=x[1], outer=FALSE, col="white", line=2, adj=0.05, padj=.5, cex=.8)
# Title
mtext("Overdose Deaths by Substance Type", side=3, outer=TRUE, line=1, cex=1.5, col="white")
Which looks like this:
You can see there are weird gaps (i.e. blank lanes) where no data is showing, which almost looks like some kind of artifact from how the plot is rendering. What's going on here?

I believe you can remove the gaps by setting breaks = NA in your "Draw the symbols" beeswarm() call. The help file ?beeswarm tells us
breaks Breakpoints (optional). If NULL, breakpoints are chosen
automatically. If NA, bins are not used (similar to stripchart with
method = "stack").
So when the breaks = NA the binning behavior is prevented and the gaps should not appear.
# Draw the symbols
beeswarm(current$age, pwcol=current$color,
pch=15, method="center",
cex=1, horizontal=TRUE,
ylim=c(-10,10), xlim=c(0,100), axes=FALSE, breaks = NA)

Related

R plot remove dashed lines

I was trying to follow this tutorial (https://popgen.nescent.org/2018-03-27_RDA_GEA.html) and plot the RDA, but I would like to remove the two dashed lines (x=0 and y=0). Does anyone know how to get rid of them?
This is the graph I'm talking about
According to this post you can alter a plot.rda() to remove the dotted lines if you build the plot up yourself from scratch, but it's a complicated/challenging task. The easiest/best solution in my opinion is to draw white lines over the dotted lines with abline(h = 0, v = 0, col = "white", lwd = 2) and redraw the plot borders with box() before you plot the points/lines. See the ## PLOTTING ## section below for an example:
## OBTAIN & LOAD THE DATA ##
#install.packages(c("psych","vegan"), dependencies=TRUE)
library(psych) # Used to investigate correlations among predictors
library(vegan) # Used to run RDA
temp <- tempfile()
download.file("https://github.com/NESCent/popgenInfo/blob/master/data/wolf_geno_samp_10000.zip?raw=true",
temp)
gen <- read.csv(unzip(temp, "wolf_geno_samp_10000.csv"), row.names=1)
dim(gen)
sum(is.na(gen))
gen.imp <- apply(gen,
2,
function(x) replace(x,
is.na(x),
as.numeric(names(which.max(table(x))))))
sum(is.na(gen.imp)) # No NAs
env <- read.csv(url("https://raw.githubusercontent.com/NESCent/popgenInfo/master/data/wolf_env.csv"))
str(env)
env$individual <- as.character(env$individual)
env$land_cover <- as.factor(env$land_cover)
identical(rownames(gen.imp), env[,1])
pairs.panels(env[,5:16], scale=T)
pred <- subset(env, select=-c(precip_coldest_quarter, max_temp_warmest_month, min_temp_coldest_month))
table(pred$land_cover)
pred <- subset(pred, select=-c(land_cover))
pred <- pred[,5:12]
colnames(pred) <- c("AMT","MDR","sdT","AP","cvP","NDVI","Elev","Tree")
pairs.panels(pred, scale=T)
wolf.rda <- rda(gen.imp ~ ., data=pred, scale=T)
wolf.rda
RsquareAdj(wolf.rda)
summary(eigenvals(wolf.rda, model = "constrained"))
screeplot(wolf.rda)
signif.full <- anova.cca(wolf.rda, parallel=getOption("mc.cores"))
signif.full
signif.axis <- anova.cca(wolf.rda, by="axis", parallel=getOption("mc.cores"))
signif.axis
vif.cca(wolf.rda)
plot(wolf.rda, scaling=3)
plot(wolf.rda, choices = c(1, 3), scaling=3)
levels(env$ecotype) <- c("Western Forest","Boreal Forest","Arctic","High Arctic","British Columbia","Atlantic Forest")
eco <- env$ecotype
bg <- c("#ff7f00","#1f78b4","#ffff33","#a6cee3","#33a02c","#e31a1c")
## PLOTTING ##
plot(wolf.rda, type="n", scaling=3)
abline(h = 0, v = 0, col = "white", lwd = 2)
box()
points(wolf.rda, display="species", pch=20, cex=0.7, col="gray32", scaling=3) # the SNPs
points(wolf.rda, display="sites", pch=21, cex=1.3, col="gray32", scaling=3, bg=bg[eco]) # the wolves
text(wolf.rda, scaling=3, display="bp", col="#0868ac", cex=1) # the predictors
legend("bottomright", legend=levels(eco), bty="n", col="gray32", pch=21, cex=1, pt.bg=bg)
plot(wolf.rda, type="n", scaling=3, choices=c(1,3))
abline(h = 0, v = 0, col = "white", lwd = 2)
box()
points(wolf.rda, display="species", pch=20, cex=0.7, col="gray32", scaling=3, choices=c(1,3))
points(wolf.rda, display="sites", pch=21, cex=1.3, col="gray32", scaling=3, bg=bg[eco], choices=c(1,3))
text(wolf.rda, scaling=3, display="bp", col="#0868ac", cex=1, choices=c(1,3))
legend("topleft", legend=levels(eco), bty="n", col="gray32", pch=21, cex=1, pt.bg=bg)
Also, in future, if you could please post the code required to obtain and load the data or a minimal, reproducible example, it would have made this question a lot easier to answer; see How to make a great R reproducible example

Create gradient color bar with ggplot2

I have the following code to create color bars, modified from here:
color.bar <- function(lut, title='') {
min <- -1
max <- -min
nticks <- 5
ticks <- seq(min, max, len=nticks)
scale <- length(lut)/(max-min)
pdf(NULL)
dev.control(displaylist="enable")
plot(c(min,max), c(10,0), type='n', bty='n', xaxt='n', xlab='', yaxt='n', ylab='', main=title, cex.main=3)
axis(1, ticks, las=1, labels=c('MIN','','','','MAX'), cex.axis=2)
for (i in 1:length(lut)) {
x = (i-1)/scale + min
rect(x, 0, x+1/scale, 10, col=lut[i], border=NA)
}
P <- recordPlot()
invisible(dev.off())
return(P)
}
myplot <- color.bar(colorRampPalette(c("light green", "yellow", "orange", "red"))(100), "Intensity")
myplot
Which produces the following:
Now what I would need would be to do the exact same in ggplot2, cause I want to add the result, along with a list of ggplots, to a pdf using grid.arrange.
I do not really know how to start... Anybody can help me get started to produce the same output using ggplot2?

If function and using dots from a graph

I am making a graph using these datas:
Upper_limit_graph_wt <- (((log(8)/50:5000)-log(d_wt))/log(g_wt))
Lower_limit_graph_wt <- (((log(1/8)/50:5000)-log(d_wt))/log(g_wt))
plot(Upper_limit_graph_wt, type="l", ylim=g_range, xlim=range(0:5000), ann=FALSE, col="pink")
par(new=TRUE)
plot(Lower_limit_graph_wt, type="l", ylim=g_range, xlim=range(0:5000), ann=FALSE, col="gold")
par(new=TRUE)
plot(Total_count, (Alt_count/Total_count), pch=16, ann=FALSE, ylim=g_range, xlim=range(0:5000), col="dark green")
I can't add an image but I basically get a graph with 2 curves and 1 dot.
The coordinates for my dot are x=Total_count and y=(Alt_count/Total_count)
However I don't seem to be able to add the function if else
When I do :
if((Total_count, (Alt_count/Total_count))> Upper_limit_graph)print"Fail"
It tells me "," was unexpected
How do I make it print something when my dot is above my curves?
Thanks
You should use text to add the text to your plot. Please see as follows:
d_wt <- 2
g_wt <- 3
Upper_limit_graph_wt <- (((log(8) / 50:5000) - log(d_wt)) / log(g_wt))
Lower_limit_graph_wt <- (((log(1/8)/50:5000) - log(d_wt)) / log(g_wt))
g_range <- seq_along(Upper_limit_graph_wt)
plot(c(Lower_limit_graph_wt, Upper_limit_graph_wt), xlim = range(g_range), type = "n")
lines(Lower_limit_graph_wt)
lines(Upper_limit_graph_wt)
Total_count <- 1000
Alt_count <- -600
points(Total_count, Alt_count/Total_count, cex = 2, col = "blue", pch = 19)
text(2000, -0.62,
ifelse((Alt_count / Total_count > Upper_limit_graph_wt[Total_count]), "Fail", ""),
col = "red")
Output:

Avoid multiple plots overlay in `pdf` function when looping

I have to create some graphs through a loop. The graphs are multipaneled and each panel has three different layers.
I tried this code
pdf('plot.pdf', width=14, height=7)
R <- dim(dataset)[1]
for (i in 1:R) {
par(mfrow=c(1,2))
par(mfg=c(1,1))
plot(...)
points(...)
polygon(...)
par(mfg=c(1,2)
plot(...)
points(...)
polygon(....)
}
dev.off()
but the result is a single graph (and not one graph per loop) fully overlaid.
Graph
Is there an issue in looping with the par function?
EDIT: here's a reproducible example. I tried with split.screen, but the result is the same single-paged pdf with overlaid plots.
The issue seems related to the pdf function itself, since the loop does the job correctly.
set.seed(123)
## create data
varA1 <- matrix(rnorm(60,5,1), nrow=3)
varA2 <- matrix(rnorm(60,5,1), nrow=3)
varB1 <- matrix(rnorm(80,20,10), nrow=4)
varB2 <- matrix(rnorm(80,30,20), nrow=4)
sitesA <- 1:nrow(varA1)
sitesB <- 1:nrow(varB1)
totsites <- 1:max(sitesA, sitesB)
## create pdf
pdf('prova.pdf', width=14, height=7)
for(i in totsites) { # the pdf should contain "totsites" number of pages (in this case, 4)
split.screen(c(1,2))
if(i %in% sitesA) {
screen(1)
plot(var1[i,], ylim=c(0, max(c(var1, var2))), col='darkred', type='b', pch=16)
points(var2[i,], col='red', type='b', pch=16)
polygon(c(1:20,rev(1:20)),c(var1[i,]-1,rev(var1[i,]+1)), col=rgb(100, 0, 0, maxColorValue=255, alpha=50), border=NA)
}
if(i %in% sitesB) {
screen(2)
plot(var3[i,], ylim=c(0, max(c(var3, var4))), col='darkgreen', type='b', pch=16)
points(var4[i,], col='green', type='b', pch=16)
polygon(c(1:20,rev(1:20)),c(var3[i,]-10,rev(var3[i,]+10)), col=rgb(0, 100, 0, maxColorValue=255, alpha=50), border=NA)
}
}
dev.off()
BTW, I got this warning
Warning message:
In par(new = TRUE) : calling par(new=TRUE) with no plot
Using layout() rather than split.screen() seems to be a better option. Also keep it outside the loop.
pdf('prova.pdf', width=14, height=7)
layout(matrix(1:2, nrow=1))
for(i in totsites) { # the pdf should contain "totsites" number of pages (in this case, 4)
if(i %in% sitesA) {
plot(varA1[i,], ylim=c(0, max(c(varA1, varA2))), col='darkred', type='b', pch=16)
points(varA2[i,], col='red', type='b', pch=16)
polygon(c(1:20,rev(1:20)),c(varA1[i,]-1,rev(varA1[i,]+1)), col=rgb(100, 0, 0, maxColorValue=255, alpha=50), border=NA)
} else {
plot.new()
}
if(i %in% sitesB) {
plot(varB1[i,], ylim=c(0, max(c(varB1, varB2))), col='darkgreen', type='b', pch=16)
points(varB2[i,], col='green', type='b', pch=16)
polygon(c(1:20,rev(1:20)),c(varB1[i,]-10,rev(varB1[i,]+10)), col=rgb(0, 100, 0, maxColorValue=255, alpha=50), border=NA)
} else {
plot.new()
}
}
dev.off()

Arrange points and lines in an r plot legend

Is it possible to rearrange the legend of the following plot
plot(1,1, type="n")
legend("topleft", c("1", "2"), col=c("darkblue", "darkred"), pch = 1, bty = "n", horiz = T, lwd=1.25, cex=1.8)
to look like this ("point-line-point" pattern)?
Usually, if you want this level of control over plot elements, you'll have to do it manually with primitives (points(), lines()/segments(), text(), etc.) and careful calculations from the plot parameters (e.g. par('usr')). It's not easy. Here's an example of how this could be done:
point.line.point <- function(x1,y1,x2=x1,y2=y1,...) {
points(c(x1,x2),c(y1,y2),...);
segments(x1,y1,x2,y2,...);
};
legend.plp <- function(x,y,labels,col,linewidth=diff(par('usr')[1:2])/10,textgap=diff(par('usr')[1:2])/20,...) {
comb <- cbind(labels,col);
xc <- x;
for (i in seq_len(nrow(comb))) {
x2 <- xc+linewidth;
point.line.point(xc,y,x2,col=comb[i,'col'],...);
text(x2+textgap,y,comb[i,'labels'],...);
xc <- x2+textgap*1.5+strwidth(comb[i,'labels']);
};
};
plot(1,1,type="n");
legend.plp(par('usr')[1]+diff(par('usr')[1:2])/20,par('usr')[4]-diff(par('usr')[3:4])/20,1:2,c('darkblue','darkred'),font=2,cex=1.5);
Here is an alternative solution that is the opposite of elegant. It involves embedding a couple of plots (one per legend), and a great deal of manual manipulation (to set the 'legends' where you want them to be):
library(Hmisc)
data(mtcars)
#plots the one in blue
plot(mtcars$cyl, type="o", col="darkblue")
#plots the one in red
lines(mtcars$carb, type="o", col="darkred")
#name the legends
text(6.5,7, "Cyl", font=2)
text(14,7, "Carb", font=2)
#add the subplots, it's actually a normal plot wrapped around the subplot with the x and y positions
subplot(plot(c(1,0),c(1,1), xlab=NA, ylab=NA, xaxt="n", yaxt="n", col="darkblue", type="o", axes=FALSE), 3, 7)
subplot(plot(c(1,0),c(1,1), xlab=NA, ylab=NA, xaxt="n", yaxt="n", col="darkred", type="o", axes=FALSE), 10, 7)
That yields the following plot:

Resources