How to add centroids to an RDA plot - r

I'd like to replace the arrows on this RDA plot with centroids, something like what's pictured here.
This is the code I currently have which provides me arrows (I guess by default). I have shared our RDA code and I think this is where we might be able to change it from arrows to centroid:
# add arrows for effects of the expanatory variables
arrows(0,0, # start them from (0,0)
sc_bp[,1], sc_bp[,2], # end them at the score value
col = "red",
lwd = 1,
length = .1)
(but I share the entire code chunk (below), just in case.
Please note that my data is on fish community (species) and substrate types at 36 sites, I'd like to replace the arrows for substrates with centroids within my RDA.
##Now, the RDA
Y.mat<-Belt_2021_fish_transformed_forPCA #fish community
str(Y.mat)
X.mat<-Reefcheck_2021_forPCA #substrate
str(X.mat)
###Community data has already been transformed with hellinger
##Now, try the RDA
fish_substrate_rda<-rda(Y.mat,X.mat)
```
##Plot
## extract % explained by the first 2 axes
perc_b <- round(100*(summary(fish_substrate_rda)$cont$importance[2, 1:2]), 2)
## extract scores - these are coordinates in the RDA space
sc_si <- scores(fish_substrate_rda, display="sites", choices=c(1,2), scaling=1)
sc_sp <- scores(fish_substrate_rda, display="species", choices=c(1,2), scaling=1)
sc_sp <- sc_sp[c(2,7,8),]
sc_bp <- scores(fish_substrate_rda, display="bp", choices=c(1,2), scaling=1)
sc_bp <- sc_bp[c(2,5,6),]
# Set up a blank plot with scaling, axes, and labels
plot(fish_substrate_rda,
scaling = 1, # set scaling type
type = "none", # this excludes the plotting of any points from the results
frame = TRUE,
# set axis limits
ylim = c(-1.5,0.7),
xlim = c(-1.5,1.2),
# label the plot (title, and axes)
main = "Triplot RDA - scaling 1",
xlab = paste0("RDA1 (", perc_b[1], "%)"),
ylab = paste0("RDA2 (", perc_b[2], "%)")
)
# add points for site scores
points(sc_si,
pch = 21, # set shape (here, circle with a fill colour)
col = "black", # outline colour
bg = "steelblue", # fill colour
cex = 0.7) # size
# add points for species scores
points(sc_sp,
pch = 22, # set shape (here, square with a fill colour)
col = "black",
bg = "#f2bd33",
cex = 0.7)
# add text labels for species abbreviations
text(sc_sp + c(-0.09, -0.09), # adjust text coordinates to avoid overlap with points
labels = rownames(sc_sp),
col = "grey40",
font = 2, # bold
cex = 0.6)
# add arrows for effects of the expanatory variables
arrows(0,0, # start them from (0,0)
sc_bp[,1], sc_bp[,2], # end them at the score value
col = "red",
lwd = 1,
length = .1)
# add text labels for arrows
text(x = sc_bp[,1] -0.01, # adjust text coordinate to avoid overlap with arrow tip
y = sc_bp[,2] - 0.09,
labels = rownames(sc_bp),
col = "red",
cex = .7,
font = 1)
```
I have not found anything online that might help me to accomplish this.

Related

RDA triplot in R- plot only numeric explanatory variables as arrows; factors as centroids

I ran a distance-based RDA using capscale() in the vegan library in R and I am trying to plot my results as a custom triplot. I only want numeric or continuous explanatory variables to be plotted as arrows/vectors. Currently, both factors and numeric explanatory variables are being plotted with arrows, and I want to remove arrows for factors (site and year) and plot centroids for these instead.
dbRDA=capscale(species ~ canopy+gmpatch+site+year+Condition(pair), data=env, dist="bray")
To plot I extracted % explained by the first 2 axes as well as scores (coordinates in RDA space)
perc <- round(100*(summary(spe.rda.signif)$cont$importance[2, 1:2]), 2)
sc_si <- scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1)
sc_sp <- scores(spe.rda.signif, display="species", choices=c(1,2), scaling=1)
sc_bp <- scores(spe.rda.signif, display="bp", choices=c(1, 2), scaling=1)
I then set up a blank plot with scaling, axes, and labels
dbRDAplot<-plot(spe.rda.signif,
scaling = 1, # set scaling type
type = "none", # this excludes the plotting of any points from the results
frame = FALSE,
# set axis limits
xlim = c(-1,1),
ylim = c(-1,1),
# label the plot (title, and axes)
main = "Triplot db-RDA - scaling 1",
xlab = paste0("db-RDA1 (", perc[1], "%)"),
ylab = paste0("db-RDA2 (", perc[2], "%)"))
Created a legend and added points for site scores and text for species
pchh <- c(2, 17, 1, 19)
ccols <- c("black", "red", "black", "red")
legend("topleft", c("2016 MC", "2016 SP", "2018 MC", "2018 SP"), pch = pchh[unique(as.numeric(as.factor(env$siteyr)))], pt.bg = ccols[unique(as.factor(env$siteyr))], bty = "n")
points(sc_si,
pch = pchh[as.numeric(as.factor(env$siteyr))], # set shape
col = ccols[as.factor(env$siteyr)], # outline colour
bg = ccols[as.factor(env$siteyr)], # fill colour
cex = 1.2) # size
text(sc_sp , # text(sc_sp + c(0.02, 0.08) tp adjust text coordinates to avoid overlap with points
labels = rownames(sc_sp),
col = "black",
font = 1, # bold
cex = 0.7)
Here is where I add arrows for explanatory variables, but I want to be selective and do so for numeric variables only (canopy and gmpatch). The variables site and year I want to plot as centroids, but unsure how to do this. Note that the data structure for these are definitely specified as factors already.
arrows(0,0, # start them from (0,0)
sc_bp[,1], sc_bp[,2], # end them at the score value
col = "red",
lwd = 2)
text(x = sc_bp[,1] -0.1, # adjust text coordinate to avoid overlap with arrow tip
y = sc_bp[,2] - 0.03,
labels = rownames(sc_bp),
col = "red",
cex = 1,
font = 1)
#JariOksanen thank you for your answer. I was able to use the following to fix the problem
text(dbRDA, choices = c(1, 2),"cn", arrow=FALSE, length=0.05, col="red", cex=0.8, xpd=TRUE)
text(dbRDA, display = "bp", labels = c("canopy", "gmpatch"), choices = c(1, 2),scaling = "species", arrow=TRUE, select = c("canopy", "gmpatch"), col="red", cex=0.8, xpd = TRUE)
#JariOksanen thank you for your answer. I was able to use the following to fix the problem
text(dbRDA, choices = c(1, 2),"cn", arrow=FALSE, length=0.05, col="red", cex=0.8, xpd=TRUE)
text(dbRDA, display = "bp", labels = c("canopy", "gmpatch"), choices = c(1, 2),scaling = "species", arrow=TRUE, select = c("canopy", "gmpatch"), col="red", cex=0.8, xpd = TRUE)

Add calculated mean value to vertical line in plot in R

I have created a density plot with a vertical line reflecting the mean - I would like to include the calculated mean number in the graph but don't know how
(for example the mean 1.2 should appear in the graph).
beta_budget[,2] is the column which includes the different numbers of the price.
windows()
plot(density(beta_budget[,2]), xlim= c(-0.1,15), type ="l", xlab = "Beta Coefficients", main = "Preis", col = "black")
abline(v=mean(beta_budget[,2]), col="blue")
legend("topright", legend = c("Price", "Mean"), col = c("black", "blue"), lty=1, cex=0.8)
I tried it with the text command but it didn't work...
Thank you for your advise!
Something along these lines:
Data:
set.seed(123)
df <- data.frame(
v1 = rnorm(1000)
)
Draw histogram with density line:
hist(df$v1, freq = F, main = "")
lines(density(df$v1, kernel = "cosine", bw = 0.5))
abline(v = mean(df$v1), col = "blue", lty = 3, lwd = 2)
Include the mean as a text element:
text(mean(df$v1), # position of text on x-axis
max(density(df$v1)[[2]]), # position of text on y-axis
mean(df$v1), # text to be plotted
pos = 4, srt = 270, cex = 0.8, col = "blue") # some graphical parameters

Is there a way to use R to break chart axis and break linear regression line?

I'm trying to figure out how to modify a scatter-plot that contains two groups of data along a continuum separated by a large gap. The graph needs a break on the x-axis as well as on the regression line.
This R code using the ggplot2 library accurately presents the data, but is unsightly due to the vast amount of empty space on the graph. Pearson's correlation is -0.1380438.
library(ggplot2)
p <- ggplot(, aes(x = dis, y = result[, 1])) + geom_point(shape = 1) +
xlab("X-axis") +
ylab("Y-axis") + geom_smooth(color = "red", method = "lm", se = F) + theme_classic()
p + theme(plot.title = element_text(hjust = 0.5, size = 14))
This R code uses gap.plot to produce the breaks needed, but the regression line doesn't contain a break and doesn't reflect the slope properly. As you can see, the slope of the regression line isn't as sharp as the graph above and there needs to be a visible distinction in the slope of the line between those disparate groups.
library(plotrix)
gap.plot(
x = dis,
y = result[, 1],
gap = c(700, 4700),
gap.axis = "x",
xlab = "X-Axis",
ylab = "Y-Axis",
xtics = seq(0, 5575, by = 200)
)
abline(v = seq(700, 733) , col = "white")
abline(lm(result[, 1] ~ dis), col = "red", lwd = 2)
axis.break(1, 716, style = "slash")
Using MS Paint, I created an approximation of what the graph should look like. Notice the break marks on the top as well as the discontinuity between on the regression line between the two groups.
One solution is to plot the regression line in two pieces, using ablineclip to limit what's plotted each time. (Similar to #tung's suggestion, although it's clear that you want the appearance of a single graph rather than the appearance of facets.) Here's how that would work:
library(plotrix)
# Simulate some data that looks roughly like the original graph.
dis = c(rnorm(100, 300, 50), rnorm(100, 5000, 100))
result = c(rnorm(100, 0.6, 0.1), rnorm(100, 0.5, 0.1))
# Store the location of the gap so we can refer to it later.
x.axis.gap = c(700, 4700)
# gap.plot() works internally by shifting the location of the points to be
# plotted based on the gap size/location, and then adjusting the axis labels
# accordingly. We'll re-compute the second half of the regression line in the
# same way; these are the new values for the x-axis.
dis.alt = dis - x.axis.gap[1]
# Plot (same as before).
gap.plot(
x = dis,
y = result,
gap = x.axis.gap,
gap.axis = "x",
xlab = "X-Axis",
ylab = "Y-Axis",
xtics = seq(0, 5575, by = 200)
)
abline(v = seq(700, 733), col = "white")
axis.break(1, 716, style = "slash")
# Add regression line in two pieces: from 0 to the start of the gap, and from
# the end of the gap to infinity.
ablineclip(lm(result ~ dis), col = "red", lwd = 2, x2 = x.axis.gap[1])
ablineclip(lm(result ~ dis.alt), col = "red", lwd = 2, x1 = x.axis.gap[1] + 33)

Padding Around Legend when using Pch in Base R

Just a minor question. I am trying to make a legend for the following plot.
# fitting the linear model
iris_lm = lm(Petal.Length ~ Sepal.Length, data = iris)
summary(iris_lm)
# calculating the confidence interval for the fitted line
preds = predict(iris_lm, newdata = data.frame(Sepal.Length = seq(4,8,0.1)),
interval = "confidence")
# making the initial plot
par(family = "serif")
plot(Petal.Length ~ Sepal.Length, data = iris, col = "darkgrey",
family = "serif", las = 1, xlab = "Sepal Length", ylab = "Pedal Length")
# shading in the confidence interval
polygon(
c(seq(8,4,-0.1), seq(4,8,0.1)), # all of the necessary x values
c(rev(preds[,3]), preds[,2]), # all of the necessary y values
col = rgb(0.2745098, 0.5098039, 0.7058824, 0.4), # the color of the interval
border = NA # turning off the border
)
# adding the regression line
abline(iris_lm, col = "SteelBlue")
# adding a legend
legend("bottomright", legend = c("Fitted Values", "Confidence Interval"),
lty = c(1,0))
Here's the output so far:
My goal is to put a box in the legend next to the "Confidence Interval" tab, and color it in the same shade that it is in the picture. Naturally, I thought to use the pch parameter. However, when I re-run my code with the additional legend option pch = c(NA, 25), I get the following:
It is not super noticeable, but if you look closely at the padding on the left margin of the legend, it actually has decreased, and the edge of the border is now closer to the line than I would like. Is there any way to work around this?
That's a curious behavior in legend(). I'm sure someone will suggest a ggplot2 alternative. However, legend() does offer a solution. This solution calls the function without plotting anything to capture the dimensions of the desired rectangle. The legend is then plotted with the elements you really want but no enclosing box (bty = "n"). The desired rectangle is added explicitly. I assume you mean pch = 22 to get the filled box symbol. I added pt.cex = 2 to make it a bit larger.
# Capture the confidence interval color, reusable variables
myCol <- rgb(0.2745098, 0.5098039, 0.7058824, 0.4)
legText <- c("Fitted Values", "Confidence Interval")
# Picking it up from 'adding a legend'
ans <- legend("bottomright", lty = c(1,0), legend = legText, plot = F)
r <- ans$rect
legend("bottomright", lty = c(1,0), legend = legText, pch = c(NA,22),
pt.bg = myCol, col = c(1, 0), pt.cex = 2, bty = "n")
# Draw the desired box
rect(r$left, r$top - r$h, r$left + r$w, r$top)
By the way, I don't think this will work without further tweaking if you place the legend on the left side.

Legend graphing help in R

So I am trying to add some graphs to my notes. I have created a simple interest function that will plot several simple interest functions using different rates and I would like to add a legend that would simple say...
"i =: 0%, x%, y%, z%" on one single line, where each 0,x,y,z is in the different color of the representative function using that interest rate.
I looked into the paste() function and attempted to make it one string but I am not sure exactly how to loop it into the int_seq and pull out each individual index and make it a different color then put it into a single string.
# indexs to be used
t = 0:50
int_seq = seq(0.025,0.10,by=0.025) # intere rate sequence
colors = c("red","blue","green","orange") #colors of interest rate seq
index = 1:length(int_seq)
# AV Simple Interest (all good)
avSimple = function(i,t){
av = (1 + (i * t))
return(av)}
# Plot range for y-axis (all good)
yrange = c(avSimple(min(int_seq),min(t)) * 0.95,
avSimple(max(int_seq),max(t)) * 1.05)
# Plots Simple Interest with different interest rates (all good)
plot(t,avSimple(0,t), type="l", main = "AV Simple Interest", xlab = "Time",
ylab = "AV", ylim = yrange)
# loops through the int_seq and plots line based on interest rate
# and specified color (all good)
for (i in index)
lines(t,avSimple(int_seq[i],t), col = colors[i])
# Adds legend to plot for different interest rates
# !!This is where I need the help, not sure best way to approach!!
legend(0,avSimple(0.075,50), c("i =: 0%", for (i in index) int_seq[i]),
col = colors)
Not sure what kind of legend you want. Since you say in one line, you might want to add horiz = TRUE, but here are some other options:
You can pass full vectors to legend so there is no need for a loop in this case. Just create a vector of labels but also use a vector of colors corresponding to each label (which you have already done).
# indexs to be used
t = 0:50
int_seq = seq(0.025,0.10,by=0.025) # intere rate sequence
colors = c("red","blue","green","orange") #colors of interest rate seq
index = 1:length(int_seq)
# AV Simple Interest (all good)
avSimple = function(i,t){
av = (1 + (i * t))
return(av)}
# Plot range for y-axis (all good)
yrange = c(avSimple(min(int_seq),min(t)) * 0.95,
avSimple(max(int_seq),max(t)) * 1.05)
plot(t, type="n", main = "AV Simple Interest", xlab = "Time",
ylab = "AV", ylim = yrange)
# for (i in index)
# lines(t,avSimple(int_seq[i],t), col = colors[i])
# Adds legend to plot for different interest rates
# !!This is where I need the help, not sure best way to approach!!
labs <- sprintf('i =: %s%%', c(0, int_seq))
labs2 <- paste0(c(0, int_seq), '%')
legend('topleft', legend = labs, col = colors, lty = 1, title = 'normal')
l <- legend('top', legend = rep('i =:', length(labs)), lty = 1,
col = colors, text.width = max(strwidth(labs)) + 1,
title = 'right-justified')
text(l$rect$left + l$rect$w, l$text$y, labs2, pos = 2)
legend('topright', legend = labs, text.col = colors, title = 'colored')
legend('bottom', legend = labs, col = colors, lty = 1, horiz = TRUE,
cex = .7, title = 'horizontal')

Resources