labeling nmds plot by column values in r - r

I ran metaMDS and want to plot and color code by a grouping based on certain data frame characters. In my original data frame, df$yr are years and df$2 are sites. I want to color by the years.
caltmds <- metaMDS(df[,3:12], k=3)
plot(caltmds, type = 'n')
cols <- c("red2", "mediumblue")
points(caltmds, col = cols[df$yr])
I also tried from this post:
scl <- 3
colvec <- c("red2", "mediumblue")
plot(caltmds, type = "n", scaling = scl)
with(df, points(caltmds, display = "sites", col = colvec[yr], pch = 21, bg = colvec[yr]))
text(caltmds, display = "species", cex = 0.8, col = "darkcyan")
with(df, legend("topright", legend = levels(yr), bty = "n", col = colvec, pch = 21, pt.bg = colvec))
Nothing plots

#DATA
df1 = mtcars
mycolors = df1$cyl #Identify the grouping vector
library(vegan)
m = metaMDS(df1)
x = scores(m) #Extract co-ordinates
plot(x, col = as.numeric(as.factor(mycolors)))

Related

How can I change the colour of my points on my db-RDA triplot in R?

QUESTION: I am building a triplot for the results of my distance-based RDA in R, library(vegan). I can get a triplot to build, but can't figure out how to make the colours of my sites different based on their location. Code below.
#running the db-RDA
spe.rda.signif=capscale(species~canopy+gmpatch+site+year+Condition(pair), data=env, dist="bray")
#extract % explained by first 2 axes
perc <- round(100*(summary(spe.rda.signif)$cont$importance[2, 1:2]), 2)
#extract scores (coordinates in RDA space)
sc_si <- scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1)
sc_sp <- scores(spe.rda.signif, display="species", choices=c(1,2), scaling=1)
sc_bp <- scores(spe.rda.signif, display="bp", choices=c(1, 2), scaling=1)
#These are my location or site names that I want to use to define the colours of my points
site_names <-env$site
site_names
#set up blank plot with scaling, axes, and labels
plot(spe.rda.signif,
scaling = 1,
type = "none",
frame = FALSE,
xlim = c(-1,1),
ylim = c(-1,1),
main = "Triplot db-RDA - scaling 1",
xlab = paste0("db-RDA1 (", perc[1], "%)"),
ylab = paste0("db-RDA2 (", perc[2], "%)")
)
#add points for site scores - these are the ones that I want to be two different colours based on the labels in the original data, i.e., env$site or site_names defined above. I have copied the current state of the graph
points(sc_si,
pch = 21, # set shape (here, circle with a fill colour)
col = "black", # outline colour
bg = "steelblue", # fill colour
cex = 1.2) # size
Current graph
I am able to add species names and arrows for environmental predictors, but am just stuck on how to change the colour of the site points to reflect their location (I have two locations defined in my original data). I can get them labelled with text, but that is messy.
Any help appreciated!
I have tried separating shape or colour of point by site_name, but no luck.
If you only have a few groups (in your case, two), you could make the group a factor (within the plot call). In R, factors are represented as an integer "behind the scenes" - you can represent up to 8 colors in base R using a simple integer:
set.seed(123)
df <- data.frame(xvals = runif(100),
yvals = runif(100),
group = sample(c("A", "B"), 100, replace = TRUE))
plot(df[1:2], pch = 21, bg = as.factor(df$group),
bty = "n", xlim = c(-1, 2), ylim = c(-1, 2))
legend("topright", unique(df$group), pch = 21,
pt.bg = unique(as.factor(df$group)), bty = "n")
If you have more than 8 groups, or if you would like to define your own colors, you can simply create a vector of colors the length of your groups and still use the same factor method, though with a few slight tweaks:
# data with 10 groups
set.seed(123)
df <- data.frame(xvals = runif(100),
yvals = runif(100),
group = sample(LETTERS[1:10], 100, replace = TRUE))
# 10 group colors
ccols <- c("red", "orange", "blue", "steelblue", "maroon",
"purple", "green", "lightgreen", "salmon", "yellow")
plot(df[1:2], pch = 21, bg = ccols[as.factor(df$group)],
bty = "n", xlim = c(-1, 2), ylim = c(-1, 2))
legend("topright", unique(df$group), pch = 21,
pt.bg = ccols[unique(as.factor(df$group))], bty = "n")
For pch just a slight tweak to wrap it in as.numeric:
pchh <- c(21, 22)
ccols <- c("slateblue", "maroon")
plot(df[1:2], pch = pchh[as.numeric(as.factor(df$group))], bg = ccols[as.factor(df$group)],
bty = "n", xlim = c(-1, 2), ylim = c(-1, 2))
legend("topright", unique(df$group),
pch = pchh[unique(as.numeric(as.factor(df$group)))],
pt.bg = ccols[unique(as.factor(df$group))], bty = "n")

How to arrange symbols randomly into grid

I have a Grid with n*m size and pch Symbols from 1-13 with 8 Colors(so 104 different combinations)
Grid empty:
And now I am trying to draw the symbols as pairs randomly onto my grid.
My thinking was I create a matrix with the same size as my grid and random integer numbers and a data frame with all combinations of pch and color and draw them on the grid.
My Goal:
row <- 4
col <- 13
n <- row * col
plot.new()
plot.window(xlim = c(1, col), ylim = c(1, row))
grid(nx = col, ny = row, col = "black")
box(lwd = 2)
axis(1, at=1:col,tick=FALSE,las = 1)
axis(2,at = 1:row,tick= FALSE,las = 2)
pch_v <- c(1:13)
col_v <- c(1:8)
board <- matrix(sample(1:n),nrow = row,ncol=col)
unique_combination <- expand.grid(pch_v,col_v)
Thats the point where I am stuck. If somebody got an idea I would appreciate it.
Shuffle the unique_combination, then plot:
row <- 4
col <- 13
pch_v <- c(1:col)
col_v <- c(1:row)
unique_combination <- expand.grid(pch_v,col_v)
# random sort
# set.seed(1) # if we want to reproduce
ucr <- unique_combination[ sample(nrow(unique_combination)), ]
d <- data.frame(x = 1:col, y = 1:row, pch = ucr$Var1, col = ucr$Var2)
with(d, plot(x, y, col = col, pch = pch, bty = "n"))
If we need to plot X points, subset before the data:
# subset x rows then plot
with(head(d, 6), plot(x, y, col = col, pch = pch, bty = "n"))

Apply column names to legend in a function

I'm writing a function that takes two variables -- ideally columns from the same data frame -- and plots them. The plot will also include a legend using the names from the columns, and that's where I'm running into difficulty.
The code below is as close to the desired outcome as I can get. I'm only interested in using base R.
plotpairs <- function(x,y){
plot(x, type = "l", col = "red")
lines(y, type = "l", col = "blue")
legend(0,ylim_max, legend = paste0(x, y), lwd = c(5,5), col = c("red", "blue"), bty = "n")
}
plotpairs(df$F3, df$F4)
If you supply a data.frame or matrix as argument, you can extract the column names using colnames(), else you have to use deparse(substitute()), or match.call() as I've used here.
set.seed(1)
F3 <- cumsum(runif(1e3, -2, 2))+runif(1e3)
F4 <- cumsum(rnorm(1e3))+rnorm(1e3, 0, 0.5)
df <- data.frame(F3, F4)
plotpairs <- function(x, y) {
if (NCOL(x) > 1) {
nam <- colnames(x)[1:2]
y <- x[,2]
x <- x[,1]
} else {
nam <- as.character(match.call()[c("x", "y")])
}
plot(x, type="l", col="red", ylim=range(c(x, y)))
lines(y, type="l", col="blue")
legend("topleft", legend=nam, lwd=c(5, 5), col=c("red", "blue"), bty="n")
}
plotpairs(F3, F4)
with(df, plotpairs(F3, F4)) # same
plotpairs(df) # same
This plots the indicated columns from the data frame given as first argument or if no names are given then it plots the first two columns. Note that we first plot both together using type = "n" to ensure that the plot gets set up large enough to accommodate both variables. The example uses the builtin data frame trees.
plotpairs <- function(data, name1 = names(data)[1], name2 = names(data)[2]) {
both <- c(data[[name1]], data[[name2]])
plot(seq_along(both) / 2, both, type = "n", xlab = "", ylab = "")
lines(data[[name1]], type = "l", col = "red")
lines(data[[name2]], type = "l", col = "blue")
legend("topleft", legend = c(name1, name2), lwd = 5,
col = c("red", "blue"), bty = "n")
}
plotpairs(trees, "Girth", "Volume")
I also worked out an answer based on the comment #Rui Barradas that included regex. Since I'll be using inputs like "df$F3", I can count on the "$" symbol to be present, though this might limit the flexibility of the code.
plotpairs <- function(x,y){
xnam <- deparse(substitute(x))
ynam <- deparse(substitute(y))
xnam1 <- substring(xnam, regexpr("[$]", xnam)+1)
ynam1 <- substring(ynam, regexpr("[$]", ynam)+1)
plot(x, type = "l", col = "red")
lines(y, type = "l", col = "blue")
legend("topleft", legend = c(paste0(xnam1), paste0(ynam1)),lwd = c(5,5), col = c("red", "blue"), bty = "n")
}

R: Two graphs (boxplot and barplot) sharing one X-Axis

I am trying to match two graphs in such a way that the two graphs are located vertically above each other sharing one x Axis
I already tried to use ggplot but didn't succeed. I did not manage to rewrite the commands barplot() and plot() to ggplot() in such a way that the graphs still come out right.
I would be very grateful for any help!
That's the first plot:
plot(as.factor(DauerK_mcpM$Kulturkategorie),
DauerK_mcpM$Electivity,
ylim = c(-1,1),
ylab="Elektivitätsindex",
col = DauerK_mcpM$Farbe, xaxt = "n",
main = "Elektivität Männchen mit Dauer")
abline(h = 0, lty = 2)
x.labels <- gsub("^.*?)","",levels(as.factor(DauerK_mcpM$Kulturkategorie)))
breaks <- seq(1,length(x.labels), 1)
axis(1, labels = x.labels, at = breaks, las = 2, cex.axis = 1)
dev.off()
That's the second plot:
barplot(Dauer_pro_Kultur_prozentM,
beside = TRUE,
xaxt = "n", ylab="verbrachte Zeit [%]",
main = "Männchen", col = Dauer_pro_KulturW$Farbe)
x.labels <- gsub("^.*?)", "", levels(as.factor(Dauer_pro_KulturW$Kulturkategorie)))
length <- length(x.labels)*1.2
breaks <- seq(from = 0.7, to = length, 1.2)
axis(1, labels = x.labels, at = breaks, las = 2, cex.axis = 1)
dev.off()
This can be done in ggplot by adding an indicator column for the plot type and then faceting by that indicator:
library(tidyverse)
#create some data
set.seed(20181022)
data <- data.frame(x = letters[ceiling(runif(100, 0, 10))],
y = runif(100),
stringsAsFactors = FALSE)
#duplicate the data and add an indicator for the Plot Type
data <- data %>%
bind_rows(data) %>%
mutate(PlotType = rep(1:2, each = nrow(data)))
#Facet by the plot type and subset each geom
data %>%
ggplot(aes(x, y)) +
facet_grid(PlotType~., scales = "free")+
geom_boxplot(data = filter(data, PlotType == 1)) +
geom_bar(data = filter(data, PlotType == 2), stat = "identity")

Dates on x-axis; big dataset

I would like to plot a time series. I created an example to show how the graph should look like:
set.seed(1)
r <- rnorm(20,0,1)
z <- c(1,1,1,1,1,-1,-1,-1,1,-1,1,1,1,-1,1,1,-1,-1,1,-1)
data <- as.data.frame(na.omit(cbind(z, r)))
series1 <- ts(cumsum(c(1,data[,2]*data[,1])))
series2 <- ts(cumsum(c(1,data[,2])))
d1y <- seq(as.Date("1991-01-01"),as.Date("2015-01-01"),length.out=21)
plot_strategy <- function(series1, series2, currency)
{x11()
matplot(cbind(series1, series2), xaxt = "n", xlab = "Time",
ylab = "Value", col = 1:3, ann = TRUE, type = 'l',
lty = 1)
axis(1, at=seq(2,20,2), labels=format(d1y[seq(2,20,2)],"%Y"))
legend(x = "topleft", legend = c("TR", "BA"),
lty = 1,col = 1:3)
dev.copy2pdf(file= currency, width = 11.69, height = 8.27)}
plot_strategy(series1, series2,
currency= "all.pdf")
The actual dataset contains 6334 values. I therefore change the code to this:
axis(1, at=seq(2,6334,365), labels=format(d1y[seq(2,6334,365)],"%Y"))
But now, there are no values on the x-axis. Any suggstions?

Resources