I am using a copula to look at the probability of occurrence of events based on duration and magnitude of the events. I can create contours for recurrence intervals with observed and simulated data in base R graphics, but I can't figure out how to reproduce in ggplot2. Why not just produce the graphs in base graphics and move on you may be wondering? Because I'm including the graphs in a short summary report and want to have consistency with numerous other graphs in the report. Below is some example code. I know that using the location, scale, and shape for the GEV distribution to create random deviates to get the same distribution from is not ideal, but it is the best way I could think of to create a somewhat reproducible example, despite the poor correlation at the end. In base R, the contours are generated from a matrix of simulated data. Is this possible in ggplot2?
library(evd)
library(copula)
dur <- rgev(500, 2.854659, 2.170122, -0.007829)
mag <- rgev(500, 0.02482, 0.01996, 0.04603)
fDurGev <- fgev(dur)
fMagGev <- fgev(mag)
durVec <- dgev(dur, fDurGev[[1]][1], fDurGev[[1]][2], fDurGev[[1]][3])
magVec <- dgev(mag, fMagGev[[1]][1], fMagGev[[1]][2], fMagGev[[1]][3])
durMagMat <- as.matrix(cbind(duration = durVec, magnitude = magVec))
theta <- coef(fitCopula(claytonCopula(dim = 2), durMagMat, method = "itau"))
clayCop <- claytonCopula(theta, dim = 2)
fCopDurMag <- pCopula(durMagMat, clayCop)
copPts <- data.frame(duration = dur, magnitude = mag, copNEP = fCopDurMag,
copEP = (1 - fCopDurMag), copRI = (1 / fCopDurMag))
fSim <- seq(0.05, 0.99998, length.out = 1000)
quaDur <- qgev(fSim, fDurGev[[1]][1], fDurGev[[1]][2], fDurGev[[1]][3])
quaMag <- qgev(fSim, fMagGev[[1]][1], fMagGev[[1]][2], fMagGev[[1]][3])
expDurMagMat <- cbind(expand.grid(fSim, fSim)$Var1, expand.grid(fSim,
fSim)$Var2)
simPred <- pCopula(expDurMagMat, clayCop)
simPredMat <- matrix(simPred, 1000, 1000)
simDF <- data.frame(simDur = quaDur, simMag = quaMag, simPredMat)
rndPred <- data.frame(rCopula(5000, clayCop))
rndPred$rndDur <- qgev(rndPred[,1], fDurGev[[1]][1], fDurGev[[1]][2],
fDurGev[[1]][3])
rndPred$rndMag <- qgev(rndPred[,2], fMagGev[[1]][1], fMagGev[[1]][2],
fMagGev[[1]][3])
RI <- c(1.25, 2 ,5, 10, 20, 50, 100, 200, 500)
NEP <- 1 - (1 / RI)
plot(rndPred$rndDur, rndPred$rndMag, col = "light grey", cex = 0.5, xlab =
"Duration (time)", ylab = "Magnitude (x)")
points(copPts[,1], copPts[,2], col = "red", cex = 0.5)
contour(simDF$simDur, simDF$simMag, simPredMat, levels = NEP, labels = RI,
xaxs = 'i', yaxs = 'i', labcex = 0.6, lwd = 1, col = "black", add =
TRUE, method = "flattest", vfont = c("sans serif", "plain"))
And now for my attempt to recreate in ggplot2 (which fails to draw contours).
library(dplyr)
simDF <- data.frame(dur = expDurMagMat[, 1], mag = expDurMagMat[, 2], NEP = simPred)
simDF <- simDF %>%
dplyr::mutate(quaDur = qgev(NEP, fDurGev[[1]][1], fDurGev[[1]][2], fDurGev[[1]][3])) %>%
dplyr::mutate(quaMag = qgev(NEP, fMagGev[[1]][1], fMagGev[[1]][2], fMagGev[[1]][3]))
library(ggplot2)
ggplot(data = rndPred, aes(x = rndDur, y = rndMag)) +
geom_point(color = "light grey", alpha = 0.5) +
labs(x = "Duration (time)", y = "Magnitude (x)") +
geom_point(data = copPts, aes(x = duration, y = magnitude),
color = "red") +
geom_contour(data = simDF, aes(x = quaDur, y = quaMag, z = NEP),
inherit.aes = FALSE, breaks = NEP) +
theme_classic()
Thank you to anyone who can help.
Related
I have created the following model and predictions but I'm having trouble with the code to plot the predictions. I think it's a dimensions issue, does anyone know the changes I need to make for this to work?
code used;
#variogram
summer_vario = variog(geo_summer_df2, option = 'bin', estimator.type='modulus', bin.cloud = TRUE)
#fitting a basic parametric model
defult_summer_mod = variofit(summer_vario)
#creating predictions
preds_grid = matrix(c(-5.697, 55.441, -0.807, 51.682, -5.328, 50.218, -2.451, 54.684, -4.121, 50.355, -1.586, 54.768, -0.131, 51.505, -4.158, 52.915,
-0.442, 53.875, -3.413, 56.214, -2.860, 54.076, -3.323, 57.711, 0.566, 52.651, -0.626, 54.481, -1.185, 60.139, -2.643, 51.006,
-1.491, 53.381, -1.536, 52.424, -6.319, 58.213, -1.992, 51.503), nrow = 20, byrow = TRUE)
summer_preds = krige.conv(geo_summer_df2, locations = preds_grid, krige = krige.control(obj.model = defult_summer_mod))
#plotting predictions
#mean
image(summer_preds, col = viridis::viridis(100), zlim = c(100, max(c(summer_preds$predict))),
coords.data = geo_summer_df2[1]$coords, main = 'Mean', xlab = 'x', ylab = 'y',
x.leg = c(700, 900), y.leg = c(20, 70))
#variation
image(summer_preds, values = summer_preds$krige.var, col = heat.colors(100)[100:1],
zlim = c(0,max(c(summer_preds$krige.var))), coords.data = geo_summer_df2[1]$coords,
main = 'Variance', xlab = 'x', ylab = 'y', x.leg = c(700, 900), y.leg = c(20, 70))
data used;
https://drive.google.com/file/d/1ngwto6hgqCumoDsStOtPoG2J5EbmqxDf/view?usp=sharing
https://drive.google.com/file/d/1s9yBHsgaFRlF38CgiXCf_vum1DyhEbz4/view?usp=sharing
data changes made before code at the top of the page
#converting data to long format and combining both dataframes
MaxTemp %>%
pivot_longer(.,Machrihanish:Lyneham, names_to = "Location") %>%
full_join(.,metadata) -> MaxTemp_df
#renaming value column to temperature
MaxTemp_df = MaxTemp_df %>%
rename(Temp = 'value')
#filtering data for summer months
summer_df = MaxTemp_df %>%
filter(Date >= 20200701 & Date <=20200731)
#converting our data to geodata
geo_summer_df = as.geodata(summer_df, coords.col = 4:5, data.col = 3)
geo_summer_df2 = jitterDupCoords(geo_summer_df, max = 0.1, min = 0.05)
You're right about the dimensions. The predictions should be made over a regular grid of locations if you want to plot them as an image. Get all the unique x co-ordinates and all the unique y co-ordinates, sort them, then use expand.grid to get x, y co-ordinates for the whole grid. You'll then need to use this for kriging.
When you come to drawing the image, you need to arrange the predictions into a matrix:
xvals <- sort(unique(preds_grid[,1]))
yvals <- sort(unique(preds_grid[,2]))
preds_grid <- as.matrix(expand.grid(xvals, yvals))
colnames(preds_grid) <- NULL
summer_preds = krige.conv(geo_summer_df2, locations = preds_grid,
krige = krige.control(obj.model = default_summer_mod))
image(xvals, yvals, matrix(summer_preds$predict, nrow = length(xvals)),
col = viridis::viridis(100), main = 'Mean', xlab = 'x', ylab = 'y')
image(xvals, yvals, matrix(summer_preds$krige.var, nrow = length(xvals)),
col = heat.colors(100)[100:1], main = 'Variance', xlab = 'x', ylab = 'y')
Note that you will get better images if you use a finely-spaced sequence for x and y:
xvals <- seq(-7, 1, 0.1)
yvals <- seq(50, 62, 0.1)
The plots this produces with the same code otherwise are:
Update - using ggplot
The following adds the data to an outline of the British Isles:
devtools::install_github("ropensci/rnaturalearthhires")
library(rnaturalearth)
xvals <- seq(-7, 1, 0.1)
yvals <- seq(50, 62, 0.1)
preds_grid <- as.matrix(expand.grid(xvals, yvals))
summer_preds <- krige.conv(
geo_summer_df2, locations = preds_grid,
krige = krige.control(obj.model = default_summer_mod))
df <- as.data.frame(cbind(preds_grid,
mean = summer_preds$predict,
var = summer_preds$krige.var))
gb <- sf::st_crop(ne_coastline(scale = 10, returnclass = 'sf'),
xmin = -7, xmax = 1, ymin = 50, ymax = 62)
ggplot(gb) +
geom_tile(data = df, aes(Var1, Var2, fill = mean),
width = 0.11, height = 0.11, size = 0) +
geom_sf() +
scale_fill_viridis_c() +
ggtitle('Mean')
ggplot(gb) +
geom_tile(data = df, aes(Var1, Var2, fill = var),
width = 0.11, height = 0.11, size = 0) +
geom_sf() +
scale_fill_gradientn(colors = heat.colors(100, rev = TRUE)) +
ggtitle('Variance')
I am trying to get a spearman correlation for two scatter plots with confidence intervals on the same graph but it is proving difficult to do.
Given the following data:
spentWithTool <- sample(1:7, 20, replace = TRUE)
understoodWithTool <- sample(1:5, 20, replace = TRUE)
spentWithoutTool <- sample(1:4, 10, replace = TRUE)
understoodWithoutTool <- sample(1:5, 10, replace = TRUE)
This is the best workaround I came up with but it is not what I want - I want the withTool and the withoutTool plots to be on the same graph and NOT side by side.
plot_with <- ggscatter(data = data.frame(spentWithTool, understoodWithTool),
x = 'spentWithTool',
y = 'understoodWithTool',
color = 'darkred',
cor.method = "spearman",
cor.coef = TRUE,
conf.int = TRUE,
add = "reg.line",
add.params = list(color = 'red'),
)
plot_without <- ggscatter(data = data.frame(spentWithoutTool, understoodWithoutTool),
x = 'spentWithoutTool',
y = 'understoodWithoutTool',
color = 'darkblue',
cor.method = "spearman",
cor.coef = TRUE,
conf.int = TRUE,
add = "reg.line",
add.params = list(color = 'blue')
ggarrange(plotlist = list(plot_with, plot_without))
The above gives me:
Using ggpar(p = list(plot_with, plot_without)) just created two separate graphs so it was not helpful.
I am trying to get two lines (red and blue), where each line represents withTool and withoutTool respectively and the x and y axis represent timeSpent and understood respectively. Ideally, the confidence intervals, would be the same colour as what the line represents (red for withTool and blue for withoutTool).
Is there a way to get both correlation lines and points on the same graph?
Aside - Is there a way to use ggscatter without creating a data frame from vectors?
After checking out the docs and trying several options using the color and ggp arguments of ggscatter IMHO the easiest and less time-consuming option to achieve your desired result would be to build your plot from scratch using ggplot2 with some support from ggpubr to add the regression equations and the theme:
set.seed(1)
spentWithTool <- sample(1:7, 20, replace = TRUE)
understoodWithTool <- sample(1:5, 20, replace = TRUE)
spentWithoutTool <- sample(1:4, 10, replace = TRUE)
understoodWithoutTool <- sample(1:5, 10, replace = TRUE)
library(ggplot2)
library(ggpubr)
df <- rbind.data.frame(
data.frame(x = spentWithTool, y = understoodWithTool, id = "with"),
data.frame(x = spentWithoutTool, y = understoodWithoutTool, id = "without")
)
ggplot(df, aes(x, y, color = id, fill = id)) +
geom_point() +
geom_smooth(method = "lm") +
stat_cor(method = "spearman") +
scale_color_manual(values = c(with = "red", without = "blue"), aesthetics = c("color", "fill")) +
theme_pubr() +
labs(x = "timeSpent", y = "understood")
#> `geom_smooth()` using formula = 'y ~ x'
I've been trying to plot different exponential decay curves on to one graph. Initially I thought this would be rather be easy but it is turning out to be rather frustrating.
What I want to get:
nlsplot(k_data_nls, model = 6, start = c(a= 603.3, b= -0.03812), xlab = "hours", ylab = "copies")
nlsplot(r4, model=6, start=c(a=25.5487,b=-0.5723), xlab = "hours", ylab = "copies")
Here is some additional code for the data:
df4 <- data.frame(hours=c(0,1,3,5,12,24,48,96,168,336,504,720), copies=c(603.3,406,588,393.27,458.47,501.67,767.53,444.13,340.6,298.47,61.42,51.6))
nlsfit(df4, model=6, start=c(a=603.3,b=-0.009955831526))
d4plot <- nlsplot(df4, model=6, start=c(a=603.3,b=-0.009955831526))
r4 <- data.frame(hours=c(0,1,3,5,12,24,48,96,168,336,504,720), copies=c(26,13.44,4.57,3.12,6.89,0.71,0.47,0.47,0,0,0.24,0.48))
nlsLM(copies ~ a*exp(b*hours), data=r4, start=list(a=26,b=-0.65986))
r4plot <- nlsplot(r4, model=6, start=c(a=25.5487,b=-0.5723))
Essentially I want to be able to get both of these plots on one graph. I'm new to R so I'm not too sure where I can go from here. Thank you !
I don't know if this is actually helpful because it's so specific, but this is how I would do it (with ggplot2). First, you need data for the function you want to plot. Take the x for all the values you want to display and apply your function with your coefficients to the data. You need to have data points, not just a function, to plot data.
df_simulated <- data.frame("x" = rep(1:100, 2),
"class"= rep(c("DNA", "RNA"), each = 100))
df_simulated$y <- c(1683.7 * exp(-0.103 * 1:100), # DNA
578.7455 * exp(-0.156 * 1:100)) # RNA
However, since I never used the packages you used, I don't know how to extract the values from the models, so I took the values in your example plot. It's important that the "simulated" values for both groups are within one dataframe, and that you have a column which attributes the points to the respective group (RNA or DNA). At least it's easier if you do it like this. Then you need a data frame with your actual observations for the dots. I invented data again:
df_observed <- data.frame("x" = c(12, 13, 25, 26, 50, 51),
"y" = c(500, 50, 250, 25, 0, 5),
"class" = rep(c("DNA", "RNA"), 3))
Then you can create the plot. With color=class you specify that the data points will be grouped by "class" and will be colored accordingly. ("apple" and "banana" are just dummy words to demonstrate linebreaks)
ggplot() +
geom_line(data = df_simulated, aes(x = x, y = y, color = class), size = 1, linetype = "dashed") +
geom_point(data = df_observed, aes(x = x, y = y, color = class), size = 4, pch = 1) +
annotate("text", x = 50, y = 1250, label = "DNA\napple", color = "tomato", hjust = 0) +
annotate("text", x = 50, y = 750, label ="RNA\nbanana", color = "steelblue", hjust = 0) +
ggtitle(expression(~italic("Styela clava")~"(isolated)")) +
ylab("COI copies per 1ml") +
xlab("Time since removal of organisms (hours)") +
theme_classic() +
theme(legend.position = "none") +
scale_color_manual(values = c("DNA" = "tomato", "RNA" = "steelblue"))
This is the output:
First note that R squared is normally used for linear models and not for nonlinear models so the use of this statistic is suspect here; however, below we show it anyways since it seems that is what was asked for. A different goodness of fit measurement that is often used is residual standard error. If fm is the fitted model from nls then sigma(fm) is the residual standard error. Smaller values are more favorable. summary(fm) also reports this value.
For each of df4 and r4 we use lm to get starting values (taking log of both sides we get a model that is linear in log(a) and b), run nls fits and get the coefficients.
Now plot the points and add the fitted lines and legend. (Note that in setting up the graph we use rbind which assumes that df4 and r4 have the same column names, which they do.)
Note that the data provided in the question is much different than that shown in the question's image.
The code below does not need starting values since it uses lm to get them, runs nls and automatically extracts whatever information is needed for the graph.
1) Classic graphics In this alternative no packages are used.
r2 <- function(fm, digits = 3) {
y <- fitted(fm) + resid(fm)
r2 <- 1 - deviance(fm) / sum((y - mean(y))^2)
if (is.numeric(digits)) r2 <- round(r2, digits)
r2
}
fo <- copies ~ a * exp(b * hours) # formula used in nls
# get nls fitted model and coefficients for df4
co_d0 <- coef(lm(log(copies) ~ hours, df4, subset = copies > 0))
fmd <- nls(fo, df4, start = list(a = exp(co_d0[[1]]), b = co_d0[[2]]))
co_d <- round(coef(fmd), 4)
# get nls fitted model and coefficients for r4
co_r0 <- coef(lm(log(copies) ~ hours, r4, subset = copies > 0))
fmr <- nls(fo, r4, start = list(a = exp(co_r0[[1]]), b = co_r0[[2]]))
co_r <- round(coef(fmr), 4)
both <- rbind(cbind(df4, col = "red"), cbind(r4, col = "blue"))
plot(both[1:2], col = both$col,
xlab = "Time since removal of organisms", ylab = "COI copies per 1ml",
main = "C)" ~ italic("Styela clava") ~ "(isolated)", adj = 0)
lines(fitted(fmd) ~ hours, df4, col = "red", lty = 2)
lines(fitted(fmr) ~ hours, r4, col = "blue", lty = 2)
legend <- c(bquote(DNA),
bquote(y == .(co_d[[1]]) * e ^ {.(co_d[[2]])*x}),
bquote(R^2 == .(r2(fmd))),
bquote(),
bquote(RNA),
bquote(y == .(co_r[[1]]) * e ^ {.(co_r[[2]])*x}),
bquote(R^2 == .(r2(fmr))))
legend("right", legend = as.expression(legend), bty = "n",
text.col = c("red", "red", "red", NA, "blue", "blue", "blue"))
2) ggplot2 This uses ggplot2 and gridtext. r2, fmd, fmr, co_d and co_r are all taken from (1). We use richtest_grob from gridtext to create a custom grob for the legend and pass it using annotate_custom.
library(gridtext)
library(ggplot2)
txt <- sprintf(
"<span style='color:red'>DNA
<br>y = %.3f*e<sup>%.3fx</sup>
<br>R<sup>2</sup> = %.3f</span>
<br><br><span style='color:blue'>RNA
<br>y = %.3f*e<sup>%.3fx</sup>
<br>R<sup>2</sup> = %.3f</span>",
co_d[[1]], co_d[[2]], r2(fmd), co_r[[1]], co_r[[2]], r2(fmr))
both2 <- rbind(cbind(df4, col = "red", fitted = fitted(fmd)),
cbind(r4, col = "blue", fitted = fitted(fmr)))
ggplot(both2, aes(hours, copies, col = I(col))) +
geom_point() +
geom_line(aes(y = fitted), linetype = 2) +
annotation_custom(richtext_grob(txt, hjust = 0)) +
theme(legend.position = "none") +
labs(x = "Time since removal of organisms", y = "COI copies per 1ml") +
ggtitle(("C)" ~ italic("Styela clava") ~ "(isolated)"))
3) lattice
This uses legend from (1) and both2 from (2). First create a plot for the data points. It will also contain the legend, axes and labels. Then add a layer for the fitted lines. main.settings specifies that the main title should be left justified and bold and is adapted from this page.
library(latticeExtra)
main.settings <- list(par.main.text = list(font = 2, just = "left",
x = grid::unit(25, "mm")))
xyplot(copies ~ hours, both2, col = both2$col,
xlab = "Time since removal of organisms", ylab = "COI copies per 1ml",
main = "C)" ~ italic("Styela clava") ~ "(isolated)", adj = 0,
key = list(text = list(as.expression(legend),
col = c("red", "red", "red", NA, "blue", "blue", "blue")),
x = 0.65, y = 0.65, columns = 1),
par.settings = main.settings) +
as.layer(xyplot(fitted ~ hours, both2, groups = col, type = "l", lty = 2))
I'm trying to write my own Central Limit Theorem demonstration using ggplot2 and am unable to get my stat_function to display a changing normal distribution.
below is my code, I want the normal distribution in stat_function to transition through different states; specifically, I'm hoping for it to change the standard deviation to correspond with each value in dataset. Any help would be greatly appreciated.
#library defs
library(gganimate)
library(ggplot2)
library(transformr)
#initialization for distribution, rolls, and vectors
k = 2
meanr = 1/k
sdr = 1/k
br = sdr/10
rolls <- 200
avg <- 1
dataset <- 1
s <- 1
#loop through to create vectors of sample statistics from 200 samples of size i
#avg is sample average, s is standard deviations of sample means, and dataset is the indexes to run the transition states
for (i in c(1:40)){
for (j in 1:rolls){
avg <- c(avg,mean(rexp(i,k)))
}
dataset <- c(dataset, rep(i,rolls))
s <- c(s,rep(sdr/sqrt(i),rolls))
}
#remove initialized vector information as it was only created to start loops
avg <- avg[-1]
rn <- rn[-1]
dataset <- dataset[-1]
s <- s[-1]
#dataframe
a <- data.frame(avgf=avg, rnf = rn,datasetf = dataset,sf = s)
#plot histogram, density function, and normal distribution
ggplot(a,aes(x=avg,y=s))+
geom_histogram(aes(y = ..density..), binwidth = br,fill='beige',col='black')+
geom_line(aes(y = ..density..,colour = 'Empirical'),lwd=2, stat = 'density') +
stat_function(fun = dnorm, aes(colour = 'Normal', y = s),lwd=2,args=list(mean=meanr,sd = mean(s)))+
scale_y_continuous(labels = scales::percent_format()) +
scale_color_discrete(name = "Densities", labels = c("Empirical", "Normal"))+
labs(x = 'Sample Average',title = 'Sample Size: {closest_state}')+
transition_states(dataset,4,4)+ view_follow(fixed_x = TRUE)
I think it's difficult to use stat_function here because the dnorm function that you are passing includes a grouped variable (mean(s)). There is no way to indicate that you wish to group s by the dataset column, and the transition_states function doesn't filter the whole data frame. You could use transition_filter to filter the whole data frame, but this would be laborious.
It's not much work to just add a dnorm to your input data frame and plot it as a line, particularly since the rest of your code can be simplified substantially. Here's a fully reproducible example:
library(gganimate)
library(ggplot2)
library(transformr)
k <- 2
meanr <- sdr <- 1/k
br <- sdr/10
rolls <- 200
a <- do.call(rbind, lapply(1:40, function(i){
data.frame(avg = replicate(rolls, mean(rexp(i, k))),
dataset = rep(i, rolls),
x = seq(0, 2, length.out = rolls),
s = dnorm(seq(0, 2, length.out = rolls),
meanr, sdr/sqrt(i))) }))
ggplot(a, aes(x = avg, group = dataset)) +
geom_histogram(aes(y = ..density..), fill = 'beige',
colour = "black", binwidth = br) +
geom_line(aes(y = ..density.., colour = 'Empirical'),
lwd = 2, stat = 'density', alpha = 0.5) +
geom_line(aes(x = x, y = s, colour = "Normal"), size = 2, alpha = 0.5) +
scale_y_continuous(labels = scales::percent_format()) +
coord_cartesian(xlim = c(0, 2)) +
scale_color_discrete(name = "Densities", labels = c("Empirical", "Normal")) +
labs(x = 'Sample Average', title = 'Sample Size: {closest_state}') +
transition_states(dataset, 4, 4) +
view_follow(fixed_x = TRUE, fixed_y = TRUE)
I am fairly new to vegan and ggplot, I have drawn a species diversity plot in vegan. Ggplot has better graph so I was wondering if these codes could be modified to ggplot code.
Any help would be greatly appreciated. I am using bray in vegan.
library(vegan)
library(mass)
data <- read.table("data.txt", header = T)
attach(data)
rownames(data) <- c("TCI1", "TCI2", "TCI3", "TCII1", "TCII2", "TCII3", "TCIII1", "TCIII2", "TCIII3", "TCIV1", "TCIV2", "TCIV3",
"NCI1", "NCI2", "NCI3", "NCII1", "NCII2", "NCII3", "NCIII1", "NCIII2", "NCIII3", "NCIV1", "NCIV2", "NCIV3","TFI1", "TFI2", "TFI3", "TFII1", "TFII2", "TFII3", "TFIII1", "TFIII2", "TFIII3", "TFIV1", "TFIV2", "TFIV3",
"NFI1", "NFI2", "NFI3", "NFII1", "NFII2", "NFII3", "NFIII1", "NFIII2", "NFIII3", "NFIV1", "NFIV2", "NFIV3")
bcdist <- vegdist(data, "bray")
bcmds <- isoMDS(bcdist, k = 2)
plot(bcmds$points, type = "n", xlab = "", ylab = "")
text(bcmds$points, dimnames(data)[[1]])
You can indeed create a plot that looks like the imgur image. First I created some made-up data for your weeds. Then I called ggplot2 and put the weed names at the points, but made the points transparent.
x <- seq(from = -1, to = 1, .025)
df <- data.frame(valuesX = sample(x, size = 48, replace = TRUE),
valuesY = sample(x, size = 48, replace = TRUE),
seeds = c("TCI1", "TCI2", "TCI3", "TCII1", "TCII2", "TCII3", "TCIII1", "TCIII2", "TCIII3", "TCIV1", "TCIV2", "TCIV3",
"NCI1", "NCI2", "NCI3", "NCII1", "NCII2", "NCII3", "NCIII1", "NCIII2", "NCIII3", "NCIV1", "NCIV2", "NCIV3","TFI1", "TFI2", "TFI3", "TFII1", "TFII2", "TFII3", "TFIII1", "TFIII2", "TFIII3", "TFIV1", "TFIV2", "TFIV3",
"NFI1", "NFI2", "NFI3", "NFII1", "NFII2", "NFII3", "NFIII1", "NFIII2", "NFIII3", "NFIV1", "NFIV2", "NFIV3")
)
ggplot(df, aes(x = valuesX, y = valuesY)) +
geom_point(colour = "transparent") +
geom_text(data = df, aes(label = seeds), hjust = 1.5) +
theme_bw() +
labs(x = "Your axis label", y = "", title = "Weed Distribution") +
theme(axis.ticks= element_blank()) +
theme(plot.title = element_text(face = "bold", size = 12))
You can adjust all the elements of the plot as you see fit.