ggplot2 for procrustes rotation in vegan - r

I want to plot procrustes rotations between to RDA-objects obtained by vegan with ggplot2.
library(vegan)
#perform two RDAs, do procrustes:
pro.test <- procrustes(rda.t1,rda.t2)
I extracted the x,y coordinates from list of class "procrustes" and added a factor "dates".
test <- data.frame(rda1=pro.test$Yrot[,1], rda2=pro.test$Yrot[,2])
test$dates <- c(rep("A", 8), rep("B",8), rep("C", 8))
test.2 <- data.frame(rda1=pro.test$X[,1], rda2=pro.test$X[,2])
test.2$dates <- c(rep("A", 8), rep("B",8), rep("C", 8))
Now the basic plot:
ggplot() +
geom_point(data=test, aes(x=rda1, y=rda2, color=dates)) +
geom_point(data=test.2,aes(x=rda1, y=rda2, color=dates))
The part i cannot do is the plotting of small lines between each corresponding point in test and test.2
Vegan does plot these rotations with arrows rather than connected points. However, vegan does not khow to color according to sampling groups/factors, which is important for me.
Having arrows in the ggplot would be extremely great - i know there is a geom_segment with the argument "arrow".
Could you help me?
the dput of pro.test is below.
dput(pro.test)
structure(list(Yrot = structure(c(0.126093537705143, 0.196350569855869,
-0.0513472841582749, -0.304416713452662, 0.210682972922012, -0.0219477831881197,
-0.24519038499101, 0.338357488742126, -0.399739151138497, -0.366424716631558,
0.0321561053701086, 0.565794811541598, 0.606054432756139, -0.0122819831669951,
-0.00403199420346042, -0.0448308879361932, 0.0631101371381566,
-0.150820933315408, -0.018216051372273, -0.68513841544701, -0.117446131920294,
-0.450735018917557, 0.25749869839177, 0.47646869541639, -0.211447138648954,
-0.236584149111598, -0.0316882271224907, -0.281680981927695,
-0.182346139754316, -0.366221121187894, -0.263915986724565, -0.203160918536977,
0.209888424862468, 0.219400450315756, 0.143569801341895, 0.258388604988749,
0.542334722496036, 0.465147580652753, 0.294835945722885, 0.523372408452242,
0.0739580893460179, 0.242768571724456, 0.0409877673276456, -0.0942111509903291,
-0.193072299067071, -0.38889179801965, -0.352882980509932, -0.208549475629433
), .Dim = c(24L, 2L), .Dimnames = list(c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21", "22", "23", "24", "25"), NULL)),
X = structure(c(0.0860177119127241, 0.217144585357183, -0.0301829830202831,
-0.246142550516987, 0.230574651598493, 0.00485065775494245,
-0.225907453854864, 0.371465194869491, -0.395330365511425,
-0.359255005182027, -0.00775013746753128, 0.47442649486468,
0.519983070801763, -0.0146878517934982, 0.0377018407084686,
-0.0885829362985767, 0.0935962405791314, -0.186192083265912,
0.00247095461296341, -0.655467761687806, -0.0966978065526177,
-0.398672122636169, 0.275589258531376, 0.39104839619648,
-0.273098318897548, -0.237373845171625, -0.0351119316278201,
-0.279271270040404, -0.171188235636994, -0.342350443283954,
-0.297148604541773, -0.21965804713297, 0.269830887309913,
0.268669489120665, 0.143826114581508, 0.229549645414776,
0.531869658831067, 0.479136042616735, 0.380638462867711,
0.548249030471031, 0.161449266776772, 0.282765937749097,
0.0756433464279055, 0.00516171212969907, -0.195519622624857,
-0.568932423412245, -0.381681091857682, -0.375455760069009
), .Dim = c(24L, 2L), const = 1.30375728392289, .Dimnames = list(
c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
"12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "24", "25"), c("RDA1", "RDA2")), "`scaled:center`" = structure(c(1.3588667228485e-17,
4.69820941410385e-18), .Names = c("RDA1", "RDA2"))), ss = 0.146265769408323,
rotation = structure(c(-0.958883328045618, 0.283800569407742,
0.283800569407742, 0.958883328045619), .Dim = c(2L, 2L)),
translation = structure(c(-9.76742606822348e-18, 1.35523649355013e-17
), .Dim = 1:2), scale = 0.918742698883168, xmean = structure(c(1.3586408473959e-17,
4.71176194125992e-18), .Names = c("RDA1", "RDA2")), symmetric = FALSE,
call = procrustes(X = rda.t1, Y = rda.t2), svd = structure(list(
d = c(2.51563498111738, 2.16729713036852), u = structure(c(0.743008138366833,
0.669282381600362, 0.669282381600362, -0.743008138366833
), .Dim = c(2L, 2L)), v = structure(c(-0.522515395489416,
0.852629850214347, -0.852629850214347, -0.522515395489416
), .Dim = c(2L, 2L))), .Names = c("d", "u", "v"))), .Names = c("Yrot",
"X", "ss", "rotation", "translation", "scale", "xmean", "symmetric",
"call", "svd"), class = "procrustes")

Does this do the job?
library(ggplot2)
library(grid)
ctest <- data.frame(rda1=pro.test$Yrot[,1],
rda2=pro.test$Yrot[,2],xrda1=pro.test$X[,1],
xrda2=pro.test$X[,2],dates=rep(c("A","B","C"),each=8))
ggplot(ctest) +
geom_point(aes(x=rda1, y=rda2, colour=dates)) +
geom_point(aes(x=xrda1, y=xrda2, colour=dates)) +
geom_segment(aes(x=rda1,y=rda2,xend=xrda1,yend=xrda2,colour=dates),arrow=arrow(length=unit(0.2,"cm")))

Related

Show unique values on bubbles graph in R

I'm working with a db which looks more or less like this:
dput(ex)
structure(list(clave = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33"), n = c(2127, 3519, 153, 2070, 3089,
2971, 3005, 152, 53409, 2351, 4599, 3121, 4828, 7588, 25714,
4218, 3032, 295, 3856, 3885, 7044, 3246, 2589, 2559, 2223, 2316,
3560, 2695, 2465, 6742, 4024, 2065, 1627)), row.names = c(NA,
-33L), class = c("tbl_df", "tbl", "data.frame"))
And I'm using the packcircles package to create a bubble graph for just one variable since standard bubbles are added as a third dimension on ggplot. I'm using paletteer library too:
library(packcircles)
library(paletteer)
Next code creates the data.frame that will be used on the graph. First I create the coordinates for the bubbles (circles) and then I incorporate my clave and n variables from original data.frame:
# Create circles
ex_ <- circleProgressiveLayout(ex$n)
ex_ <- circleLayoutVertices(ex_, npoints=50)
# Incorporate variables
ex_$clave <- rep(ex$clave, each=51)
ex_$n <- rep(ex$n, each=51)
# Palette
colors <- paletteer_c("ggthemes::Green-Gold", 33)
Now we're ready to graph:
ggplot(data = ex_, aes(x, y, fill=clave)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors)) +
geom_text(size = 3, label= unique(ex_$n))-> my_graph
plotly::ggplotly(my_graph)
Code above throws following error:
Error in `check_aesthetics()`:
! Aesthetics must be either length 1 or the same as the data (1683): label
Run `rlang::last_error()` to see where the error occurred.
If I use instead:
ggplot(data = ex_, aes(x, y, fill=clave)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors)) +
geom_text(size = 3, label= ex_$n)-> my_graph
plotly::ggplotly(my_graph)
Now every circle is surrounded by text (51 times same text). What I want is that only clave and one value of n were showed when you pass the mouse pointer through each circle.
Any advice or idea to handle with this will be much appreciated.
How about this. The mouse-over doesn't work in the static picture below, but if you run the code, it should.
ex <- structure(list(clave = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33"), n = c(2127, 3519, 153, 2070, 3089,
2971, 3005, 152, 53409, 2351, 4599, 3121, 4828, 7588, 25714,
4218, 3032, 295, 3856, 3885, 7044, 3246, 2589, 2559, 2223, 2316,
3560, 2695, 2465, 6742, 4024, 2065, 1627)), row.names = c(NA,
-33L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(packcircles)
library(paletteer)
ex_ <- circleProgressiveLayout(ex$n)
ex_ <- circleLayoutVertices(ex_, npoints=50)
# Incorporate variables
ex_$clave <- rep(ex$clave, each=51)
ex_$n <- rep(ex$n, each=51)
# Palette
colors <- paletteer_c("ggthemes::Green-Gold", 33)
ex_ <- ex_ %>%
mutate(lab = paste0("clave: ", clave, "\nN: ", n))
ggplot(data = ex_, aes(x, y, fill=clave, text=lab)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors))-> my_graph
plotly::ggplotly(my_graph, tooltip = "text")
Created on 2022-04-06 by the reprex package (v2.0.1)

Recode a factor variable, dropping N/A

I have a factor variable with 14 levels, which I'm trying to into collapse into only 3 levels. It contains two N/A which I also wanna remove.
My code looks like this:
job <- fct_collapse(E$occupation, other = c("7","9", "10", "13" "14"), 1 = c("1", "2", "3", "12"), 2 = c("4", "5", "6", "8", "11"))
However it just gives me tons of error. Can anyone help here me here?
We could also this with a named list
library(forcats)
lst1 <- setNames(list(as.character(c(7, 9, 10, 13, 14)),
as.character(c(1, 2, 3, 12)), as.character(c(4, 5, 6, 8, 11))), c('other', 1, 2))
fct_collapse(df$occupation, !!!lst1)
data
df <- structure(list(occupation = c("1", "3", "5", "7", "9", "10",
"12", "14", "13", "4", "7", "6", "5")), class = "data.frame", row.names = c(NA,
-13L))
For numbers try using backquotes in fct_collapse.
job <- forcats::fct_collapse(df$occupation,
other = c("7","9", "10", "13", "14"),
`1` = c("1", "2", "3", "12"),
`2` = c("4", "5", "6", "8", "11"))

Plotting lines of multiple groups in ggplot2 gives a weird result

I have done species accumulation curves and would like to plot the SAC results of different substrate sizeclasses in the same ggplot, with expected species richness on y-axis and number of sites samples on x-axis. The data features a cumulative number of samples in each sizeclass (column "sites"), the expected species richness (column "richness"), and substrate size classes 10, 20 and 30 (column "sc").
sites richness sc
1 1 0.6696915 10
2 2 1.2008513 10
3 3 1.6387310 10
4 4 2.0128472 10
5 5 2.3424933 10
6 6 2.6403239 10
sites richness sc
2836 1 1.000000 20
2837 2 1.703442 20
2838 3 2.249188 20
2839 4 2.706618 20
2840 5 3.110651 20
2841 6 3.479173 20
I want each sizeclass to have unique linetype. I used the following code for ggplot:
sac_kaikki<-ggplot(sac_data, aes(x=sites, y=richness,group=sc)) +
geom_line(aes(linetype=sc))+
coord_cartesian(xlim=c(0,100))+
theme(axis.title.y = element_blank())+
theme(axis.title.x = element_blank())
However, instead of getting three neat lines in different linetypes, I got [this jumbly muddly messy thing with more stripes than a herd of zebras][1]: https://i.stack.imgur.com/iD75K.jpg. I am sure the solution is rather simple, but for my life I am not able to figure it out.
// as Brookes kindly pointed out I should add some reproducible data, here is a subset of my data with dput, featuring 10 first observations of size classes 10 and 20:
dput(head(subset(sac_data,sac_data$sc=="10"),10))
structure(list(sites = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), richness = c(0.669691470054462,
1.20085134466255, 1.63873100707468, 2.01284716414471, 2.34249332096243,
2.64032389106845, 2.91468283244696, 3.17111526890278, 3.41334794519086,
3.64392468817362), sc = c("10", "10", "10", "10", "10", "10",
"10", "10", "10", "10")), .Names = c("sites", "richness", "sc"
), row.names = c(NA, 10L), class = "data.frame")
dput(head(subset(sac_data,sac_data$sc=="20"),10))
structure(list(sites = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), richness = c(0.999999999999987,
1.70344155844158, 2.24918831168832, 2.70661814764865, 3.11065087175364,
3.47917264517669, 3.82165739030286, 4.14341144680334, 4.44765475554031,
4.73653870494466), sc = c("20", "20", "20", "20", "20", "20",
"20", "20", "20", "20")), .Names = c("sites", "richness", "sc"
), row.names = 2836:2845, class = "data.frame")
// okay so for whatever reason, the plot works just fine if I plot only two sizeclasses, but including the third one produces the absurd plot I posted a picture of.
structure(list(sites = 1:10, richness = c(0.42857142857143, 0.838095238095238,
1.22932330827066, 1.60300751879699, 1.95989974937343, 2.30075187969924,
2.62631578947368, 2.93734335839598, 3.23458646616541, 3.5187969924812
), sc = c("30", "30", "30", "30", "30", "30", "30", "30", "30",
"30")), .Names = c("sites", "richness", "sc"), row.names = c(NA,
10L), class = "data.frame")
Works fine for me with your sample data:
a <- structure(list(sites = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), richness = c(0.669691470054462,
1.20085134466255, 1.63873100707468, 2.01284716414471, 2.34249332096243,
2.64032389106845, 2.91468283244696, 3.17111526890278, 3.41334794519086,
3.64392468817362), sc = c("10", "10", "10", "10", "10", "10",
"10", "10", "10", "10")), .Names = c("sites", "richness", "sc"
), row.names = c(NA, 10L), class = "data.frame")
b <- structure(list(sites = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), richness = c(0.999999999999987,
1.70344155844158, 2.24918831168832, 2.70661814764865, 3.11065087175364,
3.47917264517669, 3.82165739030286, 4.14341144680334, 4.44765475554031,
4.73653870494466), sc = c("20", "20", "20", "20", "20", "20",
"20", "20", "20", "20")), .Names = c("sites", "richness", "sc"
), row.names = 2836:2845, class = "data.frame")
c <- structure(list(sites = 1:10, richness = c(0.42857142857143, 0.838095238095238,
1.22932330827066, 1.60300751879699, 1.95989974937343, 2.30075187969924,
2.62631578947368, 2.93734335839598, 3.23458646616541, 3.5187969924812
), sc = c("30", "30", "30", "30", "30", "30", "30", "30", "30",
"30")), .Names = c("sites", "richness", "sc"), row.names = c(NA,
10L), class = "data.frame")
sac_data <- bind_rows(a, b, c)
Plotting:
ggplot(sac_data, aes(sites, richness, group = sc)) +
geom_line(aes(linetype = sc))

Iterating over data.table in lapply or for loop

I basically am wondering why it makes a difference if you iterate over a vector created from a data.table or over a basic vector and why that is the case.
I think basically it's quite similar to this question, but I can narrow it down to a more basic example.
dt <- setDT(structure(list(File.Name = c("file1.xlsx", "file2.xlsx", "file3.xlsx")
, Split.ID = structure(c(1L, 1L, 1L)
, .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11","12", "13", "14", "15", "16", "17", "18", "19", "20"), class = "factor"))
, .Names = c("File.Name", "Split.ID")
, class = c("data.table", "data.frame"), row.names = c(NA, -3L)))
for(file in as.vector(dt[1:3,1])){
print(file)
}
for(i in letters[1:4]){print(i)}
As can be seen from the output, in the first for-loop it just prints out all values in the first iteration and in the second loop it loops over the distinct letters.
I am trying to use the file names in connection with readxl, so vectorizing the function is not really an option, since I want to do it sequentially. Additionally I would like to keep the list of files as an data.table to be able to split in different parts.

invalid color name background in qgraph

I have been trying to use qgraph to generate the network graph. The code is as following
Gw <- qgraph(edgeList, diag = TRUE, labels = TRUE,legend.cex = 0.3, vsize = 1,edge.color=colorLabels,legend=TRUE,asize=1)
The figure can be generated, but the R command line gives the following error message. I do not know what does the invalid color name 'background' mean.
The dput result is shown as follows,
dput(edgeList)
structure(c("1", "2", "2", "3", "4", "5", "6", "7", "8", "1",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "16",
"4", "5", "7", "1", "9", "10", "19", "20", "2", "16", "21", "3",
"22", "5", "23", "8", "1", "20", "2", "13", "14", "17", "14",
"1", "19", "14", "2", "21", "14", "24", "1", ":499.3", "nk Transfe",
"de of tran", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"ine:4121", "ine:3257.4", "ine:75.2", "ine:75.2", "ine:11615.",
"ine:10603", "ine:334.2", "ine:7256.8", "ine:7256.8", "ine:996.8",
"ine:884.6", "ine:364.9", "ine:6360", "ine:5640.9", "ine:2729.7",
"ine:5482.6", "ine:85", "ine:1474.9", "ine:700.8", "ine:2754.6",
"ine:3257.4", "ine:3257.4", "ine:7307.8", "ine:18560.", "ine:85.1",
"ine:364.8", ":700.1", ":5317", "l:4258.9", "l:4258.9", "l:1637.6",
"l:1637.6", "l:46.4", "l:3938.5", "l:3938.5", "l:2800.4", "l:2715.1",
"l:2715.1", "l:12708.2", "l:1042", ":499.3", "nk Transfe", "de of tran",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "ine:4121",
"ine:3257.4", "ine:75.2", "ine:75.2", "ine:11615.", "ine:10603",
"ine:334.2", "ine:7256.8", "ine:7256.8", "ine:996.8", "ine:884.6",
"ine:364.9", "ine:6360", "ine:5640.9", "ine:2729.7", "ine:5482.6",
"ine:85", "ine:1474.9", "ine:700.8", "ine:2754.6", "ine:3257.4",
"ine:3257.4", "ine:7307.8", "ine:18560.", "ine:85.1", "ine:364.8",
":700.1", ":5317", "l:4258.9", "l:4258.9", "l:1637.6", "l:1637.6",
"l:46.4", "l:3938.5", "l:3938.5", "l:2800.4", "l:2715.1", "l:2715.1",
"l:12708.2", "l:1042", "25", "1", "1", "26", "27", "28", "29",
"30", "31", "25", "32", "33", "4", "4", "3", "3", "5", "5", "7",
"6", "6", "27", "28", "30", "25", "32", "33", "9", "8", "1",
"1", "10", "12", "12", "16", "16", "16", "16", "8", "1", "3",
"3", "7", "7", "25", "9", "9", "1", "10", "10", "14", "14"), .Dim = c(104L,
2L), .Dimnames = list(NULL, c("newsendId", "newtoId")))
The generated figure is as follows. I used the following command to generate it
Gw <- qgraph(edgeList, layout = "spring", diag = FALSE, labels = TRUE, cut = NULL, edge.color = "red",legend.cex = 0.5, vsize = 8)
Which nodes are problems? With your data and code you can modify label.cex. There are other variations of the arguments for the label and legend sizes. Here is one version, with the color blue.
library(qgraph)
Gw <- qgraph(edgeList, layout = "spring", diag = FALSE, labels = TRUE, cut = NULL, edge.color = "red", legend.cex = 0.3, vsize = 4, label.cex = 0.3, label.color = "blue")
Gw

Resources