get structure of unrooted tree with tree_layout() - r

Is there a way to get the structure of an unrooted tree with the phyloseq::tree_layout() function?
Using tree_layout() will give you the coordinates of the nodes and segments that compose the tree plotted below. You can then easily redraw that tree.
library(ape)
library(phyloseq) # bioconductor
#
cat("(((Strix_aluco:4.2,Asio_otus:4.2):3.1,",
"Athene_noctua:7.3):6.3,Tyto_alba:13.5);",
file = "ex.tre", sep = "\n")
tree.owls <- read.tree("ex.tre")
par(mfrow=c(2,1))
#original tree
plot.phylo(tree.owls,type = 'p',main = 'plot.phylo')
#redraw structure with tree_layout object
tree.ly <- tree_layout(tree.owls)
plot(1,type='n',axes=FALSE, xlim = c(0,max(tree.ly$edgeDT$xright)),ylim = c(0,max(tree.ly$edgeDT$y)),main = 'tree_layout')
segments(x0=tree.ly$edgeDT$xleft,y0=tree.ly$edgeDT$y,x1=tree.ly$edgeDT$xright,y1=tree.ly$edgeDT$y)
segments(x0=tree.ly$vertDT$x,y0=tree.ly$vertDT$vmin,x1=tree.ly$vertDT$x,y1=tree.ly$vertDT$vmax)
But what if you want to redraw this tree: plot.phylo(tree.owls,type = 'u'). How would you do it?

library(ape)
library(phyloseq)
cat(
"(((Strix_aluco:4.2,Asio_otus:4.2):3.1,",
"Athene_noctua:7.3):6.3,Tyto_alba:13.5);",
file = "ex.tre",
sep = "\n"
)
tree.owls <- read.tree("ex.tre")
tree_layout() works with the phylo object, not with the plotted phylogeny. So calling tree_layout() after plot.phylo(type='p') vs. plot.phylo(type='u') will result in the same object.
plot.phylo(tree.owls, type = 'p')
a <- tree_layout(tree.owls)
plot.phylo(tree.owls, type = 'u')
b <- tree_layout(tree.owls)
identical(a, b)
[1] TRUE
To get the coordinates of an already plotted phylogeny, we can use last_plot.phylo like so:
plot.phylo(tree.owls, type = 'p')
coords_phylogram <- get("last_plot.phylo",envir=.PlotPhyloEnv)
plot.phylo(tree.owls, type = 'u')
coords_unrooted <- get("last_plot.phylo",envir=.PlotPhyloEnv)
identical(coords_phylogram, coords_unrooted)
[1] FALSE
We can see that the coordinates are now different between the phylogram and unrooted network.
The output of get("last_plot.phylo",envir=.PlotPhyloEnv) is a named list, so we can extract particular element easily. For example, if we wanted to add some annotations at the coordinates of nodes and tips, we could do the following:
points(x = coords_unrooted$xx, y=coords_unrooted$yy, pch=21, bg="yellow", cex=3)
EDIT: create a layout data frame (like tree_layout) and plot an unrooted phylogeny
Preliminaries. phyloseq is no longer required as we are extracting the layout by hand.
library(ape)
library(dplyr)
tr <- "(((Strix_aluco:4.2,Asio_otus:4.2):3.1,Athene_noctua:7.3):6.3,Tyto_alba:13.5);"
tree.owls <- read.tree(text = tr)
A function to create tree_layouts()'s edgeDT and vertDT. To draw segments that are not only horizontal or vertical (like you would have in a cladogram or phylogram), we need another column for the y0 coordinates as these are now different from the y coordinates. This way we can plot slanted phylograms and unrooted trees.
Important note: Edge lengths as calculated by this function and stored in edgeDT are incorrect (in some cases)! This is because they are calculated by subtracting coordinates, and this obviously doesn't work if edges were plotted at an angle (as in a slanted phylogram or unrooted network). This does not matter for plotting, as the plotting function does not use edge.lengths. But keep it in mind.
layout_from_plot <- function(tree, type, drop_root=FALSE, use_vert=FALSE) {
plot.phylo(x = tree, type = type)
title("Plot to get coordinates")
coords <- get("last_plot.phylo",envir=.PlotPhyloEnv)
edgeDT <- tibble(
xright = coords$xx[coords$edge[,2]],
xleft = coords$xx[coords$edge[,1]],
y = coords$yy[coords$edge[,2]],
edge.length = xright-xleft,
V1 = coords$edge[,1],
V2 = coords$edge[,2]
)
edgeDT <- edgeDT %>%
arrange(V2) %>%
mutate(OTU = c(tree$tip.label, rep(NA_character_, coords$Nnode - 1))) %>%
select(V1, V2, edge.length, OTU, xleft, xright, y)
if (!use_vert) {
edgeDT <- mutate(edgeDT, y0=y[V1-1])
} else {
edgeDT <- mutate(edgeDT, y0=y)
}
# this is a bit hacky, but it does work.
# double check that OTUs are in the right positions
# and branch lengths are correct.
# ideally, you would unroot the tree, then plot the network
# to extract coordinates. See below.
if (is.rooted(phy = tree)) {
if (drop_root) {
edgeDT <- edgeDT %>%
group_by(y0) %>%
mutate(xleft = ifelse(y == y0, xright, xleft)) %>%
mutate(xright = ifelse(y == y0, lead(xright), xright)) %>%
mutate(y = ifelse(y == y0, lead(y), y)) %>%
distinct(y, y0, .keep_all = TRUE)
}
}
vertDT <- edgeDT %>%
group_by(V1) %>%
mutate(vmin=min(y), vmax=max(y)) %>%
mutate(x=xleft[which(y==min(y))]) %>%
select(V1, x, vmin, vmax) %>%
distinct()
return(list("edgeDT"=edgeDT, "vertDT"=vertDT))
}
A function to plot different types of trees based on the extracted tree layout.
plot_from_layout <- function(tree_ly, plot_vert=FALSE) {
plot(
1,
type = 'n',
axes = TRUE,
xlim = c(0, max(tree_ly$edgeDT$xright)),
ylim = c(0, max(c(tree_ly$edgeDT$y, tree_ly$edgeDT$y0))),
main = 'tree_layout'
)
segments(
x0 = tree_ly$edgeDT$xleft,
y0 = tree_ly$edgeDT$y0,
x1 = tree_ly$edgeDT$xright,
y1 = tree_ly$edgeDT$y
)
if (plot_vert) {
segments(
x0 = tree_ly$vertDT$x,
y0 = tree_ly$vertDT$vmin,
x1 = tree_ly$vertDT$x,
y1 = tree_ly$vertDT$vmax
)
}
}
Testing.
par(mfrow=c(1,2))
layout_from_plot(
tree = tree.owls,
type = "p",
drop_root = FALSE,
use_vert = TRUE
) %>%
plot_from_layout(tree_ly = ., plot_vert = TRUE)
layout_from_plot(
tree = tree.owls,
type = "c",
drop_root = FALSE,
use_vert = FALSE
) %>%
plot_from_layout(tree_ly = ., plot_vert = FALSE)
layout_from_plot(
tree = tree.owls,
type = "u",
drop_root = TRUE,
use_vert = FALSE
) %>%
plot_from_layout(tree_ly = ., plot_vert = FALSE)
The preferred way, instead of removing edges by hand (as above), would be to unroot the tree first, then plot it. For this to work, you would need to modify the way coordinates are extracted. The main question is how the root node splits the edges and to which branch to add the leftover after removing the root node. By default it appears that this extra edge is added to the one internal edge of the network, but this does not seem right given the phylogram and branch lengths there. See below.
owls.unrooted <- unroot(tree.owls)
layout_from_plot(
tree = owls.unrooted,
type = "u",
drop_root = FALSE,
use_vert = FALSE
) %>%
plot_from_layout(tree_ly = ., plot_vert = FALSE)
EDIT 2: Update the layout_from_plot function to handle the y0 coordinates correctly and use unrooted trees instead of manually removing edges.
I realized I was over-complicating with creating the column for the y0 coordinates. All that is needed is to take the yy coordinates ordered by the from the first column of edge from the output of get("last_plot.phylo",envir=.PlotPhyloEnv). Now things work fine with rooted and unrooted trees.
Updated function:
layout_from_plot <- function(tree, type, drop_root=FALSE, use_vert=FALSE) {
if (drop_root) {
tree <- unroot(tree)
}
plot.phylo(x = tree, type = type)
title("Plot to get coordinates")
coords <- get("last_plot.phylo",envir=.PlotPhyloEnv)
edgeDT <- tibble(
xright = coords$xx[coords$edge[,2]],
xleft = coords$xx[coords$edge[,1]],
y = coords$yy[coords$edge[,2]],
y0 = coords$yy[coords$edge[,1]],
edge.length = xright-xleft,
V1 = coords$edge[,1],
V2 = coords$edge[,2]
)
edgeDT <- edgeDT %>%
arrange(V2) %>%
mutate(OTU = c(tree$tip.label, rep(NA_character_, coords$Nnode - 1))) %>%
select(V1, V2, edge.length, OTU, xleft, xright, y, y0)
if (use_vert) {
edgeDT <- mutate(edgeDT, y0=y)
}
vertDT <- edgeDT %>%
group_by(V1) %>%
mutate(vmin=min(y), vmax=max(y)) %>%
mutate(x=xleft[which(y==min(y))]) %>%
select(V1, x, vmin, vmax) %>%
distinct()
return(list("edgeDT"=edgeDT, "vertDT"=vertDT))
}
Additional test code:
layout_from_plot(tree = bird.orders, type = "c", use_vert = FALSE, drop_root = TRUE) %>%
plot_from_layout(tree_ly = ., plot_vert = FALSE)
layout_from_plot(tree = bird.orders, type = "u", drop_root = TRUE, use_vert = FALSE) %>%
plot_from_layout(tree_ly = ., plot_vert = FALSE)

Related

How to provide group-wise boundaries for parameters in modelling using R nls_multstart?

I am new to using the purrr package in R and I am struggling with trying to pass a further argument to a function inside nls_multstart.
I have a nested data frame that contains data for different combinations of grouping variables.
I want to fit the same model to the data of each combinations of groups in the nested data frame.
So far, I was able to fit the model to each data.
# model
my_model <- function(ymax, k, t) {
ymax * (1 - exp(-k*t))
}
# data
t = seq(from = 1, to = 100, by = 1)
y1 = unlist(lapply(t, my_model, ymax = 500, k = 0.04))
y2 = unlist(lapply(t, my_model, ymax = 800, k = 0.06))
y = c(y1, y2)
a <- rep(x = "a", times = 100)
b <- rep(x = "b", times = 100)
groups <- c(a, b)
df <- data.frame(groups, t, y)
nested <- df %>%
group_by(groups) %>%
nest() %>%
rowwise() %>%
ungroup() %>%
mutate(maximum = map_dbl(map(data, "y"), max))
# set staring values
l <- c(ymax = 100 , k = 0.02)
u <- c(ymax = 300, k = 0.03)
# works, but without group-specific lower and upper boundaries
# fit the model
fit <- nested %>%
mutate(res = map(.x = data,
~ nls_multstart(y ~ my_model(ymax, k, t = t),
data = .x,
iter = 20,
start_lower = l,
start_upper = u,
supp_errors = 'N',
na.action = na.omit)))
However, when trying to use the value in column maximum as a group-specific boundary, R throws the following error:
# using group-specific boundary does not work
# fit the model
fit2 <- nested %>%
mutate(res = map(.x = data,
~ nls_multstart(y ~ my_model(ymax, k, t = t),
data = .x,
iter = 20,
start_lower = l,
start_upper = u,
supp_errors = 'N',
na.action = na.omit,
lower = c(maximum, 0),
upper = c(maximum*1.2, 1))))
Error in nls.lm(par = start, fn = FCT, jac = jac, control = control, lower = lower, :
length(lower) must be equal to length(par)
Can anybody give a hint how to improve on that?

Applying text_transform() to row groups in R gt

In the {gt} package I want to use text_transform() on the row group titles in order to render the HTML but I'm getting the `no applicable method for 'resolve_location' error.
In my example below, you can see that text_transform() works if the locations argument is cells_body() (which is not what I actually want) but not if it's cells_row_groups() which is what I want.
Thoughts?
Zev
# As an experiment, I put HTML in both a value and in the groups, though
# in the real data there is only HTML in groups.
tbl <- tibble(values = c("test<sup>2</sup>", 2:4), groups = c("x<sup>2</sup>", "x<sup>2</sup>", "y", "y"))
unescape_html <- function(str){
xml2::xml_text(xml2::read_html(paste0("<x>", str, "</x>")))
}
# Error, no applicable method for resolve_location
tbl |>
gt::gt(groupname_col = "b") |>
gt::text_transform(
locations = gt::cells_row_groups(),
fn = function(x){
x <- purrr::map_chr(x, unescape_html)
paste("<span style=color:red;>", x, "</span>")
}
)
# This works so it shows that I'm close :)
tbl |>
gt::gt(groupname_col = "b") |>
gt::text_transform(
locations = gt::cells_body(columns = 1),
fn = function(x){
x <- purrr::map_chr(x, unescape_html)
paste("<span style=color:red;>", x, "</span>")
}
)
After some trial and error and a look at the rendered html code I figured out a solution using gt::html:
tbl <- tibble::tibble(values = c("test<sup>2</sup>", 2:4), groups = c("x<sup>2</sup>", "x<sup>2</sup>", "y", "y"))
unescape_html <- function(str){
xml2::xml_text(xml2::read_html(paste0("<x>", str, "</x>")))
}
tbl |>
gt::gt(groupname_col = "groups") |>
gt::text_transform(
locations = gt::cells_row_groups(),
fn = function(x) {
purrr::map(x, ~ gt::html(paste("<span style=color:blue;>", .x, "</span>")))
}
) |>
gt::text_transform(
locations = gt::cells_body(columns = 1),
fn = function(x){
x <- purrr::map_chr(x, unescape_html)
paste("<span style=color:red;>", x, "</span>")
}
)

R Plotly show string on contour plots

I have overlayed two contour plots:
library(plotly)
cluster_count <- 5
volcan <- plot_ly(z = ~volcano,
type = "contour",
contours = list(
coloring= "fill",
showlines = F
))
cluster_matrix <- volcano
cluster_matrix[cluster_matrix < 100] <- 1
cluster_matrix[cluster_matrix <= 120 & cluster_matrix >= 100] <- 2
cluster_matrix[cluster_matrix < 140 & cluster_matrix >= 120] <- 3
cluster_matrix[cluster_matrix <= 160 & cluster_matrix >= 140] <- 4
cluster_matrix[cluster_matrix > 160] <- 5
cluster_name_matrix <- cluster_matrix
cluster_name_matrix[cluster_matrix ==1] <- "Eins"
cluster_name_matrix[cluster_matrix ==2] <- "Zwei"
cluster_name_matrix[cluster_matrix ==3] <- "Drei"
cluster_name_matrix[cluster_matrix ==4] <- "Vier"
cluster_name_matrix[cluster_matrix ==5] <- "Funf"
volcan %>% add_contour(cluster_matrix,
type = "contour",
opacity =1,
text=cluster_name_matrix,
hovertemplate = 'Cluster: %{text}<extra></extra>',
autocontour = F,
line=list(color="orange"),
contours = list(
start = 1,
showlabels = T,
coloring= "lines",
end = cluster_count,
size = 1,
showlines = T
))
Is it possible to have a plot like this:
Like I did for the hovering text? Thanks for tips and suggestions in advance!
What you've been looking for is the add_annotations() function. In the code below, I write a function that retrieves a random coordinate pair for each level and then passes the corresponding coordinates to the add_annotations() function. Note that I stored your contour plot in the variable p:
library(purrr)
# Custom function
find_rand_annotation_index <- function(name_matrix, string){
d <- which(name_matrix == string, arr.ind = TRUE)
d2 <- as.data.frame(d[sample(nrow(d), size = 1), , drop = FALSE])
cbind(d2, string)
}
# Get 5 random coordinates to plot the labels
text_coords <- purrr::map_dfr(c("Eins", "Zwei", "Drei", "Vier", "Funf"), ~ find_rand_annotation_index(cluster_name_matrix, .x))
# Plot the annotations on the contour plot
p %>%
add_annotations(
x = text_coords$col,
y = text_coords$row,
text = text_coords$string,
font = list(color = "IndianRed"),
showarrow = F
)
The positioning of the labels may not be to your liking (because the coordinates are chosen randomly), but you may want to do something about it in your code.

Function that produces a sample of random vectors (X,Y,Z) in R

We have a spherical globe of radius 1, centre 1,1.
There is spot randomly located on the globes surface.
We are generating independent Unif(-1,1) random variables X,Y,Z which will be the coordinates of the random point.
Divide (X,Y,Z) by √{X2 +Y2 +Z2} to get a point 1m from the centre of the globe.
**
Write a function sample3d that produces a sample of random vectors (X, Y, Z), each of which is a point from a uniform distribution on the globe’s surface. Calling this function by the command sample3d(n) should produce an n × 3 array, where each row is a vector (X, Y, Z).
I have managed it up to ** but cannot create the sample3d, any help would be appreciated!
I had fun trying to come up with something helpful, here is what I got:
First, I defined a function norm.2 which calculates the two norm of a vector:
norm.2 <- function(x, na.rm){
if(length(dim(x)) != 0){
v1.logical <- ifelse(missing(na.rm), FALSE, TRUE)
return(sqrt(colSums(x^2, na.rm = v1.logical)))
}
if(length(dim(x)) == 0){
v1.logical <- ifelse(missing(na.rm), FALSE, TRUE)
return(sqrt(sum(x^2, na.rm = v1.logical)))
}
}
Then, I proceeded to define a function sample3d which will give n points in the 3-dimensional space, all of which with norm 1, i.e. on the 2-sphere:
sample3d <- function(n, Boundary){
M <- ifelse(missing(Boundary), sqrt(2e+300), Boundary)
x1 <- runif(n, min = -M, max = M)
x2 <- runif(n, min = -M, max = M)
x3 <- runif(n, min = -M, max = M)
x <- t(cbind(x1,x2,x3))
p <- t(t(x)/norm.2(x))
df <- data.frame(t(p), stringsAsFactors = FALSE)
return(df)
}
Here is a sample of the result:
> head(sample3d(10000))
x1 x2 x3
1 0.321159709 -0.5014622 -0.8033630
2 0.488181408 0.5547003 -0.6737852
3 -0.661576495 -0.4592729 0.5927773
4 -0.333447393 0.9331249 -0.1345016
5 -0.009070263 0.4267690 0.9043152
6 -0.375122328 -0.2393661 -0.8955373
Now, using the plotly package, we can have some fun visualizing it:
library(plotly)
dat <- sample3d(100000)
p <- plot_ly(dat, x = ~x1, y = ~x2, z = ~x3, color = norm.2(t(dat)), colors = c('#BF382A', '#0C4B8E')) %>%
add_markers() %>%
layout(scene = list(xaxis = list(title = 'X'),
yaxis = list(title = 'Y'),
zaxis = list(title = 'Z')))

fixed fill for different sections of a density plot with ggplot

Given draws from a rnorm, and cutoff c I want my plot to use the following colors:
Red for the section that is to the left of -c
Blue for the section in between -c and c
and Green for the section that is to the right of c
For example, if my data is:
set.seed(9782)
mydata <- rnorm(1000, 0, 2)
c <- 1
I want to plot something like this:
But if my data is all to the right of c the whole plot should be green. Similarly, if all is between -c and c or to the left of -c the plot should be all red or blue.
This is the code I wrote:
MinD <- min(mydata)
MaxD <- max(mydata)
df.plot <- data.frame(density = mydata)
if(c==0){
case <- dplyr::case_when((MinD < 0 & MaxD >0) ~ "L_and_R",
(MinD > 0) ~ "R",
(MaxD < 0) ~ "L")
}else{
case <- dplyr::case_when((MinD < -c & MaxD >c) ~ "ALL",
(MinD > -c & MaxD > c) ~ "Center_and_R",
(MinD > -c & MaxD <c) ~ "Center",
(MinD < -c & MaxD < c) ~ "Center_and_L",
MaxD < -c ~ "L",
MaxD > c ~ "R")
}
# Draw the Center
if(case %in% c("ALL", "Center_and_R", "Center", "Center_and_L")){
ds <- density(df.plot$density, from = -c, to = c)
ds_data_Center <- data.frame(x = ds$x, y = ds$y, section="Center")
} else{
ds_data_Center <- data.frame(x = NA, y = NA, section="Center")
}
# Draw L
if(case %in% c("ALL", "Center_and_L", "L", "L_and_R")){
ds <- density(df.plot$density, from = MinD, to = -c)
ds_data_L <- data.frame(x = ds$x, y = ds$y, section="L")
} else{
ds_data_L <- data.frame(x = NA, y = NA, section="L")
}
# Draw R
if(case %in% c("ALL", "Center_and_R", "R", "L_and_R")){
ds <- density(df.plot$density, from = c, to = MaxD)
ds_data_R <- data.frame(x = ds$x, y = ds$y, section="R")
} else{
ds_data_R <- data.frame(x = NA, y = NA, section="R")
}
L_Pr <- round(mean(mydata < -c),2)
Center_Pr <- round(mean((mydata>-c & mydata<c)),2)
R_Pr <- round(mean(mydata > c),2)
filldf <- data.frame(section = c("L", "Center", "R"),
Pr = c(L_Pr, Center_Pr, R_Pr),
fill = c("red", "blue", "green")) %>%
dplyr::mutate(section = as.character(section))
if(c==0){
ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_L, ds_data_R)) %>%
dplyr::full_join(filldf, by = "section") %>% filter(Pr!=0) %>%
dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","R")))
ds_data <- ds_data[order(ds_data$section), ] %>%
filter(Pr!=0) %>%
mutate(Pr=scales::percent(Pr))
}else{
ds_data <- suppressWarnings(dplyr::bind_rows(ds_data_Center, ds_data_L, ds_data_R)) %>%
dplyr::full_join(filldf, by = "section") %>% mutate(section = ordered(section, levels=c("L","Center","R")))
ds_data <- ds_data[order(ds_data$section), ] %>%
filter(Pr!=0) %>%
mutate(Pr=scales::percent(Pr))
}
fillScale <- scale_fill_manual(name = paste0("c = ", c, ":"),
values = as.character(unique(ds_data$fill)))
p <- ggplot(data = ds_data, aes(x=x, y=y, fill=Pr)) +
geom_area() + fillScale
Alas, I cannot figure out how to assign the colors to the different sections while keeping the percentages as labels for the colors.
We use the density function to create the data frame we'll actually plot. Then, We use the cut function to create groups using ranges of the data values. Finally, we calculate the probability mass for each group and use those as the actual legend labels.
We also create a labeled vector of colors to ensure that the same color always goes with a given range of x-values, regardless of whether the data contains any values within a given range of x-values.
The code below packages all this into a function.
library(tidyverse)
library(gridExtra)
fill_density = function(x, cc=1, adj=1, drop_levs=FALSE) {
# Calculate density values for input data
dens = data.frame(density(x, n=2^10, adjust=adj)[c("x","y")]) %>%
mutate(section = cut(x, breaks=c(-Inf, -1, cc, Inf))) %>%
group_by(section) %>%
mutate(prob = paste0(round(sum(y)*mean(diff(x))*100),"%"))
# Get probability mass for each level of section
# We'll use these as the label values in scale_fill_manual
sp = dens %>%
group_by(section, prob) %>%
summarise %>%
ungroup
if(!drop_levs) {
sp = sp %>% complete(section, fill=list(prob="0%"))
}
# Assign colors to each level of section
col = setNames(c("red","blue","green"), levels(dens$section))
ggplot(dens, aes(x, y, fill=section)) +
geom_area() +
scale_fill_manual(labels=sp$prob, values=col, drop=drop_levs) +
labs(fill="")
}
Now let's run the function on several different data distributions:
set.seed(3)
dat2 = rnorm(1000)
grid.arrange(fill_density(mydata), fill_density(mydata[mydata>0]),
fill_density(mydata[mydata>2], drop_levs=TRUE),
fill_density(mydata[mydata>2], drop_levs=FALSE),
fill_density(mydata[mydata < -5 | mydata > 5], adj=0.3), fill_density(dat2),
ncol=2)

Resources