Programmatically set attributes in DiagrammeR - r

I want to programmatically set arbitrary edge attributes using DiagrammeR.
From the help, this works fine:
ndf <-
create_node_df(
n = 4,
type = "basic",
label = TRUE,
value = c(3.5, 2.6, 9.4, 2.7))
edf <-
create_edge_df(
from = c(1, 2, 3),
to = c(4, 3, 1),
rel = "leading_to")
graph <-
create_graph(
nodes_df = ndf,
edges_df = edf)
# Set attribute `color = "green"`
# for edges `1`->`4` and `3`->`1`
# in the graph
graph <-
graph %>%
set_edge_attrs(
edge_attr = color,
values = "green",
from = c(1, 3),
to = c(4, 1))
but if I replace the last part with this, it doesn't work:
x="color"
graph <-
graph %>%
set_edge_attrs(
edge_attr = x,
values = "green",
from = c(1, 3),
to = c(4, 1))
Instead, the edges get an attribute called "x" rather than an attribute called "color". Any thoughts?

Related

In directional Circos plot with circlize, how to widen the end of ribbon?

I am trying to widen the end of ribbon. Is it possible?
library(circlize)
mat2 = matrix(sample(100, 35), nrow = 5)
rownames(mat2) = letters[1:5]
colnames(mat2) = letters[1:7]
chordDiagram(mat2, grid.col = 1:7, directional = 1, row.col = 1:5)
Thanks
You can use the target.prop.height argument:
chordDiagram(mat2, grid.col = 1:7, directional = 1, row.col = 1:5,
target.prop.height = mm_h(1))
chordDiagram(mat2, grid.col = 1:7, directional = 1, row.col = 1:5,
target.prop.height = mm_h(3))

Plot tree-like data vs time (including 1-child nodes)

I have some simple tree-like edge-data (e.g. data below) with the following characteristics:
there is a root node (0)
all non-root nodes have exactly one parent, but 0-to-many children (including 1)
there is a time t associated with each edge (or equivalently each unique node in i.fr)
we can compute dt as below, if helpful
I want to plot these data as a tree, with time along one dimension, so that edge lengths are proportional to dt (e.g. sketch below). How can I do this in R?
I explored ape and data.tree packages, and ggtree, but none seem to provide interface for creating tree objects from edge lists, and I think my data (with 1-child nodes) are rejected as some types of trees?
Sample Data
tree = data.frame(
t = c( 0, 1, 1, 4, 5, 7),
i.fr = c( 0, 1, 1, 2, 3, 5),
i.to = c( 1, 2, 3, 4, 5, 6),
dt = c(NA, 1, 1, 3, 4, 2))
Fake phylo
fake.phylo = list(
edge = cbind(tree$i.fr,tree$i.to),
tip.label = c('4','6'),
Nnode = 5,
edge.length = tree$dt)
class(fake.phylo) = 'phylo'
phylo.tree = as.phylo(fake.phylo) # works 😈
plot(tree) # (!) tree badly conformed; cannot plot. Check the edge matrix.
Desired Result
Here is an option using ggraph; I have no idea how well this generalises for trees with more than one split.
library(ggraph)
tree %>%
select(i.fr, i.to) %>%
graph_from_data_frame(directed = TRUE) %>%
ggraph() +
geom_node_point() +
geom_edge_link(arrow = arrow(length = unit(4, 'mm')), end_cap = circle(3, 'mm')) +
geom_node_label(aes(label = name)) +
theme_minimal() +
scale_y_continuous(
limits = c(0, max(tree$dt, na.rm = TRUE)),
breaks = c(0:(max(tree$dt, na.rm = TRUE))),
labels = rev(c(0:(max(tree$dt, na.rm = TRUE)))),
minor_breaks = NULL,
position = "left") +
scale_x_continuous(expand = c(0.1, 0.1), breaks = NULL) +
labs(x = "", y = "Time")
You can also force coord_fixed() which gives you a narrower version
# ... Same as before +
coord_fixed()
With help from here, a reasonably full-featured solution is:
library('ggplot2')
.tip.pos <<- 0
.recurse.tree = function(ii,par=0,gen=0){
# recursively walk the tree and extract the following rows:
# index (ordered by tree search), generation, position, n direct children, n total children
b.par = ii[,1]==par
i.chi = ii[b.par,2]
n.chi = length(i.chi)
if (n.chi > 0){
mat.chi = matrix(nrow=5,unlist(lapply(i.chi,function(i){
.recurse.tree(ii=ii[!b.par,,drop=FALSE],par=i,gen=gen+1)
})))
par.pos = mean(range(mat.chi[3,mat.chi[1,] %in% i.chi])) # midpoint of direct children
mat.par.chi = matrix(nrow=5,c(par,gen,par.pos,n.chi,ncol(mat.chi),mat.chi))
} else {
.tip.pos <<- .tip.pos + 1
mat.par = matrix(nrow=5,c(par,gen,.tip.pos,0,0))
}
}
plot.tree = function(tree,...){
# plot a transmission tree vs time
tree.data = .recurse.tree(as.matrix(tree[c('par','chi')]))
tree = rbind(c(-1,-1,0,NA),tree) # append dummy root
tree = tree[match(tree.data[1,],tree$chi),] # reorder to match tree.data
tree$gen = factor(tree.data[2,]) # generation
tree$pos = tree.data[3,] # position
tree$child.direct = tree.data[4,] # n direct children
tree$child.total = tree.data[5,] # n total children
pc.map = match(tree$par,tree$chi) # lookup index for chi -> par
tree$pos.par = tree$pos[pc.map] # parent position
tree$t.par = tree$t[pc.map] # parent t
g = ggplot(tree) +
geom_segment(aes_string(y='t.par',x='pos.par',xend='pos',yend='t'),alpha=.5) +
geom_point(aes_string(x='pos',y='t',...)) +
scale_x_continuous(labels=NULL,breaks=NULL) + labs(x='')
}
tree = data.frame(
t = c( 0, 1, 1, 4, 5, 7),
par = c( 0, 1, 1, 2, 3, 5),
chi = c( 1, 2, 3, 4, 5, 6),
dt = c(NA, 1, 1, 3, 4, 2))
g = plot.tree(tree,color='gen')
ggsave('Rplots.png',w=4,h=4)
Result
Extending
The code above can be easily modified to give some nice results, e.g. with bigger data, custom aes, and ggMarginal:

How to customize color and scale of y axis in each multiple plot using plot.zoo?

This a reproducible example of my data
dat<-data.frame(
prec<-rnorm(650,mean=300),
temp<-rnorm(650,mean = 22),
pet<-rnorm(650,mean = 79),
bal<-rnorm(650,mean = 225))
colnames(dat)<-c("prec","temp","pet","bal")
dat<-ts(dat,start = c(1965,1),frequency = 12)
#splines
fit1<-smooth.spline(time(dat),dat[,1],df=25)
fit2<-smooth.spline(time(dat),dat[,2],df=25)
fit3<-smooth.spline(time(dat),dat[,3],df=25)
fit4<-smooth.spline(time(dat),dat[,4],df=25)
dat2 <- cbind(dat, fitted(fit1), fitted(fit2), fitted(fit3), fitted(fit4))
plot.zoo(window(dat2, start = 1965), xlab = "", screen = 1:4,
col = c(1:4, 1, 2, 3, 4),yax.flip = TRUE, bty="n")
How can I modify the color and the scale of the y axes in each plot to match the same color of the time series?
Create dat2 which contains both the series and the smooth splines, use window to start it at 1965, specify in screen= that the the columns be in panels 1:4 (it will recycle for the last 4 columns) and specify that the last 4 columns be black, i.e. 1, or modify colors to suit.
dat2 <- cbind(dat, fitted(fit1), fitted(fit2), fitted(fit3), fitted(fit4))
plot.zoo(window(dat2, start = 1965), xlab = "", screen = 1:4,
col = c(1:4, 1, 1, 1, 1))
Regarding the comment, to me it seems easier to read if the ticks, labels and axes are black but if you want to do that anyways use the mfrow= graphical parameter with a for loop and specify col.axis and col.lab in the plot.zoo call:
nc <- ncol(dat)
cols <- 1:nc # specify desired colors
opar <- par(mfrow = c(nc, 1), oma = c(6, 0, 5, 0), mar = c(0, 5.1, 0, 2.1))
for(i in 1:nc) {
dat1965 <- window(dat[, i], start = 1965)
plot(as.zoo(dat1965), col = cols[i], ylab = colnames(dat)[i], col.axis = cols[i],
col.lab = cols[i])
fit <- smooth.spline(time(dat1965), dat1965, df = 25)
lines(cbind(dat1965, fitted(fit))[, 2]) # coerce fitted() to ts
}
par(opar)
mtext("4 plots", line = -2, font = 2, outer = TRUE)

Creating subplot (facets) with custom x,y position of the subplots in ggplot2

How can we custom the position of the panels/subplot in ggplot2?
Concretely I have a grouped times series and I want to produce 1 subplot per time series with custom positions of the subplot, not necessarily in a grid.
The facet_grid() or facet_wrap() functions do not provide a full customization of the position of the panel as it uses grid.
library(tidyverse)
df = data.frame(group = LETTERS[1:5],
x = c(1,2,3,1.5,2.5),
y =c(2,1,2,3,3),
stringsAsFactors = F)%>%
group_by(group)%>%
expand_grid(time = 1:20)%>%
ungroup()%>%
mutate(dv = rnorm(n()))%>%
arrange(group,time)
## plot in grid
df%>%
ggplot()+
geom_line(aes(x=time,y=dv))+
facet_grid(~group)
## plot with custom x, y position
## Is there an equivalent of facet_custom()?
df%>%
ggplot()+
geom_line(aes(x=time,y=dv))+
facet_custom(~group, x.subplot = x, y.subplot = y)
FYI: This dataset is only an example. My data are EEG data where each group represents an electrode (up to 64) and I want to plot the EEG signals of each electrode accordingly to the position of the electrode on the head.
Well, I guess this would not really be a 'facet plot' any more. I therefore don't think there is a specific function out there.
But you can use the fantastic patchwork package for that, in particular the layout option in wrap_plots.
As the main package author Thomas describes in the vignette, the below option using area() may be a bit verbose, but it would give you full programmatic options about positioning all your plots.
library(tidyverse)
library(patchwork)
mydf <- data.frame(
group = LETTERS[1:5],
x = c(1, 2, 3, 1.5, 2.5),
y = c(2, 1, 2, 3, 3),
stringsAsFactors = F
) %>%
group_by(group) %>%
expand_grid(time = 1:20) %>%
ungroup() %>%
mutate(dv = rnorm(n())) %>%
arrange(group, time)
## plot in grid
mylist <-
mydf %>%
split(., .$group)
p_list <-
map(1:length(mylist), function(i){
ggplot(mylist[[i]]) +
geom_line(aes(x = time, y = dv)) +
ggtitle(names(mylist)[i])
}
)
layout <- c(
area(t = 1, l = 1, b = 2, r = 2),
area(t = 2, l = 3, b = 3, r = 4),
area(t = 3, l = 5, b = 4, r = 6),
area(t = 4, l = 3, b = 5, r = 4),
area(t = 5, l = 1, b = 6, r = 2)
)
wrap_plots(p_list, design = layout)
#> result not shown, it's the same as below
For a more programmatic approach, one option is to create the required "patch_area" object manually.
t = 1:5
b = t+1
l = c(1,3,5,3,1)
r = l+1
list_area <- list(t = t, b = b, l = l, r = r)
class(list_area) <- "patch_area"
wrap_plots(p_list, design = list_area)
Created on 2020-04-22 by the reprex package (v0.3.0)

How to prepare data object for R boxplot by var and bin?

I want to plot groups of boxplots, as the following plot shows:
Data generation
bp_data = list(
set = c(1,1,2,2,3,3,4,4),
var = rep(c("red", "green"), 4),
val = t(matrix(runif(800, 1, 50)+seq(1,40,length=800 ), 100, 8)) # fake data
)
str(bp_data)
#List of 3
# $ set: num [1:8] 1 1 2 2 3 3 4 4
# $ var: chr [1:8] "red" "green" "red" "green" ...
# $ val: num [1:8, 1:100] 37.9 27.1 40.7 22.4 32.4 ...
Plot command
#plot boxplots by variable and set
bp = boxplot(
val ~ (var * set),
bp_data,
col = bp_data$var,
xaxt = "n"
)
# add axis labels and legend
axis(1, at = seq(1.5,7.5,2), labels = paste("set", c(1:4)), tick=FALSE , cex.axis=1.2)
legend("topright", c("var 1", "var 2"), col=c("red", "green"),pch =15)
I'm quite happy with the resulting plot using the sample data set. In this sample data you can see, that all distributions have an n of 100.
However, when I switch to real data, of course my vectors do not have the length 100, hence, I can't provide boxplot() with a matrix containing the vals. I do not know, how to adjust the formula given to boxplot() to create the plot with real data.
I tried to prepare my data with the following:
bp_data = list(
set = c(1,1,2,2,3,3,4,4),
var = rep(c("red", "green"), 4),
val = list(
a = runif(100, 1, 10),
b = runif(100, 1, 12),
c = runif(100, 1, 13),
d = runif(100, 1, 14),
e = runif(100, 1, 15),
f = runif(100, 1, 16),
g = runif(100, 1, 17),
h = runif(100, 1, 18)
)
)
The values were stored as list, to potentially differ the length of the individual vectors. But boxplot does not accepts it in its current form.
The real data
Before I use my real data I wanted to get the plot working, to generate my data accordingly. But to explain, this is how I get my data. I'm iterating over a object by set (set). For each set I calculate variables, for which I would come up with a name for each (var) and then calculate the values (val). Thus, for each iteration I will generate a var string of lenght 1, the set string of the iteration and values in the length of 80-100.
Maybe I can prepare the data better and do something with melt() of reshape2? I am not experienced with that...
Suggestion
Is this a good way to solve it?
bp_data = data.frame(set = character(0), var = character(0), val = numeric(0))
for(set in names(data.obj)){
# store slope
bp_data = rbind(bp_data, data.frame(
set = survey,
var = "slope",
val = data.obj[[set]]$height / data.obj[[set]]$length
))
# store whatever
bp_data = rbind(bp_data, data.frame(
set = survey,
var = "whatever",
val = data.obj[[set]]$var1 / data.obj[[set]]$var2 + data.obj[[set]]$var3
))
}
boxplot does not need a matrix nor lists of the same length.
Below, I modified your data generation step to make each list have a different length. Then only minor changes to your code are needed.
## Modified data
bp_data = list(
set = c(1,1,2,2,3,3,4,4),
var = rep(c("red", "green"), 4),
val = list(
a = runif(70, 1, 10),
b = runif(75, 1, 12),
c = runif(80, 1, 13),
d = runif(85, 1, 14),
e = runif(90, 1, 15),
f = runif(95, 1, 16),
g = runif(100, 1, 17),
h = runif(105, 1, 18)
)
)
## Modified code
bp = boxplot(
bp_data$val,
col= bp_data$var,
xaxt = "n"
)
axis(1, at = seq(1.5,7.5,2), labels = paste("set", c(1:4)), tick=FALSE , cex.axis=1.2)
legend("topleft", c("var 1", "var 2"), col=c("red", "green"),pch =15)

Resources