Conditional simulating based on matches and probability - r

Two matrices
df_A = matrix(, nrow = 5, ncol = 3)
df_A[,1] = c(0, 0, 1, -1, 1)
df_A[,2] = c(0, 1, -1, 0, -1)
df_A[,3] = c(1, 0, -1, 1, 1)
df_B = matrix(, nrow = 5, ncol = 3)
df_B[,1] = c(1, -1, 0, 0, 1)
I want to simulate columns 2 and 3 for df_B based on a few conditions. If the value of df_A is zero, the value for df_B does not change. For example, the first two values for df_B should not change for the first iteration because the first two values for df_A are zero. If the value of df_A is one or negative one, then the respective value for df_B will take on that value given a certain probability (20% in this example). For example, if df_A is negative one and df_B is zero (or one), the respective value for df_B will become negative one 20% of the time.
I know the following is incorrect but here is what I have so far:
belief_change = function(x){
if (df_A[x] = -1 & df_B[x] != -1 & sample(1:2, 1, prob = c(0.2, 0.8) = 1))
df_B[x+1] = df_A[x]
else
df_B[x+1] = df_B[x]
if (df_A[x] = 1 & df_B[x] != 1 & sample(1:2, 1, prob = c(0.2, 0.8) = 1))
df_B[x+1] = df_A[x]
else
df_B[x+1] = df_B[x]
if (df_A[x] = 0)
df_B[x+1] = df_B[x]
}
I'm using the sample function here to help generate a probability. I also need to put this into a for-loop eventually.

There are several errors in your code. First, the correct logical equality operator is == not =. Second, you put = 1 inside the call to sample. Third, if you really want to affect the environment outside where the function is called (i.e. to change df_B by calling your function, you need to use the deep assignment operator <<-, not = or <-. You can read more about this here.
Here is a version of your code that works; see if it does what you want it to do.
belief_change <- function(x) {
if (df_A[x] == -1 & df_B[x] != -1 & sample(1:2, 1, prob = c(0.2, 0.8)) == 1)
df_B[x+1] <<- df_A[x]
else
df_B[x+1] <<- df_B[x]
if (df_A[x] == 1 & df_B[x] != 1 & sample(1:2, 1, prob = c(0.2, 0.8)) == 1)
df_B[x+1] <<- df_A[x]
else
df_B[x+1] <<- df_B[x]
if (df_A[x] == 0)
df_B[x+1] <<- df_B[x]
}

Related

How to rotate nodes of a time-calibrated phylogenetic tree to match a particular order in R?

I have a time-calibrated phylogenetic tree from BEAST and I would like to make a figure in which its nodes are rotated to match an arbitrary ordering. The following code works perfectly to plot the tree with the nodes in the order they are in the input file.
library("phytools")
library("phyloch")
library("strap")
library("coda")
t <- read.beast("mcctree.tre") # I couldn't upload the file here
t$root.time <- t$height[1]
num_taxa <- length(t$tip.label)
display_all_node_bars <- TRUE
names_list <-vector()
for (name in t$tip){
v <- strsplit(name, "_")[[1]]
if(display_all_node_bars){
names_list = c(names_list, name)
}
else if(v[length(v)]=="0"){
names_list = c(names_list, name)
}
}
nids <- vector()
pos <- 1
len_nl <- length(names_list)
for(n in names_list){
for(nn in names_list[pos:len_nl]){
if(n != nn){
m <- getMRCA(t,c(n, nn))
if(m %in% nids == FALSE){
nids <- c(nids, m)
}
}
}
pos <- pos+1
}
pdf("tree.pdf", width = 20, height = 20)
geoscalePhylo(tree = t,
x.lim = c(-2,21),
units = c("Epoch"),
tick.scale = "myr",
boxes = FALSE,
width = 1,
cex.tip = 2,
cex.age = 3,
cex.ts = 2,
erotate = 0,
label.offset = 0.1)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
for(nv in nids){
bar_xx_a <- c(lastPP$xx[nv]+t$height[nv-num_taxa]-t$"height_95%_HPD_MIN"[nv-num_taxa],
lastPP$xx[nv]-(t$"height_95%_HPD_MAX"[nv-num_taxa]-t$height[nv-num_taxa]))
lines(bar_xx_a, c(lastPP$yy[nv], lastPP$yy[nv]), col = rgb(0, 0, 1, alpha = 0.3), lwd = 12)
}
t$node.label <- t$posterior
p <- character(length(t$node.label))
p[t$node.label >= 0.95] <- "black"
p[t$node.label < 0.95 & t$node.label >= 0.75] <- "gray"
p[t$node.label < 0.75] <- "white"
nodelabels(pch = 21, cex = 1.5, bg = p)
dev.off()
The following code is my attempt to rotate the nodes in the way I want (following this tutorial: http://blog.phytools.org/2015/04/finding-closest-set-of-node-rotations.html). And it works for rotating the nodes. However, the blue bars indicating the confidence intervals of the divergence time estimates get out of their correct place - this is what I would like help to correct. This will be used in much larger files with hundreds of branches - the example here is simplified.
new.order <- c("Sp8","Sp9","Sp10","Sp7","Sp6","Sp5","Sp4","Sp2","Sp3","Ou1","Ou2","Sp1")
t2 <- setNames(1:Ntip(t), new.order)
new.order.tree <- minRotate(t, t2)
new.order.tree$root.time <- t$root.time
new.order.tree$height <- t$height
new.order.tree$"height_95%_HPD_MIN" <- t$"height_95%_HPD_MIN"
new.order.tree$"height_95%_HPD_MAX" <- t$"height_95%_HPD_MAX"
pdf("reordered_tree.pdf", width = 20, height = 20)
geoscalePhylo(tree = new.order.tree,
x.lim = c(-2,21),
units = c("Epoch"),
tick.scale = "myr",
boxes = FALSE,
width = 1,
cex.tip = 2,
cex.age = 3,
cex.ts = 2,
erotate = 0,
label.offset = 0.1)
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
for(nv in nids){
bar_xx_a <- c(lastPP$xx[nv]+new.order.tree$height[nv-num_taxa]-new.order.tree$"height_95%_HPD_MIN"[nv-num_taxa],
lastPP$xx[nv]-(new.order.tree$"height_95%_HPD_MAX"[nv-num_taxa]-new.order.tree$height[nv-num_taxa]))
lines(bar_xx_a, c(lastPP$yy[nv], lastPP$yy[nv]), col = rgb(0, 0, 1, alpha = 0.3), lwd = 12)
}
new.order.tree$node.label <- t$posterior
p <- character(length(new.order.tree$node.label))
p[new.order.tree$node.label >= 0.95] <- "black"
p[new.order.tree$node.label < 0.95 & new.order.tree$node.label >= 0.75] <- "gray"
p[new.order.tree$node.label < 0.75] <- "white"
nodelabels(pch = 21, cex = 1.5, bg = p)
dev.off()
I've found several similar questions here and in other forums, but none dealing specifically with time-calibrated trees - which is the core of the problem described above.
The short answer is that phyTools::minRotate() doesn't recognize the confidence intervals as associated with nodes. If you contact the phyTools maintainers, they may well be able to add this functionality quite easily.
Meanwhile, you can correct this yourself.
I don't know how read.beast() saves confidence intervals – let's say they're saved in t$conf.int. (Type unclass(t) at the R command line to see the full structure; you should be able to identify the appropriate property.)
If the tree's node labels are unique, then you can infer the new sequence of nodes using match():
library("phytools")
new.order <- c("Sp8","Sp9","Sp10","Sp7","Sp6","Sp5","Sp4","Sp2","Sp3","Ou1","Ou2","Sp1")
# Set up a fake initial tree -- you would load the tree from a file
tree <- rtree(length(new.order))
tree$tip.label <- sort(new.order)
tree$node.label <- seq_len(tree$Nnode)
tree$conf.int <- seq_len(tree$Nnode) * 10
# Plot tree
par(mfrow = c(1, 2), mar = rep(0, 4), cex = 0.9) # Create space
plot(tree, show.node.label = TRUE)
nodelabels(tree$conf.int, adj = 1) # Annotate "correct" intervals
# Re-order nodes with minRotate
noTree <- minRotate(tree, setNames(seq_along(new.order), new.order))
plot(noTree, show.node.label = TRUE)
# Move confidence intervals to correct node
tree$conf.int <- tree$conf.int[match(noTree$node.label, tree$node.label)]
nodelabels(tree$conf.int, adj = 1)
If you can't guarantee that the node labels are unique, you can always overwrite them in a temporary object:
# Find node order
treeCopy <- tree
treeCopy$node.label <- seq_len(tree$Nnode)
nodeOrder <- match(minRotate(treeCopy)$node.label, treeCopy$node.label)
# Apply node order
tree$conf.int <- tree$conf.int[nodeOrder]

R function with multiple operators

data ranges from -6 to 6 and I am trying to create 3 categories, however my function is not returning anyone for category 2 even though there are people present
FFMIBMDcopdcases$lowBMD = ifelse((FFMIBMDcopdcases$copd_Tscore >= -1) , 0,
ifelse((FFMIBMDcopdcases$copd_Tscore < -1), 1,
ifelse((FFMIBMDcopdcases$copd_Tscore <= -2.5), 2, NA)))
Try using cut function. Example:
myValues <- runif(n = 20, min = -6, max = 6)
as.numeric(as.character(cut(x = myValues, breaks = c(-Inf, -2.5, -1, Inf), labels = c(2, 1, 0))))
Since you want a numeric result it might be easiest to use findInterval although you will need to subtract the result from 2 to get in the inverse order ( 2 for lowest and 0 for highest) :
FFMIBMDcopdcases$lowBMD = 2 - findInterval(FFMIBMDcopdcases$copd_Tscore ,
c(-Inf, -2.5, -1, Inf) )

R / igraph Removing the edge between specific nodes that fulfill a certain requirement?

I currently have a code like this:
library(igraph)
set.seed(123); g <- erdos.renyi.game(30, 151 , type = "gnm" , directed = F , loops = F) %>%
set_vertex_attr("a", value = 0) %>%
set_vertex_attr("b", value = 0)
V(g)$b <- sample(c(0, .6, .7), vcount(g), replace = TRUE, prob = c(0.3, .4, .3))
repeat{
mean <- mean(V(g)$a == 1)
prev_value <- mean(v(g)$a == 1)
V(g)$a[V(g)$b <= mean & V(g)$a == 0] <- 1
curr_value <- mean(v(g)$a == 1)
if(prev_value == curr_value){
break
}
}
I want to add something to the if code that will randomly delete an edge that has certain "trait". In this case I want the trait to be that the two connected nodes both have values of 0 for "a".
I want to [...] randomly delete an edge [where] the two connected
nodes both have values of 0 for "a".
You could try
# trait:
candidates <- which(head_of(g, E(g))$a==0 & tail_of(g, E(g))$a==0)
# random: 20% prob for deletion
idx <- sample(c(TRUE, FALSE), length(candidates), prob = c(.2,.8), repl=T)
delete_edges(g, candidates[idx])

Assigning NAs to values in a list

I have a data frame that includes many variables. Here is a shortened version of what I have so far:
n_20010_0_0 <- c(1,2,3,4)
n_20010_0_1 <- c(0, -2, NA, 4)
n_20010_0_2 <- c(3, 0, -7, 2)
x <- data.frame (n_20010_0_0, n_20010_0_1, n_20010_0_2)
I created a new variable that returns whether or not there is a 1 within the list of variables:
MotherIllness0 <- paste("n_20010_0_", 0:2, sep = "")
x$MotherCAD_0_0 <- apply(x, 1, function(x) as.integer(any(x[MotherIllness0] == 1, na.rm = TRUE)))
I would like to keep the NAs as 0's, but I would also like to recode it so that if there is a -7 the new value is NA.
This is what I've tried and it doesn't work:
x$MotherCAD_0_0[MotherIllness0 == -7] <- NA
you don't need to define MotherIllness0, the argument 1 in your apply function takes care of that.
Here's a line of code that does both things you want.
MotherIllness0 <- paste("n_20010_0_", 0:2, sep = "")
x$MotherCAD_0_0<- apply(x[,MotherIllness0], 1, function(x) ifelse(any(x==-7), NA,
as.integer(any(x==1, na.rm=T))))
I assumed that a row with both 1s and -7s should have NA for the new variable. If not, then this should work:
x$MotherCAD_0_0<- apply(x[,MotherIllness0], 1, function(x) ifelse(any(x==1, na.rm=T), 1,
ifelse(any(x==-7), NA, 0)))
Note that with the example you have above, these two lines should produce the same outcome.
Here's another way to do it, without using any if-else logic:
# Here's your dataset, with a row including both 1 and -7 added:
x <- data.frame (n_20010_0_0 = c(1, 2, 3, 4, 1),
n_20010_0_1 = c(0, -2, NA, 4, 0) ,
n_20010_0_2 = c(3, 0, -7, 2, -7)
)
# Your original function:
MotherIllness0 <- paste("n_20010_0_", 0:2, sep = "")
x$MotherCAD_0_0 <- apply(x, MARGIN = 1, FUN = function(x) {
as.integer(
any(x[MotherIllness0] == 1, na.rm = TRUE)
)
})
# A simplified version
x$test <- apply(x, MARGIN = 1, FUN = function(row) {
as.integer(
any(row[MotherIllness0] == 1, na.rm = TRUE) &
!any(row[MotherIllness0] == -7, na.rm = TRUE)
)
})
A couple of notes: the name of x in an anonymous function like function(x) can be anything, and you'll save yourself a lot of confusion by calling it what it is (I named it row above).
It's also unlikely that you actually need to convert your result column to integer - logical columns are easier to interpret, and they work the same as 0-1 columns for just about everything (e.g., TRUE + FALSE equals 1).

function ifelse calculation

I am trying to write the function in R, but keep getting an error. Within 1 simulation run I generate random values from 2 intevals - in order to generate 2 different output values.
se.m if the input parameter lies within [0, 1]
se.st if the input parameter lies wiothin [1, 5]
(floating point in neglegible)
Then, these randomly generated values are used as input in the following function:
This is the code I have used:
fuchs08 <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.33, 0, 0.12 * (x.m[i]^2) - 0.04 * x.m[i])
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 3.06, 0.12 * (x.m[i]^2) - 0.04 * x.m[i], 1)
}
return(cbind(se.m, se.st))
}
I dont get any results. I believe the error is in the ifelse statement, but cannot find a solution to it.
> fuchs08(5)
se.m se.st
[1,] 0 NA
[2,] NA 1
[3,] NA 1
[4,] NA NA
[5,] 0 1
The overall idea is add this function to a list of functions called funktionen. Then I run a simulation 100 times. Simulation 1 randomly chooses a function from the list funktionen and executes it. (Function creates two outputs for the aforementioned intevals: se.m and se.st which are combined with the outputs from simulation 2:99) Therefore the function needs to be in the format: function(n) in order to run the random function selection. Here is my code for that part:
funktionen <- list(akbas, bell.glade, borter, fell.hartford, fuchs07, fuchs08)
fxn_list_sample <- sample(1:5, 100, replace=T)
fxn_list_result <- unlist(sapply(fxn_list_sample, function(x) do.call(funktionen[[x]], args=list(n=1))))
results <- as.data.frame(t(fxn_list_result))
colnames(results) <- c("se.m", "se.st")
results <- melt(results)
results$value <-round(results$value, 4)
separate(results, variable, into = c("Parameter", "Intensitaet")) %>%
mutate(Intensitaet = c(3, 2) [(Intensitaet == "m")+1])
Any suggestions how to fix this?
To answer your trouble with ifelse():
ifelse() needs three arguments to make sense (condition, yes, no). it works with only the condition if the condition evaluates to NA, hence the NA's in your results and works with two arguments if the condition evalutates to TRUE, hence the 1 in your resutls. As Konrad says in the comment, the use of ifelse seems redundant. For illustration:
> ifelse(1==1)
Error in ifelse(1 == 1) : argument "yes" is missing, with no default
> ifelse(NA)
[1] NA
> ifelse(1==1, 4)
[1] 4
> ifelse(1!=1, 4)
Error in ifelse(1 != 1, 4) : argument "no" is missing, with no default
> ifelse(1!=1, 4, 10)
[1] 10
Regarding your original problem, I am not sure if I understand you question correctly, but maybe this does what you want:
fuchs08 <- function(x){
ifelse(x<1/3, 0,
ifelse(x<=3.06, 0.12*x^2-0.04*x, 1))
}
fuchs08_with_n_inputs_two_outputcols <- function(n) {
df <- data.frame(input=runif(n, 0, 5))
df$se.m <- ifelse(df$input<1, fuchs08(df$input), NA)
df$se.st <- ifelse(df$input>1 & df$input<5, fuchs08(df$input), NA)
return(df)
}
fuchs08_with_n_inputs_two_outputcols(10)
Edit: replaced n by x to avoid confusion and added a second function after having read your answer (the name is long for the sake of clarity...). It is not the output in your answer but may easily be transformed to that. I think it would be helpfull to give an example of the output you want and which format it should have (data.frame, named vector...?)
I think ifelse & if-and-else are both awkward. You could try something like:
fuchs08<-function(n,min,max) {
x<-runif(n,min,max)
y<-x
y[x<1/3]<-0
y[x>=1/3 & x<=3.06]<-0.12*y[x>=1/3 & x<=3.06]^2-0.04*y[x>=1/3 & x<=3.06]
y[x>3.06]<-1
return(y)
}
(want<-cbind(fuchs08(100,0,1),fuchs08(100,1,5)))
This seems to work. However, not very elegant answer. Feel free to give me tipps to improve it, reduce rebundant elements, etc.
fuchs08 <- function(n) {
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.33, 0, 0.12 * x.m[i]^2 - 0.04* x.m[i])
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 3.06, 0.12 * x.st[i]^2 - 0.04* x.st[i], 1)
}
return(cbind(se.m, se.st))
}
fuchs08(10)
The whole code is:
library(reshape2)
library(stringr)
install.packages("dplyr")
install.packages("tidyr")
library(dplyr)
library(tidyr)
install.packages("data.table")
library(data.table)
# AKBAS u.a. (2009)
akbas <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- 0.17 * (x.m[i]^2) - 0.03 * x.m[i]
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- 0.17 * (x.st[i]^2) - 0.03 * x.st[i]
}
akbasr<-return(cbind(se.m, se.st))
}
# FUCHS u.a.(2007)
fuchs07 <- function(n){
x.m=se.m=x.st=se.st=NULL #solves indexing problem
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- 0.11 * (x.m[i]^2) - 0.02 * x.m[i]
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- 0.11 * (x.st[i]^2) - 0.02 * x.st[i]
}
return(cbind(se.m, se.st))
}
# BELL AND GLADE (2004)
bell.glade <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.2, 0.2)
se.st[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.5, 0.5)
}
return(cbind(se.m, se.st))
}
# BORTER (1999b,a)
borter <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.1, 0.1)
se.st[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 1, 0.5, 0.5)
}
return(cbind(se.m, se.st))
}
# FELL UND HARTFORD (1997)
fell.hartford <- function(n){
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.25, 0.1, 0.4)
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 1.5, 0.4, 0.7)
}
return(cbind(se.m, se.st))
}
# FUCH (2008, 2009)
fuchs08 <- function(n) {
x.m=se.m=x.st=se.st=NULL
for(i in 1:n){
print(x.m[i] <- runif(n = 1, min = 0, max = 1))
se.m[i] <- ifelse (runif(n = 1, min = 0, max = 1) < 0.33, 0, 0.12 * x.m[i]^2 - 0.04* x.m[i])
print(x.st[i] <- runif(n = 1, min = 1, max = 5))
se.st[i] <- ifelse (runif(n = 1, min = 1, max = 5) < 3.06, 0.12 * x.st[i]^2 - 0.04* x.st[i], 1)
}
return(cbind(se.m, se.st))
}
funktionen <- list(akbas, bell.glade, borter, fell.hartford, fuchs07, fuchs08)
fxn_list_sample <- sample(1:5, 100, replace=T)
fxn_list_result <- unlist(sapply(fxn_list_sample, function(x) do.call(funktionen[[x]], args=list(n=1))))
results <- as.data.frame(t(fxn_list_result))
colnames(results) <- c("se.m", "se.st")
results <- melt(results)
results$value <-round(results$value, 4)
separate(results, variable, into = c("Parameter", "Intensitaet")) %>%
mutate(Intensitaet = c(3, 2) [(Intensitaet == "m")+1])
write.csv(results, "murgang-test.csv")

Resources