R function keeps returning NULL - r

I am having a very odd problem in R. The question was to make a function for global and semi global allignment. Appropriate algorithms were made which are able to "print out" the correct allignment. However "returning" the alginment seems to be a problem for the semi global algorithm.
Below are the functions for both alignments which both contain two functions: one computing the score matrix and the other outputs the alignment. As you can see, the output function for semi global was inspired by the global one but although it is able to print out values A and B, when returning A and B a value NULL is returned.
It came to my attention that when making defining A and B, they also contain a NULL part which seen by printing the structures of A and B at the end. This is also the case in the global alignment but does not seem to be a problem here.
Global Alignment Algorithm
########### GLOBAL ALLIGNMENT ALGORITHM ############
GA_score = function(v,w,score.gap=-3,score.match=8,score.mismatch=-5){
v = strsplit(v,split="")[[1]]
w = strsplit(w,split="")[[1]]
S = matrix(0,nrow=(length(v)+1),ncol = (length(w)+1) )
S[1,1] = 0
for(j in 2:dim(S)[2]){
S[1,j] = score.gap*(j-1)
}
for(i in 2:dim(S)[1]){
S[i,1] = score.gap*(i-1)
for(j in 2:dim(S)[2]){
if(v[i-1]==w[j-1]){diag = S[i-1,j-1] + score.match} else {diag = S[i-1,j-1] + score.mismatch}
down = S[i-1,j] + score.gap
right = S[i,j-1] + score.gap
S[i,j] = max(diag,down,right)
}
}
return(S)
}
GA_output = function(v,w,S,score.gap=-3,score.match=8,score.mismatch=-5){
v = strsplit(v,split="")[[1]]
w = strsplit(w,split="")[[1]]
A=c()
B=c()
GA_rec = function(A,B,S,i,j,v,w,score.gap,score.match,score.mismatch){
if (i==1 | j==1){
if(i>1){
for(i1 in seq(i-1,1,-1)){
A = c(v[i1],A)
B = c("-",B)
}
}
if(j>1){
for(j1 in seq(j-1,1,-1)){
A = c("-",A)
B = c(w[j1],B)
}
}
return(list(v=A,w=B))
}
if(v[i-1]==w[j-1] ){diag = score.match} else {diag=score.mismatch}
if (S[i,j] == (S[i-1,j-1] + diag)){
A.temp = c(v[i-1],A)
B.temp = c(w[j-1],B)
GA_rec(A.temp,B.temp,S,i-1,j-1,v,w,score.gap,score.match,score.mismatch)
}
else if (S[i,j] == (S[i-1,j] + score.gap)){
A.temp <- c(v[i-1],A)
B.temp <- c("-",B)
GA_rec(A.temp,B.temp,S,i-1,j,v,w,score.gap,score.match,score.mismatch)
}
else {
A.temp = c("-",A)
B.temp = c(w[j-1],B)
GA_rec(A.temp,B.temp,S,i,j-1,v,w,score.gap,score.match,score.mismatch)
}
}
return( GA_rec(A,B,S,length(v)+1,length(w)+1,v,w,score.gap,score.match,score.mismatch))
}
Semi-Global Alignment Algorithm
########### SEMI GLOBAL ALLIGNMENT ALGORITHM ############
SGA_score = function(sequence1,sequence2,score.gap=-1,score.match=1,score.mismatch=-1){
v=sequence2
w=sequence1
v = strsplit(v,split="")[[1]]
w = strsplit(w,split="")[[1]]
S = matrix(0,nrow=length(v)+1,ncol=length(w)+1)
for(i in 1:(length(w)+1)){
for( j in 1:(length(v)+1)){
if (i==1|j==1){S[i,j]=0}
else{
if((i==length(w)+1) | (j==length(v)+1)){
from.top = S[i,j-1]
from.left = S[i-1,j]
}
else{
from.top = max(S[i,j-1]+score.gap) # Max is artifact from max(0,... )
from.left = max(S[i-1,j]+score.gap)
}
if(w[i-1] == v[j-1]){
from.diag = S[i-1,j-1]+score.match
}
else{
from.diag = S[i-1,j-1]+score.mismatch
}
S[i,j] = max(from.top,from.left,from.diag)
}
}
}
return(S)
}
SGA_output = function(v,w,S,score.gap=-1,score.match=1,score.mismatch=-1){
v = strsplit(v,split="")[[1]]
w = strsplit(w,split="")[[1]]
A=c()
B=c()
print(str(A))
print(str(B))
SGA_rec = function(A,B,S,i,j,v,w,score.gap,score.match,score.mismatch){
if (i==1 | j==1){
if(i>1){
for(i1 in seq(i-1,1,-1)){
A = c(v[i1],A)
B = c("-",B)
}
}
if(j>1){
for(j1 in seq(j-1,1,-1)){
A = c("-",A)
B = c(w[j1],B)
}
}
print(A)
print(B)
out = list(v=A,w=B)
#print(out)
print(str(A))
print(str(B))
print(str(out))
return(out)
}
if(v[i-1]==w[j-1] ){diag = score.match} else {diag=score.mismatch}
if (S[i,j] == (S[i-1,j-1] + diag)){
A.temp = c(v[i-1],A)
B.temp = c(w[j-1],B)
SGA_rec(A.temp,B.temp,S,i-1,j-1,v,w,score.gap,score.match,score.mismatch)
}
#####
if ( j==length(w)+1) { # Are we in last row?
score.temp = score.gap
score.gap=0
}
else{score.temp=score.gap}
if(S[i,j] == (S[i-1,j] + score.gap)){
A.temp <- c(v[i-1],A)
B.temp <- c("-",B)
score.gap = score.temp
SGA_rec(A.temp,B.temp,S,i-1,j,v,w,score.gap,score.match,score.mismatch)
}
score.gap=score.temp
####
if(i==length(v)+1){
score.temp=score.gap
score.gap=0
}
else{score.temp=score.gap}
if(S[i,j] == (S[i,j-1] + score.gap)){
A.temp = c("-",A)
B.temp = c(w[j-1],B)
score.gap=score.temp
SGA_rec(A.temp,B.temp,S,i,j-1,v,w,score.gap,score.match,score.mismatch)
}
}
return(SGA_rec(A,B,S,length(v)+1,length(w)+1,v,w,score.gap,score.match,score.mismatch))
}
S1 = SGA_score("ACGTCAT","TCATGCA")
S1
align = SGA_output("ACGTCAT","TCATGCA",S1)
align
I am surpised that the global alignment works but the semi global one doesn't, even tough they both have this NULL part (can someone maybe explain what this is? Has it something to do with internal objects in a function?) and the semi global knows what A and B is.
Any help is greatly appreciated!

SGA_rec seems to be missing a return value. You need an else {return(<something>)) after the last if.
Illustration:
fun <- function() if (FALSE) 1
x <- fun()
x
#NULL
Read help("NULL") to learn what it means.

Related

How to solve a problem using "break" statement in R?

I am building a simple ant colony optimization code in R, but I have a problem in compiling a function to obtain the optimum route for each ant using the "break" statement. There is always appear an error saying that "missing value where TRUE/FALSE needed" in my looping. Here is the code
rm(list = ls())
x = c(11.7057,17.4151,1.4992,14.9609,9.5711)
y = c(11.1929,10.7112,17.0964,12.2228,6.7928)
n = length(x)
m = 20
t = matrix(0.0001,ncol=n,nrow=n)
beta = 1
alpha = 5
miter = 100
d = matrix(c(rep(0,n*n)),ncol=n,byrow=FALSE)
for (i in 1:n){
for (j in 1:n){
d[i,j] = sqrt((x[i]-x[j])^2+(y[i]-y[j])^2)
}
}
d
h = matrix(c(rep(0,n*n)),ncol=n,byrow=FALSE)
for (i in 1:n){
for (j in 1:n){
if (d[i,j]==0){
h[i,j]=0
}
else{
h[i,j]=1/d[i,j]
}
}
}
h
antour <- function(a1,a2,a3,a4,a5,a6,a7){
for (i in 1:m){
mh = h
for (j in 1:n-1){
a = start_places[i,j]
mh[,c(a)]=0
temp = (t[c(a),]^alpha)*(mh[c(a),]^beta)
q = sum(temp)
p = (1/q)*temp
r = runif(1)
s = 0
for (k in 1:n){
s = s+p[k]
start_places[i,j+1] = k
if (r <= s){
break
}
print(start_places)
}
}
}
new_places = start_places
}
for (i in 1:miter){
start_places = matrix(c(rep(1,m)),ncol=1)
tour = antour(a1=start_places,a2=m,a3=n,a4=h,a5=t,a6=alpha,a7=beta)
}
I expect that in the looping process, the start_places[i,j+1]=k when the value of r <= s and obtain the optimum route for each ant, but the actual output is an error always appears as follows
output is Error in if (r <= s) { : missing value where TRUE/FALSE needed

Why getting argument is of length zero` in R?

I have a simple function called PAP. I'm wondering when I run it why I get the following error:
Error in if (n.sim < 2) { : argument is of length zero
Here is my PAP function:
PAP = function (n.sim, sim.time){
n.sim = if(n.sim < 1) { n.sim = 1 } # If a user put zero or a negative number, make it 1
sim.time = if(n.sim < 2) { sim.time = 0 } else { sim.time }
for (i in 1:n.sim) {
plot( rnorm(1e2) )
Sys.sleep( sim.time ) }
}
PAP(n.sim = 2, sim.time = 5)
You shouldn't try to assign the results of an if statement, because this happens:
> n.sim = 2
> n.sim = if(n.sim < 1) { n.sim = 1 }
> n.sim
NULL
Instead you should do:
PAP = function (n.sim, sim.time){
if(n.sim < 1) {
n.sim = 1
} # If a user put zero or a negative number, make it 1
if(n.sim < 2) {
sim.time = 0
} # else didn't do anything here so removed
for (i in 1:n.sim) {
plot( rnorm(1e2) )
Sys.sleep( sim.time )
}
}
i.e. just use if as a control flow statement that determines whether particular lines get executed, and use the code within the if statement to change your variables.

finding similar elements within two arrays

Is there a faster way to do this? N^2 time just seems terrible.
mergeData<-function(p,c) {
for(i in 1:length(p[[1]])) {
for(k in 1:length(c[[1]])) {
if(toString(c[[k,46]]) == toString(p[[i,1]])) {
#Do stuff here with pairs found
print(i)
}
}
}
}
row1 = c[[,46]]
row2 = p[[,1]]
x = data.frame(row = row1, nr1 = c(1:len(row1)))
y = data.frame(row = row2, nr2 = c(1:len(row2)))
same_pairs = merge(x, y)[c("nr1", "nr2")]
In same_pairs you have now indeces of a rows with the same elements.
Complexity : O(len(row1) + len(row2))

Output to pdf not working with ReferenceClasses methods in R?

Output to pdf not working with ReferenceClasses methods in R?
This is an example taken from the ReferenceClasses R doc, with some minor
modification:
mEdit = setRefClass("mEdit", fields = list(data="matrix", edits="list"))
mEdit$methods(
edit = function(i, j, value) {
backup = list(i, j, data[i, j])
data[i, j] <<- value
edits <<- c(edits, list(backup))
invisible(value)
}
)
mEdit$methods(
undo = function() {
prev = edits
if(length(prev)) {
prev = prev[[length(prev)]]
}
else {
stop("No more edits to undo!")
}
edit(prev[[1]], prev[[2]], prev[[3]])
length(edits) <<- length(edits) - 2
invisible(prev)
}
)
mEdit$methods(
show = function() {
message("ClassName: ", classLabel(class(.self)))
message("Data:")
methods::show(data)
message("Undo list length: ", length(edits))
}
)
mEdit$methods(
.DollarNames.mEdit = function(x, pattern) {
grep(pattern, getRefClass(class(x))$methods(), value=TRUE)
}
)
x = matrix(1:24, 3, 8)
xx = mEdit(data=x)
xx$edit(2,2,0)
xx$show()
xx$edit(3, 5, 1)
xx$show()
xx$undo()
xx$show()
mv = setRefClass(
"matrixViewer",
fields=c("viewerDevice", "viewerFile"),
contains="mEdit"
)
mv$methods(
.DollarNames.mEdit = function(x, pattern) {
grep(pattern, getRefClass(class(x))$methods(), value=TRUE)
}
)
mv$methods(
view = function() {
## dd = dev.cur();
## dev.set(viewerDevice)
## devAskNewPage(FALSE)
image(
data,
main=paste("After", length(edits), "edits")
)
## dev.set(dd)
}
)
mv$methods(
edit = function(i,j, value) {
callSuper(i,j, value)
view()
}
)
mv$methods(
initialize = function(file="./mv.pdf", ...) {
viewerFile <<- file
## pdf(viewerFile)
## viewerDevice <<- dev.cur()
## dev.set(dev.prev())
callSuper(...)
}
)
mv$methods(
finalize = function() {
dev.off(viewerDevice)
}
)
x = matrix(rnorm(64, 0, 34), 8, 8)
xx = mv(file="/tmp/x.pdf", data=x)
xx$edit(2,2,0)
xx$edit(3, 5, 1)
xx$edit(4, 4, 2.3)
xx$undo()
xx$view()
Note that I have commented out those lines concerning switch
of output devices, so it uses the default device all through,
otherwise when the view method
is called, the plot is not written to the pdf file at all.
Any idea why this is happening?
Call rm on xx and then call garbage collection. finalize will then be called which will invoke dev.off and the pdf will be written. This assumes everything is uncommented.
rm(xx)
gc()
Also your .DollarNames should be
.DollarNames.mEdit = function(x, pattern) {
grep(pattern, getRefClass(class(x))$methods(), value=TRUE)
}
.DollarNames.matrixViewer = function(x, pattern) {
grep(pattern, getRefClass(class(x))$methods(), value=TRUE)
}
and are not methods of the Reference class. They are external functions seperate to the Reference classes.
So the main takeaway here is that finalize is not called until the object is garbage collected.

How to label each node in a dendrogram based on label for the children using R

I have a dendrogram in R where each leaf has a value. I like to define each node's value by summing the value of its children. I am familiar with dendrapply, however I don't know how to access a node's child in the function and how to write the function recursively.
here is the code to begin with:
library("stats")
library("fastcluster")
library("cluster")
D = rbind( + c(1,1,1,1,1),
+ c(1,2,1,1,1),
+ c(2,2,2,2,2),
+ c(3,4,5,6,9)
)
dnd = as.dendrogram(hclust.vector(D))
apply_text <<- function(n) {
if (!is.leaf(n)) {
attr(n, "edgetext") <- add the value of the branches
}
if (is.leaf(n)) {
attr(n, "edgetext") <- 1
}
n
}
tmp <- dendrapply(dnd, apply_text)
plot(tmp)
This may be an answer, however, it is reimplementing the dendrapply.
apply_text <<- function(n){
if (!is.leaf(n)) {
cutversion = cut(n, h = attributes(n)$height)
leftLabel = attr(apply_text(cutversion$lower[[1]]), "edgetext")
rightLabel= attr(apply_text(cutversion$lower[[2]]), "edgetext")
attr(n, "edgetext") = as.numeric(as.character(leftLabel)) + as.numeric(as.character(rightLabel))
}
if(is.leaf(n)) {
attr(n,"edgetext") <- 1
}
n
}
tmp <- dendrapply(dnd, apply_text)
Does anybody have a clue how to remove the polygon on the labels? Somebody else also seems to have asked for them to be removed. Any progress on that?

Resources