Making a nested for loop run faster in R - r

I have the following code (nested for loop) in R which is extremely slow. The loop matches values from two columns. Then picks up a corresponding file and iterates through the file to find a match. Then it picks up that row from the file. The iterations could go up to more than 100,000. Please if some one can provide an insight on how to quicken the process.
for(i in 1: length(Jaspar_ids_in_Network)) {
m <- Jaspar_ids_in_Network[i]
gene_ids <- as.character(GeneTFS$GeneIds[i])
gene_names <- as.character(GeneTFS$Genes[i])
print("i")
print(i)
for(j in 1: length(Jaspar_ids_in_Exp)) {
l <- Jaspar_ids_in_Exp[j]
print("j")
print(j)
if (m == l) {
check <- as.matrix(read.csv(file=paste0(dirpath,listoffiles[j]),sep=",",header=FALSE))
data_check <- data.frame(check)
for(k in 1: nrow(data_check)) {
gene_ids_JF <- as.character(data_check[k,3])
genenames_JF <- as.character(data_check[k,4])
if(gene_ids_JF == gene_ids) {
GeneTFS$Source[i] <- as.character(data_check[k,3])
data1 <- rbind(data1, cbind(as.character(data_check[k,3]),
as.character(data_check[k,8]),
as.character(data_check[k,9]),
as.character(data_check[k,6]),
as.character(data_check[k,7]),
as.character(data_check[k,5])))
} else if (toupper(genenames_JF) == toupper(gene_names)) {
GeneTFS$Source[i] <- as.character(data_check[k,4])
data1 <- rbind(data1, cbind(as.character(data_check[k,4]),
as.character(data_check[k,5]),
as.character(data_check[k,6]),
as.character(data_check[k,7]),
as.character(data_check[k,8]),
as.character(data_check[k,2])))
} else {
# GeneTFS[i,4] <- "No Evidence"
}
}
} else {
# GeneTFS[i,4] <- "Record Not Found"
}
}
}

If you pull out the logic for processing one pair into a function, f(m,l), then you could replace the double loop with:
outer(Jaspar_ids_in_Network, Jaspar_ids_in_Exp, Vectorize(f))

Related

Make a for loop run faster in R

I want to create a model where I duplicate a sentence several times, introducing random error each time. The duplicates of the sentence also get duplicated. So, in cycle one, I start with just "example_sentence". In cycle two, I have two copies of that sentence. In cycle three, I have 4 copies of that sentence. I want to do this for 25 cycles with 20k sentences. The code I wrote to do that works way too slowly, and I am wondering if there is a way to make my nested for loops more efficient? Here is the part of the code that is the slowest:
alphabet <- c("a","b","d","j")
modr1 <- "sentencetoduplicate"
errorRate <- c()
errorRate <- append(errorRate, rep(1,1))
errorRate <- append(errorRate, rep(0,999))
duplicate <- c(modr1)
for (q in 1:25) {
collect <- c()
for (z in seq_along(duplicate)) {
modr1 = duplicate[z]
compile1 <- c()
for (k in 1:nchar(modr1)) {
error <- sample(errorRate, 1, replace = TRUE)
if (error == 1) {
compile1 <- append(compile1, sub(substring(modr1,k,k),sample(alphabet,1,replace=TRUE),substring(modr1,k,k)))
} else {
compile1 <- append(compile1, substring(modr1,k,k))
}
}
modr1 <- paste(compile1, collapse='')
collect <- append(collect, modr1)
}
duplicate <- append(duplicate, collect)
}
Here is a faster approach to your loop, but I think the problem of applying this to your problem of 20K sentences remains!
f <- function(let, alphabet = c("a","b","c","d","j"),error_rate=1/1000) {
lenlet=length(let)
let = unlist(let)
k <- rbinom(length(let),1,prob = error_rate)
let[k==1] <- sample(alphabet,size = sum(k==1), replace=T)
return(as.list(as.data.frame(matrix(let, ncol=lenlet))))
}
modr1 <- "sentencetoduplicate"
k <- data.table(list(strsplit(modr1,"")[[1]]))
for(q in 1:25) {
k[, V1:=list(f(V1))]
k <- k[rep(1:nrow(k),2)]
}
Updated with slightly faster version! (Notice this is no longer by=1:nrow(k))

(R language) how to search and record some rows if I have an index file?

I have an index file, say a list. I name it "t". I also have a table, named "b".
I want to search and record all the rows if the index coincides with the first entry of an row in b. I made this code but it doesn't work.
table <- function(t,b){
for (i in 1:length(t)) {
if (t[i] %in% b[1,]) {
for (j in 1:length(b)) {
if (t[i] ==b[1,j]) {
z[i] = c(b[,j])
}
}
}
return z
}
}
Thank you for reading
actually i made it by my self i let u the code down. itegral(sec(q))dq to everyone! see u
autoserch <- function(x,y){
znames <- names(y)
x<-as.matrix(x)
y<-as.matrix(y)
m <- length(y[1,])
n <-length(x[,1])
z <- matrix(0,n,m)
for (i in 1:length(x)){
if (isTRUE(x[i,1] %in% y[,1])) {
for(j in 1:length(y[,1])){
if(isTRUE(x[i,1]==y[j,1])){
z[i,] <- y[j,]
}
}
}
}
colnames(z)<-znames
print(z)
write.csv(z, file = "consulta_solicitada.csv")
}

How do I do a loop using R

I have to create a loop but I don't know how to order to R what I want to do.
for(i in 1:nrow(File1))
for(j in 1:ncol(File2)){
if [(x(i,1)==(cd(1,j)))] # Until here I think it is ok
THEN # I don't know what is the command for THEN in R
for (k in File3) #I have to take all the data appearing in File3
Output (k,1)= K # I don't know what is the command to order the output in R
Output (k,2)= cd(1,j)
Output (k,3)= x(i,2)
Output (k,4)= x(i,3)
Output (k,5)= x(i,4)
Output (k,6)= cd(1,j)
How I have to finish the loop?
Thanks in advance, I'm a bit confused
So this is a basic for-loop, which just prints out the values.
data <- cbind(1:10);
for (i in 1:nrow(data)) {
print(i)
}
If you want to save the output you have to initialize a vector / list /matrix, etc.:
output <- vector()
for (i in 1:nrow(data)) {
k[i] <- i
}
k
And a little example for nested loops:
data <- cbind(1:5);
data1 <- cbind(15:20)
data2 <- cbind(25:30)
for (i in 1:nrow(data)) {
print(paste("FOR 1: ", i))
for (j in 1:nrow(data1)) {
print(paste("FOR 2: ", j))
for (k in 1:nrow(data2)) {
cat(paste("FOR 3: ", k, "\n"))
}
}
}
But as already mentioned, you would probably be better of using an "apply"-function (apply, sapply, lapply, etc). Check out this post: Apply-Family
Or using the package dplyr with the pipe (%>%) operator.
To include some if/else-synthax in the loop:
data <- cbind(1:5);
data1 <- cbind(15:20)
data2 <- cbind(25:30)
for (i in 1:nrow(data)) {
if (i == 1) {
print("i is 1")
} else {
print("i is not 1")
}
for (j in 1:nrow(data1)) {
print(paste("FOR 2: ", j))
for (k in 1:nrow(data2)) {
cat(paste("FOR 3: ", k, "\n"))
}
}
}
In the first loop, I am asking if i is 1.
If yes, the first print statement is used ("i is 1"), otherwise the second is used ("i is not 1").
R Repeat loop Statements
R While loop executes a set of statements repeatedly in a loop as long as the condition is satisfied. We shall learn about the syntax, execution flow of while loop with R example scripts
Reference Site

Using multiple if condition

After some manipulation of data, I want to rename automatically the data taking pieces of name from a string, merging togheter these pieces and then assigning to data. I try to use "if" function in for loop but the code doesn't work. I try to use "grep" as condition in "if" function.
filepath<-c("C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_0.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_1.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_2.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_3.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_4.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_5.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_6.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_1-P1_0.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_1-P1_1.csv",
....)
for(i in filepath){
......
f <- substr(i,10,23) # first piece of name
f2 <- as.character(substr(i,40,57)) # second piece of name
if (grep("W_0",f2)){
m<-c("_sin")
}
if (grep("W_1",f2)){
m<-c("_jan2_febreal")
}
if (grep("W_2",f2)){
m<-c("_real")
}
if (grep("W_3",f2)){
m<-c("_step")
}
if (grep("P1_0",f2,value = FALSE)){
t<-c("_t0.025")
}
if (grepl("P1_1",f2,value = FALSE)){
t<-c("_t0.05")
}
if (grepl("P1_2",f2,value = FALSE)){
t<-c("_t0.1")
}
if (grepl("P1_3",f2,value = FALSE)){
t<-c("_t0.15")
}
if (grepl("P1_4",f2,value = FALSE)){
t<-c("_t0.2")
}
if (grepl("P1_5",f2,value = FALSE)){
t<-c("_t0.25")
}
if (grepl("P1_6",f2,value = FALSE)){
t<-c("_t0.3")
}
}
Outputfilename <- paste(paste(f,m,sep=""),t,sep="")
the result is:
Errore in if (grep("W_1", f2)) { : l'argomento ha lunghezza zero
Without any for loops or if statements, it seems to me you can simply vectorize everything:
f <- substr(filepath,10,23)
m <- t <- character(length(filepath))
m[grepl("W_0",filepath)]<-"_sin"
m[grepl("W_1",filepath)]<-"_jan2_febral"
m[grepl("W_2",filepath)]<-"_real"
m[grepl("W_3",filepath)]<-"_step"
t[grepl("P1_0",filepath)]<-"_t0.025"
t[grepl("P1_1",filepath)]<-"_t0.05"
t[grepl("P1_2",filepath)]<-"_t0.1"
t[grepl("P1_3",filepath)]<-"_t0.15"
t[grepl("P1_4",filepath)]<-"_t0.2"
t[grepl("P1_5",filepath)]<-"_t0.25"
t[grepl("P1_6",filepath)]<-"_t0.3"
Outputfilename <- paste(f,m,t,sep="")
That you can also probably simplify the following way:
f <- substr(filepath,10,23)
m <- t <- character(length(filepath))
w <- array(c(paste("W",0:3,sep="_"),
"_sin", "_jan2_febral", "_real", "_step"), dim=c(4,2))
p <- array(c(paste("P1",0:6,sep="_"),
paste("t_0.",c("025","05","1","15","2","25","3"),sep="")), dim=c(7,2))
for(i in 1:nrow(w)){m[grepl(w[i,1],filepath)] <- w[i,2]}
for(i in 1:nrow(p)){t[grepl(p[i,1],filepath)] <- p[i,2]}
Outputfilename <- paste(f,m,t,sep="")
That you can wrap in a function if you like:
outputfile.namer <- function(filepath,w,p){
# filepath being your vector of file paths
# w and p your correspondance tables for your "W_" and "P_" series respectively
f <- do.call(rbind,strsplit(gsub("C:/Users/","",filepath),split="/"))[,1]
# the preceding is more general than `f <- substr(filepath,10,23)` to grab the name of the User
m <- t <- character(length(filepath))
for(i in 1:nrow(w)){m[grepl(w[i,1],filepath)] <- w[i,2]}
for(i in 1:nrow(p)){t[grepl(p[i,1],filepath)] <- p[i,2]}
paste(f,m,t,sep="")
}

Running a fine & working code through a loop doesn't work

From the string:
"((VBD)(((JJ))(CC)((RB)(JJ)))((IN)((DT)(JJ)(NNP)(NNPS))))"
I needed the following:
"JJ", "RBJJ", "DTJJNNPNNPS", "JJCCRBJJ", "INDTJJNNPNNPS" "VBDJJCCRBJJINDTJJNNPNNPS"
(this was my earlier query on SO, which was solved by #Brian Diggs. Please refer to "R : how to differentiate between inner and innermost brackets using regex", if necessary)
so I Used the following code:
library("plotrix")
library("plyr")
strr<-c("((VBD)(((JJ))(CC)((RB)(JJ)))((IN)((DT)(JJ)(NNP)(NNPS))))")
tmp <- gsub("\\(([^\\(\\)]*)\\)", '("\\1")', strr)
tmp <- gsub("\\(", "list(", tmp)
tmp <- gsub("\\)list", "),list", tmp)
tmp <- eval(parse(text=tmp))
atdepth <- function(l, d) {
if (d > 0 & !is.list(l)) {
return(NULL)
}
if (d == 0) {
return(unlist(l))
}
if (is.list(l)) {
llply(l, atdepth, d-1)
}
}
pastelist <- function(l) {paste(unlist(l), collapse="", sep="")}
down <- llply(1:listDepth(tmp), atdepth, l=tmp)
out <- if (length(down) > 2) {
c(unlist(llply(length(down):3, function(i) {
unlist(do.call(llply, c(list(down[[i]]), replicate(i-3, llply), pastelist)))
})), unlist(pastelist(down[[2]])))
} else {
unlist(pastelist(down[[2]]))
}
out <- out[out != ""]
And I got what I wanted, But Is it not possible to use the above code through a loop to process multiple strings (strr etc.) at the same time? I roughly need a bunch of strings to be processed and collected in a file. I'm trying to include a loop, but always end up in having just the last string from the set of strings in the out file. How should I run the code through a loop? The string set below.
strr<-c("(((((NNS))((IN)((NNS)(CC)(NNS))))((VBD)((PRP))((IN)((NN))))))",
"((((NNS))((VBD)((TO)(((NNP))((NNP))))((TO)((DT)(NNP))))))",
"((((IN)(((NNP))((NNP))))((NNP)(NNP)(NNPW)(NNP))((VBD)((IN)((DT)(JJ)(NN)(NN))))))"
)
Possibly it could be modified in a more clever way, but a straightforward one would be to define a new function and use sapply()
### Nothing new
library("plotrix")
library("plyr")
strr<-c("(((((NNS))((IN)((NNS)(CC)(NNS))))((VBD)((PRP))((IN)((NN))))))",
"((((NNS))((VBD)((TO)(((NNP))((NNP))))((TO)((DT)(NNP))))))",
"((((IN)(((NNP))((NNP))))((NNP)(NNP)(NNPW)(NNP))((VBD)((IN)((DT)(JJ)(NN)(NN))))))")
strrr <- strr[rep(1:3,200)]
atdepth <- function(l, d) {
if (d > 0 & !is.list(l)) {
return(NULL)
}
if (d == 0) {
return(unlist(l))
}
if (is.list(l)) {
llply(l, atdepth, d-1)
}
}
pastelist <- function(l) {paste(unlist(l), collapse="", sep="")}
###
### New function here
fun <- function(strr){
tmp <- gsub("\\(([^\\(\\)]*)\\)", '("\\1")', strr)
tmp <- gsub("\\(", "list(", tmp)
tmp <- gsub("\\)list", "),list", tmp)
tmp <- eval(parse(text=tmp))
down <- llply(1:listDepth(tmp), atdepth, l=tmp)
out <- if (length(down) > 2) {
c(unlist(llply(length(down):3, function(i) {
unlist(do.call(llply, c(list(down[[i]]), replicate(i-3, llply), pastelist)))
})), unlist(pastelist(down[[2]])))
} else {
unlist(pastelist(down[[2]]))
}
out <- out[out != ""]
out
}
sapply(strr, fun)

Resources