After some manipulation of data, I want to rename automatically the data taking pieces of name from a string, merging togheter these pieces and then assigning to data. I try to use "if" function in for loop but the code doesn't work. I try to use "grep" as condition in "if" function.
filepath<-c("C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_0.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_1.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_2.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_3.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_4.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_5.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_0-P1_6.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_1-P1_0.csv",
"C:/Users/Amministratore/Documents/Isolante-T_0-W_1-P1_1.csv",
....)
for(i in filepath){
......
f <- substr(i,10,23) # first piece of name
f2 <- as.character(substr(i,40,57)) # second piece of name
if (grep("W_0",f2)){
m<-c("_sin")
}
if (grep("W_1",f2)){
m<-c("_jan2_febreal")
}
if (grep("W_2",f2)){
m<-c("_real")
}
if (grep("W_3",f2)){
m<-c("_step")
}
if (grep("P1_0",f2,value = FALSE)){
t<-c("_t0.025")
}
if (grepl("P1_1",f2,value = FALSE)){
t<-c("_t0.05")
}
if (grepl("P1_2",f2,value = FALSE)){
t<-c("_t0.1")
}
if (grepl("P1_3",f2,value = FALSE)){
t<-c("_t0.15")
}
if (grepl("P1_4",f2,value = FALSE)){
t<-c("_t0.2")
}
if (grepl("P1_5",f2,value = FALSE)){
t<-c("_t0.25")
}
if (grepl("P1_6",f2,value = FALSE)){
t<-c("_t0.3")
}
}
Outputfilename <- paste(paste(f,m,sep=""),t,sep="")
the result is:
Errore in if (grep("W_1", f2)) { : l'argomento ha lunghezza zero
Without any for loops or if statements, it seems to me you can simply vectorize everything:
f <- substr(filepath,10,23)
m <- t <- character(length(filepath))
m[grepl("W_0",filepath)]<-"_sin"
m[grepl("W_1",filepath)]<-"_jan2_febral"
m[grepl("W_2",filepath)]<-"_real"
m[grepl("W_3",filepath)]<-"_step"
t[grepl("P1_0",filepath)]<-"_t0.025"
t[grepl("P1_1",filepath)]<-"_t0.05"
t[grepl("P1_2",filepath)]<-"_t0.1"
t[grepl("P1_3",filepath)]<-"_t0.15"
t[grepl("P1_4",filepath)]<-"_t0.2"
t[grepl("P1_5",filepath)]<-"_t0.25"
t[grepl("P1_6",filepath)]<-"_t0.3"
Outputfilename <- paste(f,m,t,sep="")
That you can also probably simplify the following way:
f <- substr(filepath,10,23)
m <- t <- character(length(filepath))
w <- array(c(paste("W",0:3,sep="_"),
"_sin", "_jan2_febral", "_real", "_step"), dim=c(4,2))
p <- array(c(paste("P1",0:6,sep="_"),
paste("t_0.",c("025","05","1","15","2","25","3"),sep="")), dim=c(7,2))
for(i in 1:nrow(w)){m[grepl(w[i,1],filepath)] <- w[i,2]}
for(i in 1:nrow(p)){t[grepl(p[i,1],filepath)] <- p[i,2]}
Outputfilename <- paste(f,m,t,sep="")
That you can wrap in a function if you like:
outputfile.namer <- function(filepath,w,p){
# filepath being your vector of file paths
# w and p your correspondance tables for your "W_" and "P_" series respectively
f <- do.call(rbind,strsplit(gsub("C:/Users/","",filepath),split="/"))[,1]
# the preceding is more general than `f <- substr(filepath,10,23)` to grab the name of the User
m <- t <- character(length(filepath))
for(i in 1:nrow(w)){m[grepl(w[i,1],filepath)] <- w[i,2]}
for(i in 1:nrow(p)){t[grepl(p[i,1],filepath)] <- p[i,2]}
paste(f,m,t,sep="")
}
Related
I am a newbie on R. I am trying to create a list within 2 functions. The first one is the extraction function, it takes the data and creates a list with it. The second one is the process one, it calculates some values and I need them to be together in the first list. How do I do that ?
myfun <- function(data,number_meta) { #extraction function
OR <- data$`Odds Ratio`[data$`Identification number`==number_meta]
SE <- ((log(data$`Upper limit`) - (log(data$`Lower limit`))) / 3.92)[data$`Identification number`==number_meta]
res <- metagen(TE=log(OR),seTE=SE,sm="OR")
tableau = cbind(OR, SE)
LIST = list(tableau, res)
return(LIST)
}
myfun(data,number_meta)
number_meta = c(1:33)
i = c(1:33)
number_meta = i
LIST = list()
for (i in 1:33) {
LIST[[i]] = myfun(data, number_meta[i])
}
myfun2 <- function(LIST) { # processing function
dup_OR <- duplicated(LIST[[i]][[1]][,1])
dup_SE <- duplicated(LIST[[i]][[1]][,2])
options(scipen = 999)
Egger <- metabias(LIST[[i]][[2]], method.bias = "linreg", k.min = 1)
Begg <- metabias(LIST[[i]][[2]], method.bias = "rank", k.min = 1)
Result <- c(dup_OR,dup_SE,Egger,Begg)
return(Result)
}
myfun2(LIST)
for (i in 1:33) {
LIST[[i]] = c(LIST, list(myfun2(LIST))) ## This one is not working !
}
I would like to obtain a final list of 33 items in which I could find inside the different values of res, dup_OR, dup_SE, Egger, Begg. These values varies from the values of res. Thanks for your help
Here is my original script :
setwd("U:/Stage M2 Phame")
library(readxl)
library(meta)
data <- read_excel("Tableau_OR.xlsx")
OR <- ((data$`Odds Ratio`[data$`Identification number`==number_meta[i]]))
SE <- (((log(data$`Upper limit`) - (log(data$`Lower limit`)))/3.92)[data$`Identification
number`==number_meta[i]])
dup_OR <- duplicated(OR)
dup_SE <- duplicated(SE)
options(scipen = 999)
res <- metagen(TE=log(OR),seTE=SE,sm="OR")
Egger <- metabias(res, method.bias = "linreg", k.min = 5)
Begg <- metabias(res, method.bias = "rank", k.min = 5)
Trim <- trimfill(res)
LIST=list(dup_OR, dup_SE, Egger, Begg, Trim)
Sorry for my whole block of text.
How about this (I have taken the liberty of generating a minimal working example):
## define a function that appends something to an existing list
appendtolist = function(oldlist, add_element){
if(class(add_element) == "list"){
oldlist = c(oldlist, add_element)
}
else if(class(add_element) != "list"){
oldlist[[length(oldlist) + 1]] = add_element
}
return(oldlist)
}
## define a test list
firstlist = list("a", c(1:10), "test")
## add content to the first list
newlist = appendtolist(firstlist, c(1:1000))
Given the following data:
list_A <- list(data_cars = mtcars,
data_air = AirPassengers,
data_list = list(A = 1,
B = 2))
I would like to print names of objects available across list_A.
Example:
Map(
f = function(x) {
nm <- deparse(match.call()$x)
print(nm)
# nm object is only needed to properly name flat file that may be
# produced within Map call
if (any(class(x) == "list")) {
length(x) + 1
} else {
length(x) + 1e6
saveRDS(object = x,
file = tempfile(pattern = make.names(nm), fileext = ".RDS"))
}
},
list_A
)
returns:
[1] "dots[[1L]][[1L]]"
[1] "dots[[1L]][[2L]]"
[1] "dots[[1L]][[3L]]"
$data_cars
NULL
$data_air
NULL
$data_list
[1] 3
Desired results
I would like to get:
`data_cars`
`data_air`
`data_list`
Update
Following the comments, I have modified the example to make it more reflective of my actual needs which are:
While using Map to iterate over list_A I'm performing some operations on each element of the list
Periodically I want to create a flat file with name reflecting name of object that was processed
In addition to list_A, there are also list_B, list_C and so forth. Therefore, I would like to avoid calling names(list) inside the function f of the Map as I will have to modify it n number of times. The solution I'm looking to find should lend itself for:
Map(function(l){...}, list_A)
So I can later replace list_A. It does not have to rely on Map. Any of the apply functions would do; same applied to purrr-based solutions.
Alternative example
do_stuff <- function(x) {
nm <- deparse(match.call()$x)
print(nm)
# nm object is only needed to properly name flat file that may be
# produced within Map call
if (any(class(x) == "list")) {
length(x) + 1
} else {
length(x) + 1e6
saveRDS(object = x,
file = tempfile(pattern = make.names(nm), fileext = ".RDS"))
}
}
Map(do_stuff, list_A)
As per the notes below, I want to avoid having to modify do_stuff function as I will be looking to do:
Map(do_stuff, list_A)
Map(do_stuff, list_B)
Map(do_stuff, list_...)
We could wrap it into a function, and do it in two steps:
myFun <- function(myList){
# do stuff
res <- Map(
f = function(x) {
#do stuff
head(x)
},
myList)
# write to a file, here we might add control
# if list is empty do not output to a file
for(i in names(res)){
write.table(res[[ i ]], file = paste0(i, ".txt"))
}
}
myFun(list_A)
Would something like this work ?
list_A2 <- Map(list, x = list_A,nm = names(list_A) )
trace(do_stuff, quote({ nm <- x$nm; x<- x$x}), at=3)
Map(do_stuff, list_A2)
readStateData <- function() {
infile <- paste("state",i,".txt",sep="")
state <- readLines(infile,n=1)
statedata <- read.table(infile,header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
statename
}
# Start loop
for(i in 1:50) {
readStateData()
# Add function to big.list
big.list[[i]] <- readStateData(statename)
}
The assignment for class is to bring in 50 files, all named state#.txt, get the state via readLines, get the data via read.table, and ultimately put it all into big.list that'll have all of the data through a for loop.
The problem I'm having is calling the function in during the for loop. I get the error:
Error in readStateData(statename) : unused argument (statename)
I'm either not calling in the function properly or I've written the function wrong. Both are likely.
Thank you for your help.
You have different issues here.
Do not refer inside a function to a variable which is defined outside. It means instead of access an outside the function defined i inside the function:
i <- 1
fct <- function() {
a <- i + 1
return(a)
}
fct()
Pass the variable as an argument to the function:
i <- 1
fct <- function(x) {
a <- x + 1
return(a)
}
fct(i)
In your function the return statement is missing. See point 1 the last command in the functions. Without a return statement the last written variable is on the stack and is "returned" by the function. This is not the clean way to return a value.
Ergo your code should look like this
readStateData <- function(x) {
infile <- paste("state",x,".txt",sep="")
state <- readLines(infile,n=1)
statedata <-read.table(infile,header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
return(statename)
}
# Start loop
for(i in 1:50) {
j <- readStateData(i)
# Add function to big.list
big.list[[i]] <- j
}
If your files are all of the pattern: state[number].txt you can simplify your code to:
# Get all files with pattern state*.txt
fls <- dir(pattern='state.*txt')
readStateData <- function(x) {
state <- readLines(x, n=1)
statedata <-read.table(x, header=FALSE,sep=",",skip=1,col.names=c("Rank","City","Population"))
statename <- list(state,statedata)
return(statename)
}
# Start loop
for(i in 1:length(fls)) {
j <- readStateData(fls[i])
# Add function to big.list
big.list[[i]] <- j
}
Assume I have the following function:
f1 <- function()
{
get.var <- function(v)
{
for(n in 1:sys.nframe())
{
varName <- deparse(substitute(v, env = parent.frame(n)))
if(varName != "v")
{
break
}
}
return(list(name = varName, n = n))
}
f2 <- function(v)
{
print(v)
# get original variable name and environment
obj <- get.var(v)
#below doesn't work as expected - this is where q$a and q$b would be updated
assign(obj$name, v + 1, env = parent.frame(obj$n))
}
f3 <- function(v){ f2(v) }
f4 <- function(v){ f3(v) }
q <- list(a = 2, b = 3)
f4(q$a)
f3(q$b)
}
How can I update the value of q$a and q$b from f2? The situation is that a similar routine is called in some of my code to validate a number of arguments in a nested list. If a value is incorrect the list element needs to be updated to reflect the correct value. It's certainly an ugly way to do it but unfortunately I cannot pass the entire list to each and every validation function.
A somewhat similar question was asked here but the user was passing in a list element instead.
Instead of using assign(obj$name, v + 1, env = parent.frame(obj$n)), I replaced this with eval(parse(text = sprintf("%s <- %d", obj$name, v + 1)), envir = parent.frame(obj$n))
It is horrendously ugly, but it works.
I have the following code (nested for loop) in R which is extremely slow. The loop matches values from two columns. Then picks up a corresponding file and iterates through the file to find a match. Then it picks up that row from the file. The iterations could go up to more than 100,000. Please if some one can provide an insight on how to quicken the process.
for(i in 1: length(Jaspar_ids_in_Network)) {
m <- Jaspar_ids_in_Network[i]
gene_ids <- as.character(GeneTFS$GeneIds[i])
gene_names <- as.character(GeneTFS$Genes[i])
print("i")
print(i)
for(j in 1: length(Jaspar_ids_in_Exp)) {
l <- Jaspar_ids_in_Exp[j]
print("j")
print(j)
if (m == l) {
check <- as.matrix(read.csv(file=paste0(dirpath,listoffiles[j]),sep=",",header=FALSE))
data_check <- data.frame(check)
for(k in 1: nrow(data_check)) {
gene_ids_JF <- as.character(data_check[k,3])
genenames_JF <- as.character(data_check[k,4])
if(gene_ids_JF == gene_ids) {
GeneTFS$Source[i] <- as.character(data_check[k,3])
data1 <- rbind(data1, cbind(as.character(data_check[k,3]),
as.character(data_check[k,8]),
as.character(data_check[k,9]),
as.character(data_check[k,6]),
as.character(data_check[k,7]),
as.character(data_check[k,5])))
} else if (toupper(genenames_JF) == toupper(gene_names)) {
GeneTFS$Source[i] <- as.character(data_check[k,4])
data1 <- rbind(data1, cbind(as.character(data_check[k,4]),
as.character(data_check[k,5]),
as.character(data_check[k,6]),
as.character(data_check[k,7]),
as.character(data_check[k,8]),
as.character(data_check[k,2])))
} else {
# GeneTFS[i,4] <- "No Evidence"
}
}
} else {
# GeneTFS[i,4] <- "Record Not Found"
}
}
}
If you pull out the logic for processing one pair into a function, f(m,l), then you could replace the double loop with:
outer(Jaspar_ids_in_Network, Jaspar_ids_in_Exp, Vectorize(f))