R put for loop into vector - r

I would like to put the output of the following for loop into a single vector.
test=c("A","B","C","D")
for(i in 1:3)
{e=runif(1,5,10);
f=round(e);
g=sample(test,f,TRUE);
h=paste(g,collapse = "");
print(h)}
Output:
[1] "BDCCABD"
[1] "DDBAADBBAA"
[1] "DACCAB"
I would like to get a vector like:
i=c("BDCCABD","DDBAADBBAA","DACCAB")
Thank you for your help

Just a slight adaptation of your code will do it.
set.seed(8632) # make the results reproducible
i <- sapply(1:3, function(x){
e = runif(1, 5, 10)
f = round(e)
g = sample(test, f, TRUE)
h = paste(g, collapse = "")
print(h)
h
})
i
#[1] "CACDAABCC" "ADDAACA" "ACCDAACAB"
Do you really need to print(h)?
EDIT.
I've just tested it and the following simplification gives exactly the same result.
set.seed(8632) # make the results reproducible
j <- sapply(1:3, function(x){
f <- sample(5:10, 1) # this is equivalent to your original code
g = sample(test, f, TRUE)
h = paste(g, collapse = "")
print(h)
h
})
j
#[1] "CACDAABCC" "ADDAACA" "ACCDAACAB"
identical(i, j)
#[1] TRUE

You mention vector, then let us using vector
V=vector()
test=c("A","B","C","D")
for(i in 1:3)
{e=runif(1,5,10);
f=round(e);
g=sample(test,f,TRUE);
h=paste(g,collapse = "");
V[i]=h}
V
[1] "BCCAD" "CCDCACBAD" "ADCDBCBCC"
V[1]
[1] "BCCAD"

I think something like this:
test=c("A","B","C","D")
h_final<-0
for(i in 1:3){e=runif(1,5,10);
f=round(e);
g=sample(test,f,TRUE);
h=paste(g,collapse = "");
h_final[i]<-h
if(i==3){print(h_final)}
}

something like this?
j <- character()
test = c("A", "B", "C", "D")
for (i in 1:3) {
e = runif(1, 5, 10)
f = round(e)
g = sample(test, f, TRUE)
h = paste(g, collapse = "")
j <- c(j, h)
}
print(j)
> print(j)
[1] "DDDBADBCD" "ABBCBCC" "BBCAA"
EDIT: Even simpler
test = c("A", "B", "C", "D")
for (i in 1:3) {
e = runif(1, 5, 10)
f = round(e)
g = sample(test, f, TRUE)
h[i] = paste(g, collapse = "")
}
> print(h)
[1] "DBDADDD" "AABDA" "CDBDDABC"

Not the most elegant way especially if you have lots of iterations but it works:
test=c("A","B","C","D")
k = NA
for(i in 1:3)
{e=runif(1,5,10)
f=round(e)
g=sample(test,f,TRUE)
h=paste(g,collapse = "")
k = append(k,h)
print(h)}
k <- na.omit(k)

You should take a look at vectors in R
You need to initialize an empty vector, let's call it test_vector
test_vector = c()
test=c("A","B","C","D")
for(i in 1:3)
{e=runif(1,5,10);
f=round(e);
g=sample(test,f,TRUE);
h=paste(g,collapse = "");
print(h)
test_vector <- c(test_vector,h)
}
Note that you could apply a function to your test vector directly without using a for loop.

Related

multiple data frames with similar names

I needed to generate array or many data frames from other data frames which only varied in names. This required me to do a lot of copy-paste works. Is it possible that I can make it cleaner but not keep copying and pasting? Follows are two examples from many similar cases of the analysis I am doing now (I will provide codes for reproduction at the end of the question), which I think may be able to make them cleaner with the same approach.
case 1, create an array with data from per_d1,per_d1,per_d3,per_d4,per_d5
perd <- array(dim=c(7,15,5))
perd [,,1] <- as.matrix(per_d$per_d1)
perd [,,2] <- as.matrix(per_d$per_d2)
perd [,,3] <- as.matrix(per_d$per_d3)
perd [,,4] <- as.matrix(per_d$per_d4)
perd [,,5] <- as.matrix(per_d$per_d5)
case 2, create multiple data frames from data with similar names.
dataplot <- dfmak (per_d$per_d1,ge$per_d1$g1,ge$per_d1$g2,ge$per_d1$g3,ge$per_d1$g4,ge$per_d1$g5)
dataplot2 <- dfmak (per_d$per_d2,ge$per_d2$g1,ge$per_d2$g2,ge$per_d2$g3,ge$per_d2$g4,ge$per_d2$g5)
dataplot3 <- dfmak (per_d$per_d3,ge$per_d3$g1,ge$per_d3$g2,ge$per_d3$g3,ge$per_d3$g4,ge$per_d3$g5)
dataplot4 <- dfmak (per_d$per_d4,ge$per_d4$g1,ge$per_d4$g2,ge$per_d4$g3,ge$per_d4$g4,ge$per_d4$g5)
dataplot5 <- dfmak (per_d$per_d5,ge$per_d5$g1,ge$per_d5$g2,ge$per_d5$g3,ge$per_d5$g4,ge$per_d5$g5)
codes for reproduction
N <- 1
CS <- 10.141
S <- seq (7.72,13,0.807)
t <- 15
l <- length (S)
m0 <- 100
exps <- c(0.2, 0.5, 0.9, 1.5, 2)
sd <- c(0.2, 0.5, 0.8, 1.3, 1.8)
names(sd) <- paste("per", seq_along(sd), sep = "")
per <- lapply(sd, function(x){
per <- matrix(nrow = length(S)*N, ncol = t+1)
for (i in 1:dim(per)[1]) {
for (j in 1:t+1){
per [,1] <- replicate (n = N, S)
per [i,j] <- round (abs (rnorm (1, mean = per[i,1], sd =x)),digits=3)
colnames(per) <- c('physical',paste('t', 1:15, sep = ""))
per <- as.data.frame (per)
}
}
per <- per [,-1]
return(per)
}
)
per_d <- lapply(per, function(x){
per_d <- abs (x - 10.141)
}
)
names(per_d) <- paste("per_d", seq_along(sd), sep = "")
gefun <- function (i){
res <- lapply(exps, function(x){
g <- as.matrix (m0 * exp (-x * i))
for (i in 1:l) {
for (j in 1:t){
g [i,j] <- abs((round (rnorm(1,mean = g[i,j],sd=3), digits = 3)))
colnames(g) <- paste('t', 1:ncol(g), sep = "")
g <- as.data.frame(g)
}}
return(g)
}
)
}
ge <- lapply(per_d, gefun)
for (i in 1:length(ge)){
names(ge[[i]]) <- paste("g", seq_along(ge), sep = "")
}
dfmak <- function(df1,df2,df3,df4,df5,df6){
data.frame(stimulus = c (paste0('S',1:3),'CS+',paste0('S',5:7)),
phy_dis = S,
per_dis = c(df1$t1,df1$t2,df1$t3,df1$t4,df1$t5,df1$t6,df1$t7,df1$t8,df1$t9,df1$t10,df1$t11,df1$t12,df1$t13,df1$t14,df1$t15),
trials = rep(1:15, each = 7),
response_0.2 = c (df2$t1,df2$t2,df2$t3,df2$t4,df2$t5,df2$t6,df2$t7,df2$t8,df2$t9,df2$t10,df2$t11,df2$t12,df2$t13,df2$t14,df2$t15),
response_0.5 = c (df3$t1,df3$t2,df3$t3,df3$t4,df3$t5,df3$t6,df3$t7,df3$t8,df3$t9,df3$t10,df3$t11,df3$t12,df3$t13,df3$t14,df3$t15),
response_0.9 = c (df4$t1,df4$t2,df4$t3,df4$t4,df4$t5,df4$t6,df4$t7,df4$t8,df4$t9,df4$t10,df4$t11,df4$t12,df4$t13,df4$t14,df4$t15),
response_1.5 = c (df5$t1,df5$t2,df5$t3,df5$t4,df5$t5,df5$t6,df5$t7,df5$t8,df5$t9,df5$t10,df5$t11,df5$t12,df5$t13,df5$t14,df5$t15),
response_2 = c (df6$t1,df6$t2,df6$t3,df6$t4,df6$t5,df6$t6,df6$t7,df6$t8,df6$t9,df6$t10,df6$t11,df6$t12,df6$t13,df6$t14,df6$t15)
)
}
You can try the followings. But the codes, unfortunately, are not short.
Case 1
a <- lapply(per_d, as.matrix)
b <- c(a, recursive = TRUE)
pred <- array(b, dim = c(7,15,5))
Case 2
The data frames will be stored in a list. You still have to extract them using $ or [[]].
# create empty lists to store the outputs
out <- list()
name <- list()
for(i in 1:5) {
a <- per_d[[i]]
b <- ge[[i]][[1]]
c <- ge[[i]][[2]]
d <- ge[[i]][[3]]
e <- ge[[i]][[4]]
f <- ge[[i]][[5]]
arg <- list(a, b, c, d, e, f)
name[[i]] <- paste0("df_", i)
out[[i]] <- do.call(dfmak, arg)
}
out <- setNames(out, name)

speed up the for loop

Here is my data.
set.seed(100)
toydata <- data.frame(A = sample(0:50,50,replace = T),
B = sample(0:50,50,replace = T),
C = sample(0:50,50,replace = T),
D = sample(0:50,50,replace = T),
E = sample(0:50,50,replace = T)
)
toydata<-as.matrix(toydata)
toydata[1,]<-0
toydata[44,]<-0
toydata[1,4]<-3
toydata[44,1]<-4
m<-toydata
Here is my swapping function:
derangement <- function(x){
if(max(table(x)) > length(x)/2) return(NA)
while(TRUE){
y <- sample(x)
if(all(y != x)) return(y)
}
}
swapFun <- function(x, n = 10){
inx <- which(x < n)
y <- derangement(x[inx])
if(length(y) == 1) return(NA)
x[inx] <- y
x
}
After swapping, I want the sum of the difference of the row medians between the two matrixes are less than 12.
I use for loop to find a feasible solution:
for (i in 1:1000000000) {
mnew<-swapFun(m)
if((sum(abs(rowMedians(mnew)-rowMedians(m))))<12)
{break}
}
My computer has 5 cores. How can I speed up the for loop by using more cores.
Many thanks.

Efficient code to map genotype matrix in R

Hi I want to convert a matrix of genotypes, encoded as triples to a matrix encoded as 0, 1, 2, i.e.
c(0,0,1) <-> 0; c(0,1,0) <-> 1; c(0,0,1) <-> 2
First here is some code to generate the matrix that needs to be reduced.
# generate genotypes
expand.G = function(n,p){
probs = runif(n = p)
G012.rows = matrix(rbinom(2,prob = probs,n=n*p),nrow = p)
colnames(G012.rows) = paste('s',1:n,sep = '')
rownames(G012.rows) = paste('g',1:p, sep = '')
G012.cols = t(G012.rows)
expand.geno = function(g){
if(g == 0){return(c(1,0,0))}
if(g == 1){return(c(0,1,0))}
if(g == 2){return(c(0,0,1))}
}
gtype = c()
for(i in 1:length(c(G012.cols))){
gtype = c(
gtype,
expand.geno(c(G012.cols)[i])
)
}
length(gtype)
G = matrix(gtype,byrow = T, nrow = p)
colnames(G) = paste('s',rep(1:n,each = 3),c('1','2','3'),sep = '')
rownames(G) = paste('g',1:p, sep = '')
print(G[1:10,1:15])
print(G012.rows[1:10,1:5])
return(G)
}
The output has 3n columns and p rows, where n is sample size and p is number of genotypes. Now we can reduce the matrix back to 0,1,2 coding with the following functions
reduce012 = function(x){
if(identical(x, c(1,0,0))){
return(0)
} else if(identical(x, c(0,1,0))){
return(1)
} else if(identical(x, c(0,0,1))){
return(2)
} else {
return(NA)
}
}
reduce.G = function(G.gen){
G.vec =
mapply(function(i,j) reduce012(as.numeric(G.gen[i,(3*j-2):(3*j)])),
i=expand.grid(1:(ncol(G.gen)/3),1:nrow(G.gen))[,2],
j=expand.grid(1:(ncol(G.gen)/3),1:nrow(G.gen))[,1]
)
G = matrix(G.vec, nrow = ncol(G.gen)/3, ncol = nrow(G.gen))
colnames(G) = rownames(G.gen)
return(G)
}
reduce.G.loop = function(G.gen){
G = matrix(NA,nrow = ncol(G.gen)/3, ncol = nrow(G.gen))
for(i in 1:nrow(G.gen)){
for(j in 1:(ncol(G.gen)/3)){
G[j,i] = reduce012(as.numeric(G.gen[i,(3*j-2):(3*j)]))
}
}
colnames(G) = rownames(G.gen)
return(G)
}
The output is n rows by p columns. It is incidental, but intentional, that the matrix encoded as 0,1,2 is the transpose of the matrix encoded as triples.
The code is not particularly fast. What is bothering me is that the the timing goes with n^2. Can you explain or supply more efficient code?
G = expand.G(1000,20)
system.time(reduce.G(G))
system.time(reduce.G.loop(G))
G = expand.G(2000,20)
system.time(reduce.G(G))
system.time(reduce.G.loop(G))
G = expand.G(4000,20)
system.time(reduce.G(G))
system.time(reduce.G.loop(G))
You can simply make an accessor lookup table:
decode <- array(dim = c(3, 3, 3))
decode[cbind(1, 0, 0) + 1] <- 0
decode[cbind(0, 1, 0) + 1] <- 1
decode[cbind(0, 0, 1) + 1] <- 2
And then, just do:
matrix(decode[matrix(t(G + 1), ncol = 3, byrow = TRUE)], ncol = nrow(G))
This full vectorized R version will give you the same matrix, without dimnames and super fast.
Yet, if you have much larger matrices, you should really use Rcpp for both memory and timing issues.
This seems to be a about three times faster than your version (renamed reduce.G.orig):
reduce.G <- function(G) {
varmap = c("100"=0, "010"=1, "001"=2)
result <- do.call(rbind, lapply(1:(ncol(G)/3)-1, function(val)
varmap[paste(G[,3*val+1], G[,3*val+2], G[,3*val+3], sep="")]))
colnames(result) <- rownames(G)
result
}
system.time(reduce.G(G))
# user system elapsed
# 0.156 0.000 0.155
system.time(reduce.G.orig(G))
# user system elapsed
# 0.444 0.000 0.441
identical(reduce.G(G), reduce.G.orig(G))
# [1] TRUE

Clean, simple function factories in R

Short example. I am exploring the behavior of a function by testing it with different "specs", f(spec). I wrote down one spec by hand, spec1, and am creating new specs as variations on it. To do this, I decided to write a function:
spec1 = list(fy = list(a = 1), fx = list(f1 = function(x) 10-x, f2 = function(x) 2-x))
make_spec = function(f = function(x) 10-x, xtheta = 2)
list(fy = list(a = 1), fx = list(f1 = f, f2 = function(x) xtheta-x))
res1 = make_spec()
# first problem: they don't match
all.equal(res1,spec1)
# [1] "Component “fx”: Component “f2”: target, current do not match when deparsed"
# ^ this happens, even though...
res1$fx$f2(4) == spec1$fx$f2(4)
# TRUE
# second problem: res1 is fugly
res1
# $fy
# $fy$a
# [1] 1
#
#
# $fx
# $fx$f1
# function (x)
# 10 - x
# <environment: 0x000000000f8f2e20>
#
# $fx$f2
# function (x)
# xtheta - x
# <environment: 0x000000000f8f2e20>
str(res1)
# even worse
My goals for make_spec are...
all.equal(spec1, res1) and/or identical(spec1, res1)
for str(res1) to be human-readable (no <environment: ptr> tags or srcfilecopy)
to avoid substitute and eval altogether if possible (not a high priority)
to avoid writing out the second arg of substitute (see "full" example below)
Is there an idiomatic way to achieve some or all of these goals?
Full example. I'm not sure if the example above fully covers my use case, so here's the latter:
spec0 = list(
v_dist = list(
pdf = function(x) 1,
cdf = function(x) x,
q = function(x) x,
supp = c(0,1)
)
,
ucondv_dist = {
ucondv_dist = list()
ucondv_dist$condmean = function(v) 10-v
ucondv_dist$pdf = function(u,v) dnorm(u, ucondv_dist$condmean(v), 50)
ucondv_dist$cdf = function(u,v) pnorm(u, ucondv_dist$condmean(v), 50)
ucondv_dist
}
)
make_spec = function(ycondx_condmean = function(x) 10-x, ycondx_sd = 50){
s = substitute(list(
x_dist = list(
pdf = function(x) 1,
cdf = function(x) x,
q = function(x) x,
supp = c(0,1)
)
,
ycondx_dist = {
ycondx_dist = list()
ycondx_dist$condmean = ycondx_condmean
ycondx_dist$pdf = function(u,v) dnorm(u, ycondx_dist$condmean(v), ycondx_sd)
ycondx_dist$cdf = function(u,v) pnorm(u, ycondx_dist$condmean(v), ycondx_sd)
ycondx_dist
}
)
, list(ycondx_condmean=ycondx_condmean, ycondx_sd = ycondx_sd))
eval(s, .GlobalEnv)
}
res0 = make_spec()
Side note. I don't know if "function factory" is the right term here, since I am not a computer scientist, but it seems related. I found only a paragraph on the concept related to R.
The enclosing environments of the functions are different leading to the difference in output/difference in deparsing. So, there are two things to do to get the desired output:
make the environments the same
substitute the variables from the enclosing environments into the function bodies.
However, doing it this way you get a double dose of the eval/substitute you didn't want, so maybe there would be an alternative.
make_spec <- function(f = function(x) 10-x, xtheta = 2) {
e <- parent.frame()
fixClosure <- function(func)
eval(eval(substitute(substitute(func)), parent.frame()), e)
list(fy = list(a = 1), fx = list(
f1 = fixClosure(f),
f2 = fixClosure(function(x) xtheta-x)
))
}
spec1 <- list(fy = list(a = 1), fx = list(f1 = function(x) 10-x, f2 = function(x) 2-x))
res1 <- make_spec()
all.equal(res1, spec1)
[1] TRUE

R function to combine lists but prioritize the values in one of them

I'm trying to make a function to combine multiple lists, usually between 2 and 4, that will weed out duplicates and hopefully (if possible) prioritize the values of one of the lists. Is this possible? It's better explained with code:
PassOpts <- function(in1 = list(), in2 = list(), in3 = list(), in4 = list(){
c(in1, in2, in3, in4)
}
opts1 <- list(a = 1, b = 2, c = 4)
opts2 <- list(a = 1, b = 2, c = 4)
opts3 <- list(a = 5, b = 10)
combinedOpts <- PassOpts(opts1, opts2, opts3)
Ideally what I want is for it to be possible to 'prioritize' the list that is the most different from the rest, so in this case I would want for combinedOpts to be a list of a = 5, b = 10, c = 4. I'm using it as a way to set and combine default and also user input options.
Thanks
**Solved, ended up doing this as I realized the latest input (i.e. with 3 inputs in3) would be the one I want to use as default, so did as follows
PassOpts <- function(in1 = list(), in2 = list(), in3 = list(), in4 = list()){
if(length(in4) != 0){
in4Names <- names(in4)
rList <- in4
temp <- c(in1,in2,in3)
tempNames <- names(temp)
for(i in 1:length(tempNames)){
nam <- tempNames[i]
if(!(nam %in% in4Names)){
in4Names <- c(in4Names,nam)
rList[nam] <- temp[nam]
}
}
}else if(length(in3) != 0){
in3Names <- names(in3)
rList <- in3
temp <- c(in1,in2)
tempNames <- names(temp)
for(i in 1:length(tempNames)){
nam <- tempNames[i]
if(!(nam %in% in3Names)){
in3Names <- c(in3Names, nam)
rList[nam] <- temp[nam]
}
}
}else if(length(in2) != 0){
in2Names <- names(in2)
rList <- in2
temp <- in1
tempNames <- names(temp)
for(i in 1:length(tempNames)){
nam <- tempNames[i]
if(!(nam %in% in2Names)){
in2Names <- c(in2Names, nam)
rList[nam] <- temp[nam]
}
}
}else{
return(in1)
}
return(rList)
}
Looks likes you are looking of most unique number.
Here is how I would do:
1. aggregate input lists
2. find out the most unique one for each key
PassOpts <- function(listOfList){
resList = list()
# reduce lists by key
for (l in listOfList){
for (i in 1:length(l)){
key = names(l[i])
value = l[[i]]
resList[[key]] = c(resList[[key]], value)
}
}
# found most diffent one for each key
findDiff <- function(elements){
countTable = table(elements)
minCount = min(countTable)
return(names(countTable)[countTable == minCount])
}
return(lapply(resList, FUN=findDiff))
}
opts1 <- list(a = 1, b = 2, c = 4)
opts2 <- list(a = 1, b = 2, c = 4)
opts3 <- list(a = 5, b = 10)
combinedOpts <- PassOpts(list(opts1, opts2, opts3))

Resources