How to modify a function for different dim? - r

I have got a function which was used for his data dim(1000*1000).My data are the same but with different dim (500*1300).How Can I adapt the function to my dims?
image.arr = array(dim = c(1000, 1000, 20)))
interpolated.lst = vector(mode = "list", length = 1000)
system.time(
{
for(i in 1:1200){
interpolated.lst[[i]] =
apply(image.arr[i, , ], 1,
FUN = function(x){
imageSpline(x = dates, y = x, xout = 1:365)$y
}
)
}
}
)

The code uses apply to go over the rows of the images, hence only the width needs to be provided. Just replace this:
interpolated.lst = vector(mode = "list", length = nrow(image.arr))
system.time(
for(i in seq_len(nrow(image.arr))) {
interpolated.lst[[i]] =
apply(image.arr[i, , ], 1,
FUN = function(x) imageSpline(x = dates, y = x, xout = 1:365)$y)
})

Related

build matrix in a for loop automatically in R

Suppose I have a code like this
probv=c(0.5,0.1,0.2,0.3)
N=c(1,2,3,4)
g1=matrix(rbinom(n = 10, size = N[1], prob = probv[1]), nrow=5)
g2=matrix(rbinom(n = 10, size = N[2], prob = probv[2]), nrow=5)
g3=matrix(rbinom(n = 10, size = N[3], prob = probv[3]), nrow=5)
g4=matrix(rbinom(n = 10, size = N[4], prob = probv[4]), nrow=5)
I want to use a for loop
for i in (1:J)
{......} J=4 in this case
use one line function to return the same output like this, I want to know
how I create a matrix g_ in the loop
which is also benefit for me when I increase the length
of my vector into 5,6,7......
for example N=c(1,2,3,4,5) probv=c(0.5,0.1,0.2,0.3,0.5)
I do not change my code to create another matrix called g5.The code can create it and I just need to change my input to achieve my goal
Thanks Akrun
what is my N is a three dimensional array, I want to map the last dimension of it? How to change in the map method?
probv=c(0.5,0.1,0.2,0.3)
N=array(1:24,c(3,2,4))
g1=matrix(rbinom(n = 10, size = N[,,1], prob = probv[1]), nrow=5)
g2=matrix(rbinom(n = 10, size = N[,,2], prob = probv[2]), nrow=5)
g3=matrix(rbinom(n = 10, size = N[,,3], prob = probv[3]), nrow=5)
g4=matrix(rbinom(n = 10, size = N[,,4], prob = probv[4]), nrow=5)
We can use Map to loop over the 'N' and 'probv' vector, get the corresponding values into rbinom and create a matrix. It returns a list of matrices
lst1 <- Map(function(x, y) matrix(rbinom(n = 10,
size = x, prob = y), nrow = 5), N, probv)
Or using for loop
lst2 <- vector('list', length(N))
for(i in seq_along(N)) {
lst2[[i]] <- matrix(rbinom(n = 10, size = N[i], prob = probv[i]), nrow = 5)
}
names(lst2) <- paste0("g", seq_along(lst2))
For the updated question to extract from an array
mnLength <- min(length(probv), dim(N)[3])
lst2 <- vector('list', mnLength)
for(i in seq_len(mnLength)) {
lst2[[i]] <- matrix(rbinom(n = 10, size = N[,,i], prob = probv[i]), nrow = 5)
}
names(lst2) <- paste0("g", seq_along(lst2))
lst2$g1
lst2$g2

snapPointsToLines can't keep attributes in R

I recently find a problem of snapPointsToLines. It can't keep the attributes of the spatial point dataframe. The example is as below:
# Generate a spatial line dataframe
l1 = cbind(c(1,2,3),c(3,2,2))
l1a = cbind(l1[,1]+.05,l1[,2]+.05)
l2 = cbind(c(1,2,3),c(1,1.5,1))
Sl1 = Line(l1)
Sl1a = Line(l1a)
Sl2 = Line(l2)
S1 = Lines(list(Sl1, Sl1a), ID="a")
S2 = Lines(list(Sl2), ID="b")
Sl = SpatialLines(list(S1,S2))
df = data.frame(z = c(1,2), row.names=sapply(slot(Sl, "lines"), function(x) slot(x, "ID")))
Sldf = SpatialLinesDataFrame(Sl, data = df)
# Generate a spatial point dataframe
xc = c(1.2,1.5,2.5)
yc = c(1.5,2.2,1.6)
Spoints = SpatialPoints(cbind(xc, yc))
Spdf <- SpatialPointsDataFrame(Spoints, data = data.frame(value = 1:length(Spoints)))
#use the function SpatialPointsDataFrame
res <- snapPointsToLines(Spdf, Sldf)
res only has "nearest_line_id" and "snap_dist". It doesn't have "value" field from Spdf, which I need.
#use the function SpatialPointsDataFrame with "withAttrs = TRUE" parameter
res <- snapPointsToLines(Spdf, Sldf, withAttrs = TRUE)
It reports error:
"Error in snapPointsToLines(Spdf, Sldf, withAttrs = TRUE) :
A SpatialPoints object has no attributes! Please set withAttrs as FALSE."
But Spdf is the spatialpointdataframe with attribute.
I don't know what problem it is. When I used this function several weeks ago, it didn't have this problem.
I think the problem may be due to the function itself. When you look at the codes of this function, we can see the codes at the beginning part as below.
if (class(points) == "SpatialPoints" && missing(withAttrs))
withAttrs = FALSE
if (class(points) == "SpatialPoints" && withAttrs == TRUE)
stop("A SpatialPoints object has no attributes! Please set withAttrs as FALSE.")
Sometimes a SpatialPointsDataFrame could be identified as SpatialPoints. So the function will treat your SpatialPointsDataFrame as SpatialPoints and will not keep the attributes in the function.
You can make a little modification in the the codes of the function as below.
snapPointsToLines1 <- function (points, lines, maxDist = NA, withAttrs = TRUE, idField = NA)
{
if (rgeosStatus()) {
if (!requireNamespace("rgeos", quietly = TRUE))
stop("package rgeos required for snapPointsToLines")
}
else stop("rgeos not installed")
if (is(points, "SpatialPointsDataFrame")==FALSE && missing(withAttrs))
withAttrs = FALSE
if (is(points, "SpatialPointsDataFrame")==FALSE && withAttrs == TRUE)
stop("A SpatialPointsDataFrame object is needed! Please set withAttrs as FALSE.")
d = rgeos::gDistance(points, lines, byid = TRUE)
if (!is.na(maxDist)) {
distToLine <- apply(d, 2, min, na.rm = TRUE)
validPoints <- distToLine <= maxDist
distToPoint <- apply(d, 1, min, na.rm = TRUE)
validLines <- distToPoint <= maxDist
points <- points[validPoints, ]
lines = lines[validLines, ]
d = d[validLines, validPoints, drop = FALSE]
distToLine <- distToLine[validPoints]
if (!any(validPoints)) {
if (is.na(idField)) {
idCol = character(0)
}
else {
idCol = lines#data[, idField][0]
}
newCols = data.frame(nearest_line_id = idCol, snap_dist = numeric(0))
if (withAttrs)
df <- cbind(points#data, newCols)
else df <- newCols
res <- SpatialPointsDataFrame(points, data = df,
proj4string = CRS(proj4string(points)), match.ID = FALSE)
return(res)
}
}
else {
distToLine = apply(d, 2, min, na.rm = TRUE)
}
nearest_line_index = apply(d, 2, which.min)
coordsLines = coordinates(lines)
coordsPoints = coordinates(points)
mNewCoords = vapply(1:length(points), function(x) nearestPointOnLine(coordsLines[[nearest_line_index[x]]][[1]],
coordsPoints[x, ]), FUN.VALUE = c(0, 0))
if (!is.na(idField)) {
nearest_line_id = lines#data[, idField][nearest_line_index]
}
else {
nearest_line_id = sapply(slot(lines, "lines"),
function(i) slot(i, "ID"))[nearest_line_index]
}
if (withAttrs)
df = cbind(points#data, data.frame(nearest_line_id, snap_dist = distToLine))
else df = data.frame(nearest_line_id, snap_dist = distToLine,
row.names = names(nearest_line_index))
SpatialPointsDataFrame(coords = t(mNewCoords), data = df,
proj4string = CRS(proj4string(points)))
}
Then using this new function snapPointsToLines1, you can get the attributes what you want.

Why lapply works and apply doesn't?

My data:
df_1 <- data.frame(
x = replicate(
n = 3,
expr = runif(n = 30, min = 20, max = 100)
),
y = sample(
x = 1:3, size = 30, replace = TRUE
)
)
The follow code with lapply works:
lapply(X = names(df_1)[c(1:3)], FUN = function(x) {
pairwise.t.test(
x = df_1[, x],
g = df_1[['y']],
p.adj = 'bonferroni'
)
})
But, with apply doesn't:
apply(X = names(df_1)[c(1:3)], MARGIN = 2, FUN = function(x) {
pairwise.t.test(
x = df_1[, x],
g = df_1[['y']],
p.adj = 'bonferroni'
)
})
Error in apply(X = names(df_1)[c(1:3)], MARGIN = 2, FUN = function(x) { :
dim(X) must have a positive length
Why the problem? Are they not equivalent?
For apply you should instead use
apply(X = df_1[1:3], MARGIN = 2, FUN = function(x) {
pairwise.t.test(
x = x,
g = df_1[['y']],
p.adj = 'bonferroni'
)
})
that is because from ?apply
apply returns a vector if MARGIN has length 1 and an array of dimension dim(X)[MARGIN] otherwise.
In your attempt you are using names(df_1)[c(1:3)] as argument to apply which has
dim(names(df_1)[c(1:3)])[2]
#NULL
Hence, you get the error.

R : Changing values of variables after certain time

the question I am trying to ask is how to I change one of the values of my variables (noted as LO$M in my list) after I pass a certain time.
The thing I am trying to achieve is that after 20,000 seconds passing I would like to change my value of Lac to the value of Lac at time 20,0000 +10,000
So at t = 20,000, Lac = Lac + 10,000
The issue I am having with my code is that within my if command I have if tt>= 20000, but this leads to the issue that every value of Lac after 20,000 being increased by 10,000 when what i want is that the FIRST value after 20,000 be increased by 10,000.
Basically, after 20,000 of my experiment passing I am trying to inject 10,000 more Lac into the experiment.
My code is given below:
LO = list()
LO$M = c(i = 1, ri = 0, I = 50, Lac = 20, ILac = 0, o = 1, Io = 0, RNAP = 100, RNAPo = 0, r = 0, z = 0)
LO$Pre = matrix(c(1,0,0,0,0,0,0,0,0,0,0,
0,1,0,0,0,0,0,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,
0,0,1,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,1,0,1,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,1,0,
0,0,0,1,0,0,0,0,0,0,1,
0,1,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,1), ncol=11, byrow=TRUE)
LO$Post = matrix(c(1,1,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,
0,0,1,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,1,0,1,0,0,0,
0,0,0,0,0,1,0,1,0,1,0,
0,0,0,0,0,0,0,0,0,1,1,
0,0,0,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0), ncol=11, byrow=TRUE)
LO$h = function(x,t,th=c(0.02,0.1,0.005,0.1,1,0.01,0.1,0.01,0.03,0.1,1e-05,0.01,0.002,0.01,0.001))
{
with(as.list(c(x, th)), {
return(c(th[1]*i, th[2]*ri, th[3]*I*Lac, th[4]*ILac, th[5]*I*o, th[6]*Io, th[7]*o*RNAP,
th[8]*RNAPo, th[9]*RNAPo, th[10]*r, th[11]*Lac*z, th[12]*ri, th[13]*I,
th[13]*ILac, th[14]*r, th[15]*z))
})
}
gillespie1 = function (N, n, ...)
{
tt = 0
x = N$M
S = t(N$Post - N$Pre)
u = nrow(S)
v = ncol(S)
tvec = vector("numeric", n)
xmat = matrix(ncol = u, nrow = n + 1)
xmat[1, ] = x
for (i in 1:n) {
h = N$h(x, tt, ...)
tt = tt + rexp(1, sum(h))
j = sample(v, 1, prob = h)
x = x + S[, j]
tvec[i] = tt
xmat[i + 1, ] = x
if( tt >=20000){
x[4] = x[4] +10000
}
}
return(list(t = tvec, x = xmat))
}
newout = gillespie1(LO,200000)
matplot(newout$x[,4], type="l", lwd=0.25, col="grey")
I don't have a high enough reputation to attach images, but it should look something like this:
https://gyazo.com/0ffd940a22df23b2ccfdf4a17e85dca8
Sorry if this isn't clear. Thanks
In this example, you have the function myTask(). When you call execMyTask(), you will execute myTask()once, and after that, you will execute it at random intervals between 1 to max_wait milliseconds. When you get tired, you can kill the task with tclTaskDelete().
library(tcltk2)
myTask <- function() cat("some task!\n")
id = "execMyTask"
execMyTask <- function(max_wait = 3000) {
id <- toString(match.call()[[1]])
myTask()
wait = sample(1:max_wait, 1)
cat("Waiting", wait, "miliseconds\n") # replace with your function
if (is.null(tclTaskGet(id))) {
tclTaskSchedule(wait=wait, execMyTask(), id=id, redo = TRUE)
} else {
tclTaskChange(wait=wait, execMyTask(), id=id, redo = TRUE)
}
}
execMyTask()
tclTaskDelete(id)
So far, there is a little problem with this approach, because we can not supply arguments to the function fun in tclTaskChange().

Parallel computing in R (Windows): changing code from foreach %do% to foreach %dopar%

I have written a code to run several time-series rolling-regressions for multiple securities. Since the number of securities is more than 10,000, and having more than 200 rolling windows for each security, the runtime for a sequential set-up (using foreach %do%) is about 30min.
I would like to implement foreach %dopar% for parallel computing instead, using the "doParrallel" backend. Simply changing %do% with %dopar% in the code doesn't do the trick. I am very new to this parrallel computing method, and would hope to get some help.
Here is the foreach %do% code:
sec = ncol(ret.zoo)
num.factors = 2
rows = nrow(ret.zoo) - 60 + 1
beta.temp = matrix(nc = num.factors + 1, nr = sec*rows)
gvkey.vec = matrix(nc = 1, nr = sec*rows)
d = 1
foreach(i=1:sec) %do% {
df = merge(ret.zoo[,i], data)
names(df) <- c("return", names(data))
gvkey = substr(colnames(ret.zoo)[i],2,9)
reg = function(z) {
z.df = as.data.frame(z)
ret = z.df[,which(names(z.df) == "return")]
ret.no.na = ret[!is.na(ret)]
if(length(ret.no.na) >= 30) {
coef(lm(return ~ VAL + SIZE, data = as.data.frame(z), na.action = na.omit))
}
else {
as.numeric(rep(NA,num.factors + 1)) ## the "+1" is for the intercept value
}
}
beta = rollapply(df, width = 60, FUN = reg, by.column = FALSE, align = "right")
beta.temp[d:(d+rows-1),] = beta
gvkey.vec[d:(d+rows-1),] = gvkey
d = d+rows
}
beta.df = data.frame(secId = gvkey.vec, date = rep(index(beta), sec), beta.temp)
colnames(beta.df) <- c("gvkey", "date", "intercept", "VAL", "SIZE")
In order to enable parallel computing using %dopar%, I have called and registered the backend "doParallel".
Thank you very much!
UPDATE
Here is my first try:
library(doParallel) ## parallel backend for the foreach function
registerDoParallel()
sec = ncol(ret.zoo)
num.factors = 2
rows = nrow(ret.zoo) - 60 + 1
result <- foreach(i=1:sec) %dopar% {
library(zoo)
library(stats)
df = merge(ret.zoo[,i], data)
names(df) <- c("return", names(data))
gvkey = substr(colnames(ret.zoo)[i],2,9)
reg = function(z) {
z.df = as.data.frame(z)
ret = z.df[,which(names(z.df) == "return")]
ret.no.na = ret[!is.na(ret)]
if(length(ret.no.na) >= 30) {
coef(lm(return ~ VAL + SIZE, data = as.data.frame(z), na.action = na.omit))
}
else {
as.numeric(rep(NA,num.factors + 1)) ## the "+1" is for the intercept value
}
}
rollapply(df, width = 60, FUN = reg, by.column = FALSE, align = "right")
}
beta.df = do.call('combine', result)
This works perfectly up until the end of the loop. However, the beta.df = do.call('combine', result) gives the following error: Error in do.call("combine", result) : could not find function "combine".
How can I combine the output of result. Now it is a list rather than a dataframe.
Thanks,
Here is the way of combining the results from the different clusters into a dataframe (very efficient from a runtime standpoint):
lstData <- Map(as.data.frame, result)
dfData <- rbindlist(lstData)
beta.df = as.data.frame(dfData)

Resources