Sample function in R vector combining - r

I need your help, I need to combine two vectors(z and Num1 or Num2), so z will 10 in final vector and Num1(Num2) was 90 in final vector.
Code that I have now:
I <- seq(1:100)
NA1<-vector()
NA2<-vector()
z <- rep(NA, 10)
Num1 <- rnorm(100)
Num2 <- rnorm(100)
vect_1 <- sample(c(Num1, z))
vect_2 <- sample(c(Num2, z))
vect_1_NA <- is.na(vect_1)
vect_2_NA <- is.na(vect_2)
for(i in I){
if(vect_1_NA[i] == TRUE)
NA1 <- append(NA1, i)
}
for(i in I){
if(vect_2_NA[i] == TRUE)
NA2 <- append(NA2, i)
}

Related

How to simplifying this R Code to detect repeated sequence

I found this below function to detect repeated sequence. I integrate the function into Monte Carlo Simulation to calculate the probability. The function I have is too long and takes too much time during the simulation. I would appreciate if anyone can help to simply the function and in turn fasten any simulation depends on it.
V1 <- c(68,71,72,69,80,78,80,81,84,82,67,73,65,68,66,70,69,72,74,73,68,75,70,72,75,73,69,75,74,79,80,78,80,81,79,82,69,73,67,66,70,72,69,72,75,80,68,69,71,77,70,73)
Check_repeat_Seq <- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
return(max(dat$repeat_length))
}
Check_repeat_Seq(V1)
#### Can you please simplify the following also to calculate the sum of repeated.####
Check_repeat_Seq_no_overlap_sum <- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
vec <- unlist(unname(L))
nms <- names(vec)
is_le <- function(i) any(grepl(nms[i], tail(nms, -i)) & (vec[i] <= tail(vec, -i)))
LL <- vec[ ! sapply(seq_along(nms), is_le) ]
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
dat$total_repeat <- dat$seq_rep*dat$repeat_length
return(sum(dat$total_repeat))
}
##### the original function should return data Frame as follows
Check_All_repeat_Seq<- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
vec <- unlist(unname(L))
nms <- names(vec)
is_le <- function(i) any(grepl(nms[i], tail(nms, -i)) & (vec[i] <= tail(vec, -i)))
LL <- vec[ ! sapply(seq_along(nms), is_le) ]
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
dat$total_repeat <- dat$seq_rep*dat$repeat_length
return(sum(dat))
}
please help simplifying the code with the same output
Update
An even faster iterative approach leveraging the Cantor pairing function:
allDup <- function(x) {
duplicated(x) | duplicated(x, fromLast = TRUE)
}
fPair <- function(i, j) {
# Cantor pairing function
k <- j + (i + j)*(i + j + 1L)/2L
match(k, unique(k))
}
Check_repeat_Seq3 <- function(v) {
v <- match(v, unique(v))
vPair <- fPair(head(v, -1), tail(v, -1))
blnKeep <- allDup(vPair)
idx <- which(blnKeep)
len <- 1L
while (length(idx)) {
len <- len + 1L
vPair <- fPair(vPair[blnKeep], v[idx + len])
blnKeep <- allDup(vPair)
idx <- idx[blnKeep]
}
return(len)
}
# benchmark against the rollaply solution
V1 <- c(68,71,72,69,80,78,80,81,84,82,67,73,65,68,66,70,69,72,74,73,68,75,70,72,75,73,69,75,74,79,80,78,80,81,79,82,69,73,67,66,70,72,69,72,75,80,68,69,71,77,70,73)
Check_repeat_Seq <- function(vector){
k <- 2:25
Lall <- setNames(lapply(k, function(i) table(zoo::rollapply(vector, width = i, toString))), k)
L <- Filter(length, lapply(Lall, function(x) x[x == max(x) & x > 1]))
dat <- data.frame(seq_rep=sapply(L, length))
dat$repeat_length <- as.numeric(rownames(dat))
return(max(dat$repeat_length))
}
Check_repeat_Seq(V1)
#> [1] 4
Check_repeat_Seq3(V1)
#> [1] 4
microbenchmark::microbenchmark(Check_repeat_Seq(V1), Check_repeat_Seq3(V1))
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> Check_repeat_Seq(V1) 38445.7 40860.95 43153.058 42249.25 44051.15 60593.2 100
#> Check_repeat_Seq3(V1) 103.9 118.65 150.713 149.05 160.05 465.2 100
Original Solution
Check_repeat_Seq2 <- function(v) {
m <- matrix(c(head(v, -1), tail(v, -1)), ncol = 2)
idx <- which(duplicated(m) | duplicated(m, fromLast = TRUE))
len <- 2L
while (length(idx)) {
len <- len + 1L
m <- matrix(v[sequence(rep(len, length(idx)), idx)], ncol = len, byrow = TRUE)
idx <- idx[duplicated(m) | duplicated(m, fromLast = TRUE)]
}
return(len - 1L)
}
UPDATE 2
This should return your dat data.frame:
Check_repeat_Seq3 <- function(v) {
v1 <- match(v, unique(v))
vPair <- fPair(head(v1, -1), tail(v1, -1))
blnKeep <- allDup(vPair)
idx <- which(blnKeep)
if (length(idx)) {
len <- 1L
seq_rep <- integer(length(v)/2)
while (length(idx)) {
len <- len + 1L
vPair <- fPair(vPair[blnKeep], v1[idx + len])
blnKeep <- allDup(vPair)
seq_rep[len] <- nrow(unique(matrix(v[sequence(rep(len, length(blnKeep)), idx)], ncol = len, byrow = TRUE)))
idx <- idx[blnKeep]
}
len <- 2:len
return(data.frame(seq_rep = seq_rep[len], repeat_length = len, total_repeat = seq_rep[len]*len))
} else {
return(data.frame(seq_rep = integer(0), repeat_length = integer(0), total_repeat = integer(0)))
}
}

multiple data frames with similar names

I needed to generate array or many data frames from other data frames which only varied in names. This required me to do a lot of copy-paste works. Is it possible that I can make it cleaner but not keep copying and pasting? Follows are two examples from many similar cases of the analysis I am doing now (I will provide codes for reproduction at the end of the question), which I think may be able to make them cleaner with the same approach.
case 1, create an array with data from per_d1,per_d1,per_d3,per_d4,per_d5
perd <- array(dim=c(7,15,5))
perd [,,1] <- as.matrix(per_d$per_d1)
perd [,,2] <- as.matrix(per_d$per_d2)
perd [,,3] <- as.matrix(per_d$per_d3)
perd [,,4] <- as.matrix(per_d$per_d4)
perd [,,5] <- as.matrix(per_d$per_d5)
case 2, create multiple data frames from data with similar names.
dataplot <- dfmak (per_d$per_d1,ge$per_d1$g1,ge$per_d1$g2,ge$per_d1$g3,ge$per_d1$g4,ge$per_d1$g5)
dataplot2 <- dfmak (per_d$per_d2,ge$per_d2$g1,ge$per_d2$g2,ge$per_d2$g3,ge$per_d2$g4,ge$per_d2$g5)
dataplot3 <- dfmak (per_d$per_d3,ge$per_d3$g1,ge$per_d3$g2,ge$per_d3$g3,ge$per_d3$g4,ge$per_d3$g5)
dataplot4 <- dfmak (per_d$per_d4,ge$per_d4$g1,ge$per_d4$g2,ge$per_d4$g3,ge$per_d4$g4,ge$per_d4$g5)
dataplot5 <- dfmak (per_d$per_d5,ge$per_d5$g1,ge$per_d5$g2,ge$per_d5$g3,ge$per_d5$g4,ge$per_d5$g5)
codes for reproduction
N <- 1
CS <- 10.141
S <- seq (7.72,13,0.807)
t <- 15
l <- length (S)
m0 <- 100
exps <- c(0.2, 0.5, 0.9, 1.5, 2)
sd <- c(0.2, 0.5, 0.8, 1.3, 1.8)
names(sd) <- paste("per", seq_along(sd), sep = "")
per <- lapply(sd, function(x){
per <- matrix(nrow = length(S)*N, ncol = t+1)
for (i in 1:dim(per)[1]) {
for (j in 1:t+1){
per [,1] <- replicate (n = N, S)
per [i,j] <- round (abs (rnorm (1, mean = per[i,1], sd =x)),digits=3)
colnames(per) <- c('physical',paste('t', 1:15, sep = ""))
per <- as.data.frame (per)
}
}
per <- per [,-1]
return(per)
}
)
per_d <- lapply(per, function(x){
per_d <- abs (x - 10.141)
}
)
names(per_d) <- paste("per_d", seq_along(sd), sep = "")
gefun <- function (i){
res <- lapply(exps, function(x){
g <- as.matrix (m0 * exp (-x * i))
for (i in 1:l) {
for (j in 1:t){
g [i,j] <- abs((round (rnorm(1,mean = g[i,j],sd=3), digits = 3)))
colnames(g) <- paste('t', 1:ncol(g), sep = "")
g <- as.data.frame(g)
}}
return(g)
}
)
}
ge <- lapply(per_d, gefun)
for (i in 1:length(ge)){
names(ge[[i]]) <- paste("g", seq_along(ge), sep = "")
}
dfmak <- function(df1,df2,df3,df4,df5,df6){
data.frame(stimulus = c (paste0('S',1:3),'CS+',paste0('S',5:7)),
phy_dis = S,
per_dis = c(df1$t1,df1$t2,df1$t3,df1$t4,df1$t5,df1$t6,df1$t7,df1$t8,df1$t9,df1$t10,df1$t11,df1$t12,df1$t13,df1$t14,df1$t15),
trials = rep(1:15, each = 7),
response_0.2 = c (df2$t1,df2$t2,df2$t3,df2$t4,df2$t5,df2$t6,df2$t7,df2$t8,df2$t9,df2$t10,df2$t11,df2$t12,df2$t13,df2$t14,df2$t15),
response_0.5 = c (df3$t1,df3$t2,df3$t3,df3$t4,df3$t5,df3$t6,df3$t7,df3$t8,df3$t9,df3$t10,df3$t11,df3$t12,df3$t13,df3$t14,df3$t15),
response_0.9 = c (df4$t1,df4$t2,df4$t3,df4$t4,df4$t5,df4$t6,df4$t7,df4$t8,df4$t9,df4$t10,df4$t11,df4$t12,df4$t13,df4$t14,df4$t15),
response_1.5 = c (df5$t1,df5$t2,df5$t3,df5$t4,df5$t5,df5$t6,df5$t7,df5$t8,df5$t9,df5$t10,df5$t11,df5$t12,df5$t13,df5$t14,df5$t15),
response_2 = c (df6$t1,df6$t2,df6$t3,df6$t4,df6$t5,df6$t6,df6$t7,df6$t8,df6$t9,df6$t10,df6$t11,df6$t12,df6$t13,df6$t14,df6$t15)
)
}
You can try the followings. But the codes, unfortunately, are not short.
Case 1
a <- lapply(per_d, as.matrix)
b <- c(a, recursive = TRUE)
pred <- array(b, dim = c(7,15,5))
Case 2
The data frames will be stored in a list. You still have to extract them using $ or [[]].
# create empty lists to store the outputs
out <- list()
name <- list()
for(i in 1:5) {
a <- per_d[[i]]
b <- ge[[i]][[1]]
c <- ge[[i]][[2]]
d <- ge[[i]][[3]]
e <- ge[[i]][[4]]
f <- ge[[i]][[5]]
arg <- list(a, b, c, d, e, f)
name[[i]] <- paste0("df_", i)
out[[i]] <- do.call(dfmak, arg)
}
out <- setNames(out, name)

how to get each output of iteration

i have to do 1000 iteration for this SIMPLS function to get the value of the coefficient. my problem is how to get the value of the coefficient for each iteration? can I print the output for iteration?
n = 10
k = 20
a = 2
coef = matrix(0,nrow=20, ncol=10)
for (i in 1:1000) {
t[,i] = matrix(rnorm(n%*%a,0,1), ncol=a) # n x a
p[,i] = matrix(rnorm(k%*%a,0,1), ncol=a) # k x a
B[,i] = matrix(rnorm(k,0,0.001), nrow=k, ncol=1) # k x 1
e[,i] = matrix(rcauchy(n,location=0,scale=1), nrow=n, ncol=1)##standard cauchy
x[,i] = t%*%t(p) ## explanatary variable xi
y[,i] = (t%*%(t(p)%*%B)) + e ## response variable yi
simpls <- function(y, x, a) {
n <- nrow(x)
k <- ncol(x)
m <- NCOL(y)
y <- matrix(y)
Ps <- matrix(0, k, a)
Cs <- matrix(0, m, a)
Rs <- matrix(0, k, a)
Ts <- matrix(0, n, a)
mx <- apply(x, 2, mean)
sdx <- apply(x, 2, sd)
x <- sapply(1:k, function(i) (x[,i]-mx[i]))
my <- apply(y, 2, mean)
sdy <- apply(y, 2, sd)
y <- sapply(1:m, function(i) (y[,i]-my[i]))
S <- t(x)%*%y
Snew <- S
for (i in 1:a) {
rs <- svd(Snew)$u[,1,drop=FALSE]
rs <- rs/norm(rs,type="F")
ts <- x%*%rs
ts <- ts/norm(ts,type="F")
ps <- t(x)%*%ts
cs <- t(y)%*%ts
Rs[,i] <- rs
Ts[,i] <- ts
Ps[,i] <- ps
Cs[,i] <- cs
Snew <- Snew-Ps[,1:i]%*%solve(t(Ps[,1:i])%*%Ps[,1:i])%*%t(Ps[,1:i])%*%Snew
}
coef[,i] <- matrix(drop(Rs%*%(solve(t(Ps)%*%Rs)%*%t(Cs))))
yfit <- x%*%coef
orgyfit <- yfit+my
res <- y-yfit
SSE <- sum((y-yfit)^2)
scale <- sqrt(SSE/(n-a))
stdres <- sapply(1:m, function(i) (res[,i]-mean(res[,i]))/sqrt(var(res[,i])))
hatt <- diag(Ts%*%solve(t(Ts)%*%Ts)%*%t(Ts))
result <- list(coef=coef, fit=orgyfit, res=res, SSE=SSE,scale=scale, stdres=stdres, leverage=hatt,Ts=Ts,Rs=Rs,Ps=Ps,Cs=Cs)
}
}
print(coef)
You can just add your coef to a vector for every iteration. I've created an example here:
coef_vector <- NULL
for (i in 1:10) {
loop_coef <- i*2
coef_vector <- c(coef_vector, loop_coef)
}
Result:
> coef_vector
[1] 2 4 6 8 10 12 14 16 18 20
>
Of course, if your coef is more complex than a variable, you can add it to a list instead of a vector.

Objective function in optim evaluates to length 3 not 1

I am new to R and trying to find the optimal values of 3 parameters via indirect inference from a simulated panel data set, but getting an error "objective function in optim evaluates to length 3 not 1". I tried to check past posts, but the one I found didn't address the problem I am facing.
The code works if I only try for one parameter instead of 3. Here is the code:
#Generating data
modelp <- function(Y,alpha,N,T){
Yt <- Y[,2:T]
Ylag <- Y[,1:(T-1)]
Alpha <- alpha[,2:T]
yt <- matrix(t(Yt), (T-1)*N, 1)
ylag <- matrix(t(Ylag), (T-1)*N, 1)
alph <- matrix(t(Alpha), (T-1)*N, 1)
rho.ind <- rep(NA,N)
sigma_u <- rep(NA,N)
sigma_a <- rep(NA,N)
for(n in 1:N){
sigma_u[n] <- sigma(lm(yt~alph+ylag))
sigma_a[n] <- lm(yt~alph+ylag)$coef[2] #
(diag(vcov((lm(yt~alph+ylag)$coef),complete=TRUE)))[2] #
rho.ind[n] <- lm(yt~alph+ylag)$coef[3]
}
param <- matrix(NA,1,3)
param[1]<- mean(sum(rho.ind))
param[2]<- mean(sum(sigma_u))
param[3]<- mean(sum(sigma_a))
return(param)
}
## Function to estimate parameters
H.theta <- function(param.s){
set.seed(tmp.seed) #set seed
param.s.tmp <- matrix(0,1,3)
for(s in 1:H){
eps.s <- matrix(rnorm(N*T), N, T) #white noise erros
eps0.s <- matrix(rnorm(N*T), N, 1) #error for initial condition
alph.s <- matrix(rnorm(N*T),N,T)
Y.s <- matrix( 0, N, T)
ys.lag <- eps0.s
for(t in 1:T){ #Simulating the AR(1) process data
ys <- alph.s[,t]+param.s[1] * ys.lag + eps.s[,t] # [n,1:t]
Y.s[,t] <- ys
ys.lag <- ys
}
param.s.tmp <- param.s.tmp + modelp(Y.s, alph.s,N, T)
param.s[2] <- param.s.tmp[2]
param.s[3] <- mean(var(alph.s)) #param.s.tmp[3]
}
return( (param.data - param.s.tmp/H)^2 )
#return(param.s[1])
}
#Results for T = 10 & H = 10, N=100
nrep <-10
rho <-0.9
sigma_u <- 1
sigma_a <- 1.5
param <- matrix(NA,1,3)
param[1] <- rho
param[2] <- sigma_u
param[3] <- sigma_u
s.mu <- 0 # Mean
s.ep <- 0.5 #White Noise -initial conditions
Box <- cbind(rep(100,1),c(20),rep(c(5),1))
r.simu.box <- matrix(0,nrep,nrow(Box))
r.data.box <- matrix(0,nrep,nrow(Box))
for(k in 1:nrow(Box)){
N <- Box[k,1] #Number of individuals in panel
T <- Box[k,2] #Length of Panel
H <- Box[k,3] # Number of simulation paths
p.data <-matrix(NA,nrep,3)
p.simu <-matrix(NA,nrep,3)
est <- matrix(NA,1,3)
for(i in 1:nrep){
mu <- matrix(rnorm(N )*s.mu, N, 1)
eps <- matrix(rnorm(N*T)*s.ep, N, T)
eps0 <- matrix(rnorm(N*T)*s.ep, N, 1)
alph <- matrix(rnorm(N ), N, T)
Y <- matrix( 0, N, T)
y.lag <- (1-param[1])*mu + eps0
for(t in 1:T){
y <- alph[,t]+param[1]*y.lag +eps[,t]
Y[,t] <- y
y.lag <- y
}
param.data <- modelp(Y,alph,N,T) #Actual data
p.data[i,1:3] <- param.data
tmp.seed <- 3864+i+100*(k-1) #Simulated data
x0 <- c(0.5, 0,0)
est[i] <- optim(x0, H.theta,method = "BFGS", hessian = TRUE)$par
p.simu[i,1:3] <- est[i]
if(i%%10==0) print(c("Finished the (",i,")-th replication"))
}
}
mean(p.data[,1])- mean(p.simu[,1])
mean(p.data[,2])- mean(p.simu[,2])
sqrt(mean((p.data[1]-p.simu[1])^2))
I expect to get three values. Any help or suggestion will be greatly appreciated.

Sum value by Combine all variable using R

Can somebody help me with data manipulation using R? i have data (data.train) like this
datex <- rep(c(rep("01/01/17",6),rep("02/01/17",6),rep("03/01/17",6)),1)
datex <- as.Date(datex, "%d/%m/%y")
Ax <- rep("A1",18)
Bx <- rep(c(rep("B1",3),rep("B2",3)),3)
Cx <- rep(c("C1","C2","C3"),6)
valx <- 100
for(i in 1:17){valx[i+1] <- valx[i]+1}
data.train <- data.frame(datex, Ax, Bx, Cx, valx)
i need all combination from variable and the final form is like this
I have tried this code:
### Library
library(dplyr)
## datex
datex <- rep(c(rep("01/01/17",6),rep("02/01/17",6),rep("03/01/17",6)),1)
datex <- as.Date(datex, "%d/%m/%y")
Ax <- rep("A1",18)
Bx <- rep(c(rep("B1",3),rep("B2",3)),3)
Cx <- rep(c("C1","C2","C3"),6)
valx <- 100
for(i in 1:17){valx[i+1] <- valx[i]+1}
data.train <- data.frame(datex, Ax, Bx, Cx, valx)
names.group <- names(data.train)[1:length(data.train)-1]
data.group <- Map(combn, list(names.group), seq_along(names.group), simplify = F) %>% unlist(recursive = F)
find.index <- sapply(data.group, function(x, find.y){
any(find.y %in% x)
}, find.y = c("datex"))
index.group <- NULL
for(i in 2:length(find.index)){
if(find.index[i] == "TRUE"){
index.group[i] <- i
}
}
index.group[is.na(index.group)] <- 0
for(i in 1:length(data.group)){
if(index.group[i] == 0){
data.group[[i]] <- 0
} else {
data.group[[i]] <- data.group[[i]]
}
}
data.group2 <- data.group[sapply(data.group, function(x) any(x != 0))]
combination.result <- lapply(data.group2, FUN = function(x) {
do.call(what = group_by_, args = c(list(data.train), x)) %>% summarise(sumVar = sum(valx))
})
combination.result
but i don't produce what i want. Thanks
You can generate for combinations of length 1 then for combinations of length 2. Use paste to create your Variable column. Then rbindlist all your results to get the final output.
library(data.table)
setDT(data.train)
sumCombi <- function(x, mySep="_") {
data.train[ , sum(Val), by=c("Date", x)][,
list(Date,
Variable=do.call(paste, c(.SD[,x,with=FALSE], list(sep=mySep))),
SumVal=V1)]
}
rbindlist(c(
#combinations with 1 element in each combi
lapply(c("A", "B", "C"), sumCombi)
,
#combinations with 2 elements in each combi
lapply(combn(c("A","B","C"), 2, simplify=FALSE), sumCombi)
), use.names=FALSE)
or more generically/programmatically:
#assuming that your columns are in the middle of the columns while excl. first and last columns
myCols <- names(data.train)[-c(1, ncol(data.train))]
rbindlist(unlist(
lapply(seq_along(myCols), function(n)
combn(myCols, n, sumCombi, simplify=FALSE)
), recursive=FALSE),
use.names=FALSE)

Resources