Dot Product In purrr - r

How would I calculate the dot product in purrr? As a reprex, here is a simple example.
Data generation
#fake data
X <- as_tibble(list(a = rnorm(10,0,1),
b = rnorm(10,10,1),
c = rnorm(10,100,1)))
z <- c(1,0,1)
#make tibble matrix
X_matrix <- X %>% as.matrix()
X_matrix
a b c
[1,] 0.01182775 9.032966 100.95322
[2,] 0.85718250 10.015310 102.30181
[3,] -0.06742915 10.535482 100.21764
[4,] -0.18236798 9.052234 99.37345
[5,] -0.32151084 10.329401 98.81186
[6,] 2.94303948 9.994800 99.93874
[7,] 0.03299169 9.079023 99.73501
[8,] 0.06518171 8.841637 99.91130
[9,] -0.71944580 10.281631 100.32533
[10,] 1.49983359 10.776108 99.35903
Calculate dot product
The dot product of each row of X with z is a*z[1] + b*z[2] + c*z[3], giving one value per row:
X_matrix %*% z
[,1]
[1,] 100.96505
[2,] 103.15900
[3,] 100.15021
[4,] 99.19108
[5,] 98.49035
[6,] 102.88178
[7,] 99.76800
[8,] 99.97648
[9,] 99.60588
[10,] 100.85886
Ideally, I would like to add the dot product as a column to X

Related

I need to create an accumulation index across columns in a matrix

I need to create an accumulation index across columns in my data. I set up the problem as follows
#accumulation function
mat1 <- matrix(nrow=16, ncol =4)
mat1[1,] <- c(1,1,1,1)
mat1[2:16,] <- 1+rnorm(60,0,0.1)
[,1] [,2] [,3] [,4]
[1,] 1.0000000 1.0000000 1.0000000 1.0000000
[2,] 0.9120755 0.9345682 0.8533162 0.8737582
[3,] 0.7838427 0.9691806 0.8216284 0.9863669
[4,] 0.9095204 1.1906031 1.0253083 1.0700338
[5,] 1.0202524 0.9974672 1.1348315 1.1115018
[6,] 0.9456184 1.1250529 1.0348011 0.9323336
[7,] 1.0053195 0.9917475 1.0178855 1.0880626
[8,] 0.9550709 0.9107060 0.8876688 0.9060996
[9,] 1.0728177 1.0559643 0.9161789 0.9711522
[10,] 0.9579642 1.0082560 0.9833227 0.9306639
[11,] 1.0044883 1.1323498 1.0388025 0.8926033
[12,] 0.8777846 0.9940302 0.8314166 0.8479962
[13,] 1.1042297 0.9767410 0.9355374 0.8859680
[14,] 1.1245737 0.8291948 1.0491585 0.9887672
[15,] 0.9687700 0.9915095 0.8962534 1.0220163
[16,] 0.9432597 1.0310273 0.9288159 1.0838243
The desired output takes the product of entries in each column, up to each row number.
therefore:
mat2 <- matrix(nrow=16, ncol=4)
mat2[1,] <- c(1,1,1,1)
mat2[2,] <- mat1[1,]*mat1[2,]
mat2[3,] <-mat1[1,]*mat1[2,]*mat1[3,]
mat2[4,] <-mat1[1,]*mat1[2,]*mat1[3,]*mat1[4,]
and so on and so forth up to row 16. The idea is to accumulate (take the product) of all entries in mat1 up to a particular row number. So row1 of mat2 = row 1 of mat 1. row 2 of mat 2, is equal to row1 mat1 *row2 mat1. row3 of mat2 is equal to row1 of mat1 *row2 of mat1, *row3 of mat1. This process continues up to row 16.
I need to write a function able to do this calculation for matrices in a list all of the same size.
Basically what you need is cumulative product over each column which can be applied using cumprod function in base R
apply(mat1, 2, cumprod)
# [,1] [,2] [,3] [,4]
# [1,] 1.0000 1.0000 1.0000 1.0000
# [2,] 0.8793 0.9890 1.1102 0.9031
# [3,] 0.9037 0.9384 1.0574 0.8031
# [4,] 1.0017 0.8529 0.9824 0.7026
# [5,] 0.7667 0.7815 0.9332 0.6658
# [6,] 0.7996 0.9703 0.7811 0.6327
# [7,] 0.8401 0.9833 0.6899 0.5184
# [8,] 0.7918 0.9351 0.5395 0.4883
# [9,] 0.7485 0.8939 0.4672 0.4341
#[10,] 0.7063 0.9350 0.4534 0.3901
#[11,] 0.6434 0.8701 0.4323 0.3837
#[12,] 0.6127 0.7441 0.4950 0.4053
#[13,] 0.5515 0.7869 0.4421 0.4721
#[14,] 0.5087 0.7063 0.4043 0.4356
#[15,] 0.5120 0.7052 0.3929 0.5056
#[16,] 0.5611 0.6392 0.3538 0.4470
data
set.seed(1234)
mat1 <- matrix(nrow=16, ncol =4)
mat1[1,] <- c(1,1,1,1)
mat1[2:16,] <- 1+rnorm(60,0,0.1)
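We can make use of rowCumprods from matrixStats, which would be efficient. Note that rowCumprods accumulates across the columns within each row; for the cumulative product down each column, as asked in the question, use colCumprods(mat1) instead.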
library(matrixStats)
rowCumprods(mat1)
# [,1] [,2] [,3] [,4]
# [1,] 1.0000000 1.0000000 1.0000000 1.0000000
# [2,] 0.8792934 0.8695961 0.9654515 0.8719461
# [3,] 1.0277429 0.9752243 0.9288433 0.8259908
# [4,] 1.1084441 1.0074432 0.9359711 0.8187889
# [5,] 0.7654302 0.7013506 0.6661948 0.6312977
# [6,] 1.0429125 1.2948629 1.0839177 1.0300632
# [7,] 1.0506056 1.0646930 0.9403774 0.7705423
# [8,] 0.9425260 0.8962776 0.7008855 0.6600887
# [9,] 0.9453368 0.9036902 0.7825060 0.6957347
#[10,] 0.9435548 0.9869196 0.9578751 0.8606545
#[11,] 0.9109962 0.8477986 0.8082998 0.7951804
#[12,] 0.9522807 0.8143710 0.9324137 0.9849138
#[13,] 0.9001614 0.9518986 0.8501747 0.9902680
#[14,] 0.9223746 0.8279552 0.7571348 0.6985816
#[15,] 1.0064459 1.0049223 0.9767219 1.1335746
#[16,] 1.0959494 0.9933742 0.8945990 0.7910216
data
set.seed(1234)
mat1 <- matrix(nrow=16, ncol =4)
mat1[1,] <- c(1,1,1,1)
mat1[2:16,] <- 1+rnorm(60,0,0.1)

Using calls to update "calls" to functions in R

I'm having a bit of trouble understanding how to use calls in R. I want to take an object created by a function and use it as an argument to another function, modifying some of the arguments to the original function along the way. I've looked at Hadley Wickham's page on expressions, but it doesn't quite seem to tell me how to do what I want to do.
Here is a partially-working example of the sort of thing that I want to do. First, fake data:
library(MASS)
N <- 1000
p <- 10
A <- matrix(rnorm(p^2), p)
X <- mvrnorm(N, rep(0, p), t(A) %*% A)
B <- rnorm(p)
y <- X %*% B + rnorm(N)
Next, a function to do ridge regression. It is a function of X, y, and the ridge penalty L. It returns the coefs and the call:
# Ridge regression fit.
#
# X: numeric design matrix (one column per coefficient).
# y: response; a vector or a matrix with as many rows as X.
# L: ridge penalty added to every diagonal entry of t(X) %*% X.
#
# Returns a list with `beta` (the coefficient matrix) and `cl` (the matched
# call, so the fit can be re-run later with modified arguments).
pols <- function(X, y, L){
  cl <- match.call()
  # Size the penalty from X itself rather than from a global `p`. diag(L, k)
  # is also correct for k = 1, where diag(rep(L, 1)) would instead build an
  # L x L identity matrix.
  k <- ncol(X)
  # Solve the normal equations directly instead of forming the inverse.
  beta <- solve(crossprod(X) + diag(L, k), crossprod(X, y))
  list(beta = beta, cl = cl)
}
1> pols(X, y, 1)
$beta
[,1]
[1,] -0.02622669
[2,] -1.96523722
[3,] 0.36375563
[4,] -1.14192468
[5,] -0.14436051
[6,] -0.29700918
[7,] -0.81543748
[8,] -0.17699934
[9,] -0.01342649
[10,] 0.58862577
$cl
pols(X = X, y = y, L = 1)
Now, how do I use the call to drive the following function? It takes a pols object and a vector of different values of L and uses them to re-call pols
Lvec <- 1:10
tryLs <- function(pols, Lvec){
for (i in Lvec){
1. Extract the args from the call in pols
2. Modify the argument `L` based on Lvec
3. Run `pols` with old arguments, but `L` modified according to `i`
}
}
How do I make this last function work?
To clarify, the workflow I'm envisioning is something like:
obj <- pols(X, y, 0)
Lvec <- 1:10
output <- tryLs(obj, Lvec)
I'm going to make a few guesses/assumptions here.
(1) When you say "a pols object", you mean an object returned by the pols function. I've modified pols() below so that it returns an object of type "pols". This isn't at all necessary, but might be useful in the future if you want to do fancier things (e.g. implement custom printing or plotting methods for these objects).
Setup:
library(MASS)
N <- 1000
p <- 10
A <- matrix(rnorm(p^2), p)
X <- mvrnorm(N, rep(0, p), t(A) %*% A)
B <- rnorm(p)
y <- X %*% B + rnorm(N)
I'm also modifying pols so that the element containing the call is called call: this makes the objects automatically work with R's default update method.
# Ridge regression fit returning an object of class "pols".
#
# X: numeric design matrix (one column per coefficient).
# y: response; a vector or a matrix with as many rows as X.
# L: ridge penalty added to every diagonal entry of t(X) %*% X.
#
# Returns a list of class "pols" with `beta` (the coefficient matrix) and
# `call` (the matched call). Storing the call under the name `call` is what
# lets the default update() method re-run the fit with changed arguments.
pols <- function(X, y, L){
  cl <- match.call()
  # Size the penalty from X itself rather than from a global `p`. diag(L, k)
  # is also correct for k = 1, where diag(rep(L, 1)) would instead build an
  # L x L identity matrix.
  k <- ncol(X)
  # Solve the normal equations directly instead of forming the inverse.
  beta <- solve(crossprod(X) + diag(L, k), crossprod(X, y))
  r <- list(beta = beta, call = cl)
  class(r) <- "pols"
  r
}
In order to have a pols object we have to run pols() once and save the result:
pols1 <- pols(X,y,0)
Now here's your function. My second assumption is that you only want the $beta values returned ...
# Re-run a fitted object once per penalty value and collect the coefficients.
#
# pols: a fitted object whose stored call can be re-evaluated by update().
# Lvec: vector of penalty values; each one replaces the `L` argument in turn.
#
# Returns whatever sapply() builds from the `beta` element of every refit.
tryLs <- function(pols, Lvec) {
  refit_beta <- function(L) {
    # update() swaps the new L into the stored call and evaluates it again.
    refit <- update(pols, L = L)
    refit$beta
  }
  sapply(Lvec, refit_beta)
}
Lvec <- 1:10
tryLs(pols1,Lvec)
If you wanted to do this at a slightly more nuts-and-bolts level (rather than using update) you would do something along the lines of
pols$call$L <- new_L_value
new_result <- eval(pols$call,parent.frame())
If you look at update.default() you'll see that's more or less what it does (it is using the information from match.call(), implicitly ...)
If I guess correctly as to what you need, I would use partial from the pryr package. This allows you to create a function with a number of the arguments already set:
library(pryr)
preset_pols = partial(pols, X = preset_X, y = preset_y)
preset_pols(L = 1)
calling preset_pols will now always use the data specified in preset_X and preset_y.
In my opinion there is no need for the for loop, lapply would do just fine here:
list_of_results = lapply(Lvec, preset_pols)
Lvec <- 1:10
# Fit `pols` once for every penalty in `Lvec` and print the coefficients and
# the recorded call of each fit.
#
# pols: the fitting function, called as pols(X, y, L); its result must have
#       `beta` and `cl` elements.
# Lvec: vector of penalty values to try.
#
# X and y are not parameters; they are looked up in the enclosing environment.
# The function is run only for its printed output.
tryLs <- function(pols, Lvec){
  for (i in Lvec){
    print(paste("Result for ",i))
    # Fit once and print each element. The original print(pols(X,y,i))$beta
    # printed the whole result and then discarded the extracted element, so
    # every fit was computed and printed in full twice. (Any transcript
    # produced by that version therefore shows each result duplicated.)
    fit <- pols(X, y, i)
    print(fit$beta)
    print(fit$cl)
  }
}
tryLs(pols,Lvec)
[1] "Result for 1"
$beta
[,1]
[1,] 0.03317113
[2,] -0.37399461
[3,] -1.35395755
[4,] 0.09850883
[5,] -0.14503628
[6,] -1.97204600
[7,] -0.56459244
[8,] -1.10422047
[9,] -0.92047748
[10,] 1.76236287
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] 0.03317113
[2,] -0.37399461
[3,] -1.35395755
[4,] 0.09850883
[5,] -0.14503628
[6,] -1.97204600
[7,] -0.56459244
[8,] -1.10422047
[9,] -0.92047748
[10,] 1.76236287
$cl
pols(X = X, y = y, L = i)
[1] "Result for 2"
$beta
[,1]
[1,] -0.01014376
[2,] -0.32064189
[3,] -1.29381243
[4,] 0.10695047
[5,] -0.24791384
[6,] -1.83662948
[7,] -0.55615073
[8,] -1.12204424
[9,] -0.96717380
[10,] 1.79084625
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.01014376
[2,] -0.32064189
[3,] -1.29381243
[4,] 0.10695047
[5,] -0.24791384
[6,] -1.83662948
[7,] -0.55615073
[8,] -1.12204424
[9,] -0.96717380
[10,] 1.79084625
$cl
pols(X = X, y = y, L = i)
[1] "Result for 3"
$beta
[,1]
[1,] -0.04097765
[2,] -0.28237279
[3,] -1.25064282
[4,] 0.11286963
[5,] -0.32135783
[6,] -1.74000917
[7,] -0.55025764
[8,] -1.13481390
[9,] -1.00038377
[10,] 1.81099139
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.04097765
[2,] -0.28237279
[3,] -1.25064282
[4,] 0.11286963
[5,] -0.32135783
[6,] -1.74000917
[7,] -0.55025764
[8,] -1.13481390
[9,] -1.00038377
[10,] 1.81099139
$cl
pols(X = X, y = y, L = i)
[1] "Result for 4"
$beta
[,1]
[1,] -0.06401718
[2,] -0.25352501
[3,] -1.21807596
[4,] 0.11721395
[5,] -0.37641945
[6,] -1.66761823
[7,] -0.54595545
[8,] -1.14442668
[9,] -1.02517135
[10,] 1.82592968
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.06401718
[2,] -0.25352501
[3,] -1.21807596
[4,] 0.11721395
[5,] -0.37641945
[6,] -1.66761823
[7,] -0.54595545
[8,] -1.14442668
[9,] -1.02517135
[10,] 1.82592968
$cl
pols(X = X, y = y, L = i)
[1] "Result for 5"
$beta
[,1]
[1,] -0.08186374
[2,] -0.23095555
[3,] -1.19257456
[4,] 0.12050945
[5,] -0.41923287
[6,] -1.61137106
[7,] -0.54271257
[8,] -1.15193566
[9,] -1.04434740
[10,] 1.83739926
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.08186374
[2,] -0.23095555
[3,] -1.19257456
[4,] 0.12050945
[5,] -0.41923287
[6,] -1.61137106
[7,] -0.54271257
[8,] -1.15193566
[9,] -1.04434740
[10,] 1.83739926
$cl
pols(X = X, y = y, L = i)
[1] "Result for 6"
$beta
[,1]
[1,] -0.09607715
[2,] -0.21277987
[3,] -1.17201761
[4,] 0.12307151
[5,] -0.45347618
[6,] -1.56641949
[7,] -0.54021027
[8,] -1.15797228
[9,] -1.05959733
[10,] 1.84644233
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.09607715
[2,] -0.21277987
[3,] -1.17201761
[4,] 0.12307151
[5,] -0.45347618
[6,] -1.56641949
[7,] -0.54021027
[8,] -1.15797228
[9,] -1.05959733
[10,] 1.84644233
$cl
pols(X = X, y = y, L = i)
[1] "Result for 7"
$beta
[,1]
[1,] -0.1076495
[2,] -0.1977993
[3,] -1.1550561
[4,] 0.1251007
[5,] -0.4814888
[6,] -1.5296799
[7,] -0.5382458
[8,] -1.1629381
[9,] -1.0719931
[10,] 1.8537217
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.1076495
[2,] -0.1977993
[3,] -1.1550561
[4,] 0.1251007
[5,] -0.4814888
[6,] -1.5296799
[7,] -0.5382458
[8,] -1.1629381
[9,] -1.0719931
[10,] 1.8537217
$cl
pols(X = X, y = y, L = i)
[1] "Result for 8"
$beta
[,1]
[1,] -0.1172419
[2,] -0.1852151
[3,] -1.1407910
[4,] 0.1267308
[5,] -0.5048296
[6,] -1.4990974
[7,] -0.5366841
[8,] -1.1671009
[9,] -1.0822491
[10,] 1.8596792
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.1172419
[2,] -0.1852151
[3,] -1.1407910
[4,] 0.1267308
[5,] -0.5048296
[6,] -1.4990974
[7,] -0.5366841
[8,] -1.1671009
[9,] -1.0822491
[10,] 1.8596792
$cl
pols(X = X, y = y, L = i)
[1] "Result for 9"
$beta
[,1]
[1,] -0.1253119
[2,] -0.1744744
[3,] -1.1286001
[4,] 0.1280542
[5,] -0.5245776
[6,] -1.4732498
[7,] -0.5354316
[8,] -1.1706458
[9,] -1.0908596
[10,] 1.8646205
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.1253119
[2,] -0.1744744
[3,] -1.1286001
[4,] 0.1280542
[5,] -0.5245776
[6,] -1.4732498
[7,] -0.5354316
[8,] -1.1706458
[9,] -1.0908596
[10,] 1.8646205
$cl
pols(X = X, y = y, L = i)
[1] "Result for 10"
$beta
[,1]
[1,] -0.1321862
[2,] -0.1651825
[3,] -1.1180392
[4,] 0.1291370
[5,] -0.5415033
[6,] -1.4511217
[7,] -0.5344217
[8,] -1.1737051
[9,] -1.0981778
[10,] 1.8687639
$cl
pols(X = X, y = y, L = i)
$beta
[,1]
[1,] -0.1321862
[2,] -0.1651825
[3,] -1.1180392
[4,] 0.1291370
[5,] -0.5415033
[6,] -1.4511217
[7,] -0.5344217
[8,] -1.1737051
[9,] -1.0981778
[10,] 1.8687639
$cl
pols(X = X, y = y, L = i)

Matrix into another matrix with specified dimensions

I have a matrix with 2 columns, and I'd like to turn it into a matrix with specified dimensions.
> t <- matrix(rnorm(20), ncol=2, nrow=10)
[,1] [,2]
[1,] 1.4938530 1.2493088
[2,] -0.8079445 1.8715868
[3,] 0.5775695 -0.9277420
[4,] 0.4415969 2.6357908
[5,] 0.3209226 -1.1306049
[6,] 0.5109251 -0.8661100
[7,] 1.9495571 0.2092941
[8,] 0.7816373 1.1517466
[9,] 0.0300595 -0.1351532
[10,] 0.7550894 0.7778869
What I'd like to do is something like:
> tt <- matrix(t, ncol=4, nrow=5)
[,1] [,2] [,3] [,4]
[1,] 1.4938530 1.2493088 -0.8079445 1.8715868
[2,] 0.5775695 -0.9277420 0.4415969 2.6357908
[3,] etc.
I tried to do things with modulo but my head hurts too much for me to try even one more minute.
You can transpose your first matrix, so that data is stored in the order you want, and then fill the second matrix by row:
# Transpose first so the values are laid out row by row, then refill by row.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
tt <- matrix(t(t), ncol = 4, nrow = 5, byrow = TRUE)
t
# [,1] [,2]
# [1,] -1.4162465950 0.01532476
# [2,] -0.2366332875 -0.04024386
# [3,] 0.5146631983 -0.34720239
# [4,] 1.9243922633 -0.24016160
# [5,] 1.6161165230 0.63187438
# [6,] -0.3558181508 -0.73199138
# [7,] 0.7459405376 0.01934826
# [8,] -1.0428581093 -2.04422042
# [9,] 0.0003166344 0.98973993
#[10,] 0.6390745275 -0.65584930
tt
# [,1] [,2] [,3] [,4]
# [1,] -1.4162465950 0.01532476 -0.2366333 -0.04024386
# [2,] 0.5146631983 -0.34720239 1.9243923 -0.24016160
# [3,] 1.6161165230 0.63187438 -0.3558182 -0.73199138
# [4,] 0.7459405376 0.01934826 -1.0428581 -2.04422042
# [5,] 0.0003166344 0.98973993 0.6390745 -0.65584930
When you work with a matrix in R, you can think of it as a vector with the data stored column by column. So extracting data by row from a matrix is not as straightforward as extracting by column, which is essentially how the data is stored. After transposing the first matrix, the data is stored in the order you want to extract it, so filling the second matrix by row is straightforward.

Subset out all elements with the same name of list

Data
I have a list of lists that looks something like this:
# Three sublists, each holding a one-column "power" matrix of 10 draws plus
# two numeric vectors. The stray second argument to as.matrix() in the
# original was silently ignored, so it is dropped here.
sublist1 <- list(power = as.matrix(rnorm(10)), x = rnorm(10), y = rnorm(10))
sublist2 <- list(power = as.matrix(rnorm(10)), x = rnorm(10), y = rnorm(10))
sublist3 <- list(power = as.matrix(rnorm(10)), x = rnorm(10), y = rnorm(10))
mylist <- list(sublist1, sublist2, sublist3)
My goal would be to pull out only the matrices named power
I've tried
mylist_power =mylist[sapply(mylist, '[', 'Power')]
But that's not working.
Brownie point alert!!!
How can I find the mean of the newly created list of matrices named power?
mylist_power <- sapply(mylist, '[', 'power')
and some means:
sapply(mylist_power, mean) # one per matrix
sapply(mylist_power, colMeans) # for each column and each matrix
sapply(mylist_power, rowMeans) # for each row and each matrix
mean(unlist(mylist_power)) # for the whole list
Reduce(`+`, mylist_power) / length(mylist_power) # element-wise
purrr solution which can be replicated to baseR's Map
#part 1 (to return only $power of every list item)
map(mylist, ~.x$power)
[[1]]
[,1]
[1,] 0.33281918
[2,] -1.12404046
[3,] -0.70613078
[4,] -0.72754386
[5,] -1.83431439
[6,] -0.40768794
[7,] 0.02686119
[8,] 0.91162864
[9,] 1.63434648
[10,] 0.06068561
[[2]]
[,1]
[1,] -0.02256943
[2,] -0.90315486
[3,] 0.90777295
[4,] 1.16194290
[5,] -0.45795340
[6,] 0.92795667
[7,] -2.10293514
[8,] -1.67716711
[9,] 1.76565577
[10,] 0.79444742
[[3]]
[,1]
[1,] -0.36200564
[2,] -1.13955016
[3,] -0.81537133
[4,] 1.31024563
[5,] -0.25836094
[6,] 0.60626489
[7,] 0.31344822
[8,] 0.05360308
[9,] 1.12825379
[10,] -0.55813346
part-2
map(mylist, ~.x$power %>% colMeans)
[[1]]
[1] -0.1833376
[[2]]
[1] 0.03939958
[[3]]
[1] 0.02783941
To get these values in a vector instead
map_dbl(mylist, ~.x$power %>% colMeans)
[1] -0.18333763 0.03939958 0.02783941

How to Set Initial Value For Creating An Index Of Returns With cumprod() in R?

I have a series of returns over some period, let's say daily returns:
> Z <-cbind(rnorm(10)*.01)
> Z
[,1]
[1,] -0.0095401182
[2,] 0.0119037893
[3,] 0.0001539471
[4,] -0.0087361367
[5,] -0.0127281577
[6,] -0.0031177198
[7,] -0.0041689219
[8,] -0.0066547279
[9,] 0.0156863175
[10,] -0.0126733237
Next, I create an investment index with cumprod() that represents the return on an initial $1 investment based on return series Z:
> ZZ <-cbind((cumprod(1+Z)*1))
> ZZ
[,1]
[1,] 0.9904599
[2,] 1.0022501
[3,] 1.0024044
[4,] 0.9936473
[5,] 0.9810000
[6,] 0.9779415
[7,] 0.9738645
[8,] 0.9673837
[9,] 0.9825584
[10,] 0.9701061
How do I edit the code for creating index ZZ so that the resulting output shows an initial value of 1.00?
Make the first return zero, i.e. prepend a 0 to the return series (with the existing series this is cumprod(1 + c(0, Z))):
cumprod(1+c(0,rnorm(10)*.01))
Or divide all elements by the first value:
ZZ <- ZZ/ZZ[1,1]
The most direct way is to just add a 1 at the start of ZZ:
ZZ <- cbind(c(1, cumprod(1+Z)))

Resources