Apply mapping to matrix columns in R - r

How can I apply the mapping
x′ = ax + by and y′ = cx + dy
in R to a matrix with 2 columns x and y of numbers, knowing the values of a, b, c, d?

Each column x and y are vectors so you can do :
a <- 1
b <- 2
c <- 3
d <- 4
df <- data.frame(x = 1:4, y = 4:1)
xx <- a*df$x + b*df$y
yy <- c*df$x + d*df$y
xx
> 9 8 7 6
yy
> 19 18 17 16

Another option is matrix multiplication using %*%.
Using Clemsang's data:
mat <- as.matrix(data.frame(x = 1:4, y = 4:1))
A <- matrix(c(1:4), nrow = 2)
mat %*% A
# [,1] [,2]
#[1,] 9 19
#[2,] 8 18
#[3,] 7 17
#[4,] 6 16
The columns correspond to x' and y'.

Related

How to obtain the highest p-values of a matrix?

Under R, I developed the following script :
X = 1:3
Y = 1:2
Z = 1:4
nlargest <- function(m, n) {
res <- order(m)[seq_len(n)]
pos <- arrayInd(res, dim(m), useNames = TRUE)
values = m[res]
position = pos
list(unique(values),
unique(position))
}
m = do.call(expand.grid, lapply(list(X, Y , Z), unique))
m
ecart=as.matrix(dist(m, method = "euclidean", diag = TRUE, upper = FALSE, p = 2))
alpha=0.8
m=10
proba_matrix=alpha*exp(-ecart)
nlargest(proba_matrix, 10)
The function nlargest that I imported from another similar question doesn't extract the n-largest values of the matrix proba_matrix as expected.
I'm searching for a way to obtain n highest values of any matrix with their associated positions.
Example of expected results :
m <- matrix(seq(1,9,by=1),nrow=3,byrow=TRUE);
diag(m) <- 1;
m
[1] 1 2 3 4 5 6 7 8 9
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 4 1 6
[3,] 7 8 1
# I need to obtain :
nlargest(m,4)
$values
[1] 4 6 7 8
$position
row col
[1,] 2 1
[2,] 2 3
[3,] 3 1
[4,] 3 2
In a 2-D matrix you could do something like this:
library(dplyr)
nlargest <- function(m, n){
df <- as.data.frame(cbind(as.vector(m), expand.grid(1:nrow(m), 1:ncol(m))))
colnames(df) <- c("value", "row", "column")
largest <- dplyr::distinct(df, value, .keep_all = T) %>%
dplyr::slice_max(order_by = value, n = n)
l <- list(values = largest$value,
position = largest[, c("row", "column")])
return(l)
}
m <- matrix(c(9, seq(1,9,by=1)),nrow=2,byrow=TRUE)
nlargest(m, 4)
$values
[1] 9 8 7 6
$position
row column
1 1 1
2 2 4
3 2 3
4 2 2

Create a sequence of numbers with breaks

I want to create a sequence of numbers like this:
X=22+1
Y=x+2
Z=x+3
A=x+4
B=X+5
1,2,X,3,4,Y,5,6,Z,7,8,A,10,11,B #and so on...
1,2,23,3,4,25,5,6,26,7,8,27,10,11,28 #and so on...
How do this with R? there's a function to do this?
We can do
unlist(Map(c, split(v1, as.integer(gl(length(v1), 2,
length(v1)))), c(X, Y, Z, A, B)), use.names = FALSE)
#[1] 1 2 23 3 4 25 5 6 26 7 8 27 9 10 28
data
v1 <- 1:10
X <- 23
Y <- X + 2
Z <- X + 3
A <- X + 4
B <- X + 5
You can create a duplicated record at specific position and replace them with another sequence.
seq1 <- 1:10
seq2 <- c(23, 25:28)
seq3 <- sort(c(seq1, seq(2, 10, 2)))
seq3[duplicated(seq3)] <- seq2
seq3
#[1] 1 2 23 3 4 25 5 6 26 7 8 27 9 10 28

storing results of a for function in list or

add <- c( 2,3,4)
for (i in add){
a <- i +3
b <- a + 3
z <- a + b
print(z)
}
# Result
[1] 13
[1] 15
[1] 17
In R, it can print the result, but I want to save the results for further computation in a vector, data frame or list
Thanks in advance
Try something like:
add <- c(2, 3, 4)
z <- rep(0, length(add))
idx = 1
for(i in add) {
a <- i + 3
b <- a + 3
z[idx] <- a + b
idx <- idx + 1
}
print(z)
This is simple algebra, no need in a for loop at all
res <- (add + 3)*2 + 3
res
## [1] 13 15 17
Or if you want a data.frame
data.frame(a = add + 3, b = add + 6, c = (add + 3)*2 + 3)
# a b c
# 1 5 8 13
# 2 6 9 15
# 3 7 10 17
Though in general, when you are trying to something like that, it is better to create a function, for example
myfunc <- function(x) {
a <- x + 3
b <- a + 3
z <- a + b
z
}
myfunc(add)
## [1] 13 15 17
In cases when a loop is actually needed (unlike in your example) and you want to store its results, it is better to use *apply family for such tasks. For example, use lapply if you want a list back
res <- lapply(add, myfunc)
res
# [[1]]
# [1] 13
#
# [[2]]
# [1] 15
#
# [[3]]
# [1] 17
Or use sapply if you want a vector back
res <- sapply(add, myfunc)
res
## [1] 13 15 17
For a data.frame to keep all the info
add <- c( 2,3,4)
results <- data.frame()
for (i in add){
a <- i +3
b <- a + 3
z <- a + b
#print(z)
results <- rbind(results, cbind(a,b,z))
}
results
a b z
1 5 8 13
2 6 9 15
3 7 10 17
If you just want z then use a vector, no need for lists
add <- c( 2,3,4)
results <- vector()
for (i in add){
a <- i +3
b <- a + 3
z <- a + b
#print(z)
results <- c(results, z)
}
results
[1] 13 15 17
It might be instructive to compare these two results with those of #dugar:
> sapply(add, function(x) c(a=x+3, b=a+3, z=a+b) )
[,1] [,2] [,3]
a 5 6 7
b 10 10 10
z 17 17 17
That is the result of lazy evaluation and sometimes trips us up when computing with intermediate values. This next one should give a slightly more expected result:
> sapply(add, function(x) c(a=x+3, b=(x+3)+3, z=(x+3)+((x+3)+3)) )
[,1] [,2] [,3]
a 5 6 7
b 8 9 10
z 13 15 17
Those results are the transpose of #dugar. Using sapply or lapply often saves you the effort off setting up a zeroth case object and then incrementing counters.
> lapply(add, function(x) c(a=x+3, b=(x+3)+3, z=(x+3)+((x+3)+3)) )
[[1]]
a b z
5 8 13
[[2]]
a b z
6 9 15
[[3]]
a b z
7 10 17

Flatten matrix in R to four columns (indexes and upper/lower triangles)

I'm using the cor.prob() function that's been posted several times around the mailing list to get a matrix of correlations (lower diagonal) and p-values (upper diagonals):
cor.prob <- function (X, dfr = nrow(X) - 2) {
R <- cor(X)
above <- row(R) < col(R)
r2 <- R[above]^2
Fstat <- r2 * dfr/(1 - r2)
R[above] <- 1 - pf(Fstat, 1, dfr)
R[row(R) == col(R)] <- NA
R
}
d <- data.frame(x=1:5, y=c(10,16,8,60,80), z=c(10,9,12,2,1))
cor.prob(d)
> cor.prob(d)
x y z
x NA 0.04856042 0.107654038
y 0.8807155 NA 0.003523594
z -0.7953560 -0.97945703 NA
How would I collapse the above correlation matrix (with the correlations in the lower half, p-values in the upper half) into a four-column matrix: two indexes, the correlation, and the p-value? E.g.:
i j cor pval
x y .88 .048
x z -.79 .107
y z -.97 0.0035
I've seen the answer to the previous question like this, but will only give me a 3-column matrix, not a four column matrix with separate columns for the p-value and correlation.
Any help is appreciated!
well it's not a matrix, because you can't mix characters and numerics. But:
this is my first attempt (before your label swap):
m <- cor.prob(d)
ut <- upper.tri(m)
lt <- lower.tri(m)
d <- data.frame(i=rep(row.names(m),ncol(m))[as.vector(ut)],
j=rep(colnames(m),each=nrow(m))[as.vector(ut)],
cor=m[ut],
p=m[lt])
now apply the correction I suggested below and you get
d <- data.frame(i=rep(row.names(m),ncol(m))[as.vector(ut)],
j=rep(colnames(m),each=nrow(m))[as.vector(ut)],
cor=m[ut],
p=t(m)[ut])
finally your label swap, use row()/col(), and write it as a function:
f1 <- function(m) {
ut <- upper.tri(m)
data.frame(i = rownames(m)[row(m)[ut]],
j = rownames(m)[col(m)[ut]],
cor=t(m)[ut],
p=tm[ut])
}
then
m<-matrix(1:25,5,dimnames=list(letters[1:5],letters[1:5])
> m
a b c d e
a 1 6 11 16 21
b 2 7 12 17 22
c 3 8 13 18 23
d 4 9 14 19 24
e 5 10 15 20 25
> f1(m)
i j cor p
1 a b 6 2
2 a c 11 3
3 b c 12 8
4 a d 16 4
5 b d 17 9
6 c d 18 14
7 a e 21 5
8 b e 22 10
9 c e 23 15
10 d e 24 20
Can you explain what you expected if it wasn't this?
cd <- cor.prob(d)
dcd <- as.data.frame( which( row(cd) < col(cd), arr.ind=TRUE) )
dcd$pval <- cd[row(cd) < col(cd)]
dcd$cor <- cd[row(cd) > col(cd)]
dcd[[2]] <-dimnames(cd)[[2]][dcd$col]
dcd[[1]] <-dimnames(cd)[[2]][dcd$row]
dcd
#--------------------
row col pval cor
1 x y 0.048560420 0.8807155
2 x z 0.107654038 -0.7953560
3 y z 0.003523594 -0.9794570

elegant way to loop over a function for a transition matrix in 2 dimensions in R

Imagine I have a function that gives the transition probability of going from state {x,y} to state {X, Y}: transition <- function(x,y,X,Y)
Imagine the x values can assume values in on a discrete set of points x_grid and y assume discrete values in y_grid, and I'd like to compute all possible transitions, e.g. fill out as a 2D matrix like this:
X1Y1 X2Y1 X3Y1 X1Y2 .... X3Y3
x1,y1
x2,y1
x3,y1
x1,y2
x2,y2
x3,y2
...
x3,y3
What's the simplest way to loop over my function in R to generate this matrix?
A cumbersome approach with for loops
x_grid <- 1:3
y_grid <- 1:3
## dummy function
transition <- function(x,y,X,Y)
x == X && y == Y
nx <- length(x_grid)
ny <- length(y_grid)
T <- matrix(NA, ncol = nx * ny, nrow = nx * ny)
for(i in 1:nx)
for(j in 1:ny)
for(k in 1:nx)
for(l in 1:ny)
T[i+(j-1)*ny, k+(l-1)*ny] <-
transition(x_grid[i], y_grid[j], x_grid[k], y_grid[l])
Surely there's a more efficient and more elegant way to do this in R?
For instance,
sapply(x_grid, function(x)
sapply(y_grid, function(y)
sapply(x_grid, function(X)
sapply(y_grid, function(Y)
transition(x,y,X,Y) ))))
works more efficiently but returns an object of the wrong shape. Turning the outermost apply into an lapply and then doing cbind on it's elements corrects this, but feels very crude.
Here's a wild shot in the dark. I hope it's helpful:
#Some simple data grid points
d <- expand.grid(1:3,1:3,1:3,1:3)
#Trivial function
f <- function(x,y,X,Y){x*y*X*Y}
#Wrap mapply in matrix; fills by column by default
matrix(mapply(f,d$Var1,d$Var2,d$Var3,d$Var4),nrow = 9)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
[1,] 1 2 3 2 4 6 3 6 9
[2,] 2 4 6 4 8 12 6 12 18
[3,] 3 6 9 6 12 18 9 18 27
[4,] 2 4 6 4 8 12 6 12 18
[5,] 4 8 12 8 16 24 12 24 36
[6,] 6 12 18 12 24 36 18 36 54
[7,] 3 6 9 6 12 18 9 18 27
[8,] 6 12 18 12 24 36 18 36 54
[9,] 9 18 27 18 36 54 27 54 81
This creates a transition matrix where probability of going from one state to another is defined as 'prob', then assigns those probabilities to a data set. But I am not sure this does what you want.
set.seed(1234)
tran <- expand.grid(x1 = c(1, 2, 3), y1 = c(1, 2, 3),
x2 = c(1, 2, 3), y2 = c(1, 2, 3))
lin.prob <- -1.75 - 1.18 * ((tran[,1] - tran[,3])^2 +
(tran[,2] - tran[,4])^2) ^ 0.5
e <- exp(1)
prob <- e^lin.prob / (1+e^lin.prob)
tran <- cbind(tran, prob)
colnames(tran) = c("x1","y1","x2","y2", "transition.prob")
nsites <- 25
x1sites <- ceiling(runif(nsites, 0, 3))
y1sites <- ceiling(runif(nsites, 0, 3))
x2sites <- ceiling(runif(nsites, 0, 3))
y2sites <- ceiling(runif(nsites, 0, 3))
site <- seq(1:nsites)
sites <- cbind(site, x1sites, y1sites, x2sites, y2sites)
colnames(sites) = c("site", "x1","y1","x2","y2")
my.data <- merge(sites, tran,
by.x = c("x1", "y1", "x2", "y2"),
by.y = c("x1", "y1", "x2", "y2"),
all = F, sort=F )
my.data=my.data[order(my.data$site),]
my.data

Resources