This is the first part of my code:
BSum=0.0
mydata = NULL
while(BSum < 5)
{
A=(rpois (1, lambda=1))
y=runif(A,0,1)
B1 = length(which(y<=0.5))
BSum = BSum + B1
C = A - B1
mydata=rbind(mydata,c("A"=A,"B"=B1,"C"=C))
}
I need 3 more columns here. For column D(row x) I would generate as many random nos. (between 0 and 1) as is the value in Column B(row x). Then I see how many of those random nos. are less than or equal to 0.1. I put the total count of these in Column D. The remainder (B-D) becomes column F. I generate another column E that will get populated the same same way D was generated from B. The remainder again gets added to what had accumulated in Column F.
:= is from the data.table package. As you don't have this loaded, either your object isn't a data.table object or it is but you don't say and the package was not loaded.
If the former (your object is a data frame, not a data.table) then you want cbind(). As in:
set.seed(1)
df <- data.frame(A = runif(10))
cbind(df, list(B = runif(10), C = letters[1:10]))
> cbind(df, list(B = runif(10), C = letters[1:10]))
A B C
1 0.26550866 0.2059746 a
2 0.37212390 0.1765568 b
3 0.57285336 0.6870228 c
4 0.90820779 0.3841037 d
5 0.20168193 0.7698414 e
6 0.89838968 0.4976992 f
7 0.94467527 0.7176185 g
8 0.66079779 0.9919061 h
9 0.62911404 0.3800352 i
10 0.06178627 0.7774452 j
For your particular problem, try:
myfun <- function(z) {
ret1 <- apply(z, 1, function(x) sum(runif(x) <= 0.1))
ret2 <- z[,1] - ret1
cbind(z, B = ret1, C = ret2)
}
set.seed(1)
df <- data.frame(A = rpois(10, 2))
myfun(df)
> myfun(df)
A B C
1 1 0 1
2 1 0 1
3 2 0 2
4 4 0 4
5 1 0 1
6 4 0 4
7 4 1 3
8 2 0 2
9 2 0 2
10 0 0 0
You could make this more efficient, say by not doing each row individually, but it'd involve more coding.
Updated
If I understand your update (and I might not as I already showed you how to do those steps, though not in the same configuration as you now want), then I think this is what you wanted. Note that how you create E is a little ambiguous. I took you literally and just did exactly the same as for D.
set.seed(2)
BSum <- 0.0
mydata <- NULL
while(BSum < 5) {
A <- rpois(1, lambda = 1)
B1 <- sum(runif(A, 0, 1) <= 0.5)
BSum <- BSum + B1
C <- A - B1
D <- sum(runif(B1) <= 0.1)
F <- B1 - D
E <- sum(runif(B1) <= 0.1)
F <- F + (D - E)
mydata <- rbind(mydata, c(A = A, B = B1, C = C, D = D, E = E, F = F))
}
With that seed I get
R> mydata
A B C D E F
[1,] 0 0 0 0 0 0
[2,] 1 0 1 0 0 0
[3,] 0 0 0 0 0 0
[4,] 3 1 2 0 0 1
[5,] 1 1 0 0 0 1
[6,] 1 0 1 0 0 0
[7,] 3 3 0 0 0 3
Related
I have a "small" square matrix that I want to add to a "big" matrix. The big matrix contains all the rows and columns of the small matrix plus extras. I want to add the values where the indices are in common and just keep the values from the big one where that index is not contained in the small one. Unfortunately, all the data is copied on the addition so it takes a long time and can temporarily spike memory when the matrices are large.
I have tried adding subsets using matrices and data.frames, as well as a data.table method using rbindlist. Both the data.frame and matrix methods seem to cause a memory copy (why?) and the rbindlist method is not ideal because it requires a melt and dcast and temporarily spiking the memory by spiking the number of rows.
Is there any way to just change the values of some items in a matrix without causing a copy of the entire matrix?
Here are my attempts:
MList <- list(M1,M2)
unionCols <- Reduce(union, lapply(MList, colnames))
MTotal <- matrix(as.double(rep(0,(length(unionCols))^2)), nrow = length(unionCols))
rownames(MTotal) <- colnames(MTotal) <- unionCols
DFTotal <- as.data.frame(MTotal)
DFList <- lapply(MList, as.data.frame)
for(i in 1:length(MList)){
tracemem(MTotal)
tracemem(DFTotal)
mCol <- match(colnames(MList[[i]]), colnames(MTotal))
MTotal[mCol,mCol] <- MTotal[mCol,mCol] + MList[[i]] # this causes a copy
DFTotal[mCol,mCol] <- DFTotal[mCol,mCol] + DFList[[i]] # this causes a copy
}
M1
M2
MTotal
# rbindlist method
.AggDMCMatsSingleM2 <- function(M1, M2){
.MyMelt <- function(M){
DT <- setnames(reshape2::melt(M, id.vars = colnames(M)), c('Var1','Var2'), c('row','col'))
}
M_total <- as.matrix(data.table::dcast(rbindlist(lapply(list(M1,M2), .MyMelt)),
formula = as.formula(row ~ col),
value.var = 'value',
fun.aggregate = sum,
fill = 0),
rownames = 'row')
return(M_total)
}
M1
M2
.AggDMCMatsSingleM2(M1,M2)
If I follow what you are asking we can directly add and write to the big matrix using the bracket notation row/col names of the small matrix:
big_matrix<-matrix(data=rep(1, 25), nrow=5,
dimnames = list(c(LETTERS[1:5]),
c(letters[1:5])))
# a b c d e
#A 1 1 1 1 1
#B 1 1 1 1 1
#C 1 1 1 1 1
#D 1 1 1 1 1
#E 1 1 1 1 1
small_matrix<-matrix(data=c(1:9), nrow=3,
dimnames = list(c(LETTERS[2:4]),
c(letters[2:4])))
# b c d
#B 1 4 7
#C 2 5 8
#D 3 6 9
big_matrix[rownames(small_matrix), colnames(small_matrix)] <-
big_matrix[rownames(small_matrix), colnames(small_matrix)] + small_matrix
# a b c d e
#A 1 1 1 1 1
#B 1 2 5 8 1
#C 1 3 6 9 1
#D 1 4 7 10 1
#E 1 1 1 1 1
More complex test:
big_matrix<-matrix(data=rep(1, 25), nrow=5,
dimnames = list(c(LETTERS[1:5]),
c(letters[1:5])))
# a b c d e
#A 1 1 1 1 1
#B 1 1 1 1 1
#C 1 1 1 1 1
#D 1 1 1 1 1
#E 1 1 1 1 1
small_matrix<-matrix(data=c(1:9), nrow=3,
dimnames = list(c("A", "D", "C"),
c(letters[c(2:4)])))
# b c d
#A 1 4 7
#D 2 5 8
#C 3 6 9
big_matrix[rownames(small_matrix), colnames(small_matrix)] <-
big_matrix[rownames(small_matrix), colnames(small_matrix)] + small_matrix
big_matrix
# a b c d e
#A 1 2 5 8 1
#B 1 1 1 1 1
#C 1 4 7 10 1
#D 1 3 6 9 1
#E 1 1 1 1 1
I have two square matrix / array like that
## Matrix 1
t1 <- c(2,1,1,1,1,0,1,0,1)
column.names <- c("A","B","C")
row.names <- c("A","B","C")
m1 <- array(t1,dim = c(3,3),dimnames = list(row.names,column.names))
m1
A B C
A 2 1 1
B 1 1 0
C 1 0 1
## Matrix 2
t2 <- c(1,0,0,0,1,1,0,1,1)
column.names <- c("A","B","D")
row.names <- c("A","B","D")
m2 <- array(t2,dim = c(3,3),dimnames = list(row.names,column.names))
m2
A B D
A 1 0 0
B 0 1 1
D 0 1 1
I need to sum up them (each existing column/row pairs) and to keep all possible combinations, like that :
A B C D
A 3 1 1 0
B 1 2 0 1
C 1 0 1 0
D 0 1 0 1
I have to compute this process a lot of times, so I am looking for a fast and lightweight solution.
Any help would be awsome, I am stuck ;)
A base R option using xtabs + expand.grid
as.data.frame.matrix(
xtabs(
p ~ .,
do.call(
rbind,
lapply(
list(m1, m2),
function(x) cbind(expand.grid(dimnames(x)), p = c(x))
)
)
)
)
gives
A B C D
A 3 1 1 0
B 1 2 0 1
C 1 0 1 0
D 0 1 0 1
Another option using igraph
library(igraph)
get.adjacency(
graph_from_data_frame(
do.call(
rbind,
lapply(
list(m1, m2),
function(x) {
get.data.frame(
graph_from_adjacency_matrix(
x,
"undirected"
)
)
}
)
), FALSE
),
sparse = FALSE
)
which gives
A B C D
A 3 1 1 0
B 1 2 0 1
C 1 0 1 0
D 0 1 0 1
Make m1 and m2 of same dimensions by including all the rownames and colnames available in both of them. Replace non-existent value with 0. You can then add both of them together.
cols <- unique(c(colnames(m1), colnames(m2)))
rows <- unique(c(rownames(m1), rownames(m2)))
dummy_m1 <- matrix(0, nrow = length(cols), ncol = length(rows),
dimnames = list(cols, rows))
dummy_m2 <- dummy_m1
dummy_m1[rownames(m1), colnames(m1)] <- m1
dummy_m2[rownames(m2), colnames(m2)] <- m2
dummy_m1 + dummy_m2
# A B C D
#A 3 1 1 0
#B 1 2 0 1
#C 1 0 1 0
#D 0 1 0 1
I am using this for loop to standardize the data in my columns. Here m_sel_cols is a vector with column names.
for(i in m_sel_cols)
{
cal <- work_data1$i
cal <- ((cal-mean(cal))/sd(cal))
}
Suppose if my column name is "A" then is you do :
...
cal <- work_data1$A
...
The number of columns in my dataset is huge and I want to convert it back to data frame, I know cbind() can be used but how within the for loop?
You can "loop" through columns using sapply.
xy <- data.frame(a = 1:3, b = 4:6, c = 7:9)
sapply(xy, FUN = function(x) (x - mean(x))/sd(x))
a b c
[1,] -1 -1 -1
[2,] 0 0 0
[3,] 1 1 1
or
> scale(xy)
a b c
[1,] -1 -1 -1
[2,] 0 0 0
[3,] 1 1 1
attr(,"scaled:center")
a b c
2 5 8
attr(,"scaled:scale")
a b c
1 1 1
We can do this with tidyverse
library(tidyverse)
xy %>%
mutate_all(scale)
# a b c
#1 -1 -1 -1
#2 0 0 0
#3 1 1 1
data
xy <- data.frame(a = 1:3, b = 4:6, c = 7:9)
This question already has answers here:
Reshape dataframe and create similarity matrix
(2 answers)
Closed 7 years ago.
I have a blank matrix called Trial that is 5000 X 5000, but i'll put a small snippet.
a b c d e f
a
b
c
d
e
f
and I want to fill the Matrix, with a Data Table I have.
Name Value
-----------
Cat A
Cat B
Cat E
Dog D
Dog C
Dog F
So basically in the end, I want the matrix to be filled like this:
a b c d e f
a 1 1 0 0 1 0
b 1 1 0 0 1 0
c 0 0 1 1 0 1
d 0 0 1 1 0 1
e 1 1 0 0 1 0
f 0 0 1 1 0 1
So all the values relating to the Name will be 1, and if they don't relate they will be 0. For example, A and F don't relate because they are different names (cat and dog), thus they will get a 0.
Here is a way with loops
dd <- read.table(header = TRUE, text="Name Value
Cat A
Cat B
Cat E
Dog D
Dog C
Dog F")
o <- order(dd$Value)
sapply(1:nrow(dd), function(x) dd$Name %in% dd[x, 'Name'] + 0L)[o, o]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] 1 1 0 0 1 0
# [2,] 1 1 0 0 1 0
# [3,] 0 0 1 1 0 1
# [4,] 0 0 1 1 0 1
# [5,] 1 1 0 0 1 0
# [6,] 0 0 1 1 0 1
or with an explicit for loop
mm <- matrix(nrow = nrow(dd), ncol = nrow(dd))
for (ii in 1:nrow(mm))
mm[ii, ] <- dd$Name %in% dd[ii, 'Name'] + 0L
mm[o, o]
For 5000 x 5000, takes less than 2 seconds on my crummy laptop
dd <- data.frame(Name = sample(LETTERS, 5000, replace = TRUE), Value = 1:5000)
o <- order(dd$Value)
system.time({
oo <- sapply(1:nrow(dd), function(x) dd$Name %in% dd[x, 'Name'] + 0L)[o, o]
})
# user system elapsed
# 1.680 0.188 1.874
system.time({
mm <- matrix(nrow = nrow(dd), ncol = nrow(dd))
for (ii in 1:nrow(mm))
mm[ii, ] <- dd$Name %in% dd[ii, 'Name'] + 0L
mm[o, o]
})
# user system elapsed
# 1.918 0.152 2.073
I need specific format of data.frame for social structure analysis. How to convert data.frame containing list of individuals occuring together on multiple events:
my.df <- data.frame(individual = c("A","B","C","B","C","D"),
time = rep(c("event_01","event_02"), each = 3))
individual time
1 A event_01
2 B event_01
3 C event_01
4 B event_02
5 C event_02
6 D event_02
into a data.frame containing occurence for each pairs (including [A,A]; [B,B] etc. pairs:
ind_1 ind_2 times
A A 0
A B 1
A C 1
A D 0
B A 1
B B 0
B C 2
B D 1
C A 1
C B 2
C C 0
C D 1
D A 0
D B 1
D C 1
D D 0
In base R, you could do the following:
data.frame(as.table(`diag<-`(tcrossprod(table(my.df)), 0)))
# individual individual.1 Freq
# 1 A A 0
# 2 B A 1
# 3 C A 1
# 4 D A 0
# 5 A B 1
# 6 B B 0
# 7 C B 2
# 8 D B 1
# 9 A C 1
# 10 B C 2
# 11 C C 0
# 12 D C 1
# 13 A D 0
# 14 B D 1
# 15 C D 1
# 16 D D 0
tcrossprod gives you the following:
> tcrossprod(table(my.df))
individual
individual A B C D
A 1 1 1 0
B 1 2 2 1
C 1 2 2 1
D 0 1 1 1
That's essentially all the information you are looking for, but you want it in a slightly different form, without the diagonal values.
We can set the diagonals to zero with:
`diag<-`(theOutputFromAbove, 0)
Then, to get the long form, trick R into thinking that the resulting matrix is a table by using as.table, and make use of the data.frame method for tables.
You can do:
create the first 2 variables of the new data.frame:
df2 <- expand.grid(ind_2=levels(my.df$individual), ind_1=levels(my.df$individual))[, 2:1]
Put the value to 0 for the pairs of same individuals:
df2$times[df2[, 1]==df2[, 2]] <- 0
See the other unique combinations:
comb_diff <- combn(levels(my.df$individual), 2)
compute the times each unique combination is found together:
times_uni <- apply(comb_diff, 2, function(inds){
sum(table(my.df$time[my.df$individual %in% inds])==2)
})
Finally, fill the new data.frame:
df2$times[match(c(paste0(comb_diff[1,], comb_diff[2,]), paste0(comb_diff[2, ], comb_diff[1, ])), paste0(df2[, 1],df2[, 2]))] <- rep(times_uni, 2)
df2
# ind_1 ind_2 times
#1 A A 0
#2 A B 1
#3 A C 1
#4 A D 0
#5 B A 1
#6 B B 0
#7 B C 2
#8 B D 1
#9 C A 1
#10 C B 2
#11 C C 0
#12 C D 1
#13 D A 0
#14 D B 1
#15 D C 1
#16 D D 0
You can do it using data.table
dt_combs <- my.dt[,
list(ind_1 = combn(individual, 2)[1, ],
ind_2 = combn(individual, 2)[2, ]),
by = time]
dt_ncombs <- dt_combs[, .N, by = c("ind_1", "ind_2")]
dt_ncombs_inverted <- copy(dt_ncombs)
dt_ncombs_inverted[, temp := ind_1]
dt_ncombs_inverted[, ind_1 := ind_2]
dt_ncombs_inverted[, ind_2 := temp]
dt_ncombs_inverted[, temp := NULL]
dt_ncombs <- rbind(dt_ncombs, dt_ncombs_inverted)
dt_allcombs <- data.table(expand.grid(
ind_1 = my.dt[, unique(individual)],
ind_2 = my.dt[, unique(individual)]
))
dt_final <- merge(dt_allcombs,
dt_ncombs,
all.x = TRUE,
by = c("ind_1", "ind_2"))
dt_final[is.na(N), N := 0]
dt_final