Here I have some code that generates a matrix like this:
# Problem dimensions. NOTE: assigning to `T` masks the built-in shorthand for
# TRUE for the rest of the session, so later code should spell out TRUE/FALSE.
N <- 200
T <- 10
# T x T identity matrix.
mu_0 <- diag(1, T)
# Stack N copies of the identity on top of each other: N * T rows, T columns.
mu_t_0 <- matrix(rep(t(mu_0), N), ncol = T, byrow = TRUE)
And generally the result looks like this
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1 1 0 0 0 0 0 0 0 0 0
2 0 1 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0
4 0 0 0 1 0 0 0 0 0 0
5 0 0 0 0 1 0 0 0 0 0
6 0 0 0 0 0 1 0 0 0 0
7 0 0 0 0 0 0 1 0 0 0
8 0 0 0 0 0 0 0 1 0 0
9 0 0 0 0 0 0 0 0 1 0
10 0 0 0 0 0 0 0 0 0 1
11 1 0 0 0 0 0 0 0 0 0
12 0 1 0 0 0 0 0 0 0 0
13 0 0 1 0 0 0 0 0 0 0
14 0 0 0 1 0 0 0 0 0 0
15 0 0 0 0 1 0 0 0 0 0
16 0 0 0 0 0 1 0 0 0 0
17 0 0 0 0 0 0 1 0 0 0
18 0 0 0 0 0 0 0 1 0 0
19 0 0 0 0 0 0 0 0 1 0
20 0 0 0 0 0 0 0 0 0 1
...
Now for later calculation I want to split this large matrix into different small matrices like this:
Matrix One:
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
...
Matrix Two:
0
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
...
I tried the split function but I cannot get what I want. Are there any solutions?
You can use asplit to split an array or matrix by its margin.
x <- asplit(mu_t_0, 2)
str(x)
#List of 10
# $ : num [1:2000(1d)] 1 0 0 0 0 0 0 0 0 0 ...
# $ : num [1:2000(1d)] 0 1 0 0 0 0 0 0 0 0 ...
# $ : num [1:2000(1d)] 0 0 1 0 0 0 0 0 0 0 ...
# $ : num [1:2000(1d)] 0 0 0 1 0 0 0 0 0 0 ...
# $ : num [1:2000(1d)] 0 0 0 0 1 0 0 0 0 0 ...
# $ : num [1:2000(1d)] 0 0 0 0 0 1 0 0 0 0 ...
# $ : num [1:2000(1d)] 0 0 0 0 0 0 1 0 0 0 ...
# $ : num [1:2000(1d)] 0 0 0 0 0 0 0 1 0 0 ...
# $ : num [1:2000(1d)] 0 0 0 0 0 0 0 0 1 0 ...
# $ : num [1:2000(1d)] 0 0 0 0 0 0 0 0 0 1 ...
# - attr(*, "dim")= int 10
This one puts all the columns into a list.
# Split the matrix into a list of single-column matrices. `drop = FALSE` keeps
# each piece as an n x 1 matrix instead of collapsing it to a plain vector.
# TRUE/FALSE are spelled out because `T` was reassigned when the matrix was
# built, which makes the one-letter shorthands unsafe in this session.
res <- lapply(seq_len(ncol(mu_t_0)), function(i) mu_t_0[, i, drop = FALSE])
head(res[[1]])
# [,1]
# [1,] 1
# [2,] 0
# [3,] 0
# [4,] 0
# [5,] 0
# [6,] 0
head(res[[2]])
# [,1]
# [1,] 0
# [2,] 1
# [3,] 0
# [4,] 0
# [5,] 0
# [6,] 0
To extract the single one column matrices use list2env; the objects in the list need names beforehand.
# list2env() needs a named list: each name becomes the name of the object it
# creates. seq_along() is used so an empty list yields no names at all.
names(res) <- paste0("m.", seq_along(res))
# Copy every list element into the global environment as m.1, m.2, ...
# The argument is spelled `envir` in full instead of relying on partial matching.
list2env(res, envir = .GlobalEnv)
ls()
# [1] "m.1" "m.10" "m.2" "m.3" "m.4" "m.5" "m.6" "m.7" "m.8" "m.9" "mu_0" "mu_t_0"
# [13] "N" "res" "T"
If you want individual vectors named in the global environment (i.e. objects V1, V2, etc.):
invisible(mapply(assign, names(as.data.frame(mu_t_0)), as.data.frame(mu_t_0), MoreArgs=list(envir = globalenv())))
Though I'm really interested in why the OP doesn't want to just work with a single matrix...
Related
I have this loop over the file msp.chr1
# For each of columns 7 to 70 of msp.chr1, compute one value per run of
# consecutive 1s: the last epos of the run minus the first spos of the run.
for(i in names(msp.chr1[c(7:70)])){
# lengths of the runs of identical consecutive values in column i
tmp <- rle(msp.chr1[[i]])$lengths
# idx labels every row with the number of the run it belongs to
msp.chr1$idx <- rep(1:length(tmp),tmp)
# keep only the rows where column i equals 1, group them by run number, and for
# each group take the last epos minus the first spos
tmp2 <- unlist(by(msp.chr1[msp.chr1[[i]]==1,], list(msp.chr1$idx[msp.chr1[[i]]==1]),function(x){tail(x["epos"],1)-head(x["spos"],1)}))
# store the result as a plain vector named "<column name>.chr1"
assign(paste(i, ".chr1", sep=""), as.vector(tmp2))
# remove the loop variable and the temporaries
rm(i); rm(tmp); rm(tmp2)
}
This file is a dataframe with multiple columns:
head(msp.chr1)
chm spos epos sgpos egpos nsnps PDAC1.0 PDAC1.1 PDAC10.0 PDAC10.1 PDAC100.0 PDAC100.1 PDAC101.0 PDAC101.1 PDAC102.0 PDAC102.1 PDAC103.0 PDAC103.1
1 1 123492 134160 0.12 0.13 252 0 0 0 0 1 0 0 0 0 0 0 0
2 1 134160 135025 0.13 0.14 20 0 0 0 0 1 0 0 0 0 0 0 0
3 1 135025 145600 0.14 0.15 150 0 0 0 0 1 0 0 0 0 0 0 0
4 1 145600 316603 0.15 0.32 195 0 1 0 0 1 0 0 1 0 0 0 1
5 1 316603 520140 0.32 0.52 765 0 0 0 0 0 0 0 0 0 0 0 0
6 1 520140 667054 0.52 0.67 1080 0 0 0 0 0 0 0 0 0 0 0 0
PDAC104.0 PDAC104.1 PDAC105.0 PDAC105.1 PDAC11.0 PDAC11.1 PDAC12.0 PDAC12.1 PDAC13.0 PDAC13.1 PDAC14.0 PDAC14.1 PDAC15.0 PDAC15.1 PDAC17.0 PDAC17.1
1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1
2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1
3 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1
4 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
PDAC18.0 PDAC18.1 PDAC19.0 PDAC19.1 PDAC2.0 PDAC2.1 PDAC20.0 PDAC20.1 PDAC21.0 PDAC21.1 PDAC22.0 PDAC22.1 PDAC23.0 PDAC23.1 PDAC24.0 PDAC24.1 PDAC25.0
1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
2 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0
3 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0
4 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
PDAC25.1 PDAC3.0 PDAC3.1 PDAC4.0 PDAC4.1 PDAC5.0 PDAC5.1 PDAC6.0 PDAC6.1 PDAC7.0 PDAC7.1 PDAC8.0 PDAC8.1 PDAC807.0 PDAC807.1 PDAC810.0 PDAC810.1
1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
PDAC9.0 PDAC9.1 idx
1 0 0 1
2 0 0 1
3 0 0 1
4 0 0 1
5 1 0 1
6 1 0 1
# For each of columns 7 to 70 of msp.chr1, compute one value per run of
# consecutive 1s: the last epos of the run minus the first spos of the run.
for(i in names(msp.chr1[c(7:70)])){
# lengths of the runs of identical consecutive values in column i
tmp <- rle(msp.chr1[[i]])$lengths
# idx labels every row with the number of the run it belongs to
msp.chr1$idx <- rep(1:length(tmp),tmp)
# keep only the rows where column i equals 1, group them by run number, and for
# each group take the last epos minus the first spos
tmp2 <- unlist(by(msp.chr1[msp.chr1[[i]]==1,], list(msp.chr1$idx[msp.chr1[[i]]==1]),function(x){tail(x["epos"],1)-head(x["spos"],1)}))
# store the result as a plain vector named "<column name>.chr1"
assign(paste(i, ".chr1", sep=""), as.vector(tmp2))
# remove the loop variable and the temporaries
rm(i); rm(tmp); rm(tmp2)
}
But I actually have 23 files, of names msp.chr1, msp.chr2, ..., msp.chr23.
I want to add another loop on the above, to do that on all files at once.
I tried several things but it is not working...
Basically, every chr1 in my loop (including in the assign) should be replaced by chr1 to chr23.
Can you help?
Thanks,
You can build the name of each data frame with paste, and then fetch the object by that name with get. A better option would be to store these data frames in a list from the start; then you would only need the index j, as in df = list[[j]].
# Repeat the per-column computation for each of msp.chr1 ... msp.chr23.
for (j in 1:23) {
  # Look the data frame up by its constructed name. `df` is a separate copy, so
  # the idx column added below does not modify the msp.chr<j> object itself.
  df <- get(paste0("msp.chr", j))
  for (i in names(df[c(7:70)])) {
    # Run lengths of column i, and a run number for every row.
    tmp <- rle(df[[i]])$lengths
    df$idx <- rep(seq_along(tmp), tmp)
    # Rows where column i equals 1; computed once, used for rows and for groups.
    is_one <- df[[i]] == 1
    # Per run of 1s: last epos of the run minus first spos of the run.
    tmp2 <- unlist(by(df[is_one, ], list(df$idx[is_one]), function(x) {
      tail(x["epos"], 1) - head(x["spos"], 1)
    }))
    # Name the result after both the column and the chromosome number, e.g.
    # "<column name>.chr2". A fixed ".chr1" suffix would make every chromosome
    # overwrite the objects created for the previous one.
    assign(paste0(i, ".chr", j), as.vector(tmp2))
    # Remove the loop variable and the temporaries, as the single-file loop does.
    rm(i, tmp, tmp2, is_one)
  }
}
New poster on Stackoverflow but long time viewer. I could not find any previous posts that get at my specific question.
Basically, I am struggling with how to make use of a nested for loop for my problem. The issue is that the number of variables and outcomes will change with the use case, so I want a solution that is flexible for various permutations. I am not sure that apply would help me because I don't know in advance how many variables and outcomes will exist in any given use case.
The goal is to classify whether the outcome is correctly predicted by the variable (tp = true positive, etc).
The problem is that the inner loop causes the outer loop values to be overwritten, but what I want is for each outcome to be evaluated over each variable once independently. Not sure what the best way to do this is and any advice appreciated.
#Repex code
#Generate variable
variable <- c(1,2,3)
df <- as.data.frame(matrix(0, ncol = 0, nrow = 30))
for(i in 1:length(variable)){
df[,c(paste0("variable",variable[i]))]<-as.vector(sample(c(0,1), replace=TRUE, size=30))
}
df
#Generate outcome
outcome <- c(1,2,3)
df2 <- as.data.frame(matrix(0, ncol = 0, nrow = 30))
for(i in 1:length(outcome)){
df2[,c(paste0("outcome",outcome[i]))]<-as.vector(sample(c(0,1), replace=TRUE, size=30))
}
df2
#Generate performance metrics of outcome and predictor
for (i in variable){
for(j in 1:length(df2)){
df[, c(paste0("tp.",variable[i]))] <- as.vector(ifelse(df[, c(paste0("variable",variable[i]))]==1 & df2[j]==1,1,0))
df[, c(paste0("tn.",variable[i]))] <- as.vector(ifelse(df[, c(paste0("variable",variable[i]))]==0 & df2[j]==0,1,0))
df[, c(paste0("fp.",variable[i]))] <- as.vector(ifelse(df[, c(paste0("variable",variable[i]))]==1 & df2[j]==0,1,0))
df[, c(paste0("fn.",variable[i]))] <- as.vector(ifelse(df[, c(paste0("variable",variable[i]))]==0 & df2[j]==1,1,0))
}
}
df
#bind the data for comparison and spot checking
df3 <- cbind(df2,df)
#here we see that only the final inner loop data are correct
df3
The problem is that you have 3 different variables that you want to compare to 3 different outcomes, so you are making 9 comparisons. However, since you are labelling your columns only according to the variable, you only have three unique numeric suffixes (one for each value of i) pasted on to each statistic (tp, tn, fp and fn). You therefore only have 12 distinct column names.
At no point are you labelling the columns according to both the variable and the outcome. That means that every time your inner loop increments to the next outcome variable, you are over-writing the column in df that you wrote in the previous iteration of the loop.
In any case, how would you intend to keep track of which comparison you are making unless you use both the variable number and the outcome number to label your columns?
So you could do it this way:
for (i in variable)
{
V <- c(paste0("variable", i))
for(j in seq_along(df2))
{
comp <- paste0(i, ".vs.", j)
df[paste0("tp.", comp)] <- as.numeric(df[V] == 1 & df2[j] == 1)
df[paste0("tn.", comp)] <- as.numeric(df[V] == 0 & df2[j] == 0)
df[paste0("fp.", comp)] <- as.numeric(df[V] == 1 & df2[j] == 0)
df[paste0("fn.", comp)] <- as.numeric(df[V] == 0 & df2[j] == 1)
}
}
df3 <- cbind(df2, df)
Which will give you the structure you were looking for. It's a large data frame, so we'll just peek at it with str:
str(df3)
#> 'data.frame': 30 obs. of 42 variables:
#> $ outcome1 : num 0 1 1 1 1 0 1 0 1 1 ...
#> $ outcome2 : num 0 0 0 0 1 0 0 1 1 1 ...
#> $ outcome3 : num 1 1 0 0 0 0 0 0 1 0 ...
#> $ variable1: num 0 1 0 0 1 1 0 1 1 1 ...
#> $ variable2: num 1 1 0 0 0 0 0 0 0 0 ...
#> $ variable3: num 1 0 0 0 1 0 0 1 0 0 ...
#> $ tp.1.vs.1: num 0 1 0 0 1 0 0 0 1 1 ...
#> $ tn.1.vs.1: num 1 0 0 0 0 0 0 0 0 0 ...
#> $ fp.1.vs.1: num 0 0 0 0 0 1 0 1 0 0 ...
#> $ fn.1.vs.1: num 0 0 1 1 0 0 1 0 0 0 ...
#> $ tp.1.vs.2: num 0 0 0 0 1 0 0 1 1 1 ...
#> $ tn.1.vs.2: num 1 0 1 1 0 0 1 0 0 0 ...
#> $ fp.1.vs.2: num 0 1 0 0 0 1 0 0 0 0 ...
#> $ fn.1.vs.2: num 0 0 0 0 0 0 0 0 0 0 ...
#> $ tp.1.vs.3: num 0 1 0 0 0 0 0 0 1 0 ...
#> $ tn.1.vs.3: num 0 0 1 1 0 0 1 0 0 0 ...
#> $ fp.1.vs.3: num 0 0 0 0 1 1 0 1 0 1 ...
#> $ fn.1.vs.3: num 1 0 0 0 0 0 0 0 0 0 ...
#> $ tp.2.vs.1: num 0 1 0 0 0 0 0 0 0 0 ...
#> $ tn.2.vs.1: num 0 0 0 0 0 1 0 1 0 0 ...
#> $ fp.2.vs.1: num 1 0 0 0 0 0 0 0 0 0 ...
#> $ fn.2.vs.1: num 0 0 1 1 1 0 1 0 1 1 ...
#> $ tp.2.vs.2: num 0 0 0 0 0 0 0 0 0 0 ...
#> $ tn.2.vs.2: num 0 0 1 1 0 1 1 0 0 0 ...
#> $ fp.2.vs.2: num 1 1 0 0 0 0 0 0 0 0 ...
#> $ fn.2.vs.2: num 0 0 0 0 1 0 0 1 1 1 ...
#> $ tp.2.vs.3: num 1 1 0 0 0 0 0 0 0 0 ...
#> $ tn.2.vs.3: num 0 0 1 1 1 1 1 1 0 1 ...
#> $ fp.2.vs.3: num 0 0 0 0 0 0 0 0 0 0 ...
#> $ fn.2.vs.3: num 0 0 0 0 0 0 0 0 1 0 ...
#> $ tp.3.vs.1: num 0 0 0 0 1 0 0 0 0 0 ...
#> $ tn.3.vs.1: num 0 0 0 0 0 1 0 0 0 0 ...
#> $ fp.3.vs.1: num 1 0 0 0 0 0 0 1 0 0 ...
#> $ fn.3.vs.1: num 0 1 1 1 0 0 1 0 1 1 ...
#> $ tp.3.vs.2: num 0 0 0 0 1 0 0 1 0 0 ...
#> $ tn.3.vs.2: num 0 1 1 1 0 1 1 0 0 0 ...
#> $ fp.3.vs.2: num 1 0 0 0 0 0 0 0 0 0 ...
#> $ fn.3.vs.2: num 0 0 0 0 0 0 0 0 1 1 ...
#> $ tp.3.vs.3: num 1 0 0 0 0 0 0 0 0 0 ...
#> $ tn.3.vs.3: num 0 0 1 1 0 1 1 0 0 1 ...
#> $ fp.3.vs.3: num 0 0 0 0 1 0 0 1 0 0 ...
#> $ fn.3.vs.3: num 0 1 0 0 0 0 0 0 1 0 ...
The other (and perhaps more sensible) way to do it is to have 3 data frames, one for each variable, and each with twelve columns (three sets of tp, tn, fp, fn). You can do this easily using lapply:
# Build one data frame per column of df. Each of them holds, for every column
# of df2 in turn, four 0/1 indicator columns in the order tp, tn, fp, fn.
df_list <- lapply(df, function(x) {
  per_outcome <- lapply(seq_along(df2), function(j) {
    outcome <- df2[j]
    data.frame(
      ifelse(x == 1 & outcome == 1, 1, 0),
      ifelse(x == 0 & outcome == 0, 1, 0),
      ifelse(x == 1 & outcome == 0, 1, 0),
      ifelse(x == 0 & outcome == 1, 1, 0)
    )
  })
  combined <- do.call("cbind", per_outcome)
  # Labels run tp.1, tn.1, fp.1, fn.1, tp.2, ... : one group of four per df2 column.
  labels <- paste0(c("tp.", "tn.", "fp.", "fn."), rep(seq_along(df2), each = 4))
  setNames(combined, labels)
})
Which gives you:
df_list
#> $variable1
#> tp.1 tn.1 fp.1 fn.1 tp.2 tn.2 fp.2 fn.2 tp.3 tn.3 fp.3 fn.3
#> 1 0 1 0 0 0 1 0 0 0 0 0 1
#> 2 1 0 0 0 0 0 1 0 1 0 0 0
#> 3 0 0 0 1 0 1 0 0 0 1 0 0
#> 4 0 0 0 1 0 1 0 0 0 1 0 0
#> 5 1 0 0 0 1 0 0 0 0 0 1 0
#> 6 0 0 1 0 0 0 1 0 0 0 1 0
#> 7 0 0 0 1 0 1 0 0 0 1 0 0
#> 8 0 0 1 0 1 0 0 0 0 0 1 0
#> 9 1 0 0 0 1 0 0 0 1 0 0 0
#> 10 1 0 0 0 1 0 0 0 0 0 1 0
#> 11 0 1 0 0 0 0 0 1 0 0 0 1
#> 12 0 0 1 0 1 0 0 0 1 0 0 0
#> 13 0 0 0 1 0 1 0 0 0 1 0 0
#> 14 0 1 0 0 0 0 0 1 0 0 0 1
#> 15 0 0 0 1 0 1 0 0 0 0 0 1
#> 16 0 1 0 0 0 1 0 0 0 0 0 1
#> 17 0 0 1 0 1 0 0 0 1 0 0 0
#> 18 0 0 1 0 0 0 1 0 0 0 1 0
#> 19 0 0 1 0 0 0 1 0 0 0 1 0
#> 20 1 0 0 0 0 0 1 0 1 0 0 0
#> 21 0 1 0 0 0 1 0 0 0 1 0 0
#> 22 1 0 0 0 0 0 1 0 1 0 0 0
#> 23 0 0 0 1 0 1 0 0 0 0 0 1
#> 24 0 0 0 1 0 1 0 0 0 0 0 1
#> 25 0 0 1 0 0 0 1 0 0 0 1 0
#> 26 0 0 1 0 0 0 1 0 0 0 1 0
#> 27 1 0 0 0 1 0 0 0 0 0 1 0
#> 28 0 0 1 0 0 0 1 0 1 0 0 0
#> 29 0 0 0 1 0 1 0 0 0 1 0 0
#> 30 0 0 1 0 0 0 1 0 0 0 1 0
#>
#> $variable2
#> tp.1 tn.1 fp.1 fn.1 tp.2 tn.2 fp.2 fn.2 tp.3 tn.3 fp.3 fn.3
#> 1 0 0 1 0 0 0 1 0 1 0 0 0
#> 2 1 0 0 0 0 0 1 0 1 0 0 0
#> 3 0 0 0 1 0 1 0 0 0 1 0 0
#> 4 0 0 0 1 0 1 0 0 0 1 0 0
#> 5 0 0 0 1 0 0 0 1 0 1 0 0
#> 6 0 1 0 0 0 1 0 0 0 1 0 0
#> 7 0 0 0 1 0 1 0 0 0 1 0 0
#> 8 0 1 0 0 0 0 0 1 0 1 0 0
#> 9 0 0 0 1 0 0 0 1 0 0 0 1
#> 10 0 0 0 1 0 0 0 1 0 1 0 0
#> 11 0 1 0 0 0 0 0 1 0 0 0 1
#> 12 0 0 1 0 1 0 0 0 1 0 0 0
#> 13 0 0 0 1 0 1 0 0 0 1 0 0
#> 14 0 0 1 0 1 0 0 0 1 0 0 0
#> 15 1 0 0 0 0 0 1 0 1 0 0 0
#> 16 0 0 1 0 0 0 1 0 1 0 0 0
#> 17 0 1 0 0 0 0 0 1 0 0 0 1
#> 18 0 0 1 0 0 0 1 0 0 0 1 0
#> 19 0 0 1 0 0 0 1 0 0 0 1 0
#> 20 1 0 0 0 0 0 1 0 1 0 0 0
#> 21 0 1 0 0 0 1 0 0 0 1 0 0
#> 22 0 0 0 1 0 1 0 0 0 0 0 1
#> 23 0 0 0 1 0 1 0 0 0 0 0 1
#> 24 1 0 0 0 0 0 1 0 1 0 0 0
#> 25 0 0 1 0 0 0 1 0 0 0 1 0
#> 26 0 0 1 0 0 0 1 0 0 0 1 0
#> 27 1 0 0 0 1 0 0 0 0 0 1 0
#> 28 0 0 1 0 0 0 1 0 1 0 0 0
#> 29 0 0 0 1 0 1 0 0 0 1 0 0
#> 30 0 0 1 0 0 0 1 0 0 0 1 0
#>
#> $variable3
#> tp.1 tn.1 fp.1 fn.1 tp.2 tn.2 fp.2 fn.2 tp.3 tn.3 fp.3 fn.3
#> 1 0 0 1 0 0 0 1 0 1 0 0 0
#> 2 0 0 0 1 0 1 0 0 0 0 0 1
#> 3 0 0 0 1 0 1 0 0 0 1 0 0
#> 4 0 0 0 1 0 1 0 0 0 1 0 0
#> 5 1 0 0 0 1 0 0 0 0 0 1 0
#> 6 0 1 0 0 0 1 0 0 0 1 0 0
#> 7 0 0 0 1 0 1 0 0 0 1 0 0
#> 8 0 0 1 0 1 0 0 0 0 0 1 0
#> 9 0 0 0 1 0 0 0 1 0 0 0 1
#> 10 0 0 0 1 0 0 0 1 0 1 0 0
#> 11 0 1 0 0 0 0 0 1 0 0 0 1
#> 12 0 1 0 0 0 0 0 1 0 0 0 1
#> 13 1 0 0 0 0 0 1 0 0 0 1 0
#> 14 0 1 0 0 0 0 0 1 0 0 0 1
#> 15 0 0 0 1 0 1 0 0 0 0 0 1
#> 16 0 0 1 0 0 0 1 0 1 0 0 0
#> 17 0 0 1 0 1 0 0 0 1 0 0 0
#> 18 0 1 0 0 0 1 0 0 0 1 0 0
#> 19 0 1 0 0 0 1 0 0 0 1 0 0
#> 20 1 0 0 0 0 0 1 0 1 0 0 0
#> 21 0 0 1 0 0 0 1 0 0 0 1 0
#> 22 0 0 0 1 0 1 0 0 0 0 0 1
#> 23 1 0 0 0 0 0 1 0 1 0 0 0
#> 24 0 0 0 1 0 1 0 0 0 0 0 1
#> 25 0 1 0 0 0 1 0 0 0 1 0 0
#> 26 0 1 0 0 0 1 0 0 0 1 0 0
#> 27 1 0 0 0 1 0 0 0 0 0 1 0
#> 28 0 0 1 0 0 0 1 0 1 0 0 0
#> 29 1 0 0 0 0 0 1 0 0 0 1 0
#> 30 0 0 1 0 0 0 1 0 0 0 1 0
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
Please, how do I read a dataset like this into R? It is actually larger than this, I am only trying to minimize it because of space.
"x1" "x2" "x3" "x4" "x5" "x6" "x7" "x8" "x9" "x10" "x11" "x12" "x13" "x14" "x15" "x16" "x17" "x18" "x19" "x42" "x43" "x44" "x45" "x46" "x47" "x48" "x49" "x50" "x51" "x52" "x53" "x54" "x55" "x56" "x57" "x58" "x59" "x60" "x61" "x62" "x63" "x64" "x65" "x66" "x67" "x68" "x69" "x70" "x71" "x72" "x73" "x74" "x75" "x76" "x77" "x78" "x79" "x80" "x81" "x82" "x83" "x84" "x85" "x86" "x87" "x88" "x89" "x90" "x91" "x92" "x93" "x94" "x95" "x96" "x97" "x98" "x99" "x100" "x101" "x102" "x103" "x104" "x105" "x106" "x107" "x108" "x109" "x110" "x111" "x112" "x113" "x114" "x115" "x116" "x117" "x118" "x119" "x120" "x121" "x122" "x123" "x124" "x201" "x202" "x203" "x204" "x205" "x206" "x207" "x208" "x209" "x210" "x211" "x212" "x213" "x214" "x215" "x216" "x217" "x218" "x219" "x220" "nature"
"1" 7 7 0 3 20205 486 19550 6769.2809 118 63 38 105 2 0 0.747 15655.4802 7 382.9968 348.7057 0 0 16 80 0 12123 1 0 0 0 0 0 0 0 1 1 0 0 0 0 17 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 13 1 0 0 0 1 5 0 9 0 1 0 1 0 0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 5.90860829371 0.730683213637 "0"
"2" 13 13 0 2 37402 502 34626 10860.0676 115 49 40 93 2 0 0.9884 16870.0524 7 477.0312 397.7413 0 1 19 81 0 31780 0 1 1 0 0 0 0 0 1 0 0 0 0 0 19 1 2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 17 0 0 1 0 0 2 0 1 5 1 0 1 2 0 0 0 0 0 2 0 0 1 0 0 0 2 1 2 0 8.32539208743 0.869155217211 "0"
"3" 8 7 0 2 132811 471 122729 6206.9286 222 86 108 196 1 1 0.948 6115.3969 7 295.067 221.8416 0 1 18 79 0 117765 0 0 0 0 0 0 0 0 1 0 0 0 0 0 17 1 1 0 0 0 0 0 0 0 2 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 2 0 0 0 1 0 0 1 0 0 0 0 0 0 5 0 0 1 0 0 0 0 0 1 0 5 0 1 0 0 0 29 1 98 2 0 0 1 0 0 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 10.5941656151 0.645706574667 "0"
"4" 15 15 0 3 231497 468 228811 9623.3898 347 134 167 321 1 0 1.4357 14400.1809 7 195.8632 207.8142 0 0 16 76 0 210360 0 0 0 1 0 0 0 1 0 0 0 0 0 0 16 1 1 0 0 0 0 0 0 0 2 0 1 0 0 0 1 0 0 0 0 1 1 3 1 5 5 1 1 0 5 0 0 0 0 0 1 0 0 3 2 0 0 0 0 1 0 5 0 0 0 7 0 1 0 0 0 262 0 71 1 0 0 0 1 0 2 0 0 0 1 0 1 0 1 0 0 0 1 0 0 4.88991089556 0.355427710536 "0"
"5" 153 161 0 2 3637632 377715 3416943 15250.239 34629 22108 12732 34931 1 0 355.1026 2494780.1981 2384 60.8852 89.4526 1 1 18 83 0 365 0 0 0 0 0 0 0 1 1 0 0 1 0 0 18 1 2 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 819 0 1 2 0 0 3 0 0 1 0 1 0 0 1 0 0 0 1 0 10.9453030622 0.304128072824 "0"
As long as your data file is smaller than, say a couple of GB's and you have enough RAM, use read.table(). It's the underlying function of read.csv() etc.. Just:
data <- read.table(file=file.choose(), sep=" ", header = TRUE)
And bob's your uncle.
Note that file.choose() opens a simple dialog in which you choose your file, header=TRUE indicates that the first row of the dataset holds the column names (which seems to be your case), and sep=" " sets the separator (this works as long as no value is a string containing spaces).
If you have very large data sets, think of learning to use the slightly awkward, yet handy, data.table package.
I'd like to convert a matrix of values into a matrix of 'bits'.
I have been looking for solutions and found this, which seems to be part of a solution.
I'll try to explain what I am looking for.
I have a matrix like
> x<-matrix(1:20,5,4)
> x
[,1] [,2] [,3] [,4]
[1,] 1 6 11 16
[2,] 2 7 12 17
[3,] 3 8 13 18
[4,] 4 9 14 19
[5,] 5 10 15 20
which I would like to convert into
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0
2 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0
3 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0
4 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0
5 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1
so for each value in the row a "1" in the corresponding column.
If I use
> table(sequence(length(x)),t(x))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
5 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
9 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
13 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
17 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
this is close to what I am looking for, but returns a line for each value.
I would only need to consolidate all values from one row into one row.
Because a
> table(x)
x
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
gives all values of the whole table, so what do I need to do to get the values per row?
Here is another option using table() function:
table(row(x), x)
# x
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
# 1 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0
# 2 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0
# 3 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0
# 4 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0
# 5 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1
# Start from an all-zero matrix: one row per row of x, one column per value
# from 1 up to the largest value in x.
bit_x <- matrix(0, nrow = nrow(x), ncol = max(x))
# For each row, use that row's values as column indices and set those cells to 1.
for (i in seq_len(nrow(x))) {
  bit_x[i, x[i, ]] <- 1
}
Let
(x <- matrix(c(1, 3), 2, 2))
[,1] [,2]
[1,] 1 1
[2,] 3 3
One approach would be
M <- matrix(0, nrow(x), max(x))
M[cbind(c(row(x)), c(x))] <- 1
M
# [,1] [,2] [,3]
# [1,] 1 0 0
# [2,] 0 0 1
In one line:
replace(matrix(0, nrow(x), max(x)), cbind(c(row(x)), c(x)), 1).
Following your approach, and similarly to @Psidom's suggestion:
table(rep(1:nrow(x), ncol(x)), x)
# x
# 1 3
# 1 2 0
# 2 0 2
We can use the reshape2 package.
library(reshape2)
# First build the example matrix from the question
x <- matrix(1:20, 5, 4)
# Melt it to long format. The dcast() call below relies on the result having a
# row-index column named Var1 and a cell-value column named value.
# NOTE(review): id.var looks like it has no effect when melting a matrix —
# confirm against the reshape2 melt documentation.
da <- melt(x, id.var = 1)
# Cast back to wide format: one row per Var1 and one column per distinct value;
# each cell counts the occurrences (fun.aggregate = length), with 0 where absent
dat <- dcast(da, Var1 ~ value, fill = 0, fun.aggregate = length)
which gives us this
Var1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0
2 2 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0
3 3 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0
4 4 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0
5 5 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1
For this example graph:
set.seed(1)
# Example graph: a chordal ring on 15 vertices. `nrow` is written out in full
# instead of relying on partial matching of `nr`.
g <- make_chordal_ring(15, matrix(c(3, 12, 4, 7, 8, 11), nrow = 2))
# Call all_shortest_paths() once per start vertex, always towards vertex 7.
# SIMPLIFY = FALSE (spelled out rather than the reassignable shorthand `F`)
# keeps the results as a plain list with one element per start vertex.
k <- Vectorize(all_shortest_paths, "from", SIMPLIFY = FALSE)(g, V(g), 7)
We have all shortest paths starting from any given node in the graph and ending at node 7 (the reference node). What I want is to count the number of times a node Y is present in the shortest paths from node X to node 7.
If I denote the number of shortest paths from node 1 to node 7 that pass through node 2 by n(1,2,7), and the total number of shortest paths from node 1 to node 7 by n(1,7), I want a way to generate a table that looks something like:
I am really stuck with doing this, for example, if we look at the output of k:
> k[[1]][1]
$res
$res[[1]]
+ 3/15 vertices:
[1] 1 4 7
I don't know how to isolate the path 1,4,7 and count this towards n(1,4,7)
I would go for a simple loop :
# Count, for every start vertex, how often each vertex lies strictly inside one
# of its shortest paths. Row r belongs to start vertex r; columns 1..vcount(g)
# count appearances as an inner vertex; the extra last column counts the total
# number of shortest paths found from that start vertex.
# Initialize the matrix with all zeros.
m <- matrix(0, nrow = vcount(g), ncol = vcount(g) + 1)
# Iterate over the elements of k; seq_along() (rather than 1:length(k)) makes
# the loop a no-op when k is empty.
for (fromVertex in seq_along(k)) {
  # Iterate over the res entry of each element of k.
  for (path in k[[fromVertex]]$res) {
    # path is a vertex sequence, the same type as V(g); as.integer() gives the
    # indices of the vertices inside the sequence.
    verticesOfPath <- as.integer(path)
    # Exclude the first and the last vertex (from, to).
    innerVertices <- verticesOfPath[c(-1, -length(verticesOfPath))]
    if (length(innerVertices) > 0) {
      # This is not a direct path: count every inner vertex once.
      m[verticesOfPath[1], innerVertices] <-
        m[verticesOfPath[1], innerVertices] + 1
    }
    # Add 1 to the last column (total number of paths from this start vertex).
    m[verticesOfPath[1], ncol(m)] <- m[verticesOfPath[1], ncol(m)] + 1
  }
}
Result :
> m
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
[1,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1
[2,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
[3,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1
[4,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[5,] 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 2
[6,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[7,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[8,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[9,] 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 2
[10,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[11,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
[12,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
[13,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
[14,] 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
[15,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
If vertices have name attributes, you can set them as row and col names of the matrix :
rownames(m) <- V(g)$name
colnames(m) <- c(V(g)$name,'TOT')
> m
A B C D E F G H I J K L M N O TOT
A 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1
B 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
C 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1
D 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
E 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 2
F 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
G 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
H 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
I 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 2
J 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
K 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
L 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
M 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
N 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
O 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1