How to subset columns based on value in a different column? - r

EDITED:
I have a dataframe that stores information about when particular assessment happened ('when'). This assessment happened at different times (t1 - t3) which vary by participant.
The dataframe also contains all the assessments ever completed by every participant (including the one referenced in the 'when' column). I only want the assessment information represented in the 'when' column. So if the number is 1, I want to keep all the data related to that assessment and remove all the data that was not collected at that assessment. Please note that I have many more variables in my actual data set than are represented in this shortened data set so any solution should not rely on repeating variable names.
Here's the best I can do. The problem with this solution is that it would have to be repeated for every variable name.
df2 <- mutate(.data = df,
a1G_when = if_else(when == 1, a1G_t1, NA_real_))
# here is what we start with
df <- structure(list(id = 1:10, when = c(1, 3, 2, 1, 2, 1, 3, 2, 3,
1), a1G_t1 = c(0.78, 0.21, 0.04, 0.87, 0.08, 0.25, 0.9, 0.77,
0.51, 0.5), Stqo_t1 = c(0.68, 0.77, 0.09, 0.66, 0.94, 0.05, 0.97,
0.92, 1, 0.04), Twcdz_t1 = c(0.95, 0.41, 0.29, 0.54, 0.06, 0.45,
0.6, 0.24, 0.17, 0.55), Kgh_t1 = c(0.25, 0.86, 0.37, 0.34, 0.97,
0.75, 0.73, 0.68, 0.37, 0.66), `2xWX_t1` = c(0.47, 0.52, 0.23,
0.5, 0.88, 0.71, 0.21, 0.98, 0.76, 0.21), `2IYnS_t1` = c(0.32,
0.75, 0.03, 0.46, 0.89, 0.71, 0.51, 0.83, 0.34, 0.32), a1G_t2 = c(0.97,
0.01, 0.58, 0.33, 0.58, 0.37, 0.76, 0.33, 0.39, 0.56), Stqo_t2 = c(0.78,
0.42, 0.5, 0.69, 0.09, 0.72, 0.84, 0.94, 0.46, 0.83), Twcdz_t2 = c(0.62,
0.34, 0.72, 0.62, 0.8, 0.26, 0.3, 0.88, 0.42, 0.53), Kgh_t2 = c(0.99,
0.66, 0.02, 0.17, 0.51, 0.03, 0.03, 0.74, 0.1, 0.26), `2xWX_t2` = c(0.68,
0.97, 0.56, 0.27, 0.66, 0.71, 0.96, 0.24, 0.37, 0.76), `2IYnS_t2` = c(0.24,
0.88, 0.58, 0.31, 0.8, 0.92, 0.91, 0.9, 0.55, 0.52), a1G_t3 = c(0.73,
0.6, 0.66, 0.06, 0.33, 0.34, 0.09, 0.44, 0.73, 0.56), Stqo_t3 = c(0.28,
0.88, 0.56, 0.75, 0.85, 0.33, 0.88, 0.4, 0.63, 0.61), Twcdz_t3 = c(0.79,
0.95, 0.41, 0.07, 0.99, 0.06, 0.74, 0.17, 0.89, 0.4), Kgh_t3 = c(0.06,
0.52, 0.35, 0.91, 0.43, 0.74, 0.72, 0.96, 0.39, 0.4), `2xWX_t3` = c(0.25,
0.09, 0.64, 0.32, 0.15, 0.14, 0.18, 0.33, 0.97, 0.6), `2IYnS_t3` = c(0.92,
0.49, 0.09, 0.95, 0.3, 0.83, 0.82, 0.56, 0.29, 0.36)), row.names = c(NA,
-10L), class = "data.frame")
# here is an example of what I want with the first column. I would also want all other repeating columns to look like this (Stq0_when, Twcdz, etc.)
id when a1G_when
1 1 1 0.78
2 2 3 0.88
3 3 2 0.58
4 4 1 0.87
5 5 2 0.58
6 6 1 0.25
7 7 3 0.09
8 8 2 0.33
9 9 3 0.73
10 10 1 0.50

Using data.table, you could do something like:
library(data.table)
cols <- unique(paste0(gsub("_.*", "", setdiff(names(df), c("id", "when"))), "_when"))
setDT(df)[
, (cols) := lapply(cols, function(x) paste0(gsub("_.*", "", x), "_t", when))][
, (cols) := lapply(cols, function(x) as.character(.SD[[get(x)]])), by = cols][
, (cols) := lapply(.SD, as.numeric), .SDcols = cols
]
Output (only first 10 rows and only relevant when columns):
a1G_when Stqo_when Twcdz_when Kgh_when 2xWX_when 2IYnS_when
1: 0.78 0.68 0.95 0.25 0.47 0.32
2: 0.60 0.88 0.95 0.52 0.09 0.49
3: 0.58 0.50 0.72 0.02 0.56 0.58
4: 0.87 0.66 0.54 0.34 0.50 0.46
5: 0.58 0.09 0.80 0.51 0.66 0.80
6: 0.25 0.05 0.45 0.75 0.71 0.71
7: 0.09 0.88 0.74 0.72 0.18 0.82
8: 0.33 0.94 0.88 0.74 0.24 0.90
9: 0.73 0.63 0.89 0.39 0.97 0.29
10: 0.50 0.04 0.55 0.66 0.21 0.32

Here is an opportunity to use the new tidyr::pivot_longer. We can use this to reshape the data so that var and t are in their own columns, filter to just the rows with the data we want (i.e. where t equals when) and then pivot the data back out to wide.
library(tidyverse)
df1 <- structure(list(ID = c(101, 102, 103, 104, 105), when = c(1, 2, 3, 1, 2), var1_t1 = c(5, 6, 4, 5, 6), var2_t1 = c(2, 3, 4, 2, 3), var1_t2 = c(7, 8, 9, 7, 8), var2_t2 = c(5, 4, 5, 4, 5), var1_t3 = c(3, 4, 3, 4, 3), var2_t3 = c(6, 7, 6, 7, 6)), row.names = c(NA, 5L), class = "data.frame")
df1 %>%
pivot_longer(
cols = starts_with("var"),
names_to = c("var", "t"),
names_sep = "_t",
values_to = "val",
col_ptypes = list(var = character(), t = numeric())
) %>%
filter(when == t) %>%
select(-t) %>%
pivot_wider(names_from = "var", values_from = "val")
#> # A tibble: 5 x 4
#> ID when var1 var2
#> <dbl> <dbl> <dbl> <dbl>
#> 1 101 1 5 2
#> 2 102 2 8 4
#> 3 103 3 3 6
#> 4 104 1 5 2
#> 5 105 2 8 5
Created on 2019-07-16 by the reprex package (v0.3.0)

Related

How to use Corrplot with correlation matrix created by hand (of type list)

I used a for loop to create a correlation matrix, because I needed to use polychor to generate polychoric correaltions and I was only able to get polychor to correlate two variables at a time. Anyway, I created my own correlation table with the following code:
for(i in 1:ncol(gd2)) {
for (j in 1:ncol(gd2)) {
corVal
The table looks like this:
head(dtnew)
Better Afraid Alive Bored Drop Empty Energy Happy Help Home Hope Memory Satis Spirit Worth TOT
1: 1.00 0.32 0.29 0.39 0.36 0.46 0.25 0.43 0.39 0.13 0.46 0.39 0.50 0.45 0.48 0.67
2: 0.32 1.00 0.25 0.20 0.24 0.30 0.23 0.30 0.43 0.15 0.44 0.28 0.31 0.29 0.34 0.62
3: 0.29 0.25 1.00 0.26 0.28 0.46 0.38 0.60 0.35 0.19 0.41 0.10 0.49 0.53 0.43 0.65
4: 0.39 0.20 0.26 1.00 0.36 0.56 0.31 0.36 0.39 0.16 0.32 0.23 0.39 0.35 0.44 0.67
5: 0.36 0.24 0.28 0.36 1.00 0.44 0.41 0.37 0.43 0.31 0.35 0.22 0.42 0.37 0.40 0.72
6: 0.46 0.30 0.46 0.56 0.44 1.00 0.32 0.55 0.51 0.18 0.45 0.17 0.62 0.52 0.64 0.75
>
But longer.
Here is the dput()
structure(list(Better = c(1, 0.32, 0.29, 0.39, 0.36, 0.46, 0.25,
0.43, 0.39, 0.13, 0.46, 0.39, 0.5, 0.45, 0.48, 0.67), Afraid = c(0.32,
1, 0.25, 0.2, 0.24, 0.3, 0.23, 0.3, 0.43, 0.15, 0.44, 0.28, 0.31,
0.29, 0.34, 0.62), Alive = c(0.29, 0.25, 1, 0.26, 0.28, 0.46,
0.38, 0.6, 0.35, 0.19, 0.41, 0.1, 0.49, 0.53, 0.43, 0.65), Bored = c(0.39,
0.2, 0.26, 1, 0.36, 0.56, 0.31, 0.36, 0.39, 0.16, 0.32, 0.23,
0.39, 0.35, 0.44, 0.67), Drop = c(0.36, 0.24, 0.28, 0.36, 1,
0.44, 0.41, 0.37, 0.43, 0.31, 0.35, 0.22, 0.42, 0.37, 0.4, 0.72
), Empty = c(0.46, 0.3, 0.46, 0.56, 0.44, 1, 0.32, 0.55, 0.51,
0.18, 0.45, 0.17, 0.62, 0.52, 0.64, 0.75), Energy = c(0.25, 0.23,
0.38, 0.31, 0.41, 0.32, 1, 0.48, 0.37, 0.36, 0.31, 0.14, 0.4,
0.43, 0.38, 0.74), Happy = c(0.43, 0.3, 0.6, 0.36, 0.37, 0.55,
0.48, 1, 0.45, 0.21, 0.49, 0.22, 0.69, 0.84, 0.49, 0.8), Help = c(0.39,
0.43, 0.35, 0.39, 0.43, 0.51, 0.37, 0.45, 1, 0.2, 0.51, 0.32,
0.5, 0.44, 0.6, 0.73), Home = c(0.13, 0.15, 0.19, 0.16, 0.31,
0.18, 0.36, 0.21, 0.2, 1, 0.23, 0.13, 0.13, 0.15, 0.26, 0.63),
Hope = c(0.46, 0.44, 0.41, 0.32, 0.35, 0.45, 0.31, 0.49,
0.51, 0.23, 1, 0.38, 0.48, 0.47, 0.59, 0.73), Memory = c(0.39,
0.28, 0.1, 0.23, 0.22, 0.17, 0.14, 0.22, 0.32, 0.13, 0.38,
1, 0.25, 0.24, 0.31, 0.66), Satis = c(0.5, 0.31, 0.49, 0.39,
0.42, 0.62, 0.4, 0.69, 0.5, 0.13, 0.48, 0.25, 1, 0.66, 0.6,
0.78), Spirit = c(0.45, 0.29, 0.53, 0.35, 0.37, 0.52, 0.43,
0.84, 0.44, 0.15, 0.47, 0.24, 0.66, 1, 0.51, 0.77), Worth = c(0.48,
0.34, 0.43, 0.44, 0.4, 0.64, 0.38, 0.49, 0.6, 0.26, 0.59,
0.31, 0.6, 0.51, 1, 0.77), TOT = c(0.67, 0.62, 0.65, 0.67,
0.72, 0.75, 0.74, 0.8, 0.73, 0.63, 0.73, 0.66, 0.78, 0.77,
0.77, 0.89)), row.names = c(NA, -16L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x000001d7adc21ef0>)
</pre/>
I would like to generate a visual using corrplot. However, when I try, I get an error:
Error in is.finite(tmp) : default method not implemented for type 'list'
My data is indeed of type list. I have tried usuing 'unlist'. Not sure what else to try.
There is a problem with your dput() output, possibly because you have a data.table. I can read it by deleting ", .internal.selfref = <pointer: 0x000001d7adc21ef0>" from the last line so that it ends class = c("data.table", "data.frame")). Printing that out shows a problem with the last line/column (Tot). The bottom row in that column should be 1.00, but it is 0.89. We can trim that and use as.matrix (my mistake in the earlier comment) to convert the data frame:
gd3 <- gd2[-16, -16]
corrplot(as.matrix(gd3))
library(corrplot)
M <- cor(df)
head(round(M,2))
corrplot(M, method="number")

Canonical Correlation in R with different matrix dimensions

I'm having difficulties about doing a CC analysis in R.
The assignment which I'm doing is from "Applied Multivariate Analysis" by Sharma, exercise 13.7, if you're familiar with it.
Basically, I'm asked to conduct a CCA on a set of variables. There are seven X variables, but only five Y variables, thus R complains that the dimensions are not compatible. See the image below for a visual representation of the data called CETNEW.
Edited (Changed from image to dput):
structure(list(...
1 = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "Y1", "Y2", "Y3", "Y4", "Y5"),
2 = c(2.72, 1.2, 0.82, 0.92, 1.19, 1, 1.45, 0.68, 0.98, 0.57, 1.07, 0.91), ...
3 = c(1.2, 3.78, 0.7, 1.04, 1.06, 1.32, 1.31, 0.56, 1, 0.79, 1.13, 1.38), ...
4 = c(0.82, 0.7, 1.7, 0.59, 0.83, 1.08, 1.01, 0.65, 0.78, 0.66, 0.93, 0.77), ...
5 = c(0.92, 1.04, 0.59, 3.09, 1.06, 0.93, 1.47, 0.62, 1.26, 0.51, 0.94, 0.85), ...
6 = c(1.19, 1.06, 0.83, 1.06, 2.94, 1.36, 1.66, 0.68, 1.16, 0.77, 1.37, 1.11), ...
7 = c(1, 1.32, 1.08, 0.93, 1.36, 2.94, 1.56, 0.9, 1.23, 0.78, 1.65, 1.31), ...
8 = c(1.45, 1.31, 1.01, 1.47, 1.66, 1.56, 3.11, 1.03, 1.7, 0.81, 1.63, 1.44), ...
9 = c(0.68, 0.56, 0.65, 0.62, 0.68, 0.9, 1.03, 1.71, 0.99, 0.65, 0.86, 0.72), ...
10 = c(0.98, 1, 0.78, 1.26, 1.16, 1.23, 1.7, 0.99, 3.07, 0.61, 1.43, 1.28), ...
11 = c(0.57, 0.79, 0.66, 0.51, 0.77, 0.78, 0.81, 0.65, 0.61, 2.83, 1.04, 0.84), ...
12 = c(1.07, 1.13, 0.93, 0.94, 1.37, 1.65, 1.63, 0.86, 1.43, 1.04, 2.83, 1.6), ...
13 = c(0.91, 1.38, 0.77, 0.85, 1.11, 1.31, 1.44, 0.72, 1.28, 0.84, 1.6, 4.01)),
row.names = c(NA, -12L), class = c("tbl_df", "tbl", "data.frame"))
What I've Done so Far
CETNEW <- CETNEW[,-1] #To remove the non-numeric values
Create two variables (criterion and predictor variables) as:
CETNEWx <- CETNEW[1:7,]
CETNEWy <- CETNEW[8:12,]
Then I've been using various packages such as CCA, CCP and candisk. From CCA:
ccCETNEW <- cc(CETNEWx,CETNEWy)
Yields the following error message:
Error in cov(X, Y, use = "pairwise") : incompatible dimensions
The matcor function also from CCA, yields the following error message:
Error in data.frame(..., check.names = FALSE) : arguments imply differing number of rows: 7, 5
Thus, it would seem that it all boils down to the different dimension problem. I've talked to my professor about it, but since he is using SAS, which apparently are compatible with this problem and could solve it, he could not help me.
Please, if you're familiar with canonical correlation and have had a similar problem before, any help regarding this topic is highly appreciated.
If you look at your data, notice the first column is divided into X and Y labels. That suggests to me that your data are transposed. If so, each column is an observation and the X and Y labels indicate various measurements taken on each observation. Canonical correlations are performed on two groups of measurements/variables from a single set of observations. First, here is the transposed data:
CETNEW.T <- structure(list(X1 = c(2.72, 1.2, 0.82, 0.92, 1.19, 1, 1.45, 0.68,
0.98, 0.57, 1.07, 0.91), X2 = c(1.2, 3.78, 0.7, 1.04, 1.06, 1.32,
1.31, 0.56, 1, 0.79, 1.13, 1.38), X3 = c(0.82, 0.7, 1.7, 0.59,
0.83, 1.08, 1.01, 0.65, 0.78, 0.66, 0.93, 0.77), X4 = c(0.92,
1.04, 0.59, 3.09, 1.06, 0.93, 1.47, 0.62, 1.26, 0.51, 0.94, 0.85
), X5 = c(1.19, 1.06, 0.83, 1.06, 2.94, 1.36, 1.66, 0.68, 1.16,
0.77, 1.37, 1.11), X6 = c(1, 1.32, 1.08, 0.93, 1.36, 2.94, 1.56,
0.9, 1.23, 0.78, 1.65, 1.31), X7 = c(1.45, 1.31, 1.01, 1.47,
1.66, 1.56, 3.11, 1.03, 1.7, 0.81, 1.63, 1.44), Y1 = c(0.68,
0.56, 0.65, 0.62, 0.68, 0.9, 1.03, 1.71, 0.99, 0.65, 0.86, 0.72
), Y2 = c(0.98, 1, 0.78, 1.26, 1.16, 1.23, 1.7, 0.99, 3.07, 0.61,
1.43, 1.28), Y3 = c(0.57, 0.79, 0.66, 0.51, 0.77, 0.78, 0.81,
0.65, 0.61, 2.83, 1.04, 0.84), Y4 = c(1.07, 1.13, 0.93, 0.94,
1.37, 1.65, 1.63, 0.86, 1.43, 1.04, 2.83, 1.6), Y5 = c(0.91,
1.38, 0.77, 0.85, 1.11, 1.31, 1.44, 0.72, 1.28, 0.84, 1.6, 4.01
)), class = "data.frame", row.names = c(NA, -12L))
Now the analysis runs fine:
library("CCA")
str(CETNEW.T)
# 'data.frame': 12 obs. of 12 variables:
# $ X1: num 2.72 1.2 0.82 0.92 1.19 1 1.45 0.68 0.98 0.57 ...
# $ X2: num 1.2 3.78 0.7 1.04 1.06 1.32 1.31 0.56 1 0.79 ...
# $ X3: num 0.82 0.7 1.7 0.59 0.83 1.08 1.01 0.65 0.78 0.66 ...
# $ X4: num 0.92 1.04 0.59 3.09 1.06 0.93 1.47 0.62 1.26 0.51 ...
# $ X5: num 1.19 1.06 0.83 1.06 2.94 1.36 1.66 0.68 1.16 0.77 ...
# $ X6: num 1 1.32 1.08 0.93 1.36 2.94 1.56 0.9 1.23 0.78 ...
# $ X7: num 1.45 1.31 1.01 1.47 1.66 1.56 3.11 1.03 1.7 0.81 ...
# $ Y1: num 0.68 0.56 0.65 0.62 0.68 0.9 1.03 1.71 0.99 0.65 ...
# $ Y2: num 0.98 1 0.78 1.26 1.16 1.23 1.7 0.99 3.07 0.61 ...
# $ Y3: num 0.57 0.79 0.66 0.51 0.77 0.78 0.81 0.65 0.61 2.83 ...
# $ Y4: num 1.07 1.13 0.93 0.94 1.37 1.65 1.63 0.86 1.43 1.04 ...
# $ Y5: num 0.91 1.38 0.77 0.85 1.11 1.31 1.44 0.72 1.28 0.84 ...
X <- CETNEW.T[, 1:7]
Y <- CETNEW.T[, 8:12]
ccCETNEW <- cc(X, Y)
ccCETNEW is list with 5 parts containing the results.

Reading and reconstructing symmetric matrix with R

I need to read the following matrix from a file. It's a symmetric correlation matrix, so half of it is omitted.
1.00
0.49 1.00
0.53 0.57 1.00
0.49 0.46 0.48 1.00
0.51 0.53 0.57 0.57 1.00
0.33 0.30 0.31 0.24 0.38 1.00
0.32 0.21 0.23 0.22 0.32 0.43 1.00
0.20 0.16 0.14 0.12 0.17 0.27 0.33 1.00
0.19 0.08 0.07 0.19 0.23 0.24 0.26 0.25 1.00
0.30 0.27 0.24 0.21 0.32 0.34 0.54 0.46 0.28 1.00
0.37 0.35 0.37 0.29 0.36 0.37 0.32 0.29 0.30 0.35 1.00
0.21 0.20 0.18 0.16 0.27 0.40 0.58 0.45 0.27 0.59 0.31 1.00
Currently, I'm using
data1 <- na.omit(as.vector(t(read.table('triangle-data.txt', fill = TRUE))))
pt <- 12
R <- matrix(0, nrow = pt , ncol = pt)
for(i in 1:pt){
R[i, 1:i] <- data1[(i*(i-1)/2 + 1): (i*(i+1)/2)]
}
R <- R + t(R) - diag(rep(1, pt))
R
The result is
> dput(R)
structure(c(1, 0.49, 0.53, 0.49, 0.51, 0.33, 0.32, 0.2, 0.19,
0.3, 0.37, 0.21, 0.49, 1, 0.57, 0.46, 0.53, 0.3, 0.21, 0.16,
0.08, 0.27, 0.35, 0.2, 0.53, 0.57, 1, 0.48, 0.57, 0.31, 0.23,
0.14, 0.07, 0.24, 0.37, 0.18, 0.49, 0.46, 0.48, 1, 0.57, 0.24,
0.22, 0.12, 0.19, 0.21, 0.29, 0.16, 0.51, 0.53, 0.57, 0.57, 1,
0.38, 0.32, 0.17, 0.23, 0.32, 0.36, 0.27, 0.33, 0.3, 0.31, 0.24,
0.38, 1, 0.43, 0.27, 0.24, 0.34, 0.37, 0.4, 0.32, 0.21, 0.23,
0.22, 0.32, 0.43, 1, 0.33, 0.26, 0.54, 0.32, 0.58, 0.2, 0.16,
0.14, 0.12, 0.17, 0.27, 0.33, 1, 0.25, 0.46, 0.29, 0.45, 0.19,
0.08, 0.07, 0.19, 0.23, 0.24, 0.26, 0.25, 1, 0.28, 0.3, 0.27,
0.3, 0.27, 0.24, 0.21, 0.32, 0.34, 0.54, 0.46, 0.28, 1, 0.35,
0.59, 0.37, 0.35, 0.37, 0.29, 0.36, 0.37, 0.32, 0.29, 0.3, 0.35,
1, 0.31, 0.21, 0.2, 0.18, 0.16, 0.27, 0.4, 0.58, 0.45, 0.27,
0.59, 0.31, 1), .Dim = c(12L, 12L))
This is too unwieldy, and I need to hard-code its size. Is there a more convenient way?
I used a combination of readLines and strsplit to read the file
a <- sapply(sapply(lapply(readLines("triangle.txt"),
function(x) strsplit(x, " ")), "[", 1),
function(x) na.omit(as.numeric(x)))
and rbind to cast it into a square matrix
A <- do.call("rbind", a)
Despite the warning, the lower part of the matrix is correctly read from the file, but the upper part is all messed up, which I fixed with a little dirty trick
A[upper.tri(A)] <- 0
A <- A + t(A) - diag(nrow(A))
EDIT
Another simpler solution based on the vector of the coefficients:
data1 <- na.omit(as.vector(t(read.table('triangle.txt', fill = TRUE))))
n <- Re(polyroot(c(-length(data1), 1/2, 1/2)))[1]
A <- matrix(0, n, n)
A[upper.tri(A, diag = T)] <- data1
A <- A + t(A) - diag(n)

Cluster Segregation in R

Clusters have been formed. Now, I am wondering if we can select elements from a particular cluster id.
Here are the different clusters that are formed .
1 2 3 4 5 6 7 8 9
549 290 1206 103 97 102 2 208 123
10 11 12 13 14 15 16 17 18
17 75 293 981 23 586 25 15 365
Like , I have to chose element from cluster 12. Then, how to do it
This is the code used to form the cluster:
db <- dbscan(cbind(Final$event_begin_longitude,Final$event_begin_latitude), .0025, minPts = 1, scale = FALSE, method = "raw")
There is no predefined method to access elements of a cluster. However, you can easily do it yourself. The return value of dbscan has a slot named clusters, which is in the same order as your input:
dta <- structure(list(V1 = c(0, 0.04, 0.09, 0.13, 0.17, 0.22, 0.26, 0.3, 0.35, 0.39, 0.43, 0.48, 0.52, 0.57, 0.61, 0.65, 0.7, 0.74, 0.78, 0.83, 0.87, 0.91, 0.96, 1),
V2 = c(0.01, 0.01, 0, 0, 0.08, 0.03, 0.01, 0.05, 0.45, 0.73, 0.91, 0.9, 0.67, 0.77, 0.98, 0.94, 0.86, 1, 0.38, 0.09, 0.01, 0.01, 0, 0)),
.Names = c("V1", "V2"),
row.names = c(NA, -24L),
class = "data.frame")
db <- dbscan::dbscan(dta, .25, minPts = 1)
# Combine values and their cluster
cbind(dta, db$cluster)
# Plot with colored clusters
plot(dta, col = db$cluster, pch = 16)

Partition a matrix into list of sub matrices row-wise in R

I have a matrix where each row is either all 1's or any number between 0 and 1. I need to partition this into list of matrices such that
Each sub matrix is either all 1's or all numbers between 0 and 1
Combining all the sub matrices of the list using rbind yields the original matrix
Example:
m <-
structure(c(1, 1, 1, 0.84, 0.27, 0.24, 0.48, 0.28, 0.62, 0.55,
1, 1, 0.26, 0.93, 0.87, 0.76, 1, 1, 1, 1, 1, 0.31, 0.32, 0.96,
0.25, 0.96, 0.43, 0.66, 1, 1, 0.22, 0.88, 0.01, 0.14, 1, 1, 1,
1, 1, 0.71, 0.99, 0.6, 0.22, 0.73, 0.54, 0.17, 1, 1, 0.59, 0.67,
0.07, 0.4, 1, 1, 1, 1, 1, 0.27, 0.62, 0.52, 0.67, 0.69, 0.06,
0.63, 1, 1, 0.27, 0.95, 0.16, 0.22, 1, 1, 1, 1, 1, 0.59, 0.94,
0.4, 0.05, 0.05, 0.26, 0.31, 1, 1, 0.53, 0.52, 0.77, 0.06, 1,
1, 1, 1, 1, 0.48, 0.47, 0.88, 0.7, 0.4, 0.4, 0.72, 1, 1, 0.79,
0.58, 0.74, 0.4, 1, 1, 1, 1, 1, 0.27, 0.41, 0.36, 0.35, 0.48,
0.2, 0.4, 1, 1, 0.17, 0.34, 0.97, 0.06, 1, 1, 1, 1, 1, 0.56,
0.66, 0.29, 0.41, 0.56, 0.83, 0.97, 1, 1, 0.4, 0.35, 0.47, 0.23,
1, 1, 1, 1, 1, 0.91, 0.15, 0.17, 0.82, 0.7, 0.15, 0.97, 1, 1,
0.47, 0.02, 0.07, 0.05, 1, 1, 1, 1, 1, 0.9, 0.57, 0.17, 0.92,
0.92, 0.8, 0.73, 1, 1, 0.87, 0.5, 0.65, 0.67, 1, 1), .Dim = c(18L,
10L))
L <-
list(structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), .Dim = c(3L, 10L
)), structure(c(0.84, 0.27, 0.24, 0.48, 0.28, 0.62, 0.55, 0.31,
0.32, 0.96, 0.25, 0.96, 0.43, 0.66, 0.71, 0.99, 0.6, 0.22, 0.73,
0.54, 0.17, 0.27, 0.62, 0.52, 0.67, 0.69, 0.06, 0.63, 0.59, 0.94,
0.4, 0.05, 0.05, 0.26, 0.31, 0.48, 0.47, 0.88, 0.7, 0.4, 0.4,
0.72, 0.27, 0.41, 0.36, 0.35, 0.48, 0.2, 0.4, 0.56, 0.66, 0.29,
0.41, 0.56, 0.83, 0.97, 0.91, 0.15, 0.17, 0.82, 0.7, 0.15, 0.97,
0.9, 0.57, 0.17, 0.92, 0.92, 0.8, 0.73), .Dim = c(7L, 10L)),
structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1), .Dim = c(2L, 10L)), structure(c(0.26, 0.93,
0.87, 0.76, 0.22, 0.88, 0.01, 0.14, 0.59, 0.67, 0.07, 0.4,
0.27, 0.95, 0.16, 0.22, 0.53, 0.52, 0.77, 0.06, 0.79, 0.58,
0.74, 0.4, 0.17, 0.34, 0.97, 0.06, 0.4, 0.35, 0.47, 0.23,
0.47, 0.02, 0.07, 0.05, 0.87, 0.5, 0.65, 0.67), .Dim = c(4L,
10L)), structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1), .Dim = c(2L, 10L)))
Here is a solution for you. It uses data.table's rleid for convenience.
First, we generate some data:
set.seed(123)
input <- matrix(runif(180),ncol=10)
input[c(1:3,5,9:10),]<-1
Then we identify which rows have only ones
is_one <- apply(input,1,function(x){all(x==1)})
We want to split our rows by groups of is one/is not one, so we generate row id's and split id's
row_ids <- 1:nrow(input)
split_ids <- data.table::rleid(is_one)
The we generate our output, by splitting the row id's by groups of is one/is not one and returning our input data for each set of row ids.
output <- lapply(split(row_ids,split_ids),function(x){input[x,]})
You can also use split.data.frame with a matrix:
> m=structure(c(1,1,1,0.84,0.27,0.24,0.48,0.28,0.62,0.55,1,1,0.26,0.93,0.87,0.76,1,1,1,1,1,0.31,0.32,0.96,0.25,0.96,0.43,0.66,1,1,0.22,0.88,0.01,0.14,1,1,1,1,1,0.71,0.99,0.6,0.22,0.73,0.54,0.17,1,1,0.59,0.67,0.07,0.4,1,1,1,1,1,0.27,0.62,0.52,0.67,0.69,0.06,0.63,1,1,0.27,0.95,0.16,0.22,1,1,1,1,1,0.59,0.94,0.4,0.05,0.05,0.26,0.31,1,1,0.53,0.52,0.77,0.06,1,1,1,1,1,0.48,0.47,0.88,0.7,0.4,0.4,0.72,1,1,0.79,0.58,0.74,0.4,1,1,1,1,1,0.27,0.41,0.36,0.35,0.48,0.2,0.4,1,1,0.17,0.34,0.97,0.06,1,1,1,1,1,0.56,0.66,0.29,0.41,0.56,0.83,0.97,1,1,0.4,0.35,0.47,0.23,1,1,1,1,1,0.91,0.15,0.17,0.82,0.7,0.15,0.97,1,1,0.47,0.02,0.07,0.05,1,1,1,1,1,0.9,0.57,0.17,0.92,0.92,0.8,0.73,1,1,0.87,0.5,0.65,0.67,1,1),.Dim=c(18L,10L))
> r=rle(rowSums(m==1)==ncol(m))$lengths
> split.data.frame(m,rep(1:length(r),r))
$`1`
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 1 1 1 1 1 1 1 1 1 1
[2,] 1 1 1 1 1 1 1 1 1 1
[3,] 1 1 1 1 1 1 1 1 1 1
$`2`
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0.84 0.31 0.71 0.27 0.59 0.48 0.27 0.56 0.91 0.90
[2,] 0.27 0.32 0.99 0.62 0.94 0.47 0.41 0.66 0.15 0.57
[3,] 0.24 0.96 0.60 0.52 0.40 0.88 0.36 0.29 0.17 0.17
[4,] 0.48 0.25 0.22 0.67 0.05 0.70 0.35 0.41 0.82 0.92
[5,] 0.28 0.96 0.73 0.69 0.05 0.40 0.48 0.56 0.70 0.92
[6,] 0.62 0.43 0.54 0.06 0.26 0.40 0.20 0.83 0.15 0.80
[7,] 0.55 0.66 0.17 0.63 0.31 0.72 0.40 0.97 0.97 0.73
$`3`
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 1 1 1 1 1 1 1 1 1 1
[2,] 1 1 1 1 1 1 1 1 1 1
$`4`
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0.26 0.22 0.59 0.27 0.53 0.79 0.17 0.40 0.47 0.87
[2,] 0.93 0.88 0.67 0.95 0.52 0.58 0.34 0.35 0.02 0.50
[3,] 0.87 0.01 0.07 0.16 0.77 0.74 0.97 0.47 0.07 0.65
[4,] 0.76 0.14 0.40 0.22 0.06 0.40 0.06 0.23 0.05 0.67
$`5`
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 1 1 1 1 1 1 1 1 1 1
[2,] 1 1 1 1 1 1 1 1 1 1

Resources