How to pull TRUE values out of matrices - r

I need to pull the genes with the TRUE values out of each column of the matrix and form a list of them for each of my contrasts (columns). How do I go about doing that?
gcQVals=qvalue(eBgcData$p.value)
print(sum(gcQVals$qvalues<=0.01))
gcQs2=gcQVals$qvalues<=0.01
print(gcQs2[1:5,1:6])
Here is the output:
[1] 17969
Contrasts
KOInfvsKOUnInf WTInfvsWTUnInf KOInfvsWTInf KOInfvsWTUnInf
1415670_at FALSE FALSE FALSE FALSE
1415671_at FALSE FALSE FALSE FALSE
1415672_at TRUE FALSE FALSE TRUE
1415673_at FALSE FALSE FALSE FALSE
1415674_a_at FALSE FALSE FALSE FALSE
Contrasts
KOUnInfvsWTInf KOUnInfvsWTUnInf
1415670_at FALSE FALSE
1415671_at FALSE FALSE
1415672_at FALSE FALSE
1415673_at FALSE FALSE
1415674_a_at FALSE FALSE

I tried to figure out what you were saying and built this MWE:
# build the example
a <- matrix(runif(9), 3, 3)>0.5
dimnames(a) <- list(letters[1:3], LETTERS[1:3])
# solution to your supposed problem
lapply(colnames(a), function(name) rownames(a)[a[,name]])
> a
A B C
a FALSE TRUE FALSE
b FALSE FALSE TRUE
c FALSE FALSE FALSE
res <- lapply(colnames(a), function(name) rownames(a)[a[,name]])
names(res) <- colnames(a)
> res
$A
character(0)
$B
[1] "a"
$C
[1] "b"

Related

Is it possible to keep memory while using apply()?

I need to run the function lapply on a activation_status list t times so that the t iteration of the function remembers the results from the t-1 iteration.
The list is basically a bidimensional array representing a single item i status over multiple t periods and looks like this:
n_items <<- 100
n_iterations <<- 10
activation_status <-
lapply(1:n_iterations,
FUN = function(t, bool, i) rep(bool, t),
FALSE, n_items)
Now during each iteration t, I randomly activate (set to TRUE) a number of items within the list but I want all the items already activated at time t-1 to stay active (note that I define activation_status within the update function so that it's accessible in the inner functions).
updateActivation <- function(t) {
activation_status[[t]] <- as.logical(rbinom(n_items, 1, prob = .5))
activation_status[[t]][activation_status[[t-1]] == TRUE] <- TRUE
}
But then
lapply(1:n_iterations, updateActivation)
throws as error:
Error in activation_status[[t - 1]] : attempt to select less than one element in get1index
I know I could use a loop, but I wonder if it is:
Possible to do something like this with the apply function?
Do it faster?
Not sure if I fully understood the question but seems like you are looking for a recursion.
In that case Reduce() can be used instead of lapply():
activation_status <- rep(FALSE, 10)
n_iterations <- 5
Reduce(function(y, x) as.logical(rbinom(length(y), 1, prob=0.1)) | y,
x=1:n_iterations, init=activation_status, accumulate=TRUE
)
[[1]]
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[[2]]
[1] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
[[3]]
[1] FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE
[[4]]
[1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE
[[5]]
[1] TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE
[[6]]
[1] TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE FALSE
We could probably do this without using any apply command.
#Set seed for reproduciblity
set.seed(123)
#Create initialization demo data
activation_status <- rep(FALSE, 10)
#Number of values to select
n_iterations <- 5
#Sequence from 1:n_iterations
seq_n_iterations <- seq_len(n_iterations)
#Create matrix to hold output
output <- replicate(n_iterations, activation_status)
#Select n_iterations random values from 1:length(activation_status)
#You can change this if you want to use some specific distrubution
points <- sample(length(activation_status), n_iterations)
#Create column indices
cols <- rep(seq_n_iterations, seq_n_iterations)
#Create row indices
rows <- points[ave(inds, inds, FUN = seq_along)]
#Change those values to TRUE
output[cbind(rows, cols)] <- TRUE
output
# [,1] [,2] [,3] [,4] [,5]
# [1,] FALSE FALSE FALSE FALSE FALSE
# [2,] FALSE FALSE TRUE TRUE TRUE
# [3,] TRUE TRUE TRUE TRUE TRUE
# [4,] FALSE FALSE FALSE FALSE FALSE
# [5,] FALSE FALSE FALSE FALSE FALSE
# [6,] FALSE FALSE FALSE FALSE TRUE
# [7,] FALSE FALSE FALSE FALSE FALSE
# [8,] FALSE FALSE FALSE TRUE TRUE
# [9,] FALSE FALSE FALSE FALSE FALSE
#[10,] FALSE TRUE TRUE TRUE TRUE
If you want them as lists :
asplit(output, 2)
#[[1]]
# [1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#[[2]]
# [1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
#[[3]]
# [1] FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
#[[4]]
# [1] FALSE TRUE TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE
#[[5]]
# [1] FALSE TRUE TRUE FALSE FALSE TRUE FALSE TRUE FALSE TRUE

Splitting data based on time condition

I have rows of data that are seconds apart, however I found some anomalies. The difference between some rows is 30min or above, so I want to split my data to multiple other data frames at that condition which means loop through my data frame and split when the difference in time is above 30min. I’ve tried this already but it splits my data to one row data frame.
RBD < - function(x){
i <- 0
while(i < length(data$Time)){
if(data$Time[i+1]-data$Time[i] > 60*30){
rb <- 1
}
else{
rb<-0
}
i <- i+1
}
}
ListData <- Data %>%
group_by(Data$temp)%>%
transmute(ind=all((RBD = 1))%>%
.$ind
names(ListData) <- paste0(‘Data’, seq_along(ListData))
split(Data, ListData)
My Data looks like this
Data
There's a very helpful function in base R: diff, which can do the heavy lifting for you. If this doesn't work for you, try posting a reprex and I'll see if i can help you troubleshoot.
Lets simulate some data:
set.seed(123)
x <- sample(1200, 100)
x <- x + sample(c(0, 0, 0, 0, 2400), 100, replace = TRUE)
RBD <- function(x){
res <- lag(x) > 60*30
res[1] <- FALSE
res
}
RBD(x)
# [1] FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE FALSE FALSE FALSE TRUE
# [13] FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
# [25] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
# [37] FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
# [49] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [73] FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
# [85] FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE TRUE FALSE TRUE
# [97] FALSE FALSE FALSE FALSE

Create a logical or binary matrix/data.frame from a list of factors in R

I have a list of approximately 2 million elements. The list is made up of vectors of character strings. There are about 50 different character strings so can be considered factors. The vectors of character strings are different lengths varying between 1 and 50 (i.e the total number of character strings).
I want to convert the list to a logical or binary matrix/data.frame. Currently my method involves lapply and is incredibly slow, I would like to know if there is a vectorised approach.
require(dplyr); require(tidyr)
#create test data set
set.seed(123)
list1 <- list()
ListLength <-10
elementlength <- sample(1:5, ListLength, replace = TRUE )
for(i in 1:length(elementlength) ){
list1[[i]] <- sample(letters[1:15], elementlength[i])
}
#Create data frame from list using lapply
lapply(list1, function(n){
data.frame(type = n, value = TRUE) %>%
spread(., key = type, value )
}) %>% bind_rows()
I don't know if there is a way by preallocating the data frame then filling it in somehow.
Type <- unique(unlist(list1, use.names = FALSE))
#Create empty dataframe
TypeMat <- data.frame(matrix(NA,
ncol = length(Type),
nrow = ListLength)) %>%
setNames(Type)
We could use mtabulate from qdapTools
library(qdapTools)
mtabulate(list1)!=0
# a b c d e f g h i j k l m o
#[1,] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
#[2,] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE
#[3,] TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#[4,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE
#[5,] FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE TRUE
#[6,] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#[7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE TRUE
#[8,] TRUE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
#[9,] FALSE TRUE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#[10,]FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

Exclude multiple words from a vector with grepl [duplicate]

This question already has answers here:
Matching multiple patterns
(6 answers)
Closed 7 years ago.
Here sample data:
exclude.words <- c("zoznam","azet","dovera","joj","alza","telecom","google","post","sme")
main.data <- c("zoznam","registration","azet","azet.com","dovera","dna","joj","alza","telecom","google","post","sme")
This works if the words are equal (match exactly), however see azet.com that won't be excluded! For that we could use agrepl().
main.data[!(main.data %in% exclude.words)]
So how to use agrepl with two vectors?
main.data[!agrepl(main.data, exclude.words)]
As commented, you can use:
main.data[!grepl(paste(exclude.words, collapse = "|"), main.data)]
to exclude any words that have a partly or complete match between the main.data and exclude.words.
paste(exclude.words, collapse = "|")
creates a single string with "|" (logical OR) between the exclude.words which can be used as a single pattern in grepl. Therefore, you don't need to loop over the single words.
main.data[!as.logical(rowSums(sapply(exclude.words, function(x) agrepl(x, main.data))))]
# [1] "registration" "dna"
# clarification
sapply(exclude.words, function(x) agrepl(x, main.data))
# zoznam azet dovera joj alza telecom google post sme
# [1,] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [3,] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [4,] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [5,] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
# [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [7,] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
# [8,] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
# [9,] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
# [10,] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
# [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
# [12,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
You can use this functional programming approach:
library(functional)
funcs = lapply(exclude.words, function(u) function(x) x[!grepl(u, x)])
Reduce(Compose, funcs)(main.data)
#[1] "registration" "dna"

Finding vector elements with a length longer than 1 in R

I have a vector like this
c(0,1,2,0,0,2,2,2,2,2,2,1,0,1,2,2,2,2,2,1)
I would like to find the position where a series of at least three consecutive 2's, c(2,2,2), starts and if it is interupted I would like to find the next first postion.
The return vector should look something like this:
FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
I have tried match and several other functions but without success
Here is one way:
x <- c(0,1,2,0,0,2,2,2,2,2,2,1,0,1,2,2,2,2,2,1)
a <- rle(x)
z <- rep(FALSE, length(x))
z[sequence(a$lengths) == 1] <- a$lengths >= 2 & a$values == 2
z
# [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
# [11] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
r <- rle(x)
# Which entries are true in the result:
w <- (cumsum(r$length)[r$values==2] - (r$length[r$values==2]-1))[r$length[r$values==2]>2]
result <- logical(length(x))
result[w] <- T
result
## [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [17] FALSE FALSE FALSE FALSE
Here's my attempt as a function:
FUN <- function(x, n = 3) {
y <- rle(x)
z <- y[[1]] > (n - 1)
unlist(lapply(1:length(z), function(i) {
m <- rep(FALSE, each=y[[1]][i])
if(z[i]) {
m[1] <- TRUE
}
m
}))
}
FUN(x)
## [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
## [11] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE

Resources