append the loop results to a vector - r

I am trying to get the list of variable names where the count of value "a" is more than or equals 2 and finally store it in a vector Morethan2 and similarly do for the lessthan2. Please help me to achieve this.
df <- data.frame(a1 = c("a","a","b"),a2 = c("a","b","b"),a3 = c("a","a","a"))
for(x in names(df[1:3])){
if(sum(df[x] =="a") >= 2){
more2 = x
} else{less2 = x}}
Lessthan2 = less2
Morethan2 = more2
Expected REsult:
Morethan2 : 'a1','a3'
Lessthan2 : 'a2'

We can use colSums to get the count of "a" in each column and then subset it to get morethan2 and lessthan2.
inds <- colSums(df == "a")
morethan2 <- names(inds)[inds >= 2]
lessthan2 <- names(inds)[inds < 2]
morethan2
#[1] "a1" "a3"
lessthan2
#[1] "a2"
If we want to use for loop, we can do
i <- 1
j <- 1
more2 <- numeric()
less2 <- numeric()
for(x in names(df)) {
if(sum(df[[x]] =="a") >= 2) {
more2[i] = x
i= i + 1
} else {
less2[j] = x
j = j + 1
}
}

df <- data.frame(a1 = c("a","a","b"),a2 = c("a","b","b"),a3 = c("a","a","a"))
more2 <- c()
less2 <- c()
for(x in names(df[1:3])){
if(sum(df[x] =="a") >= 2){
more2[x] = x
} else{less2 [x] = x}}

Related

Why calling rbind on data.frame with 0 columns drops all the rows?

I noticed a discrepancy with rbind behaviour between matrix and data.frame objects.
With matrix objects everything works as expected:
mat1 <- matrix(nrow=2, ncol=0)
mat2 <- matrix(nrow=2, ncol=0)
dim(rbind(mat1, mat2))
[1] 4 0
But if we turn them to data.frame all of a sudden it looses the number of rows:
> dim(rbind(as.data.frame(mat1), as.data.frame(mat2)))
[1] 0 0
What I would like to understand is - is this behaviour intentional? And if so what is the reasoning for dropping the number of rows in this situation?
EDIT: As noted by #PoGibas - this behaviour is documented in ?rbind. No reason is given and it would probably be hard to infer one. So the question becomes:
How to rbind an arbitrary number of data.frames while always preserving their number of rows?
Workaround could be to use cbind and transposition:
m <- matrix(nrow = 2, ncol = 0)
as.data.frame(t(cbind(as.data.frame(t(m)), as.data.frame(t(m)))))
# Returns: data frame with 0 columns and 4 rows
Here cbind creates a data.frame with 0 rows and 4 columns and we transpose it to matrix with 4 rows and 0 columns.
Another solution is just brutal modification of original base::rbind.data.frame (source on github) function.
You have to remove/comment out two parts there:
Removal of arguments if there length is not a positive integer (length(data.frame()) returns 0). Comment out this part:
allargs <- allargs[lengths(allargs) > 0L]
Return of empty data.frame if attribute names is empty (you can't set attribute to an empty data.frame - names(as.data.frame(mat1)) <- "" returns an error). Comment out this part:
if(nvar == 0L) return(structure(list(), class = "data.frame", row.names = integer()))
Result:
m <- matrix(nrow = 2, ncol = 0)
dim(rbind.data.frame2(as.data.frame(m), as.data.frame(m)))
# Returns: [1] 4 0
Code:
rbind.data.frame2 <- function(..., deparse.level = 1, make.row.names = TRUE,
stringsAsFactors = default.stringsAsFactors())
{
match.names <- function(clabs, nmi)
{
if(identical(clabs, nmi)) NULL
else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) {
## we need 1-1 matches here
m <- pmatch(nmi, clabs, 0L)
if(any(m == 0L))
stop("names do not match previous names")
m
} else stop("names do not match previous names")
}
if(make.row.names)
Make.row.names <- function(nmi, ri, ni, nrow)
{
if(nzchar(nmi)) {
if(ni == 0L) character() # PR8506
else if(ni > 1L) paste(nmi, ri, sep = ".")
else nmi
}
else if(nrow > 0L && identical(ri, seq_len(ni)) &&
identical(unlist(rlabs, FALSE, FALSE), seq_len(nrow)))
as.integer(seq.int(from = nrow + 1L, length.out = ni))
else ri
}
allargs <- list(...)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# allargs <- allargs[lengths(allargs) > 0L]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if(length(allargs)) {
## drop any zero-row data frames, as they may not have proper column
## types (e.g. NULL).
nr <- vapply(allargs, function(x)
if(is.data.frame(x)) .row_names_info(x, 2L)
else if(is.list(x)) length(x[[1L]])
# mismatched lists are checked later
else length(x), 1L)
if(any(nr > 0L)) allargs <- allargs[nr > 0L]
else return(allargs[[1L]]) # pretty arbitrary
}
n <- length(allargs)
if(n == 0L)
return(structure(list(),
class = "data.frame",
row.names = integer()))
nms <- names(allargs)
if(is.null(nms))
nms <- character(n)
cl <- NULL
perm <- rows <- vector("list", n)
rlabs <- if(make.row.names) rows # else NULL
nrow <- 0L
value <- clabs <- NULL
all.levs <- list()
for(i in seq_len(n)) {
## check the arguments, develop row and column labels
xi <- allargs[[i]]
nmi <- nms[i]
## coerce matrix to data frame
if(is.matrix(xi)) allargs[[i]] <- xi <-
as.data.frame(xi, stringsAsFactors = stringsAsFactors)
if(inherits(xi, "data.frame")) {
if(is.null(cl))
cl <- oldClass(xi)
ri <- attr(xi, "row.names")
ni <- length(ri)
if(is.null(clabs)) ## first time
clabs <- names(xi)
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, names(xi))
if( !is.null(pi) ) perm[[i]] <- pi
}
rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
nrow <- nrow + ni
if(is.null(value)) { ## first time ==> setup once:
value <- unclass(xi)
nvar <- length(value)
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
for(j in seq_len(nvar)) {
xj <- value[[j]]
facCol[j] <-
if(!is.null(levels(xj))) {
all.levs[[j]] <- levels(xj)
TRUE # turn categories into factors
} else
is.factor(xj)
ordCol[j] <- is.ordered(xj)
has.dim[j] <- length(dim(xj)) == 2L
}
}
else for(j in seq_len(nvar)) {
xij <- xi[[j]]
if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j
if(facCol[jj]) {
if(length(lij <- levels(xij))) {
all.levs[[jj]] <- unique(c(all.levs[[jj]], lij))
ordCol[jj] <- ordCol[jj] & is.ordered(xij)
} else if(is.character(xij))
all.levs[[jj]] <- unique(c(all.levs[[jj]], xij))
}
}
}
else if(is.list(xi)) {
ni <- range(lengths(xi))
if(ni[1L] == ni[2L])
ni <- ni[1L]
else stop("invalid list argument: all variables should have the same length")
rows[[i]] <- ri <-
as.integer(seq.int(from = nrow + 1L, length.out = ni))
nrow <- nrow + ni
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
if(length(nmi <- names(xi)) > 0L) {
if(is.null(clabs))
clabs <- nmi
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, nmi)
if( !is.null(pi) ) perm[[i]] <- pi
}
}
}
else if(length(xi)) { # 1 new row
rows[[i]] <- nrow <- nrow + 1L
if(make.row.names)
rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow)
}
}
nvar <- length(clabs)
if(nvar == 0L)
nvar <- max(lengths(allargs)) # only vector args
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# if(nvar == 0L)
# return(structure(list(), class = "data.frame",
# row.names = integer()))
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
pseq <- seq_len(nvar)
if(is.null(value)) { # this happens if there has been no data frame
value <- list()
value[pseq] <- list(logical(nrow)) # OK for coercion except to raw.
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
}
names(value) <- clabs
for(j in pseq)
if(length(lij <- all.levs[[j]]))
value[[j]] <-
factor(as.vector(value[[j]]), lij, ordered = ordCol[j])
if(any(has.dim)) {
rmax <- max(unlist(rows))
for(i in pseq[has.dim])
if(!inherits(xi <- value[[i]], "data.frame")) {
dn <- dimnames(xi)
rn <- dn[[1L]]
if(length(rn) > 0L) length(rn) <- rmax
pi <- dim(xi)[2L]
length(xi) <- rmax * pi
value[[i]] <- array(xi, c(rmax, pi), list(rn, dn[[2L]]))
}
}
for(i in seq_len(n)) {
xi <- unclass(allargs[[i]])
if(!is.list(xi))
if(length(xi) != nvar)
xi <- rep(xi, length.out = nvar)
ri <- rows[[i]]
pi <- perm[[i]]
if(is.null(pi)) pi <- pseq
for(j in pseq) {
jj <- pi[j]
xij <- xi[[j]]
if(has.dim[jj]) {
value[[jj]][ri, ] <- xij
## copy rownames
rownames(value[[jj]])[ri] <- rownames(xij)
} else {
## coerce factors to vectors, in case lhs is character or
## level set has changed
value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij
## copy names if any
if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm
}
}
}
if(make.row.names) {
rlabs <- unlist(rlabs)
if(anyDuplicated(rlabs))
rlabs <- make.unique(as.character(rlabs), sep = "")
}
if(is.null(cl)) {
as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE,
stringsAsFactors = stringsAsFactors)
} else {
structure(value, class = cl,
row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs)
}
}

for loop with lists R

I want to create two lists of data frames in a for loop, but I cannot use assign:
dat <- data.frame(name = c(rep("a", 10), rep("b", 13)),
x = c(1,3,4,4,5,3,7,6,5,7,8,6,4,3,9,1,2,3,5,4,6,3,1),
y = c(1.1,3.2,4.3,4.1,5.5,3.7,7.2,6.2,5.9,7.3,8.6,6.3,4.2,3.6,9.7,1.1,2.3,3.2,5.7,4.8,6.5,3.3,1.2))
a <- dat[dat$name == "a",]
b <- dat[dat$name == "b",]
samp <- vector(mode = "list", length = 100)
h <- list(a,b)
hname <- c("a", "b")
for (j in 1:length(h)) {
for (i in 1:100) {
samp[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
assign(paste("samp", hname[j], sep="_"), samp[[i]])
}
}
Instead of lists named samp_a and samp_b I get vectors which contain the result of the 100th sample. I want to get a list samp_a and samp_b, which have all the different samples for dat[dat$name == a,] and dat[dat$name == a,].
How could I do this?
How about creating two different lists and avoiding using assign:
Option 1:
# create empty list
samp_a <-list()
samp_b <- list()
for (j in seq(h)) {
# fill samp_a list
if(j == 1){
for (i in 1:100) {
samp_a[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
# fill samp_b list
} else if(j == 2){
for (i in 1:100) {
samp_b[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
}
}
You could use assign too, shorter answer:
Option 2:
for (j in seq(hname)) {
l = list()
for (i in 1:100) {
l[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
assign(paste0('samp_', hname[j]), l)
rm(l)
}
You could easily use an lapply for this using the rep function. Unless you want a random x, paired with a random y. This will maintain the existing paired order.
dat <- data.frame(name = c(rep("a", 10), rep("b", 13)),
x = c(1,3,4,4,5,3,7,6,5,7,8,6,4,3,9,1,2,3,5,4,6,3,1),
y = c(1.1,3.2,4.3,4.1,5.5,3.7,7.2,6.2,5.9,7.3,8.6,6.3,4.2,3.6,9.7,1.1,2.3,3.2,5.7,4.8,6.5,3.3,1.2))
a <- dat[dat$name == "a",]
b <- dat[dat$name == "b",]
h <- list(a,b)
hname <- c("a", "b")
testfunc <- function(df) {
#df[sample(nrow(df), nrow(df)*0.5), ] #gives you the values in your data frame
sample(nrow(df), nrow(df)*0.5) # just gives you the indices
}
lapply(h, testfunc) # This gives you the standard lapply format, and only gives one a, and one b
samp <- lapply(rep(h, 100), testfunc) # This shows you how to replicate the function n times, giving you 100 a and 100 b data.frames in a list
samp_a <- samp[c(TRUE, FALSE)] # Applies a repeating T/F vector, selecting the odd data.frames, which in this case are the `a` frames.
samp_b <- samp[c(FALSE, TRUE)] # And here, the even data.frames, which are the `b` frames.

why nested If statement doesn't return data frame r?

the nested if statement in this code is working very fine
x <- 4
if(x == 4){
t <- x + 1
if(t == 5){
t + 2
}
}
[1] 7
but when i try the same code on data frame there is no results.
mydata_one <- data.frame(words = c("hello", "everyone"))
mydata_two <- data.frame(words = c("my", "name"))
if(length(mydata_one$word) == 2){
big_data <- rbind(mydata_one, mydata_two)
if(length(big_data) > 3){
big_data[1:3, 1]
}
}
The expected value i am looking for:
[1] hello everyone my
Length will not work with data frame. Use nrow instead to get number of records.
mydata_one <- data.frame(words = c("hello", "everyone"))
mydata_two <- data.frame(words = c("my", "name"))
if(length(mydata_one$word) == 2){
big_data <- rbind(mydata_one, mydata_two)
if(nrow(big_data) > 3){
big_data[1:3, 1]
}
}
Output:
[1] hello everyone my
Levels: everyone hello my name

Converting a vector to a matrix

Here is my code:
n <- 10
set.seed(100)
d <- rep(NA, n)
d[1] <- 0
y <- runif(n)
a <- 5
for (i in (2:(length(y)+1))) {
d[i] <- d[i-1] + y[i-1]
}
store.x <- NULL
for(j in 1:a) {
x <- runif(1, min = 0, max = sum(y))
for (i in 1:(length(y))) {
if(x <= d[i+1] && x > d[i]) {
store.x[j] <- i
break
}
}
}
store.x
Now store.x prints out 7, 9, 4, 6, 8. I want to be able to put these into a matrix where the numbers that store.x prints correspond to the columns and the row is in order of the numbers. So the first entry would be in row 1 column 7, next would be row 2 column 9 and so on. I want to start with a n by n matrix filled with zeros and then add one the row/column that these numbers are in. I'm not sure how to go about doing this. Any help would be appreciated!
So creating a matrix mt that will be filled and then NA's changed to zeros.
n <- 10
set.seed(100)
d <- rep(NA, n)
d[1] <- 0
y <- runif(n)
a <- 220
mt = matrix(nrow = n, ncol = n)
mt[is.na(mt)] = 0
for (i in (2:(length(y)+1))) {
d[i] <- d[i-1] + y[i-1]
}
store.x <- NULL
for(j in 1:a) {
x <- runif(1, min = 0, max = sum(y))
for (i in 1:(length(y))) {
if(x <= d[i+1] && x > d[i]) {
store.x[j] <- i
if(length(which(i == store.x)) > 1){
mt[which(mt[,i] != 0),i] = mt[which(mt[,i] != 0),i] + 1
} else {
mt[(which(rowSums(mt) == 0)[1]),i] = 1
}
break
}
}
}
what i added was this following logic
if(length(which(i == store.x)) > 1){
mt[which(mt[,i] != 0),i] = mt[which(mt[,i] != 0),i] + 1
} else {
mt[(which(rowSums(mt) == 0)[1]),i] = 1
}
if the number created exists in store.x more than once then we find the existing entry (column corresponds to i and row will be the one which is not 0). If the number does not exist we then find the first row which has no entry and use that.

apply function in R

I want to apply a user defined function over a matrix object.
I don't have desired results my input is a 4x4 matrix and I want to get as output a 4x4 matrix with the transformation defined in mapfun function
Where is my error?
Thanks in advance
mapfun <- function(val){
if (val == 1){
res <- "A" }
else{
if (val == 2){
res <- "B"
} else
{
if (val == 3){
res <- "C"
} else
{
res <- "D"
}
}
}
return(res)
}
mat1 <- matrix(sample(c(1,2,3,4), 16, replace=T, prob=c(0.25,0.25,0.25,0.25)),
nrow=4, ncol=4)
mat2 <- apply(mat1, 1, FUN=mapfun)`
We can just the numeric values in 'mat1' as index to replace the corresponding 'LETTERS'. The output will be a vector which can be converted back to a matrix by assigning dim.
`dim<-`(LETTERS[mat1], dim(mat1))
Regarding the Warning message in the mapfun, it would be better to use ifelse instead of if/else as we are dealing with a vector of length greater than 1 in each row of 'mat1'.
mapfun <- function(val){
ifelse(val == 1, 'A',
ifelse(val==2, 'B',
ifelse(val==3, 'C', 'D')))
}
apply(mat1, 1, mapfun)
This could probably work:
mat1[] <- mapply(mapfun,mat1)
Note that this will modify mat1, so you could make a copy of mat1 named mat2 and apply the function to mat2:
mat2<- mat1
mat2[] <- mapply(mapfun,mat1)
You could try this (fun) solution as well. First you coerce input value to a character vector and then use repeatedly replace function and return the result as a matrix again. Maybe there is a way to trick replace function so that it would be used only once.
mapfun <- function(val) {
res <- as.character(val)
res <- replace(x = res, list = which(res == "1"), values = "A")
res <- replace(x = res, list = which(res == "2"), values = "B")
res <- replace(x = res, list = which(res == "3"), values = "C")
res <- replace(x = res, list = which(res == "4"), values = "D")
return(matrix(res, ncol = 4)) # ncol(val)
}
mat1 <- matrix(sample(c(1,2,3,4), 16, replace=T, prob=c(0.25,0.25,0.25,0.25)),
nrow=4, ncol=4)
mat1
mapfun(mat1)

Resources