I am trying to get the list of variable names where the count of value "a" is more than or equals 2 and finally store it in a vector Morethan2 and similarly do for the lessthan2. Please help me to achieve this.
df <- data.frame(a1 = c("a","a","b"),a2 = c("a","b","b"),a3 = c("a","a","a"))
for(x in names(df[1:3])){
if(sum(df[x] =="a") >= 2){
more2 = x
} else{less2 = x}}
Lessthan2 = less2
Morethan2 = more2
Expected REsult:
Morethan2 : 'a1','a3'
Lessthan2 : 'a2'
We can use colSums to get the count of "a" in each column and then subset it to get morethan2 and lessthan2.
inds <- colSums(df == "a")
morethan2 <- names(inds)[inds >= 2]
lessthan2 <- names(inds)[inds < 2]
morethan2
#[1] "a1" "a3"
lessthan2
#[1] "a2"
If we want to use for loop, we can do
i <- 1
j <- 1
more2 <- numeric()
less2 <- numeric()
for(x in names(df)) {
if(sum(df[[x]] =="a") >= 2) {
more2[i] = x
i= i + 1
} else {
less2[j] = x
j = j + 1
}
}
df <- data.frame(a1 = c("a","a","b"),a2 = c("a","b","b"),a3 = c("a","a","a"))
more2 <- c()
less2 <- c()
for(x in names(df[1:3])){
if(sum(df[x] =="a") >= 2){
more2[x] = x
} else{less2 [x] = x}}
Related
I noticed a discrepancy with rbind behaviour between matrix and data.frame objects.
With matrix objects everything works as expected:
mat1 <- matrix(nrow=2, ncol=0)
mat2 <- matrix(nrow=2, ncol=0)
dim(rbind(mat1, mat2))
[1] 4 0
But if we turn them to data.frame all of a sudden it looses the number of rows:
> dim(rbind(as.data.frame(mat1), as.data.frame(mat2)))
[1] 0 0
What I would like to understand is - is this behaviour intentional? And if so what is the reasoning for dropping the number of rows in this situation?
EDIT: As noted by #PoGibas - this behaviour is documented in ?rbind. No reason is given and it would probably be hard to infer one. So the question becomes:
How to rbind an arbitrary number of data.frames while always preserving their number of rows?
Workaround could be to use cbind and transposition:
m <- matrix(nrow = 2, ncol = 0)
as.data.frame(t(cbind(as.data.frame(t(m)), as.data.frame(t(m)))))
# Returns: data frame with 0 columns and 4 rows
Here cbind creates a data.frame with 0 rows and 4 columns and we transpose it to matrix with 4 rows and 0 columns.
Another solution is just brutal modification of original base::rbind.data.frame (source on github) function.
You have to remove/comment out two parts there:
Removal of arguments if there length is not a positive integer (length(data.frame()) returns 0). Comment out this part:
allargs <- allargs[lengths(allargs) > 0L]
Return of empty data.frame if attribute names is empty (you can't set attribute to an empty data.frame - names(as.data.frame(mat1)) <- "" returns an error). Comment out this part:
if(nvar == 0L) return(structure(list(), class = "data.frame", row.names = integer()))
Result:
m <- matrix(nrow = 2, ncol = 0)
dim(rbind.data.frame2(as.data.frame(m), as.data.frame(m)))
# Returns: [1] 4 0
Code:
rbind.data.frame2 <- function(..., deparse.level = 1, make.row.names = TRUE,
stringsAsFactors = default.stringsAsFactors())
{
match.names <- function(clabs, nmi)
{
if(identical(clabs, nmi)) NULL
else if(length(nmi) == length(clabs) && all(nmi %in% clabs)) {
## we need 1-1 matches here
m <- pmatch(nmi, clabs, 0L)
if(any(m == 0L))
stop("names do not match previous names")
m
} else stop("names do not match previous names")
}
if(make.row.names)
Make.row.names <- function(nmi, ri, ni, nrow)
{
if(nzchar(nmi)) {
if(ni == 0L) character() # PR8506
else if(ni > 1L) paste(nmi, ri, sep = ".")
else nmi
}
else if(nrow > 0L && identical(ri, seq_len(ni)) &&
identical(unlist(rlabs, FALSE, FALSE), seq_len(nrow)))
as.integer(seq.int(from = nrow + 1L, length.out = ni))
else ri
}
allargs <- list(...)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# allargs <- allargs[lengths(allargs) > 0L]
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if(length(allargs)) {
## drop any zero-row data frames, as they may not have proper column
## types (e.g. NULL).
nr <- vapply(allargs, function(x)
if(is.data.frame(x)) .row_names_info(x, 2L)
else if(is.list(x)) length(x[[1L]])
# mismatched lists are checked later
else length(x), 1L)
if(any(nr > 0L)) allargs <- allargs[nr > 0L]
else return(allargs[[1L]]) # pretty arbitrary
}
n <- length(allargs)
if(n == 0L)
return(structure(list(),
class = "data.frame",
row.names = integer()))
nms <- names(allargs)
if(is.null(nms))
nms <- character(n)
cl <- NULL
perm <- rows <- vector("list", n)
rlabs <- if(make.row.names) rows # else NULL
nrow <- 0L
value <- clabs <- NULL
all.levs <- list()
for(i in seq_len(n)) {
## check the arguments, develop row and column labels
xi <- allargs[[i]]
nmi <- nms[i]
## coerce matrix to data frame
if(is.matrix(xi)) allargs[[i]] <- xi <-
as.data.frame(xi, stringsAsFactors = stringsAsFactors)
if(inherits(xi, "data.frame")) {
if(is.null(cl))
cl <- oldClass(xi)
ri <- attr(xi, "row.names")
ni <- length(ri)
if(is.null(clabs)) ## first time
clabs <- names(xi)
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, names(xi))
if( !is.null(pi) ) perm[[i]] <- pi
}
rows[[i]] <- seq.int(from = nrow + 1L, length.out = ni)
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
nrow <- nrow + ni
if(is.null(value)) { ## first time ==> setup once:
value <- unclass(xi)
nvar <- length(value)
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
for(j in seq_len(nvar)) {
xj <- value[[j]]
facCol[j] <-
if(!is.null(levels(xj))) {
all.levs[[j]] <- levels(xj)
TRUE # turn categories into factors
} else
is.factor(xj)
ordCol[j] <- is.ordered(xj)
has.dim[j] <- length(dim(xj)) == 2L
}
}
else for(j in seq_len(nvar)) {
xij <- xi[[j]]
if(is.null(pi) || is.na(jj <- pi[[j]])) jj <- j
if(facCol[jj]) {
if(length(lij <- levels(xij))) {
all.levs[[jj]] <- unique(c(all.levs[[jj]], lij))
ordCol[jj] <- ordCol[jj] & is.ordered(xij)
} else if(is.character(xij))
all.levs[[jj]] <- unique(c(all.levs[[jj]], xij))
}
}
}
else if(is.list(xi)) {
ni <- range(lengths(xi))
if(ni[1L] == ni[2L])
ni <- ni[1L]
else stop("invalid list argument: all variables should have the same length")
rows[[i]] <- ri <-
as.integer(seq.int(from = nrow + 1L, length.out = ni))
nrow <- nrow + ni
if(make.row.names) rlabs[[i]] <- Make.row.names(nmi, ri, ni, nrow)
if(length(nmi <- names(xi)) > 0L) {
if(is.null(clabs))
clabs <- nmi
else {
if(length(xi) != length(clabs))
stop("numbers of columns of arguments do not match")
pi <- match.names(clabs, nmi)
if( !is.null(pi) ) perm[[i]] <- pi
}
}
}
else if(length(xi)) { # 1 new row
rows[[i]] <- nrow <- nrow + 1L
if(make.row.names)
rlabs[[i]] <- if(nzchar(nmi)) nmi else as.integer(nrow)
}
}
nvar <- length(clabs)
if(nvar == 0L)
nvar <- max(lengths(allargs)) # only vector args
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
# if(nvar == 0L)
# return(structure(list(), class = "data.frame",
# row.names = integer()))
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
pseq <- seq_len(nvar)
if(is.null(value)) { # this happens if there has been no data frame
value <- list()
value[pseq] <- list(logical(nrow)) # OK for coercion except to raw.
all.levs <- vector("list", nvar)
has.dim <- facCol <- ordCol <- logical(nvar)
}
names(value) <- clabs
for(j in pseq)
if(length(lij <- all.levs[[j]]))
value[[j]] <-
factor(as.vector(value[[j]]), lij, ordered = ordCol[j])
if(any(has.dim)) {
rmax <- max(unlist(rows))
for(i in pseq[has.dim])
if(!inherits(xi <- value[[i]], "data.frame")) {
dn <- dimnames(xi)
rn <- dn[[1L]]
if(length(rn) > 0L) length(rn) <- rmax
pi <- dim(xi)[2L]
length(xi) <- rmax * pi
value[[i]] <- array(xi, c(rmax, pi), list(rn, dn[[2L]]))
}
}
for(i in seq_len(n)) {
xi <- unclass(allargs[[i]])
if(!is.list(xi))
if(length(xi) != nvar)
xi <- rep(xi, length.out = nvar)
ri <- rows[[i]]
pi <- perm[[i]]
if(is.null(pi)) pi <- pseq
for(j in pseq) {
jj <- pi[j]
xij <- xi[[j]]
if(has.dim[jj]) {
value[[jj]][ri, ] <- xij
## copy rownames
rownames(value[[jj]])[ri] <- rownames(xij)
} else {
## coerce factors to vectors, in case lhs is character or
## level set has changed
value[[jj]][ri] <- if(is.factor(xij)) as.vector(xij) else xij
## copy names if any
if(!is.null(nm <- names(xij))) names(value[[jj]])[ri] <- nm
}
}
}
if(make.row.names) {
rlabs <- unlist(rlabs)
if(anyDuplicated(rlabs))
rlabs <- make.unique(as.character(rlabs), sep = "")
}
if(is.null(cl)) {
as.data.frame(value, row.names = rlabs, fix.empty.names = TRUE,
stringsAsFactors = stringsAsFactors)
} else {
structure(value, class = cl,
row.names = if(is.null(rlabs)) .set_row_names(nrow) else rlabs)
}
}
I want to create two lists of data frames in a for loop, but I cannot use assign:
dat <- data.frame(name = c(rep("a", 10), rep("b", 13)),
x = c(1,3,4,4,5,3,7,6,5,7,8,6,4,3,9,1,2,3,5,4,6,3,1),
y = c(1.1,3.2,4.3,4.1,5.5,3.7,7.2,6.2,5.9,7.3,8.6,6.3,4.2,3.6,9.7,1.1,2.3,3.2,5.7,4.8,6.5,3.3,1.2))
a <- dat[dat$name == "a",]
b <- dat[dat$name == "b",]
samp <- vector(mode = "list", length = 100)
h <- list(a,b)
hname <- c("a", "b")
for (j in 1:length(h)) {
for (i in 1:100) {
samp[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
assign(paste("samp", hname[j], sep="_"), samp[[i]])
}
}
Instead of lists named samp_a and samp_b I get vectors which contain the result of the 100th sample. I want to get a list samp_a and samp_b, which have all the different samples for dat[dat$name == a,] and dat[dat$name == a,].
How could I do this?
How about creating two different lists and avoiding using assign:
Option 1:
# create empty list
samp_a <-list()
samp_b <- list()
for (j in seq(h)) {
# fill samp_a list
if(j == 1){
for (i in 1:100) {
samp_a[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
# fill samp_b list
} else if(j == 2){
for (i in 1:100) {
samp_b[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
}
}
You could use assign too, shorter answer:
Option 2:
for (j in seq(hname)) {
l = list()
for (i in 1:100) {
l[[i]] <- sample(1:nrow(h[[j]]), nrow(h[[j]])*0.5)
}
assign(paste0('samp_', hname[j]), l)
rm(l)
}
You could easily use an lapply for this using the rep function. Unless you want a random x, paired with a random y. This will maintain the existing paired order.
dat <- data.frame(name = c(rep("a", 10), rep("b", 13)),
x = c(1,3,4,4,5,3,7,6,5,7,8,6,4,3,9,1,2,3,5,4,6,3,1),
y = c(1.1,3.2,4.3,4.1,5.5,3.7,7.2,6.2,5.9,7.3,8.6,6.3,4.2,3.6,9.7,1.1,2.3,3.2,5.7,4.8,6.5,3.3,1.2))
a <- dat[dat$name == "a",]
b <- dat[dat$name == "b",]
h <- list(a,b)
hname <- c("a", "b")
testfunc <- function(df) {
#df[sample(nrow(df), nrow(df)*0.5), ] #gives you the values in your data frame
sample(nrow(df), nrow(df)*0.5) # just gives you the indices
}
lapply(h, testfunc) # This gives you the standard lapply format, and only gives one a, and one b
samp <- lapply(rep(h, 100), testfunc) # This shows you how to replicate the function n times, giving you 100 a and 100 b data.frames in a list
samp_a <- samp[c(TRUE, FALSE)] # Applies a repeating T/F vector, selecting the odd data.frames, which in this case are the `a` frames.
samp_b <- samp[c(FALSE, TRUE)] # And here, the even data.frames, which are the `b` frames.
the nested if statement in this code is working very fine
x <- 4
if(x == 4){
t <- x + 1
if(t == 5){
t + 2
}
}
[1] 7
but when i try the same code on data frame there is no results.
mydata_one <- data.frame(words = c("hello", "everyone"))
mydata_two <- data.frame(words = c("my", "name"))
if(length(mydata_one$word) == 2){
big_data <- rbind(mydata_one, mydata_two)
if(length(big_data) > 3){
big_data[1:3, 1]
}
}
The expected value i am looking for:
[1] hello everyone my
Length will not work with data frame. Use nrow instead to get number of records.
mydata_one <- data.frame(words = c("hello", "everyone"))
mydata_two <- data.frame(words = c("my", "name"))
if(length(mydata_one$word) == 2){
big_data <- rbind(mydata_one, mydata_two)
if(nrow(big_data) > 3){
big_data[1:3, 1]
}
}
Output:
[1] hello everyone my
Levels: everyone hello my name
Here is my code:
n <- 10
set.seed(100)
d <- rep(NA, n)
d[1] <- 0
y <- runif(n)
a <- 5
for (i in (2:(length(y)+1))) {
d[i] <- d[i-1] + y[i-1]
}
store.x <- NULL
for(j in 1:a) {
x <- runif(1, min = 0, max = sum(y))
for (i in 1:(length(y))) {
if(x <= d[i+1] && x > d[i]) {
store.x[j] <- i
break
}
}
}
store.x
Now store.x prints out 7, 9, 4, 6, 8. I want to be able to put these into a matrix where the numbers that store.x prints correspond to the columns and the row is in order of the numbers. So the first entry would be in row 1 column 7, next would be row 2 column 9 and so on. I want to start with a n by n matrix filled with zeros and then add one the row/column that these numbers are in. I'm not sure how to go about doing this. Any help would be appreciated!
So creating a matrix mt that will be filled and then NA's changed to zeros.
n <- 10
set.seed(100)
d <- rep(NA, n)
d[1] <- 0
y <- runif(n)
a <- 220
mt = matrix(nrow = n, ncol = n)
mt[is.na(mt)] = 0
for (i in (2:(length(y)+1))) {
d[i] <- d[i-1] + y[i-1]
}
store.x <- NULL
for(j in 1:a) {
x <- runif(1, min = 0, max = sum(y))
for (i in 1:(length(y))) {
if(x <= d[i+1] && x > d[i]) {
store.x[j] <- i
if(length(which(i == store.x)) > 1){
mt[which(mt[,i] != 0),i] = mt[which(mt[,i] != 0),i] + 1
} else {
mt[(which(rowSums(mt) == 0)[1]),i] = 1
}
break
}
}
}
what i added was this following logic
if(length(which(i == store.x)) > 1){
mt[which(mt[,i] != 0),i] = mt[which(mt[,i] != 0),i] + 1
} else {
mt[(which(rowSums(mt) == 0)[1]),i] = 1
}
if the number created exists in store.x more than once then we find the existing entry (column corresponds to i and row will be the one which is not 0). If the number does not exist we then find the first row which has no entry and use that.
I want to apply a user defined function over a matrix object.
I don't have desired results my input is a 4x4 matrix and I want to get as output a 4x4 matrix with the transformation defined in mapfun function
Where is my error?
Thanks in advance
mapfun <- function(val){
if (val == 1){
res <- "A" }
else{
if (val == 2){
res <- "B"
} else
{
if (val == 3){
res <- "C"
} else
{
res <- "D"
}
}
}
return(res)
}
mat1 <- matrix(sample(c(1,2,3,4), 16, replace=T, prob=c(0.25,0.25,0.25,0.25)),
nrow=4, ncol=4)
mat2 <- apply(mat1, 1, FUN=mapfun)`
We can just the numeric values in 'mat1' as index to replace the corresponding 'LETTERS'. The output will be a vector which can be converted back to a matrix by assigning dim.
`dim<-`(LETTERS[mat1], dim(mat1))
Regarding the Warning message in the mapfun, it would be better to use ifelse instead of if/else as we are dealing with a vector of length greater than 1 in each row of 'mat1'.
mapfun <- function(val){
ifelse(val == 1, 'A',
ifelse(val==2, 'B',
ifelse(val==3, 'C', 'D')))
}
apply(mat1, 1, mapfun)
This could probably work:
mat1[] <- mapply(mapfun,mat1)
Note that this will modify mat1, so you could make a copy of mat1 named mat2 and apply the function to mat2:
mat2<- mat1
mat2[] <- mapply(mapfun,mat1)
You could try this (fun) solution as well. First you coerce input value to a character vector and then use repeatedly replace function and return the result as a matrix again. Maybe there is a way to trick replace function so that it would be used only once.
mapfun <- function(val) {
res <- as.character(val)
res <- replace(x = res, list = which(res == "1"), values = "A")
res <- replace(x = res, list = which(res == "2"), values = "B")
res <- replace(x = res, list = which(res == "3"), values = "C")
res <- replace(x = res, list = which(res == "4"), values = "D")
return(matrix(res, ncol = 4)) # ncol(val)
}
mat1 <- matrix(sample(c(1,2,3,4), 16, replace=T, prob=c(0.25,0.25,0.25,0.25)),
nrow=4, ncol=4)
mat1
mapfun(mat1)