Replace NA with empty string in a list - r

I have a large list of matrices that looks like this:
$`1`
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
2010 "6 811 529 000" NA NA NA "455 782 000"
2011 "7 531 264 000" NA NA NA "585 609 000"
2012 "8 013 843 000" NA NA NA "702 256 000"
I would like to replace each NA with an empty string, i.e. "".
The solution must not involve converting to a data.frame; I know that after such a conversion x[is.na(x)] <- ""
would solve the issue.
This works for me: print(x, na.print = ""), but I cannot figure out how to store the printed output.

You can do this with lapply:
# Build a sample list of two character matrices that contain NA cells
dat <- list(
  matrix(c(NA, "a", "b", NA), nrow = 2),
  matrix(c(rep("r", times = 8), NA), nrow = 3)
)
dat
# [[1]]
# [,1] [,2]
# [1,] NA "b"
# [2,] "a" NA
#
# [[2]]
# [,1] [,2] [,3]
# [1,] "r" "r" "r"
# [2,] "r" "r" "r"
# [3,] "r" "r" NA
# Swap every NA for "" matrix by matrix; replace() assigns into the matrix,
# so each element keeps its dimensions
dat <- lapply(dat, function(m) replace(m, is.na(m), ""))
dat
# [[1]]
# [,1] [,2]
# [1,] "" "b"
# [2,] "a" ""
#
# [[2]]
# [,1] [,2] [,3]
# [1,] "r" "r" "r"
# [2,] "r" "r" "r"
# [3,] "r" "r" ""

Related

How to extract sublists based on variable name in R

I have lists nested deep within lists, and I want to extract all matrices named a from the code below and store them in a single list, say matlist. Can anyone suggest how to do this? Thank you in advance.
# Build a 3 x 2 nested list. Every leaf is a list holding a random 2 x 2
# matrix `a` and a scalar `b` (3, 2 and 1 for the first, second and third
# outer element). The leaves are created in the same order as the matrices
# are drawn, so the random stream is consumed leaf by leaf.
x <- lapply(c(3, 2, 1), function(b_val) {
  lapply(seq_len(2), function(i) {
    list(a = matrix(rnorm(4), 2, 2), b = b_val)
  })
})
After flattening one level with unlist(x, FALSE), you can use "[[" in lapply with "a" to extract the element named a from each sub-list.
# Flatten x by one level, then take the element named "a" from each sub-list
matlist <- lapply(unlist(x, recursive = FALSE), function(el) el[["a"]])
matlist
#[[1]]
# [,1] [,2]
#[1,] -0.2164749 0.1255995
#[2,] 0.9368159 1.0416349
#
#[[2]]
# [,1] [,2]
#[1,] -2.188259 -0.2364393
#[2,] -3.003292 1.2006254
#
#[[3]]
# [,1] [,2]
#[1,] 1.089767 1.059162
#[2,] -2.043437 1.428467
#
#[[4]]
# [,1] [,2]
#[1,] -0.1984872 1.194170
#[2,] -0.9998112 -2.875852
#
#[[5]]
# [,1] [,2]
#[1,] 1.4556923 -0.05156698
#[2,] -0.4252525 -0.64838966
#
#[[6]]
# [,1] [,2]
#[1,] 0.2450849 0.6129029
#[2,] -0.2372427 0.2555269
Or, in case you want to keep the structure of the sub-lists as purrr::map_depth does, a recursive function is possible:
# Recursively extract the element named `n` from a nested list.
#
# x: a (possibly nested) list, or any other object.
# n: a single name (character string) to look for.
#
# Returns x[[n]] as soon as `n` is found among names(x); otherwise, when x is
# a list, a list of the same length holding the result for each element.
# Anything else (an object that is neither a list nor carries the name) yields
# NULL, so the nesting of the input is mirrored in the output.
f <- function(x, n) {
  # `&&` is the scalar, short-circuiting operator intended for `if` conditions
  if (!is.null(names(x)) && n %in% names(x)) {
    x[[n]]
  } else if (is.list(x)) {
    lapply(x, f, n)
  }
}
matlist <- f(x, "a")
matlist
#[[1]]
#[[1]][[1]]
# [,1] [,2]
#[1,] -0.2164749 0.1255995
#[2,] 0.9368159 1.0416349
#
#[[1]][[2]]
# [,1] [,2]
#[1,] -2.188259 -0.2364393
#[2,] -3.003292 1.2006254
#
#
#[[2]]
#[[2]][[1]]
# [,1] [,2]
#[1,] 1.089767 1.059162
#[2,] -2.043437 1.428467
#
#[[2]][[2]]
# [,1] [,2]
#[1,] -0.1984872 1.194170
#[2,] -0.9998112 -2.875852
#
#
#[[3]]
#[[3]][[1]]
# [,1] [,2]
#[1,] 1.4556923 -0.05156698
#[2,] -0.4252525 -0.64838966
#
#[[3]][[2]]
# [,1] [,2]
#[1,] 0.2450849 0.6129029
#[2,] -0.2372427 0.2555269
This time purrr::map_depth is your friend:
library(purrr)
# Extract the element named "a" two levels down; the outer nesting is kept
map_depth(x, .depth = 2, 'a')
[[1]]
[[1]][[1]]
[,1] [,2]
[1,] 2.0693923 2.142611
[2,] 0.6840833 -1.440975
[[1]][[2]]
[,1] [,2]
[1,] 0.5415685 -0.7262725
[2,] -0.1604015 0.5218570
[[2]]
[[2]][[1]]
[,1] [,2]
[1,] 0.1245714 8.887078e-05
[2,] -0.2137517 -9.737122e-01
[[2]][[2]]
[,1] [,2]
[1,] 0.1993254 0.1116033
[2,] 0.5058125 -1.8523019
[[3]]
[[3]][[1]]
[,1] [,2]
[1,] 0.8661770 -1.76328811
[2,] -0.4559405 -0.08104821
[[3]][[2]]
[,1] [,2]
[1,] 0.7247404 -2.6736933
[2,] 0.8053027 0.5735254
You can use lapply as :
# For each top-level element: flatten it by one level, keep the pieces named
# "a" and stack them row-wise into a single matrix
x1 <- lapply(x, function(grp) {
  parts <- unlist(grp, recursive = FALSE)
  do.call(rbind, parts[names(parts) == 'a'])
})
x1
#[[1]]
# [,1] [,2]
#[1,] 0.3031708 0.09749503
#[2,] 0.9608749 0.51528005
#[3,] -0.6591001 0.09854767
#[4,] -1.6979551 -0.11709249
#[[2]]
# [,1] [,2]
#[1,] -1.11747169 -0.5668559
#[2,] -0.87458122 0.2517329
#[3,] -0.01483449 -1.7775506
#[4,] 1.57139735 -0.3488897
#[[3]]
# [,1] [,2]
#[1,] -0.2880580 -0.07233675
#[2,] 0.2933759 0.73729995
#[3,] 0.1070971 -0.27202774
#[4,] -0.5881883 -1.27391810
If you want everything combined into one you can do do.call(rbind, x1).

Using rbind to combine two one-column variables

I am trying to simply use rbind on two columns, and I use the following (all variables are city names, and R treats them as factors):
# Pull the two city-name columns out of rcffull (factors, per the question)
firstcitynames <- rcffull$X1CityName
secondcitynames <- rcffull$X2CityName
# NOTE(review): rbind() on factors binds their integer codes, not the labels,
# which is why numbers show up instead of city names
allcitynames <- rbind(firstcitynames, secondcitynames)
allcitynames
Then, when I get to View(allcitynames), all I get is a bunch of numbers instead of names:
[,2276] [,2277] [,2278] [,2279] [,2280] [,2281]
[,2282] [,2283] [,2284] [,2285] [,2286] [,2287]
Any suggestions?
You need to convert factors to characters with as.character(df$var)
Here's an illustration
# Two small factors to reproduce the problem
a <- factor(letters[1:10])
b <- factor(LETTERS[1:10])
# Binding the factors directly yields their integer codes
rbind(a,b)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## a 1 2 3 4 5 6 7 8 9 10
## b 1 2 3 4 5 6 7 8 9 10
# Converting to character first keeps the labels
rbind(as.character(a), as.character(b))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j"
## [2,] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J"
Assuming firstcitynames and secondcitynames are of type factors
you can try this
# levels(f) on its own returns only the sorted unique labels, not one label
# per element, so binding the bare levels loses the data (and recycles when
# the two factors have a different number of levels). Indexing the levels by
# the factor itself recovers the label of every element.
rbind(levels(firstcitynames)[firstcitynames],
      levels(secondcitynames)[secondcitynames])
this one also worked:
# as.tibble() is deprecated in the tibble package; as_tibble() is the
# supported spelling. Each column is wrapped in a one-column tibble and
# rbind() then row-binds the two tibbles.
firstcitynames <- tibble::as_tibble(rcffull$X1CityName)
secondcitynames <- tibble::as_tibble(rcffull$X2CityName)
allcitynames <- rbind(firstcitynames, secondcitynames)
allcitynames

R - filtering Matrix based off True/False vector

I have a data structure that can contain both vectors and matrices. I want to filter it based on a true/false column. I can't figure out how to filter both of them successfully.
result <- structure(list(aba = c(1, 2, 3, 4), beta = c("a", "b", "c", "d"),
chi = structure(c(0.438148361863568, 0.889733991585672, 0.0910745360888541,
0.0512442977633327, 0.812013201415539, 0.717306115897372, 0.995319503592327,
0.758843480376527, 0.366544214077294, 0.706843026448041, 0.108310810523108,
0.225777650484815, 0.831163870869204, 0.274351604515687, 0.323493955424055,
0.351171918679029), .Dim = c(4L, 4L))), .Names = c("aba", "beta", "chi"))
> result
$aba
[1] 1 2 3 4
$beta
[1] "a" "b" "c" "d"
$chi
[,1] [,2] [,3] [,4]
[1,] 0.43814836 0.8120132 0.3665442 0.8311639
[2,] 0.88973399 0.7173061 0.7068430 0.2743516
[3,] 0.09107454 0.9953195 0.1083108 0.3234940
[4,] 0.05124430 0.7588435 0.2257777 0.3511719
# Row filter: TRUE keeps a position, FALSE blanks it out. Spelled with
# TRUE/FALSE because T and F are ordinary variables that can be reassigned.
tf <- c(TRUE, FALSE, TRUE, TRUE)
What I would like to do is something like
> lapply(result,function(x) {ifelse(tf,x,NA)})
$aba
[1] 1 NA 3 4
$beta
[1] "a" NA "c" "d"
$chi
[1] 0.43814836 NA 0.09107454 0.05124430
but the $chi matrix structure is lost.
The result I'd expect is
ifelse(matrix(tf,ncol=4,nrow=4),result$chi,NA)
[,1] [,2] [,3] [,4]
[1,] 0.43814836 0.8120132 0.3665442 0.8311639
[2,] NA NA NA NA
[3,] 0.09107454 0.9953195 0.1083108 0.3234940
[4,] 0.05124430 0.7588435 0.2257777 0.3511719
The challenge I'm having trouble solving is how to match the tf vector to the data. It feels like I need to set it using a conditional based on data type, which I'd like to avoid. Thoughts and answers are appreciated.
I don't see how you can avoid either checking the data type or the "dimensions" of the data. As such, I would propose something like:
# Blank out the positions where tf is FALSE: plain vectors are masked
# element-wise, anything with dimensions has the matching rows set to NA
lapply(result, function(el) {
  has_dims <- !is.null(dim(el))
  if (has_dims) {
    el[!tf, ] <- NA
  } else {
    el[!tf] <- NA
  }
  el
})
# $aba
# [1] 1 NA 3 4
#
# $beta
# [1] "a" NA "c" "d"
#
# $chi
# [,1] [,2] [,3] [,4]
# [1,] 0.43814836 0.8120132 0.3665442 0.8311639
# [2,] NA NA NA NA
# [3,] 0.09107454 0.9953195 0.1083108 0.3234940
# [4,] 0.05124430 0.7588435 0.2257777 0.3511719
This seems fairly simple:
# Note: this overwrites tf in place; its FALSE entries become NA
is.na(tf) <- !tf # convert FALSE to NA
# An NA in a row index yields a row of NAs, so the matrix keeps its shape
result$chi[ tf, ] # and use the default behavior of "[" with NA arg
[,1] [,2] [,3] [,4]
[1,] 0.43814836 0.8120132 0.3665442 0.8311639
[2,] NA NA NA NA
[3,] 0.09107454 0.9953195 0.1083108 0.3234940
[4,] 0.05124430 0.7588435 0.2257777 0.3511719
But now I see that you wanted NAs at the corresponding positions of the atomic vectors. Unfortunately "[" with the additional NULL argument would error out on that type of object.

R fill list into column based on index

I have a number of data sets of differing lengths that I am trying to arrange into an ordered data structure.
At present I am trying to insert them into a regular data structure, filling by index number,
with the following code:
# 5 x 6 grid of NA; the first column holds the row keys "A" to "E"
mat <- matrix(NA, nrow = 5, ncol = 6)
mat[, 1] <- LETTERS[seq_len(5)]
vec1 <- c("B", "D", "E")
vec2 <- c("A", "C", "E")
# Position of each row key within vec1 / vec2 (NA when the key is absent)
m1 <- match(mat[, 1], table = vec1)
m2 <- match(mat[, 1], table = vec2)
# Row numbers whose key occurs in vec1 / vec2
x1 <- seq_along(m1)[!is.na(m1)]
x2 <- seq_along(m2)[!is.na(m2)]
I would like to know how to proceed to get:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "A" NA "A" NA NA NA
[2,] "B" "B" NA NA NA NA
[3,] "C" NA "C" NA NA NA
[4,] "D" "D" NA NA NA NA
[5,] "E" "E" "E" NA NA NA
Any suggestions or hints please?
Thanks,
Matt
Try
# match() gives, for each element of the vector, the row of mat whose key
# (column 1) equals it; the element is written into that row
mat[match(vec1, mat[,1]), 2] <- vec1
mat[match(vec2, mat[,1]), 3] <- vec2
mat
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] "A" NA "A" NA NA NA
# [2,] "B" "B" NA NA NA NA
# [3,] "C" NA "C" NA NA NA
# [4,] "D" "D" NA NA NA NA
# [5,] "E" "E" "E" NA NA NA
Or
# Logical row mask: TRUE where the key in column 1 occurs in the vector.
# NOTE(review): values are assigned in row order, so this presumably relies on
# vec1/vec2 being in the same order as the keys in mat[, 1] -- confirm
mat[mat[, 1] %in% vec1, 2] <- vec1
mat[mat[, 1] %in% vec2, 3] <- vec2
Or a more general approach
# General version: the k-th vector of mylist is written into column k + 1
# (column 1 of mat holds the row keys).
mylist <- list(vec1, vec2)
# Row index of every vector element, found by matching against the key column
# (the original referenced an undefined `matchlist` here instead of `mylist`)
indx <- unlist(lapply(mylist, function(v) match(v, mat[, 1])))
# Column index, repeated once per element of the corresponding vector
indx2 <- rep(seq_along(mylist) + 1L, lengths(mylist))
# A two-column (row, col) index matrix addresses individual cells
mat[cbind(indx, indx2)] <- unlist(mylist)
If you have more columns to match, you could also do:
# Keep a copy of mat; mat1 is filled separately further down
mat1 <- mat
# Compare every key in column 1 with every vector element; with arr.ind=TRUE
# the first column of which()'s result is the row of each TRUE cell
indx <- which(outer(mat[,1], c(vec1, vec2), "=="), arr.ind=TRUE)[,1]
# Target column per element: 2 for the elements of vec1, 3 for those of vec2
indx1 <- rep(1:2, c(length(vec1), length(vec2))) +1
# A two-column (row, col) index matrix addresses individual cells
mat[cbind(indx, indx1)] <- c(vec1,vec2)
mat
# [,1] [,2] [,3] [,4] [,5] [,6]
#[1,] "A" NA "A" NA NA NA
#[2,] "B" "B" NA NA NA NA
#[3,] "C" NA "C" NA NA NA
#[4,] "D" "D" NA NA NA NA
#[5,] "E" "E" "E" NA NA NA
Suppose you need to fill a couple more columns based on matching multiple vectors:
vec3 <- c("A", "D")
vec4 <- c("B", "C")
# Collect vec1, vec2, ... by name. The pattern is anchored so that unrelated
# objects whose names merely contain "vec<digit>" are not picked up.
# Note: ls() sorts names alphabetically, so "vec10" would come before "vec2".
veclist <- mget(ls(pattern = "^vec\\d+$"))
unlist(veclist, use.names = FALSE)
#[1] "B" "D" "E" "A" "C" "E" "A" "D" "B" "C"
# Row of each vector element within the key column mat[, 1]
indx <- which(outer(mat[, 1], unlist(veclist), "=="), arr.ind = TRUE)[, 1]
# Target column per element: the k-th vector fills column k + 1
indx1 <- rep(seq_along(veclist) + 1, lengths(veclist))
# A two-column (row, col) index matrix addresses individual cells of mat1
mat1[cbind(indx, indx1)] <- unlist(veclist)
mat1
# [,1] [,2] [,3] [,4] [,5] [,6]
#[1,] "A" NA "A" "A" NA NA
#[2,] "B" "B" NA NA "B" NA
#[3,] "C" NA "C" NA "C" NA
#[4,] "D" "D" NA "D" NA NA
#[5,] "E" "E" "E" NA NA NA

Put each list element's name into the last column of its matrix?

I would like to add each list element's name as the last column of its matrix. What is the best way to do that efficiently?
# Sample input: a named list of two random two-column matrices (5 and 3 rows)
lst <- list(
  a = matrix(runif(5 * 2), nrow = 5, ncol = 2),
  b = matrix(runif(3 * 2), nrow = 3, ncol = 2)
)
$a
[,1] [,2]
[1,] 0.5257330 0.52673079
[2,] 0.2103107 0.23357179
[3,] 0.3745236 0.03687697
[4,] 0.9731074 0.15569480
[5,] 0.2248541 0.60258915
$b
[,1] [,2]
[1,] 0.9901820 0.3648310
[2,] 0.8922225 0.4285105
[3,] 0.6963518 0.5795353
I would like this output, i.e. each element's name added as the last column of its own matrix:
$a
[,1] [,2] [,3]
[1,] "0.52573303761892" "0.526730791199952" "a"
[2,] "0.210310699883848" "0.233571790158749" "a"
[3,] "0.374523550504819" "0.0368769748602062" "a"
[4,] "0.973107369150966" "0.155694802291691" "a"
[5,] "0.224854125175625" "0.602589153219014" "a"
$b
[,1] [,2] [,3]
[1,] "0.990182007197291" "0.36483103595674" "b"
[2,] "0.892222490161657" "0.42851050500758" "b"
[3,] "0.696351842954755" "0.579535307129845" "b"
Any help will be appreciated.
Kevin
A solution that keeps the names from the original list:
# Append each element's name as an extra column; the result keeps the names of
# lst. SIMPLIFY = FALSE guarantees a list: with the default, mapply() would
# collapse the result into one matrix whenever all inputs have equal size.
mapply(function(x, y) cbind(x, y), lst, names(lst), SIMPLIFY = FALSE)
Here's a solution that gives you exactly what you asked for. Based on your expected output, it seems like you're aware that by doing so, you're coercing the numbers in the matrix to characters.
# For every element name: bind the name on as a last column (this coerces the
# numbers to character) and strip the column names that cbind() generates
lapply(names(lst), function(nm) {
  labelled <- cbind(lst[[nm]], nm)
  colnames(labelled) <- NULL
  labelled
})
# [[1]]
# [,1] [,2] [,3]
# [1,] "0.497699242085218" "0.934705231105909" "a"
# [2,] "0.717618508264422" "0.212142521282658" "a"
# [3,] "0.991906094830483" "0.651673766085878" "a"
# [4,] "0.380035179434344" "0.125555095961317" "a"
# [5,] "0.777445221319795" "0.267220668727532" "a"
#
# [[2]]
# [,1] [,2] [,3]
# [1,] "0.386114092543721" "0.86969084572047" "b"
# [2,] "0.0133903331588954" "0.34034899668768" "b"
# [3,] "0.382387957070023" "0.482080115471035" "b"

Resources