Fill cell based on surrounding cells R - r

My initial matrix looks like the following (but my matrix is huge)
A NA A A A D D B NA B C NA C
A NA A B B D C A NA A A NA A
D NA D D A A A C NA C C NA C
structure(c("A", "A", "D", NA, NA, NA, "A", "A", "D", "A", "B",
"D", "A", "B", "A", "D", "D", "A", "D", "C", "A", "B", "A", "C",
NA, NA, NA, "B", "A", "C", "C", "A", "C", NA, NA, NA, "C", "A",
"C"), .Dim = c(3L, 13L), .Dimnames = list(NULL, c("V1", "V2",
"V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12",
"V13")))
I want to substitute the NA with the letters surroundings (left and right), if they are the same, that is, I want something like this:
A A A A A D D B B B C C C
A A A B B D C A A A A A A
D D D D A A A C C C C C C
structure(c("A", "A", "D", "A", "A", "D", "A", "A", "D", "A",
"B", "D", "A", "B", "A", "D", "D", "A", "D", "C", "A", "B", "A",
"C", "B", "A", "C", "B", "A", "C", "C", "A", "C", "C", "A", "C",
"C", "A", "C"), .Dim = c(3L, 13L), .Dimnames = list(NULL, c("V1",
"V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11",
"V12", "V13")))
So, if both surrounding letters are the same, I would change the NA to the surrounding letter, otherwise, I would keep the NA.
Any ideas?
Thank you very much.

Here my approach without using additional librariey:
dat <- matrix(c('A',NA,'A','A',NA,'B',
'B',NA,'A','B',NA,'B',
'B',NA,NA,'B','B',NA
),nrow=3,byrow=TRUE)
t(apply(dat,1,function(x){
pos <- which(!is.na(x))
## if the delta of the index of two non-na elements is 2 -> potential match
dif <- which(diff(pos)==2)
## prevent to process rows with no potential match (woiuld convert NA to "NA"
if(length(dif)){
x[pos[dif]+1] <- sapply(dif,function(y) ifelse(x[pos[y]]==x[pos[y]+2], x[pos[y]],NA))
}
x
}))
Questions are: how do you handle a sequence of NA's and NA's at the margins
Here the version which allows NA sequences to be handeld too
t(apply(dat,1,function(x){
pos <- which(!is.na(x))
## if the delta of the index of two non-na elements is > 1 -> potential match
dif <- diff(pos)
for(cur in which(dif>1)){
if(x[pos[cur]]==x[pos[cur]+dif[cur]]){
x[(pos[cur]+1):(pos[cur]+dif[cur])] <- x[pos[cur]]
}
}
x
}))

I'm not sure if there is an elegant and simply way. Assuming your matrix is named mat, you could use
library(tidyr)
library(dplyr)
library(zoo)
mat %>%
as.data.frame(stringsAsFactors = FALSE) %>%
mutate(id = row_number()) %>%
pivot_longer(cols=-id) %>%
group_by(id) %>%
mutate(value = ifelse(is.na(value) & (na.locf(value) == na.locf(value, fromLast = TRUE)), na.locf(value), value)) %>%
ungroup() %>%
pivot_wider() %>%
select(-id) %>%
as.matrix()
which returns
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
[1,] "A" "A" "A" "A" "A" "D" "D" "B" "B" "B" "C" "C" "C"
[2,] "A" "A" "A" "B" "B" "D" NA "A" "A" "A" "A" "A" "A"
[3,] "D" "D" "D" "D" "A" "A" "A" "C" "C" "C" "C" "C" "C"
Note: I added an NA-value in mat[2,7] for the case of unequal surroundings.
Data
mat <- structure(c("A", "A", "D", NA, NA, NA, "A", "A", "D", "A", "B",
"D", "A", "B", "A", "D", "D", "A", "D", NA, "A", "B", "A", "C",
NA, NA, NA, "B", "A", "C", "C", "A", "C", NA, NA, NA, "C", "A",
"C"), .Dim = c(3L, 13L))

Related

Converting multiple columns to factors and releveling with mutate(across)

dat <- data.frame(Comp1Letter = c("A", "B", "D", "F", "U", "A*", "B", "C"),
Comp2Letter = c("B", "C", "E", "U", "A", "C", "A*", "E"),
Comp3Letter = c("D", "A", "C", "D", "F", "D", "C", "A"))
GradeLevels <- c("A*", "A", "B", "C", "D", "E", "F", "G", "U")
I have a dataframe that looks something like the above (but with many other columns I don't want to change).
The columns I am interested in changing contains lists of letter grades, but are currently character vectors and not in the right order.
I need to convert each of these columns into factors with the correct order. I've been able to get this to work using the code below:
factordat <-
dat %>%
mutate(Comp1Letter = factor(Comp1Letter, levels = GradeLevels)) %>%
mutate(Comp2Letter = factor(Comp2Letter, levels = GradeLevels)) %>%
mutate(Comp3Letter = factor(Comp3Letter, levels = GradeLevels))
However this is super verbose and chews up a lot of space.
Looking at some other questions, I've tried to use a combination of mutate() and across(), as seen below:
factordat <-
dat %>%
mutate(across(c(Comp1Letter, Comp2Letter, Comp3Letter) , factor(levels = GradeLetters)))
However when I do this the vectors remain character vectors.
Could someone please tell me what I'm doing wrong or offer another option?
You can do across as an anonymous function like this:
dat <- data.frame(Comp1Letter = c("A", "B", "D", "F", "U", "A*", "B", "C"),
Comp2Letter = c("B", "C", "E", "U", "A", "C", "A*", "E"),
Comp3Letter = c("D", "A", "C", "D", "F", "D", "C", "A"))
GradeLevels <- c("A*", "A", "B", "C", "D", "E", "F", "G", "U")
dat %>%
tibble::as_tibble() %>%
dplyr::mutate(dplyr::across(c(Comp1Letter, Comp2Letter, Comp3Letter) , ~forcats::parse_factor(., levels = GradeLevels)))
# # A tibble: 8 × 3
# Comp1Letter Comp2Letter Comp3Letter
# <fct> <fct> <fct>
# 1 A B D
# 2 B C A
# 3 D E C
# 4 F U D
# 5 U A F
# 6 A* C D
# 7 B A* C
# 8 C E A
You were close, all that was left to be done was make the factor function anonymous. That can be done either with ~ and . in tidyverse or function(x) and x in base R.

Repeating list elements in R

I have this list:
my_list <- list(V1 = c("A", "B", "C"), V2 = c("A", "B", "D"), V3 = c("A", "B", "E"), V4 = c("A", "B", "F"), V5 = c("A", "B", "G"))
I want to repeat each list element 4 times to get this list:
output <- list(V1 = c("A", "B", "C"), V2 = c("A", "B", "C"), V3 = c("A", "B", "C"), V4 = c("A", "B", "C"), V5 = c("A", "B", "D"), V6 = c("A", "B", "D"), V7 = c("A", "B", "D"), V8 = c("A", "B", "D"), V9 = c("A", "B", "E"), V10 = c("A", "B", "E"), V11 = c("A", "B", "E"), V12 = c("A", "B", "E"), V13 = c("A", "B", "F"), V14 = c("A", "B", "F"), V15 = c("A", "B", "F"), V16 = c("A", "B", "F"), V17 = c("A", "B", "G"), V18 = c("A", "B", "G"), V19 = c("A", "B", "G"), V20 = c("A", "B", "G"))
unlist(rep(list(my_list), each = 4), recursive = F) doesn't do the trick because it repeats the entire list 4 times instead of repeating each individual element 4 times.
If you specify the same index multiple times, R respects the duplication:
letters[c(1, 1, 1)]
[1] "a" "a" "a"
Therefore all we need is a set of indices like c(1, 1, 1, 1, 2, 2, 2, ...). We can create exactly this with rep's "each" argument, and then rename them with names and paste0:
my_list <- list(V1 = c("A", "B", "C"), V2 = c("A", "B", "D"), V3 = c("A", "B", "E"), V4 = c("A", "B", "F"), V5 = c("A", "B", "G"))
list_repeated <- my_list[rep(1:length(my_list), each = 4)]
names(list_repeated) <- paste0('V', 1:length(list_repeated))

How to get common values among all the column

I want to know which values are common among N columns, N-1 columns, N-2 columns etc.
Input
structure(c("a", "b", "c", "d", "e", "f", "a", "z", "d", "b",
"e", "s", "a", "b", "c", "d", "e", "s", "a", "b", "c", "d", "e",
"f"), .Dim = c(6L, 4L), .Dimnames = list(NULL, c("x", "y", "z",
"a")))
Output:
common in all 4 columns :- a , b, e ,d
common in maximum 3 columns :- c
common in maximum 2 columns:- f,s
Reshaping the given matrix from wide to long format (melt() has a method for matrices) and counting by value might be an approach:
library(data.table)
options(datatable.print.class = TRUE)
setDT(melt(dat))[, .N, by = "value"][order(-N)]
value N
<fctr> <int>
1: a 4
2: b 4
3: d 4
4: e 4
5: c 3
6: f 2
7: s 2
8: z 1
However, code needs to be enhanced to deal with duplicates in each column (dat2 has row 1 duplicated):
setDT(melt(dat2))[, unique(value), by = Var2][, .N, by = "V1"][order(-N)]
V1 N
<fctr> <int>
1: a 4
2: b 4
3: d 4
4: e 4
5: c 3
6: f 2
7: s 2
8: z 1
or, more consisely:
setDT(melt(dat2))[, unique(value), by = Var2][, .N, by = "V1"][
, toString(sort(V1)), by = N][order(-N)]
N V1
<int> <char>
1: 4 a, b, d, e
2: 3 c
3: 2 f, s
4: 1 z
N denotes the count of columns a value appears in.
Data
dat <- structure(
c("a", "b", "c", "d", "e", "f", "a", "z", "d", "b", "e", "s",
"a", "b", "c", "d", "e", "s", "a", "b", "c", "d", "e", "f"),
.Dim = c(6L, 4L),
.Dimnames = list(NULL, c("x", "y", "z", "a")))
# second data set with duplicated row 1
dat2 <- dat[c(1, seq_len(nrow(dat))), ]
dat2
x y z a
[1,] "a" "a" "a" "a"
[2,] "a" "a" "a" "a"
[3,] "b" "z" "b" "b"
[4,] "c" "d" "c" "c"
[5,] "d" "b" "d" "d"
[6,] "e" "e" "e" "e"
[7,] "f" "s" "s" "f"

Calculating a rolling division in r data table

I have a data table like this:
a group
1: 1 a
2: 2 a
3: 3 a
4: 4 a
5: 5 a
6: 6 a
The sample can be created from the code below:
structure(list(a = 1:100, group = c("a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b")), .Names = c("a", "group"), row.names = c(NA,
-100L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x0000000004790788>)
For each row in each group I would like to:
take value in column a
divide it by value in column a lagged by 2 and subtract 1
divide it by value in column a lagged by 4 and subtract 1
divide it by value in column a lagged by 6 and subtract 1
sum result of steps 2-4 and return it in a new column
So for rows 1-6, I would have NA, and then 7/5 + 7/3 + 7/1 - 3, 8/6 + 8/4 + 8/2 - 3, 9/7 + 9/5 + 9/3 - 3, 10/8 + 10/6 + 10/4 - 3
So based on the table reported in the first chunk, I would like to get a new column, say metric_1, which would, on the 10th row have the value 2.416667
Please note that the values in column a will not in practice correspond to row numbers, but would be some measurements.
The final output would then look like this:
a group metric_1
1: 1 a NA
2: 2 a NA
3: 3 a NA
4: 4 a NA
5: 5 a NA
6: 6 a NA
7: 7 a 7.733333
8: 8 a 4.333333
9: 9 a 3.085714
10: 10 a 2.416667
I already tried some versions with Reduce which works like a champ if I need to sum some values in a vector, but I haven't been able to tweak it into enabling me to do the division like this.
I'm not sure if this is exactly what you're looking for but perhaps it will help:
library(dplyr)
the_data %>% group_by(group) %>%
mutate(metric_1 = (a/lag(a, 2)-1)+( a/lag(a,4)-1) + (a/lag(a, 6) - 1 )) %>%
ungroup()
found one possible solution as:
dt[,
list(a, Reduce(`+`, lapply(shift(a, seq(2, 6, by = 2)),
function(x) a/x - 1))),
by = "group"]
But it is rather slow.

How to unlist a very messy list in R [duplicate]

This question already has answers here:
How to flatten a list to a list without coercion?
(7 answers)
Closed 7 years ago.
I have a very messy list with multiple levels in the form of:
[[1]]
[[1]][[1]]
[[1]][[1]][[1]]
[1] "D" "B" "A"
[[1]][[1]][[2]]
[1] "E" "B" "A"
[[1]][[2]]
[[1]][[2]][[1]]
[1] "D" "C" "A"
[[1]][[3]]
[[1]][[3]][[1]]
[1] "B" "D" "A"
....
[[5]][[2]][[2]]
[1] "D" "B" "E"
[[5]][[3]]
[1] "C" "E"
...
What is the easiest way to just get a list of the lowest level character vectors, so the first element would be "D""B""A" then the next would be "E""B""A" and so forth?
Thanks!
Edit:
Here's my list in dput format as requested. However, the nesting structure can change and the number of levels can increase. Thus any solution that works by using a presupposed number of levels is no good.
> dput(myResults)
list(list(list(c("D", "B", "A"), c("E", "B", "A")), list(c("D",
"C", "A")), list(c("B", "D", "A"), c("C", "D", "A"), c("E", "D",
"A")), list(c("B", "E", "A"), c("D", "E", "A"))), list(list(c("D",
"A", "B"), c("E", "A", "B")), c("C", "B"), list(c("A", "D", "B"
), c("E", "D", "B")), list(c("A", "E", "B"), c("D", "E", "B"))),
list(list(c("D", "A", "C")), c("B", "C"), list(c("A", "D",
"C")), c("E", "C")), list(list(c("B", "A", "D"), c("C", "A",
"D"), c("E", "A", "D")), list(c("A", "B", "D"), c("E", "B",
"D")), list(c("A", "C", "D")), list(c("A", "E", "D"), c("B",
"E", "D"))), list(list(c("B", "A", "E"), c("D", "A", "E")),
list(c("A", "B", "E"), c("D", "B", "E")), c("C", "E"),
list(c("A", "D", "E"), c("B", "D", "E"))))
Edit
There is a package rlist with a function list.flatten that does this
library(rlist)
list.flatten(yourLst)
A recursive solution (the order is changed though, ie. the leastly nested stuff comes out first)
unlst <- function(lst){
if (!any((inds <- sapply(lst, is.list)))) return(lst)
c(lst[!inds], unlst(unlist(lst[inds], rec=F)))
}
Try this function please.
unlist_messy_list <- function(cur_list){
if (is.atomic(cur_list)){
list(cur_list)
}else{
cl <- lapply(cur_list, unlist_messy_list)
Reduce(c, cl)
}
}
As you have not provided a sample data , I tested it with some cases made up by myself and it works.
unlist_messy_list(list())
unlist_messy_list(list(c(1,2,3), c(4,5,6), c(7,8,9)))
unlist_messy_list(list(c(1,2,3), list(c(4,5,6), c(7,8,9))))
unlist_messy_list(list(c(1,2,3), c(4,5,6), list(c(7,8,9), c(10,11,12))))
unlist_messy_list(list(c(1,2,3), list(c(4,5,6), c(7,8,9), list(10, c(11,12,13), 14, list(c(15,16))))))
I just tested it on your newly provided data, and it works fine. The output is (after dput):
list(c("D", "B", "A"), c("E", "B", "A"), c("D", "C", "A"), c("B", "D", "A"), c("C", "D", "A"), c("E", "D", "A"), c("B", "E", "A"), c("D", "E", "A"), c("D", "A", "B"), c("E", "A", "B"), c("C", "B"), c("A", "D", "B"), c("E", "D", "B"), c("A", "E", "B"), c("D", "E", "B"), c("D", "A", "C"), c("B", "C"), c("A", "D", "C"), c("E", "C"), c("B", "A", "D"), c("C", "A", "D"), c("E", "A", "D"), c("A", "B", "D"), c("E", "B", "D"), c("A", "C", "D"), c("A", "E", "D"), c("B", "E", "D"),c("B", "A", "E"), c("D", "A", "E"), c("A", "B", "E"), c("D", "B", "E"), c("C", "E"), c("A", "D", "E"), c("B", "D", "E"))

Resources