Related
I have a list (dput() below) that has 4 datasets. I also have a variable called 'u' with 4 characters. I have made a video here which explains what I want, and a spreadsheet is here.
The spreadsheet does not look exactly like my data, but I am using it just as an example. My original list has 4 datasets, whereas the spreadsheet has 3 datasets.
Essentially, I have some characters (A, B, C, D) and I want to find the proportion of times each character occurs in each column of the 3 groups of datasets. (Check the video; it's hard to explain by typing it out.)
u<- c("D", "B", "C", "A")
l<- list(`0` = structure(list(X70 = "D", X71 = "C", X72 = "C", X73 = "A", X74 = "B", X75 = "C", X76 = "D", X77 = NA_character_, X78 = "B", X79 = "D", X80 = "C", Q = 1), row.names = 32L, class = "data.frame"), `1` = structure(list(X70 = c("D", "B", "D", "D", "B", "D", "D", "D", "D", "D", "D"), X71 = c("B", "B", "C", "C", "C", NA, "D", "B", "C", "A", "C"), X72 = c("A", "A", "C", "B", "C", "C", "C", "C", "D", "B", NA), X73 = c("B", "C", "C", "B", "C", "D", "A", "B", "C", "C", NA), X74 = c("B", "A", "C", "D", "B", "D", NA, "D", "D", "D", NA), X75 = c("C", "C", "B", "C", "D", "D", "C", "A", "C", "C", "C"), X76 = c("D", "A", "D", "B", "D", "C", "D", "A", "A", "D", "B"), X77 = c("D", "C", "B", "B", "B", "C", "B", "B", "B", "B", "D"), X78 = c("B", "C", "C", "B", "A", "A", "C", "B", "A", "C", NA), X79 = c("C", "C", NA, NA, "D", "A", "A", "A", "D", "A", "D"), X80 = c("B", "A", NA, NA, "B", "C", "B", NA, "B", "C", "A"), Q = c(2, 2, 1, 1, 2, 2, 1, 1, 4, 3, 1)), row.names = c(8L, 10L, 12L, 17L, 25L, 27L, 28L, 33L, 35L, 38L, 45L), class = "data.frame"), `2` = structure(list(X70 = c("D", "D", "D", "B", "D", "C", "D", "D", "D", "D", "D", "D"), X71 = c("A", "B", "C", "C", "A", "A", "C", "B", "C", "C", "D", "B"), X72 = c("D", "C", "D", "A", "A", "C", "D", "C", NA, "D", "C", "B"), X73 = c("B", "D", "D", "C", "B", "D", "D", "D", NA, NA, "C", "A"), X74 = c("D", "C", "B", "D", "C", "B", "C", "C", "B", NA, "C", "D"), X75 = c("B", "C", "C", "C", NA, "C", "B", "C", "C", "C", "B", "C"), X76 = c("A", "D", "D", "D", NA, "D", "D", "A", "D", "D", "D", "D"), X77 = c("B", "B", "D", "B", NA, "B", "D", "B", "B", "B", "B", "B"), X78 = c("C", "D", "C", "B", NA, "D", "C", "C", "B", "D", "C", NA), X79 = c("A", "D", "D", "D", NA, "D", "A", NA, "A", "D", "B", NA), X80 = c(NA, "C", "C", "A", NA, "C", "C", NA, "B", "C", "C", NA), Q = c(2, 3, 3, 1, 3, 1, 2, 2, 1, 2, 2, 1)), row.names = c(4L, 5L, 6L, 11L, 15L, 16L, 21L, 22L, 26L, 37L, 39L, 43L), class = "data.frame"), `3` = structure(list(X70 = c("A", "A", 
"D", "C", "D", "D", "D", "D", NA, "D", "D", "D"), X71 = c("B", "C", "D", "D", "C", "C", "B", "C", "C", "C", "A", "D"), X72 = c("B", "C", NA, "B", "A", "C", "B", "A", "C", "C", "D", "B"), X73 = c(NA, "C", "C", "A", "D", "C", "A", "A", "D", "B", "D", "B"), X74 = c(NA, "C", "D", "B", "A", "D", NA, "D", "B", "A", "D", "A"), X75 = c(NA, "C", "B", "D", "C", "C", "C", "C", "C", "B", "C", "D"), X76 = c(NA, "D", "A", "B", "A", "D", "D", "D", "D", "D", "D", "D"), X77 = c(NA, "B", "B", "B", "C", "B", "A", "B", NA, "C", "D", "D"), X78 = c(NA, "C", "C", "B", "C", "B", "A", "C", "D", "C", "C", "C"), X79 = c(NA, "D", "D", NA, "B", "D", "A", "D", "A", "D", "D", "A"), X80 = c(NA, "C", "C", NA, "D", "C", "C", "C", "C", "C", "B", "C"), Q = c(2, 2, 2, 2, 4, 2, 4, 4, 4, 3, 3, 2)), row.names = c(2L, 13L, 14L, 18L, 19L, 20L, 29L, 30L, 34L, 36L, 41L, 44L), class = "data.frame"), `4` = structure(list(X70 = c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D"), X71 = c("A", NA, "A", "B", "C", "A", "A", "C", "B", "C", "C", "C"), X72 = c("B", "C", "C", "C", NA, "C", "B", "A", "C", "B", NA, "A"), X73 = c(NA, "D", "D", "D", "B", "D", "D", "D", "C", "A", "A", "C"), X74 = c("C", "A", "C", "D", "C", "C", "A", "A", "C", "D", "D", "D"), X75 = c("C", "C", "C", "C", "C", "C", "C", "C", "C", "D", "C", "C"), X76 = c("D", "D", "D", "D", "D", "D", "D", "D", "A", "D", "D", "A"), X77 = c(NA, "B", "D", "B", NA, "B", "B", "B", "C", "D", NA, "C"), X78 = c("C", "C", "C", "C", "A", "A", "C", "A", "C", "C", "C", "C"), X79 = c("D", "D", "A", "D", "D", "A", "D", "D", "A", "D", "C", "C"), X80 = c("C", "C", "C", "C", NA, "C", "C", "C", "C", "C", "C", "A"), Q = c(2, 4, 4, 3, 2, 4, 2, 4, 1, 1, 2, 4)), row.names = c(1L, 3L, 7L, 9L, 23L, 24L, 31L, 40L, 42L, 46L, 47L, 48L), class = "data.frame"))
I read through the matplot documentation a bit and wanted to add some features, but I could not figure out how to draw them using matplot. However, someone helped me a long time ago to create this beautiful graph using ggplot. It has A, B, C, D at the end of each line and looks very nice. This is exactly the kind of graph I want to create. Would there be any way to recreate this graph for each table of out? I have posted the code below. It creates the graph for one table, but I want to create a graph for each table, just like we did using matplot.
library(tidyverse)
d = structure(c(0.129310344827586, 0.258620689655172, 0.318965517241379,
0.293103448275862, 0.12972972972973, 0.210810810810811, 0.345945945945946,
0.313513513513514, 0.0845070422535211, 0.154929577464789, 0.338028169014085,
0.422535211267606, 0.226415094339623, 0.0943396226415094, 0.367924528301887,
0.311320754716981), .Dim = c(4L, 4L), .Dimnames = list(c("A", "B", "C", "D"), c("1", "2", "3", "4")))
# Reshape the proportion matrix `d` (rows = characters, columns = groups) into
# long format: one row per (character, group) pair.
d <- d %>%
  data.frame() %>% # data.frame() prefixes the numeric column names with "X"
  rownames_to_column(var = "Groups") %>%
  # every column except the label column, however many groups there are
  pivot_longer(cols = -Groups) %>%
  group_by(Groups) %>%
  mutate(
    name = as.numeric(sub("X", "", name)), # strip the "X" prefix again
    n = row_number() # position of the point within its line
  )

# One line per character, with the character printed next to its last point.
# `d` is deliberately still grouped by `Groups`, so `n == max(n)` selects the
# last point of each line separately.
ggplot(data = d, aes(x = name, y = value, color = factor(Groups))) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0); `size` still
  # works there but emits a deprecation warning
  geom_path(aes(group = factor(Groups)), linewidth = 0.7) +
  geom_point(size = 2) +
  geom_text(
    data = filter(d, n == max(n)),
    aes(label = Groups),
    nudge_x = 0.2
  ) +
  labs(x = "Group", y = "P") +
  theme_bw() +
  theme(legend.position = "none")
We can loop over the list 'l' with lapply, then get the table for each of the columns by looping over the columns with sapply after converting the column to a factor with levels specified as 'u'. We then get the proportions, transpose, convert to a data.frame (as.data.frame) and split by row (asplit - MARGIN = 1). Finally, we use transpose from purrr to change the structure so that each column from all the list elements is grouped as a single unit, and bind them with bind_rows.
library(dplyr)
library(purrr)
# Build `out`: one table per column of the datasets in `l`, holding for every
# dataset (`grp`) the proportion of non-missing entries equal to each level
# in `u`.
out <- l %>%
  lapply(function(dat) {
    # levels x columns matrix of proportions for this dataset
    share_matrix <- sapply(dat, function(column) {
      counts <- table(factor(unlist(column), levels = u))
      proportions(counts)
    })
    # one named proportion vector per column of the dataset
    asplit(as.data.frame(t(share_matrix)), 1)
  }) %>%
  # regroup from dataset -> column to column -> dataset
  transpose() %>%
  # stack the datasets of each column, keeping the dataset name in `grp`
  map(bind_rows, .id = "grp")
-output
out
$X70
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 1 0 0 0
2 1 0.818 0.182 0 0
3 2 0.833 0.0833 0.0833 0
4 3 0.727 0 0.0909 0.182
5 4 1 0 0 0
$X71
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 1 0
2 1 0.1 0.3 0.5 0.1
3 2 0.0833 0.25 0.417 0.25
4 3 0.25 0.167 0.5 0.0833
5 4 0 0.182 0.455 0.364
$X72
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 1 0
2 1 0.1 0.2 0.5 0.2
3 2 0.364 0.0909 0.364 0.182
4 3 0.0909 0.364 0.364 0.182
5 4 0 0.3 0.5 0.2
$X73
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 0 1
2 1 0.1 0.3 0.5 0.1
3 2 0.5 0.2 0.2 0.1
4 3 0.273 0.182 0.273 0.273
5 4 0.545 0.0909 0.182 0.182
$X74
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 0 1 0 0
2 1 0.556 0.222 0.111 0.111
3 2 0.273 0.273 0.455 0
4 3 0.4 0.2 0.1 0.3
5 4 0.333 0 0.417 0.25
$X75
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 1 0
2 1 0.182 0.0909 0.636 0.0909
3 2 0 0.273 0.727 0
4 3 0.182 0.182 0.636 0
5 4 0.0833 0 0.917 0
$X76
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 1 0 0 0
2 1 0.455 0.182 0.0909 0.273
3 2 0.818 0 0 0.182
4 3 0.727 0.0909 0 0.182
5 4 0.833 0 0 0.167
$X77
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 NaN NaN NaN NaN
2 1 0.182 0.636 0.182 0
3 2 0.182 0.818 0 0
4 3 0.2 0.5 0.2 0.1
5 4 0.222 0.556 0.222 0
$X78
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 0 1 0 0
2 1 0 0.3 0.4 0.3
3 2 0.3 0.2 0.5 0
4 3 0.0909 0.182 0.636 0.0909
5 4 0 0 0.75 0.25
$X79
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 1 0 0 0
2 1 0.333 0 0.222 0.444
3 2 0.556 0.111 0 0.333
4 3 0.6 0.1 0 0.3
5 4 0.583 0 0.167 0.25
$X80
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 1 0
2 1 0 0.5 0.25 0.25
3 2 0 0.125 0.75 0.125
4 3 0.1 0.1 0.8 0
5 4 0 0 0.909 0.0909
$Q
# A tibble: 5 x 5
grp D B C A
<chr> <dbl> <dbl> <dbl> <dbl>
1 0 NaN NaN NaN NaN
2 1 NaN NaN NaN NaN
3 2 NaN NaN NaN NaN
4 3 NaN NaN NaN NaN
5 4 NaN NaN NaN NaN
For plotting a single component, extract the list element with [[, remove the first 'grp' column ([-1]) and use matplot
# Plot the first table of `out`: one line per character (the columns after
# `grp`), with the datasets (rows) along the x-axis.
matplot(out[[1]][-1], type = "l", col = 1:4, lty = 1:4, xaxt = "n")
# The x positions are the row indices, so label them with the group ids rather
# than with the column names.
axis(side = 1, at = seq_len(nrow(out[[1]])), labels = out[[1]]$grp)
# Draw the legend with the same colours and line types as the lines themselves.
legend("topleft", legend = colnames(out[[1]][-1]), col = 1:4, lty = 1:4)
if we want this to be applied on all elements, loop with lapply
# Draw one panel per table of `out` on a 4 x 3 grid. The all-NaN `Q` table is
# skipped by name instead of by its position in the list.
old_par <- par(mfrow = c(4, 3))
out2 <- lapply(out[names(out) != "Q"], function(x) {
  matplot(x[-1], type = "l", col = 1:4, lty = 1:4, xaxt = "n")
  # the x positions are the rows of the table, i.e. the groups in `grp`
  axis(side = 1, at = seq_len(nrow(x)), labels = x$grp)
  # legend entries use the same colours and line types as the lines
  legend("topleft", legend = colnames(x[-1]), col = 1:4, lty = 1:4)
})
# restore the previous layout so that later plots are not squeezed into the grid
par(old_par)
-output
I have some data in JSON format, that using jsonlite I was able to read into a data frame in R. The data I'm working with is in lists, where each list contains character vectors of different lengths. For example:
values
<list>
1 A
2 B
3 character(0)
4 C
5 c(A, C)
6 D
7 c(B, C)
8 c(D, E)
Or, to reproduce in full:
structure(list(values1 = list("C", "E", character(0), "C", character(0),
"C", c("D", "A"), c("D", "A"), "D", "D", character(0), "D",
"A", "E", "E", "A", "A", "A", "B", "A", "A", "A", "A", "D",
"E", "E", "A", character(0), "E", character(0), character(0),
"B", character(0), "C", "C", "C", "C", "C", character(0),
character(0), character(0), character(0), character(0), character(0),
character(0), character(0), "E", c("E", "D"), c("E", "D"),
"B", "E", "E", "A", "A", "B", "B", "B", "B", "B", "D", "D",
character(0), character(0), character(0), character(0), "B",
c("C", "A"), character(0), "A", "B", "B", "B", "B", "B",
"C", "C", character(0), character(0), character(0), character(0),
"E", "E", character(0), character(0), "B", "E", "A", "C",
"B", "C", "A", "C", "C", "C", "C", "C", "A", character(0),
"A", character(0), "A", "D", "B", "A", "C", "A", "A", "A",
"C", "A", "A", "B", "D", "D", character(0), character(0),
character(0), character(0), character(0), character(0), "C",
"B", character(0), "B", character(0), "B", "E", "D", c("C",
"E"), c("C", "E"), "D", "D", "C", "C", character(0), "C",
character(0), "C", "C", "D", "E", "E", "B", "B", "C", "C",
"B", "B", "E", character(0), character(0), character(0),
character(0), "B", "B", "E", "A", character(0), "B", "A",
character(0), "A", "D", "D", c("D", "A"), c("D", "A"), c("D",
"B"), c("D", "B"), character(0), "E", character(0), "E",
"E", "E", "E", character(0), "D", character(0), "E", "A",
"A", "A", "A", "A", "D", "D", c("B", "A"), c("B", "A"), "C",
character(0), character(0), "B", "E", "E", "B", c("E", "B"
), "A", "A", "B", "B", "D", "D", "A", "A", character(0),
"A", "C", character(0), "C", "C", "B", "B", "A", "A", "B",
"B", "A", "E", "C", "C", "D", "D", "D", c("C", "E"), character(0),
character(0), character(0), character(0), "E", c("E", "A"
), "E", character(0), character(0), "A", "D", "D", c("D",
"A"), c("D", "A"), character(0), character(0), character(0),
character(0), character(0), character(0), "B", "C", "C",
"C", "C", "B", "B", c("C", "E"), c("C", "E"), "E", "C", "C",
"C", c("E", "D", "B", "A"), c("E", "D", "B", "A"), character(0),
"A", character(0), "A", c("C", "A"), c("C", "A"), c("C",
"A"), "E", "E", "A", character(0), "C", c("E", "D"), c("E",
"D"), character(0), character(0), character(0), character(0),
"A", "A", "A", "A", "D", "E", c("C", "D"), "E", character(0),
character(0), character(0), "D", "D", character(0), "A",
"B", character(0), character(0), character(0), character(0),
"D", "D", "D", "E", "E", "D", "D", "B", "B", "B", "E", "D",
"C", "D", "C", "C", "E", "E", "A", character(0), character(0),
"B", character(0), "B", "B", "B", "B", character(0), "A",
"C", "C", "C", "D", "D", "D", character(0), "D", character(0),
"D", "B", "A", character(0), "B", "D", "A", "A", character(0),
"A", "D", "D", "E", "E", "B", character(0), character(0),
character(0), "C", "C", "C", "B", "B", "A", "D", c("C", "B"
), character(0), "D", "C", "C", character(0), character(0),
"D", "D", "D", c("B", "A"), "E", "A", "A", character(0),
"E", "C", "B", character(0), character(0), character(0),
character(0), "E", "E", "D", "C", "C", "E", "E", "E", "E",
character(0), "E", "E", "A", "B", "A", "A", "D", "E", "E",
"B", "B", character(0), character(0), "D", "D", "C", "D",
"D", "E", character(0), "E", character(0), "E", c("D", "B"
), character(0), "B", character(0), character(0), "D", character(0),
"D", "D", "D", "C", character(0), "E", "E", c("E", "B"),
c("E", "B"), "E", "E", "D", "D", "B", c("E", "A"), c("E",
"A"), c("C", "D"), c("C", "D"), c("C", "B"), c("C", "B"),
character(0), "C", "B"), values2 = list("C", "E", "C",
"C", "C", "C", c("D", "A"), c("D", "A"), "D", "D", "D", "D",
"A", "E", "E", "A", "A", "A", "B", "A", "A", "A", "A", "D",
"E", "E", "A", "E", "E", character(0), "B", "B", "C", "C",
"C", "C", "C", "C", c("E", "A"), c("E", "A"), c("E", "A"),
c("E", "A"), c("C", "A"), c("C", "A"), c("C", "A"), c("C",
"A"), "E", c("E", "D"), c("E", "D"), "B", "E", "E", "A",
"A", "B", "B", "B", "B", "B", "D", "D", c("C", "B"), c("C",
"B"), c("C", "B"), c("C", "B"), "B", c("C", "A"), character(0),
"A", "B", "B", "B", "B", "B", "C", "C", c("E", "D"), c("E",
"D"), c("E", "D"), c("E", "D"), "E", "E", character(0), character(0),
"B", "E", "A", "C", "B", "C", "A", "C", "C", "C", "C", "C",
"A", "A", "A", "A", "A", "D", "B", "A", "C", "A", "A", "A",
"C", "A", "A", "B", "D", "D", "E", "E", "E", "E", character(0),
character(0), "C", "B", "B", "B", "B", "B", "E", "D", c("C",
"E"), c("C", "E"), "D", "D", "C", "C", "C", "C", "C", "C",
"C", "D", "E", "E", "B", "B", "C", "C", "B", "B", "E", "B",
"B", "B", "B", "B", "B", "E", "A", "B", "B", "A", "A", "A",
"D", "D", c("D", "A"), c("D", "A"), c("D", "B"), c("D", "B"
), "E", "E", "E", "E", "E", "E", "E", "D", "D", "E", "E",
"A", "A", "A", "A", "A", "D", "D", c("B", "A"), c("B", "A"
), "C", character(0), character(0), "B", "E", "E", "B", c("E",
"B"), "A", "A", "B", "B", "D", "D", "A", "A", "A", "A", "C",
"C", "C", "C", "B", "B", "A", "A", "B", "B", "A", "E", "C",
"C", "D", "D", "D", c("C", "E"), "D", "D", "D", "D", "E",
c("E", "A"), "E", character(0), character(0), "A", "D", "D",
c("D", "A"), c("D", "A"), c("D", "A"), c("D", "A"), c("D",
"A"), c("D", "A"), c("D", "A"), c("D", "A"), "B", "C", "C",
"C", "C", "B", "B", c("C", "E"), c("C", "E"), "E", "C", "C",
"C", c("E", "D", "B", "A"), c("E", "D", "B", "A"), "A", "A",
"A", "A", c("C", "A"), c("C", "A"), c("C", "A"), "E", "E",
"A", "C", "C", c("E", "D"), c("E", "D"), "A", "A", "A", "A",
"A", "A", "A", "A", "D", "E", c("C", "D"), "E", character(0),
character(0), character(0), "D", "D", character(0), "A",
"B", c("D", "B"), c("D", "B"), c("D", "B"), c("D", "B"),
"D", "D", "D", "E", "E", "D", "D", "B", "B", "B", "E", "D",
"C", "D", "C", "C", "E", "E", "A", character(0), "B", "B",
"B", "B", "B", "B", "B", "A", "A", "C", "C", "C", "D", "D",
"D", "D", "D", "D", "D", "B", "A", "B", "B", "D", "A", "A",
"A", "A", "D", "D", "E", "E", "B", character(0), character(0),
character(0), "C", "C", "C", "B", "B", "A", "D", c("C", "B"
), "D", "D", "C", "C", character(0), "D", "D", "D", "D",
c("B", "A"), "E", "A", "A", character(0), "E", "C", "B",
"C", "C", "C", "C", "E", "E", "D", "C", "C", "E", "E", "E",
"E", "E", "E", "E", "A", "B", c("C", "E", "D", "B", "A"),
c("C", "E", "D", "B", "A"), "D", "E", "E", "B", "B", character(0),
character(0), "D", "D", "C", "D", "D", "E", "E", "E", "E",
"E", c("D", "B"), "B", "B", character(0), "D", "D", "D",
"D", "D", "D", "C", "E", "E", "E", c("E", "B"), c("E", "B"
), "E", "E", "D", "D", "B", c("E", "A"), c("E", "A"), c("C",
"D"), c("C", "D"), c("C", "B"), c("C", "B"), "C", "C", "B")), row.names = c(NA,
445L), class = "data.frame")
I would like to split this data up so that each value gets its own column:
1 2 3 4 5
<chr> <chr> <chr> <chr> <chr>
1 A
2 B
3
4 C
5 A C
6 D
7 B C
8 D E
Then, ultimately, get the data into a tidy format so that it's easy to filter by a column:
A B C D E
<logi> <logi> <logi> <logi> <logi>
1 TRUE FALSE FALSE FALSE FALSE
2 FALSE TRUE FALSE FALSE FALSE
3 FALSE FALSE FALSE FALSE FALSE
4 FALSE FALSE TRUE FALSE FALSE
5 TRUE FALSE TRUE FALSE FALSE
6 FALSE FALSE FALSE TRUE FALSE
7 FALSE TRUE TRUE FALSE FALSE
8 FALSE FALSE FALSE TRUE TRUE
That last step should be simple with mutate, it's the splitting I can't figure out. I'm aware of both tidyr separate and unnest_wider, but as far as I can tell those don't let me control which columns the vector is split into.
Assuming your data is something like this :
df <- structure(list(values = list("A", "B", character(0), "C", c("A",
"C"), "D", c("B", "C"), c("D", "E"))),
row.names = c(NA, -8L), class = "data.frame")
You can do :
library(dplyr)
library(tidyr)
# Turn the list column `values` into one logical column per distinct value.
df %>%
  mutate(row = row_number()) %>%
  # keep_empty = TRUE turns a character(0) entry into a single NA row instead
  # of dropping it, so empty rows survive even at the end of the data
  unnest(values, keep_empty = TRUE) %>%
  mutate(val = TRUE) %>%
  pivot_wider(names_from = values, values_from = val, values_fill = FALSE) %>%
  # the NA placeholder column only exists when at least one row was empty
  dplyr::select(-any_of("NA"), -row)
# A B C D E
# <lgl> <lgl> <lgl> <lgl> <lgl>
#1 TRUE FALSE FALSE FALSE FALSE
#2 FALSE TRUE FALSE FALSE FALSE
#3 FALSE FALSE FALSE FALSE FALSE
#4 FALSE FALSE TRUE FALSE FALSE
#5 TRUE FALSE TRUE FALSE FALSE
#6 FALSE FALSE FALSE TRUE FALSE
#7 FALSE TRUE TRUE FALSE FALSE
#8 FALSE FALSE FALSE TRUE TRUE
Based on the dput, data, we can do
library(dplyr)
library(tidyr)
# One logical column per distinct value found in any list column of `df1`,
# one output row per input row.
df1 %>%
  mutate(rn = row_number()) %>%
  pivot_longer(cols = -rn) %>%
  # keep_empty = TRUE: a character(0) entry becomes NA instead of vanishing, so
  # rows in which every list column is empty still reach the output
  unnest(value, keep_empty = TRUE) %>%
  pivot_wider(names_from = value, values_from = name,
              values_fill = FALSE, values_fn = list(name = ~ length(.) > 0)) %>%
  # drop the row id and the placeholder column created by the NA entries
  select(-rn, -any_of("NA"))
# A tibble: 445 x 5
# C E D A B
# <lgl> <lgl> <lgl> <lgl> <lgl>
# 1 TRUE FALSE FALSE FALSE FALSE
# 2 FALSE TRUE FALSE FALSE FALSE
# 3 TRUE FALSE FALSE FALSE FALSE
# 4 TRUE FALSE FALSE FALSE FALSE
# 5 TRUE FALSE FALSE FALSE FALSE
# 6 TRUE FALSE FALSE FALSE FALSE
# 7 FALSE FALSE TRUE TRUE FALSE
# 8 FALSE FALSE TRUE TRUE FALSE
# 9 FALSE FALSE TRUE FALSE FALSE
#10 FALSE FALSE TRUE FALSE FALSE
# … with 435 more rows
My initial matrix looks like the following (but my matrix is huge)
A NA A A A D D B NA B C NA C
A NA A B B D C A NA A A NA A
D NA D D A A A C NA C C NA C
structure(c("A", "A", "D", NA, NA, NA, "A", "A", "D", "A", "B",
"D", "A", "B", "A", "D", "D", "A", "D", "C", "A", "B", "A", "C",
NA, NA, NA, "B", "A", "C", "C", "A", "C", NA, NA, NA, "C", "A",
"C"), .Dim = c(3L, 13L), .Dimnames = list(NULL, c("V1", "V2",
"V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12",
"V13")))
I want to substitute the NA with the letters surroundings (left and right), if they are the same, that is, I want something like this:
A A A A A D D B B B C C C
A A A B B D C A A A A A A
D D D D A A A C C C C C C
structure(c("A", "A", "D", "A", "A", "D", "A", "A", "D", "A",
"B", "D", "A", "B", "A", "D", "D", "A", "D", "C", "A", "B", "A",
"C", "B", "A", "C", "B", "A", "C", "C", "A", "C", "C", "A", "C",
"C", "A", "C"), .Dim = c(3L, 13L), .Dimnames = list(NULL, c("V1",
"V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11",
"V12", "V13")))
So, if both surrounding letters are the same, I would change the NA to the surrounding letter, otherwise, I would keep the NA.
Any ideas?
Thank you very much.
Here is my approach without using additional libraries:
dat <- matrix(
  c("A", NA, "A", "A", NA, "B",
    "B", NA, "A", "B", NA, "B",
    "B", NA, NA, "B", "B", NA),
  nrow = 3, byrow = TRUE
)

# Fill every single NA whose left and right neighbours hold the same value.
#
# x: one row of the matrix (a vector).
# Returns x with those NAs replaced by the neighbouring value. Runs of several
# NAs, NAs at the margins and NAs between different values are left untouched.
fill_single_na_gaps <- function(x) {
  pos <- which(!is.na(x))
  # two consecutive non-NA positions exactly 2 apart enclose a single NA
  left <- pos[which(diff(pos) == 2)]
  # keep only the gaps whose two neighbours agree
  left <- left[x[left] == x[left + 2]]
  # zero-length assignment is a no-op, so rows without a match need no guard
  # and the row keeps its type
  x[left + 1] <- x[left]
  x
}

# apply() returns the processed rows as columns, hence the transpose
t(apply(dat, 1, fill_single_na_gaps))
Open questions are: how do you want to handle sequences of NAs, and NAs at the margins?
Here is a version which also handles sequences of NAs:
# Row by row, fill every run of NAs that is enclosed by two equal values with
# that value; runs at the margins or between different values stay NA.
t(apply(dat, 1, function(x) {
  known <- which(!is.na(x))
  # pair each non-NA position with the next non-NA position
  starts <- head(known, -1)
  ends <- tail(known, -1)
  # a distance above 1 means there is at least one NA in between
  for (k in which(ends - starts > 1)) {
    if (x[starts[k]] == x[ends[k]]) {
      x[(starts[k] + 1):ends[k]] <- x[starts[k]]
    }
  }
  x
}))
I'm not sure if there is an elegant and simple way. Assuming your matrix is named mat, you could use
library(tidyr)
library(dplyr)
library(zoo)
# Fill NAs that are enclosed by equal values on both sides, row by row.
mat %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  mutate(id = row_number()) %>%
  pivot_longer(cols = -id) %>%
  group_by(id) %>%
  mutate(
    value = {
      # na.rm = FALSE keeps leading/trailing NAs, so both helper vectors stay
      # aligned with `value`; the default (na.rm = TRUE) drops them and
      # shortens the result
      before <- na.locf(value, na.rm = FALSE)
      after <- na.locf(value, na.rm = FALSE, fromLast = TRUE)
      # at the margins the comparison is NA, which leaves the NA in place
      ifelse(is.na(value) & (before == after), before, value)
    }
  ) %>%
  ungroup() %>%
  pivot_wider(names_from = name, values_from = value) %>%
  select(-id) %>%
  as.matrix()
which returns
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
[1,] "A" "A" "A" "A" "A" "D" "D" "B" "B" "B" "C" "C" "C"
[2,] "A" "A" "A" "B" "B" "D" NA "A" "A" "A" "A" "A" "A"
[3,] "D" "D" "D" "D" "A" "A" "A" "C" "C" "C" "C" "C" "C"
Note: I added an NA-value in mat[2,7] for the case of unequal surroundings.
Data
mat <- structure(c("A", "A", "D", NA, NA, NA, "A", "A", "D", "A", "B",
"D", "A", "B", "A", "D", "D", "A", "D", NA, "A", "B", "A", "C",
NA, NA, NA, "B", "A", "C", "C", "A", "C", NA, NA, NA, "C", "A",
"C"), .Dim = c(3L, 13L))
I have a data table like this:
a group
1: 1 a
2: 2 a
3: 3 a
4: 4 a
5: 5 a
6: 6 a
The sample can be created from the code below:
structure(list(a = 1:100, group = c("a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b")), .Names = c("a", "group"), row.names = c(NA,
-100L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x0000000004790788>)
For each row in each group I would like to:
take value in column a
divide it by value in column a lagged by 2 and subtract 1
divide it by value in column a lagged by 4 and subtract 1
divide it by value in column a lagged by 6 and subtract 1
sum result of steps 2-4 and return it in a new column
So for rows 1-6, I would have NA, and then 7/5 + 7/3 + 7/1 - 3, 8/6 + 8/4 + 8/2 - 3, 9/7 + 9/5 + 9/3 - 3, 10/8 + 10/6 + 10/4 - 3
So based on the table reported in the first chunk, I would like to get a new column, say metric_1, which would, on the 10th row have the value 2.416667
Please note that the values in column a will not in practice correspond to row numbers, but would be some measurements.
The final output would then look like this:
a group metric_1
1: 1 a NA
2: 2 a NA
3: 3 a NA
4: 4 a NA
5: 5 a NA
6: 6 a NA
7: 7 a 7.733333
8: 8 a 4.333333
9: 9 a 3.085714
10: 10 a 2.416667
I already tried some versions with Reduce which works like a champ if I need to sum some values in a vector, but I haven't been able to tweak it into enabling me to do the division like this.
I'm not sure if this is exactly what you're looking for but perhaps it will help:
library(dplyr)
# Within each group, add up the relative change of `a` compared with its value
# 2, 4 and 6 rows earlier; the first six rows of a group are therefore NA.
the_data %>%
  group_by(group) %>%
  mutate(
    metric_1 =
      (a / lag(a, n = 2) - 1) +
      (a / lag(a, n = 4) - 1) +
      (a / lag(a, n = 6) - 1)
  ) %>%
  ungroup()
found one possible solution as:
# Per group: `a` together with the summed relative change of `a` against its
# values 2, 4 and 6 rows earlier.
dt[
  ,
  .(
    a,
    Reduce(
      "+",
      lapply(shift(a, n = c(2, 4, 6)), function(lagged) a / lagged - 1)
    )
  ),
  by = "group"
]
But it is rather slow.
This question already has answers here:
How to flatten a list to a list without coercion?
(7 answers)
Closed 7 years ago.
I have a very messy list with multiple levels in the form of:
[[1]]
[[1]][[1]]
[[1]][[1]][[1]]
[1] "D" "B" "A"
[[1]][[1]][[2]]
[1] "E" "B" "A"
[[1]][[2]]
[[1]][[2]][[1]]
[1] "D" "C" "A"
[[1]][[3]]
[[1]][[3]][[1]]
[1] "B" "D" "A"
....
[[5]][[2]][[2]]
[1] "D" "B" "E"
[[5]][[3]]
[1] "C" "E"
...
What is the easiest way to just get a list of the lowest level character vectors, so the first element would be "D""B""A" then the next would be "E""B""A" and so forth?
Thanks!
Edit:
Here's my list in dput format as requested. However, the nesting structure can change and the number of levels can increase. Thus any solution that works by using a presupposed number of levels is no good.
> dput(myResults)
list(list(list(c("D", "B", "A"), c("E", "B", "A")), list(c("D",
"C", "A")), list(c("B", "D", "A"), c("C", "D", "A"), c("E", "D",
"A")), list(c("B", "E", "A"), c("D", "E", "A"))), list(list(c("D",
"A", "B"), c("E", "A", "B")), c("C", "B"), list(c("A", "D", "B"
), c("E", "D", "B")), list(c("A", "E", "B"), c("D", "E", "B"))),
list(list(c("D", "A", "C")), c("B", "C"), list(c("A", "D",
"C")), c("E", "C")), list(list(c("B", "A", "D"), c("C", "A",
"D"), c("E", "A", "D")), list(c("A", "B", "D"), c("E", "B",
"D")), list(c("A", "C", "D")), list(c("A", "E", "D"), c("B",
"E", "D"))), list(list(c("B", "A", "E"), c("D", "A", "E")),
list(c("A", "B", "E"), c("D", "B", "E")), c("C", "E"),
list(c("A", "D", "E"), c("B", "D", "E"))))
Edit
There is a package rlist with a function list.flatten that does this
library(rlist)
list.flatten(yourLst)
A recursive solution (the order is changed though, i.e. the least nested elements come out first)
# Flatten an arbitrarily nested list into a flat list of its non-list elements.
#
# lst: a (possibly nested) list.
# Returns a list without nested lists. Elements found at a shallower nesting
# level come before deeper ones, so the original order is not preserved.
unlst <- function(lst) {
  # vapply always yields a logical vector, also for an empty list
  inds <- vapply(lst, is.list, logical(1))
  if (!any(inds)) {
    return(lst)
  }
  # peel one nesting level off the list elements and recurse on what is left;
  # the argument is spelled out in full (no partial matching, no `F`)
  c(lst[!inds], unlst(unlist(lst[inds], recursive = FALSE)))
}
Try this function please.
# Flatten an arbitrarily nested list into a flat list of its leaves, keeping
# the leaves in their original (depth-first) order.
#
# cur_list: a nested list, or a single leaf (anything that is not a list).
# Returns a list with one element per leaf; an empty list yields an empty list.
unlist_messy_list <- function(cur_list) {
  # anything that is not a list is a leaf; wrap it so results can be combined
  if (!is.list(cur_list)) {
    return(list(cur_list))
  }
  pieces <- lapply(cur_list, unlist_messy_list)
  # concatenate all pieces in one call instead of pairwise with Reduce();
  # unname() stops the element names from being used as argument names of c()
  flat <- do.call(c, unname(pieces))
  # c() with no arguments returns NULL; always hand back a list
  if (is.null(flat)) list() else flat
}
As you had not provided sample data, I tested it with some cases I made up myself, and it works.
unlist_messy_list(list())
unlist_messy_list(list(c(1,2,3), c(4,5,6), c(7,8,9)))
unlist_messy_list(list(c(1,2,3), list(c(4,5,6), c(7,8,9))))
unlist_messy_list(list(c(1,2,3), c(4,5,6), list(c(7,8,9), c(10,11,12))))
unlist_messy_list(list(c(1,2,3), list(c(4,5,6), c(7,8,9), list(10, c(11,12,13), 14, list(c(15,16))))))
I just tested it on your newly provided data, and it works fine. The output is (after dput):
list(c("D", "B", "A"), c("E", "B", "A"), c("D", "C", "A"), c("B", "D", "A"), c("C", "D", "A"), c("E", "D", "A"), c("B", "E", "A"), c("D", "E", "A"), c("D", "A", "B"), c("E", "A", "B"), c("C", "B"), c("A", "D", "B"), c("E", "D", "B"), c("A", "E", "B"), c("D", "E", "B"), c("D", "A", "C"), c("B", "C"), c("A", "D", "C"), c("E", "C"), c("B", "A", "D"), c("C", "A", "D"), c("E", "A", "D"), c("A", "B", "D"), c("E", "B", "D"), c("A", "C", "D"), c("A", "E", "D"), c("B", "E", "D"),c("B", "A", "E"), c("D", "A", "E"), c("A", "B", "E"), c("D", "B", "E"), c("C", "E"), c("A", "D", "E"), c("B", "D", "E"))