How to combine two rows of a dataframe into one row - r

I have a dataframe which looks like this.
Name info.1 info.2
ab a 1
123 a 1
de c 4
456 c 4
fg d 5
789 d 5
The two rows that need to be combined are identical aside from the name column and are together in the dataframe. I want the new dataframe to look like this:
Name ID info.1 info.2
ab 123 a 1
de 456 c 4
fg 789 d 5
I have no clue how to do this and google search hasn't been helpful so far

In base R you could do:
data.frame(Name = df[seq(nrow(df)) %% 2 == 0, 1],
ID = df[seq(nrow(df)) %% 2 == 1, 1],
df[seq(nrow(df)) %% 2 == 0, 2:3])
#> Name ID info.1 info.2
#> 2 ab 456 a 1
#> 4 123 fg c 4
#> 6 de 789 d 5
Created on 2022-07-20 by the reprex package (v2.0.1)

A possible solution:
library(tidyverse)
df %>%
group_by(info.1) %>%
summarise(Name = str_c(Name, collapse = "_"), info.2 = first(info.2)) %>%
separate(Name, into = c("Name", "ID"), convert = T) %>%
relocate(info.1, .before = info.2)
#> # A tibble: 3 × 4
#> Name ID info.1 info.2
#> <chr> <int> <chr> <int>
#> 1 ab 123 a 1
#> 2 de 456 c 4
#> 3 fg 789 d 5

Assuming the Name column is consistently ordered Name-ID-Name-ID then:
library(tidyverse)
data <- tibble(Name = c('ab', 123, 'de', 456, 'fg', 789),
info.1 = c('a', 'a', 'c', 'c', 'd', 'd'),
info.2 = c(1, 1, 4, 4, 5, 5))
# remove the troublesome column and make a tibble
# with the unique combos of info1 and 2
data_2 <- data %>% select(info.1, info.2) %>% distinct()
# add columns for name and ID by skipping every other row in the
# original tibble
data_2$Name <- data$Name[seq(from = 1, to = nrow(data), by = 2)]
data_2$ID <- data$Name[seq(from = 2, to = nrow(data), by = 2)]

We could also use summarise and extract first as name and last as id:
data |>
group_by(info.1, info.2) |>
summarise(name = first(Name), ID = last(Name)) |>
ungroup() #|>
#relocate(3:4,1:2)
Output:
# A tibble: 3 × 4
info.1 info.2 name ID
<chr> <dbl> <chr> <chr>
1 a 1 ab 123
2 c 4 de 456
3 d 5 fg 789

We could also use
library(dplyr)
library(stringr)
data %>%
group_by(across(starts_with('info'))) %>%
mutate(ID = str_subset(Name, "^\\d+$"), .before = 2) %>%
ungroup %>%
filter(str_detect(Name, '^\\d+$', negate = TRUE))
-output
# A tibble: 3 × 4
Name ID info.1 info.2
<chr> <chr> <chr> <dbl>
1 ab 123 a 1
2 de 456 c 4
3 fg 789 d 5
data
data <- structure(list(Name = c("ab", "123", "de", "456", "fg", "789"
), info.1 = c("a", "a", "c", "c", "d", "d"), info.2 = c(1, 1,
4, 4, 5, 5)), row.names = c(NA, -6L), class = "data.frame")

Related

Transpose single column after NA to multiple columns with R

I would like to transpose a single column to multiple columns after two NA's. I have tried to transpose at every nth row, but there is no pattern.
Example:
x<-data.frame(col1=c('A','F',1,15,'','','A','Z','$35','P',2,'','','B','ER',3,'P',56,'YT65','','','B','AZ','$5','PO',28,'',''))
What I am hoping to accomplish is:
col1
col 2
col3
col4
col5
col6
A
F
1
15
A
Z
$35
P
2
B
ER
3
P
56
YT65
B
AZ
$5
PO
28
It's a little convoluted, but you could do:
z <- lapply(split(x$col1, cumsum(!nzchar(x$col1)) %/% 2), function(x) {
if(!nzchar(x[1])) x[-1] else x
})
z <- do.call(rbind, lapply(z, function(x) {
c(x, rep('', max(lengths(z)) - length(x)))
}))
as.data.frame(z[rowSums(z == '') != ncol(z), colSums(z == '') != nrow(z)])
#> V1 V2 V3 V4 V5 V6
#> 0 A F 1 15
#> 1 A Z $35 P 2
#> 2 B ER 3 P 56 YT65
#> 3 B AZ $5 PO 28
A possible alternative approach:
library(tidyverse)
df <- data.frame(col1 = c("A", "F", 1, 15, "", "",
"A", "Z", "$35", "P", 2, "", "",
"B", "ER", 3, "P", 56, "YT65", "", "",
"B", "AZ", "$5", "PO", 28, "", ""))
df |>
mutate(group = cumsum(col1 == "")) |>
filter(col1 != "") |>
group_by(group) |>
mutate(col = row_number()) |>
ungroup() |>
pivot_wider(names_from = col, values_from = col1) |>
select(-group)
#> # A tibble: 4 × 6
#> `1` `2` `3` `4` `5` `6`
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 A F 1 15 <NA> <NA>
#> 2 A Z $35 P 2 <NA>
#> 3 B ER 3 P 56 YT65
#> 4 B AZ $5 PO 28 <NA>
Created on 2022-06-07 by the reprex package (v2.0.1)

Converting a matrix into a tibble in R

How can I convert this matrix:
> matrix(1:3, nrow = 3, dimnames = list(c("X","Y","Z"), c("A")))
A
X 1
Y 2
Z 3
into this tibble:
> tibble::tribble(~group1, ~group2, ~value, "X", "A", 1, "Y", "A", 2, "Z", "A", 3)
# A tibble: 3 × 3
group1 group2 value
<chr> <chr> <dbl>
1 X A 1
2 Y A 2
3 Z A 3
Thank you
as.tibble can convert the matrix's rownames to a column, and then you can use gather() to create the group2 column:
library(tidyverse)
m <- matrix(1:3, nrow = 3, dimnames = list(c("X","Y","Z"), c("A")))
newtib <- m %>%
as.tibble(rownames = "group1") %>%
gather('A', key = "group2", value = "value")
> newtib
# A tibble: 3 × 3
group1 group2 value
<chr> <chr> <int>
1 X A 1
2 Y A 2
3 Z A 3
> tibble::tribble(~group1, ~group2, ~value, "X", "A", 1, "Y", "A", 2, "Z", "A", 3)
# A tibble: 3 × 3
group1 group2 value
<chr> <chr> <dbl>
1 X A 1
2 Y A 2
3 Z A 3
Easier with base R, if we convert to table and coerce with as.data.frame (if we need to convert to tibble - use as_tibble as wrapper over the as.data.frame
as.data.frame(as.table(m1))
Var1 Var2 Freq
1 X A 1
2 Y A 2
3 Z A 3
data
m1 <- matrix(1:3, nrow = 3, dimnames = list(c("X","Y","Z"), c("A")))
Transform your matrix into a dataframe
bring your rownames to column group1
mutate group2
data.frame(matrix) %>%
rownames_to_column("group1") %>%
mutate(group2 = colnames(matrix)) %>%
dplyr::select(group1, group2, value=A)
group1 group2 value
1 X A 1
2 Y A 2
3 Z A 3
You can use -
library(tidyverse)
mat <- matrix(1:3, nrow = 3, dimnames = list(c("X","Y","Z"), c("A")))
mat %>%
as.data.frame() %>%
rownames_to_column(var = 'group1') %>%
pivot_longer(cols = -group1, names_to = 'group2')
# group1 group2 value
# <chr> <chr> <dbl>
#1 X A 1
#2 Y A 2
#3 Z A 3

R: how to combine rows using Pivot_wider

I have a table like the following:
A, B, C
1, Yes, 3
1, No, 2
2, Yes, 4
2, No, 6
etc
I want to convert it to:
A, Yes, No
1, 3, 2
2, 4, 6
I have tried using:
dat <- dat %>%
spread(B, C) %>%
group_by(A)
However, now I have a bunch of NA values. Is it possible to use pivot_longer to do this instead?
We can use pivot_wider
library(tidyr)
pivot_wider(dat, names_from = B, values_from = C)
-output
# A tibble: 2 x 3
# A Yes No
# <dbl> <dbl> <dbl>
#1 1 3 2
#2 2 4 6
If there are duplicate rows, then an option is to create a sequence by that column
library(data.table)
library(dplyr)
dat1 <- bind_rows(dat, dat) # // example with duplicates
dat1 %>%
mutate(rn = rowid(B)) %>%
pivot_wider(names_from = B, values_from = C) %>%
select(-rn)
-output
# A tibble: 4 x 3
# A Yes No
# <dbl> <dbl> <dbl>
#1 1 3 2
#2 2 4 6
#3 1 3 2
#4 2 4 6
data
dat <- structure(list(A = c(1, 1, 2, 2), B = c("Yes", "No", "Yes", "No"
), C = c(3, 2, 4, 6)), class = "data.frame", row.names = c(NA,
-4L))

Converting Uneven List to Dataframe

I am wondering if there is an elegant and generalizable way to convert mylist to mydf in the example below. I've looked at the rectangling vignette but the examples for unnest/hoist seem to be on lists that have a regular but not tidy structure.
mylist <- list(name = "example",
idnum = 123,
cases = list(
case1 = list(
type = 1,
genre = "A"),
case2 = list(
type = 1,
genre = "B"),
case3 = list(
type = 2,
genre = "A"
)))
mydf <- data.frame(name = rep("example", 3),
idnum = rep(123, 3),
cases = c("case1", "case2", "case3"),
type = c(1, 1, 2),
genre = c("A", "B", "A"))
Edit: This gets close to what I want but I am losing the case names
mylist %>%
as_tibble %>%
unnest_wider(cases)
# A tibble: 3 x 4
name idnum type genre
<chr> <dbl> <dbl> <chr>
1 example 123 1 A
2 example 123 1 B
3 example 123 2 A
One option is bind_rows with unnest_wider
library(dplyr)
library(tidyr)
bind_rows(mylist) %>%
mutate(case = names(cases)) %>%
unnest_wider(c(cases))
# A tibble: 3 x 5
# name idnum type genre case
# <chr> <dbl> <dbl> <chr> <chr>
#1 example 123 1 A case1
#2 example 123 1 B case2
#3 example 123 2 A case3
Or as #Ben suggested in the comments
mylist %>%
as_tibble %>%
mutate(case = names(cases)) %>%
unnest_wider(c(cases))
# A tibble: 3 x 5
# name idnum type genre case
# <chr> <dbl> <dbl> <chr> <chr>
#1 example 123 1 A case1
#2 example 123 1 B case2
#3 example 123 2 A case3

How do I select column based on value in another column with dplyr?

My data frame looks like this:
id A T C G ref var
1 1 10 15 7 0 A C
2 2 11 9 2 3 A G
3 3 2 31 1 12 T C
I'd like to create two new columns: ref_count and var_count which will have following values:
Value from A column and value from C column, since ref is A and var is C
Value from A column and value from G column, since ref is A and var is G
etc.
So I'd like to select a column based on the value in another column for each row.
Thanks!
We can use pivot_longer to reshape into 'long' format, filter the rows and then reshape it to 'wide' format with pivot_wider
library(dplyr)
library(tidyr)
df1 %>%
pivot_longer(cols = A:G) %>%
group_by(id) %>%
filter(name == ref|name == var) %>%
mutate(nm1 = c('ref_count', 'var_count')) %>%
ungroup %>%
select(id, value, nm1) %>%
pivot_wider(names_from = nm1, values_from = value) %>%
left_join(df1, .)
# A tibble: 3 x 9
# id A T C G ref var ref_count var_count
#* <int> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
#1 1 10 15 7 0 A C 10 7
#2 2 11 9 2 3 A G 11 3
#3 3 2 31 1 12 T C 31 1
Or in base R, we can also make use of the vectorized row/column indexing
df1$refcount <- as.matrix(df1[2:5])[cbind(seq_len(nrow(df1)), match(df1$ref, names(df1)[2:5]))]
df1$var_count <- as.matrix(df1[2:5])[cbind(seq_len(nrow(df1)), match(df1$var, names(df1)[2:5]))]
data
df1 <- structure(list(id = 1:3, A = c(10, 11, 2), T = c(15, 9, 31),
C = c(7, 2, 1), G = c(0, 3, 12), ref = c("A", "A", "T"),
var = c("C", "G", "C")), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"))
The following is a tidyverse alternative without creating a long dataframe that needs filtering. It essentially uses tidyr::nest() to nest the dataframe by rows, after which the correct column can be selected for each row.
df1 %>%
nest(data = -id) %>%
mutate(
data = map(
data,
~mutate(., refcount = .[[ref]], var_count = .[[var]])
)
) %>%
unnest(data)
#> # A tibble: 3 × 9
#> id A T C G ref var refcount var_count
#> <int> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
#> 1 1 10 15 7 0 A C 10 7
#> 2 2 11 9 2 3 A G 11 3
#> 3 3 2 31 1 12 T C 31 1
A variant of this does not need the (assumed row-specific) id column but defines the nested groups from the unique values of ref and var directly:
df1 %>%
nest(data = -c(ref, var)) %>%
mutate(
data = pmap(
list(data, ref, var),
function(df, ref, var) {
mutate(df, refcount = df[[ref]], var_count = df[[var]])
}
)
) %>%
unnest(data)
The data were specified by akrun:
df1 <- structure(list(id = 1:3, A = c(10, 11, 2), T = c(15, 9, 31),
C = c(7, 2, 1), G = c(0, 3, 12), ref = c("A", "A", "T"),
var = c("C", "G", "C")), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"))

Resources