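I would like to transpose a single column into multiple columns, starting a new row after every two consecutive blank entries (the "NA's" here are empty strings). I have tried to transpose at every nth row, but the groups vary in length, so there is no fixed pattern.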
Example:
x<-data.frame(col1=c('A','F',1,15,'','','A','Z','$35','P',2,'','','B','ER',3,'P',56,'YT65','','','B','AZ','$5','PO',28,'',''))
What I am hoping to accomplish is:
col1
col2
col3
col4
col5
col6
A
F
1
15
A
Z
$35
P
2
B
ER
3
P
56
YT65
B
AZ
$5
PO
28
It's a little convoluted, but you could do:
# Split the column into runs. The running count of empty strings, integer-
# divided by 2, yields a group id that changes at the second blank of each
# separator pair.
groups <- split(x$col1, cumsum(!nzchar(x$col1)) %/% 2)
# Every run after the first therefore starts with a blank; drop it.
groups <- lapply(groups, function(v) if (!nzchar(v[1])) v[-1] else v)
# Right-pad every run with "" to a common width so the runs can be row-bound.
width <- max(lengths(groups))
z <- do.call(rbind, lapply(groups, function(v) {
  c(v, rep("", width - length(v)))
}))
# Discard rows and columns that consist only of padding/blanks.
keep_rows <- rowSums(z == "") != ncol(z)
keep_cols <- colSums(z == "") != nrow(z)
# drop = FALSE keeps a matrix even when a single row or column survives, so
# the result is always one data-frame row per run.
as.data.frame(z[keep_rows, keep_cols, drop = FALSE])
#> V1 V2 V3 V4 V5 V6
#> 0 A F 1 15
#> 1 A Z $35 P 2
#> 2 B ER 3 P 56 YT65
#> 3 B AZ $5 PO 28
A possible alternative approach:
library(tidyverse)
df <- data.frame(col1 = c("A", "F", 1, 15, "", "",
"A", "Z", "$35", "P", 2, "", "",
"B", "ER", 3, "P", 56, "YT65", "", "",
"B", "AZ", "$5", "PO", 28, "", ""))
df |>
mutate(group = cumsum(col1 == "")) |>
filter(col1 != "") |>
group_by(group) |>
mutate(col = row_number()) |>
ungroup() |>
pivot_wider(names_from = col, values_from = col1) |>
select(-group)
#> # A tibble: 4 × 6
#> `1` `2` `3` `4` `5` `6`
#> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 A F 1 15 <NA> <NA>
#> 2 A Z $35 P 2 <NA>
#> 3 B ER 3 P 56 YT65
#> 4 B AZ $5 PO 28 <NA>
Created on 2022-06-07 by the reprex package (v2.0.1)
Related
I have a dataframe which looks like this.
Name info.1 info.2
ab a 1
123 a 1
de c 4
456 c 4
fg d 5
789 d 5
The two rows that need to be combined are identical aside from the name column and are together in the dataframe. I want the new dataframe to look like this:
Name ID info.1 info.2
ab 123 a 1
de 456 c 4
fg 789 d 5
I have no clue how to do this, and a Google search hasn't been helpful so far.
In base R you could do:
# Rows alternate Name, ID, Name, ID, ...: odd rows carry the name and even
# rows the ID, while the info columns are identical within each pair.
odd <- seq_len(nrow(df)) %% 2 == 1
data.frame(Name = df[odd, 1],
           ID = df[!odd, 1],
           df[!odd, 2:3])
#>   Name  ID info.1 info.2
#> 2   ab 123      a      1
#> 4   de 456      c      4
#> 6   fg 789      d      5
Created on 2022-07-20 by the reprex package (v2.0.1)
A possible solution:
library(tidyverse)
# Collapse each Name/ID pair (rows sharing info.1) into one "Name_ID" string,
# then split that string back into two columns.
df %>%
  group_by(info.1) %>%
  summarise(Name = str_c(Name, collapse = "_"), info.2 = first(info.2)) %>%
  # convert = TRUE re-types the split columns, so ID comes out as an integer
  separate(Name, into = c("Name", "ID"), convert = TRUE) %>%
  relocate(info.1, .before = info.2)
#> # A tibble: 3 × 4
#> Name ID info.1 info.2
#> <chr> <int> <chr> <int>
#> 1 ab 123 a 1
#> 2 de 456 c 4
#> 3 fg 789 d 5
Assuming the Name column is consistently ordered Name-ID-Name-ID then:
library(tidyverse)
data <- tibble(Name = c('ab', 123, 'de', 456, 'fg', 789),
info.1 = c('a', 'a', 'c', 'c', 'd', 'd'),
info.2 = c(1, 1, 4, 4, 5, 5))
# remove the troublesome column and make a tibble
# with the unique combos of info1 and 2
data_2 <- data %>% select(info.1, info.2) %>% distinct()
# add columns for name and ID by skipping every other row in the
# original tibble
data_2$Name <- data$Name[seq(from = 1, to = nrow(data), by = 2)]
data_2$ID <- data$Name[seq(from = 2, to = nrow(data), by = 2)]
We could also use summarise and extract first as name and last as id:
data |>
group_by(info.1, info.2) |>
summarise(name = first(Name), ID = last(Name)) |>
ungroup() #|>
#relocate(3:4,1:2)
Output:
# A tibble: 3 × 4
info.1 info.2 name ID
<chr> <dbl> <chr> <chr>
1 a 1 ab 123
2 c 4 de 456
3 d 5 fg 789
We could also use
library(dplyr)
library(stringr)
data %>%
group_by(across(starts_with('info'))) %>%
mutate(ID = str_subset(Name, "^\\d+$"), .before = 2) %>%
ungroup %>%
filter(str_detect(Name, '^\\d+$', negate = TRUE))
-output
# A tibble: 3 × 4
Name ID info.1 info.2
<chr> <chr> <chr> <dbl>
1 ab 123 a 1
2 de 456 c 4
3 fg 789 d 5
data
data <- structure(list(Name = c("ab", "123", "de", "456", "fg", "789"
), info.1 = c("a", "a", "c", "c", "d", "d"), info.2 = c(1, 1,
4, 4, 5, 5)), row.names = c(NA, -6L), class = "data.frame")
I am stuck in performing pivot_longer() over multiple sets of columns. Here is the sample dataset
df <- data.frame(
id = c(1, 2),
uid = c("m1", "m2"),
germ_kg = c(23, 24),
mineral_kg = c(12, 17),
perc_germ = c(45, 34),
perc_mineral = c(78, 10))
I need the output dataframe to look like this
# Desired long-format result: one row per (id, crop), in the same order the
# wide columns appear (germ, then mineral), so each kg/perc value sits next to
# its own crop. Assigned to `out` only, leaving the wide input `df` untouched.
out <- data.frame(
  id = c(1, 1, 2, 2),
  uid = c("m1", "m1", "m2", "m2"),
  crop = c("germ", "mineral", "germ", "mineral"),
  kg = c(23, 12, 24, 17),
  perc = c(45, 78, 34, 10))
df %>%
rename_with(~str_replace(.x,'(.*)_kg', 'kg_\\1')) %>%
pivot_longer(-c(id, uid), names_to = c('.value', 'crop'), names_sep = '_')
# A tibble: 4 x 5
id uid crop kg perc
<dbl> <chr> <chr> <dbl> <dbl>
1 1 m1 germ 23 45
2 1 m1 mineral 12 78
3 2 m2 germ 24 34
4 2 m2 mineral 17 10
If you were to use data.table:
library(data.table)
melt(setDT(df), c('id', 'uid'), patterns(kg = 'kg', perc = 'perc'))
id uid variable kg perc
1: 1 m1 1 23 45
2: 2 m2 1 24 34
3: 1 m1 2 12 78
4: 2 m2 2 17 10
I suspect there might be a simpler way using pivot_longer_spec, but one tricky thing here is that your column names don't have a consistent ordering of their semantic components. @Onyambu's answer deals with this nicely by fixing it upstream.
library(tidyverse)
df %>%
pivot_longer(-c(id, uid)) %>%
separate(name, c("col1", "col2")) %>% # only needed
mutate(crop = if_else(col2 == "kg", col1, col2), # because name
meas = if_else(col2 == "kg", col2, col1)) %>% # structure
select(id, uid, crop, meas, value) %>% # is
pivot_wider(names_from = meas, values_from = value) # inconsistent
# A tibble: 4 x 5
id uid crop kg perc
<dbl> <chr> <chr> <dbl> <dbl>
1 1 m1 germ 23 45
2 1 m1 mineral 12 78
3 2 m2 germ 24 34
4 2 m2 mineral 17 10
I was having the same problem as How to find last column with value (for each row) in R?, except I have rows with no value (entire row of NA). The sample provided in said post did not have an entire row of NAs.
I was wondering how I should modify the following? I do not want to remove those rows with all NAs because they will be useful in later analysis.
df %>%
rowwise %>%
mutate(m = {tmp <- c_across(starts_with('m'))
tail(na.omit(tmp), 1)}) %>%
ungroup
Thanks a lot in advance!
If all the elements in a row are NA, then a general solution would be to add a condition that returns NA for those rows
library(dplyr)
df %>%
rowwise %>%
mutate(m = {tmp <- c_across(starts_with('m'))
if(all(is.na(tmp))) NA_character_ else
tail(na.omit(tmp), 1)}) %>%
ungroup
-output
# A tibble: 4 × 5
id m_1 m_2 m_3 m
<dbl> <chr> <chr> <chr> <chr>
1 1 a e i i
2 2 b <NA> <NA> b
3 3 <NA> <NA> <NA> <NA>
4 4 d h l l
If the OP wants to return only the last single non-NA element, we may also add an index [1] to the extraction, which automatically returns NA when there are no elements
df %>%
rowwise %>%
mutate(m = {tmp <- c_across(starts_with('m'))
tail(na.omit(tmp), 1)[1]}) %>%
ungroup
# A tibble: 4 × 5
id m_1 m_2 m_3 m
<dbl> <chr> <chr> <chr> <chr>
1 1 a e i i
2 2 b <NA> <NA> b
3 3 <NA> <NA> <NA> <NA>
4 4 d h l l
data
df <- structure(list(id = c(1, 2, 3, 4), m_1 = c("a", "b", NA, "d"),
m_2 = c("e", NA, NA, "h"), m_3 = c("i", NA, NA, "l")), row.names = c(NA,
-4L), class = "data.frame")
Using the data from @akrun (many thanks), we could maybe do it this way:
'\\b[^,]+$' is a regular expression:
\\ ... is an escaped backslash: inside an R string literal the backslash itself must be escaped, so \\b in R code is the regex \b (in regex syntax itself it is only one \)
\\b... The metacharacter \b is an anchor like ^ and $ sign. It matches at a position that is called a “word boundary”. This match is zero-length.
[^,]+ ... is a negated character class: with the ^ caret right after the opening bracket it matches one character that is not a comma. The + means one or more of those non-comma characters.
$ ... means end of string or end of line depending on multiline mode.
library(dplyr)
library(tidyr)
library(stringr)
df %>%
mutate(across(starts_with("m"), ~case_when(!is.na(.) ~ cur_column()), .names = 'new_{col}')) %>%
unite(New_Col, starts_with('new'), na.rm = TRUE, sep = ', ') %>%
mutate(New_Col = str_extract(New_Col, '\\b[^,]+$'))
id m_1 m_2 m_3 New_Col
1 1 a e i m_3
2 2 b <NA> <NA> m_1
3 3 <NA> <NA> <NA> <NA>
4 4 d h l m_3
library(tidyverse)
df <- data.frame(id = c(1, 2, 3, 4), m_1 = c("a", NA, "c", "d"), m_2 = c("e", NA, "g", "h"), m_3 = c("i", NA, NA, "l"))
df %>%
rowwise() %>%
mutate(
nms = list(str_subset(names(df), "^m")),
m = c_across(starts_with("m")) %>%
{
ifelse(test = all(is.na(.)),
yes = NA,
no = nms[which(. == tail(na.omit(.), 1))]
)
}
) %>%
select(-nms)
#> # A tibble: 4 × 5
#> # Rowwise:
#> id m_1 m_2 m_3 m
#> <dbl> <chr> <chr> <chr> <chr>
#> 1 1 a e i m_3
#> 2 2 <NA> <NA> <NA> <NA>
#> 3 3 c g <NA> m_2
#> 4 4 d h l m_3
# only the value, not the column name
df %>%
rowwise() %>%
mutate(
m = c_across(starts_with("m")) %>%
{
ifelse(test = all(is.na(.)),
yes = NA,
no = tail(na.omit(.), 1)
)
}
)
#> # A tibble: 4 × 5
#> # Rowwise:
#> id m_1 m_2 m_3 m
#> <dbl> <chr> <chr> <chr> <chr>
#> 1 1 a e i i
#> 2 2 <NA> <NA> <NA> <NA>
#> 3 3 c g <NA> g
#> 4 4 d h l l
Created on 2022-01-01 by the reprex package (v2.0.1)
How can I convert this matrix:
> matrix(1:3, nrow = 3, dimnames = list(c("X","Y","Z"), c("A")))
A
X 1
Y 2
Z 3
into this tibble:
> tibble::tribble(~group1, ~group2, ~value, "X", "A", 1, "Y", "A", 2, "Z", "A", 3)
# A tibble: 3 × 3
group1 group2 value
<chr> <chr> <dbl>
1 X A 1
2 Y A 2
3 Z A 3
Thank you
as_tibble() (the current name of the deprecated as.tibble()) can convert the matrix's rownames to a column, and then you can use gather() to create the group2 column:
library(tidyverse)
m <- matrix(1:3, nrow = 3, dimnames = list(c("X", "Y", "Z"), c("A")))
newtib <- m %>%
  # as_tibble() replaces the deprecated as.tibble(); rownames = "group1"
  # moves the matrix row names into a column of that name
  as_tibble(rownames = "group1") %>%
  # stack column A into key/value pairs (pivot_longer() is the newer equivalent)
  gather("A", key = "group2", value = "value")
> newtib
# A tibble: 3 × 3
group1 group2 value
<chr> <chr> <int>
1 X A 1
2 Y A 2
3 Z A 3
> tibble::tribble(~group1, ~group2, ~value, "X", "A", 1, "Y", "A", 2, "Z", "A", 3)
# A tibble: 3 × 3
group1 group2 value
<chr> <chr> <dbl>
1 X A 1
2 Y A 2
3 Z A 3
Easier with base R, if we convert to table and coerce with as.data.frame (if we need a tibble, use as_tibble as a wrapper over the as.data.frame call):
as.data.frame(as.table(m1))
Var1 Var2 Freq
1 X A 1
2 Y A 2
3 Z A 3
data
m1 <- matrix(1:3, nrow = 3, dimnames = list(c("X","Y","Z"), c("A")))
Transform your matrix into a dataframe
bring your rownames to column group1
mutate group2
data.frame(matrix) %>%
rownames_to_column("group1") %>%
mutate(group2 = colnames(matrix)) %>%
dplyr::select(group1, group2, value=A)
group1 group2 value
1 X A 1
2 Y A 2
3 Z A 3
You can use -
library(tidyverse)
mat <- matrix(1:3, nrow = 3, dimnames = list(c("X","Y","Z"), c("A")))
# Turn the matrix into a data frame, move its row names into a `group1`
# column, then stack the remaining column(s) into name/value pairs.
mat %>%
as.data.frame() %>%
rownames_to_column(var = 'group1') %>%
pivot_longer(cols = -group1, names_to = 'group2')
# The matrix is built from 1:3, so `value` is an integer column.
# group1 group2 value
# <chr> <chr> <int>
#1 X A 1
#2 Y A 2
#3 Z A 3
The code below creates a simplified version of the dataframe and illustrates my desired end result (df_wider) based on the unnested version. My question is: How can I achieve the same end result (df_wider) from the nested version (nested_df), using purrr?
library(tidyverse)
df <- tibble(id_01 = c(rep("01", 3), rep("02", 3)),
a = (c("a", "a", "b", "c", "c", "d")),
b = letters[7:12],
id_02 = rep(c(1, 2, 1), 2)
)
df_wider <- pivot_wider(df,
id_cols = c(id_01, a),
names_from = id_02,
values_from = b,
names_sep = "_"
)
nested_df <- nest(df, data = -id_01)
To be clear, I am trying to pivot while the dataframes are nested (i.e., before unnesting).
We can use purrr::map() within dplyr::mutate():
library(tidyverse)
df <- tibble(
id_01 = c(rep("01", 3), rep("02", 3)),
a = (c("a", "a", "b", "c", "c", "d")),
b = letters[7:12],
id_02 = rep(c(1, 2, 1), 2)
)
nested_df <- df %>%
nest(data = -id_01) %>%
mutate(data = map(data, ~ .x %>%
pivot_wider(
id_cols = a,
names_from = id_02,
values_from = b
)))
nested_df
#> # A tibble: 2 x 2
#> id_01 data
#> <chr> <list>
#> 1 01 <tibble [2 x 3]>
#> 2 02 <tibble [2 x 3]>
nested_df %>%
unnest(data)
#> # A tibble: 4 x 4
#> id_01 a `1` `2`
#> <chr> <chr> <chr> <chr>
#> 1 01 a g h
#> 2 01 b i <NA>
#> 3 02 c j k
#> 4 02 d l <NA>
Created on 2021-03-26 by the reprex package (v1.0.0)