Shiny: Join Cells in R - r

I would like to make this table:
Look like this:

Using dplyr:
df <- tibble(id = c(1,1,3),
b = c("foo", "bar", "foo"),
c = c("x", "y", "z"))
df
# A tibble: 3 x 3
id b c
<dbl> <chr> <chr>
1 1 foo x
2 1 bar y
3 3 foo z
# Collapse the `b` and `c` values of each `id` into one comma-separated
# string per group.
summarize(
  group_by(df, id),
  new = paste(b, collapse = ","),
  new2 = paste(c, collapse = ",")
)
which results in:
# A tibble: 2 x 3
id new new2
<dbl> <chr> <chr>
1 1 foo,bar x,y
2 3 foo z

Related

Creating columns dynamically in `mutate`

Consider the following dataset:
library(tidyverse)
tbl <- tibble(
x = letters[1:3],
y = c("a", "a", "b"),
z = letters[4:6]
)
I'd like to create a function to create new columns containing unique ids for each value of a given set of columns, with a single mutate call.
I tried the following but I'm getting an error:
# Attempted implementation from the question; the author reports that calling
# it raises an error.
# Intent: for every column named in `...`, add a `<column>_id` column holding
# vctrs::vec_group_id() of that column, all within a single mutate() call.
# NOTE(review): the `"{.x}_id" :=` naming is written inside the map_chr()
# lambda rather than as a direct argument of mutate() -- presumably the cause
# of the reported error; confirm.
add_ids <- function(.data, ...) {
mutate(.data, map_chr(c(...), ~ {
"{.x}_id" := vctrs::vec_group_id(.data[[.x]])
}))
}
add_ids(tbl, x, y)
Expected output:
# A tibble: 3 × 5
x y z x_id y_id
<chr> <chr> <chr> <int> <int>
1 a a d 1 1
2 b a e 2 1
3 c b f 3 2
You could rather use across in this case:
# Add one `<column>_id` column per selected column.
#
# .data: a data frame.
# var:   a tidyselect expression (e.g. `c(x, y)`) choosing the columns.
#
# Each new column holds vctrs::vec_group_id() of the corresponding input
# column; the existing columns are kept as they are.
add_ids <- function(.data, var) {
  .data %>%
    mutate(
      across(
        .cols = {{ var }},
        .fns = vctrs::vec_group_id,
        .names = "{col}_id"
      )
    )
}
add_ids(tbl, c(x, y))
## A tibble: 3 × 5
# x y z x_id y_id
# <chr> <chr> <chr> <int> <int>
#1 a a d 1 1
#2 b a e 2 1
#3 c b f 3 2

New column containing string that appears the most in the row

I'm trying to create a column containing the string that appears most often in each row, and another column with the number of times that most prevalent string appears.
To illustrate my question, this is what I'm trying to achieve:
My actual DF
What im trying to obtain:
most prevalent category and count
example df:
d
f <- data.frame(ID = 1:4,
V1 = c("A","B","C","D"),
V2 = c("A", "B","D","B"),
V3 = c("A","C","D","B"))
Here is another way:
# Tabulate the values of every row exactly once.
# Iterating over row indices always yields a list with one table per row.
# apply(f[, -1], 1, table) instead collapses to a matrix (or a plain vector)
# whenever all rows happen to have the same number of distinct values, which
# breaks the per-row max / which.max below.
values <- as.matrix(f[, -1, drop = FALSE])
row_tables <- lapply(seq_len(nrow(values)), function(i) table(values[i, ]))
# Highest frequency in each row.
count <- vapply(row_tables, max, integer(1))
count
# [1] 3 2 2 2
# Most frequent value in each row; which.max() keeps the first maximum, so
# ties go to the alphabetically first value (table() sorts its names).
category <- vapply(
  row_tables,
  function(tb) names(tb)[which.max(tb)],
  character(1)
)
category
# [1] "A" "B" "D" "B"
f2 <- data.frame(f, category, count)
f2
# ID V1 V2 V3 category count
# 1 1 A A A A 3
# 2 2 B B C B 2
# 3 3 C D D D 2
# 4 4 D B B B 2
df <- data.frame(
  ID = 1:4,
  V1 = c("A", "B", "C", "D"),
  V2 = c("A", "B", "D", "B"),
  V3 = c("A", "C", "D", "B")
)
library(data.table)
setDT(df)
# Long format: one row per (ID, cell value).
other <- melt(df, id.vars = "ID", measure.vars = c("V1", "V2", "V3"))
# Number of occurrences of each value within each ID.
other <- other[, .N, by = .(ID, value)]
# Rename by reference instead of `colnames<-`, which is not the data.table
# idiom for renaming columns.
setnames(other, old = c("value", "N"), new = c("category", "count"))
# Keep the most frequent value per ID; which.max() returns the first maximum.
other <- other[, .SD[which.max(count)], by = .(ID)]
# Attach category and count to the original wide table.
res <- merge(df, other, by = "ID")
res
We can use dplyr rowwise function to apply table to each row from V1:V3
library(dplyr)
# Work row by row: tabulate the values in V1:V3, then keep the most frequent
# value (`category`) and how often it occurs (`count`).
mutate(
  rowwise(df),
  category = names(table(c_across(V1:V3)))[which.max(table(c_across(V1:V3)))],
  count = max(table(c_across(V1:V3)))
)
Output
# A tibble: 4 × 6
# Rowwise:
ID V1 V2 V3 category count
<int> <chr> <chr> <chr> <chr> <int>
1 1 A A A A 3
2 2 B B C B 2
3 3 C D D D 2
4 4 D B B B 2

Extracting a letter and put it in a separated column in R

I have data set like this:
df<-data.frame(ID=(1:5), column1=c("AA","GG","AG","AA","AT"), column2=c("AA","GG","AG","AA","AT"), stringsAsFactors=FALSE)
df
ID column1 column2
1 AA AA
2 GG GG
3 AG AG
4 AA AA
5 AT AT
I want to separate each column into 2 letters so the output will look something like this:
ID column1.A column1.B column2.A column2.B
1 A A A A
2 G G G G
3 A G A G
4 A A A A
5 A T A T
Can you help me please?
library(tidyverse)
df %>%
  pivot_longer(-ID) %>%
  # Split every two-letter value into its individual characters.
  mutate(tmp = str_split(value, pattern = "")) %>%
  unnest(tmp) %>%
  # Label the characters of each (ID, name) cell A, B, ... by position.
  group_by(ID, name) %>%
  mutate(id_row = LETTERS[row_number()]) %>%
  ungroup() %>%
  # `name` is consumed by `names_from`, so it must not also be listed in
  # `id_cols`; current tidyr rejects that overlap. `ID` alone identifies a row.
  pivot_wider(
    id_cols = ID,
    names_from = c(name, id_row),
    values_from = tmp,
    names_sep = "."
  )
#> # A tibble: 5 x 5
#> ID column1.A column1.B column2.A column2.B
#> <int> <chr> <chr> <chr> <chr>
#> 1 1 A A A A
#> 2 2 G G G G
#> 3 3 A G A G
#> 4 4 A A A A
#> 5 5 A T A T
data
df <-
data.frame(
ID = (1:5),
column1 = c("AA", "GG", "AG", "AA", "AT"),
column2 = c("AA", "GG", "AG", "AA", "AT"),
stringsAsFactors = FALSE
)
Created on 2021-11-05 by the reprex package (v2.0.1)
data.table
library(data.table)
setDT(df)
# Long format: one row per (ID, original column).
long_dt <- melt(data = df, id.vars = "ID")
# One row per single character of every value.
chars_dt <- long_dt[
  , list(value = unlist(strsplit(value, split = ""))),
  by = list(ID, variable)
]
# Label the characters of each (ID, variable) cell A, B, ... by position.
chars_dt[, id_row := LETTERS[rowid(ID, variable)]]
# Back to wide format: one column per original column and character position.
dcast(chars_dt, formula = ID ~ variable + id_row, value.var = "value")
ID column1_A column1_B column2_A column2_B
1: 1 A A A A
2: 2 G G G G
3: 3 A G A G
4: 4 A A A A
5: 5 A T A T
Using strsplit.
# Split each string of a column into single characters and stack the pieces
# into a character matrix with one row per input string.
split_chars <- function(x) do.call(rbind, strsplit(x, ''))
# One character matrix per non-ID column.
pieces <- lapply(df[-1], split_chars)
# Bind the ID column to the split columns.
cbind(df[1], do.call(cbind.data.frame, pieces))
# ID column1.1 column1.2 column2.1 column2.2
# 1 1 A A A A
# 2 2 G G G G
# 3 3 A G A G
# 4 4 A A A A
# 5 5 A T A T
Yet another solution, tidyverse-based:
library(tidyverse)
df <- data.frame(
  ID = 1:5,
  column1 = c("AA", "GG", "AG", "AA", "AT"),
  column2 = c("AA", "GG", "AG", "AA", "AT"),
  stringsAsFactors = FALSE
)
df %>%
  mutate(
    across(
      starts_with("column"),
      # `.x` is already the current column, so no get(cur_column()) lookup is
      # needed. The lookaround pattern splits between two adjacent capital
      # letters; simplify = TRUE returns a character matrix per column.
      ~ str_split(.x, "(?<=[A-Z])(?=[A-Z])", simplify = TRUE),
      .names = "{.col}_sep"
    ),
    # Drop the original unsplit columns.
    column1 = NULL,
    column2 = NULL
  )
#> ID column1_sep.1 column1_sep.2 column2_sep.1 column2_sep.2
#> 1 1 A A A A
#> 2 2 G G G G
#> 3 3 A G A G
#> 4 4 A A A A
#> 5 5 A T A T
Another possibility, based on a pivot_longer followed by a pivot_wider:
library(tidyverse)
df <- data.frame(
  ID = 1:5,
  column1 = c("AA", "GG", "AG", "AA", "AT"),
  column2 = c("AA", "GG", "AG", "AA", "AT"),
  stringsAsFactors = FALSE
)
df %>%
  pivot_longer(-ID) %>%
  # Split each two-letter value between its capital letters into columns A, B.
  separate(value, into = LETTERS[1:2], sep = "(?<=[A-Z])(?=[A-Z])") %>%
  # `id_cols` is named explicitly: passing it by position is deprecated in
  # current tidyr.
  pivot_wider(
    id_cols = ID,
    names_from = "name",
    values_from = c(A, B),
    names_glue = "{name}.{.value}"
  ) %>%
  # pivot_wider() emits all `.A` columns before the `.B` ones; restore the
  # column1.A, column1.B, column2.A, column2.B order.
  relocate(column1.B, .before = column2.A)
#> # A tibble: 5 × 5
#> ID column1.A column1.B column2.A column2.B
#> <int> <chr> <chr> <chr> <chr>
#> 1 1 A A A A
#> 2 2 G G G G
#> 3 3 A G A G
#> 4 4 A A A A
#> 5 5 A T A T

a beautiful solution to decode a table with dplyr and mutate

Dear dplyr/tidyverse companions, I am looking for a nice solution to the following problem. I only get my solutions in base R with a loop. How do you solve this cleanly in tidyverse?
I have a dataset called data, whose column names and (integer) values are not meaningful.
data <- tibble(var1 = rep(c(1:3), 2),
var2 = rep(c(1:3), 2))
# A tibble: 6 x 2
var1 var2
<int> <int>
1 1 1
2 2 2
3 3 3
4 1 1
5 2 2
6 3 3
Additionally, I have a coding table that provides, for every column, a better name (var1 -> variable1) and, for every code, a better value (1 -> "a").
coding <- tibble(variable = c(rep("var1", 3),rep("var2", 3)),
name = c(rep("variable1", 3),rep("variable2", 3)),
code = rep(c(1:3), 2),
value = rep(c("a", "b", "c"), 2))
# A tibble: 6 x 4
variable name code value
<chr> <chr> <int> <chr>
1 var1 variable1 1 a
2 var1 variable1 2 b
3 var1 variable1 3 c
4 var2 variable2 1 a
5 var2 variable2 2 b
6 var2 variable2 3 c
I'm looking for a result, which has transformed names of the columns and the real values as factors in the dataset, compare:
result <- tibble(variable1 = factor(rep(c("a", "b", "c"), 2)),
variable2 = factor(rep(c("a", "b", "c"), 2)))
# A tibble: 6 x 2
variable1 variable2
<fct> <fct>
1 a a
2 b b
3 c c
4 a a
5 b b
6 c c
Thank you for your commitment :) :) :) :)
library(dplyr)
library(tidyr)
# Decode `data` with the `coding` table: long format, join, then wide again.
data %>%
# stack() gives the long form; its `values` / `ind` columns are the join keys.
stack() %>%
# Match every cell on both its source column and its code.
left_join(coding, by = c(ind = "variable", values = "code")) %>%
# Number the rows within each decoded column name so pivot_wider() has a
# row identifier to align the columns on.
group_by(name) %>%
mutate(j = row_number()) %>%
pivot_wider(id_cols = j, values_from = value) %>%
# The helper row identifier is no longer needed.
select(-j)
# # A tibble: 6 x 2
# variable1 variable2
# <chr> <chr>
# 1 a a
# 2 b b
# 3 c c
# 4 a a
# 5 b b
# 6 c c
A general solution for any number of columns -
create a row number column to identify each row
get data in long format
join it with coding for each value
keep only unique rows and get it back in wide format.
library(dplyr)
library(tidyr)
data %>%
  # Row identifier so the long data can be reassembled afterwards.
  mutate(row = row_number()) %>%
  # Long format: one row per cell, remembering which column it came from.
  pivot_longer(cols = -row, names_to = "variable", values_to = "code") %>%
  # Join on BOTH the source column and the code. Joining on `code` alone
  # matches every cell against the coding rows of all variables, which only
  # gives the right answer when all columns happen to hold identical values.
  left_join(coding, by = c("variable", "code")) %>%
  select(row, name, value) %>%
  distinct() %>%
  # Back to wide format, one column per decoded name.
  pivot_wider(names_from = name, values_from = value) %>%
  select(-row)
# variable1 variable2
# <chr> <chr>
#1 a a
#2 b b
#3 c c
#4 a a
#5 b b
#6 c c

How to get all combinations of 2 from a grouped column in a data frame

I could write a loop to do this, but I was wondering how this might be done in R with dplyr. I have a data frame with two columns. Column 1 is the group, Column 2 is the value. I would like a data frame that has every combination of two values from each group in two separate columns. For example:
input = data.frame(col1 = c(1,1,1,2,2), col2 = c("A","B","C","E","F"))
input
#> col1 col2
#> 1 1 A
#> 2 1 B
#> 3 1 C
#> 4 2 E
#> 5 2 F
and have it return
output = data.frame(col1 = c(1,1,1,2), col2 = c("A","B","C","E"), col3 = c("B","C","A","F"))
output
#> col1 col2 col3
#> 1 1 A B
#> 2 1 B C
#> 3 1 C A
#> 4 2 E F
I'd like to be able to include it within dplyr syntax:
input %>%
group_by(col1) %>%
???
I tried writing my own function that produces a data frame of combinations like what I need from a vector and sent it into the group_map function, but didn't have success:
# Return every 2-element combination of `x` as a data frame with one
# combination per row. Extra arguments in `...` are accepted and ignored.
combos <- function(x, ...) {
  pairs <- combn(x, 2)
  as.data.frame(t(pairs))
}
input %>%
group_by(col1) %>%
group_map(.f = combos)
Produced an error.
Any suggestions?
You can do :
library(dplyr)
# Each group yields several rows (one per pair), so use reframe(): returning
# more than one row per group from summarise() is deprecated (dplyr >= 1.1.0).
data <- input %>%
  group_by(col1) %>%
  reframe(col2 = t(combn(col2, 2)))
# `col2` is a two-column matrix; spread it into separate data frame columns.
cbind(data[1], data.frame(data$col2))
# col1 X1 X2
# <dbl> <chr> <chr>
#1 1 A B
#2 1 A C
#3 1 B C
#4 2 E F
# Nest col2 into a list-column `data` (one entry per col1), build the
# transposed combn() matrix of pairs for each entry, then unnest the matrices
# back into rows and drop the nested input.
input %>%
group_by(col1) %>%
nest(data=-col1) %>%
mutate(out= map(data, ~ t(combn(unlist(.x), 2)))) %>%
unnest(out) %>% select(-data)
# A tibble: 4 x 2
# Groups: col1 [2]
col1 out[,1] [,2]
<dbl> <chr> <chr>
1 1 A B
2 1 A C
3 1 B C
4 2 E F
Or :
# Build the pairs for one group.
# x:   the group's rows; the code reads the group key from the first cell and
#      the values to combine from the second column, so it relies on the
#      grouping column being kept (`.keep = TRUE` below).
# ...: further arguments supplied by group_map(); ignored.
# Returns a tibble with the key in `col1` and a two-column matrix of pairs in
# `col2`.
combos <- function(x, ...) {
  tibble(
    col1 = x[[1, 1]],
    col2 = t(combn(unlist(x[[2]], use.names = FALSE), 2))
  )
}
input %>%
  group_by(col1) %>%
  group_map(.f = combos, .keep = TRUE) %>%
  # Row-bind the per-group tibbles; do.call() replaces the deprecated
  # purrr::invoke().
  do.call(rbind, .) %>%
  tibble
# A tibble: 4 x 2
col1 col2[,1] [,2]
<dbl> <chr> <chr>
1 1 A B
2 1 A C
3 1 B C
4 2 E F
Thank you! In terms of parsimony, I like both the answer from Ben
# reframe() replaces the superseded do(); an unnamed data frame result is
# unpacked into its columns (X1, X2), giving one row per pair within each col1.
input %>%
  group_by(col1) %>%
  reframe(data.frame(t(combn(col2, 2))))
and Ronak
# Each group yields several rows (one per pair), so use reframe(): returning
# more than one row per group from summarise() is deprecated (dplyr >= 1.1.0).
data <- input %>%
  group_by(col1) %>%
  reframe(col2 = t(combn(col2, 2)))
# `col2` is a two-column matrix; spread it into separate data frame columns.
cbind(data[1], data.frame(data$col2))

Resources