Merge/combine Row based on unique value in R

Merge/combine Row based on unique value in R - r

My data :
I want the output this way :
I already played around with aggregate, merge and group_by functions but the output does not come out the way I want.

One way, using tidyr::separate:
# Collapse the V2 values of each V1 group into a single string with
# toString(), then split that string back into two columns named V2 and V1.
separate(
  summarise(group_by(d, V1), V2 = toString(V2)),
  V2,
  into = c("V2", "V1")
)
# A tibble: 2 x 2
V2 V1
<chr> <chr>
1 A C
2 B D

You could do:
library(tidyverse)
df <- data.frame(V1 = c(1, 2, 1, 2),
                 V2 = LETTERS[1:4])
df %>%
  # Number the occurrences of each V1 value (1st, 2nd, ...). The n-th
  # occurrence of every V1 value then lands in output row n, for any number
  # of input rows, instead of relying on a hard-coded id for exactly 4 rows.
  group_by(V1) %>%
  mutate(id = row_number()) %>%
  ungroup() %>%
  # One output column per distinct V1 value, named V<value>.
  pivot_wider(names_from = V1,
              names_prefix = "V",
              values_from = V2) %>%
  # id was only needed to line the rows up.
  select(-id)
Which gives:
# A tibble: 2 x 2
V1 V2
<chr> <chr>
1 A B
2 C D

How about -
library(dplyr)
df <- tibble(V1 = rep(1:2, times = 2), V2 = LETTERS[1:4])
# Self-join on V1 so that every row is paired with every row sharing its V1.
# Keep the pairs that start from "A" or "B" and point at a different letter,
# then return the partner letter as V1 and the starting letter as V2.
df %>%
  left_join(df, by = "V1") %>%
  filter(V2.x %in% c("A", "B"), V2.x != V2.y) %>%
  select(V1 = V2.y, V2 = V2.x)
# A tibble: 2 x 2
V1 V2
<chr> <chr>
1 C A
2 D B

Related

Correlation by group and unique pairs

I want to compute correlations for each unique pair of variables, grouped by another variable. My solution works for a very small dataset, but with more columns it gets very tedious.
set.seed(13)
df <- data.frame(
  group = rep(LETTERS[1:3], times = 3),
  var1 = rnorm(9, mean = 1),
  var2 = rnorm(9, mean = 2),
  var3 = rnorm(9, mean = 1)
)
# One correlation per variable pair, computed separately within each group.
summarise(
  group_by(df, group),
  var1_var2 = cor(var1, var2),
  var1_var3 = cor(var1, var3),
  var2_var3 = cor(var2, var3)
)
I also tried this one, but it doesn't work.
# Attempted shortcut, kept as posted because it does not work: inside
# summarise(), `df[,2:ncol(df)]` refers to the complete, ungrouped `df` rather
# than to the rows of the current group, and cor() on several columns yields a
# correlation matrix rather than a single value per group.
df %>%
group_by(group) %>%
summarise(cor = cor(df[,2:ncol(df)]))

Here is an option. Map out all the combos then run a cor test for each group and each var and then pivot wider at the end:
library(tidyverse)
# Every column except the grouping column is a variable to correlate, so the
# pairs are derived from the data instead of being typed out by hand.
cor_vars <- setdiff(names(df), "group")
map_dfr(unique(df$group), \(x) {
  # One row per unordered variable pair (columns X1 and X2), tagged with group x.
  data.frame(t(combn(cor_vars, 2))) |>
    mutate(group = x)
}) |>
  # For each (X1, X2, group) row, correlate the two named columns using only
  # the rows of `df` that belong to that group.
  mutate(cor = pmap_dbl(list(X1, X2, group),
                        ~cor(df[df$group == ..3, ..1],
                             df[df$group == ..3, ..2]))) |>
  # Build the output column name "<X1>_<X2>", then spread the pairs into columns.
  unite(test, X1, X2) |>
  pivot_wider(names_from = test, values_from = cor)
#> # A tibble: 3 x 4
#> group var1_var2 var1_var3 var2_var3
#> <chr> <dbl> <dbl> <dbl>
#> 1 A 0.318 -0.476 -0.985
#> 2 B -0.373 -0.487 -0.628
#> 3 C 0.535 0.991 0.645

Another solution. This works for any number of variables.
library(dplyr)
library(tidyr)
library(purrr)
library(tibble)
set.seed(13)
# Same example data as in the question (var3 is drawn around mean 1 there).
df <- data.frame(group = rep(LETTERS[1:3], 3),
                 var1 = rnorm(9, 1),
                 var2 = rnorm(9, 2),
                 var3 = rnorm(9, 1))
df %>%
  select(-group) %>%
  # One data frame of variables per group; the list names are the group labels.
  split(df$group) %>%
  imap_dfr(
    ~ {
      # All ordered name pairs; keeping first < second leaves each unordered
      # pair exactly once and drops the self-pairs.
      expand.grid(
        first = names(.x),
        second = names(.x),
        stringsAsFactors = FALSE
      ) %>%
        filter(first < second) %>%
        rowwise() %>%
        # .x is the current group's data, .y its group label.
        transmute(
          group = .y,
          pair = paste(first, second, sep = "_"),
          cor = cor(.x[[first]], .x[[second]])
        )
    }
  ) %>%
  pivot_wider(
    names_from = "pair",
    values_from = "cor"
  )
# # A tibble: 3 × 4
# group var1_var2 var1_var3 var2_var3
# <chr> <dbl> <dbl> <dbl>
# 1 A 0.318 -0.476 -0.985
# 2 B -0.373 -0.487 -0.628
# 3 C 0.535 0.991 0.645

Stack columns in a data frame

Can someone help me stack the following data frame so that the `a`s end up on top of each other, and likewise the 1s and the 2s, forming a data frame with 3 columns and 4 rows? Preferably using a pipe.
# Two identical rows, built by row-binding the same one-row data frame twice.
df <- do.call(
  rbind,
  replicate(
    2,
    data.frame(X1 = "a", X2 = 1, X3 = 2, X4 = "a", X5 = 1, X6 = 2),
    simplify = FALSE
  )
)
Thank you

Here is a data.table solution...
library(data.table)
cols <- 3
# Label every column with the number of the chunk of `cols` columns it falls
# into (1, 1, 1, 2, 2, 2, ...) and split the data frame column-wise by it
L <- split.default(df, f = (seq_len(ncol(df)) + cols - 1) %/% cols)
# Row-bind the chunks by position, ignoring their column names
rbindlist(L, use.names = FALSE)
# X1 X2 X3
# 1: a 1 2
# 2: a 1 2
# 3: a 1 2
# 4: a 1 2

Using tidyverse -
library(dplyr)
library(tidyr)
df %>%
  # Convert every column to character so values of different types can share
  # one column. `.cols` is spelled out because calling across() without it is
  # deprecated.
  mutate(across(everything(), as.character)) %>%
  pivot_longer(cols = everything()) %>%
  # Values come out row by row, so cycling through 1:3 assigns each value to
  # its target column.
  mutate(id = paste0("col", rep(1:3, length.out = n()))) %>%
  # Number the values within each target column; that number becomes the
  # output row.
  group_by(id) %>%
  mutate(name = row_number()) %>%
  # Drop the grouping before reshaping so it is not carried into the result.
  ungroup() %>%
  pivot_wider(names_from = id, values_from = value) %>%
  select(-name)
# col1 col2 col3
# <chr> <chr> <chr>
#1 a 1 2
#2 a 1 2
#3 a 1 2
#4 a 1 2

Add 2 dataframes with different lengths in R

I have the following 2 dataframes in R:
df1 = [a,2 df2 = [a,10
b,3] c,2]
I want to add those 2 df, so the output can be
df = [a, 12,
b, 3,
c, 2]
Any advice would be much appreciated, thanks!

We can rbind the two datasets and do a group by sum
# Stack both data frames, then sum col2 within each col1 value.
aggregate(col2 ~ col1, data = rbind(df1, df2), FUN = sum)
-output
# col1 col2
#1 a 12
#2 b 3
#3 c 2
Or in dplyr
library(dplyr)
# Stack the rows of both data frames, then total col2 per col1 value; the
# grouping is dropped from the result.
summarise(
  group_by(bind_rows(df1, df2), col1),
  col2 = sum(col2),
  .groups = "drop"
)
-output
# A tibble: 3 x 2
# col1 col2
# <chr> <dbl>
#1 a 12
#2 b 3
#3 c 2
data
# Example inputs: "a" occurs in both data frames, "b" and "c" in one each.
df1 <- data.frame(col1 = c("a", "b"), col2 = c(2, 3))
df2 <- data.frame(col1 = c("a", "c"), col2 = c(10, 2))

`unnest_wider` multiple columns

I have a tibble with multiple columns with multiple list columns I'd like to unnest_wider.
# Three rows; values1 and values2 are list-columns holding one length-2
# integer vector per row.
df1 <- tibble(
  gr = letters[1:3],
  values1 = list(1:2, 3:4, 5:6),
  values2 = list(1:2, 3:4, 5:6)
)
I have tried many approaches that have not worked including adding a vector into col
# Failing attempt, kept as posted.
# NOTE(review): `values` is not a column of df1 (its list-columns are
# `values1` and `values2`) — presumably `values1` was intended; confirm.
df1 %>% # unnest_wider doesn't take multiple inputs
unnest_wider(col = c(values, values2),
names_sep = c("_1", "_2"),
names_repair = "unique")
and trying mutate_at
# Failing attempt, kept as posted: the lambda calls unnest_wider() with only
# `col` and naming arguments, so no data frame is passed to it.
# NOTE(review): `values` is not a column of df1 (its list-columns are
# `values1` and `values2`) — presumably `values1` was intended; confirm.
df1 %>% # mutate_at doesn't send data
mutate_at(vars(values, values2),
~unnest_wider(col = .,
names_sep = c("_1", "_2"),
names_repair = "unique"))
How can I unnest multiple columns wider?

Here is one option with map
library(dplyr)
library(purrr)
library(tidyr) # provides unnest_wider()
# Widen each list-column on its own, column-bind the pieces, then put `gr`
# back in front.
map_dfc(names(df1[-1]), ~
  df1 %>%
    # .x holds a column name as a string, so select it with all_of().
    select(all_of(.x)) %>%
    # names_sep is a single separator: the new columns are named
    # <column>_<position>, e.g. values1_1 and values1_2.
    unnest_wider(all_of(.x), names_sep = "_",
                 names_repair = "unique")) %>%
  bind_cols(df1 %>%
              select(gr), .)
# A tibble: 3 x 5
# gr values1_1 values1_2 values2_1 values2_2
#* <chr> <int> <int> <int> <int>
#1 a 1 2 1 2
#2 b 3 4 3 4
#3 c 5 6 5 6

Answer I used
df1 %>%
  # Name the list-columns explicitly: calling unnest() without `cols` is
  # deprecated and emits a warning.
  unnest(cols = c(values1, values2)) %>%
  # Label the rows "1", "2", "1", "2", ... so that each row gets the position
  # its value had inside the original list element.
  mutate(q_name = rep(c("1", "2"), nrow(.)/2)) %>%
  # One row per `gr`; columns are named <list-column>_<position>.
  pivot_wider(id_cols = gr,
              names_from = q_name,
              values_from = values1:values2)

Creating an interval for a frequency table in R

I have a dataframe I've created in the form
FREQ CNT
0 5
1 20
2 1000
3 3
4 3
I want to further group my results to be in the following form:
CUT CNT
0+1 25
2+3 1003
4+5 ...
.....
I've tried using the between and cut functions with dplyr, but that just adds a new interval column to my dataframe. Can anyone give me a good indication of where to go to achieve this?

Here is a way to do it in dplyr:
library(dplyr)
df <- df %>%
  mutate(
    # Row position decides the pairing: odd rows pair with the following row,
    # even rows with the preceding one. seq_len() stays valid for zero rows,
    # unlike 1:n().
    id = seq_len(n()),
    new_freq = ifelse(id %% 2 != 0,
                      paste0(FREQ, "+", lead(FREQ, 1)),
                      paste0(lag(FREQ, 1), "+", FREQ))
  )
# Total the counts of the rows that share an interval label.
df <- df %>%
  group_by(new_freq) %>%
  mutate(new_cnt = sum(CNT))
# Pick the two result columns by name rather than by position, so the result
# does not depend on how many columns `df` had to begin with.
unique(df[, c("new_freq", "new_cnt")])
# A tibble: 2 x 2
# Groups: new_freq [2]
# new_freq new_cnt
# <chr> <int>
#1 0+1 25
#2 2+3 1003
data
# Example frequency table: FREQ values 0 to 3 with integer counts.
df <- data.frame(FREQ = 0:3, CNT = c(5L, 20L, 1000L, 3L))

A non-elegant solution using dplyr... probably a better way to do this.
library(dplyr)
dat <- data.frame(FREQ = c(0, 1, 2, 3, 4), CNT = c(5, 20, 1000, 3, 3))
# Assign each FREQ value to a bin of width 2 (0-1, 2-3, 4-5, ...). Binning on
# the value itself, not on the row position, keeps the bins correct when a
# FREQ value is missing or the number of rows is odd.
dat2 <- dat %>%
  mutate(index = FREQ %/% 2) %>%
  group_by(index)
dat2 %>%
  summarise(new_CNT = sum(CNT)) %>%
  # Label each bin by its two bounds, so a bin with a single row still reads
  # "4+5" instead of "4+NA".
  mutate(CUT = paste0(index * 2, "+", index * 2 + 1)) %>%
  select(-index)
# A tibble: 3 x 2
#   new_CNT CUT
#     <dbl> <chr>
# 1      25 0+1
# 2    1003 2+3
# 3       3 4+5

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Merge/combine Row based on unique value in R - r

My data : I want the output this way : I already played around with aggregate, merge and group_by functions but the output does not come out the way I want.

One way, using tidyr::separate: d %>% group_by(V1) %>% summarise(V2 = toString(V2)) %>% separate(V2, into = c("V2", "V1")) # A tibble: 2 x 2 V2 V1 <chr> <chr> 1 A C 2 B D

You could do: library(tidyverse) df <- data.frame(V1 = c(1,2,1,2), V2 = LETTERS[1:4]) df %>% mutate(id = rep(1:2, each = 2)) %>% pivot_wider(names_from = V1, names_prefix = 'V', values_from = V2) %>% select(-id) Which gives: # A tibble: 2 x 2 V1 V2 <chr> <chr> 1 A B 2 C D

How about - library(dplyr) df <- tibble(V1 = rep(1:2, 2), V2 = LETTERS[1:4]) df %>% left_join(df, by = "V1") %>% filter(V2.x != V2.y & V2.x %in% c("A", "B")) %>% select(V1 = V2.y, V2 = V2.x) # A tibble: 2 x 2 V1 V2 <chr> <chr> 1 C A 2 D B

Related

Correlation by group and unique pairs

Stack columns in a data frame

Add 2 dataframes with different lengths in R

`unnest_wider` multiple columns

Creating an interval for a frequency table in R

Categories

Resources