Stack columns in a data frame - r

Can someone help me stack the following data frame so that the a's end up on top of each other, and likewise the 1s and the 2s, preferably using a pipe, to form a data frame with 3 columns and 4 rows?
# Example input: two identical rows whose six columns repeat the pattern
# (character, numeric, numeric) twice.
df <- do.call(
  rbind,
  replicate(
    2,
    data.frame(X1 = "a", X2 = 1, X3 = 2, X4 = "a", X5 = 1, X6 = 2),
    simplify = FALSE
  )
)
Thank you

Here is a data.table solution...
library(data.table)
# Number of columns in each repeating group (and in the stacked result)
cols <- 3
# Split df into chunks of `cols` consecutive columns. The chunk index is
# derived from each column's position, so it stays correct if `cols` changes
# (the previous `cols:(ncol(df) + 2)` form only lined up when cols == 3).
L <- split.default(df, f = (seq_along(df) - 1L) %/% cols + 1L)
# Row-bind the chunks by position, ignoring column names
data.table::rbindlist(L, use.names = FALSE)
# X1 X2 X3
# 1: a 1 2
# 2: a 1 2
# 3: a 1 2
# 4: a 1 2

Using tidyverse -
library(dplyr)
library(tidyr)
df %>%
  # Convert every column to character so values of different types can share
  # a column after reshaping. `.cols` is passed explicitly because calling
  # across() without it is deprecated.
  mutate(across(everything(), as.character)) %>%
  # One row per cell, read row by row
  pivot_longer(cols = everything()) %>%
  # Label each cell with the target column it belongs to (col1, col2, col3)
  mutate(id = paste0("col", rep(1:3, length.out = n()))) %>%
  group_by(id) %>%
  # Running index within each target column; identifies the output row
  mutate(name = row_number()) %>%
  ungroup() %>%
  pivot_wider(names_from = id, values_from = value) %>%
  # Drop the helper row index
  select(-name)
# col1 col2 col3
# <chr> <chr> <chr>
#1 a 1 2
#2 a 1 2
#3 a 1 2
#4 a 1 2

Related

how to group_by one variable and count based on another variable?

Is it possible to use group_by to group one variable and count the target variable based on another variable?
For example,
x1
x2
x3
A
1
0
B
2
1
C
3
0
B
1
1
A
1
1
I want to count 0 and 1 of x3 with grouped x1
x1
x3=0
x3=1
A
1
1
B
0
2
C
1
0
Is it possible to use group_by and add something to summarize? I tried group_by both x1 and x3, but that gives x3 as the second column which is not what we are looking for.
If it's not possible to just use group_by, I was thinking we could group_by both x1 and x3, then split by x3 and cbind them, but the two dataframes after split have different lengths of rows, and there's no cbind_fill. What should I do to cbind them and fill the extra blanks?
using the data.table package:
library(data.table)
# NOTE(review): `dataset` is not defined in this snippet; presumably it is the
# asker's data frame with columns x1, x2 and x3.
dat <- as.data.table(dataset)
# Relabel x3 by reference as "x3=<value>" so the labels become the names of
# the columns created by dcast() below
dat[, x3:= paste0("x3=", x3)]
# Reshape to wide: one row per x1, one column per x3 label, each cell holding
# the number of rows with that x1/x3 combination
result <- dcast(dat, x1~x3, value.var = "x3", fun.aggregate = length)
A tidyverse approach to achieve your desired result using dplyr::count + tidyr::pivot_wider:
library(dplyr)
library(tidyr)
# Tally rows per (x1, x3) pair, then spread the tallies into one column per
# x3 value; combinations that never occur are filled with 0.
df %>%
  count(x1, x3) %>%
  pivot_wider(
    names_from = x3,
    names_prefix = "x3=",
    values_from = n,
    values_fill = 0
  )
#> # A tibble: 3 × 3
#> x1 `x3=0` `x3=1`
#> <chr> <int> <int>
#> 1 A 1 1
#> 2 B 0 2
#> 3 C 1 0
DATA
# Example data: a grouping key (x1), an extra integer column (x2) and the
# 0/1 column to be counted (x3).
df <- data.frame(
  x1 = c("A", "B", "C", "B", "A"),
  x2 = as.integer(c(1, 2, 3, 1, 1)),
  x3 = as.integer(c(0, 1, 0, 1, 1))
)
Yes, it is possible. Here is an example:
# Read the example table from inline text
dat <- read.table(text = "x1 x2 x3
A 1 0
B 2 1
C 3 0
B 1 1
A 1 1", header = TRUE)
# Count rows per (x1, x3) pair, then spread the counts into one column per x3
# value. count() receives both keys directly, so the result carries no
# leftover grouping, and values_fill supplies 0 for combinations that never
# occur (instead of patching NAs afterwards).
dat %>%
  count(x1, x3) %>%
  pivot_wider(
    names_from = x3,
    names_glue = "x3 = {x3}",
    values_from = n,
    values_fill = 0
  )
# A tibble: 3 x 3
# x1 `x3 = 0` `x3 = 1`
# <chr> <int> <int>
#1 A 1 1
#2 B 0 2
#3 C 1 0

Merge/combine Row based on unique value in R

My data :
I want the output this way :
I already played around with aggregate, merge and group_by functions but the output does not come out the way I want.
One way, using tidyr::separate:
# NOTE(review): `d` is not defined in this snippet; presumably it is the
# asker's data frame with columns V1 and V2.
d %>%
group_by(V1) %>%
# Collapse each group's V2 values into a single comma-separated string
summarise(V2 = toString(V2)) %>%
# Split that string back into two columns, named V2 and V1
separate(V2, into = c("V2", "V1"))
# A tibble: 2 x 2
# V2 V1
# <chr> <chr>
# 1 A C
# 2 B D
You could do:
library(tidyverse)
# Example data: V1 alternates 1, 2 and V2 holds the letters A-D
df <- data.frame(
  V1 = rep(c(1, 2), times = 2),
  V2 = LETTERS[1:4]
)
# Tag each consecutive pair of rows with an id, spread V2 into one column per
# V1 value (named V1 and V2), then drop the helper id.
df %>%
  mutate(id = c(1L, 1L, 2L, 2L)) %>%
  pivot_wider(
    names_from = V1,
    values_from = V2,
    names_prefix = "V"
  ) %>%
  select(-id)
Which gives:
# A tibble: 2 x 2
# V1 V2
# <chr> <chr>
# 1 A B
# 2 C D
How about -
library(dplyr)
# Example data: V1 alternates 1, 2 and V2 holds the letters A-D
df <- tibble(V1 = rep(1:2, times = 2), V2 = LETTERS[1:4])
# Self-join on V1 so every letter is paired with each letter sharing its V1,
# keep only the pairs that start from "A" or "B" and differ, then rename.
df %>%
  left_join(df, by = "V1") %>%
  filter(V2.x %in% c("A", "B"), V2.x != V2.y) %>%
  select(V1 = V2.y, V2 = V2.x)
# A tibble: 2 x 2
# V1 V2
# <chr> <chr>
# 1 C A
# 2 D B

Merge data frames and divide rows by group

I would like to divide the values from df1 over the values from df2. In this reproducible example, I am able to sum these values. What about the division? Thanks in advance!
# Two example tables keyed by country; df2 has a missing year1 value
df1 <- data.frame(country = c("a", "b", "c"), year1 = c(1, 2, 3), year2 = c(1, 2, 3))
df2 <- data.frame(country = c("a", "b", "d"), year1 = c(1, 2, NA), year2 = c(1, 2, 3))
# Stack both tables and sum the year columns per country.
df3 <- bind_rows(df1, df2) %>%
  # Treat missing values as 0 before summing
  mutate(across(where(is.numeric), ~ tidyr::replace_na(.x, 0))) %>%
  group_by(country) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE))) %>%
  # Turn zero totals back into NA. na_if() is applied column by column
  # because calling it on a whole data frame errors in dplyr >= 1.1.
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))
Expected result is:
# A tibble: 4 x 3
country year1 year2
<chr> <dbl> <dbl>
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA
As there are groups with 2 rows and some with 1, use an if/else condition within summarise/across to divide the first element by the last if there are two elements or else return NA
library(dplyr) # version 1.0.4
library(tidyr)
bind_rows(df1, df2) %>%
  # Replace missing values with 0. A lambda is used because passing extra
  # arguments through across(...) is deprecated.
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  group_by(country) %>%
  # A country present in both tables has two rows: divide the first (df1)
  # value by the last (df2) value. Any other group yields NA.
  summarise(across(
    everything(),
    ~ if (n() == 2) first(.x) / last(.x) else NA_real_
  ))
-output
# A tibble: 4 x 3
# country year1 year2
#* <chr> <dbl> <dbl>
#1 a 1 1
#2 b 1 1
#3 c NA NA
#4 d NA NA
Here is a base R option using merge + split.default
# Full outer join on country; value columns shared by both tables come back
# with .x (df1) and .y (df2) suffixes
df <- merge(df1, df2, by = "country", all = TRUE)
# Group the value columns by their name with the .x/.y suffix stripped
value_pairs <- split.default(df[-1], gsub("\\.(x|y)", "", names(df)[-1]))
# Divide the .x column by the .y column within each group and reattach country
cbind(
  df[1],
  list2DF(lapply(value_pairs, function(pair) do.call("/", pair)))
)
which gives
country year1 year2
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA

Add 2 data frames with different lengths in R

I have the following 2 data frames in R:
df1 = [a,2 df2 = [a,10
b,3] c,2]
I want to add those 2 df, so the output can be
df = [a, 12,
b, 3,
c, 2]
Any advice would be much appreciated, thanks!
We can rbind the two datasets and do a group by sum
# Stack both data frames, then total col2 within each col1 value
aggregate(col2 ~ col1, data = rbind(df1, df2), FUN = sum)
-output
# col1 col2
#1 a 12
#2 b 3
#3 c 2
Or in dplyr
library(dplyr)
# Stack both data frames and total col2 per col1; the result is ungrouped
list(df1, df2) %>%
  bind_rows() %>%
  group_by(col1) %>%
  summarise(col2 = sum(col2), .groups = "drop")
-output
# A tibble: 3 x 2
# col1 col2
# <chr> <dbl>
#1 a 12
#2 b 3
#3 c 2
data
# Example inputs: the two tables share key "a" and each has one unique key
df1 <- data.frame(
  col1 = c("a", "b"),
  col2 = c(2, 3)
)
df2 <- data.frame(
  col1 = c("a", "c"),
  col2 = c(10, 2)
)

Creating intervals for a frequency table in R

I have a dataframe I've created in the form
FREQ CNT
0 5
1 20
2 1000
3 3
4 3
I want to further group my results to be in the following form:
CUT CNT
0+1 25
2+3 1003
4+5 ...
.....
I've tried using the between and cut functions in dplyr, but that just adds a new interval column to my data frame. Can anyone give me a good indication of where to go to achieve this?
Here is a way to do it in dplyr:
library(dplyr)
df <- df %>%
  # Position-based id. row_number() also works on a zero-row data frame,
  # where 1:n() would produce c(1, 0) and fail.
  mutate(id = row_number()) %>%
  # Label each consecutive pair of rows "<first FREQ>+<second FREQ>":
  # odd rows look ahead to the next FREQ, even rows look back to the previous.
  mutate(new_freq = ifelse(
    id %% 2 != 0,
    paste0(FREQ, "+", lead(FREQ, 1)),
    paste0(lag(FREQ, 1), "+", FREQ)
  ))
# Attach the per-label total to every row of the label's group
df <- df %>%
  group_by(new_freq) %>%
  mutate(new_cnt = sum(CNT))
# Keep one row per label. The columns are selected by name so the result
# does not depend on column positions.
unique(df[, c("new_freq", "new_cnt")])
# A tibble: 2 x 2
# Groups: new_freq [2]
# new_freq new_cnt
# <chr> <int>
#1 0+1 25
#2 2+3 1003
data
# Example data: four frequency levels with their integer counts
df <- data.frame(
  FREQ = 0:3,
  CNT = c(5L, 20L, 1000L, 3L)
)
A non-elegant solution using dplyr... probably a better way to do this.
dat <- data.frame(FREQ = c(0, 1, 2, 3, 4), CNT = c(5, 20, 1000, 3, 3))
# Pair up consecutive rows: rows 1-2 get index 0, rows 3-4 get index 1, ...
# seq_len() keeps this valid for an empty data frame, where
# 0:(nrow(dat) - 1) would produce c(0, -1).
dat2 <- dat %>%
  mutate(index = (seq_len(nrow(dat)) - 1) %/% 2) %>%
  group_by(index)
# Total CNT per pair, then attach a "<first FREQ>+<second FREQ>" label built
# from the same grouping. A trailing unpaired row gets NA as its second FREQ.
dat2 %>%
  summarise(new_CNT = sum(CNT)) %>%
  left_join(
    dat2 %>%
      mutate(CUT = paste0(FREQ[1], "+", FREQ[2])) %>%
      distinct(index, CUT),
    by = "index"
  ) %>%
  select(-index)
# A tibble: 3 x 2
# new_CNT CUT
# <dbl> <chr>
# 1 25 0+1
# 2 1003 2+3
# 3 3 4+NA

Resources