Merge data frames and divide rows by group

Merge data frames and divide rows by group - r

I would like to divide the values from df1 by the values from df2. In this reproducible example, I am able to sum these values, but how can I do the division instead? Thanks in advance!
library(dplyr)

df1 <- data.frame(
  country = c("a", "b", "c"),
  year1 = c(1, 2, 3),
  year2 = c(1, 2, 3)
)
df2 <- data.frame(
  country = c("a", "b", "d"),
  year1 = c(1, 2, NA),
  year2 = c(1, 2, 3)
)

# Stack both frames and total every numeric column per country.
# sum(na.rm = TRUE) already treats missing values as 0, so no separate
# replace_na() pass is required.
df3 <- bind_rows(df1, df2) %>%
  group_by(country) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE))) %>%
  # Turn zero totals back into NA. na_if() is applied column by column
  # because calling it on a whole data frame errors in dplyr >= 1.1.
  mutate(across(where(is.numeric), ~ na_if(.x, 0)))
Expected result is:
# A tibble: 4 x 3
country year1 year2
<chr> <dbl> <dbl>
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA

As there are groups with 2 rows and some with 1, use an if/else condition within summarise/across to divide the first element by the last if there are two elements or else return NA
library(dplyr) # version 1.0.4
library(tidyr)

# bind_rows() puts the df1 rows before the df2 rows, so within a country
# first() is the df1 value and last() is the df2 value.
# Note: because NA is replaced by 0 first, a missing denominator yields
# Inf (or NaN for 0 / 0) rather than NA.
bind_rows(df1, df2) %>%
  # Lambda form: passing extra arguments through across()'s `...` is
  # deprecated in dplyr >= 1.1.
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) %>%
  group_by(country) %>%
  # Countries present in both frames have two rows; all others get NA.
  summarise(across(
    everything(),
    ~ if (n() == 2) first(.x) / last(.x) else NA_real_
  ))
-output
# A tibble: 4 x 3
# country year1 year2
#* <chr> <dbl> <dbl>
#1 a 1 1
#2 b 1 1
#3 c NA NA
#4 d NA NA

Here is a base R option using merge + split.default
# Full outer join on country; the shared value columns come back with
# .x (from df1) and .y (from df2) suffixes.
df <- merge(df1, df2, by = "country", all = TRUE)
# Group the value columns by their un-suffixed name, then divide the .x
# column of each group by its .y column.
cbind(
  df[1],
  list2DF(lapply(
    split.default(df[-1], gsub("\\.(x|y)", "", names(df[-1]))),
    function(pair) do.call(`/`, unname(as.list(pair)))
  ))
)
which gives
country year1 year2
1 a 1 1
2 b 1 1
3 c NA NA
4 d NA NA

Related

I'm looking for a way to swap the values of two columns in a row when the reversed pair also exists in the data frame.

Here is my dataframe:
DF <- data.frame(
VAR1 = c("A", "A", "B", "B", "B", "C", "C"),
VAR2 = c("B", "C", "A", "D", "C", "B", "D"),
VAR3 = c(1, 1, 1, 2, 4, 6, 4)
)
I would like to have this:
VAR1 VAR2 VAR3
A B 2
A C 1
B D 2
B C 10
C D 4
If there are two rows like (VAR1=A, VAR2=B, VAR3=X) and (VAR1=B, VAR2=A, VAR3=Y), I want to have a single row like this one: (VAR1=A, VAR2=B, VAR3=X+Y). In other words, if the first two variables of two rows are "inverse" (swapped), I would like to have one row with the sum of their VAR3 values.
I tried to create a column which says "Yes" if two rows have inverse values, but I can't find a way to do it.
My code:
# Asker's attempt: mark a row "Yes" when VAR2 occurs somewhere in VAR1 AND VAR1
# occurs among the VAR2 values of the filtered rows below; otherwise "No".
DF <- DF %>%
mutate(VAR4 = case_when(VAR2 %in% DF$VAR1 &
VAR1 %in%
(DF %>%
# NOTE: this keeps only rows whose two columns hold the same value (e.g. A/A),
# not rows that have a mirrored partner (A/B vs. B/A). The sample DF has no such
# row, so pull() returns an empty vector and the %in% test is always FALSE.
filter(VAR1 == VAR2) %>%
pull(VAR2)
) ~ "Yes",
TRUE ~ 'No' ))
`
This is the result:
VAR1 VAR2 VAR3 VAR4
A B 1 No
A C 1 No
B A 1 No
B D 2 No
B C 4 No
C B 6 No
C D 4 No
My code doesn't work because my filter doesn't take the result of VAR2 %in% DF$VAR1 into account.
Does someone have an idea?

You can sort first with apply, and then summarise:
# Put each row's pair in sorted order so (B, A) and (A, B) share one key.
# apply() returns the sorted pairs as columns, hence the transpose.
DF[1:2] <- t(apply(DF[1:2], MARGIN = 1, FUN = sort))
# Total VAR3 for every normalised pair.
summarise(group_by(DF, VAR1, VAR2), VAR3 = sum(VAR3))
# A tibble: 5 × 3
# Groups: VAR1 [3]
VAR1 VAR2 VAR3
<chr> <chr> <dbl>
1 A B 2
2 A C 1
3 B C 10
4 B D 2
5 C D 4
Or, in single pipe:
# Same idea without modifying DF: build a list-column holding each row's sorted
# pair, group on it, then spread the pair back into VAR1 / VAR2 columns.
# pmap()/set_names() come from purrr and unnest_wider() from tidyr; the snippet
# does not show those packages being attached.
DF %>%
# ..1 and ..2 are the first and second arguments pmap() passes for each row,
# i.e. the row's values from the first two columns of DF.
mutate(VAR = pmap(., ~ sort(c(..1, ..2)) %>%
set_names(., c("VAR1", "VAR2")))) %>%
group_by(VAR) %>%
summarise(VAR3 = sum(VAR3)) %>%
# Expand the named pair into separate VAR1 and VAR2 columns.
unnest_wider(VAR)

You could try:
library(dplyr)

# Build an order-independent key for each pair: idx1 is the smaller label,
# idx2 the larger one, so (B, A) and (A, B) fall into the same group.
DF %>%
  mutate(
    across(VAR1:VAR2, as.character),
    idx1 = pmin(VAR1, VAR2),
    idx2 = pmax(VAR1, VAR2)
  ) %>%
  group_by(idx1, idx2) %>%
  summarise(VAR3 = sum(VAR3)) %>%
  ungroup() %>%
  # Rename the key columns back to VAR1 / VAR2.
  rename_with(function(nm) sub('idx', 'VAR', nm))
Output:
# A tibble: 5 x 3
VAR1 VAR2 VAR3
<chr> <chr> <dbl>
1 A B 2
2 A C 1
3 B C 10
4 B D 2
5 C D 4

Count and merge NA rows in R

I have the following dataframe:
var1 <- c("a", "b", "c", "d", "e")
var2 <- c(5, 10, NA, NA, NA)
df <- data.frame (var1, var2)
df
# A tibble: 5 × 2
var1 var2
<chr> <dbl>
1 a 5
2 b 10
3 c NA
4 d NA
5 e NA
I would like to count and merge the NA rows. Expected output:
# A tibble: 3 × 2
var1 var2
<chr> <dbl>
1 a 5
2 b 10
3 x 3
I have tried aggregate(data=df,var2~.,na.rm = FALSE, FUN = sum) but it only returns the results for a and b.
Thank you in advance

With dplyr
# Collapse every row with a missing var2 into a single "x" row whose var2 is
# the number of such rows.
df %>%
  mutate(
    # Compute the NA mask once, before var1 is relabelled. Testing
    # var1 == "x" instead would also overwrite rows that genuinely carry the
    # label "x", and would turn var2 into NA wherever var1 itself is NA.
    var2_missing = is.na(var2),
    var1 = ifelse(var2_missing, "x", var1),
    var2 = ifelse(var2_missing, sum(var2_missing), var2),
    # Drop the helper column again.
    var2_missing = NULL
  ) %>%
  # The relabelled rows are now identical; keep one of them.
  unique()
var1 var2
1 a 1
2 b 2
3 c 3
4 x 2
Data
df <- structure(list(var1 = c("a", "b", "c", "d", "e"), var2 = c(1,
2, 3, NA, NA)), class = "data.frame", row.names = c(NA, -5L))

Try this with Base R
# Keep only the fully observed rows.
s <- df[complete.cases(df), ]
# Append the NA count as a list so each column keeps its own type;
# c("x", n) would coerce the count to character and silently turn var2
# into a character column.
s[nrow(s) + 1, ] <- list("x", sum(is.na(df$var2)))
s
output
var1 var2
1 a 5
2 b 10
3 x 3

Using aggregate:
# Relabel every row whose var2 is missing as "x".
df[["var1"]] <- ifelse(is.na(df[["var2"]]), "x", df[["var1"]])
# Per label: count the rows when the whole group is NA, otherwise sum the
# values. na.action = NULL keeps the NA rows instead of dropping them.
aggregate(
  var2 ~ .,
  data = df,
  FUN = function(x) if (all(is.na(x))) length(x) else sum(x),
  na.action = NULL
)
#> var1 var2
#> 1 a 5
#> 2 b 10
#> 3 x 3

Another approach with dplyr
# Give every NA row the label "x" and a weight of 1, then let count() add up
# the weights per label: real values sum to themselves, NA rows are counted.
df %>%
  mutate(
    var1 = replace(var1, is.na(var2), "x"),
    var2 = replace(var2, is.na(var2), 1)
  ) %>%
  count(var1, wt = var2, name = "var2")

Sum While melting columns in R

Is there a way to melt 2 columns and take their sums as the value? For example:
df <- data.frame(A = c("x", "y", "z"), B = c(1, 2, 3), Cat1 = c(1, 4, 3), New2 = c(4, 4, 4))
Expected output
New_Col Sum
Cat1 8
New2 12

Or using base R with colSums after selecting the columns of interest and then convert the named vector to data.frame with stack
# colSums() gives a named vector of totals; stack() turns it into a data frame
# with columns values and ind, which are then reordered to name first.
stack(colSums(df[c("Cat1", "New2")]))[c("ind", "values")]
ind values
1 Cat1 8
2 New2 12

Of course
# Sum the two measure columns, then reshape to one row per column.
df %>%
  # Select the columns by name: starts_with('Cat') matches only Cat1 and
  # would silently drop New2 from the result.
  summarise(across(c(Cat1, New2), sum)) %>%
  pivot_longer(everything(), names_to = 'New_Col', values_to = 'Sum')
# A tibble: 2 × 2
New_Col Sum
<chr> <dbl>
1 Cat1 8
2 New2 12

Add 2 dataframes with different lengths in R

I have the following 2 dataframes in R,
df1 = [a,2 df2 = [a,10
b,3] c,2]
I want to add those 2 df, so the output can be
df = [a, 12,
b, 3,
c, 2]
Any advice would be much appreciated, thanks!

We can rbind the two datasets and do a group by sum
# Stack both frames, then sum col2 within each col1 value.
aggregate(col2 ~ col1, data = rbind(df1, df2), FUN = sum)
-output
# col1 col2
#1 a 12
#2 b 3
#3 c 2
Or in dplyr
library(dplyr)

# Append df2 to df1 and total col2 per key; .groups = 'drop' returns an
# ungrouped tibble.
df1 %>%
  bind_rows(df2) %>%
  group_by(col1) %>%
  summarise(col2 = sum(col2), .groups = 'drop')
-output
# A tibble: 3 x 2
# col1 col2
# <chr> <dbl>
#1 a 12
#2 b 3
#3 c 2
data
df2 <- data.frame(col1 = c('a', 'c'), col2 = c(10, 2))
df1 <- data.frame(col1 = c('a', 'b'), col2 = c(2, 3))

R Mutating multiple columns with matching

I am processing a large dataset adapted to my research. Suppose that I have 4 observations (records) and 5 columns as follows:
x <- data.frame("ID" = c(1, 2, 3, 4),
"group1" = c("A", NA, "B", NA),
"group2" = c("B", "A", NA, "C"),
"hours1" = c(3, NA, 5, NA),
"hours2" = c(1, 2, NA, 5))
> x
ID group1 group2 hours1 hours2
1 A B 3 1
2 <NA> A NA 2
3 B <NA> 5 NA
4 <NA> C NA 5
The "group1" and "group2" are reference columns containing the character values of A, B, and C, and the last two columns, "hours1" and "hours2," are numeric indicating hours obviously.
The column "group1" corresponds to the column "hours1"; likewise, "group2" corresponds to "hours2."
I want to create multiple columns according to the values, A, B, and C, of the reference columns matching to values of "hours1" and "hours2" as follows:
ID group1 group2 hours1 hours2 A B C
1 A B 3 1 3 1 NA
2 <NA> A NA 2 2 NA NA
3 B <NA> 5 NA NA 5 NA
4 <NA> C NA 5 NA NA 5
For example, ID 1 has A in "group1," corresponding to 3 in "hours1" which is found under the column "A." ID 3 has B in "group1," corresponding to 5 in "hours1" which is found under the columns "B." In "group 2," ID 4 has C, corresponding to 5 in hours2 which is found under column "C."
Is there a way to do it using R?

One way would be to combine all the "hour" column in one column and "group" columns in another column. This can be done using pivot_longer. After that we can get data in wide format and join it with original data.
library(dplyr)
library(tidyr)
# Reshape to one row per (ID, group, hours) pair, spread the hours into one
# column per group letter, then attach those columns to the original data.
x %>%
# '.value' makes the captured name prefix ("group" / "hours") the output column
# name, so group1/hours1 and group2/hours2 end up on matching rows.
pivot_longer(cols = -ID,
names_to = c('.value'),
names_pattern = '(.*?)\\d+',
values_drop_na = TRUE) %>%
# One new column per distinct group value, filled with that row's hours.
pivot_wider(names_from = group, values_from = hours) %>%
left_join(x, by = 'ID') %>%
# Restore the original column order, with the new group columns last.
select(ID, starts_with('group'), starts_with('hour'), everything())
# A tibble: 4 x 8
# ID group1 group2 hours1 hours2 A B C
# <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 1 A B 3 1 3 1 NA
#2 2 NA A NA 2 2 NA NA
#3 3 B NA 5 NA NA 5 NA
#4 4 NA C NA 5 NA NA 5
For OP's dataset we can slightly modify the code to achieve the desired result.
# Same reshape for the asker's real dataset `zz`, which is not shown here.
# NOTE(review): the pattern presumably expects columns named like
# fu2a_<suffix> / fu2b_<suffix> plus an `id` column -- confirm against zz.
zz %>%
pivot_longer(cols = -id,
names_to = c('.value'),
# Greedy capture: everything before the last underscore becomes the column name.
names_pattern = '(.*)_',
values_drop_na = TRUE) %>%
# Presumably sorts so that pivot_wider() creates the new columns in sorted order.
arrange(fu2a) %>%
pivot_wider(names_from = fu2a, values_from = fu2b) %>%
left_join(zz, by = 'id') %>%
select(id, starts_with('fu2a'), starts_with('fu2b'), everything())

Another approach using dplyr could be done separating group and hours variables to compute the desired variables and then merge with the original x:
library(tidyverse)

# Data
x <- data.frame(
  "ID" = c(1, 2, 3, 4),
  "group1" = c("A", NA, "B", NA),
  "group2" = c("B", "A", NA, "C"),
  "hours1" = c(3, NA, 5, NA),
  "hours2" = c(1, 2, NA, 5),
  stringsAsFactors = FALSE
)

# Reshape
# Long table of group labels; `id` numbers the slots (1, 2) within each ID so
# that group1 pairs with hours1 and group2 with hours2.
groups_long <- x %>%
  select(ID, starts_with("group")) %>%
  pivot_longer(cols = -ID) %>%
  group_by(ID) %>%
  mutate(id = row_number()) %>%
  ungroup()

# Long table of hours, numbered the same way.
hours_long <- x %>%
  select(ID, starts_with("hours")) %>%
  pivot_longer(cols = -ID) %>%
  rename(name2 = name, value2 = value) %>%
  group_by(ID) %>%
  mutate(id = row_number()) %>%
  ungroup()

# Pair each label with its hours, drop empty slots, and spread the hours into
# one column per label (A, B, C).
hours_by_group <- groups_long %>%
  left_join(hours_long, by = c("ID", "id")) %>%
  filter(!is.na(value)) %>%
  select(ID, value, value2) %>%
  pivot_wider(names_from = value, values_from = value2)

# Attach the new columns to the original data.
x %>%
  left_join(hours_by_group, by = "ID")
Output:
ID group1 group2 hours1 hours2 A B C
1 1 A B 3 1 3 1 NA
2 2 <NA> A NA 2 2 NA NA
3 3 B <NA> 5 NA NA 5 NA
4 4 <NA> C NA 5 NA NA 5

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Merge data frames and divide rows by group - r

Here is a base R option using merge + split.default df <- merge(df1, df2, by = "country", all = TRUE) cbind( df[1], list2DF(lapply( split.default(df[-1], gsub("\\.(x|y)", "", names(df)[-1])), function(v) do.call("/", v) )) ) which gives country year1 year2 1 a 1 1 2 b 1 1 3 c NA NA 4 d NA NA

Related

I´m looking for a way to inverse the values of 2 columns of a row when this inverse exists in the dataframe?

Count and merge NA rows in R

Sum While melting columns in R

Add 2 dataframe with dfifferent lengths in R

R Mutating multiple columns with matching

Categories

Resources