Gather several columns at once in R

I am trying to gather() a data.frame, but somehow it is not doing what I want.
This is my data:
df <- data.frame("id" = c(1),
                 "reco_1" = c(2),
                 "sim_1" = c(2),
                 "title_1" = c(2),
                 "reco_2" = c(3),
                 "sim_2" = c(3),
                 "title_2" = c(3))
And this is what it looks like printed:
> df
id reco_1 sim_1 title_1 reco_2 sim_2 title_2
1 1 2 2 2 3 3 3
When I now gather() my df, it looks like this:
> df %>% gather(reco, sim, -id)
id reco sim
1 1 reco_1 2
2 1 sim_1 2
3 1 title_1 2
4 1 reco_2 3
5 1 sim_2 3
6 1 title_2 3
However, what I would like to have is the following structure:
id reco sim title
1 1 2 2 2
2 2 3 3 3
I would appreciate any help, since I do not even know whether gather() is the right verb for this.

We can use pivot_longer
library(dplyr)
library(tidyr)
df %>%
  pivot_longer(-id, names_to = c(".value", "new_id"), names_sep = "_") %>%
  select(-id)
# A tibble: 2 x 4
new_id reco sim title
<chr> <dbl> <dbl> <dbl>
1 1 2 2 2
2 2 3 3 3
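For reference, since the question asks about gather(): the same reshape can also be done with the older gather()/separate()/spread() verbs (a sketch using the df above):
library(dplyr)
library(tidyr)
df %>%
  gather(key, value, -id) %>%                                    # long format: key is "reco_1", "sim_1", ...
  separate(key, into = c("variable", "new_id"), sep = "_") %>%   # split "reco_1" into "reco" and "1"
  spread(variable, value)                                        # back to wide: one column per variable
This keeps the constant id column; drop it with select(-id) as in the answer above. pivot_longer() is the recommended replacement for this pattern in current tidyr.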

Related

Is there a way to do a group by and get both a full count and a count based on a filter in the same table?

I have a dataset that looks like this
ID Filter
1 Y
1 N
1 Y
1 Y
2 N
2 N
2 N
2 Y
2 Y
3 N
3 Y
3 Y
I would like the final result to look like this: the total count per ID together with the count of rows where Filter is "Y".
ID  All Count  Filter Yes
1 4 3
2 5 2
3 3 2
If I do it like this I only get the full count, but I also want the filter count as the next column:
df <- df %>%
  group_by(ID) %>%
  summarise(`All Count` = n())
Both counts can be computed in a single summarise() call:
df %>%
  group_by(ID) %>%
  summarise(`All Count` = n(),
            `Count Yes` = sum(Filter == "Y"))
# A tibble: 3 × 3
ID `All Count` `Count Yes`
<chr> <int> <int>
1 1 4 3
2 2 5 2
3 3 3 2
We can use
library(dplyr)
df %>%
  group_by(ID) %>%
  summarise(`All Count` = n(), `Filter Yes` = sum(Filter == 'Y', na.rm = TRUE))
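A base R alternative (a sketch, assuming Filter only takes the values "Y" and "N"):
tab <- table(df$ID, df$Filter)
data.frame(ID = rownames(tab),
           `All Count` = rowSums(tab),
           `Filter Yes` = tab[, "Y"],
           check.names = FALSE)
table() counts rows per ID and Filter value, so the row sums give the total count and the "Y" column gives the filtered count.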

Create dyadic (relational) data from monadic data

I have conflict data that looks like this
conflict_ID country_code SideA
1 1 1
1 2 1
1 3 0
2 4 1
2 5 0
Now I want to make it into dyadic conflict data that looks like this (SideA=1 should be country_code_1):
conflict_ID country_code_1 country_code_2
1 1 3
1 2 3
2 4 5
Can anyone point me in the right direction?
Here's a direct approach:
df %>%
  filter(SideA == 1) %>%
  select(conflict_ID, country_code_1 = country_code) %>%
  left_join(
    df %>%
      filter(SideA == 0) %>%
      select(conflict_ID, country_code_2 = country_code),
    by = "conflict_ID"
  )
# conflict_ID country_code_1 country_code_2
# 1 1 1 3
# 2 1 2 3
# 3 2 4 5
Using this data:
df = read.table(text = 'conflict_ID country_code SideA
1 1 1
1 2 1
1 3 0
2 4 1
2 5 0 ', header = T)
This extends the previous issue you posted. You could produce all combinations for each conflict_ID, and filter out those combinations where country_code_2 matches country_code with SideA == 1.
library(dplyr)
library(tidyr)
mydf %>%
  group_by(conflict_ID) %>%
  summarise(country_code = combn(country_code, 2, sort, simplify = FALSE),
            .groups = 'drop') %>%
  unnest_wider(country_code, names_sep = '_') %>%
  anti_join(filter(mydf, SideA == 1),
            by = c("conflict_ID", "country_code_2" = "country_code"))
# # A tibble: 3 × 3
# conflict_ID country_code_1 country_code_2
# <int> <int> <int>
# 1 1 1 3
# 2 1 2 3
# 3 2 4 5
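A base R equivalent of the filter-and-join approach from the first answer (a sketch, using the df built with read.table() above):
side_a <- setNames(df[df$SideA == 1, c("conflict_ID", "country_code")],
                   c("conflict_ID", "country_code_1"))
side_b <- setNames(df[df$SideA == 0, c("conflict_ID", "country_code")],
                   c("conflict_ID", "country_code_2"))
merge(side_a, side_b, by = "conflict_ID")
merge() produces every SideA/SideB pairing within each conflict_ID, which is exactly the dyadic structure asked for.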

Multiplying column value by another value matching column name R

I have a data frame which looks like this:
Value1 = c("1","2","1","3")
Letter = c("A","B","B","A")
A = c("2","2","0","1")
B = c("1","1","1","0")
data <- data.frame(Value1,Letter,A,B)
data
Value1 Letter A B
1 1 A 2 1
2 2 B 2 1
3 1 B 0 1
4 3 A 1 0
I'm trying to add a new column which is the multiplication of column Value1 by column A or B, depending on what is in the Letter column. The expected result would be:
Value1 Letter A B Results
1 1 A 2 1 2
2 2 B 2 1 2
3 1 B 0 1 1
4 3 A 1 0 3
I'm trying to use the match() function, but without success.
Thanks!
With base R:
data <- type.convert(data, as.is = TRUE)
data$Results <- ifelse(data$Letter == 'A', data$A * data$Value1, data$B * data$Value1)
Output
Value1 Letter A B Results
1 1 A 2 1 2
2 2 B 2 1 2
3 1 B 0 1 1
4 3 A 1 0 3
Another option would be to pivot to long form, do the calculation, then pivot back to wide format.
library(tidyverse)
data %>%
  type.convert(as.is = TRUE) %>%
  pivot_longer(c(A, B)) %>%
  mutate(Results = ifelse(Letter == name, value * Value1, NA_integer_)) %>%
  pivot_wider(names_from = "name", values_from = "value") %>%
  group_by(Value1, Letter) %>%
  summarise_all(discard, is.na)
Output
Value1 Letter Results A B
<int> <chr> <int> <int> <int>
1 1 A 2 2 1
2 1 B 1 0 1
3 2 B 2 2 1
4 3 A 3 1 0
Use case_when or ifelse
library(dplyr)
data <- data %>%
  type.convert(as.is = TRUE) %>%
  mutate(Results = case_when(Letter == 'A' ~ A * Value1,
                             TRUE ~ B * Value1))
-output
data
Value1 Letter A B Results
1 1 A 2 1 2
2 2 B 2 1 2
3 1 B 0 1 1
4 3 A 1 0 3
Or use get with rowwise
data <- data %>%
  type.convert(as.is = TRUE) %>%
  rowwise %>%
  mutate(Result = get(Letter) * Value1) %>%
  # or may also use
  # mutate(Result = cur_data()[[Letter]] * Value1) %>%
  ungroup
-output
data
# A tibble: 4 × 5
Value1 Letter A B Result
<int> <chr> <int> <int> <int>
1 1 A 2 1 2
2 2 B 2 1 2
3 1 B 0 1 1
4 3 A 1 0 3
In base R, we may use row/column indexing as a vectorized option:
data <- type.convert(data, as.is = TRUE)
nm1 <- unique(data$Letter)
data$Results <- data[nm1][cbind(seq_len(nrow(data)),
                                match(data$Letter, nm1))] * data$Value1
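Another base R option is to loop over rows explicitly rather than vectorizing (a sketch using the data above):
data <- type.convert(data, as.is = TRUE)
data$Results <- data$Value1 * vapply(seq_len(nrow(data)),
                                     function(i) data[[data$Letter[i]]][i],  # value of the column named in Letter, row i
                                     numeric(1))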

Merging columns while ignoring NAs

I would like to merge multiple columns. Here is what my sample dataset looks like.
df <- data.frame(
  id = c(1,2,3,4,5),
  cat.1 = c(3,4,NA,4,2),
  cat.2 = c(3,NA,1,4,NA),
  cat.3 = c(3,4,1,4,2))
> df
id cat.1 cat.2 cat.3
1 1 3 3 3
2 2 4 NA 4
3 3 NA 1 1
4 4 4 4 4
5 5 2 NA 2
I am trying to merge columns cat.1, cat.2 and cat.3. It is a little complicated for me since there are NAs.
I need to end up with only one cat variable, and even when some columns have NA, I need to ignore them. The desired output is below:
> df
id cat
1 1 3
2 2 4
3 3 1
4 4 4
5 5 2
Any thoughts?
Another variation of Gregor's answer using dplyr::transmute:
library(dplyr)
df %>%
transmute(id = id, cat = coalesce(cat.1, cat.2, cat.3))
#> id cat
#> 1 1 3
#> 2 2 4
#> 3 3 1
#> 4 4 4
#> 5 5 2
With dplyr:
library(dplyr)
df %>%
mutate(cat = coalesce(cat.1, cat.2, cat.3)) %>%
select(-cat.1, -cat.2, -cat.3)
An option with fcoalesce from data.table
library(data.table)
setDT(df)[, .(id, cat = do.call(fcoalesce, .SD)), .SDcols = patterns('^cat')]
-output
# id cat
#1: 1 3
#2: 2 4
#3: 3 1
#4: 4 4
#5: 5 2
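Equivalently, the cat columns can be named directly instead of selecting them with .SDcols (a sketch):
library(data.table)
setDT(df)[, .(id, cat = fcoalesce(cat.1, cat.2, cat.3))]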
Does this work:
> library(dplyr)
> df %>% rowwise() %>% mutate(cat = mean(c(cat.1, cat.2, cat.3), na.rm = T)) %>% select(-(2:4))
# A tibble: 5 x 2
# Rowwise:
id cat
<dbl> <dbl>
1 1 3
2 2 4
3 3 1
4 4 4
5 5 2
Since the non-NA values within each row are identical, the row mean returns that same unique value; max or min would work as well.
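For instance, a variant of the same idea with max() (a sketch; na.rm = TRUE drops the NAs):
df %>%
  rowwise() %>%
  mutate(cat = max(c(cat.1, cat.2, cat.3), na.rm = TRUE)) %>%
  ungroup() %>%
  select(id, cat)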
Here is a base R solution which uses apply:
df$cat <- apply(df, 1, function(x) unique(x[!is.na(x)][-1]))

Add original values for columns after group by

For the dataframe below I want to add the original values of VAR_X after a group_by on ID and event and a max() on quest, but I cannot get my code right. Any suggestions? By the way, in my original dataframe more than one column needs to be added.
df <- data.frame(ID = c(1,1,1,1,1,1,2,2,2,3,3,3),
                 quest = c(1,1,2,2,3,3,1,2,3,1,2,3),
                 event = c("A","B","A","B","A",NA,"C","D","C","D","D",NA),
                 VAR_X = c(2,4,3,6,3,NA,6,4,5,7,5,NA))
Code:
df %>%
  group_by(ID, event) %>%
  summarise(quest = max(quest))
Desired output:
ID quest event VAR_X
1 1 2 B 6
2 1 3 A 3
3 2 2 D 4
4 2 3 C 5
5 3 2 D 5
Start by omitting the NA values and, at the end, do an inner_join with the original data set.
df %>%
  na.omit() %>%
  group_by(ID, event) %>%
  summarise(quest = max(quest)) %>%
  inner_join(df, by = c("ID", "event", "quest"))
## A tibble: 5 x 4
## Groups: ID [3]
# ID event quest VAR_X
# <dbl> <fct> <dbl> <dbl>
#1 1 A 3 3
#2 1 B 2 6
#3 2 C 3 5
#4 2 D 2 4
#5 3 D 2 5
df %>%
  drop_na() %>% # remove if necessary ..
  group_by(ID, event) %>%
  filter(quest == max(quest)) %>%
  ungroup()
# A tibble: 5 x 4
# ID quest event VAR_X
#<dbl> <dbl> <chr> <dbl>
# 1 1 2 B 6
# 2 1 3 A 3
# 3 2 2 D 4
# 4 2 3 C 5
# 5 3 2 D 5
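A closely related variant uses slice_max() (available in dplyr >= 1.0.0) instead of filter() (a sketch):
library(dplyr)
library(tidyr)
df %>%
  drop_na() %>%
  group_by(ID, event) %>%
  slice_max(quest, n = 1, with_ties = FALSE) %>%
  ungroup()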
