Reordering a pivot wide table - r

I have the following dataframe:
df <- structure(list(rows = c(1, 2, 3, 4, 5, 6), col1 = c(122, 111,
111, 222, 212, 122), col2 = c(10101, 20202, 200022, 10201, 20022,
22222), col3 = c(11, 22, 22, 22, 11, 22)), class = "data.frame", row.names = c(NA,
-6L))
rows col1 col2 col3
1 1 122 10101 11
2 2 111 20202 22
3 3 111 200022 22
4 4 222 10201 22
5 5 212 20022 11
6 6 122 22222 22
I would like to filter the rows where at least one of the columns 2,3,4 include "1" AND "2".
The desired outcome would be:
rows col1 col2 col3
1 1 122 10101 11
4 4 222 10201 22
5 5 212 20022 11
6 6 122 22222 22
The following two are not working because they scan all the three columns together and not one by one.
df[which(apply(df[,2:4],1,function(x) any(grepl("1",x)) & any(grepl("2",x)))),]
OR
library(tidyverse)
TRIPS2_fin %>% filter_at(vars(2,3,4), any_vars(str_detect(., pattern="1|2")))

You could use :
# Keep a row when any single cell in columns 2:4 contains both a "1" and a "2"
# (in either order); the regex is tested cell by cell, not on the row as a whole.
df[apply(df[2:4], 1, function(x) any(grepl('1.*2|2.*1', x))),]
# rows col1 col2 col3
#1 1 122 10101 11
#4 4 222 10201 22
#5 5 212 20022 11
#6 6 122 22222 22
And similar using filter_at
library(dplyr)
# any_vars() keeps a row when the pattern matches in at least one of columns 2:4.
df %>% filter_at(2:4, any_vars(grepl('1.*2|2.*1', .)))

We can vectorize it in base R
# grepl() runs once per column (vectorized over the rows); Reduce(`|`) then ORs
# the per-column logical vectors into a single row mask.
df[Reduce(`|`, lapply(df[2:4], grepl, pattern = '1.*2|2.*1')),]
# rows col1 col2 col3
#1 1 122 10101 11
#4 4 222 10201 22
#5 5 212 20022 11
#6 6 122 22222 22

Related

Choosing the right column based on a vector of column names

I'm trying to pull values from columns based on the values in a vector. I'm not sure I have the right words to describe the problem, but the code should help.
This feels related to coalesce maybe not?
library(tidyverse)
# Starting table
dat <-
tibble(
A = 1:10,
B = 31:40,
C = 101:110,
value = c("A", "C", "B", "A", "B", "C", "C", "B", "A", "A")
)
I want:
dat %>%
mutate(
output = c(1, 102, 33, 4, 35, 106, 107, 38, 9, 10)
)
I could do
dat %>%
mutate(
output =
case_when(value == "A" ~ A,
value == "B" ~ B,
value == "C" ~ C)
)
but my real application has many values and I want to take advantage of value having the matching info
Is there a function that does:
dat %>%
mutate(output = grab_the_right_column(value))
Thanks!
The rowwise approach would be less efficient, but it is compact within the tidyverse approaches to get the column value based on the column name for each row.
library(dplyr)
# Work one row at a time so that get() receives a single column name per call,
# then remove the rowwise grouping again.
dat %>%
  rowwise() %>%
  mutate(output = get(value)) %>%
  ungroup()
-output
# A tibble: 10 x 5
# A B C value output
# <int> <int> <int> <chr> <int>
# 1 1 31 101 A 1
# 2 2 32 102 C 102
# 3 3 33 103 B 33
# 4 4 34 104 A 4
# 5 5 35 105 B 35
# 6 6 36 106 C 106
# 7 7 37 107 C 107
# 8 8 38 108 B 38
# 9 9 39 109 A 9
#10 10 40 110 A 10
These types of problems are handled more efficiently with row/column indexing in base R. Build a two-column matrix holding the row sequence and, for each row, the column index obtained by matching the 'value' column against the column names; indexing with that matrix extracts the elements.
# Matrix indexing: each row of the cbind() matrix is a (row, column) pair, where
# the column is the position of `value` among the first three column names.
dat$output <- as.data.frame(dat)[,1:3][cbind(seq_len(nrow(dat)), match(dat$value, names(dat)[1:3]))]
You can also use purrr and pmap():
library(dplyr)
library(purrr)
# pmap_dbl() walks over the rows of `dat` and returns a plain double vector.
# c(...) gathers one row into a named character vector (the character `value`
# column forces the coercion); the entry named by `value` is then picked and
# converted back to a number.
dat %>%
  mutate(
    output = pmap_dbl(., ~ {
      v1 <- c(...)
      as.numeric(v1[[v1[["value"]]]])
    })
  )
# A tibble: 10 x 5
A B C value output
<int> <int> <int> <chr> <dbl>
1 1 31 101 A 1
2 2 32 102 C 102
3 3 33 103 B 33
4 4 34 104 A 4
5 5 35 105 B 35
6 6 36 106 C 106
7 7 37 107 C 107
8 8 38 108 B 38
9 9 39 109 A 9
10 10 40 110 A 10

r add a new column with conditions from another

I have the following table
Type Score
B 18
A 23
A 45
B 877
A 654
B 345
A 23445
A 45
A 432
B 22
B 4566
B 2
B 346
A 889
I would like to be able to create a column that takes out the A values, see below
Type Score New_Score
B 18 18
A 23 0
A 45 0
B 877 877
A 654 0
B 345 345
A 23445 0
A 45 0
A 432 0
B 22 22
B 4566 4566
B 2 2
B 346 346
A 889 0
I have tried a good few things in r but none of them work for me, any help would be most appreciated.
use this
# Zero out the scores of the Type "A" rows; all other rows keep their Score.
df$New_Score <- replace(df$Score, df$Type == "A", 0)
Check
df <- read.table(text = 'Type Score
B 18
A 23
A 45
B 877
A 654
B 345
A 23445
A 45
A 432
B 22
B 4566
B 2
B 346
A 889', header = TRUE)
# Zero out the scores of the Type "A" rows; all other rows keep their Score.
df$New_Score <- replace(df$Score, df$Type == "A", 0)
df
Type Score New_Score
1 B 18 18
2 A 23 0
3 A 45 0
4 B 877 877
5 A 654 0
6 B 345 345
7 A 23445 0
8 A 45 0
9 A 432 0
10 B 22 22
11 B 4566 4566
12 B 2 2
13 B 346 346
14 A 889 0
Using ifelse.
# ifelse() keeps Score for the Type "B" rows and uses 0 for every other row.
transform(dat, new_score=ifelse(Type == "B", Score, 0))
# Type Score new_score
# 1 B 18 18
# 2 A 23 0
# 3 A 45 0
# 4 B 877 877
# 5 A 654 0
# 6 B 345 345
# 7 A 23445 0
# 8 A 45 0
# 9 A 432 0
# 10 B 22 22
# 11 B 4566 4566
# 12 B 2 2
# 13 B 346 346
# 14 A 889 0
use of dplyr::mutate and case_when should solve the problem, I would think.
library(dplyr)
df <- data.frame(
  Type = c("B", "A", "C", "D", "A", "B", "A"),
  Score = c(1, 2, 3, 4, 5, 6, 7)
)
# Columns are referenced by bare name inside mutate(): `df$Type` would bypass
# dplyr's data mask and always read the original data frame instead of the
# data currently flowing through the pipe.
df_new <- df %>%
  mutate(
    New_Score = case_when(
      Type == "A" ~ 0,   # scores of Type "A" are zeroed out
      TRUE ~ Score       # every other type keeps its score
    )
  )
df_new
Just for fun. Here is another solution
df$New_Score <- df$Score            # start from a copy of Score
df$New_Score[df$Type == "A"] <- 0   # overwrite the Type "A" rows with 0
Output:
Type Score New_Score
1 B 18 18
2 A 23 0
3 A 45 0
4 B 877 877
5 A 654 0
6 B 345 345
7 A 23445 0
8 A 45 0
9 A 432 0
10 B 22 22
11 B 4566 4566
12 B 2 2
13 B 346 346
14 A 889 0
data:
structure(list(Type = c("B", "A", "A", "B", "A", "B", "A", "A",
"A", "B", "B", "B", "B", "A"), Score = c(18, 23, 45, 877, 654,
345, 23445, 45, 432, 22, 4566, 2, 346, 889), New_Score = c(18,
0, 0, 877, 0, 345, 0, 0, 0, 22, 4566, 2, 346, 0)), row.names = c(NA,
-14L), class = c("tbl_df", "tbl", "data.frame"))
We can use
# Keep Score for the Type "B" rows and use 0 for every other row.
dat$new_score <- ifelse(dat$Type == "B", dat$Score, 0)

Merging variables by a group

I have a similar dataset to the one below, I would like to merge them based on the "TA", the merged variable should look like "ID1"€"ID2". The TAs are always in pairs
usa <- data.frame(
TA = c(111, 111, 121, 121, 131, 131, 141, 141),
ID = c("A", "B", "A", "C", "A", "B","C","D"))
The expected output is a new dataset
TA merged
1 111 "A€B"
2 121 "A€C"
3 131 "A€B"
4 141 "C€D"
Another option of an output
TA ID merged
1 111 A "A€B"
2 111 B "A€B"
3 121 A "A€C"
4 121 C "A€C"
5 131 A "A€B"
6 131 B "A€B"
7 141 C "C€D"
8 141 D "C€D"
You can use aggregate with paste:
# Collapse the IDs of each TA into one string, joined by the euro sign (U+20AC).
aggregate(ID ~ TA, usa, paste, collapse = "\u20AC")
# TA ID
#1 111 A€B
#2 121 A€C
#3 131 A€B
#4 141 C€D
Does this work:
library(dplyr)
# Base paste() does the collapsing because only dplyr is attached here;
# str_c() lives in stringr and would not be found without library(stringr).
usa %>% group_by(TA) %>% summarise(merged = paste(ID, collapse = '\u20AC'))
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 4 x 2
TA merged
<dbl> <chr>
1 111 A€B
2 121 A€C
3 131 A€B
4 141 C€D
Second option:
# mutate() keeps every row and repeats the collapsed string within each TA.
# Base paste() is used so that nothing beyond dplyr has to be attached.
usa %>% group_by(TA) %>% mutate(merged = paste(ID, collapse = '\u20AC'))
# A tibble: 8 x 3
# Groups: TA [4]
TA ID merged
<dbl> <chr> <chr>
1 111 A A€B
2 111 B A€B
3 121 A A€C
4 121 C A€C
5 131 A A€B
6 131 B A€B
7 141 C C€D
8 141 D C€D
Another option with data.table
library(data.table)
# setDT() turns `usa` into a data.table; the IDs are then collapsed per TA.
setDT(usa)[, .(ID = paste(ID, collapse='\u20AC')), TA]
-output
# TA ID
#1: 111 A€B
#2: 121 A€C
#3: 131 A€B
#4: 141 C€D

Split all columns in one data frame and create two data frames in R

I have a single data frame (let's call it df) that looks like this:
col1 <- c("1/10", "2/30", "1/40", "3/23", "0/17", "7/14")
col2 <- c("2/44", "0/13", "4/55", "6/43", "0/19", "2/34")
col3 <- c("0/36", "0/87", "3/11", "2/12", "4/33", "0/12")
col4 <- c("1/76", "2/65", "2/21", "5/0", "2/26", "1/52")
df <- data.frame(col1,col2,col3,col4)
GOAL: In each cell there are two numbers separated by a "/". Create two data frames: one data frame with the LEFT number and another data frame with the RIGHT number.
The end result would ideally look like this:
df.left.numbers:
col1 col2 col3 col4
1 2 0 1
2 0 0 2
1 4 3 2
3 6 2 5
0 0 4 2
7 2 0 1
df.right.numbers:
col1 col2 col3 col4
10 44 36 76
30 13 87 65
40 55 11 21
23 43 12 0
17 19 33 26
14 34 12 52
I've used strsplit() but that is for 1 column splitting into two within ONE data frame. I also tried the separate() function in the tidyr package however that requires the name of a given column. I am iterating through all of them. I suppose I could write a loop, however I was wondering if anyone had an easier way of making this happen!
Thanks!!
Try this:
# library() fails loudly if data.table is missing; require() would only
# return FALSE and let the next line fail with a less helpful error.
library(data.table)
# tstrsplit() splits each column on "/" into a list of two vectors (left, right).
# unlist(recursive = FALSE) flattens these to col11, col12, col21, ... so the
# recycled c("Left", "Right") labels alternate correctly inside split().
lapply(
  split(
    unlist(lapply(df, tstrsplit, split = "/", fixed = TRUE), recursive = FALSE),
    c("Left", "Right")
  ),
  as.data.frame
)
#$Right
# col12 col22 col32 col42
#1 10 44 36 76
#2 30 13 87 65
#3 40 55 11 21
#4 23 43 12 0
#5 17 19 33 26
#6 14 34 12 52
#$Left
# col11 col21 col31 col41
#1 1 2 0 1
#2 2 0 0 2
#3 1 4 3 2
#4 3 6 2 5
#5 0 0 4 2
#6 7 2 0 1
Not very elegant, but it is short and it works...
col1 <- c("1/10", "2/30", "1/40", "3/23", "0/17", "7/14")
col2 <- c("2/44", "0/13", "4/55", "6/43", "0/19", "2/34")
col3 <- c("0/36", "0/87", "3/11", "2/12", "4/33", "0/12")
col4 <- c("1/76", "2/65", "2/21", "5/0", "2/26", "1/52")
df <- data.frame(col1, col2, col3, col4, stringsAsFactors = FALSE)
# Left number: drop the slash and everything after it.
# Right number: drop everything up to and including the slash.
# as.numeric() turns the remaining text into real numbers, so the resulting
# data frames hold numeric columns rather than character strings.
dfLeft <- as.data.frame(lapply(df, function(x) as.numeric(sub("/.*$", "", x))))
dfRight <- as.data.frame(lapply(df, function(x) as.numeric(sub("^.*/", "", x))))
Another option with purrr package:
library(data.table)
library(purrr)
# Split every column on "/", then transpose the nested list so that the first
# pieces of all columns end up together and the second pieces likewise.
# NOTE(review): both data.table and purrr export transpose(); purrr is attached
# last here, so its version is presumably the one being called - confirm.
df %>%
map(tstrsplit, split="/") %>%
transpose() %>% map(as.data.frame) %>%
set_names(c("left", "right"))
#$left
# col1 col2 col3 col4
#1 1 2 0 1
#2 2 0 0 2
#3 1 4 3 2
#4 3 6 2 5
#5 0 0 4 2
#6 7 2 0 1
#$right
# col1 col2 col3 col4
#1 10 44 36 76
#2 30 13 87 65
#3 40 55 11 21
#4 23 43 12 0
#5 17 19 33 26
#6 14 34 12 52

dplyr- renaming sequence of columns with select function

I'm trying to rename my columns in dplyr, and I found that this can be done with the select function. However, when I try to rename a sequence of selected columns, I cannot get the names in the format that I want.
test = data.frame(x = rep(1:3, each = 2),
group =rep(c("Group 1","Group 2"),3),
y1=c(22,8,11,4,7,5),
y2=c(22,18,21,14,17,15),
y3=c(23,18,51,44,27,35),
y4=c(21,28,311,24,227,225))
CC <- paste("CC",seq(0,3,1),sep="")
aa<-test%>%
select(AC=x,AR=group,CC=y1:y4)
head(aa)
AC AR CC1 CC2 CC3 CC4
1 1 Group 1 22 22 23 21
2 1 Group 2 8 18 18 28
3 2 Group 1 11 21 51 311
4 2 Group 2 4 14 44 24
5 3 Group 1 7 17 27 227
6 3 Group 2 5 15 35 225
The problem is that even though I set the CC values to CC0, CC1, CC2, CC3, the output column names automatically start from CC1.
how can I solve this issue?
I think you'll have an easier time creating such an expression with the select_ function:
library(dplyr)
test <- data.frame(x=rep(1:3, each=2),
group=rep(c("Group 1", "Group 2"), 3),
y1=c(22, 8, 11, 4, 7, 5),
y2=c(22, 18, 21, 14, 17, 15),
y3=c(23, 18, 51, 44, 27, 35),
y4=c(21, 28, 311,24, 227, 225))
# build out our select "translation" named vector:
# the names are the new column names, the values are the existing ones
DQ <- paste0("y", 1:4)
names(DQ) <- paste0("DQ", seq(0, 3, 1))
# take a look
DQ
## DQ0 DQ1 DQ2 DQ3
## "y1" "y2" "y3" "y4"
# select_() is deprecated; select() performs the same renaming when the named
# character vector is wrapped in all_of()
test %>%
  select(AC = x, AR = group, all_of(DQ))
## AC AR DQ0 DQ1 DQ2 DQ3
## 1 1 Group 1 22 22 23 21
## 2 1 Group 2 8 18 18 28
## 3 2 Group 1 11 21 51 311
## 4 2 Group 2 4 14 44 24
## 5 3 Group 1 7 17 27 227
## 6 3 Group 2 5 15 35 225

Resources