I have this type of DF
DF
ID V1
1 A
2 V
3 C
4 B
5 L
6 L
I would like to get
ID V1 V2
1 A AA
2 V AV
3 C AC
4 B BB
5 L BL
6 L BL
I would like to concatenate A, B in V1 with other characters in V1.
I used something like this
# Attempted approach, as posted. NOTE(review): the inner ifelse() on the
# mutate() line is closed right after its condition, and a `%>%` sits inside
# the mutate() call, so this does not look runnable as written.
DF%>%
mutate(V2 = ifelse ((V1 == "A" ), paste ("A", ID), ifelse ((V1 == "B")), paste ("B",V1), "")%>%
V2 = na_if (V2, ""))%>%
fill (V2)
Here is a way using base R
# Every "A" or "B" opens a new run; each value is prefixed with the first
# element of the run it belongs to.
df <- transform(df, V2 = {
  run_id <- cumsum(V1 %in% c("A", "B"))
  ave(V1, run_id, FUN = function(run) paste0(run[1], run))
})
Gives
df
# ID V1 V2
#1 1 A AA
#2 2 V AV
#3 3 C AC
#4 4 B BB
#5 5 L BL
#6 6 L BL
You can use %in% to find where A and B are. Use unsplit to replicate them and paste0 to build the new string.
# Flag the rows that open a new run (V1 is "A" or "B").
i <- DF$V1 %in% c("A", "B")
# cumsum(i) numbers the runs; unsplit() spreads each run's leading value over
# that run, and paste0() prepends it to V1.
DF$V2 <- paste0(unsplit(DF$V1[i], cumsum(i)), DF$V1)
# Alternative: repeat each leading value by its run length. The sentinel is
# length(i) + 1 so that the last run is counted in full.
#DF$V2 <- paste0(rep(DF$V1[i], diff(c(which(i), length(i) + 1L))), DF$V1)
DF
# ID V1 V2
#1 1 A AA
#2 2 V AV
#3 3 C AC
#4 4 B BB
#5 5 L BL
#6 6 L BL
Here is a dplyr solution.
library(dplyr)
# Number the runs that start at each "A"/"B", prefix every V1 with the first
# value of its run, then discard the helper column.
DF %>%
  group_by(flag = cumsum(V1 %in% c("A", "B"))) %>%
  mutate(V2 = paste0(V1[1], V1)) %>%
  ungroup() %>%
  select(-flag)
## A tibble: 6 x 3
# ID V1 V2
# <int> <chr> <chr>
#1 1 A AA
#2 2 V AV
#3 3 C AC
#4 4 B BB
#5 5 L BL
#6 6 L BL
Related
I have two lists and I want to use lapply to get a new list
The data is
library(dplyr)
# Two named lists of data frames; same-named elements share the key column x.
list.A <- list(
  df1 = data.frame(x = 1:5, y = letters[1:5], z = rep(1, 5)),
  df2 = data.frame(x = 10:15, y = letters[5:10], z = rep(10, 6))
)
list.B <- list(
  df1 = data.frame(x = 1:6, var2 = letters[10:15], var3 = rep(7, 6)),
  df2 = data.frame(x = c(10, 12), var2 = letters[1:2], var3 = rep(5, 2))
)
I want the result to be as following
# Desired result: each data frame in list.A left-joined on x to the data
# frame at the same position in list.B.
dat.1 <- left_join(list.A$df1, list.B$df1, by = "x")
dat.2 <- left_join(list.A$df2, list.B$df2, by = "x")
new.list <- list(df1 = dat.1, df2 = dat.2)
But when I use lapply the results are weird and not as I wish them to be
# The inner lapply() runs once per element of list.A, so every data frame in
# list.A is joined with every data frame in list.B; the result is a list of
# lists rather than one joined data frame per pair.
new.list <- lapply(list.A, function(a){lapply(list.B, function(b){
df <-left_join(a, b, by=("x"))
})
})
Any help, please? Would a loop or lapply work here?
My actual lists contain many data frames.
We can use map2 from purrr, which loops over the corresponding elements of both lists and does the left_join by the 'x' column
library(dplyr)
library(purrr)
# Walk both lists in parallel and left-join each pair on x.
map2(list.A, list.B, function(a, b) left_join(a, b, by = "x"))
-output
#$df1
# x y z var2 var3
#1 1 a 1 j 7
#2 2 b 1 k 7
#3 3 c 1 l 7
#4 4 d 1 m 7
#5 5 e 1 n 7
#$df2
# x y z var2 var3
#1 10 e 10 a 5
#2 11 f 10 <NA> NA
#3 12 g 10 b 5
#4 13 h 10 <NA> NA
#5 14 i 10 <NA> NA
#6 15 j 10 <NA> NA
Or Map (from base R)
# Base R equivalent: merge() each pair, keeping all rows of the left table.
Map(function(a, b) merge(a, b, all.x = TRUE, by = "x"), list.A, list.B)
Let's say I have two data.frames
# One row per player.
name_df <- read.table(text = "player_name
a
b
c
d
e
f
g", header = TRUE)
# One row per game, with the winner and the loser of that game.
game_df <- read.table(text = "game_id winner_name loser_name
1 a b
2 b a
3 a c
4 a d
5 b c
6 c d
7 d e
8 e f
9 f a
10 g f
11 g a
12 f e
13 a d", header = TRUE)
name_df contains a unique list of all the winner_name or loser_name values in game_df. I want to create a new data.frame that has, for each person in the name_df a row if a given name (e.g. a) appears in either the winner_name or loser_name column
So I essentially want to merge game_df with name_df, but the key column (name) can appear in either winner_name or loser_name.
So, for just a and b the final output would look something like:
# Expected output for players a and b: one row per (player, game) in which
# the player is either the winner or the loser.
final_df <- read.table(text = "player_name game_id winner_name loser_name
a 1 a b
a 2 b a
a 3 a c
a 4 a d
a 9 f a
a 11 g a
a 13 a d
b 1 a b
b 2 b a
b 5 b c", header = TRUE)
We can loop over the elements in 'name_df' for 'player_name', filter the rows from 'game_df' for either the 'winner_name' or 'loser_name'
library(dplyr)
library(purrr)
# One pass per player: keep the games where that player is the winner or the
# loser; .id records which player each chunk of rows belongs to.
map_dfr(
  setNames(name_df$player_name, name_df$player_name),
  function(player) {
    filter(game_df, winner_name %in% player | loser_name %in% player)
  },
  .id = "player_name"
)
Or if there are many columns, use if_any
# Same idea, with if_any() testing every listed name column at once.
map_dfr(
  setNames(name_df$player_name, name_df$player_name),
  function(nm1) {
    filter(
      game_df,
      if_any(c(winner_name, loser_name), function(col) col %in% nm1)
    )
  },
  .id = "player_name"
)
Dedicated to our dear teacher and mentor @akrun
I think we can also make use of the add_row() function you first taught me the other day. Unbelievable!!!
library(dplyr)
library(purrr)
library(tibble)
# Tag each game with its winner, then append a copy of the row tagged with
# the loser, so every game appears once per participant.
game_df %>%
rowwise() %>%
mutate(player_name = winner_name) %>%
# Split into one piece per game_id so add_row() can duplicate each game.
group_split(game_id) %>%
map_dfr(~ add_row(.x, game_id = .x$game_id, winner_name = .x$winner_name,
loser_name = .x$loser_name, player_name = .x$loser_name)) %>%
# Sort by player and move the player column to the front.
arrange(player_name) %>%
relocate(player_name)
# A tibble: 26 x 4
player_name game_id winner_name loser_name
<chr> <int> <chr> <chr>
1 a 1 a b
2 a 2 b a
3 a 3 a c
4 a 4 a d
5 a 9 f a
6 a 11 g a
7 a 13 a d
8 b 1 a b
9 b 2 b a
10 b 5 b c
# ... with 16 more rows
This can be directly expressed in SQL:
library(sqldf)
# Left join on an OR condition: a game row matches a player when the player
# is either its winner or its loser.
sqldf("select *
from name_df
left join game_df on winner_name = player_name or loser_name = player_name")
Without using purrr: I think this is an appropriate use case for tidyr::unite with the argument remove = FALSE, where we first unite the winners' and losers' names and then use tidyr::separate_rows to split the new column into rows.
library(tidyr)
library(dplyr)
# Put both participants of a game into one helper column (keeping the
# originals), then split that column so each game yields one row per player.
game_df %>%
  unite(Player_name, winner_name, loser_name, remove = FALSE, sep = ", ") %>%
  separate_rows(Player_name) %>%
  relocate(Player_name) %>%
  arrange(Player_name)
# A tibble: 26 x 4
Player_name game_id winner_name loser_name
<chr> <int> <chr> <chr>
1 a 1 a b
2 a 2 b a
3 a 3 a c
4 a 4 a d
5 a 9 f a
6 a 11 g a
7 a 13 a d
8 b 1 a b
9 b 2 b a
10 b 5 b c
# ... with 16 more rows
A Base R approach :
# For each player, keep the games they won or lost and label those rows with
# the player's name; then stack the per-player pieces into one data frame.
result <- do.call(rbind, lapply(name_df$player_name, function(x) {
  cbind(
    playername = x,
    subset(game_df, winner_name == x | loser_name == x)
  )
}))
# Drop the row names carried over from game_df.
rownames(result) <- NULL
result
# playername game_id winner_name loser_name
#1 a 1 a b
#2 a 2 b a
#3 a 3 a c
#4 a 4 a d
#5 a 9 f a
#6 a 11 g a
#7 a 13 a d
#8 b 1 a b
#...
#...
I did some searching but could not find an obvious answer to this question, so hopefully it's not a duplicate. I have a data frame that looks like this:
X1 X2 V1 V2 V3 ... Vn
A B 0 1 2 1
B C 1 0 1 0
A C 2 1 0 1
What I want to achieve is to replace V1 to Vn values to the "dosage" of X2. So for row 1 (each row may have different values of X1 and X2),
if the value is 0, I want to replace it to AA;
if the value is 1, I want to replace it to AB;
if the value is 2, I want to replace it to BB;
The expected outcome is:
X1 X2 V1 V2 V3 ... Vn
A B AA AB BB AB
B C BC BB BC BB
A C CC AC AA AC
Here is the sample data frame:
# Sample data: two label columns followed by three numeric columns.
df <- data.frame(
  X1 = c("A", "B", "A"),
  X2 = c("B", "C", "C"),
  V1 = c(0, 1, 2),
  V2 = c(1, 0, 1),
  V3 = c(2, 1, 0)
)
Thanks for the help!
This is inspired by @Matt's answer. We can use mutate_at with paste0 to achieve this task.
## Load packages
library(dplyr)
# Recode every column except X1 and X2, row by row:
#   0 -> X1X1, 1 -> X1X2, 2 -> X2X2, anything else -> NA.
# across() replaces the superseded mutate_at().
dat2 <- dat %>%
  mutate(across(
    -c(X1, X2),
    ~ case_when(
      . == 0 ~ paste0(X1, X1),
      . == 1 ~ paste0(X1, X2),
      . == 2 ~ paste0(X2, X2),
      TRUE ~ NA_character_
    )
  ))
dat2
# X1 X2 V1 V2 V3 Vn
# 1 A B AA AB BB AB
# 2 B C BC BB BC BB
# 3 A C CC AC AA AC
DATA
# Example input: two label columns and four numeric columns.
dat <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "X1 X2 V1 V2 V3 Vn
A B 0 1 2 1
B C 1 0 1 0
A C 2 1 0 1 "
)
With your actual df you can replace V1:V3 with V1:Vn.
Using your reprex, you can do:
library(dplyr)
# Recode V1..V3 per row from that row's X1/X2 labels
# (0 -> X1X1, 1 -> X1X2, 2 -> X2X2); any other value becomes NA.
# The labels are built from X1/X2 instead of fixed "AA"/"AB"/"BB" strings so
# that rows whose X1/X2 are not A/B are recoded correctly, and across()
# replaces the deprecated mutate_at()/funs() pair.
df %>%
  mutate(across(
    V1:V3,
    ~ case_when(
      . == 0 ~ paste0(X1, X1),
      . == 1 ~ paste0(X1, X2),
      . == 2 ~ paste0(X2, X2)
    )
  ))
It's not an elegant solution, but for the sake of completeness: just nest two for-loops
# Recode columns 3..ncol(df) cell by cell from the row's first two columns:
#   0 -> col1col1, 1 -> col1col2, 2 -> col2col2; other values are left as is.
# seq_len() keeps both loops empty when df has no rows or fewer than three
# columns (1:0 and 3:2 would iterate backwards instead).
for (i in seq_len(nrow(df))) {
  for (j in seq_len(ncol(df))[-(1:2)]) {
    if (df[i, j] == 0) {
      df[i, j] <- paste0(df[i, 1], df[i, 1])
    } else if (df[i, j] == 1) {
      df[i, j] <- paste0(df[i, 1], df[i, 2])
    } else if (df[i, j] == 2) {
      df[i, j] <- paste0(df[i, 2], df[i, 2])
    }
  }
}
Sorry for that.
Use spread and gather
library(tidyverse)
# Sample data as a tibble: two label columns followed by three numeric columns.
df <- tibble(
  X1 = c("A", "B", "A"),
  X2 = c("B", "C", "C"),
  V1 = c(0, 1, 2),
  V2 = c(1, 0, 1),
  V3 = c(2, 1, 0)
)
Capture your from:to translation
# Lookup table mapping each DOSE value to a fixed label.
# NOTE(review): the labels are constants and do not use a row's X1/X2
# values, so every row with a given DOSE receives the same OUTCOME.
transl <- tibble(DOSE = c(0,1,2),
OUTCOME = c("AA", "AB", "AC"))
Then Gather your values into long form
# Stack the V* columns into (TheV, DOSE) pairs, attach the label for each
# DOSE from `transl`, then drop the numeric DOSE column.
longTbl <- df %>%
gather(key = "TheV", value = "DOSE", na.rm = TRUE,starts_with("V")) %>%
left_join(transl, by = "DOSE") %>%
select(- DOSE)
# A tibble: 9 x 4
X1 X2 TheV OUTCOME
<chr> <chr> <chr> <chr>
1 A B V1 AA
2 B C V1 AB
3 A C V1 AC
4 A B V2 AB
5 B C V2 AA
6 A C V2 AB
7 A B V3 AC
8 B C V3 AB
9 A C V3 AA
You might be better leaving it there. But we can pivot it back with spread.
# Back to wide form: one column per TheV value, filled with OUTCOME.
widTbl <- spread(longTbl, key = TheV, value = OUTCOME)
widTbl
# A tibble: 3 x 5
X1 X2 V1 V2 V3
<chr> <chr> <chr> <chr> <chr>
1 A B AA AB AC
2 A C AC AB AA
3 B C AB AA AB
And Bob's your uncle.
I am trying to fill in blank cells with the values of the rows above. This is similar to the na.locf function, but I have a pattern that needs to be matched. I don't necessarily know how many rows there are between new values (i.e. between a,b and c,d).
I have used the na.locf and searched around for a solution to no avail.
# Example column: runs of values, each followed by NAs to be filled.
df <- data.frame(col1 = c("a", "b", NA, NA, NA, NA, "c", "d", NA, NA))
df
# col1
# 1 a
# 2 b
# 3 <NA>
# 4 <NA>
# 5 <NA>
# 6 <NA>
# 7 c
# 8 d
# 9 <NA>
# 10 <NA>
Solution I would like:
df
col1
a
b
a
b
a
b
c
d
c
d
# Group each run of non-NA values with the NAs that follow it (a new group
# starts wherever a non-NA value follows an NA or opens the column), then
# cycle the run's values over the whole group.
ave(
  df$col1,
  cumsum(diff(c(FALSE, !is.na(df$col1))) == 1),
  FUN = function(grp) {
    rep(grp[!is.na(grp)], length.out = length(grp))
  }
)
# [1] a b a b a b c d c d
Here's a way with dplyr. You can drop the group column if needed. -
# A group starts at every non-NA value that follows an NA (or opens the
# column); within a group the non-NA values are recycled to the group size.
df %>%
  mutate(group = cumsum(!is.na(col1) & is.na(lag(col1)))) %>%
  group_by(group) %>%
  mutate(col1 = rep(col1[!is.na(col1)], length.out = n())) %>%
  ungroup()
# A tibble: 10 x 2
col1 group
<chr> <int>
1 a 1
2 b 1
3 a 1
4 b 1
5 a 1
6 b 1
7 c 2
8 d 2
9 c 2
10 d 2
I constructed the following code below. It shall assign the value "1" or "2" to vector v2, if an element in vector v1 occurs twice, e.g. "A" in vector v1 appears twice, hence in the respective rows, v2 should once read "1" and in the other case "2".
The code works sort of fine, except in some cases, a similar number is assigned to v2, when an element in v1 occurs twice, this should obviously not be the case.
Can anybody help me with the issue? Thanks!
# Sample data: letters A-G appear twice, H-K once. v2 starts at 3, which the
# loop below appears to treat as "not yet assigned".
v1 <- c(rep(c("A","B","C","D","E","F","G"),rep(2,7)),c("H","I","J","K"))
v2 <- rep(3,length(v1))
df1 <- data.frame(v1,v2)
# Row-by-row assignment, as posted. NOTE(review): only df1$v2[i] is written
# in iteration i, so each duplicated row draws its own random 1/2 without
# looking at the value given to its twin; both rows of a pair can therefore
# receive the same number.
for (i in 1:length(df1$v1)) {
if (sum(df1$v1[i]==df1$v1)==2 & df1$v2[i]==3) {
df1$v2[i] <- sample(c(1,2),1,replace=TRUE)
} else if (sum(df1$v1[i]==df1$v1)==2 & df1$v2[i]==1) {
df1$v2[i] <- 2
} else if (sum(df1$v1[i]==df1$v1)==2 & df1$v2[i]==2) {
df1$v2[i] <- 1
} else {
df1$v2[i] <- 2
}
}
I think that I have understood what you require and hopefully the below should do what you want, using dplyr. It will randomly assign integer values from 1 to n, where n is the number of occurrences of a given letter (note this is generalizable from your requirement of 2 occurrences).
library(dplyr)
df1 <- data.frame(
  v1 = c(rep(c("A", "B", "C", "D", "E", "F", "G"), rep(2, 7)),
         c("H", "I", "J", "K"))
)
# Within each letter, assign a random permutation of 1..n (n = number of
# occurrences). sample(n()) already returns 1 for a letter that occurs once,
# so no special case is needed. ungroup() keeps the grouping from leaking
# into later steps.
df1 <- df1 %>%
  group_by(v1) %>%
  mutate(v2 = sample(n())) %>%
  ungroup()
# Sample data: letters A-G twice, H-K once; `value` is the original row
# position and v2 a placeholder column.
v1 <- c(rep(c("A", "B", "C", "D", "E", "F", "G"), rep(2, 7)),
        c("H", "I", "J", "K"))
value <- seq_along(v1)
v2 <- rep(3, length(v1))
df1 <- data.frame(v1, value, v2)
library(dplyr)
# Fix the RNG seed so the shuffle below is reproducible.
set.seed(9)
# Shuffle the rows, then number the rows within each v1 in their shuffled
# order, which gives duplicates their 1..n labels in random order.
df1 %>%
sample_frac(1) %>% # shuffle rows
group_by(v1) %>% # for each v1 value
mutate(v2 = row_number()) %>% # count and flag occurrences
ungroup() %>% # forget the grouping
arrange(v1) # order by v1 (only for visualisation purposes)
# # A tibble: 18 x 3
# v1 value v2
# <fct> <int> <int>
# 1 A 1 1
# 2 A 2 2
# 3 B 4 1
# 4 B 3 2
# 5 C 5 1
# 6 C 6 2
# 7 D 7 1
# 8 D 8 2
# 9 E 9 1
#10 E 10 2
#11 F 12 1
#12 F 11 2
#13 G 14 1
#14 G 13 2
#15 H 15 1
#16 I 16 1
#17 J 17 1
#18 K 18 1
Using base R, I think you can arrive at what you want somewhat easily by using table and sequence in connection and manipulating the output.
Edit: After your comments, I now think I understand what you want.
# Number the occurrences of each value 1..n. sequence(table(v1)) emits the
# counters in sorted-value order, so this relies on v1 already being sorted.
res <- data.frame(v1, v2 = sequence(table(v1)), row.names = NULL)
# seq_len() stays empty for a zero-row res, where 1:nrow(res) would be c(1, 0).
res <- res[sample(seq_len(nrow(res))), ] # Scramble data order
res <- res[order(res$v1), ] # Reorder by v1 column
# v1 v2
#1 A 1
#2 A 2
#3 B 1
#4 B 2
#5 C 1
#6 C 2
#7 D 2 # note 2 comes first here
#8 D 1
#9 E 1
#10 E 2
#11 F 1
#12 F 2
#13 G 1
#14 G 2
#15 H 1
#16 I 1
#17 J 1
#18 K 1
Edit2 "randomly" sorting before assigning:
# rank(..., ties.method = "random") breaks ties between equal letters at
# random; order() of those ranks lists the row positions sorted by v1 with
# duplicates in random order, and the 1..n counters from
# sequence(table(v1)) are written to v2 at those positions.
df1 <- data.frame(v1)
df1[order(rank(v1, ties.method = "random")), "v2"] <- sequence(table(v1))
df1