Append text to a field based on another field's value - r

I want to append a text based on another field's value. For example:-
This is the current df:
field_x <- c("A", "A", "C", "B", "B", "C")
field_y <- c("Axl", "Slash", "Duff", "Steven", "Izzy", "Dizzy")
df <- cbind(field_x, field_y)
I need to change the field_y based on field_x values so that it looks like this:
field_x <- c("A", "A", "C", "B", "B", "C")
field_y <- c("Axl (Apple)", "Slash (Apple)", "Duff (Cat)", "Steven (Ball)", "Izzy (Ball)", "Dizzy (Cat)")
So, basically if field_x has "A" then "(Apple)" is to be appended to field_y and so forth. Thanks in advance!

First note that your df is actually a matrix: when you cbind vectors, you get a matrix. So first thing to do is convert to dataframe.
Then it depends on whether or not you are using dplyr.
field_x <- c("A", "A", "C", "B", "B", "C")
field_y <- c("Axl", "Slash", "Duff", "Steven", "Izzy", "Dizzy")
df <- cbind(field_x, field_y)
df <- as.data.frame(df)
Without dplyr:
# Append the word that belongs to field_x to field_y, e.g. "Axl" -> "Axl (Apple)".
df <- within(df, {
  # Temporary helper: "A" -> "Apple", "B" -> "Ball", anything else -> "Cat".
  s <- ifelse(field_x == "A", "Apple", ifelse(field_x == "B", "Ball", "Cat"))
  # The space before "(" is required to match the requested "Axl (Apple)" format.
  field_y <- paste0(field_y, " (", s, ")")
  # Remove the helper so it does not end up as a column of df.
  rm(s)
})
With dplyr:
library(dplyr)
library(stringr)
library(magrittr)
# dplyr version; %<>% (magrittr) pipes df in and assigns the result back to df.
df %<>%
  mutate(
    # Helper column holding the word to append for each field_x code.
    s = recode(field_x, "A" = "Apple", "B" = "Ball", "C" = "Cat"),
    # The space before "(" is required to match the requested "Axl (Apple)" format.
    field_y = str_glue("{field_y} ({s})")
  ) %>%
  # Drop the helper column again.
  select(-s)
Another way, with case_when instead of recode:
# Same as the recode() variant, but the code -> word mapping is spelled out with case_when().
df %<>%
  mutate(
    # Helper column holding the word to append for each field_x code.
    s = case_when(
      field_x == "A" ~ "Apple",
      field_x == "B" ~ "Ball",
      field_x == "C" ~ "Cat"
    ),
    # The space before "(" is required to match the requested "Axl (Apple)" format.
    field_y = str_glue("{field_y} ({s})")
  ) %>%
  # Drop the helper column again.
  select(-s)
Note that I create an auxiliary variable s: it's not really necessary, but it makes the code more readable.

Here is another approach:
We could create a look-up table to address the concerns of @Tim Biegeleisen in the comment section:
# Look-up table: x holds the field_x codes, y the word to append for each code.
look_up <- data.frame(x = c("A", "B" ,"C"),
y = c("Apple", "Ball", "Cat"))
library(dplyr)
# df was built with cbind(), so it is converted to a data frame before mutate().
df %>%
as.data.frame() %>%
# rowwise() makes mutate() work one row at a time, so field_x is a single value in the comparison below.
rowwise() %>%
# Pick the look_up word whose code equals this row's field_x and append it in parentheses.
mutate(field_y = paste0(field_y, ' (', look_up$y[look_up$x==field_x], ')'))
field_x field_y
<chr> <chr>
1 A Axl (Apple)
2 A Slash (Apple)
3 C Duff (Cat)
4 B Steven (Ball)
5 B Izzy (Ball)
6 C Dizzy (Cat)

Related

Rename colnames according to data.frame

I have several data.frames df1, df2, df3, ... .
df1 <- data.frame(var1 = c("a", "b", "c"),
var2 = c("a", "b", "c"),
var3 = c("a", "b", "c"),
var4 = c("a", "b", "c"))
df2 <- data.frame(var1 = c("a", "b", "c"),
var2 = c("a", "b", "c"),
var3 = c("a", "b", "c"),
var4 = c("a", "b", "c"))
df3 <- data.frame(var1 = c("a", "b", "c"),
var2 = c("a", "b", "c"),
var3 = c("a", "b", "c"),
var4 = c("a", "b", "c"))
df4 <- data.frame(var1 = c("a", "b", "c"),
var2 = c("a", "b", "c"),
var3 = c("a", "b", "c"),
var4 = c("a", "b", "c"))
And I have a data.frame rename_vars which tells me which variables in which data.frame should be renamed.
rename_vars <- data.frame(df = c("df1", "df1", "df3"),
var = c("var1", "var3", "var1"),
rename_to = c("var1x", "var3y", "var1z"))
df var rename_to
1 df1 var1 var1x
2 df1 var3 var3y
3 df3 var1 var1z
For example, in df1 the variable var1 should be renamed to var1x and var3 should be renamed to var3y. In data.frame df3 the variable var1 should be renamed to var1z, and so forth.
But how can I automate this process of renaming variables in different data.frames according to rename_vars?
Thanks for help!
We split the 'rename_vars' data based on the 'df' column then loop over the list with map2 on the values from the names of the list (mget) and the 'lst1', and change the column names with rename_at. It is better to keep it in a list, but if we need to change the global individual objects, use list2env after naming the list ('out') with the object names
library(dplyr)
library(purrr)
# One list element per data frame named in rename_vars$df; each element keeps
# the remaining columns (var, rename_to) for that data frame.
lst1 <- split(rename_vars[-1], rename_vars$df)
# mget() fetches the data frames by name, so .x is a data frame and .y its rename table.
out <- map2(mget(names(lst1)), lst1, ~ {
# First column of the rename table: current column names.
nm1 <- .y[[1]]
# Second column of the rename table: replacement names.
nm2 <- .y[[2]]
.x %>%
rename_at(vars(nm1), ~ nm2)})
# Copy the renamed data frames into the global environment under their original names.
list2env(out, .GlobalEnv)
-output
df1
# var1x var2 var3y var4
#1 a a a a
#2 b b b b
#3 c c c c
df3
# var1z var2 var3 var4
#1 a a a a
#2 b b b b
#3 c c c c
Or another option is !!! with rename
library(tibble)
# rename_vars[3:2] puts rename_to first and var second; deframe() presumably turns
# that pair into a vector of old names named by the new names (confirm against tibble docs).
# The resulting list is then split by the data frame each entry belongs to.
lst1 <- split(as.list(deframe(rename_vars[3:2])), rename_vars$df)
# !!! splices each per-data-frame list into rename() as individual arguments;
# list2env() writes the renamed data frames back to the global environment.
list2env(map2(mget(names(lst1)), lst1, ~ .x %>%
rename(!!! .y)), .GlobalEnv)
Or using base R with a for loop and assign
# Base R: walk through rename_vars one row at a time and rename a single column per row.
for (row in seq_len(nrow(rename_vars))) {
  # Name of the data frame this row refers to, and a working copy of it.
  target <- rename_vars$df[row]
  frame <- get(target)
  # Position of the column to rename within that data frame.
  pos <- match(rename_vars$var[row], names(frame))
  names(frame)[pos] <- rename_vars$rename_to[row]
  # Store the modified copy back under the original object name.
  assign(target, frame)
}
setnames from data.table
library('data.table')
# Loop over each distinct data frame name listed in rename_vars and rename all of
# its listed columns in a single setnames() call.
for (dt in unique(rename_vars$df) ) {
df_rows <- (rename_vars$df == dt) # get row indices matching data frame name
old <- rename_vars$var[df_rows] # old names
new <- rename_vars$rename_to[df_rows] # new names
# NOTE(review): both calls operate on the result of get(dt) rather than on a symbol;
# the answer relies on this modifying the global object by reference - confirm.
setDT(get(dt)) # convert to data table by reference
setnames(get(dt), old, new) # set names by reference
}
Output:
names(df1)
# [1] "var1x" "var2" "var3y" "var4"
names(df2)
# [1] "var1" "var2" "var3" "var4"
names(df3)
# [1] "var1z" "var2" "var3" "var4"

Exchange data.table columns with most prevalent value of columns

I have data
test = data.table(
a = c(1,1,3,4,5,6),
b = c("a", "be", "a", "c", "d", "c"),
c = rep(1, 6)
)
I wish to take the unique values of column a, store it in another data.table, and afterwards fill in the remaining columns with the most prevalent values of those remaining columns, such that my resulting data.table would be:
test2 = data.table(a = c(1,3,4,5,6), b = "a", c = 1)
Column b has equal amounts of "a" and "c", but it doesn't matter which is chosen in those cases.
Attempt so far:
test2 = unique(test, by = "a")
test2[, c("b", "c") := lapply(.SD, FUN = function(x){test2[, .N, by = x][order(-N)][1,1]}), .SDcols = c("b", "c")]
EDIT: I would preferably like a generic solution that is compatible with a function where I specify the column to be "uniqued", and the rest of the columns are filled with the single most prevalent value. Hence my use of lapply and .SD =)
EDIT2: as @MichaelChirico points out, how do we keep the class? With the following data.table some of the solutions do not work, although the solution of @chinsoon12 does work:
test = data.table(a = c(1,1,3,4,5,6),
b = c("a", "be", "a", "c", "d", "c"),
c = rep(1, 6),
d = as.Date("2019-01-01"))
Another option:
# Most frequent value of x: rowid() counts the occurrences of each value as they
# appear, and which.max() gives the first position where that running count peaks.
dtmode <- function(x) {
  occurrence <- rowid(x)
  x[which.max(occurrence)]
}
# Unique keys from A, with the single most frequent B and C recycled alongside them.
test[, .(A = unique(A), B = dtmode(B), C = dtmode(C))]
data:
test = data.table(
A = c(1,1,3,4,5,6),
B = c("a", "be", "a", "c", "d", "c"),
C = rep(1, 6)
)
Not a clean way to do this but it works.
# Toy input as a plain data.frame.
test <- data.frame(
  a = c(1, 1, 3, 4, 5, 6),
  b = c("a", "be", "a", "c", "d", "c"),
  c = rep(1, 6)
)
# sort() orders the counts ascending, so the last name belongs to a most frequent value.
b_counts <- sort(table(test[["b"]]))
c_counts <- sort(table(test[["c"]]))
a <- unique(test[["a"]])
b <- names(b_counts)[length(b_counts)]
c <- names(c_counts)[length(c_counts)]
# cbind() recycles the length-one b and c against a; mixing numbers with strings
# makes the result a character matrix.
test2 <- cbind(a, b, c)
Output is like this:
> test2
a b c
[1,] "1" "c" "1"
[2,] "3" "c" "1"
[3,] "4" "c" "1"
[4,] "5" "c" "1"
[5,] "6" "c" "1"
>
@EmreKiratli is very close to what I would do:
test[ , c(
list(a = unique(a)),
lapply(.SD, function(x) as(tail(names(sort(table(x))), 1L), class(x)))
), .SDcols = !'a']
The as(., class(x)) part is because names in R are always character, so we have to convert back to the original class of x.
You might like this better in magrittr form since it's many nested functions:
library(magrittr)
test[ , c(
list(a = unique(a)),
lapply(.SD, function(x) {
table(x) %>% sort %>% names %>% tail(1L) %>% as(class(x))
})
), .SDcols = !'a']
I was able to make an OK solution, but if somebody can do it more elegantly, for example not going through the step of storing a list in refLevel below, please let me know! I'm very interested in learning data.table properly!
#solution:
test = data.table(a = c(1,1,3,4,5,6), b = c("a", "be", "a", "c", "d", "c"), c = rep(1, 6))
test2 = unique(test, by="a")
funPrev = function(x){unlist(as.data.table(x)[, .N, by=x][order(-N)][1,1], use.names = F)}
refLevel = lapply(test[, c("b", "c")], funPrev)
test2[, c("b", "c") := refLevel]
...and using a function (if anybody sees any unnecessary step, please let me know):
# Keep one row per value of `var_unique` and overwrite the `vars_prev` columns with
# the single most prevalent value of each of those columns in the full table.
#   dt          data.table to summarise (left unmodified; a copy is made)
#   var_unique  name of the column to de-duplicate on
#   vars_prev   names of the columns to fill with their most prevalent value
# Returns the de-duplicated data.table.
genData <- function(dt, var_unique, vars_prev) {
  # Count each distinct value, sort by descending count and take the top value.
  most_prevalent <- function(x) {
    counts <- as.data.table(x)[, .N, by = x]
    top <- counts[order(-N)][1, 1]
    unlist(top, use.names = FALSE)
  }
  # Prevalence is computed on the full input, not on the de-duplicated rows.
  ref_level <- lapply(dt[, .SD, .SDcols = vars_prev], most_prevalent)
  result <- unique(copy(dt), by = var_unique)
  result[, (vars_prev) := ref_level]
  result
}
test2 <- genData(test, "a", c("b", "c"))
Here's another variant which one might find less sophisticated, yet more readable. It's essentially chinsoon12's rowid approach generalized for any number of columns. Also the classes are kept.
test = data.table(a = c(1,1,3,4,5,6),
b = c("a", "be", "a", "c", "d", "c"),
c = rep(1, 6),
d = as.Date("2019-01-01"))
test2 = unique(test, by = "a")
for (col in setdiff(names(test2), "a")) test2[[col]] = test2[[col]][which.max(rowid(test2[[col]]))]

Replace values in vector where not %in% vector

Short question:
I can substitute certain variable values like this:
values <- c("a", "b", "a", "b", "c", "a", "b")
df <- data.frame(values)
What's the easiest way to replace all the values of df$values by "x" (where the value is neither "a" nor "b")?
Output should be:
c("a", "b", "a", "b", "x", "a", "b")
Your example is a bit unclear and not reproducible.
However, based on guessing what you actually want, I could suggest trying this option using the data.table package:
# setDT() converts the data.frame to a data.table by reference so that `:=` can be used.
# The condition is negated: only values that are neither "a" nor "b" become "x".
setDT(df)[!(values %in% c("a", "b")), values := "x"]
or the dplyr package:
# Keep "a" and "b" unchanged and replace every other value with the string "x".
# as.character() guards against `values` being a factor, which ifelse() would turn into integer codes.
df %>% mutate(values = ifelse(values %in% c("a", "b"), as.character(values), "x"))
What about:
# Select the rows whose first column is neither "a" nor "b" and overwrite them with "x".
df[!df[, 1] %in% c("a", "b"), ] <- "x"
values
1 a
2 b
3 a
4 b
5 x
6 a
7 b

How can I make combn work in dplyr::mutate?

I'm trying to make combn() work in dplyr::mutate, but I'm failing and can't quite figure out why.
This works:
c("a", "b", "c") %>% combn(2, FUN = paste, collapse = ";", simplify = TRUE)
[1] "a;b" "a;c" "b;c"
But how can I make this work?
tribble(
~col,
c("a", "b", "c"),
c("a", "d", "f")
) %>%
mutate(col = combn(str_split(names, ";"), 2, FUN = paste, collapse = ";"))
I want each row in the matrix to be a character vector in this form:
[1] "a;b" "a;c" "b;c"
The example above would be the first row.
Edit: I guess it's fine if combn() isn't used.
We could use map to loop over the list and paste
library(tidyverse)
# Each cell of col holds a whole character vector, so map() is used to run
# combn() on one row's vector at a time.
out <- tribble(
~col,
c("a", "b", "c"),
c("a", "d", "f")
) %>%
# For every pair of elements, paste the two together with ";" (e.g. "a;b" "a;c" "b;c").
mutate(col = map(col, ~ combn(.x, 2, FUN = paste, collapse=";")))
Try:
# Alternative: process the table row by row instead of mapping over the column.
tribble(
~col,
c("a", "b", "c"),
c("a", "d", "f")
) %>%
# rowwise() makes mutate() evaluate its expression once per row.
rowwise() %>%
# combn() pastes every pair with ";"; toString() then joins the pairs into one
# comma-separated string stored in the new column.
mutate(new = toString(combn(col, 2, FUN = paste, collapse = ";")))

dplyr's recode function multiple-to-1 R

I would like an easier way to recode vectors. Specifically I'm wondering if there is a way to pass vectors to a function like dplyr's recode. I understand the basics of quasiquotation but don't quite get how to incorporate the =.
library(tidyverse)
vec1 <- rep(LETTERS[1:7],7)
#standard way
vec2 <- recode(vec1,
"A" = "Value1",
"B" = "Value2",
"C" = "Value3",
"D" = "Value4",
"E" = "Value5",
"F" = "Value6",
"G" = "Value7"
)
vec3 <- recode(vec1,
"A" = "Value1",
"B" = "Value1",
"C" = "Value2",
"D" = "Value2",
.default = "Value other"
)
I'd like to do the following
vec3 <- some.function(vec1,
c("A", "B") = "Value1",
c("C", "D") = "Value2",
.default = "Value other"
)
I have a solution but can't figure out how to incorporate a function with ... and =
do.call(dplyr::recode,
c(list(vec1),
setNames(rep("Value1",length(val1)), val1),
setNames(rep("Value2",length(val2)), val2)))
I also have figured out a way to pass two vectors and rename all the variables.
# Recode x by pairing two parallel vectors: each value in `current.names` is
# replaced by the element of `new.names` at the same position.
recode.by.vectors <- function(x, current.names, new.names) {
  # Named vector (old value -> new value); its elements become recode()'s
  # name = value arguments after x.
  replacements <- setNames(new.names, current.names)
  recode_args <- c(list(x), replacements)
  do.call(dplyr::recode, recode_args)
}
Lastly, I'm aware of a base solution.
# Work on a copy so vec1 keeps its original values for the %in% tests below.
vec3 <- vec1
val1 <- c("A", "B")
val2 <- c("C", "D")
vec3[vec1 %in% val1] <- "Value1"
vec3[vec1 %in% val2] <- "Value2"
# The fallback must exclude BOTH groups; testing against c(val1, val1) would
# overwrite the "C"/"D" entries that were just set to "Value2".
vec3[!(vec1 %in% c(val1, val2))] <- "Value other"
but am unaware of how to incorporate the assignment carried out here into a function.
We can use case_when from the dplyr package.
library(dplyr)
vec1 <- rep(LETTERS[1:7],7)
vec2 <- case_when(
vec1 %in% c("A", "B") ~ "Value1",
vec1 %in% c("C", "D") ~ "Value2",
TRUE ~ "Value other"
)
head(vec2)
# [1] "Value1" "Value1" "Value2" "Value2" "Value other" "Value other"
Using the forcats package (also included in the tidyverse package)
library(forcats)
vec1 <- rep(LETTERS[1:7], 7)
fct_collapse(vec1,
Value1 = c("A", "B"),
Value2 = c("C", "D"),
`Value other` = c("E", "F", "G"))
This is a little cumbersome if you have lots of categories to put into Value other, but with a secondary step, you can simplify it a little bit
fct_collapse(vec1,
Value1 = c("A", "B"),
Value2 = c("C", "D")) %>%
fct_other(keep = c("Value1", "Value2"),
other_level = "Value other")

Resources