My problem is quite straightforward, I have a dataframe with many columns, some of them start with q03b_, like this:
ID ... q03b_0 q03b_1 q03b_2 ... q03b_14
1 ... a b c m
But I need to change the column names to q03b_other_1, q03b_other_2, q03b_other_3, etc (counting from 1 instead of 0). I managed to select the columns with rename_at and add the "other" to the column names, like this:
df %>%
rename_at(vars(matches('q03b_')), list(~ str_replace(., "b_(\\d+)", "_other_\\1")))
Which brings a dataframe like this:
ID ... q03_other_0 q03_other_1 q03_other_2 ... q03_other_14
1 ... a b c m
But I'm struggling to get to the final stage, which would be this:
ID ... q03_other_1 q03_other_2 q03_other_3 ... q03_other_15
1 ... a b c m
I guess I need to use a combination of as.numeric and as.character, but because of tidy evaluation I'm struggling to find a way to make this work. Any ideas?
Thanks !
With gsubfn:
library(dplyr)
library(readr)
library(gsubfn)
# Rename every column whose name matches 'q03b_': the trailing "b_<digits>"
# is replaced by "_other_<digits + 1>".
df %>%
rename_at(vars(matches('q03b_')),
# Outer formula: `.` stands for the vector of selected column names.
list(~ gsubfn("b_\\d+$",
# Inner formula is the replacement handed to gsubfn; `x` is presumably the
# matched text (e.g. "b_0") -- see the gsubfn documentation for how formula
# replacements bind their free variables.
~ paste0("_other_",
parse_number(x) + 1),
.)))
Output
q03_other_1 q03_other_2 q03_other_3
1 a b c
I am not sure whether you have to take the number from the original column names and add 1 to it to create the new names.
This works without doing that -
library(dplyr)
# Renumber the q03b_* columns by position (1, 2, 3, ...) instead of parsing
# the digit that is already part of each name.
df %>%
  rename_with(
    .fn = function(old_names) paste0("q03_other_", seq_along(old_names)),
    .cols = starts_with("q03b_")
  )
# ID q03_other_1 q03_other_2 q03_other_3
#1 1 a b c
data
df <- data.frame(ID = 1, q03b_0 = 'a', q03b_1 = 'b', q03b_2 = 'c')
Here is an alternative way using sprintf:
library(dplyr)
library(stringr)
# Replace the trailing "b_<digits>" of each q03b_* name with
# "_other_<position>", numbering from 1.
# The replacement vector is built with seq_along(.) so it has exactly one
# entry per renamed column; sizing it from the full width of df would also
# count ID and no longer line up with the names being replaced.
# ID is dropped first and re-attached last, so it ends up as the final column.
df %>%
  select(-ID) %>%
  rename_with(
    ~ str_replace(., "b_[0-9]+$", sprintf("_other_%d", seq_along(.))),
    starts_with("q03b_")
  ) %>%
  bind_cols(ID = df$ID)
q03_other_1 q03_other_2 q03_other_3 ID
1 a b c 1
We can also use
library(dplyr)
library(stringr)
# For each q03b_* column, swap the trailing "b_<digits>" for
# "_other_<digits + 1>". The replacement is computed per match by a callback
# that parses the number out of the matched text.
df %>%
  rename_with(
    .fn = function(col_names) {
      str_replace(
        col_names,
        "b_\\d+$",
        function(matched) str_c("_other_", readr::parse_number(matched) + 1)
      )
    },
    .cols = starts_with("q03b_")
  )
ID q03_other_1 q03_other_2 q03_other_3
1 1 a b c
data
df <- structure(list(ID = 1L, q03b_0 = "a", q03b_1 = "b", q03b_2 = "c"), class = "data.frame", row.names = c(NA,
-1L))
Try the following:
library(tidyverse)

# Example data: one ID column followed by the q03b_* columns.
df <- data.frame(
  stringsAsFactors = FALSE,
  ID = 1L,
  q03b_0 = "a",
  q03b_1 = "b",
  q03b_2 = "c"
)

# Rename every column except the first: keep the prefix before the first "_"
# and append "_other_<position>". seq_along() is used rather than 1:length()
# so that zero remaining columns produce zero names instead of c(1, 0).
names(df)[-1] <- names(df)[-1] %>%
  str_remove("_.*") %>%
  paste0("_other_", seq_along(.))

df
#> ID q03b_other_1 q03b_other_2 q03b_other_3
#> 1 1 a b c
EDIT: A more general solution:
library(tidyverse)

# Example data: one ID column followed by the q03b_* columns.
df <- data.frame(
  stringsAsFactors = FALSE,
  ID = 1L,
  q03b_0 = "a",
  q03b_1 = "b",
  q03b_2 = "c"
)

# Only columns whose name starts with "q03b_" are renamed.
is_q03b <- str_detect(names(df), "^q03b_")

# Split each selected name at "_" into prefix and number, then rebuild it as
# "<prefix>_other_<number + 1>". Plain `<-` assignment is used because
# library(tidyverse) does not attach magrittr's `%<>%` operator.
names(df)[is_q03b] <- names(df)[is_q03b] %>%
  str_split("_") %>%
  map_chr(~ paste0(.x[1], "_other_", 1 + as.numeric(.x[2])))

df
#> ID q03b_other_1 q03b_other_2 q03b_other_3
#> 1 1 a b c
Related
Let's say I have the following data: x <- 1:2.
My desired output is a data.frame() like the following:
a b
1 2
With base R I would do something along these lines:
df <- data.frame(t(x))
colnames(df) <- c("a", "b")
Question: How would I do this with the pipe operator?
What I have tried so far:
library(magrittr)
x %>% data.frame(a = .[1], b = .[2])
After the transpose, convert to a tibble with as_tibble and change the column names with setNames
library(dplyr)
library(tibble)
x %>%
t %>%
as_tibble(.name_repair = "unique") %>%
setNames(c("a", "b"))
# A tibble: 1 x 2
# a b
# <int> <int>
#1 1 2
Or another option if we want to use the OP's syntax would be to wrap the code with {}
x %>%
{data.frame(a = .[1], b = .[2])}
I often have to dynamically generate multiple columns based on values in existing columns. Is there a dplyr equivalent of the following?:
cols <- c("x", "y")
foo <- c("a", "b")
df <- data.frame(a = 1, b = 2)
df[cols] <- df[foo] * 5
> df
a b x y
1 1 2 5 10
Not the most elegant:
library(tidyverse)
# Build the new columns from the `foo` columns only, then append them to df.
# Selecting all_of(foo) first means set_names(cols) always sees exactly
# length(foo) columns, even when df carries columns that are not in foo.
df %>%
  select(all_of(foo)) %>%
  mutate(across(everything(), ~ .x * 5)) %>%
  set_names(cols) %>%
  # `.` is the renamed result; the original df goes on the left.
  cbind(df, .)
a b x y
1 1 2 5 10
This can be made more elegant, as suggested by @akrun:
# Add a "<name>_new" copy (multiplied by 5) of every column listed in `foo`,
# then rename exactly those new columns to the names given in `cols`.
# The new columns are addressed by their exact names, so an unrelated column
# that merely contains "new" is never renamed by accident.
df %>%
  mutate(across(all_of(foo), ~ .x * 5, .names = "{.col}_new")) %>%
  rename_with(~ cols, .cols = all_of(paste0(foo, "_new")))
I'm new to pipes in R.
I have a dataframe like this
library(magrittr)
library(dplyr)
df = data.frame(a= c(1,2,3,4,5), b = c(3,4,5,6,7))
The result is
df_min = df %>% filter(a > 2) %$% as.data.frame( cbind(a=a*10, b))
> df_min
a b
1 30 5
2 40 6
3 50 7
Is there a more convenient and shorter way than %$% as.data.frame( cbind(a=a*10, b))?
A shorter option with data.table
library(data.table)
# Keep the rows where a > 2 and return a * 10 (as a) together with b.
# NOTE: setDT() converts df to a data.table by reference (see ?setDT), so df
# itself is changed by this call, not just the returned result.
setDT(df)[a > 2, .(a = a * 10, b)]
A more convenient way to do it is as follows:
library(magrittr)
library(dplyr)

# Sample data.
df <- data.frame(
  a = c(1, 2, 3, 4, 5),
  b = c(3, 4, 5, 6, 7)
)

# Keep rows where a exceeds 2, then scale a by 10. mutate() returns the whole
# data frame, so b is carried along unchanged.
df_min <- df %>%
  filter(a > 2) %>%
  mutate(a = a * 10)

class(df_min)
df_min
You can read about mutate here, and here are some examples
I have a dataframe:
source= c("A", "A", "B")
target = c("B", "C", "C")
source_A = c(5, 5, 6)
target_A = c(6, 7, 7)
source_B = c(10, 10, 11)
target_B = c(11, 12, 12)
c = c(0.5, 0.6, 0.7)
df = data.frame(source, target, source_A, target_A, source_B, target_B, c)
> df
source target source_A target_A source_B target_B c
1 A B 5 6 10 11 0.5
2 A C 5 7 10 12 0.6
3 B C 6 7 11 12 0.7
How can I reduce this dataframe to only the values for the unique source and target values (ignoring column c)?
For the Values [A B C]
id A B
1 A 5 10
2 B 6 11
3 C 7 12
At the moment I do something like this:
df1 <- df[,c("source","source_A", "source_B")]
df2 <- df[,c("target","target_A", "target_B")]
names(df1)[names(df1) == 'source'] <- 'id'
names(df1)[names(df1) == 'source_A'] <- 'A'
names(df1)[names(df1) == 'source_B'] <- 'B'
names(df2)[names(df2) == 'target'] <- 'id'
names(df2)[names(df2) == 'target_A'] <- 'A'
names(df2)[names(df2) == 'target_B'] <- 'B'
df3 <- rbind(df1,df2)
df3[!duplicated(df3$id),]
id A B
1 A 5 10
3 B 6 11
5 C 7 12
In reality, I have tens of columns so this is non-viable long term.
How can I do this more succinctly (and ideally, generaliseable to more columns)?
library(dplyr)
library(magrittr)
# Split df into its source* and target* halves, give both halves the same
# column names (id, A, B, ...), stack them, and keep one row per unique
# combination of values.
# Columns are chosen with starts_with(), i.e. directly from df's own column
# names. This assumes the source and target columns appear in the same
# relative order in df.
df1 <- select(df, starts_with("source"))
df2 <- select(df, starts_with("target"))

# "source" -> "id", "source_A" -> "A", "source_B" -> "B", ...
new_names <- sub("^source_?", "", names(df1))
new_names[new_names == ""] <- "id"
names(df1) <- new_names
names(df2) <- new_names

# distinct() removes repeated rows without leaving the result grouped.
df <- bind_rows(df1, df2) %>%
  distinct()
This should do it, but I do not quite know how you want to generalize it.
I don't think this is the most elegant solution in the world, but it serves the purpose. Hopefully the columns that you intend to use can be targeted by the column name string pattern!
Here's a more general method with dplyr functions. You basically need to gather everything into a long format, where you can rename the variable accordingly, then spread them back into id, A, B:
library(dplyr)
library(tidyr)
# Reshape df so that source/target rows are stacked under common columns
# (id, A, B), then drop duplicates.
df %>%
# Column c is not part of the requested result.
select(-c) %>%
# Remember which original row each value came from.
mutate(index = row_number()) %>%
# Long format: one row per (index, original column name, value).
# NOTE(review): the gathered columns mix text and numbers, so `value` is
# presumably coerced to character here -- confirm the types of A and B in
# the result.
gather(key , value, -index) %>%
# Split e.g. "source_A" into type = "source", name = "A"; a key without a
# second piece ("source", "target") gets name = NA because fill = "right".
separate(key, c("type", "name"), fill = "right") %>%
# The keys without a suffix hold the identifier itself.
mutate(name = ifelse(is.na(name), "id", name)) %>%
# Back to wide format: one column per distinct `name` (id, A, B).
spread(key = name, value = value) %>%
# Keep id plus the upper-case-named columns; this drops index and type.
select(id, matches("[A-Z]", ignore.case = FALSE)) %>%
distinct
Given a data frame like:
df <- data.frame(z_a = 1:2,
z_b = 1:2,
y_a = 3:4,
y_b = 3:4)
I can select columns names that contain a character with:
library(dplyr)
df %>% select(contains("a"), contains("b"))
z_a y_a z_b y_b
1 1 3 1 3
2 2 4 2 4
NOTE that the column order has changed. Columns containing a come first before columns containing b
I'd like to select column names that contain characters in a vector and that reorders the columns.
searchfor <- letters[1:2]
Using searchfor, I'd like to make the following expression and use it in a select statement:
E <- quote(contains(searchfor[1]), contains(searchfor[2]))
df %>% select_(E)
We can do
# Keep the columns whose name matches any entry of `searchfor`, then reorder
# them by the part of the name after the last underscore, so that all "a"
# columns come before all "b" columns.
df %>%
  select(matches(paste(searchfor, collapse = "|"))) %>%
  # `.` is the already-filtered data frame; order() yields column positions.
  select(order(sub(".*_", "", names(.))))
purrr solution:
library(purrr)
ind_lgl <- map(letters[1:2], ~ grepl(.x, names(df), fixed = TRUE)) %>%
pmap_lgl(`|`)
df[ind_lgl]
With the pipe:
df %>%
`[`(map(letters[1:2], ~ grepl(.x, names(df), fixed = TRUE)) %>%
pmap_lgl(`|`))
If you want to get the right order:
# For each column of df, record which search letters (by their position in
# letters[1:2]) occur in the column name.
rank <- map(letters[1:2], ~ grepl(.x, names(df), fixed = TRUE)) %>%
  pmap(c) %>%
  map(which)

# Keep the columns that matched at least one letter, ordered by the first
# letter they matched. tibble() is used in place of the deprecated
# data_frame(); the unnamed `rank` argument becomes a list column "rank".
ind_chr <- tibble(colnames = names(df), rank) %>%
  mutate(l = lengths(rank)) %>%
  filter(l > 0) %>%
  mutate(rank = unlist(map(rank, ~ .x[[1]]))) %>%
  arrange(rank) %>%
  pull(colnames)

df[ind_chr]
But it is not pretty...
I don't fully understand the exact requirement, but is this a solution?
select(df, matches("a|b"))
Self answer - here's a solution with select_ and that still uses contains - just in case anyone else is interested:
library(iterators)
library(dplyr)
# Build the selection as a string: one contains("<value>") call per element
# of searchfor, joined with commas and wrapped in c( ... ).
# NOTE(review): presumably yields c(contains("a"),contains("b")) for
# searchfor = c("a", "b"); this relies on sapply() being able to walk the
# iter() object -- confirm against the iterators package.
s <- paste0("c(", paste0(sapply(iter(searchfor), function(x) paste0("contains(\"", x, "\")")), collapse=","), ")")
# Hand the string to select_(), the string-evaluating variant of select().
# NOTE(review): select_() is deprecated in current dplyr -- verify it is still
# available in the installed version.
df %>% select_(., s)
z_a y_a z_b y_b
1 1 3 1 3
2 2 4 2 4