Multiply a variable in pipe - r

I'm new to pipes in R.
I have a dataframe like this
library(magrittr)
library(dplyr)
df = data.frame(a= c(1,2,3,4,5), b = c(3,4,5,6,7))
The result is
df_min = df %>% filter(a > 2) %$% as.data.frame( cbind(a=a*10, b))
> df_min
a b
1 30 5
2 40 6
3 50 7
Is there a more convenient and shorter way than using %$% as.data.frame( cbind(a=a*10, b))?

A shorter option with data.table
library(data.table)
setDT(df)[a > 2, .(a = a * 10, b)]

A more convenient way to do it is as follows:
library(magrittr)
library(dplyr)
df = data.frame(a= c(1,2,3,4,5), b = c(3,4,5,6,7))
df_min = df %>% filter(a>2) %>% mutate(a=a*10)
class(df_min)
df_min
You can read about mutate here, and here are some examples.

Related

Add 1 to column names that ends in integer r

My problem is quite straightforward, I have a dataframe with many columns, some of them start with q03b_, like this:
ID ... q03b_0 q03b_1 q03b_2 ... q03b_14
1 ... a b c m
But I need to change the column names to q03b_other_1, q03b_other_2, q03b_other_3, etc (counting from 1 instead of 0). I managed to select the columns with rename_at and add the "other" to the column names, like this:
df %>%
rename_at(vars(matches('q03b_')), list(~ str_replace(., "b_(\\d+)", "_other_\\1")))
Which brings a dataframe like this:
ID ... q03_other_0 q03_other_1 q03_other_2 ... q03_other_14
1 ... a b c m
But I'm struggling to get to the final stage, which would be this:
ID ... q03_other_1 q03_other_2 q03_other_3 ... q03_other_15
1 ... a b c m
I guess I need to use a combination of as.numeric and as.character, but because of tidy evaluation I'm struggling to find a way to make this work. Any ideas?
Thanks !
With gsubfn:
library(dplyr)
library(readr)
library(gsubfn)
df %>%
rename_at(vars(matches('q03b_')),
list(~ gsubfn("b_\\d+$",
~ paste0("_other_",
parse_number(x) + 1),
.)))
Output
q03_other_1 q03_other_2 q03_other_3
1 a b c
I am not sure whether you have to take the number from the original column names and add 1 to it to create the new names.
This works without doing that -
library(dplyr)
df %>%
rename_with(~paste0('q03_other_', seq_along(.)), starts_with('q03b_'))
# ID q03_other_1 q03_other_2 q03_other_3
#1 1 a b c
data
df <- data.frame(ID = 1, q03b_0 = 'a', q03b_1 = 'b', q03b_2 = 'c')
Here is an alternative way using sprintf:
library(dplyr)
library(stringr)
df %>%
select(-ID) %>%
rename_with(~str_replace(., "[0-9]+$", sprintf("%.0f", 1:length(colnames(df))))) %>%
rename_with(~str_replace(., "b", "")) %>%
bind_cols(ID=df$ID)
q03_other_1 q03_other_2 q03_other_3 ID
1 a b c 1
We can also use
library(dplyr)
library(stringr)
df %>%
rename_with(~ str_replace(., "b_\\d+$", function(x)
str_c('_other_', readr::parse_number(x) + 1)) , starts_with('q03b_'))
ID q03_other_1 q03_other_2 q03_other_3
1 1 a b c
data
df <- structure(list(ID = 1L, q03b_0 = "a", q03b_1 = "b", q03b_2 = "c"), class = "data.frame", row.names = c(NA,
-1L))
Try the following:
library(tidyverse)
df <- data.frame(
stringsAsFactors = FALSE,
ID = c(1L),
q03b_0 = c("a"),
q03b_1 = c("b"),
q03b_2 = c("c")
)
names(df)[-1] <- names(df)[-1] %>%
str_remove("_.*") %>%
paste0("_other_",1:length(.))
df
#> ID q03b_other_1 q03b_other_2 q03b_other_3
#> 1 1 a b c
EDIT: A more general solution:
library(tidyverse)
df <- data.frame(
stringsAsFactors = FALSE,
ID = c(1L),
q03b_0 = c("a"),
q03b_1 = c("b"),
q03b_2 = c("c")
)
names(df)[str_detect(names(df), "^q03b_")] %<>%
str_split("_") %>%
map_chr(~ paste0(.x[1], "_other_", 1+as.numeric(.x[2])))
df
#> ID q03b_other_1 q03b_other_2 q03b_other_3
#> 1 1 a b c

Apply dplyr functions on a single column across a list using piping

I'm trying to filter on a specific column across a list of dataframes. Typically, for a single dataframe using dplyr, I would use:
#creating dataframe
df <- data.frame(a = 0:10, d = 10:20)
# filtering column a for rows greater than 7
df %>% filter(a > 7)
I've tried doing this across a list using the following:
# creating list
x <- list(data.frame(a = 0:10, b = 10:20),
data.frame(c = 11:20, d = 21:30),
data.frame(e = 15:25, f = 35:45))
# selecting the appropriate column and trying to filter
# this is not working
x[1][[1]][1] %>% lapply(. %>% {filter(. > 2)})
# however, if I use the min() function it works
x[1][[1]][1] %>% lapply(. %>% {min(.)})
I find the %>% syntax quite easy to understand and carry out. However, in this case, selecting a specific column and doing something quite simple like filtering is not working. I'm guessing map could be equally useful. Any help is appreciated.
You can use filter_at to refer to a column by position.
library(dplyr)
purrr::map(x, ~.x %>% filter_at(1, any_vars(. > 7)))
In filter, you can subset the column and use it
purrr::map(x, ~.x %>% filter(.[[1]] > 7))
In base R, that would be :
lapply(x, function(y) y[y[[1]] > 7, ])
It seems you are interested in checking the condition on the first column of each dataframe in your list.
One solution using dplyr would be
lapply(x, function(df) {df %>% filter_at(1, ~. > 7)})
The 1 in filter_at indicates that I want to check the condition on the first column (1 is a positional index) of each dataframe in the list.
EDIT
After the discussion in the comments, I propose the following solution
lapply(x, function(df) {df %>% filter(a > 7) %>% select(a) %>% slice(1)})
Input data
x <- list(data.frame(a = 0:10, b = 10:20),
data.frame(a = 11:20, b = 21:30),
data.frame(a = 15:25, b = 35:45))
Output
[[1]]
a
1 8
[[2]]
a
1 11
[[3]]
a
1 15
Using filter with across
library(dplyr)
library(purrr)
map(x, ~ .x %>%
filter(across(names(.)[1], ~ .> 7)))

Using tidyverse in R how do I arrange my column values in a fixed way?

ID score
a 1
a 2
b 2
b 4
c 4
c 5
I want to reorder the rows so that ID follows a repeating "a, b, c" order, changing this to
ID score
a 1
b 2
c 4
a 2
b 4
c 5
What I tried
> data <- read_csv(data)
> data <- factor(data$id, levels = c('a', 'b', 'c'))
This works for tables, so I tried it, but it didn't work here. Does anybody know if there is a way?
Instead of assigning the factor built from the 'id' column back to data (which would replace the whole data frame with the 'id' values), the column should be used for ordering. In base R, this can be done with
data1 <- data[order(duplicated(data$ID)),]
row.name(data1) <- NULL
Or with dplyr
library(dplyr)
library(data.table)
data %>%
arrange(rowid(ID))
library(dplyr)
d %>%
group_by(ID) %>%
mutate(r = row_number()) %>%
ungroup() %>%
arrange(r, ID, score) %>%
select(-r)
OR in base R
with(d, d[order(ave(seq(NROW(d)), d$ID, FUN = seq_along), ID, score),])

Generating multiple columns at once with dplyr

I often have to dynamically generate multiple columns based on values in existing columns. Is there a dplyr equivalent of the following?:
cols <- c("x", "y")
foo <- c("a", "b")
df <- data.frame(a = 1, b = 2)
df[cols] <- df[foo] * 5
> df
a b x y
1 1 2 5 10
Not the most elegant:
library(tidyverse)
df %>%
mutate_at(vars(foo),function(x) x*5) %>%
set_names(.,nm=cols) %>%
cbind(df,.)
a b x y
1 1 2 5 10
This can be made more elegant, as suggested by @akrun:
df %>%
mutate_at(vars(foo), list(new = ~ . * 5)) %>%
rename_at(vars(matches('new')), ~ c('x', 'y'))

R Dataframe - Extract Unique rows from columns

I have a dataframe:
source= c("A", "A", "B")
target = c("B", "C", "C")
source_A = c(5, 5, 6)
target_A = c(6, 7, 7)
source_B = c(10, 10, 11)
target_B = c(11, 12, 12)
c = c(0.5, 0.6, 0.7)
df = data.frame(source, target, source_A, target_A, source_B, target_B, c)
> df
source target source_A target_A source_B target_B c
1 A B 5 6 10 11 0.5
2 A C 5 7 10 12 0.6
3 B C 6 7 11 12 0.7
How can I reduce this dataframe to return only the values for the unique source and target values (ignoring column c)?
For the Values [A B C]
id A B
1 A 5 10
2 B 6 11
3 C 7 12
At the moment I do something like this:
df1 <- df[,c("source","source_A", "source_B")]
df2 <- df[,c("target","target_A", "target_B")]
names(df1)[names(df1) == 'source'] <- 'id'
names(df1)[names(df1) == 'source_A'] <- 'A'
names(df1)[names(df1) == 'source_B'] <- 'B'
names(df2)[names(df2) == 'target'] <- 'id'
names(df2)[names(df2) == 'target_A'] <- 'A'
names(df2)[names(df2) == 'target_B'] <- 'B'
df3 <- rbind(df1,df2)
df3[!duplicated(df3$id),]
id A B
1 A 5 10
3 B 6 11
5 C 7 12
In reality, I have tens of columns so this is non-viable long term.
How can I do this more succinctly (and ideally, generalisable to more columns)?
library(dplyr)
library(magrittr)
df1 <- subset(df, select = ls(pattern = "source"))
df2 <- subset(df, select = ls(pattern = "target"))
names(df1) <- names(df2)
df <- bind_rows(df1, df2)
df %<>% group_by(target, target_A, target_B) %>% slice(1)
This should do it, but I do not quite know how you want to generalize it.
I don't think this is the most elegant solution in the world, but it serves the purpose. Hopefully the columns that you intend to use can be targeted by the column name string pattern!
Here's a more general method with dplyr functions. You basically need to gather everything into a long format, where you can rename the variable accordingly, then spread them back into id, A, B:
library(dplyr)
library(tidyr)
df %>%
select(-c) %>%
mutate(index = row_number()) %>%
gather(key , value, -index) %>%
separate(key, c("type", "name"), fill = "right") %>%
mutate(name = ifelse(is.na(name), "id", name)) %>%
spread(key = name, value = value) %>%
select(id, matches("[A-Z]", ignore.case = FALSE)) %>%
distinct

Resources