Hello so I was able to rename a list of dataframes using,
names(Final_mixed_list)[1:12] <- sprintf("genome_%d", 1:12)
each dataframe has its own name as genome_1, genome_2 and so on
Now I am trying to rename the second column, "Names", in each of the dataframes in my list to that dataframe's name: "genome_1" instead of "Names" for the genome_1 dataframe, "genome_2" for the genome_2 dataframe, and so on.
Each of the dataframes have the same column names "COG" and "Names" and the data and number of rows vary but they all have the same two columns
I tried using lapply along with colnames but instead i got my dataframe deleted using the following
final_mixed_list2 <- lapply(seq_along(Final_mixed_list), function(i) {colnames(Final_mixed_list[[i]])[2] <- sprintf("genome_%d", 1:12)})
and I got a new list where all of my values were deleted and only genome1 to 12 values were present
My expected result is to keep the dataframes intact like before but to change the Names column with their respective dataframe name genome_1 , genome_2 and so on.
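Your lapply() call returned only the names because the last expression in the anonymous function is the assignment itself, whose value is the assigned vector (all twelve names, since sprintf("genome_%d", 1:12) is not indexed by i); the function has to return the modified dataframe instead. I like imap for something like this.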
library(tidyverse)
#example
df_list <- list(genome_1 = tibble(Name = "genome_1", val = 9),
genome_2 = tibble(Name = "genome_2", val = 10),
genome_3 = tibble(Name = "genome_3", val = 15))
df_list
#> $genome_1
#> # A tibble: 1 x 2
#> Name val
#> <chr> <dbl>
#> 1 genome_1 9
#>
#> $genome_2
#> # A tibble: 1 x 2
#> Name val
#> <chr> <dbl>
#> 1 genome_2 10
#>
#> $genome_3
#> # A tibble: 1 x 2
#> Name val
#> <chr> <dbl>
#> 1 genome_3 15
#solution
df_list |>
imap(\(dat, genome) rename(dat, "{genome}" := Name))
#> $genome_1
#> # A tibble: 1 x 2
#> genome_1 val
#> <chr> <dbl>
#> 1 genome_1 9
#>
#> $genome_2
#> # A tibble: 1 x 2
#> genome_2 val
#> <chr> <dbl>
#> 1 genome_2 10
#>
#> $genome_3
#> # A tibble: 1 x 2
#> genome_3 val
#> <chr> <dbl>
#> 1 genome_3 15
Related
I created a function that creates unique column names from the existing ones: renameCol.
If I manually create a vector of new column names using that function I can manually set those as the new column names. However, if I use that function in rename_with I get an error about unique names.
library(tidyverse)
# Build a short column name such as "Switch_11" from a long
# "HealthcareProvider..._<n>" column name.
# NOTE: the `[[1]]` below keeps only the match matrix for the first element
# of `colname`, so passing a vector of names yields a single result.
renameCol = function(colname)
{
# str_match_all() returns one match matrix per input string; only the first
# one is kept here.
match = str_match_all(colname, "HealthcareProvider((TaxonomyCode|PrimaryTaxonomySwitch))_([0-9]+)")[[1]]
# For a single-row match matrix, element 3 is the inner capture group
# ("TaxonomyCode" or "PrimaryTaxonomySwitch").
coltype = match[[3]]
# Strip the leading "Taxonomy"/"PrimaryTaxonomy", leaving "Code" or "Switch".
coltype = str_remove(coltype, "(Taxonomy|PrimaryTaxonomy)")
# Element 4 is the trailing number captured by ([0-9]+).
number = match[[4]]
return(paste0(coltype, "_", number))
}
renameCol("HealthcareProviderPrimaryTaxonomySwitch_11")
#> [1] "Switch_11"
renameCol("HealthcareProviderTaxonomyCode_11")
#> [1] "Code_11"
tb = tibble(
HealthcareProviderPrimaryTaxonomySwitch_11 = 1,
HealthcareProviderTaxonomyCode_3 = 2,
HealthcareProviderPrimaryTaxonomySwitch_9 = 3,
HealthcareProviderTaxonomyCode_13 = 4
)
tb %>% rename_with(renameCol)
#> Error in `rename_with()`:
#> ! Names must be unique.
#> x These names are duplicated:
#> * "Switch_11" at locations 1, 2, 3, and 4.
new_colnames = colnames(tb) %>% sapply(renameCol, USE.NAMES = F)
new_colnames
#> [1] "Switch_11" "Code_3" "Switch_9" "Code_13"
colnames(tb) = new_colnames
tb
#> # A tibble: 1 x 4
#> Switch_11 Code_3 Switch_9 Code_13
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2 3 4
Created on 2022-06-16 by the reprex package (v2.0.1)
The answer is present in your question itself. Your function is not vectorised. It works for only one column name at a time.
library(tidyverse)
names(tb)
#[1] "HealthcareProviderPrimaryTaxonomySwitch_11"
#[2] "HealthcareProviderTaxonomyCode_3"
#[3] "HealthcareProviderPrimaryTaxonomySwitch_9"
#[4] "HealthcareProviderTaxonomyCode_13"
renameCol(names(tb))
#[1] "Switch_11"
Hence you have to use sapply to make it work for all the columns. rename_with is not a loop (like sapply) so to make it work you can do -
tb %>% rename_with(~sapply(., renameCol))
# A tibble: 1 × 4
# Switch_11 Code_3 Switch_9 Code_13
# <dbl> <dbl> <dbl> <dbl>
#1 1 2 3 4
Or change the function to work with more than one column name.
# Vectorised renamer: turns names such as
# "HealthcareProviderPrimaryTaxonomySwitch_11" into "Switch_11".
#
# colname: character vector of column names.
# Returns a character vector of the same length as `colname`; names that do
# not match the pattern are returned unchanged, so the function is safe to
# use in rename_with() on data frames that contain other columns.
renameCol <- function(colname) {
  # str_match() yields exactly one row per input (NA row when there is no
  # match): column 1 = full match, column 3 = type, column 4 = number.
  parts <- str_match(
    colname,
    "HealthcareProvider((TaxonomyCode|PrimaryTaxonomySwitch))_([0-9]+)"
  )
  coltype <- str_remove(parts[, 3], "(Taxonomy|PrimaryTaxonomy)")
  renamed <- paste0(coltype, "_", parts[, 4])
  # Keep the original name wherever the pattern did not match.
  unmatched <- is.na(parts[, 1])
  renamed[unmatched] <- colname[unmatched]
  renamed
}
renameCol(names(tb))
#[1] "Switch_11" "Code_3" "Switch_9" "Code_13"
tb %>% rename_with(renameCol)
# A tibble: 1 × 4
# Switch_11 Code_3 Switch_9 Code_13
# <dbl> <dbl> <dbl> <dbl>
#1 1 2 3 4
I have a dataframe with a column named CHR which has discrete values from 1 to 18 (1, 2, 3 ...)
I want to subset the dataframes for each value of CHR. So far my code (working) looks like this:
CH1<-boxplot %>% filter(CHR == "1")
CH2<-boxplot %>% filter(CHR == "2")
CH3<-boxplot %>% filter(CHR == "3")
.
.
.
CH18<-boxplot %>% filter(CHR == "18")
It does get the job done, but I'm very annoyed whenever my code looks like that. I want to learn one "proper" way so I can apply it to multiple other similar cases.
You have a few options:
1. Write a function, although you will still have many lines, they are condensed lines.
# Return the rows of `boxplot` whose CHR column equals `chr`.
bx_filter <- function(boxplot, chr) {
  boxplot %>% filter(CHR == chr)
}
# The data frame must be passed as the first argument.
CH1 <- bx_filter(boxplot, "1")
CH2 <- bx_filter(boxplot, "2")
2. Use split(), where you'll get a list and each element of the list has the data frames you're looking for
split(boxplot, boxplot$CHR)
3. A combo of map() and assign(), although it's generally frowned upon to write to the Global environment in ways similar to this
# Create one global variable (CH1, CH2, ...) per distinct CHR value.
# walk() is used instead of map() because the call is made only for its
# side effect; map() would also build and print a list of every subset.
unique(boxplot$CHR) %>%
  walk(function(chr) {
    assign(paste0("CH", chr), boxplot %>% filter(CHR == chr), envir = .GlobalEnv)
  })
group_split is one option:
library(tidyverse)
# Named `chr_list` rather than `list` to avoid shadowing base::list().
chr_list <- tibble(chr = rep(1:18, 2), value = 1:36) |>
  group_split(chr)
head(chr_list)
#> <list_of<
#> tbl_df<
#> chr : integer
#> value: integer
#> >
#> >[6]>
#> [[1]]
#> # A tibble: 2 × 2
#> chr value
#> <int> <int>
#> 1 1 1
#> 2 1 19
#>
#> [[2]]
#> # A tibble: 2 × 2
#> chr value
#> <int> <int>
#> 1 2 2
#> 2 2 20
#>
#> [[3]]
#> # A tibble: 2 × 2
#> chr value
#> <int> <int>
#> 1 3 3
#> 2 3 21
#>
#> [[4]]
#> # A tibble: 2 × 2
#> chr value
#> <int> <int>
#> 1 4 4
#> 2 4 22
#>
#> [[5]]
#> # A tibble: 2 × 2
#> chr value
#> <int> <int>
#> 1 5 5
#> 2 5 23
#>
#> [[6]]
#> # A tibble: 2 × 2
#> chr value
#> <int> <int>
#> 1 6 6
#> 2 6 24
Created on 2022-06-08 by the reprex package (v2.0.1)
Loop over the CHR var
# One data frame per distinct CHR value; `==` compares, `=` would be treated
# as a named argument by filter().
lapply(unique(boxplot$CHR), function(i) filter(boxplot, CHR == i))
I have a tibble with columns named as numbers (e.g. 1). I created a function to compute differences between columns, but I don't know how to do it with that type of columns:
<!-- language-all: lang-r -->
library(tidyverse)
df <- tibble(`1` = c(1,2,3), `2` = c(2,4,6))
# This works
df %>%
mutate(diff = `1` - `2`)
#> # A tibble: 3 x 3
#> `1` `2` diff
#> <dbl> <dbl> <dbl>
#> 1 1 2 -1
#> 2 2 4 -2
#> 3 3 6 -3
# But this doesn't
calc_diffs <- function(x, y){
df %>%
mutate(diff := !!x - !!y)
}
calc_diffs(1, 2)
#> # A tibble: 3 x 3
#> `1` `2` diff
#> <dbl> <dbl> <dbl>
#> 1 1 2 -1
#> 2 2 4 -1
#> 3 3 6 -1
<sup>Created on 2020-10-14 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup>
We can convert to a symbol and evaluate
calc_diffs <- function(x, y){
df %>%
mutate(diff := !! rlang::sym(x) - !!rlang::sym(y))
}
Then, we just pass a string as argument
calc_diffs("1", "2")
# A tibble: 3 x 3
# `1` `2` diff
# <dbl> <dbl> <dbl>
#1 1 2 -1
#2 2 4 -2
#3 3 6 -3
Column names are strings. We could pass an index to subset the column, but here the column name is an unusual name that starts with a number. So we can either wrap it in backticks using paste, or just pass a string, convert it to a symbol and evaluate it (!!)
Does this work:
> df <- tibble(`1` = c(1,2,3), `2` = c(2,4,6))
> df
# A tibble: 3 x 2
`1` `2`
<dbl> <dbl>
1 1 2
2 2 4
3 3 6
> calc_diffs <- function(x, y){
+ df %>%
+ mutate(diff = {{x}} - {{y}})
+ }
> calc_diffs(`1`,`2`)
# A tibble: 3 x 3
`1` `2` diff
<dbl> <dbl> <dbl>
1 1 2 -1
2 2 4 -2
3 3 6 -3
>
I was able to do this, but was wondering if there was a more elegant way, possibly with dplyr rename?
# Create dataframe with three named columns
tb <- tibble(col1 = 1:3, col2 = 1:3, col3 = 1:3)
#> # A tibble: 3 x 3
#> col1 col2 col3
#> <int> <int> <int>
#> 1 1 1 1
#> 2 2 2 2
#> 3 3 3 3
# Named vector with replacement names
new_names <- c(col1 = "Column 1", col3 = "Col3")
#> col1 col3
#> "Column 1" "Col3"
# Rename columns within dataframe
tb <- new_names[colnames(tb)] %>%
coalesce(colnames(tb)) %>%
setNames(object = tb, nm = .)
#> # A tibble: 3 x 3
#> `Column 1` col2 Col3
#> <int> <int> <int>
#> 1 1 1 1
#> 2 2 2 2
#> 3 3 3 3
# loading dplyr
pacman::p_load(dplyr)
# rename() syntax demands:
# LHS - a new column name
# RHS - an existing column name
# can be either a named vector or a named list
c('Column 1' = 'col1', 'Col3' = 'col3') -> x
# the unquote-splice (!!!) operator unquotes and splices its argument
rename(tibble(col1 = 1:3, col2 = 1:3, col3 = 1:3), !!!x)
#> # A tibble: 3 x 3
#> `Column 1` col2 Col3
#> <int> <int> <int>
#> 1 1 1 1
#> 2 2 2 2
#> 3 3 3 3
You can find more about it here:
a good book
And here: pretty documentation
Pipe operators are kinda slow so you ought to try to avoid using them when not needed.
I am trying to rename all columns in my data frame using dplyr and stringr, but it seems not to be working the way I desire. How should I change the following code to get the output I want (shown in the code below)?
Here is the fully reproducible code:
library(dplyr)
library(stringr)
library(tibble)
library(rlang)
# dataframe
x <-
tibble::as.tibble(cbind(
Grace_neu_wrong = c(1:4),
Grace_acc_wrong = c(1:4),
Grace_att_wrong = c(1:4),
Grace_int_wrong = c(1:4)
))
# defining custom function to rename the entire dataframe in a certain way
string_conversion <- function(df, ...) {
# preparing the dataframe
df <- dplyr::select(.data = df,
!!rlang::quo(...))
# custom function to split the name of each column in a certain way
splitfn <- function(x) {
x <- as.character(x)
split <- stringr::str_split(string = x, pattern = "_")[[1]]
paste(split[2], split[3], '_', split[1], sep = '')
}
# applying the splitfn function to each column name and outputting the data frame
df_new <- df %>%
dplyr::select_all(.funs = colnames) %>%
dplyr::mutate_all(.funs = splitfn)
return(df_new)
}
# the output I get
string_conversion(df = x, names(x))
#> # A tibble: 4 x 4
#> Grace_neu_wrong Grace_acc_wrong Grace_att_wrong Grace_int_wrong
#> <chr> <chr> <chr> <chr>
#> 1 NANA_1 NANA_1 NANA_1 NANA_1
#> 2 NANA_1 NANA_1 NANA_1 NANA_1
#> 3 NANA_1 NANA_1 NANA_1 NANA_1
#> 4 NANA_1 NANA_1 NANA_1 NANA_1
# the output I desire
tibble::as.tibble(cbind(
neuwrong_Grace = c(1:4),
accwrong_Grace = c(1:4),
attwrong_Grace = c(1:4),
intwrong_Grace = c(1:4)
))
#> # A tibble: 4 x 4
#> neuwrong_Grace accwrong_Grace attwrong_Grace intwrong_Grace
#> <int> <int> <int> <int>
#> 1 1 1 1 1
#> 2 2 2 2 2
#> 3 3 3 3 3
#> 4 4 4 4 4
Created on 2018-02-08 by the reprex
package (v0.1.1.9000).
You can do this in a single line without using mutate, which should be for the column values rather than the column names. Instead, do the following using stringr::str_replace and regular expressions.
The pattern "(.*)_(.*)_(.*)" is three groups of characters separated by underscores.
We simply make the replacement "\\2\\3_\\1", which is group 2, then group 3, then an underscore, then group 1, giving us the desired result.
The code is consequently just one line long:
names(x) <- str_replace(names(x), "(.*)_(.*)_(.*)", "\\2\\3_\\1")
print(x)
# A tibble: 4 x 4
neuwrong_Grace accwrong_Grace attwrong_Grace intwrong_Grace
<int> <int> <int> <int>
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
4 4 4 4 4