modify columns for a list of data frames - r

I have multiple tables loaded using lapply. The last colname is not the same for all tables which makes it impossible to rbind the tables. I would like to make a new column that contains the colname in column 10 as ID, then rename the colname of column 10 to X.
# Collect every file in the working directory whose name matches "edit".
file_list <- list.files(pattern = "edit")
# Read each file as a tab-separated table with a header row. TRUE is spelled
# out because `T` is an ordinary variable that can be reassigned.
tables <- lapply(file_list, read.csv, header = TRUE, sep = "\t")
colnames(tables[[984]])
[1] "X.CHROM" "POS" "ID"
[4] "REF" "ALT" "QUAL"
[7] "FILTER" "INFO" "FORMAT"
[10] "ID02020886"
My try:
tables <- map2(tables, file_list, ~cbind(.x, ID = .y)) #Does not work....
Desired out:
colnames(tables[[984]])
[1] "X.CHROM" "POS" "ID"
[4] "REF" "ALT" "QUAL"
[7] "FILTER" "INFO" "FORMAT"
[10] "X" "ID"

I would use a loop. I named the new column ID_new as another ID column is already there.
# sample data: three tables that share nine column names and differ only in
# the name of their tenth column (ID1, ID2, ID3); cells are random normals
tables <- lapply(c("ID1", "ID2", "ID3"), function(last_col) {
  all_cols <- c("X.CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO",
                "FORMAT", last_col)
  draws <- vapply(all_cols, function(nm) rnorm(5), numeric(5))
  as.data.frame(draws)
})
# modify every table: store the name of column 10 in ID_new, then rename
# that column to X
tables <- lapply(tables, function(tbl) {
  tagged <- cbind(tbl, ID_new = colnames(tbl)[10])
  colnames(tagged)[10] <- "X"
  tagged
})

Here is a solution, with built-in data set mtcars to test the code.
suppressPackageStartupMessages({
  library(dplyr)
  library(purrr)
})
# Stand-ins for the file names and for the tables read from them.
file_list <- letters[1:2]
tables <- list(head(mtcars)[-1], head(mtcars)[-ncol(mtcars)])
# Inner call: tag every table with the matching entry of file_list.
# Outer call: rename whatever column sits in position 10 to X.
# The result is not assigned, so it is printed.
map(
  map2(tables, file_list, function(tbl, id) cbind(tbl, ID = id)),
  function(tbl) rename(tbl, "X" = names(tbl)[10])
)
#> [[1]]
#> cyl disp hp drat wt qsec vs am gear X ID
#> Mazda RX4 6 160 110 3.90 2.620 16.46 0 1 4 4 a
#> Mazda RX4 Wag 6 160 110 3.90 2.875 17.02 0 1 4 4 a
#> Datsun 710 4 108 93 3.85 2.320 18.61 1 1 4 1 a
#> Hornet 4 Drive 6 258 110 3.08 3.215 19.44 1 0 3 1 a
#> Hornet Sportabout 8 360 175 3.15 3.440 17.02 0 0 3 2 a
#> Valiant 6 225 105 2.76 3.460 20.22 1 0 3 1 a
#>
#> [[2]]
#> mpg cyl disp hp drat wt qsec vs am X ID
#> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 b
#> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 b
#> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 b
#> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 b
#> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 b
#> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 b
Created on 2022-06-12 by the reprex package (v2.0.1)

Related

replace row names with defined vector in R

Is there a way that the row names can be substituted based on predefined vector in R, something like:
rownames(GV) <- c(beta1='Age', beta10='Female Gender')
Or maybe case_when() will be easier for you:
library(dplyr)
# Toy data frame; its default row names are "1", "2", "3".
df <- data.frame(a = c(1, 2, 3))
rownames(df)
#> [1] "1" "2" "3"
# Map selected row names to new labels; the final TRUE branch returns the
# existing name for every row not matched by an earlier condition.
rownames(df) <- case_when(rownames(df) == "1" ~ "one",
rownames(df) == "2" ~ "two",
TRUE ~ rownames(df))
rownames(df)
#> [1] "one" "two" "3"
You specify a new value for each condition, plus a value for all remaining cases (the TRUE ~ rownames(df) line); in the example above, the remaining cases keep their previous row names.
We could do the following:
# Work on a copy of the pristine dataset: assigning to rownames(mtcars)
# creates a modified global `mtcars` that every later snippet would pick up.
mt <- datasets::mtcars
# The logical mask already selects the matching row; which() is not needed.
rownames(mt)[rownames(mt) == "Datsun 710"] <- "My Rowname"
head(mt)
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> My Rowname 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
If we want to rename more rownames we can use %in%, but as @gss mentions in the comments, this comes with a caveat: no matter the order of the names in the character vector following %in%, the names will be replaced in the order they appear in rownames(). Compare the following two calls:
# Start from the pristine dataset: if "Datsun 710" has already been renamed in
# a modified `mtcars`, only one row would match here.
mt <- datasets::mtcars
targets <- c("Datsun 710", "Mazda RX4 Wag")
# %in% yields a mask in row order, so the new names are assigned in the order
# the rows appear, not in the order of `targets`.
rownames(mt)[rownames(mt) %in% targets] <- c("My Rowname1", "My Rowname2")
head(mt)
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> My Rowname1 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> My Rowname2 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
Which has the same result as:
# Start from the pristine dataset so the comparison with the previous call is
# made on identical input.
mt <- datasets::mtcars
# Same names as before, listed in the opposite order.
targets <- c("Mazda RX4 Wag", "Datsun 710")
# The mask produced by %in% follows row order, so the result is unchanged.
rownames(mt)[rownames(mt) %in% targets] <- c("My Rowname1", "My Rowname2")
head(mt)
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> My Rowname1 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> My Rowname2 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
Created on 2021-12-21 by the reprex package (v2.0.1)
If you want to rename all the rows, and you have an array of the desired new names in order:
# Positional rename: the i-th entry of `mynewnames` becomes the i-th row name.
mynewnames <- c("First", "Second", "Third")
example <- head(mtcars, 3)
row.names(example) <- mynewnames
example
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> First 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> Second 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> Third 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
If you want to rename all the rows, and you have a named array (not necessarily in the correct order):
# Name-based rename: every current row name is looked up in the named vector,
# so the order of `mynewnames` does not have to follow the row order.
mynewnames <- c(
  "Datsun 710" = "Datsun",
  "Mazda RX4" = "Mazda",
  "Mazda RX4 Wag" = "Also Mazda"
)
example <- head(mtcars, 3)
row.names(example) <- mynewnames[row.names(example)]
example
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> Also Mazda 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
If you want to rename only some rows, and have a named array (an ordered array makes no sense in this context):
# Partial rename: only rows whose current name is a key of `mynewnames` change.
example <- head(mtcars, 3)
mynewnames <- c("Mazda RX4" = "This Mazda", "Mazda RX4 Wag" = "That Mazda")
rownames(example) <- local({
  current <- rownames(example)
  # rows that have a replacement available
  known <- current %in% names(mynewnames)
  current[known] <- mynewnames[current[known]]
  current
})
example
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> This Mazda 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> That Mazda 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
This is a bit unwieldy; if you are only replacing one or two row names then @TimTeaFan's first suggestion is probably easier.
The safest way, and the one the OP prefers (a predefined named vector), is to take the current row names, replace those that are defined in the vector, and set the row names again. This does not fail on an incomplete vector: a row name that has no replacement stays as it was before.
The advantage of this solution is to prevent the error below if your rename vector is incomplete.
Error in `.rowNamesDF<-`(x, value = value) :
missing values in 'row.names' are not allowed
solution
# Rename the rows of a data frame from a named lookup vector.
#   df      data frame whose row names are to be changed
#   change  named character vector: names are current row names, values are
#           the replacements
# Returns `df` with every row name that equals a name of `change` replaced;
# all other row names are kept. Matching is on the whole row name, so a name
# that merely contains a key (e.g. "CA" for key "C") is left alone, and
# characters such as "." are not treated as regex wildcards.
rename_rows <- function(df, change) {
  stopifnot(is.data.frame(df), is.character(change), !is.null(names(change)))
  hit <- match(row.names(df), names(change))
  # rows without an entry in `change` give NA and keep their name
  found <- !is.na(hit)
  row.names(df)[found] <- unname(change[hit[found]])
  df
}

df <- data.frame(
  x = 1:5,
  y = 11:15,
  row.names = LETTERS[1:5]
)
df
# x y
# A 1 11
# B 2 12
# C 3 13
# D 4 14
# E 5 15
change <- c("A" = "a", "C" = "c")
df <- rename_rows(df, change)
df
# x y
# a 1 11
# B 2 12
# c 3 13
# D 4 14
# E 5 15

how to add a Column using IF_ELSE

I'm trying to add a column to a dataframe using add_column and if_else, but I can't get it to work: I don't know how to write a correct logical test using the logical OR operator ("|").
I have this kind data:
dataframe1
variable 1 variable2 variable3
(char) (char) (char)
value value value
value value value
value value value
I try this:
dataframe2 <- dataframe1%>%
add_column(newcolumn_name = if_else(variable3== "value1"|"value2”, TRUE, FALSE)
And I get this error:
Unknown or uninitialised column: value1.Error in variable3 ==
“value1“| "value2" : operations are possible only for numeric,
logical or complex types
Consider extracting the column with .$. The chained == comparisons can be replaced with %in%; | is mostly used as OR inside a regex pattern, while == does a fixed match. In addition, == and %in% already return a logical vector, so we don't need if_else/ifelse.
library(dplyr)
library(tibble)
# `.` is the data frame supplied by %>%; the %in% test already yields a
# logical vector, which is used directly as the new column.
dataframe1 %>%
add_column(newcolumn_name = .$variable3 %in% c("value1", "value2"))
Using a reproducible example
# Adds a logical column that is TRUE where `carb` is 1 or 4.
head(mtcars) %>%
add_column(new_column_name = .$carb %in% c(1, 4))
mpg cyl disp hp drat wt qsec vs am gear carb new_column_name
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 TRUE
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 TRUE
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 TRUE
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 TRUE
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 FALSE
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 TRUE
Also, this can be done within dplyr itself i.e. using mutate and thus we don't need to extract the column
# Inside mutate() the column is referred to by its bare name, so no `.$`
# extraction is needed.
head(mtcars) %>%
mutate(new_column_name = carb %in% c(1, 4))
mpg cyl disp hp drat wt qsec vs am gear carb new_column_name
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 TRUE
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 TRUE
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 TRUE
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 TRUE
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 FALSE
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 TRUE
I was able to do that with this code:
# The comparison already yields a logical vector, so wrapping it in
# ifelse(..., TRUE, FALSE) is redundant. `==` combined with `|` is kept (rather
# than %in%) so that NA entries in var3 still give NA, exactly as before.
dataf2 <- dataf %>%
  add_column(newcol = dataf$var3 == "value1" | dataf$var3 == "value2")

Change only specific column names based on a data table

I have a table (dt_replace) with the actual columns to replace and their corresponding new column:
column new
col1 new1
col2 new2
col3 new3
... ...
My original table (dt), the one I need to rename, has 100 columns, and dt_replace lists only 50 of them.
So far I tried using dplyr library with function rename:
c = dt_replace$column
r = dt$new
rename(dt, c = r)
But it didn't work, then I tried the following using ColNames:
colnames(dt)[colnames(dt) %in% dt_replace$column] <- dt_replace$new
It worked but unfortunately, columns are added in the wrong order...
Try match
# Position of each old name in dt; NA when dt has no column of that name.
pos <- match(dt_replace$column, names(dt))
# An NA index would make the subscripted assignment fail, so lookup rows
# without a matching column are skipped.
colnames(dt)[pos[!is.na(pos)]] <- dt_replace$new[!is.na(pos)]
Adding a reproducible example
dt <- mtcars
dt_replace <- data.frame(
  column = c("mpg", "hp"),
  new = c("new1", "new2"),
  stringsAsFactors = FALSE
)
# Position of each old name in dt; NA when dt has no column of that name.
pos <- match(dt_replace$column, names(dt))
# Skip lookup rows without a matching column: an NA index would make the
# subscripted assignment fail.
present <- !is.na(pos)
colnames(dt)[pos[present]] <- dt_replace$new[present]
head(dt)
# new1 cyl disp new2 drat wt qsec vs am gear carb
#Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
#Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
#Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
#Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1

calculate new column in dataframe or list using a function with column as param

I'm trying to calculate a new column with a user defined function that needs data from same row and a fixed value valid for all rows:
# Adds column `colname` to `ds`, computed for each row as
# val1 + <column col1> * <column col2>, and returns the extended data frame.
#   ds       data frame to extend
#   colname  name of the column to create
#   val1     constant added to every row
#   col1     name of the first factor column
#   col2     name of the second factor column
myfunc <- function(ds, colname, val1, col1, col2) {
  product <- ds[[col1]] * ds[[col2]]
  ds[[colname]] <- val1 + product
  ds
}
v1 = 2
data(mtcars)
mt = head(mtcars)
mt
mpg cyl disp hp drat wt qsec vs am gear
carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
apply(mt,'newcol',v1,mt$wt,mt$qsec)
mt
What I would like to see in mt$newcol in first row is: 2 + 2.620 * 16.46 (-> 45.12) and all other rows similar.
So, how can I send a fixed value (v1) and two values from each row to my function and store returned value in this row in a new column?
Thanks
dplyr approach:
library(dplyr)
data(mtcars)
# Adds `new_column` (val1 + col1 + col2) to `ds`, plus a leading `car_name`
# column holding the original row names, and returns the result.
#   ds          data frame to extend
#   new_column  name of the column to create (string)
#   val1        constant added to every row
#   col1, col2  names of the two input columns (strings)
# NOTE: the three terms are summed, which is what the printed example output
# reflects; the question asked for val1 + col1 * col2, so swap the second `+`
# for `*` to get that.
myfunc <- function(ds, new_column, val1, col1, col2) {
  name <- rownames(ds)
  # everything() replaces the hard-coded `mpg:` range, so the function also
  # works on data frames that have no `mpg` column.
  ds %>%
    mutate(!!as.name(new_column) := val1 + !!as.name(col1) + !!as.name(col2),
           car_name = name) %>%
    select(car_name, everything())
}
head(
myfunc(ds = mtcars,
new_column = "new_column",
val1 = 2,
col1 = "hp",
col2 = "vs")
)
output
car_name mpg cyl disp hp drat wt qsec vs am gear carb new_column
1 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 112
2 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 112
3 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 96
4 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 113
5 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 177
6 Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 108

Opposite function to add_rownames in dplyr

As an intermediate step I generate a data frame with one column as character strings and the rest are numbers. I'd like to convert it to a matrix, but first I have to convert that character column into row names and remove it from the data frame.
Is there a simple way to do this in dplyr? A function like to_rownames() that is opposite to add_rownames()?
I saw a solution using a custom function, but it's really out of dplyr philosophy.
You can now use the tibble-package:
tibble::column_to_rownames()
This provides NSE & standard eval functions:
library(dplyr)
# tibble() replaces the deprecated data_frame() constructor.
df <- tibble(a = sample(letters, 4), b = 1:4, c = 5:8)
# NSE front end for reset_rownames_(): `col` may be written as a bare name or
# as a string. The unevaluated argument is converted to its character form
# and forwarded. Stops unless `df` is a data frame.
reset_rownames <- function(df, col = "rowname") {
  stopifnot(is.data.frame(df))
  col_name <- as.character(substitute(col))
  reset_rownames_(df, col = col_name)
}
# Standard-evaluation version: moves column `col` of `df` into the row names.
#   df   a data frame (tibbles are accepted)
#   col  name of the column holding the future row names, as a string
# Returns a plain data.frame without `col`, whose row names are the values of
# that column. Stops unless `df` is a data frame.
reset_rownames_ <- function(df, col = "rowname") {
  stopifnot(is.data.frame(df))
  # Convert to a plain data.frame first: setting row names on a tibble is
  # deprecated, and as.data.frame() (unlike data.frame()) leaves
  # non-syntactic column names untouched.
  df <- as.data.frame(df)
  nm <- df[, col]
  # drop = FALSE keeps a data frame even when a single column remains.
  df <- df[, !(colnames(df) %in% col), drop = FALSE]
  rownames(df) <- nm
  df
}
m <- "rowname"
head(as.matrix(reset_rownames(add_rownames(mtcars), "rowname")))
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
head(as.matrix(reset_rownames_(add_rownames(mtcars), m)))
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
Perhaps to_rownames() or set_rownames() makes more sense. ¯\_(ツ)_/¯ YMMV.
If you really need a matrix you can just save the character column to a separate variable, drop it, and then create the matrix
library(dplyr)
# tibble() replaces the deprecated data_frame() constructor.
df <- tibble(a = sample(letters, 4), b = 1:4, c = 5:8)
# Keep the labels under their own name; assigning them to `letters` would
# mask the built-in constant of that name.
row_labels <- df %>% pull(a)
# Drop the character column so the matrix is numeric.
a.matrix <- df %>% select(-a) %>% as.matrix()
# Attach the saved labels as row names of the matrix.
rownames(a.matrix) <- row_labels
Not sure what you are going to do after that, but this gets you as far as you asked for...

Resources