I have a file named 'schema'. Based on that file, I need to rename the columns of other data frames. For example, 'Var1' of table A needs to be renamed to 'Col1'. Similarly, 'VarA' of table B needs to be renamed to 'ColA'. In short, every variable listed in the 'From' column of schema needs to be renamed to the corresponding value in the 'To' column.
# Lookup table: for each data frame (Tables), the current column name (From)
# and the name it should receive (To).
Schema <- read.table(
  text = 'Tables From To
A Var1 Col1
A Var2 Col2
A Var3 Col3
B VarA ColA
B VarB ColB
B VarC ColC
',
  header = TRUE
)

# Two example data frames whose columns are listed in Schema$From.
A <- data.frame(Var1 = 1:3, Var2 = 2:4, Var3 = 3:5)
B <- data.frame(VarA = 1:3, VarB = 2:4, VarC = 3:5)
We could use match:
# For each data frame, find every column name in Schema$From and replace it
# with the Schema$To entry found at the same position.
lapply(list(A = A, B = B), function(df) {
  pos <- match(names(df), Schema$From)
  setNames(df, Schema$To[pos])
})
# $A
# Col1 Col2 Col3
# 1 1 2 3
# 2 2 3 4
# 3 3 4 5
#
# $B
# ColA ColB ColC
# 1 1 2 3
# 2 2 3 4
# 3 3 4 5
Or:
# The same lookup, applied to each data frame on its own.
Anew <- A
names(Anew) <- Schema$To[match(names(Anew), Schema$From)]
Bnew <- B
names(Bnew) <- Schema$To[match(names(Bnew), Schema$From)]
Or list2env:
# Overwrite A and B in the global environment with their renamed copies.
list2env(
  lapply(list(A = A, B = B), function(df) {
    pos <- match(names(df), Schema$From)
    setNames(df, Schema$To[pos])
  }),
  envir = globalenv()
)
Edit: When a column has no match in Schema, keep its name as is:
list2env(
  lapply(list(A = A, B = B), function(df) {
    old <- names(df)
    new <- as.character(Schema$To[match(old, Schema$From)])
    # Columns without a Schema entry come back as NA; keep their old name.
    unmatched <- is.na(new)
    new[unmatched] <- old[unmatched]
    # Return the data frame with the updated names.
    setNames(df, new)
  }),
  envir = globalenv()
)
The following code retrieves the names of the tables (A and B) from Schema and performs the renaming:
# Rename the columns of every table listed in Schema$Tables and collect the
# renamed data frames in a list named after the tables.
r <- Map(
  function(v) {
    df <- get(v)
    # Only the Schema rows that belong to this table are relevant.
    rows <- Schema[Schema$Tables == v, ]
    # match() pairs each column with its own Schema row, so the result does
    # not depend on the columns appearing in the same order as in Schema.
    pos <- match(names(df), rows$From)
    hit <- !is.na(pos)
    names(df)[hit] <- as.character(rows$To[pos[hit]])
    df
  },
  as.character(unique(Schema$Tables))
)
which gives
> r
$A
Col1 Col2 Col3
1 1 2 3
2 2 3 4
3 3 4 5
$B
ColA ColB ColC
1 1 2 3
2 2 3 4
3 3 4 5
If you don't want result as list, you can do something like
# Rename the columns of every table listed in Schema$Tables and write the
# renamed data frames back into the global environment under the same names.
list2env(
  Map(
    function(v) {
      df <- get(v)
      # Only the Schema rows that belong to this table are relevant.
      rows <- Schema[Schema$Tables == v, ]
      # match() pairs each column with its own Schema row, so the renaming
      # is correct regardless of column order; unmatched columns are kept.
      pos <- match(names(df), rows$From)
      hit <- !is.na(pos)
      names(df)[hit] <- as.character(rows$To[pos[hit]])
      df
    },
    as.character(unique(Schema$Tables))
  ),
  envir = .GlobalEnv
)
or
# Rename the columns of every table listed in Schema$Tables in place.
for (v in as.character(unique(Schema$Tables))) {
  r <- get(v)
  # Only the Schema rows that belong to this table are relevant.
  rows <- Schema[Schema$Tables == v, ]
  # match() pairs each column with its own Schema row, so the renaming is
  # correct regardless of column order; unmatched columns keep their name.
  pos <- match(names(r), rows$From)
  hit <- !is.na(pos)
  names(r)[hit] <- as.character(rows$To[pos[hit]])
  assign(v, r)
}
then you will keep your object A and B
> A
Col1 Col2 Col3
1 1 2 3
2 2 3 4
3 3 4 5
> B
ColA ColB ColC
1 1 2 3
2 2 3 4
3 3 4 5
# Named lookup vector: the names are the old column names, the values the
# new ones, so indexing by old name yields the replacement.
lut <- as.character(Schema$To)
names(lut) <- Schema$From
setNames(A, lut[names(A)])
Col1 Col2 Col3
1 1 2 3
2 2 3 4
3 3 4 5
setNames(B, lut[names(B)])
ColA ColB ColC
1 1 2 3
2 2 3 4
3 3 4 5
Related
I am attempting to merge every nth element from col1, replacing values from that same row in col2 in a new column: col3
df <- data.frame(col1 = c('A', 'B', 'D', 'F', 'C'), col2 = c(2, 1, 2, 3, 1))
> df
col1 col2
1 A 2
2 B 1
3 D 2
4 F 3
5 C 1
If I were to merge every odd element from col1 with every even element from col2, for example, the output should look something like this:
> df
col1 col2 col3
1 A 2 A
2 B 1 1
3 D 2 D
4 F 3 3
5 C 1 C
Thanks.
We could do it with an ifelse statement checking if row is even or odd with the modulo operator %%:
library(dplyr)
# Odd rows take their value from col1, even rows from col2; the row parity
# comes from the row number modulo 2.
df %>%
  mutate(col3 = ifelse(row_number() %% 2 == 1, col1, col2))
col1 col2 col3
1 A 2 A
2 B 1 1
3 D 2 D
4 F 3 3
5 C 1 C
In base R, we may also use a row/column indexing
# Two-column index matrix: row i is paired with column 1, 2, 1, 2, ... so
# odd rows read from the first column and even rows from the second.
df[["col3"]] <- df[cbind(seq_len(nrow(df)), rep_len(1:2, nrow(df)))]
-output
> df
col1 col2 col3
1 A 2 A
2 B 1 1
3 D 2 D
4 F 3 3
5 C 1 C
base
df <- data.frame(col1 = c('A', 'B', 'D', 'F', 'C'), col2 = c(2, 1, 2, 3, 1))
# Start from col1 and overwrite every second row with col2; the logical
# index c(FALSE, TRUE) is recycled along the rows, selecting rows 2, 4, ...
df$col3 <- replace(df$col1, c(FALSE, TRUE), df$col2[c(FALSE, TRUE)])
df
#> col1 col2 col3
#> 1 A 2 A
#> 2 B 1 1
#> 3 D 2 D
#> 4 F 3 3
#> 5 C 1 C
Created on 2022-03-06 by the reprex package (v2.0.1)
I have several files, each containing a variable whose name differs from file to file but always contains the same string “type_category”, e.g., type_category_lifestyle_characterstics, type_category_uniqueness etc. The idea is to go through these files and rename such variables to type_category. Below are examples of the data frames:
df1 <- data.frame(id = c(1,2,3), type_category_lifestyle_characterstics = c(5,6,7), rating = c(1,3,4))
df2 <- data.frame(id = c(9,5,3), type_category_uniqueness = c(4,6,1), rating = c(2,7,4))
Thanks in advance
We can get the datasets in a list
library(dplyr)
library(purrr)
# Collect every object named df<digits> into a named list, then rename the
# column(s) whose name starts with "type_category" in each data frame.
out <- map(
  mget(ls(pattern = '^df\\d+$')),
  ~ rename_with(.x, ~ "type_category", starts_with("type_category"))
)
-output
out
$df1
id type_category rating
1 1 5 1
2 2 6 3
3 3 7 4
$df2
id type_category rating
1 9 4 2
2 5 6 7
3 3 1 4
We could use setNames with lapply:
# Positional rename: every data frame in the list receives the same three
# column names, in this order.
colnames <- c("id","type_category","rating")
my_list <- list(df1, df2)
lapply(my_list, function(df) setNames(df, colnames))
output:
[[1]]
id type_category rating
1 1 5 1
2 2 6 3
3 3 7 4
[[2]]
id type_category rating
1 9 4 2
2 5 6 7
3 3 1 4
Base R
Once you got them in a list, you can use lapply to change the variable names in all of them
df1 <- data.frame(
  id = c(1, 2, 3),
  type_category_lifestyle_characterstics = c(5, 6, 7),
  rating = c(1, 3, 4)
)
df2 <- data.frame(
  id = c(9, 5, 3),
  type_category_uniqueness = c(4, 6, 1),
  rating = c(2, 7, 4)
)
lapply(list(df1, df2), function(df) {
  # Every column whose name contains "type_category" (case-insensitively)
  # is renamed to exactly "type_category"; other columns are untouched.
  is_target <- grepl("type_category", names(df), ignore.case = TRUE)
  names(df)[is_target] <- "type_category"
  df
})
#> [[1]]
#> id type_category rating
#> 1 1 5 1
#> 2 2 6 3
#> 3 3 7 4
#>
#> [[2]]
#> id type_category rating
#> 1 9 4 2
#> 2 5 6 7
#> 3 3 1 4
Just note that you would need to assign the result back to a list.
data.table
Since you tagged data.table, this allows you to change the names in place and no extra assignment is necessary
library(data.table)
dt1 <- data.table::data.table(id = c(1,2,3), type_category_lifestyle_characterstics = c(5,6,7), rating = c(1,3,4))
dt2 <- data.table::data.table(id = c(9,5,3), type_category_uniqueness = c(4,6,1), rating = c(2,7,4))
# Rename by reference: setnames() modifies each data.table in place, so the
# lapply() result is discarded and dt1/dt2 themselves carry the new names.
invisible(
  lapply(list(dt1, dt2), function(dt) {
    # grep(value = TRUE) builds a new character vector holding only the
    # matching names, so there is no need to copy the whole table (or its
    # names) before renaming.
    old <- grep("type_category", names(dt), ignore.case = TRUE, value = TRUE)
    if (length(old) > 0) {
      data.table::setnames(dt, old = old, new = rep("type_category", length(old)))
    }
    NULL
  })
)
dt1
#> id type_category rating
#> 1: 1 5 1
#> 2: 2 6 3
#> 3: 3 7 4
dt2
#> id type_category rating
#> 1: 9 4 2
#> 2: 5 6 7
#> 3: 3 1 4
I need to be able to rename columns by name in a list of data frames that can all be expected to have the same column names.
For example:
[[1]]
col1 col2
1 1 2
2 2 3
[[2]]
col1 col2
1 1 2
2 2 3
Should become:
[[1]]
ID col2
1 1 2
2 2 3
[[2]]
ID col2
1 1 2
2 2 3
data:
col1 <- c(1,2)
col2 <- c(2,3)
myList <- list(data.frame(col1,col2),data.frame(col1,col2))
my attempt:
lapply(myList, function(x){
names(myList[[x]])[names(myList[[x]]) =="col1"] <- "ID"
})
Where did I go wrong? I need base R.
You can use {dplyr} and {purrr} from the {tidyverse}
> library(purrr)
> library(dplyr)
>
> df1 <- data.frame(col1 = 1:2, col2 = 3:4)
> df2 <- data.frame(col1 = 3:2, col2 = 6:7)
>
> list(df1, df2) %>% map(~ rename(., ID = col1))
[[1]]
ID col2
1 1 3
2 2 4
[[2]]
ID col2
1 3 6
2 2 7
It also works with strings: list(df1, df2) %>% map(~ rename(., "ID" = "col1"))
I have 2 dataframes in R (df1, df2).
A C D
1 1 1
2 2 2
df2 as
A B C
1 1 1
2 2 2
How can I merge these 2 dataframes to produce the following output?
A B C D
2 1 2 1
4 2 4 2
Columns are sorted and column values are added. Both DFs have same number of rows. Thank you in advance.
Code to create DF:
df1 <- data.frame(A = 1:2, C = 1:2, D = 1:2)
df2 <- data.frame(A = 1:2, B = 1:2, C = 1:2)
nm1 <- names(df1)
nm2 <- names(df2)
nm <- intersect(nm1, nm2)
if (length(nm) > 0) {
  # Some names are shared: keep the unshared columns of each input as they
  # are and add the shared columns element-wise.
  cbind(
    df1[, !(nm1 %in% nm2), drop = FALSE], # columns only in df1
    df1[nm] + df2[nm],                    # sum of the columns common to both
    df2[, !(nm2 %in% nm1), drop = FALSE]  # columns only in df2
  )
} else {
  # No names in common: simply place the two data frames side by side.
  cbind(df1, df2)
}
# D A C B
#1 1 2 2 1
#2 2 4 4 2
You can try:
library(tidyverse)
# Stack both data frames with their row names as a key column, then sum
# every column within each row name; na.rm = TRUE is passed to sum().
list(df2, df1) %>%
  map(~ rownames_to_column(.x)) %>%
  bind_rows() %>%
  group_by(rowname) %>%
  summarise_all(~ sum(.x, na.rm = TRUE))
# A tibble: 2 x 5
rowname A B C D
<chr> <int> <int> <int> <int>
1 1 2 1 2 1
2 2 4 2 4 2
By using left_join() from dplyr you won't lose the column
library(tidyverse)
dat1 <- tibble(a = 1:10,
b = 1:10,
c = 1:10)
dat2 <- tibble(c = 1:10,
d = 1:10,
e = 1:10)
left_join(dat1, dat2, by = "c")
#> # A tibble: 10 x 5
#> a b c d e
#> <int> <int> <int> <int> <int>
#> 1 1 1 1 1 1
#> 2 2 2 2 2 2
#> 3 3 3 3 3 3
#> 4 4 4 4 4 4
#> 5 5 5 5 5 5
#> 6 6 6 6 6 6
#> 7 7 7 7 7 7
#> 8 8 8 8 8 8
#> 9 9 9 9 9 9
#> 10 10 10 10 10 10
Created on 2019-01-16 by the reprex package (v0.2.1)
# Build an all-zero data frame holding every column name found in either
# input (sorted), then add each input into the columns that carry its names.
allnames <- sort(unique(c(names(df1), names(df2))))
df3 <- data.frame(matrix(0, nrow = nrow(df1), ncol = length(allnames)))
names(df3) <- allnames
# Index by name rather than by a logical mask: the selected columns then come
# back in the input's own column order, so the element-wise addition lines up
# even when the input's columns are not already sorted.
df3[names(df1)] <- df3[names(df1)] + df1
df3[names(df2)] <- df3[names(df2)] + df2
df3
A B C D
1 2 1 2 1
2 4 2 4 2
Here is a fun base R method with Reduce.
local({
  # Column names present in both data frames.
  shared <- intersect(names(df1), names(df2))
  Reduce(cbind, list(
    Reduce("+", list(df1[shared], df2[shared])), # sum results
    df1[setdiff(names(df1), names(df2))],        # in df1, not df2
    df2[setdiff(names(df2), names(df1))]         # in df2, not df1
  ))
})
This returns
A C D B
1 2 2 1 1
2 4 4 2 2
This assumes that both df1 and df2 have columns that are not present in the other. If this is not true, you'd have to adjust the list.
Note also that you could replace Reduce with do.call in both places and you'd get the same result.
I have below data frame
col1 <- c("A","B", "A")
col2 <- c("C","D","D")
col3 <- c("E","E","E")
col4 <- c("F","F","H")
x <- data.frame(col1,col2,col3,col4)
Output of above frame is:
1
I want to replace characters to numbers, as below:
2
Here's a one-liner in base R that works with any number of columns and any names - nothing is hard-coded, so it works with any x:
> setNames(data.frame(matrix(as.numeric(unlist(x)),ncol=ncol(x))),names(x))
col1 col2 col3 col4
1 1 3 5 6
2 2 4 5 6
3 1 4 5 7
# Remember the column names first: rebuilding the data frame from a bare
# matrix would otherwise leave default names behind.
col_names <- names(x)
x <- x %>%
  unlist() %>%
  # factor() makes this work for character columns too (the data.frame()
  # default since R 4.0), where as.numeric() alone would return NA; for
  # factor input the existing level order is kept.
  factor() %>%
  as.numeric() %>%
  matrix(ncol = length(col_names)) %>%
  data.frame()
names(x) <- col_names
x
col1 col2 col3 col4
1 1 3 5 6
2 2 4 5 6
3 1 4 5 7
Here is a solution with base R:
x[] <- match(as.matrix(x), unique(c(as.matrix(x))))
# > x
# col1 col2 col3 col4
# 1 1 3 5 6
# 2 2 4 5 6
# 3 1 4 5 7
Here is a shorter solution:
x[] <- as.integer(unlist(x))
data:
# stringsAsFactors = TRUE is spelled out because the as.integer(unlist(x))
# solution relies on factor columns, which data.frame() no longer creates by
# default since R 4.0.
x <- data.frame(
  col1 = c("A", "B", "A"),
  col2 = c("C", "D", "D"),
  col3 = c("E", "E", "E"),
  col4 = c("F", "F", "H"),
  stringsAsFactors = TRUE
)
We can use lapply from base R
x[] <- lapply(x, match, LETTERS)
x
# col1 col2 col3 col4
#1 1 3 5 6
#2 2 4 5 6
#3 1 4 5 8