R: rename columns in list based on other row value - r

I have imported data from matlab and have a large list (over 1000 list elements) from which I created the following sample dataset data with only two list elements.
data <- structure(list(TEST.DATA.1.1 = structure(list(ID = c(2, 2, 2), YEAR = c(1990, 1991, 1992), DATA.1 = c(10, 20, 30), DATA.NAME = structure(c(1L, 1L, 1L), class = "factor", .Label = "Test"), Remarks = c(1990, 1991, 1992)), .Names = c("ID", "YEAR", "DATA.1", "DATA.NAME", "Remarks"), row.names = c(NA, -3L), class = "data.frame"), TEST.DATA.2.1 = structure(list(ID = c(4, 4), YEAR = c(2000, 2001), DATA.1 = c(55, 60), DATA.2 = c(0, 2), DATA.3 = c(4, 6), DATA.NAME.structure..n1....Dim...c.1L..1L.. = structure(c(1L,1L), class = "factor", .Label = "n1"), DATA.NAME.structure..n2....Dim...c.1L..1L.. = structure(c(1L, 1L), class = "factor", .Label = "n2"), DATA.NAME.structure..n3....Dim...c.1L..1L.. = structure(c(1L,1L), class = "factor", .Label = "n3"), Remarks = c(2000,2001)), .Names = c("ID", "YEAR", "DATA.1", "DATA.2", "DATA.3", "DATA.NAME.structure..n1....Dim...c.1L..1L..", "DATA.NAME.structure..n2....Dim...c.1L..1L..", "DATA.NAME.structure..n3....Dim...c.1L..1L..", "Remarks"), row.names = c(NA, -2L), class = "data.frame")), .Names = c("TEST.DATA.1.1", "TEST.DATA.2.1"))
data
$TEST.DATA.1.1
ID YEAR DATA.1 DATA.NAME Remarks
1 2 1990 10 Test 1990
2 2 1991 20 Test 1991
3 2 1992 30 Test 1992
$TEST.DATA.2.1
ID YEAR DATA.1 DATA.2 DATA.3 DATA.NAME.structure..n1....Dim...c.1L..1L.. DATA.NAME.structure..n2....Dim...c.1L..1L.. DATA.NAME.structure..n3....Dim...c.1L..1L.. Remarks
1 4 2000 55 0 4 n1 n2 n3 2000
2 4 2001 60 2 6 n1 n2 n3 2001
I am looking for a way to rename the data columns using the names stored in the DATA.NAME column(s). Sometimes there are multiple data columns with corresponding name columns, as in the second list element, and sometimes there is only one, as in the first element. I need to do the renaming for a large list (> 1000 list elements) and then drop the DATA.NAME columns, as in data_new.
data_new
$TEST.DATA.1.1
ID YEAR Test Remarks
1 2 1990 10 1990
2 2 1991 20 1991
3 2 1992 30 1992
$TEST.DATA.2.1
ID YEAR n1 n2 n3 Remarks
1 4 2000 55 0 4 2000
2 4 2001 60 2 6 2001

Here's a base R approach:
# Base R: for every data frame in `data`, rename the DATA.<n> columns with the
# first-row values of the DATA.NAME* columns, then drop the DATA.NAME* columns.
for (k in seq_along(data)) {
  col_names <- names(data[[k]])
  label_idx <- grep('^DATA\\.NAME', col_names)
  value_idx <- grep('^DATA\\.\\d+', col_names)
  # Only the first row of the label columns is consulted.
  new_labels <- as.character(unlist(data[[k]][1, label_idx]))
  names(data[[k]])[value_idx] <- new_labels
  # Assigning list(NULL) removes the selected columns.
  data[[k]][label_idx] <- list(NULL)
}
data
## $TEST.DATA.1.1
## ID YEAR Test Remarks
## 1 2 1990 10 1990
## 2 2 1991 20 1991
## 3 2 1992 30 1992
##
## $TEST.DATA.2.1
## ID YEAR n1 n2 n3 Remarks
## 1 4 2000 55 0 4 2000
## 2 4 2001 60 2 6 2001

Solution using data.table package.
require(data.table)
data <- structure(list(TEST.DATA.1.1 = structure(list(ID = c(2, 2, 2), YEAR = c(1990, 1991, 1992), DATA.1 = c(10, 20, 30), DATA.NAME = structure(c(1L, 1L, 1L), class = "factor", .Label = "Test"), Remarks = c(1990, 1991, 1992)), .Names = c("ID", "YEAR", "DATA.1", "DATA.NAME", "Remarks"), row.names = c(NA, -3L), class = "data.frame"), TEST.DATA.2.1 = structure(list(ID = c(4, 4), YEAR = c(2000, 2001), DATA.1 = c(55, 60), DATA.2 = c(0, 2), DATA.3 = c(4, 6), DATA.NAME.structure..n1....Dim...c.1L..1L.. = structure(c(1L,1L), class = "factor", .Label = "n1"), DATA.NAME.structure..n2....Dim...c.1L..1L.. = structure(c(1L, 1L), class = "factor", .Label = "n2"), DATA.NAME.structure..n3....Dim...c.1L..1L.. = structure(c(1L,1L), class = "factor", .Label = "n3"), Remarks = c(2000,2001)), .Names = c("ID", "YEAR", "DATA.1", "DATA.2", "DATA.3", "DATA.NAME.structure..n1....Dim...c.1L..1L..", "DATA.NAME.structure..n2....Dim...c.1L..1L..", "DATA.NAME.structure..n3....Dim...c.1L..1L..", "Remarks"), row.names = c(NA, -2L), class = "data.frame")), .Names = c("TEST.DATA.1.1", "TEST.DATA.2.1"))
# Rename the DATA.<n> columns of one data frame after the labels held in its
# DATA.NAME* columns, then drop the DATA.NAME* columns.
#
# x: a data.frame (or data.table) with DATA.<n> value columns and matching
#    DATA.NAME* label columns; only the first row of the label columns is read.
# Returns: a data.table with the value columns renamed and the label columns
#    removed. An input without DATA.NAME* columns is returned unrenamed.
fun <- function(x) {
  x <- data.table(x)
  # Escape the dot and anchor the patterns so that only real DATA.<n> /
  # DATA.NAME* columns are picked up.
  value_cols <- grep("^DATA\\.[0-9]+", names(x), value = TRUE)
  label_cols <- grep("^DATA\\.NAME", names(x), value = TRUE)
  if (length(label_cols) > 0L) {
    new_names <- vapply(
      label_cols,
      function(col) as.character(x[[col]][1L]),
      character(1),
      USE.NAMES = FALSE
    )
    # setnames() errors if the number of value and label columns differs.
    setnames(x, value_cols, new_names)
    # `with = FALSE` together with `:=` is deprecated; wrap the LHS in
    # parentheses to delete columns whose names are held in a variable.
    x[, (label_cols) := NULL]
  }
  x
}
data_new <- lapply(data, fun)

This should work...
library(dplyr)
# dplyr variant: rename the DATA.<n> columns of every data frame in `data`
# with the first-row values of its DATA.NAME* columns, then drop those columns.
# seq_along() keeps the loop from running when `data` is empty
# (1:length(data) would iterate over c(1, 0)).
for (i in seq_along(data)) {
  d <- data[[i]]
  # Find the new names: first-row value of every DATA.NAME* column
  new_names <- select(d, starts_with('DATA.NAME'))
  new_names <- as.character(unlist(new_names[1, ], use.names = FALSE))
  # Remove the columns containing the names
  d <- select(d, -starts_with('DATA.NAME'))
  # Pick which columns we want to replace; the dot is escaped and the pattern
  # anchored so that only columns named DATA.<n> match
  to_replace <- grep('^DATA\\.[0-9]+', names(d))
  # Replace those names
  names(d)[to_replace] <- new_names
  # Replace the list element
  data[[i]] <- d
}

Related

Combine two matrix and mark common

I have two matrix as like this
Vehicle1 Year type
Car1 20 A
Car2 21 A
Car8 20 A
Second one
Vehicle2 Year type
Car1 20 M
Car2 21 M
Car7 90 M
I just need to combine the matrix based on the first column(Vehicle) and need to mark common as A/M as like this
Vehicle Year type
Car1 20 A/M
Car2 21 A/M
Car7 90 M
Car8 20 A
I used the merge function for this, but it only prints the common rows.
You can join the two dataframe and combine the type columns :
# Full join on vehicle name and year; the clashing `type` columns arrive as
# type.x / type.y and are pasted into one column, skipping NAs.
# Written without `%>%` so the snippet runs with only namespaced calls
# (no package has to be attached).
joined <- dplyr::full_join(df1, df2, by = c('Vehicle1' = 'Vehicle2', 'Year'))
tidyr::unite(joined, type, type.x, type.y, sep = '/', na.rm = TRUE)
# Vehicle1 Year type
#1 Car1 20 A/M
#2 Car2 21 A/M
#3 Car8 20 A
#4 Car7 90 M
data
df1 <- structure(list(Vehicle1 = c("Car1", "Car2", "Car8"), Year = c(20L,
21L, 20L), type = c("A", "A", "A")), class = "data.frame", row.names = c(NA, -3L))
df2 <- structure(list(Vehicle2 = c("Car1", "Car2", "Car7"), Year = c(20L,
21L, 90L), type = c("M", "M", "M")), class = "data.frame", row.names = c(NA, -3L))
Another dplyr solution.
library(dplyr)
# Stack both tables under a common key name, then collapse the type values of
# each (Vehicle1, Year) pair into a single "/"-separated string.
df1 %>%
  bind_rows(rename(df2, Vehicle1 = Vehicle2)) %>%
  group_by(Vehicle1, Year) %>%
  summarise(type = paste(type, collapse = "/")) %>%
  ungroup()
# # A tibble: 4 x 3
# Vehicle1 Year type
# <chr> <int> <chr>
# 1 Car1 20 A/M
# 2 Car2 21 A/M
# 3 Car7 90 M
# 4 Car8 20 A
You can also do this easily in base R.
# Outer-merge the two tables on the vehicle name.
rr <- merge(m1, m2, all = TRUE, by.x = "Vehicle1", by.y = "Vehicle2")
# Stack columns (2, 4) and (3, 5) of the merge result into long form, drop the
# rows containing NA, and restore m1's column names.
rr <- setNames(
  na.omit(reshape(rr, idvar = "Vehicle1", varying = list(c(2, 4), c(3, 5)),
                  direction = "long")),
  c("Vehicle1", "t", names(m1)[-1])
)
# A vehicle that appears a second time was present in both inputs.
dupes <- duplicated(rr$Vehicle1)
rr[rr$Vehicle1 %in% rr$Vehicle1[dupes], 4] <- "A/M"
# Logical indexing on purpose: rr[-which(dupes), ] would return zero rows when
# no vehicle is shared between the two tables.
res <- rr[!dupes, -2]
res
# Vehicle1 Year type
# Car1.1 Car1 20 A/M
# Car2.1 Car2 21 A/M
# Car8.1 Car8 20 A
# Car7.2 Car7 90 M
Data:
m1 <- structure(list(Vehicle1 = c("Car1", "Car2", "Car8"), Year = c(20L,
21L, 20L), type = c("A", "A", "A")), class = "data.frame", row.names = c(NA,
-3L))
m2 <- structure(list(Vehicle2 = c("Car1", "Car2", "Car7"), Year = c(20L,
21L, 90L), type = c("M", "M", "M")), class = "data.frame", row.names = c(NA,
-3L))
Here is a base R option using merge
# Outer-merge on vehicle and year; the two `type` columns come back suffixed.
tmp <- merge(df1, df2, by.x = c("Vehicle1", "Year"), by.y = c("Vehicle2", "Year"), all = TRUE)
type_cols <- tmp[grep("type", names(tmp))]
# Join the non-missing type values of each row with "/". This replaces a scalar
# ifelse(), which silently kept only the first element of na.omit().
dfout <- cbind(
  tmp[c("Vehicle1", "Year")],
  type = apply(type_cols, 1, function(v) paste(na.omit(v), collapse = "/"))
)
such that
> dfout
Vehicle1 Year type
1 Car1 20 A/M
2 Car2 21 A/M
3 Car7 90 M
4 Car8 20 A
Data
> dput(df1)
structure(list(Vehicle1 = c("Car1", "Car2", "Car8"), Year = c(20L,
21L, 20L), type = c("A", "A", "A")), class = "data.frame", row.names = c(NA,
-3L))
> dput(df2)
structure(list(Vehicle2 = c("Car1", "Car2", "Car7"), Year = c(20L,
21L, 90L), type = c("M", "M", "M")), class = "data.frame", row.names = c(NA,
-3L))

How to transform row to column based on a single row in R?

I have a data set that looks something like this
A B 1960 1970 1980
x a 1 2 3
x b 1.1 2.1 NA
y a 2 3 4
y b 1 NA 1
I want to transform the columns based on row B so that it looks something like this
A year a b
x 1960 1 1.1
x 1970 2 2.1
x 1980 3 NA
y 1960 2 1
y 1970 3 NA
y 1980 4 1
I am not sure how to do this. I know that I can do a full transformation using t() or using row_to_columns() from tidyverse, but the result is not what I want.
The initial data has about 60 columns and 165 distinct values in column B.
You can do pivot_longer() and then pivot_wider(), although it might be a bad idea to rename your column "B" again:
library(dplyr)
library(tidyr)
# Lengthen every column except A and B, spread the B values back out into
# columns, then rename the former column-name column to B.
long_form <- pivot_longer(df, -c(A, B))
long_form %>%
  pivot_wider(names_from = B) %>%
  rename(B = name)
# A tibble: 6 x 4
A B a b
<fct> <chr> <dbl> <dbl>
1 x 1960 1 1.1
2 x 1970 2 2.1
3 x 1980 3 NA
4 y 1960 2 1
5 y 1970 3 NA
6 y 1980 4 1
df = structure(list(A = structure(c(1L, 1L, 2L, 2L), .Label = c("x",
"y"), class = "factor"), B = structure(c(1L, 2L, 1L, 2L), .Label = c("a",
"b"), class = "factor"), `1960` = c(1, 1.1, 2, 1), `1970` = c(2,
2.1, 3, NA), `1980` = c(3L, NA, 4L, 1L)), class = "data.frame", row.names = c(NA,
-4L))
library(data.table)
# Read the sample table from an inline string.
# NOTE(review): the next two statements treat the first row of `dt` as the
# header, i.e. they assume fread did not detect one itself -- confirm.
dt <- fread('A B 1960 1970 1980
x a 1 2 3
x b 1.1 2.1 NA
y a 2 3 4
y b 1 NA 1')
# Use the values of the first row as column names ...
names(dt) <- as.character(dt[1,])
# ... and drop that row from the data.
dt <- dt[-1,]
# Convert columns 3 to 5 to numeric by reference.
dt[,(3:5):=lapply(.SD,as.numeric),.SDcols=3:5]
# Melt columns 3 to 5 to long form, then cast so each value of B gets a column.
dcast(melt(dt,measure.vars = 3:5),...~B,value.var = "value")
#> A variable a b
#> 1: x 1960 1 1.1
#> 2: x 1970 2 2.1
#> 3: x 1980 3 NA
#> 4: y 1960 2 1.0
#> 5: y 1970 3 NA
#> 6: y 1980 4 1.0
Created on 2020-05-05 by the reprex package (v0.3.0)
Base R solution:
# Step 1: stack every column other than A and B into a single "value" column;
# the original column names are stored in "year".
long_df <- reshape(df, direction = "long",
varying = which(!names(df) %in% c("A", "B")),
v.names = "value",
timevar = "year",
times = names(df)[!(names(df) %in% c("A", "B"))],
ids = NULL,
new.row.names = 1:(length(which(!names(df) %in% c("A", "B"))) * nrow(df)))
# Step 2: spread the values of B into columns, one row per (A, year) pair.
# NOTE(review): setNames labels the second column "B"; presumably that column
# holds the year values -- confirm before relying on the name.
wide_df <- setNames(reshape(long_df, direction = "wide",
idvar = c("A", "year"),
timevar = "B"), c("A", "B", unique(df$B)))
Data:
df <- structure(list(A = c("x", "x", "y", "y"), B = c("a", "b", "a",
"b"), `1960` = c(1, 1.1, 2, 1), `1970` = c(2, 2.1, 3, NA), `1980` = c(3L,
NA, 4L, 1L)), row.names = 2:5, class = "data.frame")

How to merge two dataframes with same column name but may have same data in variables in R?

I want to ask how do I merge this two data frame?
df1:
Name Type Price
A 1 NA
B 2 2.5
C 3 2.0
df2:
Name Type Price
A 1 1.5
D 2 2.5
E 3 2.0
As you can see from both df, they have same column names and one row with the same value in "Name" which is A but df1 doesn't have the price whereas df2 has. I want to achieve this output such that they merge if the value in "Name" is the same
Name Type Price
A 1 1.5
B 2 2.5
C 3 2.0
D 2 2.5
E 3 2.0
We could do a full_join on df1 and df2 by Name and using coalesce on Type and Price get the first non-NA value from those columns.
library(dplyr)
# Full join on Name, then take the first non-NA of the .x / .y variants of
# Type and Price, and keep only df1's original columns.
full_join(df1, df2, by = 'Name') %>%
  mutate(Type = coalesce(Type.x, Type.y),
         Price = coalesce(Price.x, Price.y)) %>%
  # all_of() marks names(df1) as an external character vector; passing it
  # bare to select() is ambiguous and deprecated in tidyselect.
  select(all_of(names(df1)))
# Name Type Price
#1 A 1 1.5
#2 B 2 2.5
#3 C 3 2.0
#4 D 2 2.5
#5 E 3 2.0
And similar in base R :
# Outer-merge on Name, fill Price and Type from the second table wherever the
# first one has NA, and keep df1's columns in df1's order.
merged <- merge(df1, df2, by = 'Name', all = TRUE)
merged$Price <- ifelse(is.na(merged$Price.x), merged$Price.y, merged$Price.x)
merged$Type <- ifelse(is.na(merged$Type.x), merged$Type.y, merged$Type.x)
merged[names(df1)]
data
df1 <- structure(list(Name = structure(1:3, .Label = c("A", "B", "C"
), class = "factor"), Type = 1:3, Price = c(NA, 2.5, 2)),
class = "data.frame", row.names = c(NA, -3L))
df2 <- structure(list(Name = structure(1:3, .Label = c("A", "D", "E"
), class = "factor"), Type = 1:3, Price = c(1.5, 2.5, 2)),
class = "data.frame", row.names = c(NA, -3L))
Seems like you want to rbind the data frames together, then remove rows with NA values for Price, and order by Name.
library(data.table)
# Stack both tables, convert to a data.table, drop the rows without a price
# and sort by Name.
stacked <- rbind(df1, df2)
setDT(stacked)
stacked[!is.na(Price)][order(Name)]
# Name Type Price
# 1: A 1 1.5
# 2: B 2 2.5
# 3: C 3 2.0
# 4: D 2 2.5
# 5: E 3 2.0
Here is a base R solution using merge + complete.cases
# Outer-merge on all shared columns, then keep only the rows without any NA.
u <- merge(df1, df2, all = TRUE)
dfout <- u[complete.cases(u), , drop = FALSE]
which yields
> dfout
Name Type Price
1 A 1 1.5
3 B 2 2.5
4 C 3 2.0
5 D 2 2.5
6 E 3 2.0
DATA
df1 <- structure(list(Name = structure(1:3, .Label = c("A", "B", "C"
), class = "factor"), Type = 1:3, Price = c(NA, 2.5, 2)),
class = "data.frame", row.names = c(NA, -3L))
df2 <- structure(list(Name = structure(1:3, .Label = c("A", "D", "E"
), class = "factor"), Type = 1:3, Price = c(1.5, 2.5, 2)),
class = "data.frame", row.names = c(NA, -3L))

Matching two list of unequal length

I am trying to match the values in 2 lists only where the variable names are the same between list. I would like the result to be a list the length of the longer list filled with count of total matches.
jac <- structure(list(s1 = "a", s2 = c("b", "c", "d"), s3 = 5),
.Names = c("s1", "s2", "s3"))
larger <- structure(list(s1 = structure(c(1L, 1L, 1L), .Label = "a", class = "factor"),
s2 = structure(c(2L, 1L, 3L), .Label = c("b", "c", "d"), class = "factor"),
s3 = c(1, 2, 7)), .Names = c("s1", "s2", "s3"), row.names = c(NA, -3L), class = "data.frame")
I am using mapply(FUN = pmatch, jac, larger) which gives me a correct total but not in the format that I would like below:
s1 s2 s3 s1result s2result s3result
a c 1 1 2 NA
a b 2 1 1 NA
a c 7 1 3 NA
However, I don't think pmatch will ensure the name matching in every situation so I wrote a function that I am still having issues with:
# Asker's attempt at a name-aware match between `jac` and `larger`
# (reported as not working; kept unchanged).
# NOTE(review): the loop counts rows of `larger`, but `i` is used to index the
# names of `jac` and the columns of `larger`.
# NOTE(review): `r` is never created inside the function before `r[i] <- ...`.
# NOTE(review): the last expression is the for loop, so the function returns
# NULL rather than `r`.
prodMatch <- function(jac,larger){
for(i in 1:nrow(larger)){
# Only compare when the i-th name of jac equals the name of larger's i-th column.
if(names(jac)[i] %in% names(larger[i])){
# NOTE(review): `jac %in% larger[i]` has one element per element of jac, but
# it is assigned to the single slot r[i].
r[i] <- jac %in% larger[i]
r
}
}
}
Can anyone help out?
Another dataset, for which one is not a multiple of the other:
larger2 <-
structure(list(s1 = structure(c(1L, 1L, 1L), class = "factor", .Label = "a"),
s2 = structure(c(1L, 1L, 1L), class = "factor", .Label = "c"),
s3 = c(1, 2, 7), s4 = c(8, 9, 10)), .Names = c("s1", "s2",
"s3", "s4"), row.names = c(NA, -3L), class = "data.frame")
mapply returns a list of matching indices; you can convert it to a data frame simply by using as.data.frame:
as.data.frame(mapply(match, jac, larger))
# s1 s2 s3
# 1 1 2 NA
# 2 1 1 NA
# 3 1 3 NA
And cbind the result with larger gives what you expected:
# Match each element of jac against the column of larger in the same position,
# label the index columns "<name>result", and append them to larger.
match_idx <- as.data.frame(mapply(match, jac, larger))
names(match_idx) <- paste0(names(jac), "result")
cbind(larger, match_idx)
# s1 s2 s3 s1result s2result s3result
#1 a c 1 1 2 NA
#2 a b 2 1 1 NA
#3 a d 7 1 3 NA
Update: To take care of the cases where the names of the two lists don't match, we can loop through larger and its names simultaneously and extract the elements from jac as follows:
# Look up jac's element with the same name as a column of larger and return
# the positions of its values within that column.
lookup_in_jac <- function(col, name) {
  hits <- match(jac[[name]], col)
  # if the name doesn't exist in jac the match is zero-length: return NA as well
  if (length(hits) == 0) {
    return(NA)
  }
  hits
}
as.data.frame(mapply(lookup_in_jac, larger, names(larger)))
# s1 s2 s3
#1 1 2 NA
#2 1 1 NA
#3 1 3 NA

What is the equivalent of Reduce for a list in R

I have a list, mylist with 5 elements.
I can merge any two elements using the following:
merge(mylist[[1]], mylist[[2]], by = someColumn).
However, how do I merge all of them together at once while preserving the names and not causing warnings.
Thanks!
Edit
Upon further inspection, here's a few more details to help clarify my question:
> mylist
$Alpha
id count
1 ABC 5
2 DEF 10
3 GHI 15
$Beta
id count
1 DEF 10
2 ABC 12
3 GHI 14
$Gamma
id count
1 ABC 13
2 GHI 15
3 DEF 17
$Kappa
id count
1 GHI 20
2 DEF 21
3 ABC 25
> Reduce(function(x,y){merge(x,y,by="id")}, mylist)
id count.x count.y count.x count.y
1 ABC 5 12 13 25
2 DEF 10 10 17 21
3 GHI 15 14 15 20
Warning message:
In merge.data.frame(x, y, by = "id") :
column names ‘count.x’, ‘count.y’ are duplicated in the result
As you can see, things are repeated at the top of the result and R throws a warning. How can I get it to avoid that (preferably calling the column names by the same as the list names -- so the first count.x would be count.Alpha).
Here's a copy of mylist in dput form in case anyone wants it:
structure(list(Alpha = structure(list(id = structure(1:3, .Label = c("ABC",
"DEF", "GHI"), class = "factor"), count = c(5, 10, 15)), .Names = c("id",
"count"), row.names = c(NA, -3L), class = "data.frame"), Beta = structure(list(
id = structure(c(2L, 1L, 3L), .Label = c("ABC", "DEF", "GHI"
), class = "factor"), count = c(10, 12, 14)), .Names = c("id",
"count"), row.names = c(NA, -3L), class = "data.frame"), Gamma = structure(list(
id = structure(c(1L, 3L, 2L), .Label = c("ABC", "DEF", "GHI"
), class = "factor"), count = c(13, 15, 17)), .Names = c("id",
"count"), class = "data.frame", row.names = c(NA, -3L)), Kappa = structure(list(
id = c("GHI", "DEF", "ABC"), count = c(20, 21, 25)), .Names = c("id",
"count"), row.names = c(NA, -3L), class = "data.frame")), .Names = c("Alpha",
"Beta", "Gamma", "Kappa"))
You can just run a quick Map to rename the "count" column. For example
# Name the second column of each data frame after its list element, so the
# successive merges no longer produce clashing count.x / count.y columns.
label_second_col <- function(n, x) {
  names(x)[2] <- n
  x
}
Reduce(merge, Map(label_second_col, names(mylist), mylist))
this returns
id Alpha Beta Gamma Kappa
1 ABC 5 12 13 25
2 DEF 10 10 17 21
3 GHI 15 14 15 20
Another way using match.call():
# Suffix the second column of every data frame with a name taken from
# names(mylist), then merge the renamed data frames pairwise.
# NOTE(review): match.call()[[2]][[3]] presumably recovers the current element
# index from the call that lapply builds internally; this depends on lapply's
# implementation -- confirm before reusing.
Reduce(merge, lapply(mylist, function(x) {
names(x)[2] <- paste(names(x)[2], names(mylist)[match.call()[[2]][[3]]],sep=".")
x}))
# id count.Alpha count.Beta count.Gamma count.Kappa
#1 ABC 5 12 13 25
#2 DEF 10 10 17 21
#3 GHI 15 14 15 20

Resources