Melt a matrix using extern covariate in R

Melt a matrix using extern covariate in R - r

I have a matrix:
# Example input: a 2 x 4 matrix of random normal draws with named rows
# (R1, R2) and named columns (C1..C4).
mat1 <- matrix(
  rnorm(8),
  ncol = 4,
  dimnames = list(c("R1", "R2"), c("C1", "C2", "C3", "C4"))
)
> mat1
C1 C2 C3 C4
R1 1.226139 -1.0604743 -0.1803689 0.3852505
R2 -1.232622 -0.5567295 -0.4146919 0.2433812
and a covariate that matches the names of the matrix columns:
> covariate <- factor(c('A','A','B','B'))
> t(data.frame(covariate, colnames(mat1)))
[,1] [,2] [,3] [,4]
covariate "A" "A" "B" "B"
colnames.mat1. "C1" "C2" "C3" "C4"
I would like to melt it with respect to the covariate.
Melting the data gives:
> melt( mat1 )
Var1 Var2 value
1 R1 C1 1.2261395
2 R2 C1 -1.2326215
3 R1 C2 -1.0604743
4 R2 C2 -0.5567295
5 R1 C3 -0.1803689
6 R2 C3 -0.4146919
7 R1 C4 0.3852505
8 R2 C4 0.2433812
However I would like to get the following result:
covariate_2 <- factor( c(rep('A',4) , rep('B',4) ))
> data.frame( covariate_2 , melted_data )
covariate_2 Var1 Var2 value
1 A R1 C1 1.2261395
2 A R2 C1 -1.2326215
3 A R1 C2 -1.0604743
4 A R2 C2 -0.5567295
5 B R1 C3 -0.1803689
6 B R2 C3 -0.4146919
7 B R1 C4 0.3852505
8 B R2 C4 0.2433812
I think there must be a way to get this result using the standard melt function. I would appreciate any help.

Perhaps it's easiest to just rename the columns of your matrix first, and then melt.
Here are a couple of examples, first using "data.table", and second using the "tidyverse":
library(data.table)
setDT(melt(`colnames<-`(mat1, paste(c('A','A','B','B'), colnames(mat1), sep = "_"))))[
, c("cov", "V1") := tstrsplit(Var2, "_")][, Var2 := NULL][]
# Var1 value cov V1
# 1: R1 1.2261390 A C1
# 2: R2 -1.2326220 A C1
# 3: R1 -1.0604743 A C2
# 4: R2 -0.5567295 A C2
# 5: R1 -0.1803689 B C3
# 6: R2 -0.4146919 B C3
# 7: R1 0.3852505 B C4
# 8: R2 0.2433812 B C4
library(tidyverse)
`colnames<-`(mat1, paste(c('A','A','B','B'), colnames(mat1), sep = "_")) %>%
as.data.frame() %>%
rownames_to_column() %>%
gather(var, val, -rowname) %>%
separate(var, into = c("cov", "var1"))
# rowname cov var1 val
# 1 R1 A C1 1.2261390
# 2 R2 A C1 -1.2326220
# 3 R1 A C2 -1.0604743
# 4 R2 A C2 -0.5567295
# 5 R1 B C3 -0.1803689
# 6 R2 B C3 -0.4146919
# 7 R1 B C4 0.3852505
# 8 R2 B C4 0.2433812
Sample data:
mat1 <- structure(c(1.226139, -1.232622, -1.0604743, -0.5567295, -0.1803689,
-0.4146919, 0.3852505, 0.2433812), .Dim = c(2L, 4L), .Dimnames = list(
c("R1", "R2"), c("C1", "C2", "C3", "C4")))

Related

Incrementing grouped identifiers

I have example data as follows:
library(data.table)
dat <- fread("Survey Variable_codes_2022
D D1
A A1
B B1
B B3
B B2
E E1
B NA
E NA")
For the two rows that have Variable_codes_2022==NA, I would like to increment the variable code so that it becomes:
# Desired result: the two missing codes are replaced by the next free
# number within each survey (B4 and E2).
dat <- fread("Survey Variable_codes_2022
D D1
A A1
B B1
B B3
B B2
E E1
B B4
E E2")
Because the column Variable_codes_2022 is a string variable, the numbers are not in numerical order.
I have no idea where to start and I was wondering if someone could help me on the right track.

We could do it this way:
grouping
arranging and
mutate.
To keep the original order we could first create an id and then rearrange:
library(dplyr)
dat %>%
group_by(Survey) %>%
arrange(.by_group = TRUE) %>%
mutate(Variable_codes_2022 = paste0(Survey, row_number()))
Survey Variable_codes_2022
<chr> <chr>
1 A A1
2 B B1
3 B B2
4 B B3
5 B B4
6 D D1
7 E E1
8 E E2

data.table option using rleid like this:
library(data.table)
dat[, Variable_codes_2022 := paste0(Survey, rleid(Variable_codes_2022)), by = Survey]
dat
#> Survey Variable_codes_2022
#> 1: D D1
#> 2: A A1
#> 3: B B1
#> 4: B B2
#> 5: B B3
#> 6: E E1
#> 7: B B4
#> 8: E E2
Created on 2022-12-01 with reprex v2.0.2

# Sample data with one extra missing code for survey "B".
# The `.internal.selfref = <pointer: ...>` attribute that dput() prints for a
# data.table is omitted: it is not valid R syntax and cannot be parsed back in.
dat <-
  structure(
    list(
      survey = c("D", "A", "B", "B", "B", "E", "B", "E", "B"),
      var_code = c("D1", "A1", "B1", "B3", "B2", "E1", NA, NA, NA)
    ),
    row.names = c(NA, -9L),
    class = c("data.table", "data.frame")
  )
library(dplyr)
library(stringr)
# Fill in missing codes per survey without touching the codes that already
# exist: a missing code becomes the survey's highest existing number plus the
# running count of missing rows seen so far in that survey.
dat %>%
  group_by(survey) %>%
  mutate(
    # numeric part of each existing code (NA where the code is missing)
    aux1 = as.numeric(stringr::str_remove(var_code, survey)),
    # 1, 2, ... for successive missing rows within the survey
    aux2 = cumsum(is.na(var_code)),
    # only replace the NA rows; keep every existing code as it is
    var_code = if_else(
      is.na(var_code),
      paste0(survey, max(aux1, na.rm = TRUE) + aux2),
      var_code
    )
  ) %>%
  ungroup() %>%
  select(-aux1, -aux2)
# A tibble: 9 x 2
# survey var_code
# <chr> <chr>
# 1 D D1
# 2 A A1
# 3 B B1
# 4 B B3
# 5 B B2
# 6 E E1
# 7 B B4
# 8 E E2
# 9 B B5

This solution with rowid.
Added an extra element to the sample so it can be tested against multiple missings
library(data.table)
#> Warning: package 'data.table' was built under R version 4.2.2
dat <- fread("Survey Variable_codes_2022
D D1
A A1
B B1
B B3
B B2
E E1
B NA
E NA
E NA")
dat[, n := as.numeric(substr(
Variable_codes_2022, nchar(Survey)+1, nchar(Variable_codes_2022)))]
dat[is.na(n),
Variable_codes_2022 := paste0(Survey, rowid(Survey) +
dat[.SD[,.(Survey)], .(m=max(n, na.rm=T)), on = "Survey", by=.EACHI ][,m])]
dat
#> Survey Variable_codes_2022 n
#> 1: D D1 1
#> 2: A A1 1
#> 3: B B1 1
#> 4: B B3 3
#> 5: B B2 2
#> 6: E E1 1
#> 7: B B4 NA
#> 8: E E2 NA
#> 9: E E3 NA

Convert nested list to data.frame

I have a nested list l in which each item is itself a 2-level list. For example:
l1 = list("a", list("a1"= "a1v"))
l2 = list("b", list("b1" = "b1v", b2 = "b2v"))
l3 = list("c", list("c1" = c("c1v1", "c1v2", "c1v3")))
l = list(l1, l2, l3)
How do I transform it to a data.frame like this:
df = data.frame(A = c("a", "b", "b", "c", "c", "c"), B= c("a1", "b1", "b2", "c1", "c1", "c1"), C=c("a1v", "b1v", "b2v", "c1v1", "c1v2", "c1v3"))
> df
A B C
1 a a1 a1v
2 b b1 b1v
3 b b2 b2v
4 c c1 c1v1
5 c c1 c1v2
6 c c1 c1v3
I tried separate_rows and map_df, but both failed to deal with the inconsistent dimensions of the .x[[2]] items.
Update 1:
@akrun's solution is not running for me:

We could use bind_rows with map
library(purrr)
library(dplyr)
library(tidyr)
map_dfr(l, ~bind_cols(.x) %>%
pivot_longer(cols = -1, names_to = 'B', values_to = 'C') %>%
rename_at(1, ~'A'))
# A tibble: 6 x 3
# A B C
#* <chr> <chr> <chr>
#1 a a1 a1v
#2 b b1 b1v
#3 b b2 b2v
#4 c c1 c1v1
#5 c c1 c1v2
#6 c c1 c1v3

If the sample data in your question accurately reflects your actual data, you can try one of the following:
library(data.table)
data.table(l)[, list(names(unlist(l)),
unlist(l, use.names = FALSE))][
, V3 := V2[1], cumsum(V1 == "")][V1 != ""]
## V1 V2 V3
## 1: a1 a1v a
## 2: b1 b1v b
## 3: b2 b2v b
## 4: c11 c1v1 c
## 5: c12 c1v2 c
## 6: c13 c1v3 c
reshape2::melt(setNames(lapply(l, "[[", -1), lapply(l, "[[", 1)))
## value L2 L1
## 1 a1v a1 a
## 2 b1v b1 b
## 3 b2v b2 b
## 4 c1v1 c1 c
## 5 c1v2 c1 c
## 6 c1v3 c1 c

Base R option :
# Build one data frame per list item and stack them.
# A = the item's label, B = the names of its inner list, C = the inner values
# (matching the column layout requested in the question). A and B are recycled
# when an inner element holds several values.
do.call(rbind, lapply(l, function(x) {
  data.frame(A = x[[1]], B = names(x[[2]]), C = unlist(x[[2]]))
}))
# A B C
#a1 a a1 a1v
#b1 b b1 b1v
#b2 b b2 b2v
#c11 c c1 c1v1
#c12 c c1 c1v2
#c13 c c1 c1v3

Since this is also one of the solutions, I will post it here as well. This is the one I can relate to.
map_df(l, ~ tibble(A=.x[[1]], B=names(.x[[2]]), C= unlist(.x[[2]])))
Read:
Run through all elements of l and make a data.frame (map_df and ~ inside) from a sub-data.frame created by tibble where column A = ..., B = ..., ...
Thanks go to:
@akrun for the prompt answer, I could have used the solution, but was
too busy to figure it out.
@A5C1D2H2I1M1N2O1R2T1 also provided a
performant answer.
@Ronak Shah provided a plain base R
solution that I can translate to this.

In R is there a way to recode the columns from one data frame with values from another data frame?

I am still relatively new to working in R and I am not sure how to approach this problem. Any help or advice is greatly appreciated!!!
The problem I have is that I am working with two data frames and I need to recode the first data frame with values from the second. The first data frame (df1) contains the data from the respondents to a survey and the other data frame(df2) is the data dictionary for df1.
The data looks like this:
df1 <- data.frame(a = c(1,2,3),
b = c(4,5,6),
c = c(7,8,9))
df2 <- data.frame(columnIndicator = c("a","a","a","b","b","b","c","c","c" ),
df1_value = c(1,2,3,4,5,6,7,8,9),
new_value = c("a1","a2","a3","b1","b2","b3","c1","c2","c3"))
So far I can manually recode df1 to get the expected output by doing this:
df1 <- within(df1,{
a[a==1] <- "a1"
a[a==2] <- "a2"
a[a==3] <- "a3"
b[b==4] <- "b4"
b[b==5] <- "b5"
b[b==6] <- "b6"
c[c==7] <- "c7"
c[c==8] <- "c8"
c[c==9] <- "c9"
})
However my real dataset has about 42 columns that need to be recoded and that method is a little time intensive. Is there another way in R for me to recode the values in df1 with the values in df2?
Thanks!

Just need to transform the shape a bit.
library(data.table)
df1 <- data.frame(a = c(1,2,3),
b = c(4,5,6),
c = c(7,8,9))
df2 <- data.frame(columnIndicator = c("a","a","a","b","b","b","c","c","c" ),
df1_value = c(1,2,3,4,5,6,7,8,9),
new_value = c("a1","a2","a3","b4","b5","b6","c7","c8","c9"),stringsAsFactors = FALSE)
setDT(df1)
setDT(df2)
df1[,ID:=.I]
ldf1 <- melt(df1,measure.vars = c("a","b","c"),variable.name = "columnIndicator",value.name = "df1_value")
ldf1[df2,"new_value":=i.new_value,on=.(columnIndicator,df1_value)]
ldf1
#> ID columnIndicator df1_value new_value
#> 1: 1 a 1 a1
#> 2: 2 a 2 a2
#> 3: 3 a 3 a3
#> 4: 1 b 4 b4
#> 5: 2 b 5 b5
#> 6: 3 b 6 b6
#> 7: 1 c 7 c7
#> 8: 2 c 8 c8
#> 9: 3 c 9 c9
dcast(ldf1,ID~columnIndicator,value.var = "new_value")
#> ID a b c
#> 1: 1 a1 b4 c7
#> 2: 2 a2 b5 c8
#> 3: 3 a3 b6 c9
Created on 2020-04-18 by the reprex package (v0.3.0)

In base R, we can unlist df1 match it with df1_value and get corresponding new_value.
df1[] <- df2$new_value[match(unlist(df1), df2$df1_value)]
df1
# a b c
#1 a1 b1 c1
#2 a2 b2 c2
#3 a3 b3 c3

Is this what you are looking for???
library(dplyr)
library(tidyr) # gather() is exported by tidyr, not dplyr
# Reshape df1 to long form (one row per column/value pair), then look up the
# new value in the dictionary df2 by column name and original value.
df3 <- df1 %>% gather(key = "key", value = "value")
df3 %>% inner_join(df2, by = c("key" = "columnIndicator", "value" = "df1_value"))
Output
key value new_value
1 a 1 a1
2 a 2 a2
3 a 3 a3
4 b 4 b1
5 b 5 b2
6 b 6 b3
7 c 7 c1
8 c 8 c2
9 c 9 c3

Reverse spread using gather

We have a rating matrix:
df <- data.frame(Customer.ID=c("c1",'c1','c1','c2','c2','c3'),
Movie.ID=c("m1", "m3", "m5", "m1", "m5", "m7"),
Rating=c(1,2,1,3,3,1))
df
Customer.ID Movie.ID Rating
1 c1 m1 1
2 c1 m3 2
3 c1 m5 1
4 c2 m1 3
5 c2 m5 3
6 c3 m7 1
When I spread and change row names like this:
df1 <- df %>% spread(key = 'Movie.ID', value = 'Rating')
df1 <- data.frame(df1, row.names = 'Customer.ID')
I get:
> df1
m1 m3 m5 m7
c1 1 2 1 NA
c2 3 NA 3 NA
c3 NA NA NA 1
I want to make df1 look like df again.
I have tried:
df2 <-setDT(df1, keep.rownames = TRUE)[]
df2 <- gather(df2, Video.ID, Rating, 2:4)
But it returns me:
> df2
rn m7 Video.ID Rating
1 c1 NA m1 1
2 c2 NA m1 3
3 c3 1 m1 NA
4 c1 NA m3 2
5 c2 NA m3 NA
6 c3 1 m3 NA
7 c1 NA m5 1
8 c2 NA m5 3
9 c3 1 m5 NA

While I am not certain why you are doing this (see @Jack Brookes' comment), you can do this pretty readily with dplyr functions:
df1 %>%
rownames_to_column('Customer.ID') %>%
gather(m1:m7, key = 'Movie.ID', value = 'Rating') %>%
filter(!is.na(Rating))
Which gives us:
Customer.ID Movie.ID Rating
1 c1 m1 1
2 c2 m1 3
3 c1 m3 2
4 c1 m5 1
5 c2 m5 3
6 c3 m7 1

Run call columns data of another dataframe, row by row

This is my First dataframe,
# Seed the RNG *before* drawing so the example data is reproducible.
set.seed(11)
# 9 x 9 data frame of Bernoulli(0.5) draws with columns a1..a3, b1..b3, c1..c3.
df1 <- as.data.frame(matrix(rbinom(9 * 9, 1, 0.5), ncol = 9, nrow = 9))
colnames(df1) <- paste0(rep(c("a", "b", "c"), each = 3), rep(c(1, 2, 3), 3))
This is my Second dataframe,
factor.1 <- paste(rep(c("a","b"), each=3), rep(c(1,2,3), 2), sep = "")
factor.2 <- rep(paste(rep("c", 3), c(1,2,3), sep = ""), 2)
df2 <- as.data.frame(cbind(factor.1,factor.2))
I want to calculate the result in each column and put it inside the second dataframe. I use dplyr
# Sum every value in the column(s) of `df1` selected by `x`.
# `x` may name one column or several; when several are given, a single total
# over all of them is returned (the function is not vectorised over `x`).
fun1 <- function(x) {
  sum(df1[, x])
}
df2%>% mutate(value = fun1(factor.1))
But what I get is this,
factor.1 factor.2 value
1 a1 c1 22
2 a2 c2 22
3 a3 c3 22
4 b1 c1 22
5 b2 c2 22
6 b3 c3 22
But What I want is this,
factor.1 factor.2 value
1 a1 c1 4
2 a2 c2 4
3 a3 c3 4
4 b1 c1 1
5 b2 c2 4
6 b3 c3 5

Is this what you are looking for ?
df2 %>% mutate(value = sapply(factor.1, fun1) )

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Melt a matrix using extern covariate in R - r

Related

Incrementing grouped identifiers

Convert nested list to data.frame

In R is there a way to recode the columns from one data frame with values from another data frame?

Reverse spread using gather

Run call columns data of another dataframe, row by row

Categories

Resources