Update dt columns based on named list - r

Let's say, I have the following my_dt datatable:
neutrons
spectrum
geography
2.30
-1.2
KIEL
2.54
-1.6
KIEL
2.56
-0.9
JUNG
2.31
-0.3
ANT
Also I have the following named list (my_list):
> my_list
$particles
[1] "neutrons"
$station
[1] NA
$energy
[1] "spectrum"
$area
[1] "geography"
$gamma
[1] NA
The values of this list correspond to the columns names from my dataset (if they exist, if they are absent - NA).
Based on my dataset and this list, I need to check which columns exist in my_dt and rename them (based on my_list names), and for NA values - I need to create columns filled with NAs.
So, I want to obtain the following dataset:
>final_dt
particles
station
energy
area
gamma
2.30
NA
-1.2
KIEL
NA
2.54
NA
-1.6
KIEL
NA
2.56
NA
-0.9
JUNG
NA
2.31
NA
-0.3
ANT
NA
I try to implement this using apply family functions, but at the moment I can't obtain exactly what I want.
So, I would be grateful for any help!

data.table using lapply
library(data.table)
setDT(my_dt)
setDT(my_list)
final_dt <- setnames( my_list[, lapply( .SD, function(x){
if( x %in% colnames(my_dt)){ my_dt[,x,with=F] }else{ NA } } ) ],
names(my_list) )
final_dt
particles station energy area gamma
1: 2.30 NA -1.2 KIEL NA
2: 2.54 NA -1.6 KIEL NA
3: 2.56 NA -0.9 JUNG NA
4: 2.31 NA -0.3 ANT NA
base R using sapply
setDF(my_dt)
setDF(my_list)
data.frame( sapply( my_list, function(x) if(!is.na(x)){ my_dt[,x] }else{ NA } ) )
particles station energy area gamma
1 2.30 NA -1.2 KIEL NA
2 2.54 NA -1.6 KIEL NA
3 2.56 NA -0.9 JUNG NA
4 2.31 NA -0.3 ANT NA
Data
my_dt <- structure(list(neutrons = c(2.3, 2.54, 2.56, 2.31), spectrum = c(-1.2,
-1.6, -0.9, -0.3), geography = c("KIEL", "KIEL", "JUNG", "ANT"
)), class = "data.frame", row.names = c(NA, -4L))
my_list <- list(particles = "neutrons", station = NA, energy = "spectrum",
area = "geography", gamma = NA)

This may not meet your needs, but since I had come up with this separately thought I would share just in case. You can use setnames to rename the columns based on my_list. After that, add in the missing column names with values of NA. Finally, you can use setcolorder to reorder based on your list if desired.
library(data.table)
my_vec <- unlist(my_list)
setnames(my_dt, names(my_vec[match(names(my_dt), my_vec)]))
my_dt[, (setdiff(names(my_vec), names(my_dt))) := NA]
setcolorder(my_dt, names(my_vec))
my_dt
Output
particles station energy area gamma
1: 2.30 NA -1.2 KIEL NA
2: 2.54 NA -1.6 KIEL NA
3: 2.56 NA -0.9 JUNG NA
4: 2.31 NA -0.3 ANT NA

I wrote a simple code that should do the job for you:
l = list(c = 'cc', a = 'aa', b = NA) # replace this with your my_list
dt = data.frame(aa = 1:3, cc = 2:4) # replace this with my_dt
dtl = data.frame(l)
names(dt) = names(l)[na.omit(match(l, names(dt)))]
m = merge(dt, dtl[!is.element(names(dtl), names(dt))])

Related

unable to generate matrix in desired format

I need to make a function such that it create a matrix across two groups and provide total of the groups at row and column level, along with matrix results
Inputs for functions -> df
credit breaks
Rate_Cutpoints from 1..5
O/P needs to be final table provide below
MY Data Frame
credit <- c(10,20,30,40,10,30,50,70,90,100,25,45,67,87,98,54,34,56,78,23,45,56,12)
rate <- c(1,2,3,4,1,3,5,7,9,10,2,4,6,8,9,5,3,5,7,2,4,5,1)
Marks <- c(9,3,5,6,7,8,9,1,3,10,4,5,6,7,5,4,8,3,5,6,7,8,9)
Points <- c(1,2,3,4,5,6,7,8,9,10,2,3,4,4,5,7,8,3,4,5,6,7,8)
Scale <- c(1,2,3,4,5,6,7,8,9,10,2,3,4,4,5,7,8,3,4,5,6,7,8)
Category <- c('book', 'pen', 'textbook', 'pencil_case','book', 'pen', 'textbook', 'pencil_case','book', 'pen', 'textbook', 'pencil_case','book','pen' ,'pen', 'textbook', 'pencil_case','book', 'pen', 'textbook', 'pencil_case','book', 'pencil_case')
# Join the variables to create a data frame
df <- data.frame(credit,rate,Marks,Points,Scale,Category)
MY Inputs
credit_breaks<-c(0,15,30,45,65,75,1000)
Rate_Cutpoints1<-c(0,1,2,5,7,9,10)
Rate_Cutpoints2<-c(0,3,4,7,8,9,10)
Rate_Cutpoints3<-c(0,1,5,6,8,9,10)
Rate_Cutpoints4<-c(0,1,3,6,7,9,10)
Rate_Cutpoints5<-c(0,2,3,4,8,9,10)
Rate_Cutpoints6<-c(0,3,4,5,7,9,10)
MY code it basically first make credit band column from credit breaks which is provided as input and then using that make another column as rate bands based on rate breaks
calculate few metric and then provide metrics
df1<-df %>% mutate(Credit_Band = cut(credit,include.lowest = TRUE,right=TRUE,
breaks =credit_breaks ,labels = FALSE))
df2<-df1 %>%
group_by(credit) %>%
mutate(New_Band =
(ifelse(Credit_Band==1, (cut(rate, Rate_Cutpoints1 ,labels = FALSE)),
ifelse(Credit_Band==2, (cut(rate, Rate_Cutpoints2 ,labels = FALSE)),
ifelse(Credit_Band==3, (cut(rate, Rate_Cutpoints3 ,labels = FALSE)),
ifelse(Credit_Band==4, (cut(rate, Rate_Cutpoints4 ,labels = FALSE)),
ifelse(Credit_Band==5, (cut(rate, Rate_Cutpoints5,labels = FALSE)),
ifelse(Credit_Band==6, (cut(rate, Rate_Cutpoints6,labels = FALSE)),
NA))))))))
df2<-as.data.frame(df2)
summary_results<-df2%>%
group_by(Credit_Band,New_Band)%>%
dplyr::summarize(dist = n()/nrow(df2),
count =n(),
avg_marks= sum(Marks, na.rm=TRUE),
sum_points = sum(Points,na.rm = TRUE),
sum_scale = sum(Scale,na.rm = TRUE))
summary_results$final<-summary_results$avg_marks/summary_results$sum_points
results<-reshape2::dcast(data = summary_results,formula = Credit_Band~New_Band,
value.var = "final")
my result o/p is cross tab across credit and rate bands
Then below code is to calculate total across credit and rate bands
total_rows_value=df2%>% group_by(New_Band)%>%
dplyr::summarize(sum_points = sum(Points ,na.rm = TRUE),
avg_marks= sum(Marks, na.rm=TRUE),
)
total_rows_value$final<-total_rows_value$avg_marks/total_rows_value$sum_points
total_cols_vals=df2%>% group_by(Credit_Band)%>%
dplyr::summarize(sum_points = sum(Points ,na.rm = TRUE),
avg_marks= sum(Marks, na.rm=TRUE),
)
total_cols_vals$final<-total_cols_vals$avg_marks/total_cols_vals$sum_points
Now MY above O/P needs to be clubbed in a fashion to generate below matrix as Final O/P desired
Credit_Band 1 2 3 4 5 6 TotalCols
1 1.78 NA NA NA NA NA. 1.79
2 1.44 NA NA NA NA NA. 1.44
3 NA 1.23 NA NA NA NA. 1.24
4 NA NA. 1 NA NA NA 1
5 NA NA NA 0.58 NA NA 0.58
6 NA NA NA 1.25 0.83 1 0.93
Total_R 1.59. 1.24 1. 0.75. 0.83. 1
(results_body <- results[,-1])
(results_rownames <- results[,1])
(fin <- cbind(
rbind(results_body,total_rows_value$final),
totcol = c(total_cols_vals$final,NA)))
rownames(fin) <-c(results_rownames,"Total_R")
> round(fin,2)
1 2 3 4 5 6 totcol
1 1.79 NA NA NA NA NA 1.79
2 1.44 NA NA NA NA NA 1.44
3 NA 1.24 NA NA NA NA 1.24
4 NA NA 1 NA NA NA 1.00
5 NA NA NA 0.58 NA NA 0.58
6 NA NA NA 1.25 0.83 1 0.94
Total_R 1.59 1.24 1 0.75 0.83 1 NA

How to group by and fill NA with closest not NA in R dataframe column with condition on another column

I have a data frame of blood test markers results and I want to fill in the NA's by the following criteria:
For each group of ID (TIME is in ascending order) if the marker value is NA then fill it with the closest not NA value in this group (may be past or future) but only if the time difference is less than 14.
this example of my data:
df<-data.frame(ID=c(rep(2,5),rep(4,3)), TIME =c(1,22,33,43,85,-48,1,30),
CEA = c(1.32,1.42,1.81,2.33,2.23,29.7,23.34,18.23),
CA.15.3 = c(14.62,14.59,16.8,22.34,36.33,56.02,94.09,121.5),
CA.125 = c(33.98,27.56,30.31,NA,39.57,1171.00,956.50,825.30),
CA.19.9 = c(6.18,7.11,5.72, NA, 7.38,39.30,118.20,98.26),
CA.72.4 = c(rep(NA,5),1.32, NA, NA),
NSE = c(NA, 13.21, rep(NA,6)))
ID TIME CEA CA.15.3 CA.125 CA.19.9 CA.72.4 NSE
2 1 1.32 14.62 33.98 6.18 NA NA
2 22 1.42 14.59 27.56 7.11 NA 13.21
2 33 1.81 16.80 30.31 5.72 NA NA
2 43 2.33 22.34 NA NA NA NA
2 85 2.23 36.33 39.57 7.38 NA NA
4 -48 29.70 56.02 1171.00 39.30 1.32 NA
4 1 23.34 94.09 956.50 118.20 NA NA
4 30 18.23 121.50 825.30 98.26 NA NA
ID is the patient.
The TIME is the time of the blood test.
The others are the markers.
The only way I could do it is with loops which I try to avoid as much as possible.
I expect the output to be:
ID TIME CEA CA.15.3 CA.125 CA.19.9 CA.72.4 NSE
2 1 1.32 14.62 33.98 6.18 NA NA
2 22 1.42 14.59 27.56 7.11 NA 13.21
2 33 1.81 16.80 30.31 5.72 NA 13.21
2 43 2.33 22.34 30.31 5.72 NA NA
2 85 2.23 36.33 39.57 7.38 NA NA
4 -48 29.70 56.02 1171.00 39.30 1.32 NA
4 1 23.34 94.09 956.50 118.20 NA NA
4 30 18.23 121.50 825.30 98.26 NA NA
CA.19.9 and CA.124 are filled with the previous (10 days before)
NSE filled with the previous (11 days)
CA.72.4 not filled since the time difference of 1.32 which is -48 is 49 days from the next measure.
I bet there is a much simpler, vectorized solution but the following works.
fill_NA <- function(DF){
sp <- split(df, df$ID)
sp <- lapply(sp, function(DF){
d <- diff(DF$TIME)
i_diff <- c(FALSE, d < 14)
res <- sapply(DF[-(1:2)], function(X){
inx <- i_diff & is.na(X)
if(any(inx)){
inx <- which(inx)
last_change <- -1
for(i in inx){
if(i > last_change + 1){
if(i == 1){
X[i] <- X[i + 1]
}else{
X[i] <- X[i - 1]
}
last_change <- i
}
}
}
X
})
cbind(DF[1:2], res)
})
res <- do.call(rbind, sp)
row.names(res) <- NULL
res
}
fill_NA(df)
# ID TIME CEA CA.15.3 CA.125 CA.19.9 CA.72.4 NSE
#1 2 1 1.32 14.62 33.98 6.18 NA NA
#2 2 22 1.42 14.59 27.56 7.11 NA 13.21
#3 2 33 1.81 16.80 30.31 5.72 NA 13.21
#4 2 43 2.33 22.34 30.31 5.72 NA NA
#5 2 85 2.23 36.33 39.57 7.38 NA NA
#6 4 -48 29.70 56.02 1171.00 39.30 1.32 NA
#7 4 1 23.34 94.09 956.50 118.20 NA NA
#8 4 30 18.23 121.50 825.30 98.26 NA NA
Yes, you can have a vectorized solution. first let us consider the case in which you only impute using the future value. You need to create few auxiliary variables:
a variable that tells you whether the next observation belong to the same id (so it can be used to impute),
a variable that tells you whether the next observation is less than 14 days apart from the current one.
These do not depend on the specific variable you want to impute. for each variable to be imputed you will also need a variable that tells you whether the next variable is missing.
Then you can vectorize the following logic: when the next observation has the same id, and when it is less than 14 days from the current one and it is not missing copy its value in the current one.
Things get more complicated when you need to decide whether to use the past or future value, but the logic is the same. the code is below, it is a bit long but you can simplify it, I just wanted to be clear about what it does.
Hope this helps
x <-data.frame(ID=c(rep(2,5),rep(4,3)), TIME =c(1,22,33,43,85,-48,1,30),
CEA = c(1.32,1.42,1.81,2.33,2.23,29.7,23.34,18.23),
CA.15.3 = c(14.62,14.59,16.8,22.34,36.33,56.02,94.09,121.5),
CA.125 = c(33.98,27.56,30.31,NA,39.57,1171.00,956.50,825.30),
CA.19.9 = c(6.18,7.11,5.72, NA, 7.38,39.30,118.20,98.26),
CA.72.4 = c(rep(NA,5),1.32, NA, NA),
NSE = c(NA, 13.21, rep(NA,6)))
### these are the columns we want to input
cols.to.impute <- colnames(x)[! colnames(x) %in% c("ID","TIME")]
### is the next id the same?
x$diffidf <- NA
x$diffidf[1:(nrow(x)-1)] <- diff(x$ID)
x$diffidf[x$diffidf > 0] <- NA
### is the previous id the same?
x$diffidb <- NA
x$diffidb[2:nrow(x)] <- diff(x$ID)
x$diffidb[x$diffidb > 0] <- NA
### diff in time with next observation
x$difftimef <- NA
x$difftimef[1:(nrow(x)-1)] <- diff(x$TIME)
### diff in time with previous observation
x$difftimeb <- NA
x$difftimeb[2:nrow(x)] <- diff(x$TIME)
### if next (previous) id is not the same time difference is not meaningful
x$difftimef[is.na(x$diffidf)] <- NA
x$difftimeb[is.na(x$diffidb)] <- NA
### we do not need diffid anymore (due to previous statement)
x$diffidf <- x$diffidb <- NULL
### if next (previous) point in time is more than 14 days it is not useful for imputation
x$difftimef[abs(x$difftimef) > 14] <- NA
x$difftimeb[abs(x$difftimeb) > 14] <- NA
### create variable usef that tells us whether we should attempt to use the forward observation for imputation
### it is 1 only if difftime forward is less than difftime backward
x$usef <- NA
x$usef[!is.na(x$difftimef) & x$difftimef < x$difftimeb] <- 1
x$usef[!is.na(x$difftimef) & is.na(x$difftimeb)] <- 1
x$usef[is.na(x$difftimef) & !is.na(x$difftimeb)] <- 0
if (!is.na(x$usef[nrow(x)]))
stop("\nlast observation usef is not missing\n")
### now we get into column specific operations.
for (col in cols.to.impute){
### we will store the results in x$imputed, and copy into c[,col] at the end
x$imputed <- x[,col]
### x$usef needs to be modified depending on the specific column, so we define a local version of it
x$usef.local <- x$usef
### if a variable is not missing no point in looking at usef.local, so we make it missing
x$usef.local[!is.na(x[,col])] <- NA
### when usef.local is 1 but the next observation is missing it cannot be used for imputation, so we
### make it 0. but a value of 0 does not mean we can use the previous observation because that may
### be missing too. so first we make usef 0 and next we check the previous observation and if that
### is missing too we make usef missing
x$previous.value <- c(NA,x[1:(nrow(x)-1),col])
x$next.value <- c(x[2:nrow(x),col],NA)
x$next.missing <- is.na(x$next.value)
x$previous.missing <- is.na(x$previous.value)
x$usef.local[x$next.missing & x$usef.local == 1] <- 0
x$usef.local[x$previous.missing & x$usef.local == 0] <- NA
### now we can impute properly: use next value when usef.local is 1 and previous value when usef.local is 0
tmp <- rep(FALSE,nrow(x))
tmp[x$usef.local == 1] <- TRUE
x$imputed[tmp] <- x$next.value[tmp]
tmp <- rep(FALSE,nrow(x))
tmp[x$usef.local == 0] <- TRUE
x$imputed[tmp] <- x$previous.value[tmp]
### copy to column
x[,col] <- x$imputed
}
### get rid of useless temporary stuff
x$previous.value <- x$previous.missing <- x$next.value <- x$next.missing <- x$imputed <- x$usef.local <- NULL
ID TIME CEA CA.15.3 CA.125 CA.19.9 CA.72.4 NSE difftimef difftimeb usef
1 2 1 1.32 14.62 33.98 6.18 NA NA NA NA NA
2 2 22 1.42 14.59 27.56 7.11 NA 13.21 11 NA 1
3 2 33 1.81 16.80 30.31 5.72 NA 13.21 10 11 1
4 2 43 2.33 22.34 30.31 5.72 NA NA NA 10 0
5 2 85 2.23 36.33 39.57 7.38 NA NA NA NA NA
6 4 -48 29.70 56.02 1171.00 39.30 1.32 NA NA NA NA
7 4 1 23.34 94.09 956.50 118.20 NA NA NA NA NA
8 4 30 18.23 121.50 825.30 98.26 NA NA NA NA NA
>

Mapply or lapply solution instead of iterative for-loop that sequentially declines values against different values of same vector

Trying to learn better solutions than for-loops and feel like this instance may lend itself to lapply or mapply.
I have a dataframe of two columns (volumes, months.passed) from different points in time I'm declining by a vector of decline values (decline.vector) whereby the decline start point changes based on the month passed + 1 month, until the end of the decline.vector.
Currently my solution, which works fine, is:
decline.vector <- c( 0.9,0.81,0.729,0.656,0.590,0.531,0.478, 0.430, 0.387,0.348)
months.passed <- c(2,4,5,6)
volumes <- c(10,20,10,20)
df <- data.frame(months.passed, volumes)
for(i in 1:nrow(df)) {
build <- df$volumes[i] * cumprod(decline.vector[df$months.passed[i]+1:nrow(decline.vector),])
build <- rbind(final,build)
}
return(build)
[1] NA NA 7.29 6.56 5.90 5.31 4.78 4.30 3.87 3.49
[2] NA NA NA NA 11.81 10.63 9.57 8.61 7.75 6.97
[3] NA NA NA NA NA 5.31 4.78 4.30 3.87 3.49
[4] NA NA NA NA NA NA 9.57 8.61 7.75 6.97
Is there a more elegant way to do this, either using lapply or mapply, or even something instead of rbind?
Here is an option using mapply
out1 <- t(mapply(
function(x, y, z = decline.vector) {
n <- length(z)
c(rep(NA, y), x * z[(y + 1):n])
},
x = volumes,
y = months.passed
))
out1
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#[1,] NA NA 7.29 6.56 5.9 5.31 4.78 4.3 3.87 3.48
#[2,] NA NA NA NA 11.8 10.62 9.56 8.6 7.74 6.96
#[3,] NA NA NA NA NA 5.31 4.78 4.3 3.87 3.48
#[4,] NA NA NA NA NA NA 9.56 8.6 7.74 6.96
A vectorized option where we make use of matrix multiplication
out2 <- t(t(volumes)) %*% t(decline.vector)
Next we use a matrix to insert NAs at the first months.passed columns for each row
out2[cbind(rep(1:length(months.passed), months.passed),
sequence(months.passed))] <- NA
Result
identical(out1, out2)
# [1] TRUE
data
decline.vector <- c(0.9, 0.81, 0.729, 0.656, 0.590, 0.531, 0.478, 0.430, 0.387, 0.348)
volumes <- c(10, 20, 10, 20)
months.passed <- c(2, 4, 5, 6)

How to put elements of a list of vectors of different lengths into a data frame, with elements of vectors being separated as different columns

I would like to put elements of my list into a data frame. The list has this shape :
$`1`
SW GHS GS
0.49075730 0.46511628 0.02564103
...
$`95`
GHS SW LLB GS
0.06896552 0.03448276 0.03448276 0.00000000
$`96`
GHS SW TD
0.40736411 0.42843691 0.09003831
As you can see, the elements of the vectors are not always in the same order, and their number is variable as well.
I would like it to be put in a data frame like this :
GHS SW TD GS LLB
1 0.46511628 0.49075730 NA 0.02564103 NA
2 0.06896552 0.03448276 NA 0.00000000 0.03448276
3 0.40736411 0.42843691 0.09003831 NA NA
I hope you can help me, I have looked for similar question but so far was only able to find cases in which the amount and orders of elements in the vectors were consistent ...
We can also use purrr::full_join within purrr::reduce
# Reproducible sample data
set.seed(2018)
lst <- list(
`1` = setNames(as.data.frame(matrix(runif(3), ncol = 3)), c("SW", "GHS", "GS")),
`95` = setNames(as.data.frame(matrix(runif(4), ncol = 4)), c("GHS", "SW", "LLB", "GS")),
`96` = setNames(as.data.frame(matrix(runif(3), ncol = 3)), c("GHS", "SW", "TD")))
# Merge
library(purrr)
reduce(lst, full_join)
# SW GHS GS LLB TD
#1 0.3361535 0.4637233 0.06058539 NA NA
#2 0.4743142 0.1974336 0.60675886 0.3010486 NA
#3 0.9586547 0.1300121 NA NA 0.5468495
The solution below (1) converts each list element into a dataframe where t() is used to get the element names to be column names in the dataframe, and (2) row-binds those dataframes together where data.table's fill argument is helpful for the inconsistent vector lengths/names.
mylist <- list(
one = c(SW=0.49, GHS=0.46, GS=0.03),
two = c(GHS=0.07, GW = 0.03, LLW=0.03, GS=0.00),
six = c(GHS=0.41, SW=0.42, TD=0.09)
)
temp <- lapply(mylist, function(x) data.frame(t(x)))
data.table::rbindlist(temp, fill=TRUE)
output:
SW GHS GS GW LLW TD
1: 0.49 0.46 0.03 NA NA NA
2: NA 0.07 0.00 0.03 0.03 NA
3: 0.42 0.41 NA NA NA 0.09

tm package: Output of findAssocs() in a matrix instead of a list in R

Consider the following list:
library(tm)
data("crude")
tdm <- TermDocumentMatrix(crude)
a <- findAssocs(tdm, c("oil", "opec", "xyz"), c(0.7, 0.75, 0.1))
How do I manage to have a data frame with all terms associated with these 3 words in the columns and showing:
The corresponding correlation coefficient (if it exists)
NA if it does not exists for this word (for example the couple (oil, they) would show NA)
Here's a solution using reshape2 to help reshape the data
library(reshape2)
aa<-do.call(rbind, Map(function(d, n)
cbind.data.frame(
xterm=if (length(d)>0) names(d) else NA,
cor=if(length(d)>0) d else NA,
term=n),
a, names(a))
)
dcast(aa, term~xterm, value.var="cor")
Or you could use dplyr and tidyr
library(dplyr)
library('devtools')
install_github('hadley/tidyr')
library(tidyr)
a1 <- unnest(lapply(a, function(x) data.frame(xterm=names(x),
cor=x, stringsAsFactors=FALSE)), term)
a1 %>%
spread(xterm, cor) #here it removed terms without any `cor` for the `xterm`
# term 15.8 ability above agreement analysts buyers clearly emergency fixed
#1 oil 0.87 NA 0.76 0.71 0.79 0.70 0.8 0.75 0.73
#2 opec 0.85 0.8 0.82 0.76 0.85 0.83 NA 0.87 NA
# late market meeting prices prices. said that they trying who winter
#1 0.8 0.75 0.77 0.72 NA 0.78 0.73 NA 0.8 0.8 0.8
#2 NA NA 0.88 NA 0.79 0.82 NA 0.8 NA NA NA
Update
aNew <- sapply(tdm$dimnames$Terms, function(i) findAssocs(tdm, i, corlimit=0.95))
aNew2 <- aNew[!!sapply(aNew, function(x) length(dim(x)))]
aNew3 <- unnest(lapply(aNew2, function(x) data.frame(xterm=rownames(x),
cor=x[,1], stringsAsFactors=FALSE)[1:3,]), term)
res <- aNew3 %>%
spread(xterm, cor)
dim(res)
#[1] 1021 160
res[1:3,1:5]
# term ... 100,000 10.8 1.1
#1 ... NA NA NA NA
#2 100,000 NA NA NA 1
#3 10.8 NA NA NA NA

Resources