R: constructing bootstrap t confidence interval for 3 parameter estimates - r

I am trying to construct a bootstrap t confidence interval for 3 parameter estimates, but I am only able to construct it for the first parameter. My code is attached below:
# Simulation settings. `beta0<--8` parses as beta0 <- -8.
# beta0, beta1 and gamma are the values used to generate the data below;
# n is the sample size per data set and N the number of loop iterations.
# alpha is not used anywhere in the code shown here.
beta0<--8
beta1<-0.03
gamma<-0.0105
alpha<-0.05
n<-100
N<-10
# Each iteration simulates one data set, bootstraps it and computes intervals.
# NOTE(review): out, ci and ci1 are overwritten on every pass and nothing is
# stored per iteration, so only the last iteration's results survive the loop.
for (i in 1:N)
{
# u: uniform draws, x: standard normal covariate, c: exponential draws with
# rate 1/1255 (compared against t1 below to decide which time is observed).
u<-runif(n)
x<-rnorm(n)
c<-rexp(n,1/1255)
# t1 is computed from u, x and the settings above; presumably an inverse-CDF
# draw from a Gompertz-type model -- confirm against the intended model.
t1<-(1/gamma)*log(1-((gamma/exp(beta0+beta1*x))*log(1-u)))
# Observed time is the smaller of t1 and c; delta is 1 where t1 > c, else 0.
t<-pmin(t1,c)
delta<-1*(t1>c)
# The value of this expression is discarded (no assignment, inside a loop).
length(delta)
# Among the delta == 1 rows, keep 1 with probability 0.75, otherwise recode to 2.
delta[delta==1]<-ifelse(rbinom(length(delta[delta==1]),1,0.75),1,2)
# 0/1 indicator columns for delta == 0, delta == 1 and delta == 2 respectively.
deltae<-ifelse(delta==0, 1,0)
deltar<-ifelse(delta==1, 1,0)
deltai<-ifelse(delta==2, 1,0)
dat=data.frame(t,delta, deltae,deltar,deltai,x)
# For delta == 2 rows only: bin t into (0,100], ..., (500,600] and pull the
# numeric lower/upper bounds out of the "(a,b]" labels with regular expressions.
# Other rows keep NA in interval/lower/upper.
# NOTE(review): a delta == 2 time above 600 would get an NA bin -- confirm this
# cannot occur or is handled.
dat$interval[delta==2] <- as.character(cut(dat$t[delta==2], breaks=seq(0, 600, 100)))
labs <- cut(dat$t[delta==2], breaks=seq(0, 600, 100))
dat$lower[delta==2]<-as.numeric( sub("\\((.+),.*", "\\1", labs) )
dat$upper[delta==2]<-as.numeric( sub("[^,]*,([^]]*)\\]", "\\1", labs) )
# Statistic function passed to boot(): `data` is the data frame and `j` the
# resampled row indices. Returns a length-6 vector: the three estimates
# (positions 1-3) followed by the three diagonal elements of solve(-hessian)
# (positions 4-6), in the same parameter order.
# The function is redefined on every loop iteration.
beta0hat.boot <- function(data,j)
{
dat<-data[j,]
data0<-dat[which(dat$delta==0),]#uncensored data
data1<-dat[which(dat$delta==1),]#right censored data
data2<-dat[which(dat$delta==2),]#interval censored data
# library() is called on every invocation of this function.
library(maxLik)
# without imputation
# Log-likelihood in para = (b0, b1, g): the sum of a term over data0 (using t),
# a term over data1 (using t) and a term over data2 (using lower and upper).
# The local `i` below is unrelated to the loop index of the enclosing for loop.
ll<-function(para)
{
b0<-para[1]
b1<-para[2]
g<-para[3]
e<-sum((b0+b1*data0$x)+g*data0$t+(1/g)*exp(b0+b1*data0$x)*(1-exp(g*data0$t)))
r<-sum((1/g)*exp(b0+b1*data1$x)*(1-exp(g*data1$t)))
i<-sum(log(exp((1/g)*exp(b0+b1*data2$x)*(1-exp(g*data2$lower)))-exp((1/g)*exp(b0+b1*data2$x)*(1-exp(g*data2$upper)))))
l<-e+r+i
return(l)
}
# Start values are hard-coded to the same numbers as beta0, beta1 and gamma.
# The `para<-` inside c() also assigns `para` in this function's environment.
est<-maxLik(logLik=ll,start=c(para<-c(-8,0.03,0.0105)))
beta0hat<-est$estimate[1]
beta1hat<-est$estimate[2]
gammahat<-est$estimate[3]
# Inverse of the negated Hessian; its diagonal is returned below and used as
# the variance input (var.t0 / var.t) in the boot.ci() calls.
observed<-solve(-est$hessian)
return(c(beta0hat,beta1hat,gammahat,observed[1,1],observed[2,2],observed[3,3]))
}
library(boot)
# 100 bootstrap replicates of the statistic function on the simulated data.
out<- boot(dat,beta0hat.boot,100)
# Studentized and percentile intervals with the variance taken from column 4.
ci<-boot.ci(out,type =c("stud","perc"),var.t0= out$t0[4],var.t=out$t[,4])
# NOTE(review): this call passes a length-1 index; the accompanying text reports
# the warning "bootstrap variances needed for studentized intervals" for it.
# boot.ci() documents a length-2 index whose second element is the position of
# the variance, so index=c(1,4), c(2,5), c(3,6) is presumably what is needed for
# the three parameters -- confirm against ?boot.ci.
ci1<-boot.ci(out,type = c("stud","perc"),var.t0= out$t0[4],var.t=out$t[,4],index=1)
}
I am only able to construct the confidence interval for the first parameter, and only without using index=1, as follows:
ci<-boot.ci(out,type =c("stud","perc"),var.t0= out$t0[4],var.t=out$t[,4])
When I add index=1,
ci1<-boot.ci(out,type = c("stud","perc"),var.t0= out$t0[4],var.t=out$t[,4],index=1)
i got warnings:
In boot.ci(out, type = c("stud", "perc"), var.t0 = out$t0[4], var.t = out$t[, :
bootstrap variances needed for studentized intervals.
which means the confidence interval cannot be built.
I want to add index=1 because I need to construct confidence intervals for the 2nd and 3rd parameters as well, using index=2 and index=3, but once I add the index I cannot get the confidence interval. Any idea how to get a bootstrap t confidence interval for more than one statistic? With the above code I am able to get only the percentile interval, not the bootstrap t interval.
dput(head(dat, 30)):
structure(list(t = c(143.786081550783, 104.647251273501, 65.5655695306165,
322.260530195967, 307.324160556309, 123.725827237157, 143.083942557736,
426.646805286557, 360.799323547846, 246.295906287976, 315.638222801499,
421.155652813492, 374.160625875751, 123.570819609099, 389.553807438155,
449.110810924173, 162.547725691726, 296.674617375856, 229.680453578098,
343.823664337269, 268.797764971971, 205.704838761594, 14.8630247008987,
91.6607201565057, 260.886289739501, 193.278377859747, 143.269719508224,
27.4780640122481, 267.191708749538, 39.8027657018974), delta = c(1,
0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 0, 1, 0, 1, 0, 1), deltae = c(0, 1, 1, 1, 0, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0
), deltar = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1), deltai = c(0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), x = c(-0.377643157724086, 0.218708630964153,
0.153162542263512, 1.28222373181234, 1.1423312203422, -1.04726335577352,
-0.360028857222088, 0.336098821643731, 0.758860540656158, 0.0533940581013279,
-0.0562132826178519, 0.0798656325287969, -0.748956798800072,
-0.235929730488004, -0.737049993834757, 1.05819046250488, 1.28776064495481,
0.457930197196181, -1.45563588128583, -1.1074384621643, -0.829026816498185,
-1.3824961444269, -1.58951008909026, -0.95052226776903, 0.0145909317659764,
-0.198775419436042, 0.0481467746529365, -0.136098038815233, -0.118319488454131,
-0.498263758291143), interval = c(NA, NA, NA, NA, "(300,400]",
NA, NA, NA, NA, NA, "(300,400]", NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), lower = c(NA,
NA, NA, NA, 300, NA, NA, NA, NA, NA, 300, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), upper = c(NA,
NA, NA, NA, 400, NA, NA, NA, NA, NA, 400, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("t",
"delta", "deltae", "deltar", "deltai", "x", "interval", "lower",
"upper"), row.names = c(NA, 30L), class = "data.frame")

Related

UPGMA with hclust plotting branch lengths as raw distances

I'm working on a presentation regarding utilizing UPGMA with the hclust() function within our research lab. According to the literature, the branch length calculated by UPGMA for any pair of elements would be 1/2 the pairwise distance between those two elements.
I'm noticing that the example dendrogram I'm building for the presentation isn't calculating branch lengths that I expected. I'm not finding anything in ?hclust that would make me think that I'm missing a function argument that is causing the UPGMA algorithm to use the raw distances as the branch lengths. I understand that in certain situations, due to the limitations of computation accuracy, having a dendrogram which is exactly ultrametric may not always be possible (from here and here, and I'm sure elsewhere as well). That still doesn't explain why I see the raw pairwise distances being plotted as the branch length between two elements.
Using the data below, here's the code I used to plot an example dendrogram...
# Hierarchical clustering of demoTable with method = "average".
# NOTE(review): hclust() documents `d` as a dist object, while the demoTable
# given via dput() is a plain 13 x 13 matrix; presumably as.dist(demoTable) was
# applied before this call -- confirm.
demoDend <- hclust(d = demoTable, method = "average") # make an hclust object
# use the ggdendro package to extract segments and labels for ggplot plotting
dendData <- ggdendro::dendro_data(demoDend)
dendSegs <- dendData$segments
dendLabs <- dendData$labels
library(ggplot2)
# Draw the dendrogram segments, put the leaf labels slightly below each label's
# y position (rotated 90 degrees), and add dashed reference lines at heights
# 0.333 (blue) and 0.2 (red).
# The segment y values come straight from the hclust object via dendro_data();
# nothing in this snippet rescales (e.g. halves) them.
ggplot()+
geom_segment(data = dendSegs, aes(x = x, y = y, xend = xend, yend = yend))+
geom_text(data = dendLabs, aes(x = x, y = y-0.05, label = label, angle = 90))+
geom_hline(aes(yintercept = 0.333), linetype = 2, color = "blue")+
geom_hline(aes(yintercept = 0.2), linetype = 2, color = "red")+
theme_bw()
The two elements that stand out are 13195 and 13199 which have a distance of 0.2, and whose branch length is being plotted as 0.2 (red line in ggplot).
Even after examining the hclust object, some of the heights for the branches are the raw distances in the input matrix, and not 1/2 the distance. Do I need to manually halve the heights in the object before plotting? Maybe I don't understand UPGMA as well as I thought? Any help or insight into the implementation of UPGMA with hclust() would be greatly appreciated.
Here's the sample distance data that I'm working with, from dput()
demoTable <- structure(c(0, 0.333333333333333, 0.333333333333333, 0, 0, 0.333333333333333,
0.333333333333333, 1, 1, 1, 1, 1, 1, NA, 0, 0, 0.333333333333333,
0.333333333333333, 0, 0, 1, 1, 1, 1, 1, 1, NA, NA, 0, 0.333333333333333,
0.333333333333333, 0, 0, 1, 1, 1, 1, 1, 1, NA, NA, NA, 0, 0,
0.333333333333333, 0.333333333333333, 1, 1, 1, 1, 1, 1, NA, NA,
NA, NA, 0, 0.333333333333333, 0.333333333333333, 1, 1, 1, 1,
1, 1, NA, NA, NA, NA, NA, 0, 0, 1, 1, 1, 1, 1, 1, NA, NA, NA,
NA, NA, NA, 0, 1, 1, 1, 1, 1, 1, NA, NA, NA, NA, NA, NA, NA,
0, 0.6, 0, 1, 0.6, 0.333333333333333, NA, NA, NA, NA, NA, NA,
NA, NA, 0, 0.6, 1, 0.5, 0.2, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0, 1, 0.6, 0.333333333333333, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0, 0.5, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0, 0.6, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0
), .Dim = c(13L, 13L), .Dimnames = list(c("13187", "13188", "13189",
"13190", "13191", "13192", "13193", "13194", "13195", "13196",
"13197", "13198", "13199"), NULL))

How to set NA values from a matrix to black-coloured tiles in a ggplot heatmap

I am working on the following structure and the following plotting code:
structure(c(NA, 11, 9, 9, 21, 7, 2, 5, 3, 0, 0, 1, 31, NA, 3,
2, 1, 0, 0, 10, 3, 0, 0, 0, 31, 16, NA, 2, 2, 10, 0, 5, 0, 0,
0, 0, 59, 65, 1, NA, 2, 4, 0, 4, 0, 0, 0, 0, 156, 23, 7, 17,
NA, 3, 2, 4, 7, 0, 0, 0, 31, 84, 0, 10, 16, NA, 0, 6, 0, 0, 2,
0, 129, 0, 2, 1, 0, 0, NA, 0, 0, 0, 0, 0, 41, 41, 0, 3, 4, 5,
0, NA, 0, 0, 0, 1, 16, 4, 1, 2, 0, 0, 0, 3, NA, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 1, 12, 2, 0, 0, 6, 0, 0, 0, 0,
NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), .Dim = c(12L,
12L), .Dimnames = list(c("WILL_", "WOULD_", "MAY_", "MIGHT_",
"CAN_", "COULD_", "SHALL_", "SHOULD_", "MUST_", "OUGHT TO_",
"USED TO_", "HAVE TO_"), c("_WILL", "_WOULD", "_MAY", "_MIGHT",
"_CAN", "_COULD", "_SHALL", "_SHOULD", "_MUST", "_OUGHT TO",
"_USED TO", "_HAVE TO")))
# Bin edges passed to the stepped fill scale below.
breaks <- c(0,1,5,10,50,100,500,100000)
# Reshape the matrix to long form (Var1, Var2, Freq), build a text label that
# is blank where Freq is NA or 0, and draw a labelled tile heatmap with the
# row order reversed on the y axis.
# NOTE(review): `structure` presumably names the matrix shown above (which
# would shadow base::structure) -- confirm.
# NOTE(review): %>%, mutate() and fct_rev() need their packages attached
# (presumably magrittr/dplyr and forcats); no library() calls are shown.
# NA cells currently get their colour from na.value = "grey" in
# scale_fill_steps(); the fill is mapped through a log transformation.
reshape2::melt(structure, value.name = "Freq") %>%
mutate(label = ifelse(is.na(Freq) | Freq == 0, "", as.character(Freq))) %>%
ggplot(aes(Var2, fct_rev(Var1))) +
geom_tile(aes(fill = Freq), color = "black") +
geom_text(aes(label = label), color = "black") +
scale_fill_steps(low = "white", high = "purple", breaks = breaks, na.value = "grey",trans = "log")+
scale_x_discrete(NULL, expand = c(0, 0), position="top") +
scale_y_discrete(NULL, expand = c(0, 0)) +
theme(axis.text.x = element_text(angle=60,vjust = 0.5, hjust = 0))
I am trying to tweak the code so that original NA values (seen on the plot as the tiles forming a diagonal line from the co-occurrence of WILL WILL to HAVE TO HAVE TO, and the X HAVE TO column) are represented as black tiles separately from the other tiles which I would like to keep as they are.
Looking for tips on how to do this as I think I'm doing something wrong with the representation of values at the beginning of my code.
All the best
Cameron

Heatmap error with : 'x' must be a numeric matrix

I know this question might be duplicated, but I was trying some of the solutions posted in this forum with no success, and that's why I am posting it here.
Let's start with my dataset to make it reproducible.
dataset <- structure(list(Comparison = c("SH vs SAP", "SH vs NEA", "SH vs ERE",
"SH vs ERH", "SH vs NAL", "SAP vs NEA", "SAP vs ERE", "SAP vs ERH",
"SAP vs NAL", "NEA vs ERE", "NEA vs ERH", "NEA vs NAL", "ERE vs ERH",
"ERE vs NAL", "ERH vs NAL"), DC1 = c(NA, NA, NA, NA, NA, 1, 1,
1, NA, 1, 1, NA, 1, NA, NA), DC2 = c(NA, NA, NA, NA, NA, 1, 1,
1, NA, 0, 0, NA, 1, NA, NA), DC3 = c(1, 1, 1, 1, 1, 1, 1, 1,
0, 1, 0, 0, 1, 0, 1), DC4 = c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 1, 1, 1), DC5 = c(0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1), DC6 = c(0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), DC7 = c(0,
1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1), DC8 = c(0, 1, 0, 1,
1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1), DC9 = c(0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0), DC10 = c(1, 1, 0, 1, 1, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0)), .Names = c("Comparison", "DC1", "DC2", "DC3",
"DC4", "DC5", "DC6", "DC7", "DC8", "DC9", "DC10"), class = "data.frame", row.names = c(NA,
15L))
I have tried to change the dataset to a matrix, as has been suggested in other posts. However, it keeps giving the same error:
heatmap(dataset)
heatmap(as.matrix(dataset))
Error in heatmap(dataset) :
'x' must be a numeric matrix
Error in heatmap(as.matrix(dataset)) :
'x' must be a numeric matrix
I tried converting the columns to numeric, but the error persists. The same happens when I remove the DC1 and DC2 columns, which contain NA values.
Any help to spot the problem?
dataset[, 1] is character so as.matrix(dataset) is a character matrix. This explains:
'x' must be a numeric matrix
You probably want
heatmap(as.matrix(dataset[, -1]))
And how can I include the names of the rows on the right?
Set the Comparison variable as the rownames of the matrix:
# Drop the first column (the character Comparison column) so that as.matrix()
# yields a numeric matrix.
m <- as.matrix(dataset[, -1])
# Carry the Comparison labels over as row names so they label the heatmap rows.
rownames(m) <- dataset$Comparison
heatmap(m)
So your real issue is really "Convert the values in a column into row names in an existing data frame in R", although the problem is presented with heatmap.

loop through column names of a data frame with shift

All, I'm working on a data set that looks like the one below.
I want to create new columns which are lags of the variables I am interested in.
My current code looks like this:
library(data.table)
# Convert `final` to a plain data.frame so the base-style row subset below works.
setDF(final)
# Keep rows where either consecutive-count column exceeds 4.
final.consec<-final[final$priva.consec.count>4 | final$unpriva.consec.count>4,]
# Names, as character strings, of the columns to lag.
interesting.vars<-c("IR", "sales","totalasset","GM","Export","Leverage","ROA")
# For each name, add lag-1 and lag-2 columns computed within firmid.
# NOTE(review): inside the data.table call, `i` is the length-1 character string
# from the loop, not the column it names, so shift() is applied to that string;
# this is presumably why the new columns come out all NA. Looking the column up
# by name (e.g. get(i), or .SDcols = i with .SD[[1]]) is the likely fix -- confirm.
# paste() separates its arguments with a space by default, so the new columns
# are named like "L.1. IR" (with a space after the second dot).
for (i in interesting.vars) {
#i=as.name(i)
setDT(final.consec)[,paste("L.1.",i):=shift(i,n=1),firmid]
setDT(final.consec)[,paste("L.2.",i):=shift(i,n=2),firmid]
}
but this produces all NAs instead of the lags and leads I want.
I searched Google and this site; some answers suggest using
shift(final.consec[[i]],n=1)
or
shift(as.name(i),n=1)
but none of them work.
final = structure(list(year = c(2002, 2003, 2004, 2005, 2006, 2007, 2003,
2004, 2005, 2006, 2007, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007), firmid = c("016016226", "016016226", "016016226", "016016226",
"016016226", "016016226", "101100393", "101100393", "101100393",
"101100393", "101100393", "101100481", "101100481", "101100481",
"101100481", "101100481", "101100481", "101100481", "101100481",
"101100481", "101100588", "101100588", "101100588", "101100588",
"101100588", "101100588", "101100588", "101100588", "101100588",
"101100588"), provinceid = c(610000, 610000, 610000, 610000,
610000, 610000, 110000, 110000, 110000, 110000, 110000, 110000,
110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000,
110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000,
110000, 110000), industrycode2 = c(3100, 3100, 3100, 3100, 3100,
3100, 3100, 3100, 3100, 3100, 3100, 2300, 2300, 2300, 2300, 2300,
2300, 2300, 2300, 2300, 2300, 2300, 2300, 2300, 2300, 2300, 2300,
2300, 2300, 2300), sales = c(1998, 2995, 2902, 5006, 5291, 6241,
98670, 60643, 79075, 93600, 55591, 6525, 7984, 7654, 7432, 7596,
8410, 7359, 5456, 6645, 10355, 9498, 8967, 11551, 13647, 14008,
20647, 21437, 20155, 14535), cogs = c(1602, 898, 1301, 1453,
2615, 2835, 66143, 40037, 55971, 66167, 46862, 7430, 7487, 6820,
7482, 7436, 7905, 7243, 6560, 6605, 8126, 7868, 8722, 8700, 11222,
11521, 18282, 20322, 17859, 12913), inventory = c(1034, 1012,
896, 653, 1685, 2036, 16690, 29623, 28770, 27124, 6712, 0, 0,
0, 93, 21, 79, 78, 18, 797, 1778, 1588, 2785, 3414, 4435, 4147,
5874, 5339, 3991, 3345), fixedasset = c(29577, 30856, 28250,
28245, 28168, 27919, 77646, 101803, 94984, 84585, 54935, 18418,
17192, 17518, 13219, 16600, 14132, 20737, 21332, 25192, 15423,
7028, 5513, 11679, 9364, 35778, 59804, 65344, 69266, 73777),
totalasset = c(74278, 74358, 74000, 74006, 74011, 73913,
273642, 321636, 357791, 373095, 340400, 31312, 31242, 35461,
42755, 47784, 43798, 41565, 40220, 45753, 30114, 31164, 31347,
31526, 32244, 61842, 87170, 88730, 98821, 104000), stateshare = c(1,
1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1), foreignshare = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), privateshare = c(0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), stateown = c(1, 1, 1, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1), foreignown = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), privateown = c(0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), mixown = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), stateonly = c(1, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1), mixonly = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), foreignonly = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), privateonly = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), gs = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1), gm = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), gf = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), privatize = c(0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), IR = c(0.645443201065063, 1.12694883346558,
0.688700973987579, 0.449414998292923, 0.644359469413757,
0.718165755271912, 0.25233206152916, 0.739890575408936, 0.514016211032867,
0.409932434558868, 0.143229052424431, 0.0191550496965647,
0.0137506183236837, 0.0117767816409469, 0.0124298315495253,
0.00526109151542187, 0.00999367516487837, 0.0107690179720521,
0.00274390238337219, 0.120666161179543, 0.218803837895393,
0.201830193400383, 0.319307506084442, 0.392413794994354,
0.395205855369568, 0.3599514067173, 0.321299642324448, 0.262720197439194,
0.223472759127617, 0.259041279554367), GM = c(0.24719101190567,
0.807359278202057, 0.702536523342133, 0.714905440807343,
0.717204809188843, 0.731220781803131, 0.491767823696136,
0.514673948287964, 0.412785202264786, 0.41460245847702, 0.186270326375961,
-0.0678684562444687, 0.0663817301392555, 0.122287392616272,
-0.00668270513415337, 0.0215169452130795, 0.0638836175203323,
0.016015462577343, 0.00807300768792629, 0.014724993146956,
0.274304687976837, 0.207168281078339, 0.0280898883938789,
0.327701151371002, 0.216093391180038, 0.215866684913635,
0.129362210631371, 0.0548666454851627, 0.12856262922287,
0.125609844923019), CI = c(0.398193269968033, 0.414965450763702,
0.381756752729416, 0.381658256053925, 0.380592077970505,
0.377727866172791, 0.283750295639038, 0.316516190767288,
0.265473425388336, 0.226711690425873, 0.161383673548698,
0.588208973407745, 0.550284862518311, 0.494007498025894,
0.309180200099945, 0.347396612167358, 0.322663128376007,
0.498905330896378, 0.530382871627808, 0.550608694553375,
0.512153804302216, 0.225516617298126, 0.175870105624199,
0.370456129312515, 0.290410608053207, 0.578538835048676,
0.68606173992157, 0.728544950485229, 0.700923919677734, 0.709394216537476
), WACC = c(0.000888553797267377, 0.000511041202116758, 0.000527027004864067,
0.000459422240965068, 0.000513437204062939, 0.000608823902439326,
0.0238377153873444, 0.030428808182478, 0.0118085695430636,
0.0155402785167098, 0.00844594556838274, -0.0610947869718075,
-0.0143076628446579, -0.0111107975244522, 0.00177756987977773,
-0.0173279754817486, -0.0689300894737244, -0.0779983177781105,
-0.0916144475340843, -0.0209603756666183, -0.00126187154091895,
0.000481324619613588, 0.000191405866644345, 0.000348918343661353,
0.00533432560041547, -3.23404929076787e-05, -0.000206493074074388,
-0.0318719707429409, -0.0211898274719715, 0.00500000035390258
), Salesgrowth = c(NA, 0.404797554016113, -0.301691830158234,
0.210879027843475, 0.648279845714569, 0.0701394379138947,
NA, -0.486776739358902, 0.265392541885376, 0.168633610010147,
-0.496754705905914, NA, 0.201798588037491, -0.042211152613163,
-0.029433386400342, 0.0218267906457186, 0.101799681782722,
-0.133497416973114, -0.299208134412766, 0.197148770093918,
NA, -0.0863882452249527, -0.0575300790369511, 0.253220856189728,
0.166747704148293, 0.0261088777333498, 0.3879414498806, 0.0375483706593513,
-0.0616660043597221, -0.326892852783203), Export = c(0, 0,
0, 0, 0, 0, 0.0718493312597275, 0.0859285816550255, 0.113112106919289,
0.287075728178024, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), Leverage = c(10.397575378418, 10.444974899292,
11.0325202941895, 10.0521974563599, 1.022047996521, 1.01452708244324,
1.45040833950043, 1.67869281768799, 1.03448712825775, 1.11970138549805,
0.892383217811584, 1.10642445087433, 1.03876268863678, 1.37419652938843,
1.84710657596588, 3.55129051208496, 4.52725887298584, 7.83046531677246,
10.0521974563599, 9.32586765289307, 0.210126578807831, 0.234657898545265,
0.242646470665932, 0.251528382301331, 0.28160896897316, 1.45746076107025,
2.464350938797, 2.97518038749695, 3.88535690307617, 0.26302495598793
), Current = c(1.55197286605835, 1.69315719604492, 2.29052948951721,
2.24755930900574, 2.20689654350281, 2.10811376571655, 1.25970602035522,
1.65375781059265, 1.66235971450806, 1.39208269119263, 1.29157769680023,
0.969440996646881, 0.790197372436523, 0.650628983974457,
0.69122976064682, 1.09663212299347, 1.12163543701172, 1.6644686460495,
1.97333335876465, 2.0823986530304, 0.435712337493896, 0.577177941799164,
0.511789321899414, 0.731555223464966, 0.587089836597443,
2.40473389625549, 2.29052948951721, 2.24755930900574, 2.20689654350281,
1.11392271518707), Cover = c(0.395348846912384, 0.260869562625885,
0.209302321076393, 0.0377358496189117, 0.00228310492821038,
0.00416666688397527, 0.952529191970825, 0.885167479515076,
0.762962937355042, 0.727603793144226, -0.188469097018242,
0.00704225338995457, 0.0597014911472797, 0.0273972600698471,
-0.0126262623816729, 0.0230215825140476, 0.0177725125104189,
0.00775716686621308, -0.00155763234943151, 0.0120259020477533,
0.0303643718361855, 0, 0.00171057134866714, 0.00918484479188919,
0.389791190624237, -0.0153139354661107, -0.0214592274278402,
-0.00146555935498327, 0.00122399022802711, -0.00661703897640109
), Bank = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1), Inctaxrate = c(0,
0, 0.0666666701436043, 0, 0, 0, 0, 0.0026208502240479, 0.0247787609696388,
0.0154994260519743, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.142857149243355,
0.200000002980232, 0.181818187236786, 0.335363984107971,
0.333333343267441, 0.333333343267441, 0.33047690987587, 0,
0, 0), ROA = c(0.000659683893900365, 0.000430350453825668,
0.000405405415222049, 0.00043239738442935, 0.000499925692565739,
0.000595294462982565, 0.0148917194455862, 0.021353330463171,
0.00473740277811885, 0.00933810416609049, 0.00366333732381463,
-0.0607434846460819, -0.0158440563827753, -0.0109415976330638,
0.00189451524056494, -0.0169931352138519, -0.067902646958828,
-0.077444963157177, -0.0968421399593353, -0.0206762403249741,
0.000232450023759156, 0.000481324619613588, 0.000350910762790591,
9.51595502556302e-05, 0.000124054087791592, 0.000129361927974969,
2.29436736844946e-05, -0.0319057814776897, -0.0211797095835209,
0.00492307683452964), ROS = c(0.02452452480793, 0.0106844743713737,
0.0135440183803439, 0.0117001831531525, 0.00707456981763244,
0.00784313771873713, 0.0412992797791958, 0.113252975046635,
0.0214353464543819, 0.037222221493721, 0.0224316883832216,
-0.291494250297546, -0.0619989968836308, -0.0506924502551556,
0.0108988154679537, -0.106898367404938, -0.241563051939011,
-0.216171622276306, -0.163818180561066, -0.142362684011459,
0.000676001945976168, 0.00157927989494056, 0.00122672016732395,
0.000259717780863866, 0.000293104705633596, 0.000571102253161371,
9.68663734965958e-05, -0.132061392068863, -0.103845201432705,
0.0352253168821335), num_ID = c(66, 66, 66, 66, 66, 66, 95,
95, 95, 95, 95, 96, 96, 96, 96, 96, 96, 96, 96, 96, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100), priva = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
priva.year = c(0, 0, 0, 0, 0, 0, 2005, 2005, 2005, 2005,
2005, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), relative.year = c(NA, NA, NA, NA, NA, NA, -2, -1,
0, 1, 2, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), priva.consec.count = c(0L, 0L,
0L, 0L, 0L, 0L, 5L, 5L, 5L, 5L, 5L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), unpriva.consec.count = c(5,
5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9), `L.1. IR` = c(NA, 0.645443201065063,
1.12694883346558, 0.688700973987579, 0.449414998292923, 0.644359469413757,
NA, 0.25233206152916, 0.739890575408936, 0.514016211032867,
0.409932434558868, NA, 0.0191550496965647, 0.0137506183236837,
0.0117767816409469, 0.0124298315495253, 0.00526109151542187,
0.00999367516487837, 0.0107690179720521, 0.00274390238337219,
NA, 0.218803837895393, 0.201830193400383, 0.319307506084442,
0.392413794994354, 0.395205855369568, 0.3599514067173, 0.321299642324448,
0.262720197439194, 0.223472759127617), `L.2. IR` = c(NA,
NA, 0.645443201065063, 1.12694883346558, 0.688700973987579,
0.449414998292923, NA, NA, 0.25233206152916, 0.739890575408936,
0.514016211032867, NA, NA, 0.0191550496965647, 0.0137506183236837,
0.0117767816409469, 0.0124298315495253, 0.00526109151542187,
0.00999367516487837, 0.0107690179720521, NA, NA, 0.218803837895393,
0.201830193400383, 0.319307506084442, 0.392413794994354,
0.395205855369568, 0.3599514067173, 0.321299642324448, 0.262720197439194
), `H.1. IR` = c(1.12694883346558, 0.688700973987579, 0.449414998292923,
0.644359469413757, 0.718165755271912, NA, 0.739890575408936,
0.514016211032867, 0.409932434558868, 0.143229052424431,
NA, 0.0137506183236837, 0.0117767816409469, 0.0124298315495253,
0.00526109151542187, 0.00999367516487837, 0.0107690179720521,
0.00274390238337219, 0.120666161179543, NA, 0.201830193400383,
0.319307506084442, 0.392413794994354, 0.395205855369568,
0.3599514067173, 0.321299642324448, 0.262720197439194, 0.223472759127617,
0.259041279554367, NA), `H.2. IR` = c(0.688700973987579,
0.449414998292923, 0.644359469413757, 0.718165755271912,
NA, NA, 0.514016211032867, 0.409932434558868, 0.143229052424431,
NA, NA, 0.0117767816409469, 0.0124298315495253, 0.00526109151542187,
0.00999367516487837, 0.0107690179720521, 0.00274390238337219,
0.120666161179543, NA, NA, 0.319307506084442, 0.392413794994354,
0.395205855369568, 0.3599514067173, 0.321299642324448, 0.262720197439194,
0.223472759127617, 0.259041279554367, NA, NA), `L.1. sales` = c(NA,
1998, 2995, 2902, 5006, 5291, NA, 98670, 60643, 79075, 93600,
NA, 6525, 7984, 7654, 7432, 7596, 8410, 7359, 5456, NA, 10355,
9498, 8967, 11551, 13647, 14008, 20647, 21437, 20155), `L.2. sales` = c(NA,
NA, 1998, 2995, 2902, 5006, NA, NA, 98670, 60643, 79075,
NA, NA, 6525, 7984, 7654, 7432, 7596, 8410, 7359, NA, NA,
10355, 9498, 8967, 11551, 13647, 14008, 20647, 21437), `H.1. sales` = c(2995,
2902, 5006, 5291, 6241, NA, 60643, 79075, 93600, 55591, NA,
7984, 7654, 7432, 7596, 8410, 7359, 5456, 6645, NA, 9498,
8967, 11551, 13647, 14008, 20647, 21437, 20155, 14535, NA
), `H.2. sales` = c(2902, 5006, 5291, 6241, NA, NA, 79075,
93600, 55591, NA, NA, 7654, 7432, 7596, 8410, 7359, 5456,
6645, NA, NA, 8967, 11551, 13647, 14008, 20647, 21437, 20155,
14535, NA, NA), `L.1. totalasset` = c(NA, 74278, 74358, 74000,
74006, 74011, NA, 273642, 321636, 357791, 373095, NA, 31312,
31242, 35461, 42755, 47784, 43798, 41565, 40220, NA, 30114,
31164, 31347, 31526, 32244, 61842, 87170, 88730, 98821),
`L.2. totalasset` = c(NA, NA, 74278, 74358, 74000, 74006,
NA, NA, 273642, 321636, 357791, NA, NA, 31312, 31242, 35461,
42755, 47784, 43798, 41565, NA, NA, 30114, 31164, 31347,
31526, 32244, 61842, 87170, 88730), `H.1. totalasset` = c(74358,
74000, 74006, 74011, 73913, NA, 321636, 357791, 373095, 340400,
NA, 31242, 35461, 42755, 47784, 43798, 41565, 40220, 45753,
NA, 31164, 31347, 31526, 32244, 61842, 87170, 88730, 98821,
104000, NA), `H.2. totalasset` = c(74000, 74006, 74011, 73913,
NA, NA, 357791, 373095, 340400, NA, NA, 35461, 42755, 47784,
43798, 41565, 40220, 45753, NA, NA, 31347, 31526, 32244,
61842, 87170, 88730, 98821, 104000, NA, NA), `L.1. GM` = c(NA,
0.24719101190567, 0.807359278202057, 0.702536523342133, 0.714905440807343,
0.717204809188843, NA, 0.491767823696136, 0.514673948287964,
0.412785202264786, 0.41460245847702, NA, -0.0678684562444687,
0.0663817301392555, 0.122287392616272, -0.00668270513415337,
0.0215169452130795, 0.0638836175203323, 0.016015462577343,
0.00807300768792629, NA, 0.274304687976837, 0.207168281078339,
0.0280898883938789, 0.327701151371002, 0.216093391180038,
0.215866684913635, 0.129362210631371, 0.0548666454851627,
0.12856262922287), `L.2. GM` = c(NA, NA, 0.24719101190567,
0.807359278202057, 0.702536523342133, 0.714905440807343,
NA, NA, 0.491767823696136, 0.514673948287964, 0.412785202264786,
NA, NA, -0.0678684562444687, 0.0663817301392555, 0.122287392616272,
-0.00668270513415337, 0.0215169452130795, 0.0638836175203323,
0.016015462577343, NA, NA, 0.274304687976837, 0.207168281078339,
0.0280898883938789, 0.327701151371002, 0.216093391180038,
0.215866684913635, 0.129362210631371, 0.0548666454851627),
`H.1. GM` = c(0.807359278202057, 0.702536523342133, 0.714905440807343,
0.717204809188843, 0.731220781803131, NA, 0.514673948287964,
0.412785202264786, 0.41460245847702, 0.186270326375961, NA,
0.0663817301392555, 0.122287392616272, -0.00668270513415337,
0.0215169452130795, 0.0638836175203323, 0.016015462577343,
0.00807300768792629, 0.014724993146956, NA, 0.207168281078339,
0.0280898883938789, 0.327701151371002, 0.216093391180038,
0.215866684913635, 0.129362210631371, 0.0548666454851627,
0.12856262922287, 0.125609844923019, NA), `H.2. GM` = c(0.702536523342133,
0.714905440807343, 0.717204809188843, 0.731220781803131,
NA, NA, 0.412785202264786, 0.41460245847702, 0.186270326375961,
NA, NA, 0.122287392616272, -0.00668270513415337, 0.0215169452130795,
0.0638836175203323, 0.016015462577343, 0.00807300768792629,
0.014724993146956, NA, NA, 0.0280898883938789, 0.327701151371002,
0.216093391180038, 0.215866684913635, 0.129362210631371,
0.0548666454851627, 0.12856262922287, 0.125609844923019,
NA, NA), `L.1. Export` = c(NA, 0, 0, 0, 0, 0, NA, 0.0718493312597275,
0.0859285816550255, 0.113112106919289, 0.287075728178024,
NA, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0
), `L.2. Export` = c(NA, NA, 0, 0, 0, 0, NA, NA, 0.0718493312597275,
0.0859285816550255, 0.113112106919289, NA, NA, 0, 0, 0, 0,
0, 0, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0), `H.1. Export` = c(0,
0, 0, 0, 0, NA, 0.0859285816550255, 0.113112106919289, 0.287075728178024,
1, NA, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0,
0, NA), `H.2. Export` = c(0, 0, 0, 0, NA, NA, 0.113112106919289,
0.287075728178024, 1, NA, NA, 0, 0, 0, 0, 0, 0, 0, NA, NA,
0, 0, 0, 0, 0, 0, 0, 0, NA, NA), `L.1. Leverage` = c(NA,
10.397575378418, 10.444974899292, 11.0325202941895, 10.0521974563599,
1.022047996521, NA, 1.45040833950043, 1.67869281768799, 1.03448712825775,
1.11970138549805, NA, 1.10642445087433, 1.03876268863678,
1.37419652938843, 1.84710657596588, 3.55129051208496, 4.52725887298584,
7.83046531677246, 10.0521974563599, NA, 0.210126578807831,
0.234657898545265, 0.242646470665932, 0.251528382301331,
0.28160896897316, 1.45746076107025, 2.464350938797, 2.97518038749695,
3.88535690307617), `L.2. Leverage` = c(NA, NA, 10.397575378418,
10.444974899292, 11.0325202941895, 10.0521974563599, NA,
NA, 1.45040833950043, 1.67869281768799, 1.03448712825775,
NA, NA, 1.10642445087433, 1.03876268863678, 1.37419652938843,
1.84710657596588, 3.55129051208496, 4.52725887298584, 7.83046531677246,
NA, NA, 0.210126578807831, 0.234657898545265, 0.242646470665932,
0.251528382301331, 0.28160896897316, 1.45746076107025, 2.464350938797,
2.97518038749695), `H.1. Leverage` = c(10.444974899292, 11.0325202941895,
10.0521974563599, 1.022047996521, 1.01452708244324, NA, 1.67869281768799,
1.03448712825775, 1.11970138549805, 0.892383217811584, NA,
1.03876268863678, 1.37419652938843, 1.84710657596588, 3.55129051208496,
4.52725887298584, 7.83046531677246, 10.0521974563599, 9.32586765289307,
NA, 0.234657898545265, 0.242646470665932, 0.251528382301331,
0.28160896897316, 1.45746076107025, 2.464350938797, 2.97518038749695,
3.88535690307617, 0.26302495598793, NA), `H.2. Leverage` = c(11.0325202941895,
10.0521974563599, 1.022047996521, 1.01452708244324, NA, NA,
1.03448712825775, 1.11970138549805, 0.892383217811584, NA,
NA, 1.37419652938843, 1.84710657596588, 3.55129051208496,
4.52725887298584, 7.83046531677246, 10.0521974563599, 9.32586765289307,
NA, NA, 0.242646470665932, 0.251528382301331, 0.28160896897316,
1.45746076107025, 2.464350938797, 2.97518038749695, 3.88535690307617,
0.26302495598793, NA, NA), `L.1. ROA` = c(NA, 0.000659683893900365,
0.000430350453825668, 0.000405405415222049, 0.00043239738442935,
0.000499925692565739, NA, 0.0148917194455862, 0.021353330463171,
0.00473740277811885, 0.00933810416609049, NA, -0.0607434846460819,
-0.0158440563827753, -0.0109415976330638, 0.00189451524056494,
-0.0169931352138519, -0.067902646958828, -0.077444963157177,
-0.0968421399593353, NA, 0.000232450023759156, 0.000481324619613588,
0.000350910762790591, 9.51595502556302e-05, 0.000124054087791592,
0.000129361927974969, 2.29436736844946e-05, -0.0319057814776897,
-0.0211797095835209), `L.2. ROA` = c(NA, NA, 0.000659683893900365,
0.000430350453825668, 0.000405405415222049, 0.00043239738442935,
NA, NA, 0.0148917194455862, 0.021353330463171, 0.00473740277811885,
NA, NA, -0.0607434846460819, -0.0158440563827753, -0.0109415976330638,
0.00189451524056494, -0.0169931352138519, -0.067902646958828,
-0.077444963157177, NA, NA, 0.000232450023759156, 0.000481324619613588,
0.000350910762790591, 9.51595502556302e-05, 0.000124054087791592,
0.000129361927974969, 2.29436736844946e-05, -0.0319057814776897
), `H.1. ROA` = c(0.000430350453825668, 0.000405405415222049,
0.00043239738442935, 0.000499925692565739, 0.000595294462982565,
NA, 0.021353330463171, 0.00473740277811885, 0.00933810416609049,
0.00366333732381463, NA, -0.0158440563827753, -0.0109415976330638,
0.00189451524056494, -0.0169931352138519, -0.067902646958828,
-0.077444963157177, -0.0968421399593353, -0.0206762403249741,
NA, 0.000481324619613588, 0.000350910762790591, 9.51595502556302e-05,
0.000124054087791592, 0.000129361927974969, 2.29436736844946e-05,
-0.0319057814776897, -0.0211797095835209, 0.00492307683452964,
NA), `H.2. ROA` = c(0.000405405415222049, 0.00043239738442935,
0.000499925692565739, 0.000595294462982565, NA, NA, 0.00473740277811885,
0.00933810416609049, 0.00366333732381463, NA, NA, -0.0109415976330638,
0.00189451524056494, -0.0169931352138519, -0.067902646958828,
-0.077444963157177, -0.0968421399593353, -0.0206762403249741,
NA, NA, 0.000350910762790591, 9.51595502556302e-05, 0.000124054087791592,
0.000129361927974969, 2.29436736844946e-05, -0.0319057814776897,
-0.0211797095835209, 0.00492307683452964, NA, NA)), .Names = c("year",
"firmid", "provinceid", "industrycode2", "sales", "cogs", "inventory",
"fixedasset", "totalasset", "stateshare", "foreignshare", "privateshare",
"stateown", "foreignown", "privateown", "mixown", "stateonly",
"mixonly", "foreignonly", "privateonly", "gs", "gm", "gf", "privatize",
"IR", "GM", "CI", "WACC", "Salesgrowth", "Export", "Leverage",
"Current", "Cover", "Bank", "Inctaxrate", "ROA", "ROS", "num_ID",
"priva", "priva.year", "relative.year", "priva.consec.count",
"unpriva.consec.count", "L.1. IR", "L.2. IR", "H.1. IR", "H.2. IR",
"L.1. sales", "L.2. sales", "H.1. sales", "H.2. sales", "L.1. totalasset",
"L.2. totalasset", "H.1. totalasset", "H.2. totalasset", "L.1. GM",
"L.2. GM", "H.1. GM", "H.2. GM", "L.1. Export", "L.2. Export",
"H.1. Export", "H.2. Export", "L.1. Leverage", "L.2. Leverage",
"H.1. Leverage", "H.2. Leverage", "L.1. ROA", "L.2. ROA", "H.1. ROA",
"H.2. ROA"), row.names = c(NA, 30L), class = "data.frame")
==================================update=================================
According to Frank's answer, the solution to this problem is:
# Convert final.consec to a data.table by reference; the `:=` call below
# will not work without this.
setDT(final.consec)
interesting.vars <- c(
  "IR", "sales", "totalasset", "GM", "Export", "Leverage", "ROA"
)
# Lead 1: for every variable of interest, add a column holding the value
# shifted one row ahead within each firmid (NA where no next row exists).
anscols.Lead1 <- paste("Lead.1", interesting.vars, sep = "_")
final.consec[
  ,
  (anscols.Lead1) := shift(.SD, n = 1, fill = NA, type = "lead"),
  .SDcols = interesting.vars,
  by = firmid
]

Mapping content of one matrix onto structure of another matrix

I have two matrices sourced from the same dataset but with different amounts of data available for each.
I want to create a dataset that is a replicate of x in terms of column names and row names but which contains the data values in y. If the data is not available then an NA would be used as the value for that coordinate.
Not all of the row names in x are present in y and vice versa. The same holds true for the column names.
For the example input data I've given below, the part of each row name in x that corresponds to a row name in y runs from the start of the row name up to the | (I want to retain everything after the | for other mappings).
What is the most efficient way to do this?
DESIRED OUTPUT
# Target result: an 11 x 5 matrix carrying the same row and column names as x;
# NA marks the cells for which no value is given.
z = structure(c(NA, 1, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA,
NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), .Dim = c(11L, 5L), .Dimnames = list(
c("AACSL|729522", "AACS|65985", "AADACL2|344752", "AADACL3|126767",
"AADACL4|343066", "AADAC|13", "AADAT|51166", "AAGAB|79719",
"AAK1|22848", "AAK12|14", "AANAT|15"), c("S18", "S20", "S45",
"S95", "S100")))
EXAMPLE INPUT
# Example matrix x (11 x 5): row names have the form <name>|<number>,
# columns are S18, S20, S45, S95 and S100.
x = structure(c(0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0), .Dim = c(11L,
5L), .Dimnames = list(c("AACSL|729522", "AACS|65985", "AADACL2|344752",
"AADACL3|126767", "AADACL4|343066", "AADAC|13", "AADAT|51166",
"AAGAB|79719", "AAK1|22848", "AAK12|14", "AANAT|15"), c("S18",
"S20", "S45", "S95", "S100")))
# Example matrix y (11 x 4): row names are bare names with no |<number> suffix,
# columns are S18, S10, S45 and S95.
y = structure(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0), .Dim = c(11L, 4L), .Dimnames = list(c("A1BG",
"A1CF", "A2ML1", "A4GALT", "AACS", "AAK1", "AARD", "AARS2", "AASDHPPT",
"AASS", "BAACS"), c("S18", "S10", "S45", "S95")))
I think there might be a slight problem with the example that you provided: I cannot see how z is derived from the x and y above. See this code:
# Which row-name prefixes of x (the text before the first "|") also occur
# among the row names of y?
intersect(
  sapply(
    rownames(x),
    # split on the literal "|" and keep only the first piece
    function(i) strsplit(x = i, split = "|", fixed = TRUE)[[1]][[1]]
  ),
  rownames(y)
)
#[1] "AACS" "AAK1"
Weird, right? I mean, only 2 of the codes in x are present in y. However, I think the code below does what you are planning (with the exception of this inconsistency):
library(data.table)
library(reshape2)
library(dplyr)
# Build xy: a data frame with x's row names and columns, filled with the
# matching values from y (NA where y has no matching row/column).
# Approach: melt x to long form, left-merge the long form of y onto it,
# then cast back to wide form.
x %>% as.data.frame %>% mutate(rownames=rownames(x)) %>%
# nms: the part of each x row name before the "|", used as the key into y's row names
mutate(nms=sapply(rownames(x),
function(i){
return(
strsplit(x=i,split="|",fixed=TRUE)[[1]][[1]])
})) %>%
melt(id.vars=c("nms","rownames")) %>%
# all.x=TRUE keeps every cell of x; cells without a match in y get NA in value.y
merge(., y %>% as.data.frame %>% mutate(nms=rownames(y))%>% melt(id.vars="nms"), by=c("variable","nms"), all.x=TRUE) %>%
# drop the key and x's own values (value.x), then spread y's values (value.y) back to wide form
select(-nms, -value.x) %>% dcast(formula = rownames~variable, value.var="value.y") -> xy
# now put the row names back where they belong
rownames(xy)<-xy$rownames
# now the only thing left is to restore x's row and column order
# (selecting colnames(x) also drops the helper "rownames" column)
xy[rownames(x),colnames(x)] -> xy
Or have I misunderstood some of your points?

Resources