Using RNN in R with multiple input variables - r

According to a previous question, there is a simple example:
# ---- Example ----
# Train an RNN (package rnn) on binary-encoded sums of two integers.
library("rnn")
# Create training numbers.
set.seed(1)
X1 <- sample(0:127, 7000, replace = TRUE)
X2 <- sample(0:127, 7000, replace = TRUE)
# Create training response numbers (at most 127 + 127 = 254).
Y <- X1 + X2
# Convert to binary with length = 8.
X1 <- int2bin(X1, length = 8)
X2 <- int2bin(X2, length = 8)
Y <- int2bin(Y, length = 8)
# Create 3d array: dim 1: samples; dim 2: time; dim 3: variables.
X <- array(c(X1, X2), dim = c(dim(X1), 2))
# Train the model.
model <- trainr(
  Y = Y,
  X = X,
  learningrate = 0.1,
  hidden_dim = 10,
  start_from_end = TRUE
)
# ---- End of example ----
However, I want to ask how to define the input vector X of multiple variables for the trainr(). My data type is:
Y = 1
T = 0 T = 1 T = 3 T = 4 T = 5
X1 = 1 0 1 0 1
X2 = 1 1 1 0 1
X3 = 0 0 0 0 1
X1 = 1 0 1 0 1
X2 = 0 1 1 1 1
X3 = 0 0 0 0 1
...
Y = 2
T = 0 T = 1 T = 3 T = 4 T = 5
X1 = 0 0 1 0 1
X2 = 1 1 0 0 1
X3 = 0 1 0 0 1
X1 = 1 0 1 0 1
X2 = 1 1 1 0 1
X3 = 0 1 0 0 1
...
Y = 3
T = 0 T = 1 T = 3 T = 4 T = 5
X1 = 1 0 1 0 1
X2 = 1 0 1 0 1
X3 = 0 1 0 0 1
X1 = 1 1 1 0 1
X2 = 1 0 1 0 1
X3 = 0 1 0 0 0
...
To understand details of the package, I tried the following cases:
# Experiment: train an RNN on the sum of four integer inputs, then compare its
# predictions on fresh inputs against the true sums.
library(rnn)
set.seed(1)
# Four input variables, 5000 training samples each, drawn from 0..127.
X1 = sample(0:127, 5000, replace=TRUE)
X2 = sample(0:127, 5000, replace=TRUE)
X3 = sample(0:127, 5000, replace=TRUE)
X4 = sample(0:127, 5000, replace=TRUE)
# Training response: the sum of the four inputs, so Y lies in 0..508.
Y <- X1 + X2 + X3 + X4
# Convert the inputs to binary using int2bin()'s default length.
X1 <- int2bin(X1)
X2 <- int2bin(X2)
X3 <- int2bin(X3)
X4 <- int2bin(X4)
# NOTE(review): Y can be as large as 508, which needs 9 binary digits. The
# earlier example calls int2bin(..., length=8); if 8 is also the default, the
# top digit of Y is lost here and the training targets are wrong -- confirm
# against the rnn documentation. That could explain a systematically negative
# prediction error in the histogram below.
Y <- int2bin(Y)
# Stack the four inputs into a 3d array, laid out as in the earlier example
# (dim 1: samples; dim 2: time; dim 3: variables).
X <- array( c(X1,X2,X3,X4), dim=c(dim(X1),4) )
# Give Y a trailing third dimension of size 1.
Y <- array( Y, dim=c(dim(Y),1) )
# Train the model.
model <- trainr(Y=Y,
X=X,
learningrate = 0.1,
hidden_dim = 10,
start_from_end = TRUE )
# Plot the column means of the recorded training error.
plot(colMeans(model$error),type='l',
xlab='epoch',
ylab='errors' )
# Fresh inputs for prediction: 7000 samples per variable, converted to binary.
A1 = int2bin( sample(0:127, 7000, replace=TRUE) )
A2 = int2bin( sample(0:127, 7000, replace=TRUE) )
A3 = int2bin( sample(0:127, 7000, replace=TRUE) )
A4 = int2bin( sample(0:127, 7000, replace=TRUE) )
A <- array( c(A1,A2,A3,A4), dim=c(dim(A1),4) )
# Predict with the trained model.
B <- predictr(model,
A )
# Convert inputs and predictions back to integers
# (bin2int is presumably the inverse of int2bin -- confirm).
A1 <- bin2int(A1)
A2 <- bin2int(A2)
A3 <- bin2int(A3)
A4 <- bin2int(A4)
B <- bin2int(B)
# Histogram of the prediction error: predicted value minus the true sum.
hist( B-(A1+A2+A3+A4) )
However, the histogram showed a normal distribution centered at -250. I don't think this distribution is reasonable. Can anyone explain the result?
In fact, I want to know how to build the input array X, covering all time steps, so that it corresponds to each Y value. Thanks for any response.

Related

What can I write in R to denote "any value"?

I have a dataframe "data" containing 10 variables A to J (which all contain 0s and 1s) and 500 rows:
I need to make a second set of 10 variables AY to JY based on the variables A to J such that:
for AY, if A==1 then AY takes the value 1 with 80% probability and if A==0 then AY takes the value 1 with 20% probability
for BY, if B==1 then BY takes the value 1 with 80% probability and if B==0 then BY takes the value 1 with 20% probability
And so on...
Right now, I have the variables A to J stored in the dataframe "data", and have the following as my code:
out <- paste0(LETTERS[1:10], "Y")
data2 <- data.frame(data)
colnames(data2) <- out
for (i in out) {
data2[i] <- ifelse(**???**, rbinom(length(out), 1, 0.8), rbinom(length(out), 1, 0.2))
}
What would I write instead of the question marks to denote "if any value in the list of variables AY:JY is equal to 1, execute the first argument, otherwise execute the second argument"?
Please find below one solution that should work.
Starting data
# Simulated starting data: ten columns named A..J, each holding 500 values
# drawn with replacement from c(0, 1). Columns are drawn in order A, B, ..., J.
set.seed(4854)
df <- as.data.frame(
  replicate(10, sample(c(0, 1), 500, replace = TRUE))
)
names(df) <- LETTERS[1:10]
Saving original data
df2 <- df
Use apply() to apply a function which randomly samples with replacement
(i.e. replace = TRUE) 1 or 0 with the probabilities which you indicated,
according to whether the original value is 0 or 1
# Visit every cell (MARGIN = c(1, 2)) and draw its new value: an original 1
# yields 1 with probability 0.8, an original 0 yields 1 with probability 0.2.
df2 <- apply(
  df2,
  MARGIN = c(1, 2),
  FUN = function(x) {
    ifelse(
      x == 1,
      sample(c(0, 1), size = 1, prob = c(0.2, 0.8), replace = TRUE),
      sample(c(0, 1), size = 1, prob = c(0.8, 0.2), replace = TRUE)
    )
  }
)
Renaming of columns
colnames(df2) <- paste0(colnames(df),"Y")
Output
head(df2)
#> AY BY CY DY EY FY GY HY IY JY
#> [1,] 1 0 0 1 0 0 0 1 0 1
#> [2,] 0 1 0 0 0 0 0 0 1 1
#> [3,] 1 1 1 0 1 1 0 0 0 0
#> [4,] 1 0 1 0 1 1 1 1 1 0
#> [5,] 1 1 0 1 0 1 1 0 0 0
#> [6,] 0 0 0 1 1 1 1 1 0 1
Created on 2021-09-24 by the reprex package (v2.0.1)

Replace column values based on column name

I have a data frame with several binary variables: x1, x2, ... x100. I want to replace the entry 1 in each column with the number in the name of the column, i.e.:
data$x2[data$x2 == 1] <- 2
data$x3[data$x3 == 1] <- 3
data$x4[data$x4 == 1] <- 4
data$x5[data$x5 == 1] <- 5
...
How can I achieve this in a loop?
Using col:
# example data
set.seed(1); d <- as.data.frame(matrix(sample(0:1, 12, replace = TRUE), nrow = 3))
names(d) <- paste0("x", seq(ncol(d)))
d
# x1 x2 x3 x4
# 1 0 0 0 1
# 2 1 1 0 0
# 3 0 0 1 0
ix <- d == 1
d[ ix ] <- col(d)[ ix ]
d
# x1 x2 x3 x4
# 1 0 0 0 4
# 2 1 2 0 0
# 3 0 0 3 0
dplyr approach (using zx8754's data):
library(dplyr)
# Multiply every column whose name starts with "x" by the number left after
# removing "x" from that name; for 0/1 data each 1 becomes the column number
# and each 0 stays 0.
d %>%
mutate(across(starts_with('x'), ~ . * as.numeric(gsub('x', '', cur_column()))))
#> x1 x2 x3 x4
#> 1 0 0 0 4
#> 2 1 2 0 0
#> 3 0 0 3 0
Created on 2021-05-26 by the reprex package (v2.0.0)
Here is a base R solution with a lapply loop.
# Replace every 1 in each column after the first with the number embedded in
# that column's name (e.g. "x3" -> 3). The first column is left as is (in the
# test data it is x1, whose 1s already equal its column number).
data[-1] <- lapply(names(data)[-1], function(k) {
  # Strip the leading non-digit prefix of the name to get the column number.
  n <- as.integer(sub("[^[:digit:]]*", "", k))
  # Work on the extracted column rather than on a local copy of the whole
  # data frame.
  column <- data[[k]]
  # which() ignores NA comparisons, so missing values are left untouched
  # instead of aborting the assignment.
  column[which(column == 1)] <- n
  column
})
data
Test data.
# Test data: six columns x1..x6, each holding 10 draws from rbinom(10, 1, 0.5).
set.seed(2021)
data <- as.data.frame(replicate(6, rbinom(10, 1, 0.5)))
names(data) <- paste0("x", 1:6)
A solution based on a simple for loop is below (otherwise similar to the accepted answer using lapply):
# For each of the columns x2..x100, overwrite every 1 with the column's number.
# x1 is not visited: its 1s already equal 1.
for (i in 2:100) {
# Name of the column handled in this iteration, e.g. "x2".
k <- paste0('x', i)
# Rows where column k equals 1 receive the value i.
data[data[[k]] == 1, k] <- i
}

Building a linear constraint with 3 binary variables

I am trying to build a linear constraint that follows this logic
if either x1 = 1 or x2 =1 then y1 = 1
but if x1 = 0 and x2 = 0 then y1 = 0
if both x1 = 1 and x2= 1 then y1 = 1
Assumptions:
we are talking about integer-programming here
x1, x2 are binary-variables / integer-variables in [0, 1]
The truth-table looks like:
x1 x2 || y1
----------------
0 0 || 0
0 1 || 1
1 0 || 1
1 1 || 1
This is just:
y1 = x1 OR x2
This is trivially linearized (see relevant answer on cs.stackexchange.com):
y1 = binary-var / (could be integer-var too)
y1 <= x1 + x2
y1 >= x1
y1 >= x2

R long-table to multi-dimensional arrays

I have a table in the long format:
# library() stops with an error when data.table is not installed, whereas
# require() only returns FALSE and lets the script fail later at data.table().
library(data.table)
# Sample long-format table: one row per (Old, New, Time) combination.
sampleDT <- data.table(
  Old = c("A", "B", "A", "B", "A", "B", "A", "B"),
  New = c("A", "A", "B", "B", "A", "A", "B", "B"),
  Time = c(1, 1, 1, 1, 2, 2, 2, 2),
  value1 = c(1, 1, 1, 1, 1, 1, 1, 1),
  value2 = c(0, 0, 0, 0, 0, 0, 0, 0)
)
print(sampleDT)
Old New Time value1 value2
1: A A 1 1 0
2: B A 1 1 0
3: A B 1 1 0
4: B B 1 1 0
5: A A 2 1 0
6: B A 2 1 0
7: A B 2 1 0
8: B B 2 1 0
I would like to convert to an array of 3 dimensions. Something like:
Basically, we would have columns "New, Old, Time" as our three dimensions.
And the value for each cell is the output of some function whose inputs are "value1, value2".
In this case, when Time = 1, the result is:
matrix(data = c(1, 1+0, 0, -0), nrow = 2, ncol = 2, byrow = FALSE)
[,1] [,2]
[1,] 1 0
[2,] 1 0
How to achieve it?
Memory usage and computing time are important considerations, as we're working on relatively big datasets.
Try xtabs():
# Sample long-format data as a plain data frame.
sampleDT <- data.frame(
  Old = c("A", "B", "A", "B", "A", "B", "A", "B"),
  New = c("A", "A", "B", "B", "A", "A", "B", "B"),
  Time = c(1, 1, 1, 1, 2, 2, 2, 2),
  value1 = c(1, 1, 1, 1, 1, 1, 1, 1),
  value2 = c(0, 0, 0, 0, 0, 0, 0, 0)
)
# Cross-tabulate each value column into a 3d array indexed by Old, New, Time.
# drop.unused.levels is written out in full: the abbreviated `drop` only
# worked through partial argument matching.
Value1 <- xtabs(
  value1 ~ Old + New + Time,
  sampleDT,
  drop.unused.levels = FALSE
)
Value2 <- xtabs(
  value2 ~ Old + New + Time,
  sampleDT,
  drop.unused.levels = FALSE
)
is.array(Value1)
is.array(Value2)
Value1[, 2, ] <- 0 # Sets all second columns to zero for Value1
Value2[1, , ] <- 0 # Idem with first row for Value2
# Flip the sign of the [2, 2] cell of Value2 in every Time slice.
Value2[2, 2, ] <- Value2[2, 2, ] * (-1)
# Element-wise sum of the two adjusted arrays.
Result <- Value1 + Value2
Result
, , Time = 1
New
Old A B
A 1 0
B 1 0
, , Time = 2
New
Old A B
A 1 0
B 1 0
Hope it helps.

How to plot only the 1 (one) elements of a binary matrix in R

I have a sparse matrix stored in a .csv file; the matrix looks like:
v1 v2 v3 v4 v5 v6 ... vn
1 0 1 0 1 0 0
2 0 0 0 1 0 0
3 0 0 0 0 1 0
4 1 0 0 0 0 1
5 1 0 1 0 1 0
...
m
I want to make a plot whose x values are v1~vn and whose y values are 1~m,
marking only the non-zero elements (only the 1s).
In Matlab I use spy(), but I don't know how to do this in R.
You can use the SparseM package:
library(SparseM)
# Random 0/1 matrix with 10 columns: a cell is 1 when its uniform draw
# exceeds 0.9.
m <- matrix(as.numeric(runif(100) > 0.9), ncol = 10)
# Convert with as.matrix.csr() and plot the result.
image(as.matrix.csr(m))
Here is a solution using ggplot2::ggplot.
# Sample data
set.seed(2017);
df <- matrix(sample(c(0, 1), 100, replace = TRUE), nrow = 10);
df;
# Convert wide to long
library(reshape2);
df.long <- melt(df);
# Var1 = row
# Var2 = column
library(ggplot2);
gg <- ggplot(subset(df.long, value == 1), aes(x = Var2, y = Var1));
gg <- gg + geom_point(size = 2, fill = "blue", shape = 21);
gg <- gg + theme_bw();
gg <- gg + labs(y = "Row", x = "Column");
gg <- gg + scale_y_reverse();

Resources