TLDR:
I need a simple way to transform c(a = 1, a = 3, a = 6) into list(c(a = 1), c(a = 3), c(a = 6)).
Longer version:
I am using the function purrr::accumulate(), where the output of each element is an atomic vector of length greater or equal to one. When the length is one, purrr::accumulate() simplifies the whole output to an atomic vector, instead of a list.
Is there a simple way to undo or avoid this? Unfortunately, as.list() does not give me what I want.
Simple example to illustrate:
purrr::accumulate(2:3, `+`, .init = c(a=1, b=2))
gives me
list(c(a = 1, b = 2), c(a = 3, b = 4), c(a = 6, b = 7))
as expected. However,
purrr::accumulate(2:3, `+`, .init = c(a=1))
gives me
c(a = 1, a = 3, a = 6)
when I instead want
list(c(a = 1), c(a = 3), c(a = 6))
You could try
c(a = 1, a = 3, a = 6) %>% map(~setNames(.x, nm = "a"))
$a
a
1
$a
a
3
$a
a
6
or you can also remove the list names with set_names()
c(a = 1, a = 3, a = 6) %>% map(~setNames(.x, nm = "a")) %>%
set_names("")
[[1]]
a
1
[[2]]
a
3
[[3]]
a
6
I've been studying the advantages/disadvantages of for-loops versus versus the apply() family of functions and the answer isn't clear cut (apply() always faster than for-loops may not be true, depending on circumstances). So I want to test the various options against my actual data.
Below is a for-loop which looks pretty straightforward to me, but I'm unsure of how to replace it with lapply(). I assume lapply() is correct since the for-loop produces a list object.
The actual data I need to run this analysis against is a data frame containing 2.5 million rows, 30+ columns, so I'd like to run speed tests against the various options.
Any explanation would be most helpful. The examples I found online are light on explanations or the for-loops examples overly-complex, and I hope to learn to use apply() family functions well as they seem very useful and simpler to read than for-loops.
Here's the simplified for-loop code, with example data frame, which runs correctly for example purposes:
# Set up data frame to perform migration analysis on:
data <-
data.frame(
ID = c(1,1,1,2,2,2,3,3,3),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0")
)
# Function to set-up base table:
setTable <- function(data){
df <- data.frame(matrix(NA, ncol=length(unique(data$Flags)), nrow=length(unique(data$Flags))))
row.names(df) <- unique(data$Flags)
names(df) <- unique(data$Flags)
return(df)
}
# Function to complete migration table with for-loop:
migration <- function(data, from=1, to=3){
df <- setTable(data)
for (i in unique(data$ID)){
id_from <- as.character(data$Flags[(data$ID == i & data$Period == from)])
id_to <- as.character(data$Flags[data$ID == i & data$Period == to])
column <- which(names(df) == id_from)
row <- which(row.names(df) == id_to)
df[row, column] <- ifelse(is.na(df[row, column]), 1, df[row, column] + 1)
}
return(df)
}
# Now to run the function:
test1 <- migration(data, from=1, to=3)
Edit: wrapped in a function allowing to specify from & to:
library(data.table)
DF <- data.frame(
ID = c(1,1,1,2,2,2,3,3,3,4,4,4),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0", "X2","X1","X0")
)
migration <- function(DT, from=1, to=3){
setDT(DT)
unique_flags <- unique(DT$Flags)
all_flags <- setDT(expand.grid(list(from_flag = unique_flags, to_flag = unique_flags)))
dcast(DT[, .(from_flag = Flags[Period == from], to_flag = Flags[Period == to]), by = ID][
,.N, c("from_flag", "to_flag")][
all_flags, on = c("from_flag", "to_flag")], to_flag ~ from_flag, value.var = "N")
}
migration(DF, 1, 3)
When it comes to speed in R, you can almost always count on library(data.table):
library(data.table)
DT <- setDT(data.frame(
ID = c(1,1,1,2,2,2,3,3,3,4,4,4),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0", "X2","X1","X0")
))
unique_flags <- unique(DT$Flags)
all_flags <- setDT(expand.grid(list(first_flag = unique_flags, last_flag = unique_flags)))
resultDT <- dcast(DT[, .(first_flag = first(Flags), last_flag = last(Flags)), by = ID][
,.N, c("first_flag", "last_flag")][
all_flags, on = c("first_flag", "last_flag")], last_flag ~ first_flag, value.var = "N")
print(resultDT)
Step by step:
library(data.table)
DT <- setDT(data.frame(
ID = c(1,1,1,2,2,2,3,3,3,4,4,4),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0", "X2","X1","X0")
))
unique_flags <- unique(DT$Flags)
all_flags <- setDT(expand.grid(list(first_flag = unique_flags, last_flag = unique_flags)))
resultDT <- DT[, .(first_flag = first(Flags), last_flag = last(Flags)), by = ID] # find relevant flags
resultDT <- resultDT[,.N, c("first_flag", "last_flag")] # count transitions
resultDT <- resultDT[all_flags, on = c("first_flag", "last_flag")] # merge all combinations
resultDT <- dcast(resultDT, last_flag ~ first_flag, value.var = "N") # dcast
print(resultDT)
Regarding lapply you can do (I'd prefer data.table):
# Set up data frame to perform migration analysis on:
input_data <-
data.frame(
ID = c(1,1,1,2,2,2,3,3,3),
Period = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
Values = c(5, 10, 15, 0, 2, 4, 3, 6, 9),
Flags = c("X0","X1","X2","X0","X2","X0", "X2","X1","X0")
)
# Function to set-up base table:
setTable <- function(data){
DF <- data.frame(matrix(NA, ncol=length(unique(data$Flags)), nrow=length(unique(data$Flags))))
row.names(DF) <- unique(data$Flags)
names(DF) <- unique(data$Flags)
return(DF)
}
# Function to complete migration table with for-loop:
migration <- function(data, from=1, to=3){
DF <- setTable(data)
lapply(seq_along(unique(data$ID)), function(i){
id_from <- as.character(data$Flags[(data$ID == i & data$Period == from)])
id_to <- as.character(data$Flags[data$ID == i & data$Period == to])
column <- which(names(DF) == id_from)
row <- which(row.names(DF) == id_to)
DF[row, column] <<- ifelse(is.na(DF[row, column]), 1, DF[row, column] + 1)
})
return(DF)
}
# Now to run the function:
test1 <- migration(input_data, from=1, to=3)
I was trying to produce the following 7 x 4 matrix in R:
m = matrix(c(seq(25, 1, by = -4),
seq(26, 2, by = -4),
seq(27, 3, by = -4),
seq(28, 4, by = -4)), nrow = 7, ncol = 4)
BUT, I'm wondering if could I achieve the same matrix with more efficient R code than what I used above?
Here's a solution:
m <- matrix(rev(c(1:28)),nrow=7,ncol=4,byrow = TRUE)[,rev(1:4)]
And this one is even faster:
m <- matrix(28:1,nrow=7,ncol=4,byrow = TRUE)[,4:1]
m = matrix(c(rep(seq(25, 1, by = -4),4) + rep(c(0:3),each=7) ), nrow = 7, ncol = 4)
Not sure if you would call this more efficient...
I've been trying to use the R Statistical software to build a Takagi Sugeno fuzzy system. Using the R package frbs I've managed to set up the most of components of the FIS following the example in the demo files. Unfortunately, I've hit a problem:
Error in rule[, (4 * i), drop = FALSE] : subscript out of bounds
in line:
res <- predict(object, newdata)$predicted.val
I have no idea what is wrong in this script. Rules should be good, the same I use in MATLAB script and it works. I do everything like it is in documentation and examples in frbs library.
#rm(list=ls())
library(frbs)
varinp.mf <- matrix(c( 5, -1, 0.8493, NA, NA, 5, 1, 0.8493, NA, NA,
5, -1, 0.8493, NA, NA, 5, 1, 0.8493, NA, NA),
nrow = 5, byrow = FALSE)
num.fvalinput <- matrix(c(2,2), nrow=1)
x1 <- c("a1","a2")
x2 <- c("b1","b2")
names.varinput <- c(x1, x2)
range.data <- matrix(c(-1.5,1.5, -1.5, 1.5, -1.5, 1.5), nrow=2)
type.defuz <- "5"
type.tnorm <- "MIN"
type.snorm <- "MAX"
type.implication.func <- "MIN"
name <- "Przykład"
newdata <- matrix(c(-0.6, 0.3), ncol = 2, byrow = TRUE)
colnames.var <- c("x1", "x2")
type.model <- "TSK"
func.tsk <- matrix(c(1, 1, 1,
2, 1, 0,
1, -2, -1,
-1, 0.5, -2),
nrow = 4, byrow = TRUE)
# r1 <- c("a1","and","b1","->")
# r2 <- c("a1","and","b2", "->")
# r3 <- c("a2","and","b1", "->")
# r4 <- c("a2","and","b2", "->")
# rule <- list(r1,r2,r3,r4)
rule <- matrix(c("a1","and","b1","->",
"a1","and","b2","->",
"a2","and","b1","->",
"a2","and","b2","->"),
nrow = 4, byrow = TRUE)
object <- frbs.gen( range.data, num.fvalinput, names.varinput,
num.fvaloutput=NULL, varout.mf=NULL, names.varoutput=NULL, rule,
varinp.mf, type.model, type.defuz, type.tnorm, type.snorm,
func.tsk, colnames.var, type.implication.func)
plotMF(object)
res <- predict(object, newdata)$predicted.val
I see something is wrong in object$rule but i don't know how to fix it.
According to documentation: colnames.var
a list of names of input and output variables. Just add the output like hat for example colnames.var <- c("x1", "x2","o1").