I am learning to work with bnlearn and I keep running into the following error in the last line of my code below:
Error in custom.fit(dag, cpt) : wrong number of conditional probability distributions
What am I doing wrong?
modelstring(dag)= "[s][r][nblw|r][nblg|nblw][mlw|s:r][f|s:r:mlw][mlg|mlw:f]
[mlgr|mlg:nblg]"
###View DAG Specifics
dag
arcs(dag)
nodes(dag)
# Create Levels
State <- c("State0", "State1")
##Create probability distributions given; these are all 2d b/c they have 1 or 2 nodes
cptS <- matrix(c(0.6, 0.4), ncol=2, dimnames=list(NULL, State))
cptR <- matrix(c(0.7, 0.3), ncol=2, dimnames=list(NULL, State))
cptNBLW <- matrix(c(0.95, 0.05, 0.05, 0.95), ncol=2, dimnames=list(NULL, "r"= State))
cptNBLG <- matrix(c(0.9, 0.099999999999999998, 0.2, 0.8), ncol=2, dimnames=list(NULL,
"nblw"=State))
cptMLG <- matrix(c(0.95, 0.05, 0.4, 0.6, 0.2, 0.8, 0.05, 0.95),ncol=2,nrow = 2,
dimnames=list("mlw"= State, "f"=State))
cptMLGR <- matrix(c(0.6,0.4,0.95,0.05,0.2,0.8,0.55,0.45),ncol=2,nrow = 2,
dimnames=list("mlg"= State, "nblg"=State))
cptMLW <-matrix(c(0.95, 0.05, 0.1, 0.9, 0.2, 0.8, 0.01, 0.99), ncol=2,nrow = 2,byrow = TRUE,
dimnames=list("r"= State, "s"=State))
# Build 3-d matrices( becuase you have 3 nodes, you can't use the matrix function; you
have to build it from scratch)
cptF <- c(0.05, 0.95, 0.4, 0.6, 0.9, 0.1, 0.99, 0.01, 0.9, 0.1, 0.95, 0.05, 0.95, 0.05, 0.99,
0.01)
dim(cptF) <- c(2, 2, 2, 2)
dimnames(cptF) <- list("s"=State, "r"=State, "mlw"=State)
###Create CPT Table
cpt <- list(s = cptS, r = cptR, mlw = cptMLW,nblw= cptNBLW,
mlg= cptMLG, nblg= cptNBLG, mlgr= cptMLGR)
# Construct BN network with Conditional Probability Table
S.net <- custom.fit(dag,cpt)
Reference: https://rpubs.com/sarataheri/bnlearnCGM
You have several errors in your CPT definitions. Primarily, you need to make sure that:
the number of probabilities supplied are equal to the product of the number of states in the child and parent nodes,
that the number of dimensions of the matrix/array is equal to the number of parent nodes plus one, for the child node,
the child node should be given in the first dimension when the node dimension is greater than one.
the names given in the dimnames arguments (e.g. the names in dimnames=list(ThisName = ...)) should match the names that were defined in the DAG, in your case with modelstring and in my answer with model2network. (So my earlier suggestion of using dimnames=list(cptNBLW = ...) should be dimnames=list(nblw = ...) to match how node nblw was declared in the model string)
You also did not add node f into your cpt list.
Below is your code with comments where things have been changed. (I have commented out the offending lines and added ones straight after)
library(bnlearn)
dag <- model2network("[s][r][nblw|r][nblg|nblw][mlw|s:r][mlg|mlw:f][mlgr|mlg:nblg][f|s:r:mlw]")
State <- c("State0", "State1")
cptS <- matrix(c(0.6, 0.4), ncol=2, dimnames=list(NULL, State))
cptR <- matrix(c(0.7, 0.3), ncol=2, dimnames=list(NULL, State))
# add child node into first slot of dimnames
cptNBLW <- matrix(c(0.95, 0.05, 0.05, 0.95), ncol=2, dimnames=list(nblw=State, "r"= State))
cptNBLG <- matrix(c(0.9, 0.099999999999999998, 0.2, 0.8), ncol=2, dimnames=list(nblg=State,"nblw"=State))
# Use a 3d array and not matrix, and add child node into dimnames
# cptMLG <- matrix(c(0.95, 0.05, 0.4, 0.6, 0.2, 0.8, 0.05, 0.95),ncol=2,nrow = 2, dimnames=list("mlw"= State, "f"=State))
cptMLG <- array(c(0.95, 0.05, 0.4, 0.6, 0.2, 0.8, 0.05, 0.95),dim=c(2,2,2), dimnames=list(mlg = State, "mlw"= State, "f"=State))
# cptMLGR <- matrix(c(0.6,0.4,0.95,0.05,0.2,0.8,0.55,0.45),ncol=2,nrow = 2, dimnames=list("mlg"= State, "nblg"=State))
cptMLGR <- array(c(0.6,0.4,0.95,0.05,0.2,0.8,0.55,0.45), dim=c(2,2,2), dimnames=list(mlgr=State, "mlg"= State, "nblg"=State))
# cptMLW <-matrix(c(0.95, 0.05, 0.1, 0.9, 0.2, 0.8, 0.01, 0.99), ncol=2,nrow = 2,byrow = TRUE, dimnames=list("r"= State, "s"=State))
cptMLW <-array(c(0.95, 0.05, 0.1, 0.9, 0.2, 0.8, 0.01, 0.99), dim=c(2,2,2), dimnames=list(mlw=State, "r"= State, "s"=State))
# add child into first slot of dimnames
cptF <- c(0.05, 0.95, 0.4, 0.6, 0.9, 0.1, 0.99, 0.01, 0.9, 0.1, 0.95, 0.05, 0.95, 0.05, 0.99, 0.01)
dim(cptF) <- c(2, 2, 2, 2)
dimnames(cptF) <- list("f" = State, "s"=State, "r"=State, "mlw"=State)
# add missing node f into list
cpt <- list(s = cptS, r = cptR, mlw = cptMLW,nblw= cptNBLW, mlg= cptMLG, nblg= cptNBLG, mlgr= cptMLGR, f=cptF)
# Construct BN network with Conditional Probability Table
S.net <- custom.fit(dag, dist=cpt)
Related
I am using the below R code to simulate time series data, moving average of order 1 to be precise. I am varying 3 variables which are:
N = Number of elements in the series c(15L, 20L, 30L, 50L, 100L)
SD = standard deviation c(1, 2, 3, 4, 5) ^ 2
theta = the \theta value c(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)
I have 5 data.frames which you will see as .csv files in your R working directory. Each data.frame has 35 columns that I want properly labeled.
MWE
N <- c(15L, 20L, 30L, 50L, 100L)
SD = c(1, 2, 3, 4, 5) ^ 2
theta = c(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)
res <- vector('list', length(N))
names(res) <- paste('N', N, sep = '_')
set.seed(123L)
for (i in seq_along(N)){
res[[i]] <- vector('list', length(SD))
names(res[[i]]) <- paste('SD', SD, sep = '_')
ma <- matrix(NA_real_, nrow = N[i], ncol = length(theta))
for (j in seq_along(SD)){
wn <- rnorm(N[i], mean = 0, sd = SD[j])
ma[1:2, ] <- wn[1:2]
for (k in 3:N[i]){
ma[k, ] <- wn[k - 1L] * theta + wn[k]
}
colnames(ma) <- paste('ma_theta', theta, sep = '_')
res[[i]][[j]] <- ma
}
}
res1 <- lapply(res, function(dat) do.call(cbind, dat))
sapply(names(res1), function(nm) write.csv(res1[[nm]],
file = paste0(nm, ".csv"), row.names = FALSE, quote = FALSE))
I want columnname to be label not only with respect to theta alone but also with SD.
I want the columnname to be labelled like the below. I do not want 2 or more columns to have the same label. I want ma_SD_1... to (with theta=(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)) be exhausted before ma_SD_4... (with theta=(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)) before ma_SD_9... (with theta=(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)) before ma_SD_16... (with theta=(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)) before ma_SD_25... (with theta=(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99)).
ma_SD_1_theta_0.2, ma_SD_1_theta_0.4, ma_SD_1_theta_0.6, ma_SD_1_theta_0.8, ma_SD_1_theta_0.9, ma_SD_1_theta_0.95, ma_SD_1_theta_0.99
ma_SD_4_theta_0.2, ma_SD_4_theta_0.4, ma_SD_4_theta_0.6, ma_SD_4_theta_0.8, ma_SD_4_theta_0.9, ma_SD_4_theta_0.95, ma_SD_4_theta_0.99
ma_SD_9_theta_0.2, ma_SD_9_theta_0.4, ma_SD_9_theta_0.6, ma_SD_9_theta_0.8, ma_SD_9_theta_0.9, ma_SD_9_theta_0.95, ma_SD_9_theta_0.99
ma_SD_1_theta_0.2, ma_SD_16_theta_0.4, ma_SD_16_theta_0.6, ma_SD_16_theta_0.8, ma_SD_16_theta_0.9, ma_SD_16_theta_0.95, ma_SD_16_theta_0.99
This should do it as you are iterating (using j) over the SD:
colnames(ma) <- paste('ma_SD',SD[j],'theta', theta, sep = '_')
How can I adapt the size of the following plots with regard to their length of the x-axis?
The width of the plots should refer to the length of their respective section of the x-axis. The height should be the same for all plots.
The function you want is base graphics function help("layout").
First I will make up a dataset, since you have not posted one. I will not draw the regression lines, just the points.
Data creation code.
fun <- function(X, A) {
apply(X, 1, function(.x){
xx <- seq(.x[1], .x[2], length.out = 100)
y <- A[1]*xx + A[2] + rnorm(100, 0, 25)
list(xx, y)
})}
Coef <- matrix(c(0.24, 0.54,
0.75, 0.54,
0.33, 2.17,
0.29, 3.3,
0.29, 4.41), byrow = TRUE, ncol = 2)
X <- matrix(c(0.1, 0.49,
0.5, 2.49,
2.5, 3.9,
4.0, 5.9,
6.0, 12.0), byrow = TRUE, ncol = 2)
set.seed(1234)
res <- fun(X, Coef)
The problem.
Define a layout matrix with each plot in a sequence from first to 5th. And the widths given by the X ranges.
layout_mat <- matrix(c(1, 2, 3, 4, 5), 1, 5, byrow = TRUE)
w <- apply(X, 1, diff)
l <- layout(layout_mat, widths = w)
layout.show(l)
Now make some room for the axis annotation, saving the default graphics parameters, and plot the 5 graphs.
om <- par(mar = c(3, 0.1, 0.1, 0.1),
oma = c(3, 2, 0.1, 0.1))
for(i in 1:5) plot(res[[i]][[1]], res[[i]][[2]])
par(om)
is there a way to generate a random sample from a higher order markov chain? I used the package clickstream to estimate a 2nd order markov chain and i'm now trying to generate a sample from it. I understand how to do this from a transition matrix with the randomClickstreams function but that would only work for a 1st order markov chain.
Here's a reproducible example where we generate a sample from a transition matrix and then fit a 2nd order markov chain on the sample:
trans_mat <- matrix(c(0, 0.2, 0.7, 0, 0.1,
0.2, 0, 0.5, 0, 0.3,
0.1, 0.1, 0.1, 0.7, 0,
0, 0.4, 0.2, 0.1, 0.3,
0, 0 , 0 , 0, 1), nrow = 5)
cls <- randomClickstreams(states = c("P1", "P2", "P3", "P4", "end"),
startProbabilities = c(0.5, 0.5, 0, 0, 0),
transitionMatrix = trans_mat,
meanLength = 20, n = 1000)
# fit 2nd order markov chain:
mc <- fitMarkovChain(clickstreamList = cls, order = 2,
control = list(optimizer = "quadratic"))
This is made of 2 transition matrices and 2 lambda parameters:
How can i then use these elements to create a random sample of say 10000 journeys?
I have a real data and predicted data and I want to calculate overall MAPE and MSE. The data are time series, with each column representing data for different weeks. I predict value for each of the 52 weeks for each of the items as shown below. What would be the best possible calculate overall Error in R.
real = matrix(
c("item1", "item2", "item3", "item4", .5, .7, 0.40, 0.6, 0.3, 0.29, 0.7, 0.09, 0.42, 0.032, 0.3, 0.37),
nrow=4,
ncol=4)
colnames(real) <- c("item", "week1", "week2", "week3")
predicted = matrix(
c("item1", "item2", "item3", "item4", .55, .67, 0.40, 0.69, 0.13, 0.9, 0.47, 0.19, 0.22, 0.033, 0.4, 0.37),
nrow=4,
ncol=4)
colnames(predicted) <- c("item", "week1", "week2", "week3")
How do you get the predicted values in the first place? The model you use to get the predicted values is probably based on minimising some function of prediction errors (usually MSE). Therefore, if you calculate your predicted values, the residuals and some metrics on MSE and MAPE have been calculated somewhere along the line in fitting the model. You can probably retrieve them directly.
If the predicted values happened to be thrown into your lap and you have nothing to do with fitting the model, then you calculate MSE and MAPE as per below:
You have only one record per week for every item. So for every item, you can only calculate one prediction error per week. Depending on your application, you can choose to calculate the MSE and MAPE per item or per week.
This is what your data looks like:
real <- matrix(
c(.5, .7, 0.40, 0.6, 0.3, 0.29, 0.7, 0.09, 0.42, 0.032, 0.3, 0.37),
nrow = 4, ncol = 3)
colnames(real) <- c("week1", "week2", "week3")
predicted <- matrix(
c(.55, .67, 0.40, 0.69, 0.13, 0.9, 0.47, 0.19, 0.22, 0.033, 0.4, 0.37),
nrow = 4, ncol = 3)
colnames(predicted) <- c("week1", "week2", "week3")
Calculate the (percentage/squared) errors for every entry:
pred_error <- real - predicted
pct_error <- pred_error/real
squared_error <- pred_error^2
Calculate MSE, MAPE:
# For per-item prediction errors
apply(squared_error, MARGIN = 1, mean) # MSE
apply(abs(pct_error), MARGIN = 1, mean) # MAPE
# For per-week prediction errors
apply(squared_error, MARGIN = 0, mean) # MSE
apply(abs(pct_error), MARGIN = 0, mean) # MAPE
Data:
x <- seq(0, 1, len = 1024)
pos <- c(0.1, 0.13, 0.15, 0.23, 0.25, 0.40, 0.44, 0.65, 0.76, 0.78, 0.81)
hgt <- c(4, 5, 3, 4, 5, 4.2, 2.1, 4.3, 3.1, 5.1, 4.2)
wdt <- c(0.005, 0.005, 0.006, 0.01, 0.01, 0.03, 0.01, 0.01, 0.005, 0.008, 0.005)
pSignal <- numeric(length(x))
for (i in seq(along=pos)) {
pSignal <- pSignal + hgt[i]/(1 + abs((x - pos[i])/wdt[i]))^4
}
df = as.data.frame(rbind(pSignal,pSignal,pSignal))
dflist=list(df,df,df)
I'm trying to run this pracma package's findpeaks() function to find the local maxima of each row in each data.frame in the list, dflist. The output is a N x 4 array. N = the number of peaks. So in the first row of the first data.frame if it finds 4 peaks, it will be a 4x4 matrix. My goal is to loop this function over every row in each data.frame and store the matrix that is output in a list.
My code:
## Find Peaks
pks=list()
for (i in 1:length(dflist)){
for (j in 1:length(dflist[[i]])){
row = dflist[[i]][j,]
temppks = findpeaks(as.vector(row,mode='numeric')
,minpeakheight = 1.1,nups=2)
pks[i][[j]]=rbind(pks,temppks)
}
}
This doesn't seem to be doing quite what I want it too. any ideas?
A combination of apply() and sapply() could do the work:
my.f.row <- function(row) findpeaks(as.vector(row,mode='numeric'), minpeakheight = 1.1, nups=2)
sapply(dflist, function(df.i) apply(df.i, 1, my.f.row))
eventually you have to reorganize the result.