Counterintuitive results for dplyr::between() when using vectors

Counterintuitive results for dplyr::between() when using vectors - r

When using dplyr::between(), I assumed that it would compare each element. However it seems like that is not the case, as shown in the below example.
x <- c(0.2, 0.2, 0.2, 0.5, 0.5, 0.5)
y <- c(0.0, 0.0, 0.0, 0.1, 0.052, -0.3)
z <- c(0.43, 0.52, 0.0, 0.76, 0.85, 0.83)
dplyr::between(x=x, left=y, right=z)
# [1] TRUE TRUE TRUE FALSE FALSE FALSE
For example, in the 3rd element, 0.2 is not between 0.0 and 0.0, but TRUE is returned.
In the 4th element, 0.5 is between 0.052 and 0.85, but FALSE is returned.
Any ideas on what causes this behavior?

dplyr::between only accepts single value in left and right, it cannot work with vector of values.
The behaviour that you expect is present in data.table::between :
x <- c(0.2, 0.2, 0.2, 0.5, 0.5, 0.5)
y <- c(0.0, 0.0, 0.0, 0.1, 0.052, -0.3)
z <- c(0.43, 0.52, 0.0, 0.76, 0.85, 0.83)
data.table::between(x=x, lower=y, upper=z)
#[1] TRUE TRUE FALSE TRUE TRUE TRUE

In base R, this is easier with comparison operators and is flexible
x >= y & x <= z
#[1] TRUE TRUE FALSE TRUE TRUE TRUE
data
x <- c(0.2, 0.2, 0.2, 0.5, 0.5, 0.5)
y <- c(0.0, 0.0, 0.0, 0.1, 0.052, -0.3)
z <- c(0.43, 0.52, 0.0, 0.76, 0.85, 0.83)

Related

bnlearn Error: Wrong number of conditional probability distributions

I am learning to work with bnlearn and I keep running into the following error in the last line of my code below:
Error in custom.fit(dag, cpt) : wrong number of conditional probability distributions
What am I doing wrong?
modelstring(dag)= "[s][r][nblw|r][nblg|nblw][mlw|s:r][f|s:r:mlw][mlg|mlw:f]
[mlgr|mlg:nblg]"
###View DAG Specifics
dag
arcs(dag)
nodes(dag)
# Create Levels
State <- c("State0", "State1")
##Create probability distributions given; these are all 2d b/c they have 1 or 2 nodes
cptS <- matrix(c(0.6, 0.4), ncol=2, dimnames=list(NULL, State))
cptR <- matrix(c(0.7, 0.3), ncol=2, dimnames=list(NULL, State))
cptNBLW <- matrix(c(0.95, 0.05, 0.05, 0.95), ncol=2, dimnames=list(NULL, "r"= State))
cptNBLG <- matrix(c(0.9, 0.099999999999999998, 0.2, 0.8), ncol=2, dimnames=list(NULL,
"nblw"=State))
cptMLG <- matrix(c(0.95, 0.05, 0.4, 0.6, 0.2, 0.8, 0.05, 0.95),ncol=2,nrow = 2,
dimnames=list("mlw"= State, "f"=State))
cptMLGR <- matrix(c(0.6,0.4,0.95,0.05,0.2,0.8,0.55,0.45),ncol=2,nrow = 2,
dimnames=list("mlg"= State, "nblg"=State))
cptMLW <-matrix(c(0.95, 0.05, 0.1, 0.9, 0.2, 0.8, 0.01, 0.99), ncol=2,nrow = 2,byrow = TRUE,
dimnames=list("r"= State, "s"=State))
# Build 3-d matrices( becuase you have 3 nodes, you can't use the matrix function; you
have to build it from scratch)
cptF <- c(0.05, 0.95, 0.4, 0.6, 0.9, 0.1, 0.99, 0.01, 0.9, 0.1, 0.95, 0.05, 0.95, 0.05, 0.99,
0.01)
dim(cptF) <- c(2, 2, 2, 2)
dimnames(cptF) <- list("s"=State, "r"=State, "mlw"=State)
###Create CPT Table
cpt <- list(s = cptS, r = cptR, mlw = cptMLW,nblw= cptNBLW,
mlg= cptMLG, nblg= cptNBLG, mlgr= cptMLGR)
# Construct BN network with Conditional Probability Table
S.net <- custom.fit(dag,cpt)
Reference: https://rpubs.com/sarataheri/bnlearnCGM

You have several errors in your CPT definitions. Primarily, you need to make sure that:
the number of probabilities supplied are equal to the product of the number of states in the child and parent nodes,
that the number of dimensions of the matrix/array is equal to the number of parent nodes plus one, for the child node,
the child node should be given in the first dimension when the node dimension is greater than one.
the names given in the dimnames arguments (e.g. the names in dimnames=list(ThisName = ...)) should match the names that were defined in the DAG, in your case with modelstring and in my answer with model2network. (So my earlier suggestion of using dimnames=list(cptNBLW = ...) should be dimnames=list(nblw = ...) to match how node nblw was declared in the model string)
You also did not add node f into your cpt list.
Below is your code with comments where things have been changed. (I have commented out the offending lines and added ones straight after)
library(bnlearn)
dag <- model2network("[s][r][nblw|r][nblg|nblw][mlw|s:r][mlg|mlw:f][mlgr|mlg:nblg][f|s:r:mlw]")
State <- c("State0", "State1")
cptS <- matrix(c(0.6, 0.4), ncol=2, dimnames=list(NULL, State))
cptR <- matrix(c(0.7, 0.3), ncol=2, dimnames=list(NULL, State))
# add child node into first slot of dimnames
cptNBLW <- matrix(c(0.95, 0.05, 0.05, 0.95), ncol=2, dimnames=list(nblw=State, "r"= State))
cptNBLG <- matrix(c(0.9, 0.099999999999999998, 0.2, 0.8), ncol=2, dimnames=list(nblg=State,"nblw"=State))
# Use a 3d array and not matrix, and add child node into dimnames
# cptMLG <- matrix(c(0.95, 0.05, 0.4, 0.6, 0.2, 0.8, 0.05, 0.95),ncol=2,nrow = 2, dimnames=list("mlw"= State, "f"=State))
cptMLG <- array(c(0.95, 0.05, 0.4, 0.6, 0.2, 0.8, 0.05, 0.95),dim=c(2,2,2), dimnames=list(mlg = State, "mlw"= State, "f"=State))
# cptMLGR <- matrix(c(0.6,0.4,0.95,0.05,0.2,0.8,0.55,0.45),ncol=2,nrow = 2, dimnames=list("mlg"= State, "nblg"=State))
cptMLGR <- array(c(0.6,0.4,0.95,0.05,0.2,0.8,0.55,0.45), dim=c(2,2,2), dimnames=list(mlgr=State, "mlg"= State, "nblg"=State))
# cptMLW <-matrix(c(0.95, 0.05, 0.1, 0.9, 0.2, 0.8, 0.01, 0.99), ncol=2,nrow = 2,byrow = TRUE, dimnames=list("r"= State, "s"=State))
cptMLW <-array(c(0.95, 0.05, 0.1, 0.9, 0.2, 0.8, 0.01, 0.99), dim=c(2,2,2), dimnames=list(mlw=State, "r"= State, "s"=State))
# add child into first slot of dimnames
cptF <- c(0.05, 0.95, 0.4, 0.6, 0.9, 0.1, 0.99, 0.01, 0.9, 0.1, 0.95, 0.05, 0.95, 0.05, 0.99, 0.01)
dim(cptF) <- c(2, 2, 2, 2)
dimnames(cptF) <- list("f" = State, "s"=State, "r"=State, "mlw"=State)
# add missing node f into list
cpt <- list(s = cptS, r = cptR, mlw = cptMLW,nblw= cptNBLW, mlg= cptMLG, nblg= cptNBLG, mlgr= cptMLGR, f=cptF)
# Construct BN network with Conditional Probability Table
S.net <- custom.fit(dag, dist=cpt)

Complex clipping (spatial intersection ?) of polygons and lines in R

I would like to clip (or maybe the right formulation is performing spatial intersection) polygons and lines using a polygon rather than a rectangle, like so:
Here is some code to make the polygons for reproducibility and examples:
p1 <- data.frame(x = c(-0.81, -0.45, -0.04, 0.32, 0.47, 0.86, 0.08, -0.46, -1, -0.76),
y = c(0.46, 1, 0.64, 0.99, -0.04, -0.14, -0.84, -0.24, -0.44, 0.12))
p2 <- data.frame(x = c(-0.63, -0.45, -0.2, -0.38, -0.26, -0.82, -0.57, -0.76),
y = c(-0.1, 0.15, -0.17, -0.79, -1, -0.97, -0.7, -0.61))
l1 <- data.frame(x = c(0.1, 0.28, 0.29, 0.52, 0.51, 0.9, 1),
y = c(0.19, -0.15, 0.25, 0.28, 0.64, 0.9, 0.47))
plot.new()
plot.window(xlim = c(-1, 1), ylim = c(-1,1))
polygon(p2$x, p2$y, col = "blue")
polygon(p1$x, p1$y)
lines(l1$x, l1$y)

You could use the spatstat package for this. Below the original example is
worked through. In spatstat polygons are used as “observation windows” of
point patterns, so they are of class owin. It is possible to do set
intersection, union etc. with owin objects.
p1 <- data.frame(x = c(-0.81, -0.45, -0.04, 0.32, 0.47, 0.86, 0.08, -0.46, -1, -0.76),
y = c(0.46, 1, 0.64, 0.99, -0.04, -0.14, -0.84, -0.24, -0.44, 0.12))
p2 <- data.frame(x = c(-0.63, -0.45, -0.2, -0.38, -0.26, -0.82, -0.57, -0.76),
y = c(-0.1, 0.15, -0.17, -0.79, -1, -0.97, -0.7, -0.61))
l1 <- data.frame(x = c(0.1, 0.28, 0.29, 0.52, 0.51, 0.9, 1),
y = c(0.19, -0.15, 0.25, 0.28, 0.64, 0.9, 0.47))
In spatstat polygons must be traversed anti-clockwise, so:
library(spatstat)
p1rev <- lapply(p1, rev)
p2rev <- lapply(p2, rev)
W1 <- owin(poly = p1rev)
W2 <- owin(poly = p2rev)
L1 <- psp(x0 = l1$x[-nrow(l1)], y0 = l1$y[-nrow(l1)],
x1 = l1$x[-1], y1 = l1$y[-1], window = boundingbox(l1))
plot(boundingbox(W1,W2,L1), type= "n", main = "Original")
plot(W2, col = "blue", add = TRUE)
plot(W1, add = TRUE)
plot(L1, add = TRUE)
W2clip <- W2[W1]
L1clip <- L1[W1]
plot(W1, main = "Clipped")
plot(W2clip, col = "blue", add = TRUE)
plot(L1clip, add = TRUE)

How can I create a customized colormap for geoviews (bokeh)?

I'm trying to plot an xarray dataset in Geoviews, like this:
https://geoviews.org/gallery/bokeh/xarray_image.html#bokeh-gallery-xarray-image
There I can define a colormap by cmap.
The cmap is just a list of hex-codes, like:
['#150b00',
'#9b4e00',
'#f07800',
'#ffa448',
'#a8a800',
'#dddd00',
'#ffff00',
'#ffffb3',
'#ffffff',
'#b0ffff',
'#00e8e8',
'#00bfbf',
'#008a8a',
'#79bcff',
'#0683ff',
'#0000c1',
'#000048']
I want to define to levels of values for these color, like this list:
[-10.0,
-5.0,
-2.5,
-1.0,
-0.5,
-0.2,
-0.1,
-0.05,
0.05,
0.1,
0.2,
0.5,
1.0,
2.5,
5.0,
10.0]
How can I define these levels?

Please try to set the parameter color_levels to the wanted values. This is explained in HoloViews Styling Plots in the section Custom color intervals. HoloVies is the source where the gv.Image comes from. Therefore this should work.
cmap = ['#150b00', '#9b4e00', '#f07800', '#ffa448', '#a8a800', '#dddd00', '#ffff00', '#ffffb3', '#ffffff', '#b0ffff', '#00e8e8', '#00bfbf', '#008a8a', '#79bcff', '#0683ff', '#0000c1', '#000048']
levels = [-10.0, -5.0, -2.5, -1.0, -0.5, -0.2, -0.1, -0.05, 0.05, 0.1, 0.2, 0.5, 1.0, 2.5, 5.0, 10.0]
images.opts(
cmap=cmap,
color_levels=levels,
colorbar=True,
width=600,
height=500) * gf.coastline
Comment
If this is not working, then I apologize. In the moment I am not able to install GeoViews on my machine.

Random walks on directed graph in R

I am new in R programming. I have a directed graph which has 6 nodes and also provided a probability matrix of 6 rows and 6 columns. If a random walker walk 100,000 steps on the graph should end up the output vector like the following:
0.1854753, 0.1301621,0.0556688, 0.1134808, 0.15344649, 0.3617481
corresponding to the probabilities of 6 nodes being visited in this random walk experiment(counts divided by the total number of steps, in this case, 100,000).
I need to create a function for this task and to demonstrate how to use it. The function takes a graph and number of steps as input.
The provided matrix as follows:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0.0 0.5 0.3 0.0 0.0 0.2
[2,] 0.1 0.2 0.0 0.4 0.1 0.2
[3,] 0.5 0.0 0.0 0.0 0.0 0.5
[4,] 0.0 0.1 0.0 0.0 0.6 0.3
[5,] 0.0 0.0 0.0 0.4 0.0 0.6
[6,] 0.4 0.0 0.0 0.0 0.2 0.4
Can someone help me to solve the problem?

Here is a step-by-step implementation using a Markov chain (through R library markovchain).
We start by loading the library.
library(markovchain);
We define the transition matrix and states (here simply 1...6 for the graph nodes)
mat <- matrix(c(
0.0, 0.5, 0.3, 0.0, 0.0, 0.2,
0.1, 0.2, 0.0, 0.4, 0.1, 0.2,
0.5, 0.0, 0.0, 0.0, 0.0, 0.5,
0.0, 0.1, 0.0, 0.0, 0.6, 0.3,
0.0, 0.0, 0.0, 0.4, 0.0, 0.6,
0.4, 0.0, 0.0, 0.0, 0.2, 0.4), ncol = 6, byrow = T)
states <- as.character(1:6);
We define a Markov chain object.
mc <- new(
"markovchain",
states = states,
byrow = TRUE,
transitionMatrix = mat,
name = "random_walk");
We now simulate a random walk consisting of nSteps (here 1e6) and obtain asymptotic probabilities for every state (node) with prop.table(table(...))
nSteps <- 1e6;
random_walk <- markovchainSequence(nSteps, mc, t0 = "1");
prop.table(table(random_walk));
#random_walk
# 1 2 3 4 5 6
#0.185452 0.129310 0.055692 0.113410 0.153787 0.362349
Note that asymptotic probabilities might change slightly if you re-run the code.
Wrapping this in a single function is straight-forward and I'll leave that up to you.

Assuming you are giving probability matrix (prob_mat) for the directed graph and no of steps (no_of_steps) as input. This should do:
set.seed(150)
find_pos_prob <- function(prob_mat, no_of_steps){
x <- c(1:nrow(prob_mat)) # index for nodes
position <- 1 # initiating from 1st Node
occured <- rep(0,nrow(prob_mat)) # initiating occured count
for (i in 1:no_of_steps) {
# update position at each step and increment occurence
position <- sample(x, 1, prob = prob_mat[position,])
occured[position] <- occured[position] + 1
}
return (occured/no_of_steps)
}
find_pos_prob(prob_mat, 100000)
#[1] 0.18506 0.13034 0.05570 0.11488 0.15510 0.35892
Data:
prob_mat <- matrix( c(0.0, 0.5, 0.3, 0.0, 0.0, 0.2,
0.1, 0.2, 0.0, 0.4, 0.1, 0.2,
0.5, 0.0, 0.0, 0.0, 0.0, 0.5,
0.0, 0.1, 0.0, 0.0, 0.6, 0.3,
0.0, 0.0, 0.0, 0.4, 0.0, 0.6,
0.4, 0.0, 0.0, 0.0, 0.2, 0.4), byrow = TRUE, ncol = 6)
Note: Simulation results will differ from analytical solutions. Ideally you should remove the seed, run the function 15-20 times and take the average of probabilities over the runs

Element wise mean from data frame [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 7 years ago.
Improve this question
I have a data frame with 3 columns and 16 rows. Each element has values like row1 values are (0.9, 0.9, 1.0), (0.7,0.9, 1.0), (0.9, 0.9, 1.0). I want element wise mean e.g., (0.9+0.7+0.9/3), (0.9+0.9+0.9/3), (1.0+1.0+1.0/3) and store the result as new column. Any suggestions?
SHO1 SHO2 SHO3
1 0.7, 0.9, 1.0 0.9, 0.9, 1.0 0.7, 0.9, 1.0
2 0.7, 0.9, 1.0 0.9, 0.9, 1.0 0.7, 0.9, 1.0
3 0.0, 0.0, 0.1 0.9, 0.9, 1.0 0.0, 0.0, 0.1
expected out for row1:
0.7+0.9+0.7/3, 0.9+0.9+0.9/3, 1.0+1.0+1.0/3

Based on the dput output by the OP (in the comments), we found that the columns in 'df1' are not 'strings'. Infact each element of each column is a list. So, instead of doing strsplit (as I suggested earlier), we loop through the columns with lapply and rbind the list elements (do.call(rbind). The output 'list' contains 'matrix' as list elements.
We can use Reduce to take the elementwise sum (Reduce('+', ..), and divide by the length of the list i.e. 3.
The matrix output ('m1') can be pasted together rowwise (do.call(paste) after converting to 'data.frame' and create a new column in the original dataset ('df1').
m1 <- Reduce('+', lapply(df1, function(x) do.call(rbind, x)))/ncol(df1)
df1$newCol <- do.call(paste, c(as.data.frame(m1), sep=", "))
df1
# SHO1 SHO2 SHO3
#1 0.9, 0.9, 1.0 0.7, 0.9, 1.0 0.9, 0.9, 1.0
#2 0.9, 0.9, 1.0 0.7, 0.9, 1.0 0.9, 0.9, 1.0
#3 0.3, 0.5, 0.7 0.7, 0.9, 1.0 0.3, 0.5, 0.7
#4 0.7, 0.9, 1.0 0.9, 0.9, 1.0 0.9, 0.9, 1.0
# newCol
#1 0.833333333333333, 0.9, 1
#2 0.833333333333333, 0.9, 1
#3 0.433333333333333, 0.633333333333333, 0.8
#4 0.833333333333333, 0.9, 1
data
df1 <- structure(list(SHO1 = structure(list(VH = c(0.9, 0.9, 1),
VH = c(0.9,
0.9, 1), M = c(0.3, 0.5, 0.7), H = c(0.7, 0.9, 1)), .Names = c("VH",
"VH", "M", "H")), SHO2 = structure(list(H = c(0.7, 0.9, 1), H = c(0.7,
0.9, 1), H = c(0.7, 0.9, 1), VH = c(0.9, 0.9, 1)), .Names = c("H",
"H", "H", "VH")), SHO3 = structure(list(VH = c(0.9, 0.9, 1),
VH = c(0.9, 0.9, 1), M = c(0.3, 0.5, 0.7), VH = c(0.9, 0.9,
1)), .Names = c("VH", "VH", "M", "VH"))), .Names = c("SHO1",
"SHO2", "SHO3"), row.names = c(NA, 4L), class = "data.frame")

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Counterintuitive results for dplyr::between() when using vectors - r

In base R, this is easier with comparison operators and is flexible x >= y & x <= z #[1] TRUE TRUE FALSE TRUE TRUE TRUE data x <- c(0.2, 0.2, 0.2, 0.5, 0.5, 0.5) y <- c(0.0, 0.0, 0.0, 0.1, 0.052, -0.3) z <- c(0.43, 0.52, 0.0, 0.76, 0.85, 0.83)

Related

bnlearn Error: Wrong number of conditional probability distributions

Complex clipping (spatial intersection ?) of polygons and lines in R

How can I create a customized colormap for geoviews (bokeh)?

Random walks on directed graph in R

Element wise mean from data frame [closed]

Categories

Resources