I'm trying to implement the floyd warshall algorithm but it won't work correctly.
What I want is the shortest path distances from one vertex to another written in a matrix d and the predecessors in a matrix pred. The input is an adjacency matrix which contains all of the edge weights.
function FloWa(C)
N = size(C)
n = min(C[1],C[2])
pred = -1*ones(C[1],C[2])
d = C
for k in 1:n
for i in 1:n
for j in 1:n
if d[i,j] > d[i,k] + d[k,j]
if pred[i,k] == -1
pred[i,j] = k
else
pred[i,j] = pred[k,j]
end
d[i,j] = d[i,k] + d[k,j]
end
if i == j && d[i,i] < 0
println("negative Dicycle")
end
end
end
end
return d, pred
end
When i am running my code with the matrix
A = [0 2 1 4 5 1; 1 0 4 2 3 4; 2 1 0 1 2 4; 3 5 2 0 3 3; 2 4 3 4 0 1; 3 4 7 3 1 0]
i don't get the right results.
For d i get the same matrix as A and pred is printed as an Array{Float64}(0,1).
I have not checked the implementation of the algorithm, but you seem to initialize pred and d incorrectly. Here is a way to do it that is I assume you indented:
n = size(C, 1) # get number of rows in C
#assert n == size(C, 2) # make sure that C is square or throw an error
pred = fill(-1, size(C)) # fill pred with -1 and make it have the same size as C
d = copy(C) # d is a copy of C
Related
Ok, I have a list of words with their frequencies. There are many, many thousands of these. Here's a mini example:
w = c("abandon", "break", "fuzz", "when")
f = c(2, 10, 8, 200)
df = data.frame(cbind(w, f))
df
w f
1 abandon 2
2 break 10
3 fuzz 8
4 when 200
What I want to do is count the characters in each word and then aggregate the results. The count_chars function from the dw4psy package can do this for a given vector of strings. I've done this successfully by just creating a giant vector of strings from the word list (which has 10s of 1000s of words), as follows:
library(ds4psy) # for count_chars function
library(dplyr)
w = c("abandon", "break", "fuzz", "when")
f = c(2, 10, 8, 200)
df = data.frame(cbind(w, f))
df$w = as.character(df$w)
df$f = as.integer(df$f)
# repword will repeat wrd frq times with no spaces between
repword <- function(frq, wrd) paste(rep(times=frq, x=wrd), collapse="")
# now we create one giant vector of strings to do the counts on
# CAUTION -- uses lots of memory when you have 10s of 1000s of words
mytext = paste(mapply(repword, df$f, df$w))
# get a table of letter counts
mycounts = count_chars(mytext)
# convert to data frame sorted by character
mycounts.df <- mycounts[order(names(mycounts))] %>%
as.data.frame()
# sort by Freq in descending order
mycounts.df %>%
arrange(desc(Freq))
However, a colleague does not have enough memory for this brute force solution. So I tried to figure out how to do this word-by-word using foreach or mapply, but I am really stuck.
One issue is that you need a vector that has every letter in it to combine them (so far as I can tell). So I create a dummy word with all letters in it, and then do some tweaks to keep it from counting the repeated letters each time.
# create a dummy string that is a-z
dummy = paste0(letters, collapse="")
# now we create a count - it will be all 1s; we will subtract it every time
dummycount = count_chars(dummy)
countword <- function(frq, wrd) {
myword = paste0(dummy, wrd, collapse="")
# subtract 1 from each letter to correct for dummy
mycount = count_chars(myword) - dummycount
mycount = mycount * frq # multiply by frequency
return(mycount)
}
totalcount = dummycount - 1 # set a table to zeroes
foreach(frq = df$f, wrd = df$w) %do% {
totalcount = totalcount + countword(frq, wrd)
}
But this just doesn't work ... I get a weird result:
> totalcount
chars
a b c d e f g h i j k l m n o p q r s t u v w x y z
16 12 10 6 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
I would be very grateful for any advice!
If we want the same output with foreach (assuming OP wants to work with foreach), simply loop over the sequence of rows
library(foreach)
library(parallel)
library(doSNOW)
no_of_cores = detectCores()
cl <- makeSOCKcluster(no_of_cores)
registerDoSNOW(cl)
out <- foreach(i = 1:nrow(df), .export = "count_chars",
.combine = `+`) %dopar% {
tmp <- countword(df$f[i], df$w[i])
totalcount[names(tmp)] <- totalcount[names(tmp)] + tmp
totalcount}
stopCluster(cl)
-output
> out
a b c d e f g h i j k l m n o p q r s t u v w x y z
14 12 0 2 210 8 0 200 0 0 10 0 0 204 2 0 0 10 0 0 8 0 200 0 0 16
Can you simply multiply the output of count_chars() by f, and do this by row?
library(data.table)
setDT(df)[, data.table(count_chars(w)*f), by=1:nrow(df)][, .(ct = sum(N)), chars][order(-ct)]
Output:
chars ct
1: e 210
2: n 204
3: h 200
4: w 200
5: z 16
6: a 14
7: b 12
8: k 10
9: r 10
10: f 8
11: u 8
12: d 2
13: o 2
I have got this matrix below
k
[,1] [,2] [,3] ,4][,5] [,6]
[1,] 1 4 9 16 25 36
[2,] 1 3 7 13 21 31
[3,] 2 2 5 10 17 26
[4,] 4 2 4 8 14 22
[5,] 7 3 3 6 11 18
[6,] 11 5 3 5 9 15
and I want to loop through starting from k[1,1] and ending at k[6,6]. My looping criteria is based on min(k[i,j+1], k[i+1,j], k[i+1, j+1]) and the answer I hope to get is something like 1+1+2+2+3+3+5+9+15 = 41 (travelling through the minimum path)
So pretty much it calculates the minimum starting from k[1,1] and then continues downwards till k[6,6]
warpingDist = function(x, y, z){
mincal = numeric(length(k))
m = nrow(k)
n = ncol(k)
i=1
j=1
mincal = which(k == min(k[i, j+1], k[i+1, j], k[i+1, j+1]), arr.ind = TRUE)
indx = data.frame(mincal)
i= indx$row
j= indx$col
if(i != m || j!=n)
{
warpingDist(k[i, j+1], k[i+1, j], k[i+1, j+1])
}
warpSum = sum(mincal)
return(warpSum)
}
value = apply(k, c(1,2), warpingDist)
value
When I run this code it displays the below:
Error: object 'value' not found
Not sure why this is happening...
As you don't provide a minimal reproducible example, I can only guess:
warpingDist = function(x, y, z, k){
# browser() # This is a good option to activate, if you run your script in RStudio
...
return(warpSum)
}
# your code
k <- whatever it is
result <- warpingDist(x, y, z, k)
I hope that helps.
Am glad, I was finally able to solve the problem...The code runs fast as well
Problem: To find the minimum cost for a matrix. For clarity, let's assume I have the matrix given below:
[1,] 1 4 6 7 8 9 0
[2,] 10 12 1 3 11 2 0
[3,] 11 12 2 8 17 1 0
[4,] 20 1 18 4 28 1 0
[5,] 5 20 80 6 9 3 0
My goal is to add the minimum path distance starting from kata[1,1] first row to the last row K[5,4]. So effectively, I want to have something like 1 + 4 + 1 + 2 + 4 + 6 + 9 + 3.
Below is the R code which I have used to implement this. It implements two functions:
# Function that calculates minimum of three values. Returns the Value.
minFUN <- function(Data, a, b){
d = (min(Data[a, b+1], Data[a+1, b], Data[a+1, b+1]))
return(d)
}
# Function that calculates the index of the minimum value, from which the
# The next iteration begins
NextRC <- function(Data, a, b){
d = min(Data[a, b+1], Data[a+1, b], Data[a+1, b+1])
if(d == Data[a, b+1]){
c = cbind(a, b+1)
}else
if(d == Data[a+1, b]){
c = cbind(a+1, b)
} else
if(d == Data[a+1, b+1]){
c = cbind(a+1, b+1)
}
return(c)
}
Je <- c()
NewRow = 1
NewCol = 1
# Warping Function that uses both functions above to loop through the dataset
WarpDist <- function(Data, a = NewRow, b = NewCol){
for(i in 1:4) {
Je[i] = minFUN(Data, a, b)
# Next Start Point
NSP = NextRC(Data, a,b)
NewRow = as.numeric(NSP[1,1])
NewCol = as.numeric(NSP[1,2])
a = NewRow
b = NewCol
}
return(Je)
}
Value=WarpDist(Data = Data, a = NewRow, b = NewCol)
warpo = Data[1,1] + sum(Value)
w = sqrt(warpo)
The result is the minimum path from the first row to the last row
Value
[1] 4 1 2 4 6
The result omits 9 and 3 because its already on the last row.
Time:
Time difference of 0.08833408 secs
I have 2 vectors, (x and y), which are coordinate points for a grid. I have a few lines of code to take each point (x[i] and y[i]) and return a value for the point. I want that returned value to be placed in a new list ('v').
Here's what I'm thinking:
v = c()
for(i in x & y){
getval(x[i], y[i]) # placeholder/theoretical function
v[i] = ptval # ptval returned from getval()
}
This isn't working, though, and I think it has to do with a formatting issue in the for-loop.
Here's an example of what I want for the final data, where x and y are inputs (gps coordinates, in reality) and v is the return from getval():
x y v
1 1 0
1 2 0
1 3 1
2 1 0
2 2 0
2 3 2
3 1 3
3 2 1
3 3 0
v = c()
for(i in 1:length(x){
getval(x[i], y[i]) # placeholder/theoretical function
v[i] = ptval # ptval returned from getval()
}
or if you need all combination from x and y
expand.grid(x,y)
I want to fit a Bradley-Terry model to many observers' rankings of three objects from within a larger set of objects.
My raw data looks like this:
obs1=c("A","C","D")
obs2=c("B","D","E")
obs3=c("C","B","E")
obs4=c("C","D","E")
obs5=c("C","E","D")
data=cbind(obs1,obs2,obs3,obs4,obs5)
obs1 obs2 obs3 obs4 obs5
1 A B C C C
2 C D B D E
3 D E E E D
but what I need as an input to countsToBinomial{BradleyTerry2} and then to BTm{BradleyTerry2} is a table like below, which contains the number of times the column-name objects were ranked before the row-name object by the five observers.
All information I found begins with the counts as data, but since my number of observations is very high, I wonder if there is a way of automatising this transformation.
A B C D E
A 0 0 0 0 0
B 0 0 1 0 0
C 1 0 0 0 0
D 1 1 3 0 1
E 0 2 3 2 0
Any ideas highly appreciated!
Ok here's the answer using nested for loops and match...
x <- unique(as.vector(data))
x <- sort(x)
cmatrix <- matrix(0,nrow = length(x), ncol = length(x))
colnames(cmatrix) <- x
row.names(cmatrix) <- x
This creates your output matrix as cmatrix with initial values all 0. Then we run the loops as follows...
count <- 0
for (i in 1:ncol(cmatrix) ){
for (j in 1:nrow(cmatrix) ){
for (k in 1:ncol(data)){
if( is.na(match(colnames(cmatrix)[i],data[,k])) == FALSE){
if( is.na(match(row.names(cmatrix)[j],data[,k])) == FALSE){
if( match(colnames(cmatrix)[i],data[,k]) < match(row.names(cmatrix)[j],data[,k]) ){count <- count+1}
}
cmatrix[j,i] <- cmatrix[j,i]+count
}
count <- 0
}
}
}
This will give you required output table. This solution will work for any number of values and not just for A to E.
Let's say I have a set of closed linear intervals represented by this matrix:
interval.mat = matrix(c(1,2,3,5,4,6,8,9), byrow = TRUE, ncol = 2)
where interval.mat[,1] are the interval start points and interval.mat[,2] are their corresponding end points.
I'm looking for an efficient (since this example matrix is a toy and in reality my matrix contains a few thousands of intervals) way to produce a matrix that will hold all the pairwise positive distances between the intervals. The distance between a pair of intervals should be the start of the interval with the bigger end among the two minus the end of the interval with the smaller end among the two. For example the distance between intervals c(1,2) and c(3,5) should 3 - 2 = 1, since the second interval ends after the first one. In case the intervals overlap the distance should be 0. So for example, in the case of c(3,5) and c(4,6) the distance would be 0.
So, the pairwise distance matrix for the intervals above would be:
> matrix(c(0,1,2,6,1,0,0,3,2,0,0,2,6,3,2,0), byrow = TRUE, nrow = 4, ncol = 4)
[,1] [,2] [,3] [,4]
[1,] 0 1 2 6
[2,] 1 0 0 3
[3,] 2 0 0 2
[4,] 6 3 2 0
Here's an Rcpp solution. It will be fast and memory efficient (for details see below).
First let's define a helper function which calculates all the pairwise distances. If n is the number of intervals to consider, we have n*(n-1)/2 unique pairs of vectors (we don't take the same intervals into account, of course, as the distance between them is 0).
library('Rcpp')
library('inline')
cppFunction("
NumericVector distint_help(NumericMatrix x) {
int n = x.nrow(); // number of rows
NumericVector out(n*(n-1)/2); // result numeric vector
int k = 0;
for (int i=0; i<n-1; ++i) {
for (int j=i+1; j<n; ++j) {
if (x(i,0) >= x(j,1))
out[k++] = x(i,0)-x(j,1);
else if (x(j,0) > x(i,1))
out[k++] = x(j,0)-x(i,1);
else
out[k++] = 0.0;
}
}
return out;
}
")
The above function returns a numeric vector with the calculated distances. Let's try to mimic the output of the built-in dist function (checkout the result of x <- dist(interval.mat); unclass(x)).
Now the main function:
distint <- function(interval) {
stopifnot(is.numeric(interval), is.matrix(interval), ncol(interval) == 2)
res <- distint_help(interval) # use Rcpp to calculate the distances
# return the result similar to the one of dist()
structure(res, class='dist', Size=nrow(interval), Diag=FALSE, Upper=FALSE)
}
distint(interval.mat)
## 1 2 3
## 2 1
## 3 2 0
## 4 6 3 2
The above object may be converted to an "ordinary" square matrix:
as.matrix(distint(interval.mat))
## 1 2 3 4
## 1 0 1 2 6
## 2 1 0 0 3
## 3 2 0 0 2
## 4 6 3 2 0
Unless the distance matrix is sparse (there are many many zeros), the above solution is storage efficient.
A benchmark:
test <- matrix(runif(1000), ncol=2)
library('microbenchmark')
library(proxy)
f <- function(x,y) max(min(x)-max(y),0)
microbenchmark(distint(test), as.matrix(dist(test, method=f)), times=10)
## Unit: milliseconds
## expr min lq median uq max neval
## distint(test) 1.584548 1.615146 1.650645 3.071433 3.164231 10
## as.matrix(dist(test, method = f)) 455.300974 546.438875 551.596582 599.977164 609.418194 10
You can use the proxy package, which has a dist(...) method that allows user definition of the distance function. Note that loading this library will mask the dist(...) function in base R
library(proxy)
f <- function(x,y) max(min(x)-max(y),0)
as.matrix(dist(interval.mat,method=f))
# 1 2 3 4
# 1 0 1 2 6
# 2 1 0 0 3
# 3 2 0 0 2
# 4 6 3 2 0