Plot the multiplication of complex roots of fractional polynomials - r

I'm thinking that #GGrothendieck's answer to the request for solutions to fractional roots of negative numbers deserves a graphical addendum:
Can someone plot the roots in a unit complex circle. as well as add the "graphical sum" of some of the roots, i.e. the sequential products of the same 5 roots of -8, vectors multiplied in sequence?
x <- as.complex(-8) # or x <- -8 + 0i
# find all three cube roots
xroot5 <- (x^(1/5) * exp(2*c(0:4)*1i*pi/5))
plot(xroot5, xlim=c(-8, 2), ylim=c(-5,5))
abline(h=0,v=0,lty=3)
Originally I was thinking this would be some sort of head to tail illustration but complex multiplication is a series of expansions and rotations around the origin.

The Reduce function with accumulate=TRUE will deliver the sequence of intermediate powers of each of the roots of x^5 = -8 up to the fith power:
x <- as.complex(-8) # or x <- -8 + 0i
# find all five roots
xroot5 <- (x^(1/5) * exp(2*c(0:4)*1i*pi/5))
xroot5
#[1] 1.226240+0.890916i -0.468382+1.441532i -1.515717+0.000000i -0.468382-1.441532i
#[5] 1.226240-0.890916i
(Reduce("*", rep( xroot5[4], 5),acc=TRUE) )
#[1] -0.468382-1.441532i -1.858633+1.350376i 2.817161+2.046787i 1.631001-5.019706i
#[5] -8.000000+0.000000i
# Using the fourth root:
beg <- (Reduce("*", rep( xroot5[4], 5),acc=TRUE) )[-5]
ends <- (Reduce("*", rep( xroot5[4], 5),acc=TRUE) )[-1]
# Need more space
plot(xroot5, xlim=c(-8, 2), ylim=c(-6,6))
abline(h=0,v=0,lty=3)
arrows(Re(beg),Im(beg), Re(ends), Im(ends), col="red")
# Plot sequence of powers of first root:
beg <- Reduce("*", rep( xroot5[1], 5),acc=TRUE)[-5]
ends <- Reduce("*", rep( xroot5[1], 5),acc=TRUE)[-1]
arrows(Re(beg),Im(beg), Re(ends), Im(ends), col="blue")

The circle is centered at 0, 0. The roots all have the same radius and picking any one of them, the radius is
r <- Mod(xroot[1])
The following gives a plot which looks similar to the plot in the question except we have imposed an aspect ratio of 1 in order to properly draw it and there is a circle drawn through the 5 points:
plot(Re(xroot5), Im(xroot5), asp = 1)
library(plotrix)
draw.circle(0, 0, r)
Multiplying any root by
e <- exp(2*pi*1i/5)
will rotate it into the next root. For example, this plots xroot5[1] in red:
i <- 0
points(Re(xroot5[1] * e^i), Im(xroot5[1] * e^i), pch = 20, col = "red")
and then repeat the last line for i = 1, 2, 3, 4 to see the others successively turn red.

Related

How to find a point in a vector where the value begin to plateau

I have the following vector:
wss <- c(23265.2302840678, 4917.06943551649, 1330.49917983449, 288.050702912287,
216.182464712486, 203.769578557051, 151.991297068931, 139.635571841227,
118.285305833194, 117.164567420633, 105.397722980407, 95.4682187817563,
116.448588269066, 88.1287299776581, 83.9345098736843)
And if we with the following plot code
plot(1:15, wss, type="b", xlab="Number of Clusters",
ylab="Within groups sum of squares")
we can get this:
By eye we can see at x-axis point 4 the value change begin to change drastically plateaued.
My question is given the vector wss how can we automatically detect the index 4 without looking at the plot.
Edit: This works better:
#change relative to the maximum change
threshold <- 0.1
d1 <- diff(wss)
# this assumes that the first value is the highest
## you could use max(d1) instead of d1[1]
which.max((d1 / d1[1]) < threshold) #results in 3
d1 <- diff(wss2)
which.max(d1 / d1[1] < threshold) #results in 5
Second Edit: This is somewhat subjective, but here's how my three methods compare for your two data sets. While it's easy to visualize what a plateau is, you need to be able to describe in math terminology what a plateau is in order to automate it.
Original: If you know that the second derivative will flip from positive to negative, you can do this:
sec_der <- diff(wss, differences = 2)
inflection_pt <- which.min(sign(sec_der))
inflection_pt
For this data set, the result is 5 which corresponds to the original datasets result of 7 (i.e., 151.991).
Instead of looking at inflection points, you could instead look at some relative percent threshold.
thrshold <- 0.06
which.min(sign(abs(diff(wss)) / wss[1:(length(wss)-1)] - thrshold))
This results in 5 as well using the first derivative approach.
Regardless, using the diff() function would be a key part of figuring this out in base R. Also see:
Finding the elbow/knee in a curve
Code to create graphs:
wss <- c(23265.2302840678, 4917.06943551649, 1330.49917983449, 288.050702912287,
216.182464712486, 203.769578557051, 151.991297068931, 139.635571841227,
118.285305833194, 117.164567420633, 105.397722980407, 95.4682187817563,
116.448588269066, 88.1287299776581, 83.9345098736843)
wss2 <- c(1970.08410513303, 936.826421218935, 463.151086710784, 310.219800983285, 227.747583214178, 191.601552329558, 159.703151798393, 146.881710048563, 138.699803963718, 134.534334658148)
data_list <- list(wss, wss2)
# Potential_methods -------------------------------------------------------
plateau_method = list(thresh_to_max = function(x) which.max(diff(x) / diff(x)[1] < threshold)
, inflection_pt = function(x) which.min(sign(diff(x, differences = 2)))
, deriv_to_raw = function(x) which.min(sign(abs(diff(x)) / x[1:(length(x)-1)] - threshold))
)
threshold <- 0.1
results <- t(sapply(plateau_method, mapply, data_list))
# graphing ----------------------------------------------------------------
par(mfrow = c(3,2))
apply(results, 1, function (x) {
for (i in seq_along(x)) {
plot(data_list[[i]],ylab="Within groups sum of squares", type = 'b', xlab = 'Number of Clusters')
abline(v = x[i])
}
} )
lapply(seq_along(names(plateau_method))
, function (i) {
mtext(paste(names(plateau_method)[i]
, "- \n"
, substring(plateau_method[i], 15))
, side = 3, line = -18*(i)+15, outer = TRUE)
})
mtext('Threshold = 0.1', side = 3, line = -53, outer = T)

R - Sampling blackjack cards

A five card charlie is where you draw five cards and don't go bust, i.e. the points from 5 cards is <= 21. I want to find the probability of a 5 card charlie by brute force - i.e. simulate a large number of "plays" and check if you go bust or not.
by using brute force using R. I'm assuming here that there are 4 decks as is common in Casinos, and I'm sampling 5 cards from these 4 decks, checking if they've won and if so counting it towards the probability. Googling states it should be around 1/50, i.e. 2%:
deck <- c(rep(1:9, 16), rep(10, 64))
n <- 0
size <- 1:10e6
for (i in size){
smpl <- sample(deck,5,replace = F)
if (sum(smpl) <= 21){
n <- n+1
}
}
print(n/max(size) * 100)
[1] 5.98644
Note that "deck" here is the point system, i.e. we have 1:9 points for 4 suits, and 4 deck of cards hence need 1:9 16 times, and similarly Jack Queen King and Ten all count as ten but 4*4*4 possible cards.
Sample 5 cards without replacement, check if the sum is <= 21, and if so count it, then finally do this 10 million times and calculated the probability. However this gives 6% rather than 2%.
I have two questions:
1) How can I modify this so that I can sample 100 million or more plays?
2) Where am I going wrong with the 6% probability?
I think that what is off here is the assumption that it should be 2%.
Your code said about 5%. I've adapted an existing answer and it also says 5%:
deck <- c(rep(1:9, 4), rep(10, 16))
result <- combn(deck, 5, function(x) {sum(x) <= 21})
sum(result)/dim(result)
[1] 0.05385693
For k = 5, 6, 7 - card charlie you could try the following (to compute the probability with simulation) with replicate:
sapply(5:7, function(k) mean(replicate(n=10^6,
sum(sample(c(rep(1:9, 4), rep(10, 16)), k, replace = F)) <= 21)))
#[1] 0.053943 0.008525 0.000890
Here is how the probability decreases with k (for k-card charlie)
library(ggplot2)
ggplot(aes(card, prob),
data=data.frame(card=2:7, prob=sapply(2:7, function(x) mean(replicate(n=10^6, sum(sample(c(rep(1:9, 4), rep(10, 16)),x,replace = F)) <= 21))))) +
geom_point() + geom_line()

Simulating random walk on the n-cycle in R

I have been trying to implement a random walk on the n-cycle algorithm in R.
By n-cycle I mean the set of integers Zn, or modulo n. Basically, it’s example 5.3.1 from the book “Markov chains and mixing time”, by Levin, Peres and Wilmer. The intention is as follows: consider two chains modeling the movement of two particles X and Y on Zn with starting points X1 and Y1. By the flip of a fair coin we decide which particle will move (the particles cannot move simultaneously unless they have coupled); the direction is decided by another flip of fair coin.
Once the two particle collide, they move together hereafter. It is part of a study project to implement a CFTP algorithm, so the length of the chains should have a pre-defined value, say T.
The code does not run and an error message appears. The error is “object ‘res’ not found”. However, I had previously defined “res” as a list to store the output of the function. Why does this happen and how could it be fixed?
I have two scripts: in the first one the code is split in smaller helper functions; the second one may be messier, as I tried to put all the helper functions within one single function.
Any help will be much appreciated.
This one is script 2.
# X1 - initial state of chain X
# Y1 - initial state of chain Y
# T - "length" of a chain, number of steps the chains will run for.
# n - length of the n-cycle, i.e., Zn.
Main_Function <- function (X1 = 8, Y1 = 4 , T = 20, n = 6){
X <- rep( X1, T) %% n # X, Y and res will store the results
Y <- rep( Y1, T) %% n
res <- list(X,Y) # Here I defined the object res. Later on R encounters an error "object 'res' not found".
ps <- TakeOneStep() # TakeOneStep is a function defined below
return(ps)
}
TakeOneStep <- function(){
incr_same <- sample(c(-1, 0, 1), size = 1, prob = c(1/4, 1/2, 1/4)) #direction of the particles after they have coupled
incr_dif <- sample(c(-1,1), size = 1, prob = c(1/2, 1/2)) # direction of the particles before coupling occurred.
choice <- runif(T) # determines which chain moves, before coupling occurred.
for(t in 2:T){
if(res[[1]][t-1]%%n == res[[2]][t-1]%%n){
res[[1]][t] <- (res[[1]][t-1] + incr_same) %% n
res[[2]][t] <- (res[[2]][t-1] + incr_same) %% n
}else{ if(choice[t] < 0.5) {
res[[1]][t] <- (res[[1]][t-1] + incr_dif) %% n
}else{res[[2]][t] <- (res[[2]][t-1] + incr_dif)%%n}
}
}
return(res)
}

How to find decimal places lower than 2 and fill them with zeros in R

I'd like to merge two datasets based on a common column. Dataset A is a geoTIFF image, representing RGB values of an area. Dataset B is a point cloud with xyz values of the same area.
I want to merge the RGB info in the image to the 3d data. I thougth to use the x y coordinates of the two datasets (which are in the same coordinate system).
I wrote a script inspired by code snippets found in stackoverflow, but I need to implement my whole code (sources are 1, 2, and 3).
The issue is that the x y coordinates in thwe two files have different precision (decimal numbers). Dataset A has 0 to 2 digits; dataset B has much more. I rounded the dataset B digits to be 2. Now, I'd like to pad with zeros when the digits of datset A are less than 2, so that the final merge will hopefully work.
Would a simple if statement be fine considering that my datset has >280000 rows? Or should I go for indexing? Anyway, I'm fairly new in using R, so I hope the possible posters woud help me with a code example. Below is my code:
require(raster)
require(rgl)
setwd("C:/my/folder")
# Read tiff file
img <- stack("image.tif")
vals <- extract(img, 1:ncell(img))
coord <- xyFromCell(img, 1:ncell(img))
combine <- cbind(coord, vals)
remove(vals)
remove(coord)
# read POINTCLOUD and assign names
lidar <- read.table("lidardata.txt")
names(lidar) <- c("x","y","z")
decimalplaces <- function(x) {
if ((x %% 1) != 0) {
nchar(strsplit(sub('0+$', '', as.character(x)), ".", fixed=TRUE)[[1]][[2]])
} else {
return(0)
}
}
# HERE I SHOULD PAD THE LIDAR VARIABLE WITH ZEROS IN DECIMAL POSITIONS WHEN THE DIGITS ARE LESS THAN 2!!!
lidar$xy <- do.call(paste0,lidar[,1:2])
combine$x <- round(combine$x, digits = 2)
combine$y <- round(combine$y, digits = 2)
combine$xy <- do.call(paste0,combine[1:2])
finaldata <- merge(combine,lidar,by = 'xy', all = FALSE)
EDIT 1
As suggested by #Heroka, here is also an example of how the lidar (the dataset A) looks like, and how it should be after padding it with zeros.
LIDAR (original)
x y z
12 9 87
11 23.4 100
LIDAR (altered, and with 'xy' column added for joining)
x y z xy
12.00 9.00 87 12.009.00
11.00 23.40 100 11.0023.40
EDIT 2
I somehow managed to retrieve the number of digits in all x and y of my 'lidar' variable (dataset B) with counting <- sapply(lidar$x, decimalplaces)
In the example above (LIDAR-original), this would give [0 0] for the first (x) column, and [0 1] for the second (y) column. I should be able to find each row in my x y datset with a value of 0 or 1 as digits (not 2) and pad with 0 like in LIDAR-altered above.
I do not understand why you need to pad with zeros. If the coordinates are of class numeric and both were rounded using round (which should avoid issues of floating point precision) you should be able to just merge by them. Something like this:
lidar$x <- round(lidar$x, 2)
lidar$y <- round(lidar$y, 2)
combine$x <- round(combine$x, digits = 2)
combine$y <- round(combine$y, digits = 2)
finaldata <- merge(combine, lidar, by = c("x", "y") , all = FALSE)

Algorithm for automating pairwise significance grouping labels in R

After struggling with this problem for a while, I am hoping to get some advice here. I am wondering if anyone is aware of an automated method for determining pairwise grouping labels based on significance. The question is independent of the significance test (e.g. Tukey for parametric or Mann-Whitney for non-parametric) - given these pairwise comparisons, some boxplot-type figures often represent these groupings with a sub-script:
I have done this example by hand, which can be quite tedious. I think that the sequence of labeling in the algorithm should be based on the number of levels in each group - e.g. those groups containing single levels that are significantly different from all other levels should be named first, then groups containing 2 levels, then 3, etc., all the while checking that new groupings add a new needed grouping and do not violate and differences.
In the example below, the tricky part is getting the algorithm to recognize that level 1 should be grouped with 3 and 5, but 3 and 5 should not be grouped (i.e. share a label).
Example code:
set.seed(1)
n <- 7
n2 <- 100
mu <- cumsum(runif(n, min=-3, max=3))
sigma <- runif(n, min=1, max=3)
dat <- vector(mode="list", n)
for(i in seq(dat)){
dat[[i]] <- rnorm(n2, mean=mu[i], sd=sigma[i])
}
df <- data.frame(group=as.factor(rep(seq(n), each=n2)), y=unlist(dat))
bp <- boxplot(y ~ group, df, notch=TRUE)
kr <- kruskal.test(y ~ group, df)
kr
mw <- pairwise.wilcox.test(df$y, df$g)
mw
mw$p.value > 0.05 # TRUE means that the levels are not significantly different at the p=0.05 level
# 1 2 3 4 5 6
#2 FALSE NA NA NA NA NA
#3 TRUE FALSE NA NA NA NA
#4 FALSE FALSE FALSE NA NA NA
#5 TRUE FALSE FALSE FALSE NA NA
#6 FALSE FALSE FALSE TRUE FALSE NA
#7 FALSE FALSE FALSE FALSE FALSE FALSE
text(x=1:n, y=bp$stats[4,], labels=c("AB", "C", "A", "D", "B", "D", "E"), col=1, cex=1.5, pos=3, font=2)
First let me restate the problem in the language of graph theory. Define a graph as follows. Each sample gives rise to a vertex that represents it. Between two vertices, there is an edge if and only if some test indicates that the samples represented by those vertices could not be distinguished statistically. In graph theory, a clique is a set of vertices such that, between every two vertices in the set, there is an edge. We're looking for a collection of cliques such that every edge in the graph belongs to (at least? exactly?) one of the cliques. We'd like to use as few cliques as possible. (This problem is called clique edge cover, not clique cover.) We then assign each clique its own letter and label its members with that letter. Each sample distinguishable from all others gets its own letter as well.
For example, the graph corresponding to your sample input could be drawn like this.
3---1---5 4--6
My proposed algorithm is the following. Construct the graph and use the Bron--Kerbosch algorithm to find all maximal cliques. For the graph above, these are {1, 3}, {1, 5}, and {4, 6}. The set {1}, for example, is a clique, but it is not maximal because it is a subset of the clique {1, 3}. The set {1, 3, 5} is not a clique because there is no edge between 3 and 5. In the graph
1
/ \
3---5 4--6,
the maximal cliques would be {1, 3, 5} and {4, 6}.
Now search recursively for a small clique edge cover. The input to our recursive function is a set of edges remaining to be covered and the list of maximal cliques. Find the least edge in the remaining set, where, e.g., edge (1,2) < (1,5) < (2,3) < (2,5) < (3,4). For each maximal clique that contains this edge, construct a candidate solution comprised of that clique and the output of a recursive call where the clique edges are removed from set of edges remaining. Output the best candidate.
Unless there are very few edges, this may be too slow. The first performance improvement is memoize: maintain a map from inputs to outputs of the recursive function so that we can avoid doing the work twice. If that doesn't work, then R should have an interface to an integer program solver, and we can use integer programming to determine the best collection of cliques. (I'll explain this more if the other approach is insufficient.)
I thought I would post the solution that I was able to derive with additional help from the following question:
set.seed(1)
n <- 7
n2 <- 100
mu <- cumsum(runif(n, min=-3, max=3))
sigma <- runif(n, min=1, max=3)
dat <- vector(mode="list", n)
for(i in seq(dat)){
dat[[i]] <- rnorm(n2, mean=mu[i], sd=sigma[i])
}
df <- data.frame(group=as.factor(rep(seq(n), each=n2)), y=unlist(dat))
bp <- boxplot(y ~ group, df, notch=TRUE)
#significance test
kr <- kruskal.test(y ~ group, df)
mw <- pairwise.wilcox.test(df$y, df$g)
#matrix showing connections between levels
g <- as.matrix(mw$p.value > 0.05)
g <- cbind(rbind(NA, g), NA)
g <- replace(g, is.na(g), FALSE)
g <- g + t(g)
diag(g) <- 1
rownames(g) <- 1:n
colnames(g) <- 1:n
g
#install.packages("igraph")
library(igraph)
# Load data
same <- which(g==1)
topology <- data.frame(N1=((same-1) %% n) + 1, N2=((same-1) %/% n) + 1)
topology <- topology[order(topology[[1]]),] # Get rid of loops and ensure right naming of vertices
g3 <- simplify(graph.data.frame(topology,directed = FALSE))
get.data.frame(g3)
# Plot graph
plot(g3)
# Calcuate the maximal cliques
res <- maximal.cliques(g3)
# Reorder given the smallest level
res <- sapply(res, sort)
res <- res[order(sapply(res,function(x)paste0(sort(x),collapse=".")))]
ml<-max(sapply(res, length))
reord<-do.call(order, data.frame(
do.call(rbind,
lapply(res, function(x) c(sort(x), rep.int(0, ml-length(x))))
)
))
res <- res[reord]
lab.txt <- vector(mode="list", n)
lab <- letters[seq(res)]
for(i in seq(res)){
for(j in res[[i]]){
lab.txt[[j]] <- paste0(lab.txt[[j]], lab[i])
}
}
bp <- boxplot(y ~ group, df, notch=TRUE, outline=FALSE, ylim=range(df$y)+c(0,1))
text(x=1:n, y=bp$stats[5,], labels=lab.txt, col=1, cex=1, pos=3, font=2)
Cool code.
I think you need to quote the function order() when calling do.call:
reord<-do.call("order", data.frame(
do.call(rbind,
lapply(res, function(x) c(sort(x), rep.int(0, ml-length(x))))
)
))

Resources