I'm trying to plot a vector field based on a matrix multiplication. The problem is that the function that produces the values for the matrix multiplication, all.p, can only take a single number. When a range of numbers is passed to all.p, its output is not usable for the matrix multiplication. Is there a way to change all.p so that, with multiple inputs, the matrix multiplication is still valid and the vector field can be computed? The code fails at the vectorfield function, because vectorfield feeds in values over the range 0 to 1, while all.p can't take multiple inputs.
geno.fit = matrix(c(0.791, 1.000, 0.834,
                    0.670, 1.006, 0.901,
                    0.657, 0.657, 1.067),
                  nrow = 3,
                  ncol = 3,
                  byrow = T)

all.p <- function(p) {
  if (length(p) > 1) {
    stop("More numbers in input than expected")
  }
  P = p^2
  PQ = 2*p*(1-p)
  Q = (1-p)^2
  return(list = c(P = P, PQ = PQ, Q = Q))
}
library(pracma)
f <- function(x, y) all.p(x) %*% geno.fit %*% all.p(y)
xx <- c(0, 1); yy <- c(0, 1)
vectorfield(fun = f, xlim = xx, ylim = yy, scale = 0.1)
for (xs in seq(0, 1, by = 0.25)) {
  sol <- rk4(f, 0, 1, xs, 100)
  lines(sol$x, sol$y, col = "darkgreen")
}
grid()
I also tried to use a for loop.
f <- function(x, y, n = 16) {
  space3 = matrix(NA, nrow = n, ncol = n)
  for (i in 1:length(x)) {
    for (j in 1:length(y)) {
      # Calculate mean fitness
      space3[i,j] = all.p(x[i]) %*% geno.fit %*% all.p(y[j])
    }
  }
  return(space3)
}
xx <- c(0, 1); yy <- c(0, 1)
f(seq(0,1,length.out = 16), seq(0,1,length.out = 16))
vectorfield(fun = f, xlim = xx, ylim = yy, scale = 0.1)
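For illustration, one way to at least evaluate the fitness expression element-wise over equal-length vectors would be to wrap the scalar-only version with mapply(); this is only a sketch, and I do not know whether this is the shape vectorfield() actually expects internally:
f_scalar <- function(x, y) as.numeric(all.p(x) %*% geno.fit %*% all.p(y))  # scalar inputs only
f_elementwise <- function(x, y) mapply(f_scalar, x, y)                     # element-wise over vectors
f_elementwise(seq(0, 1, by = 0.25), seq(0, 1, by = 0.25))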
Below is the code to make the gradient ascent plot (without the vectors).
library(fields) # for image.plot
res = 0.01
seq.x = seq(0, 1, by = res)
space = outer(seq.x, seq.x, "*")
space2 = space
for (i in 1:length(seq.x)) {
  for (j in 1:length(seq.x)) {
    space[i,j] = all.p(1-seq.x[i]) %*% geno.fit %*% all.p(1-seq.x[j])
  }
}
round(t(space), 3)
new.space = t(space)
image.plot(new.space)
by.text = 8
for (i in seq(1, length(seq.x), by = by.text)) {
  for (j in seq(1, length(seq.x), by = by.text)) {
    text(seq.x[i], seq.x[j],
         labels = round(new.space[i,j], 4),
         cex = new.space[i,j]/2,
         col = "black")
  }
}
contour(new.space, ylim = c(1,0), add = T, nlevels = 50)
I was able to make the vector field function work, but it's not showing what I was expecting from the previous gradient ascent plot:
How can the two be reconciled (i.e., how can the vectors be plotted on the gradient ascent image so that they show the proper direction of steepest ascent)?
Here is my solution:
library(fields) # for image.plot
library(plotly)
library(raster)
# Genotype fitness matrix -------------------------------------------------
geno.fit = matrix(c(0.791, 1.000, 0.834,
                    0.670, 1.006, 0.901,
                    0.657, 0.657, 1.067),
                  nrow = 3,
                  ncol = 3,
                  byrow = T)
# Resolution
res = 0.01
# Sequence of X
seq.x = seq(0,1,by = res)
# Make a matrix
space = outer(seq.x,seq.x,"*")
# Function to calculate the AVERAGE fitness for a given frequency of an allele to get the expected frequency of genotypes in a population
all.p <- function(p) {   # Takes the frequency of an allele in the population
  if (length(p) > 1) {   # Has to be only 1 number
    stop("More numbers in input than expected")
  }
  P = p^2                # Gets the AA
  PQ = 2*p*(1-p)         # Gets the Aa
  Q = (1-p)^2            # Gets the aa
  return(list = c(P = P, # Return the values
                  PQ = PQ,
                  Q = Q))
}
# Examples
all.p(0)
all.p(1)
# Plot the matrix of all combinations of genotype frequencies
image.plot(space,
           ylim = c(1.05, -0.05),
           ylab = "Percentage of Chromosome EF of TD form",
           xlab = "Percentage of Chromosome CD of BL form")
# Backup the data
space2 = space
# Calculate the average fitness for EVERY combination of frequencies of the 2 genotypes
for (i in 1:length(seq.x)) {
  for (j in 1:length(seq.x)) {
    # Calculate mean fitness
    space[i,j] = all.p(1-seq.x[i]) %*% geno.fit %*% all.p(1-seq.x[j])
  }
}
# Show the result
round(t(space),3)
# Transform the space
new.space = t(space)
image.plot(new.space,
           # ylim = c(1.01, -0.01),
           ylab = "Percentage of Chromosome EF of TD (Tidbinbilla) form",
           xlab = "Percentage of Chromosome CD of BL (Blundell) form")
# Add the numbers to get a better sense of the average fitness values at each point
by.text = 8
for (i in seq(1, length(seq.x), by = by.text)) {
  for (j in seq(1, length(seq.x), by = by.text)) {
    text(seq.x[i], seq.x[j],
         labels = round(new.space[i,j], 4),
         cex = new.space[i,j]/2,
         col = "black") # col = "gray70"
  }
}
# Add contour lines
contour(new.space,ylim=c(1,0),add = T, nlevels = 50)
# Plotly 3D graph --------------------------------------------------------
# To get the 3D plane in an INTERACTIVE graph
xyz = cbind(expand.grid(seq.x, seq.x),
            as.vector(new.space))
plot_ly(x = xyz[,1], y = xyz[,2], z = xyz[,3],
        color = xyz[,3])
# Vector field on the Adaptive landscape ----------------------------------
library(tidyverse)
library(ggquiver)
raster2quiver <- function(rast, aggregate = 50, colours = terrain.colors(6), contour.breaks = 200)
{
  names(rast) <- "z"
  quiv <- aggregate(rast, aggregate)
  terr <- terrain(quiv, opt = c('slope', 'aspect'))
  quiv$u <- -terr$slope[] * sin(terr$aspect[])
  quiv$v <- -terr$slope[] * cos(terr$aspect[])
  quiv_df <- as.data.frame(quiv, xy = TRUE)
  rast_df <- as.data.frame(rast, xy = TRUE)

  print(ggplot(mapping = aes(x = x, y = y, fill = z)) +
          geom_raster(data = rast_df, na.rm = TRUE) +
          geom_contour(data = rast_df,
                       aes(z = z, color = ..level..),
                       breaks = seq(0, 3, length.out = contour.breaks),
                       size = 1.4) +
          scale_color_gradient(low = "blue", high = "red") +
          geom_quiver(data = quiv_df, aes(u = u, v = v), vecsize = 1.5) +
          scale_fill_gradientn(colours = colours, na.value = "transparent") +
          theme_bw())

  return(quiv_df)
}
r <- raster(space,
            xmn = range(seq.x)[1], xmx = range(seq.x)[2],
            ymn = range(seq.x)[1], ymx = range(seq.x)[2],
            crs = CRS("+proj=utm +zone=11 +datum=NAD83"))
# Draw the adaptive landscape
raster2quiver(rast = r, aggregate = 2, colours = tim.colors(100))
Not exactly what I wanted, but it does what I was looking for!
I am trying to solve this system of ODEs with deSolve, dX/dt = -X*a + (Y-X)*b + c and dY/dt = -Y*a + (X-Y)*b, for time [0, 200], with a = 0.33 and b = 0.2, but c is 1 for time in [50, 70] and 0 otherwise. The code I have been using is:
library(deSolve)

time <- seq(0, 200, by = 1)
parameters <- c(a = 0.33, b = 0.2, c = 1)
state <- c(X = 0, Y = 0)

two_comp <- function(time, state, parameters){
  with(as.list(c(state, parameters)), {
    dX = -X*a + (Y-X)*b + c
    dY = -Y*a + (X-Y)*b
    return(list(c(dX, dY)))
  })
}

out <- ode(y = state, times = time, func = two_comp, parms = parameters)
out.df = as.data.frame(out)
I have left out the time-varying part of the c parameter since I can't figure out a way to include it and have it run smoothly. I tried including it in the function definition, but to no avail.
The standard way is to use approxfun, i.e. to create a time-dependent signal, also called a forcing variable:
library("deSolve")
time <- seq(0, 200, by=1)
parameters <- c(a=0.33, b=0.2, c=1)
state <- c(X = 0, Y = 0)
two_comp <- function(time, state, parameters, signal){
cc <- signal(time)
with(as.list(c(state, parameters)), {
dX <- -X * a + (Y - X) * b + cc
dY <- -Y * a + (X - Y) * b
return(list(c(dX, dY), c = cc))
})
}
signal <- approxfun(x = c(0, 50, 70, 200),
y = c(0, 1, 0, 0),
method = "constant", rule = 2)
out <- ode(y = state, times = time, func = two_comp,
parms = parameters, signal = signal)
plot(out)
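As a quick sanity check of the forcing variable defined above (shown only for illustration): with method = "constant" the left-hand value is carried forward, so the signal is 1 on [50, 70) and 0 elsewhere.
signal(c(0, 49, 50, 69, 70, 200))
## [1] 0 0 1 1 0 0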
Note also the deSolve-specific plot function, and that the time-dependent variable cc is returned as an additional output variable.
More about this can be found:
in the ?forcings help page and
in a short tutorial on Github.
The interval limits where c is equal to 1 can be passed as parameters. Then, inside the differential function, use them to create a logical value
time >= lower & time <= upper
Since FALSE/TRUE are coded as the integers 0/1, every time this condition is false, c is multiplied by zero and the trick is done.
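A minimal illustration of this on/off behaviour (the example times below are arbitrary and not part of the model):
c_val <- 1
t_test <- c(40, 55, 80)
c_val * (t_test >= 50 & t_test <= 70)  # FALSE/TRUE coerce to 0/1
## [1] 0 1 0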
library(deSolve)
two_comp <- function(time, state, parameters){
  with(as.list(c(state, parameters)), {
    dX = -X*a + (Y-X)*b + c*(time >= lower & time <= upper)
    dY = -Y*a + (X-Y)*b
    return(list(c(dX, dY)))
  })
}

time <- seq(0, 200, by = 1)
parameters <- c(a = 0.33, b = 0.2, c = 1, lower = 50, upper = 70)
state <- c(X = 0, Y = 0)

out <- ode(
  y = state,
  times = time,
  func = two_comp,
  parms = parameters
)
out.df <- as.data.frame(out)
head(out.df)
matplot(out.df$time, out.df[-1], type = "l", lty = "solid", ylim = c(0, 3))
legend("topright", legend = names(out.df)[-1], col = 1:2, lty = "solid")
This is my first attempt at fitting a non-linear model in R, so please bear with me.
Problem
I am trying to understand why nls() is giving me this error:
Error in nlsModel(formula, mf, start, wts): singular gradient matrix at initial parameter estimates
Hypotheses
From what I've read in other questions here on SO, it could be because:
my model is discontinuous, or
my model is over-determined, or
bad choice of starting parameter values
So I am calling for help on how to overcome this error. Can I change the model and still use nls(), or do I need to use nls.lm from the minpack.lm package, as I have read elsewhere?
My approach
Here are some details about the model:
the model is a discontinuous function, a kind of staircase type of function (see plot below)
in general, the number of steps in the model can be variable yet they are fixed for a specific fitting event
MWE that shows the problem
Brief explanation of the MWE code
step_fn(x, min = 0, max = 1): function that returns 1 within the interval (min, max] and 0 otherwise; sorry about the name, I realize now it is not really a step function... interval_fn() would be more appropriate I guess.
staircase(x, dx, dy): a summation of step_fn() functions. dx is a vector of widths for the steps, i.e. max - min, and dy is the increment in y for each step.
staircase_formula(n = 1L): generates a formula object that represents the model modeled by the function staircase() (to be used with the nls() function).
please do note that I use the purrr and glue packages in the example below.
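For example (using the definitions in the Code section below), the generated model for n = 2 looks roughly like this:
staircase_formula(2)
## y ~ dy1 * step_fn(x, min = 0, max = dx1) +
##     dy2 * step_fn(x, min = dx1, max = dx1 + dx2)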
Code
step_fn <- function(x, min = 0, max = 1) {
  y <- x
  y[x > min & x <= max] <- 1
  y[x <= min] <- 0
  y[x > max] <- 0
  return(y)
}

staircase <- function(x, dx, dy) {
  max <- cumsum(dx)
  min <- c(0, max[1:(length(dx)-1)])
  step <- cumsum(dy)
  purrr::reduce(purrr::pmap(list(min, max, step),
                            ~ ..3 * step_fn(x, min = ..1, max = ..2)),
                `+`)
}

staircase_formula <- function(n = 1L) {
  i <- seq_len(n)
  dx <- sprintf("dx%d", i)
  min <- c('0', purrr::accumulate(dx[-n], .f = ~ paste(.x, .y, sep = " + ")))
  max <- purrr::accumulate(dx, .f = ~ paste(.x, .y, sep = " + "))
  lhs <- "y"
  rhs <- paste(glue::glue('dy{i} * step_fn(x, min = {min}, max = {max})'),
               collapse = " + ")
  sc_form <- as.formula(glue::glue("{lhs} ~ {rhs}"))
  return(sc_form)
}
x <- seq(0, 10, by = 0.01)
y <- staircase(x, c(1,2,2,5), c(2,5,2,1)) + rnorm(length(x), mean = 0, sd = 0.2)
plot(x = x, y = y)
lines(x = x, y = staircase(x, dx = c(1,2,2,5), dy = c(2,5,2,1)), col="red")
my_data <- data.frame(x = x, y = y)
my_model <- staircase_formula(4)
params <- list(dx1 = 1, dx2 = 2, dx3 = 2, dx4 = 5,
dy1 = 2, dy2 = 5, dy3 = 2, dy4 = 1)
m <- nls(formula = my_model, start = params, data = my_data)
#> Error in nlsModel(formula, mf, start, wts): singular gradient matrix at initial parameter estimates
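As a quick check of hypothesis 1 above (added only for illustration, reusing step_fn(), staircase(), and x from the MWE): the model is piecewise constant in the dx parameters, so a tiny perturbation of dx1 leaves every fitted value unchanged, which would give a zero column in the gradient matrix.
y0 <- staircase(x, dx = c(1, 2, 2, 5), dy = c(2, 5, 2, 1))
y1 <- staircase(x, dx = c(1 + 1e-7, 2, 2, 5), dy = c(2, 5, 2, 1))
all.equal(y0, y1)   # TRUE: no x value falls in (1, 1 + 1e-7], so nothing changes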
Any help is greatly appreciated.
I assume you are given a vector of observations of length len, like the one plotted in your example, and you wish to identify k jumps and k jump sizes. (Or maybe I misunderstood you; you have not really said what you want to achieve.)
Below I will sketch a solution using Local Search. I start with your example data:
x <- seq(0, 10, by = 0.01)
y <- staircase(x,
c(1,2,2,5),
c(2,5,2,1)) + rnorm(length(x), mean = 0, sd = 0.2)
A solution is a list of positions and sizes of the jumps. Note that I use vectors to store these data, as it will become cumbersome to define variables when you have 20 jumps, say.
An example (random) solution:
k <- 5 ## number of jumps
len <- length(x)
sol <- list(position = sample(len, size = k),
size = runif(k))
## $position
## [1] 89 236 859 885 730
##
## $size
## [1] 0.2377453 0.2108495 0.3404345 0.4626004 0.6944078
We need an objective function to compute the quality of the solution. I also define a simple helper function stairs, which is used by the objective function.
The objective function abs_diff computes the average absolute difference between the fitted series (as defined by the solution) and y.
stairs <- function(len, position, size) {
  ans <- numeric(len)
  ans[position] <- size
  cumsum(ans)
}

abs_diff <- function(sol, y, stairs, ...) {
  yy <- stairs(length(y), sol$position, sol$size)
  sum(abs(y - yy))/length(y)
}
Now comes the key component for a Local Search: the neighbourhood function that is used to evolve the solution. The neighbourhood function takes a solution and changes it slightly. Here, it will either pick a position or a size and modify it slightly.
neighbour <- function(sol, len, ...) {
  p <- sol$position
  s <- sol$size
  if (runif(1) > 0.5) {
    ## either move one of the positions ...
    i <- sample.int(length(p), size = 1)
    p[i] <- p[i] + sample(-25:25, size = 1)
    p[i] <- min(max(1, p[i]), len)
  } else {
    ## ... or change a jump size
    i <- sample.int(length(s), size = 1)
    s[i] <- s[i] + runif(1, min = -s[i], max = 1)
  }
  list(position = p, size = s)
}
An example call: here the new solution has its first jump size changed.
## > sol
## $position
## [1] 89 236 859 885 730
##
## $size
## [1] 0.2377453 0.2108495 0.3404345 0.4626004 0.6944078
##
## > neighbour(sol, len)
## $position
## [1] 89 236 859 885 730
##
## $size
## [1] 0.2127044 0.2108495 0.3404345 0.4626004 0.6944078
It remains to run the Local Search.
library("NMOF")
sol.ls <- LSopt(abs_diff,
list(x0 = sol, nI = 50000, neighbour = neighbour),
stairs = stairs,
len = len,
y = y)
We can plot the solution: the fitted line is shown in blue.
plot(x, y)
lines(x, stairs(len, sol.ls$xbest$position, sol.ls$xbest$size),
col = "blue", type = "S")
Try differential evolution (DEopt from the NMOF package) instead:
library(NMOF)
yf <- function(params, x){
  dx1 = params[1]; dx2 = params[2]; dx3 = params[3]; dx4 = params[4]
  dy1 = params[5]; dy2 = params[6]; dy3 = params[7]; dy4 = params[8]
  dy1 * step_fn(x, min = 0, max = dx1) +
    dy2 * step_fn(x, min = dx1, max = dx1 + dx2) +
    dy3 * step_fn(x, min = dx1 + dx2, max = dx1 + dx2 + dx3) +
    dy4 * step_fn(x, min = dx1 + dx2 + dx3, max = dx1 + dx2 + dx3 + dx4)
}
algo1 <- list(printBar = FALSE,
nP = 200L,
nG = 1000L,
F = 0.50,
CR = 0.99,
min = c(0,1,1,4,1,4,1,0),
max = c(2,3,3,6,3,6,3,2))
OF2 <- function(Param, data) {   # Param: parameter vector; data: list with x, y, and model
  x <- data$x
  y <- data$y
  ye <- data$model(Param, x)
  aux <- y - ye; aux <- sum(aux^2)
  if (is.na(aux)) aux <- 1e10
  aux
}
data5 <- list(x = x, y = y, model = yf, ww = 1)
system.time(sol5 <- DEopt(OF = OF2, algo = algo1, data = data5))
sol5$xbest
OF2(sol5$xbest,data5)
plot(x,y)
lines(data5$x,data5$model(sol5$xbest, data5$x),col=7,lwd=2)
#> sol5$xbest
#[1] 1.106396 12.719182 -9.574088 18.017527 3.366852 8.721374 -19.879474 1.090023
#> OF2(sol5$xbest,data5)
#[1] 1000.424
I am attempting to reproduce a corrgram (below; Fig 1) using the reproducible R code of Zuur et al (2010), showing the frequency with which pairs of water-bird species both have zero abundance. The colour and the amount that a circle has been filled correspond to the proportion of observations with double zeros. The diagonal running from bottom left to the top right represents the percentage of observations of a variable equal to zero.
I have adapted this code for my data but I am experiencing the same problem after running the code for both datasets. When I run the code, the circles inside the corrgram are not filling in, and remain empty (below; Figure 2).
I am however confused as to why I am hitting this problem. If anyone has a solution as to why this occurs, then I would be deeply appreciative for your help.
Data: By Zuur et al (2010)
The data is too large to include with this post but it can be found in the supporting materials section called ElphickBirdData.txt
R Code: Zuur et al (2010)
RiceField <- read.table(file="ElphickBirdData.txt", header = TRUE)
AllS <- c(
"TUSW", "GWFG", "WHGO", "CAGO", "MALL",
"GADW", "GWTE", "CITE", "UNTE", "AMWI", "NOPI",
"NOSH", "RIDU", "CANV", "BUFF", "WODU", "RUDU",
"EUWI", "UNDU", "PBGB", "SORA", "COOT", "COMO",
"AMBI", "BCNH", "GBHE", "SNEG", "GREG", "WFIB",
"SACR", "AMAV", "BNST", "BBPL", "KILL", "LBCU",
"GRYE", "LEYE", "LBDO", "SNIP", "DUNL", "WESA",
"LESA", "PEEP", "RUFF", "UNSH", "RBGU", "HEGU",
"CAGU", "GUSP")
#Determine species richness
Richness <- colSums(RiceField[,AllS] > 0, na.rm = TRUE)
#Remove all covariates
Birds <- RiceField[,AllS]
#To reduce the number of variables in the figure, we only used the
#20 species that occurred at more than 40 sites.
#As a result, N = 20. Else it becomes a mess.
Birds2 <- Birds[, Richness > 40]
N <- ncol(Birds2)
AllNames <- names(Birds2)
A <- matrix(nrow = N, ncol = N)
for (i in 1:N){
  for (j in 1:N){
    A[i,j] <- sum(RiceField[,AllS[i]]==0 & RiceField[,AllS[j]]==0, na.rm=TRUE)
  }
}
A1 <- A/2035
print(A1, digits = 2)
rownames(A1) <- AllNames
colnames(A1) <- AllNames
library(lattice)
library(RColorBrewer)
panel.corrgram.2 <- function(x, y, z, subscripts, at = pretty(z), scale = 0.8, ...)
{
  require("grid", quietly = TRUE)
  x <- as.numeric(x)[subscripts]
  y <- as.numeric(y)[subscripts]
  z <- as.numeric(z)[subscripts]
  zcol <- level.colors(z, at = at, ...)
  for (i in seq(along = z))
  {
    lims <- range(0, z[i])
    tval <- 2 * base::pi *
      seq(from = lims[1], to = lims[2], by = 0.01)
    grid.polygon(x = x[i] + .5 * scale * c(0, sin(tval)),
                 y = y[i] + .5 * scale * c(0, cos(tval)),
                 default.units = "native",
                 gp = gpar(fill = zcol[i]))
    grid.circle(x = x[i], y = y[i], r = .5 * scale,
                default.units = "native")
  }
}
levelplot(A1,xlab=NULL,ylab=NULL,
at=do.breaks(c(0.5,1.01),101),
panel=panel.corrgram.2,
scales=list(x=list(rot=90)),
colorkey=list(space="top"),
col.regions=colorRampPalette(c("red","white","blue")))
#Grey colours
levelplot(A1,xlab=NULL,ylab=NULL,
at=do.breaks(c(0.5,1.01),101),
panel=panel.corrgram.2,
scales=list(x=list(rot=90)),
colorkey=list(space="top"),
col.regions=colorRampPalette(c(grey(0.8),grey(0.5),grey(0.2))))
Figure 1.
Figure 2
The cause of your problem is that grid.circle paints over the grid.polygon with white. You can solve it by changing the order of grid.circle and grid.polygon (or by adding gp = gpar(fill = NA) to grid.circle()).
panel.corrgram.2.2 <- function(x, y, z, subscripts, at = pretty(z), scale = 0.8, ...)
{
  require("grid", quietly = TRUE)
  x <- as.numeric(x)[subscripts]
  y <- as.numeric(y)[subscripts]
  z <- as.numeric(z)[subscripts]
  zcol <- level.colors(z, at = at, ...)
  for (i in seq(along = z))
  {
    lims <- range(0, z[i])
    tval <- 2 * base::pi *
      seq(from = lims[1], to = lims[2], by = 0.01)
    grid.circle(x = x[i], y = y[i], r = .5 * scale,   # change the order
                default.units = "native")
    grid.polygon(x = x[i] + .5 * scale * c(0, sin(tval)),
                 y = y[i] + .5 * scale * c(0, cos(tval)),
                 default.units = "native",
                 gp = gpar(fill = zcol[i]))
  }
}
levelplot(A1,xlab=NULL,ylab=NULL,
at=do.breaks(c(0.5,1.01),101),
panel=panel.corrgram.2.2,
scales=list(x=list(rot=90)),
colorkey=list(space="top"),
col.regions=colorRampPalette(c("red","white","blue")))
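For completeness, here is a sketch of the second option mentioned above: keep the original drawing order but give the circle a transparent fill so it no longer covers the wedge. panel.corrgram.2.3 is just a name chosen for this variant, and it has not been tested against the full data set.
panel.corrgram.2.3 <- function(x, y, z, subscripts, at = pretty(z), scale = 0.8, ...)
{
  require("grid", quietly = TRUE)
  x <- as.numeric(x)[subscripts]
  y <- as.numeric(y)[subscripts]
  z <- as.numeric(z)[subscripts]
  zcol <- level.colors(z, at = at, ...)
  for (i in seq(along = z))
  {
    lims <- range(0, z[i])
    tval <- 2 * base::pi *
      seq(from = lims[1], to = lims[2], by = 0.01)
    grid.polygon(x = x[i] + .5 * scale * c(0, sin(tval)),
                 y = y[i] + .5 * scale * c(0, cos(tval)),
                 default.units = "native",
                 gp = gpar(fill = zcol[i]))
    grid.circle(x = x[i], y = y[i], r = .5 * scale,
                default.units = "native",
                gp = gpar(fill = NA))   # transparent fill, so the wedge underneath stays visible
  }
}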
I am trying to model diffusion in 2D in R, with the diffusion rate being dependent on the density y. I have completed this model in 1D, but when trying to change it to 2D I keep getting the error:
Error in -VF.grid$x.int * D.grid$x.int * diff(rbind(C.x.up, C, C.x.down,  : non-conformable arrays
I have no data, as it is a simulation. My code is as follows:
library(ReacTran)
N <- 50 # number of grid cells
Nx <-50
Ny <-50
XX <- 10 # total size
dy <- dx <- XX/N # grid size
Dy <- Dx <- 0.1 # diffusion coeff, X- and Y-direction
r <- 0.005 # growth rate
ini <- 10 # initial value at x=0
N2 <- ceiling(N/2)
K <- 100 #Carrying Capacity
A0<- 2 #pop ini size
x.grid <- setup.grid.1D(x.up = 0, x.down = 1, N = N)
y.grid <- setup.grid.1D(x.up = 0, x.down = 1, N = N)
grid2D <- setup.grid.2D(x.grid, y.grid)
D.grid <- setup.prop.2D(value = Dx, y.value = Dy, grid = grid2D) #diffusion coefficient on cell interfaces
v.grid <- setup.prop.2D(value = 0, y.value=0, grid = grid2D) #advection velocity
A.grid <- setup.prop.2D(value = 1, y.value=1, grid = grid2D) #interface area
AFDW.grid <- setup.prop.2D(value = 0, y.value=0, grid = grid2D) #advection finite-difference weighting
VF.grid <- setup.prop.2D(value = 0, y.value=1, grid = grid2D) #volume fraction
# The model equations - using the grids
Diff2Db <- function (t, y, parms) {
  U <- matrix(nrow = N, ncol = N, data = y)
  dCONC <- tran.2D(C = y, C.x.up = 0, C.x.down = 0,
                   C.y.up = 0, C.y.down = 0,
                   grid = grid2D, D.grid = D.grid,
                   D.x = (y-1)^2 + 1, D.y = (y-1)^2 + 1, dx = dx, dy = dy,
                   A.grid = A.grid,
                   VF.grid = VF.grid, AFDW.grid = AFDW.grid, v.grid = v.grid
                   )$dC
  return(list(dCONC))
}
# initial condition: 0 everywhere, except in central point
y <- matrix(nrow = N, ncol = N, data = 0)
y[N2,N2] <- ini # initial concentration in the central point...
times <- 0:8
outb <- ode.2D (y = y, func = Diff2Db, t = times, parms = NULL,
dim = c(49, N), lrw = 160000)
I am out of ideas to try to fix it. Any help would be greatly appreciated.
Thank you in advance