Plotting fitted values from regression - r

Hey I have following code in R
S0 = 40
r = log(1 + 0.07)
sigma = 0.3
K = 45
n_steps_per_year = 4
dt = 1 / n_steps_per_year
T = 3
n_steps = n_steps_per_year * T
R = n_paths
Q = 70
P = 72
n_paths = P * Q
d = exp(-r * dt)
N = matrix(rnorm(n_paths * n_steps, mean = 0, sd = 1), n_paths, n_steps)
paths_S = matrix(nrow = n_paths, ncol = n_steps + 1, S0)
for(i in 1:n_paths){
for(j in 1:n_steps){
paths_S[i, j + 1] = paths_S[i, j] * exp((r - 0.5 * sigma ^ 2) * dt + sigma * sqrt(dt) * N[i, j])
}
}
I = apply(K - paths_S, c(1,2), max, 0)
V = matrix(nrow = n_paths, ncol = n_steps + 1)
V[, n_steps + 1] = I[, n_steps + 1]
dV = d * V[, n_steps + 1]
model = lm(dV ~ poly(paths_S[, n_steps], 10))
pred = predict(model, data.frame(x = paths_S[, n_steps]))
plot(paths_S[, n_steps], d * V[, n_steps + 1])
lines(paths_S[, n_steps], pred)
but when I run the last two lines then I get very strange plot (multiple lines instead of one line). What is going on?

You did not provide n_paths, lets assume:
n_paths = 7
set.seed(111)
Then running your code, before you plot, you need to order your x values before plotting:
o = order(paths_S[,12])
plot(paths_S[o, n_steps], d * V[o, n_steps + 1],cex=0.2,pch=20)
lines(paths_S[o, n_steps], pred[o],col="blue")

Related

ggplot's stat_function() giving wrong result

I generated some data to perfome a regression on it:
library(tidyverse)
library(nnet)
# Generating the data --------------------------
set.seed(100)
helicopter <- rnorm(20, mean = 35, sd = 3)
car <- rnorm(20, mean = 30, sd = 3)
bus <- rnorm(20, mean = 25, sd = 3)
bike <- rnorm(20, mean = 20, sd = 3)
transportation_data <- data.frame(helicopter, car, bus, bike) %>%
pivot_longer(cols = 1:4, values_to = "income", names_to = "mode")
# Setting up the regression -------------------
transportation_regression <- multinom(mode~income, data = transportation_data)
So far, so good. I now want to plot the regression results (probability of choosing a certain mode of transportation based on income) using stat_function:
ins <- coef(transportation_regression)[1:3]
betas <- coef(transportation_regression)[4:6]
transportation_data %>%
ggplot(aes(x = income))+
stat_function(fun = function(x) { 1 / (1 + sum(exp(ins + betas * x))) }, aes(color = "bike"))+
stat_function(fun = function(x) { exp(ins[1] + betas[1] * x) / (1 + sum(exp(ins + betas * x))) }, aes(color = "bus"))+
stat_function(fun = function(x) { exp(ins[2] + betas[2] * x) / (1 + sum(exp(ins + betas * x))) }, aes(color = "car"))+
stat_function(fun = function(x) { exp(ins[3] + betas[3] * x) / (1 + sum(exp(ins + betas * x))) }, aes(color = "helicopter"))
I get this output, which is obviously wrong, and a warning Warning: longer object length is not a multiple of shorter object length where I don't know what it means.
When I use the same functions, but predict data points first, everything works just fine:
income <- seq(0,50,0.1)
result <- matrix( , nrow = length(income), ncol = 4)
i <- 1
for(x in income){
result[i,1] <- 1 / (1 + sum(exp(ins + betas * x))) # bike
result[i,2] <- exp(ins[1] + betas[1] * x) / (1 + sum(exp(ins + betas * x))) # bus
result[i,3] <- exp(ins[2] + betas[2] * x) / (1 + sum(exp(ins + betas * x))) # car
result[i,4] <- exp(ins[3] + betas[3] * x) / (1 + sum(exp(ins + betas * x))) # helicopter
i <- i + 1
}
cbind(income, as.data.frame(result)) %>%
pivot_longer(cols = V1:V4) %>%
ggplot(aes(x = income, y = value, color = name))+
geom_line()
Why don't the stat_function() in ggplot work?
I think it's just a misunderstanding of how the function works. Here's an example of using stat_function() to generate the right result:
library(tidyverse)
library(nnet)
# Generating the data --------------------------
set.seed(100)
helicopter <- rnorm(20, mean = 35, sd = 3)
car <- rnorm(20, mean = 30, sd = 3)
bus <- rnorm(20, mean = 25, sd = 3)
bike <- rnorm(20, mean = 20, sd = 3)
transportation_data <- data.frame(helicopter, car, bus, bike) %>%
pivot_longer(cols = 1:4, values_to = "income", names_to = "mode")
# Setting up the regression -------------------
transportation_regression <- multinom(mode~income, data = transportation_data)
#> # weights: 12 (6 variable)
#> initial value 110.903549
#> iter 10 value 48.674542
#> iter 20 value 46.980349
#> iter 30 value 46.766625
#> iter 40 value 46.734782
#> iter 50 value 46.732249
#> final value 46.732163
#> converged
ins <- coef(transportation_regression)[1:3]
betas <- coef(transportation_regression)[4:6]
transportation_data %>%
ggplot(aes(x = income))+
stat_function(fun = function(x) { 1 / (1 + exp(ins[1] + betas[1] * x) + exp(ins[2] + betas[2] * x) + exp(ins[3] + betas[3] * x)) }, aes(color = "bike"))+
stat_function(fun = function(x) { exp(ins[1] + betas[1] * x) / (1 + exp(ins[1] + betas[1] * x) + exp(ins[2] + betas[2] * x) + exp(ins[3] + betas[3] * x)) }, aes(color = "bus"))+
stat_function(fun = function(x) { exp(ins[2] + betas[2] * x) / (1 + exp(ins[1] + betas[1] * x) + exp(ins[2] + betas[2] * x) + exp(ins[3] + betas[3] * x)) }, aes(color = "car"))+
stat_function(fun = function(x) { exp(ins[3] + betas[3] * x) / (1 + exp(ins[1] + betas[1] * x) + exp(ins[2] + betas[2] * x) + exp(ins[3] + betas[3] * x)) }, aes(color = "helicopter"))
There were a couple of problems originally. Take, for example, the first instance of stat_function(),
stat_function(fun = function(x) {
1 / (1 + sum(exp(ins + betas * x))) },
aes(color = "bike"))
You're expecting ins + betas * x to be equivalent to ins[1] + betas[1] * x + ins[2] + betas[2] * x + ins[3] + betas[3] * x, but it isn't essentially recycling ins and betas to make them vectors as long as x and then multiplying betas by x and adding ins.
The other problem was the sum() around exp(ins ...) Rather than summing the rows, it's summing all rows and columns of the output, making a scalar value.
You could also make it a bit more general using matrix calculations:
b <- coef(transportation_regression)
transportation_data %>%
ggplot(aes(x = income))+
stat_function(fun = function(x) { 1 / (1 + rowSums(exp(cbind(1, x) %*% t(b)))) }, aes(color = "bike"))+
stat_function(fun = function(x) { exp(ins[1] + betas[1] * x) / (1 + rowSums(exp(cbind(1, x) %*% t(b)))) }, aes(color = "bus"))+
stat_function(fun = function(x) { exp(ins[2] + betas[2] * x) / (1 + rowSums(exp(cbind(1, x) %*% t(b)))) }, aes(color = "car"))+
stat_function(fun = function(x) { exp(ins[3] + betas[3] * x) / (1 + rowSums(exp(cbind(1, x) %*% t(b)))) }, aes(color = "helicopter"))
Created on 2023-02-04 by the reprex package (v2.0.1)

Linear constrains Z and D in MARSS package

Recently, I need to establish a MARSS model such as:
y_t = c + beta * d1_t + alpha * x_t + v_t; x_t = x_(t-1) + w_t
then I try in R and I meet the problem:
Error in optim(pars, negloglike, method = "BFGS") :
objective function in optim evaluates to length 0 not 1
My benchmark model:
y_t = c + beta * d1_t + alpha * d2_t + alpha * x_t + v_t; x_t = x_(t-1) + w_t
My code:
y = rnorm(10)
x = matrix(0, 3, 10)
x[1:2, ] = matrix(rnorm(20), 2, 10)
x[3, ]= 1
x0 = matrix(0.1, 1, 1)
V0 = matrix(0.01, 1, 1)
B = matrix(1, 1, 1)
U = A = 'zero'
# pars to be estimated
pars = c(beta=0.5, alpha=0.5, c=0.5, q=1)
# calculate loglike
negloglike = function(pars){
Q = matrix(list('q'), 1, 1)
R = matrix(1, 1, 1)
Z = matrix(list(pars['alpha']), 1, 1)
D = matrix(list(pars['beta'], pars['alpha'], pars['c']), 1, 3)
model.list = list(B=B, U=U, Q=Q, Z=Z, A=A, D=D, d=x, R=R, x0=x0, V0=V0)
-1 * MARSS(y, model=model.list, control=list(
maxit=200,conv.test.slope.tol=0.1,abstol=0.1),method='kem', silent=TRUE)$loglik
}
optim(pars, negloglike, method = 'BFGS')
but it denoted:
Error in optim(pars, negloglike, method = "BFGS") :
objective function in optim evaluates to length 0 not 1
I need help, Thanks!

How to implement Euler method in R

I am trying to implement this Euler Method procedure but I am unable to get the required graphs.
solve_logistic <- function(N0, r = 1, delta_t = 0.01, times = 1000) {
N <- rep(N0, times)
dN <- function(N) r * N * (1 - N)
for (i in seq(2, times)) {
# Euler
N[i] <- N[i-1] + delta_t * dN(N[i-1])
# Improved Euler
# k <- N[i-1] + delta_t * dN(N[i-1])
# N[i] <- N[i-1] + 1 /2 * delta_t * (dN(N[i-1]) + dN(k))
# Runge-Kutta 4th order
# k1 <- dN(N[i-1]) * delta_t
# k2 <- dN(N[i-1] + k1/2) * delta_t
# k3 <- dN(N[i-1] + k2/2) * delta_t
# k4 <- dN(N[i-1] + k3) * delta_t
#
# N[i] <- N[i-1] + 1/6 * (k1 + 2*k2 + 2*k3 + k4)
}
N
}
This is the graph I want to make:
And you can also view the original source which I am following for this graph
Your interest for epedimiological model is a good thing.
To obtain a similar graph as you show, you need to code first the analytical solution of N(t) which is given on the reference web site.
logistic <- function(N0, r, t){
return(1 / (1 + ((1-N0)/N0) * exp(- r * t)))
}
Moreover you should be careful with absisse informations.
r <- 1
t <- 1:1000
N0 <- 0.03
delta_t <- 0.01
plot(t * delta_t, logistic(N0 = N0, r = r, t = t * delta_t), type = "l",
ylim = c(0, 1),
ylab = "N(t)",
xlab = "times")
lines(t * delta_t, solve_logistic(N0 = N0, times = max(t)),
col = "red", lty = 2)
It gives you part of the graphic, now you are able to compute error of the method and test with another delta.
The Euler method is a numerical method for EDO resolution based on Taylor expansion like gradient descent algorithm
.
solve_logistic <- function(N0, r = 1, delta_t = 0.01, times = 1000) {
N <- rep(N0, times)
dN <- function(N) r * N * (1 - N)
for (i in seq(2, times)) {
# Euler (you follow the deepest slope with a small step delta)
N[i] <- N[i-1] + delta_t * dN(N[i-1])
}
N
}

How to extend a line to touch a polygon?

I would like to extend line to touch polygon
Create line and polygon line and polygon
Here is my solution to "stretch and trim".
Turned out a bit trickier than the first "solution" I offered initially.
I also included cases for horizontal and vertical lines.
My example I think covers all cases (direction of lines)
I have the coding very simple , no effort to make it more efficient
library(sf)
#
s1 <- rbind(c(1, 2), c(2, 3))
ls1 <- st_linestring(s1)
s2 <- rbind(c(2, 2), c(1, 3))
ls2 <- st_linestring(s2)
s3 <- rbind(c(2, 2), c(1, 1))
ls3 <- st_linestring(s3)
s4 <- rbind(c(1, 2), c(2, 1)) # VERTICAL LINE
ls4 <- st_linestring(s4)
vl <- rbind(c(2.5, 2), c(2.5, 3))
svl <- st_linestring(vl)
hl <- rbind(c(0.5, 0.5), c(1, 0.5))
shl <- st_linestring(hl)
line <- st_multilinestring(list(ls1, ls2, ls3, ls4, svl, shl))
p1 <- rbind(c(0, 0), c(1, 0), c(3, 2), c(2, 4), c(1, 4), c(0, 0))
pol <- st_polygon(list(p1))
#
(
plot1 <- ggplot() +
geom_sf(data = ls1, col = 'red', size = 2) +
geom_sf(data = ls2, col = 'darkred', size = 2) +
geom_sf(data = ls3, col = 'blue', size = 2) +
geom_sf(data = ls4, col = 'darkblue', size = 2) +
geom_sf(data = svl, col = 'green', size = 2) +
geom_sf(data = shl, col = 'green', size = 2) +
geom_sf(data = pol, fill = NA)
)
####################### Function
line_stretchntrim <- function(line, polygon) {
if (st_crs(line) != st_crs(polygon))
return("CRS not matching")
bb <- st_bbox(polygon)
bbdiagLength <-
as.numeric(sqrt((bb$xmin - bb$xmax) ^ 2 + (bb$ymin - bb$ymax) ^ 2))
xy <- st_coordinates(line)[, 1:2]
npairs <- nrow(xy) / 2
etline <- NULL
for (i in 1:npairs) {
ii <- (i - 1) * 2 + 1
x <- as.numeric(xy[ii:(ii + 1), 1])
y <- as.numeric(xy[ii:(ii + 1), 2])
dxline <- diff(x)
dyline <- diff(y)
d <- sqrt(dxline ^ 2 + dyline ^ 2)
scale <- abs(as.numeric(bbdiagLength)) # * extra if need be
signx <- sign(dxline)
signy <- sign(dyline)
theta <- atan(dxline / dyline)
# expand
if (signy == 1) {
dx1 <- -sin(theta) * scale #* d
dy1 <- -cos(theta) * scale #* d
dx2 <- sin(theta) * scale #* d
dy2 <- cos(theta) * scale #* d
}
if (signy == -1) {
dx1 <- sin(theta) * scale# * d
dy1 <- cos(theta) * scale# * d
dx2 <- -sin(theta) * scale# * d
dy2 <- -cos(theta) * scale# * d
}
## Cases when dxline == 0 or dyline == 0
# dxline == 0
if ((dxline == 0) * (signy == -1)) {
dx1 <- 0
dy1 <- cos(theta) * scale# * d
dx2 <- 0
dy2 <- -cos(theta) * scale# * d
}
if ((dxline == 0) * (signy == 1)) {
dx1 <- 0
dy1 <- -cos(theta) * scale# * d
dx2 <- 0
dy2 <- cos(theta) * scale# * d
}
if ((signx == 1) * (dyline == 0)) {
dx1 <- -sin(theta) * scale# * d
dy1 <- 0
dx2 <- sin(theta) * scale# * d
dy2 <- 0
}
if ((signx == -1) * (dyline == 0)) {
dx1 <- sin(theta) * scale# * d
dy1 <- 0
dx2 <- -sin(theta) * scale# * d
dy2 <- 0
}
x1 <- x[1] + dx1
y1 <- y[1] + dy1
# second point shift
x2 <- x[2] + dx2
y2 <- y[2] + dy2
# construct spatial line
sline <- st_linestring(matrix(c(x1, y1, x2, y2),
byrow = TRUE, ncol = 2))
slineSf <- st_sf(geom = st_sfc(sline), crs = st_crs(polygon))
# Now trim to polygon
stline <- st_intersection(slineSf, polygon)
etline <- if (i == 1)
stline
else
rbind(etline, stline)
}
etline
}
stretched_line <- line_stretchntrim(line, pol)
#
ggplot() +
geom_sf(data = pol, fill = NA) +
geom_sf(data = line, size = 2) +
geom_sf(data = stretched_line)

Changing branch length in dendrogram (pheatmap)

I am trying to plot a heatmap with the library pheatmap in R.
I think that by default the branch length is proportional to the "dissimilarity" of the clusters that got merged at this step. I would like to chance that, so it is a fixed value because for my purpose it looks very weird!
If anyone has an idea how I can fix this, I would be very happy.
Here is a sample code
library(pheatmap)
test = matrix(rnorm(6000), 100, 60)
pheatmap(test)
Cheers!
Here is an example of two column groups with high dissimilarity:
library(pheatmap)
test = cbind(matrix(rnorm(3000), 100, 30),
matrix(rnorm(3000)+10, 100, 30))
pheatmap(test)
TIn pheatmapthe dendrogram is plotted by the pheatmap:::draw_dendrogram function
and branch lengths are stored in the h object.
Below I define equal-length branches adding the command
hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))
as follows:
draw_dendrogram <- function(hc, gaps, horizontal = T) {
# Define equal-length branches
hc$height <- cumsum(rep(1/length(hc$height), length(hc$height)))
h = hc$height/max(hc$height)/1.05
m = hc$merge
o = hc$order
n = length(o)
m[m > 0] = n + m[m > 0]
m[m < 0] = abs(m[m < 0])
dist = matrix(0, nrow = 2 * n - 1, ncol = 2, dimnames = list(NULL,
c("x", "y")))
dist[1:n, 1] = 1/n/2 + (1/n) * (match(1:n, o) - 1)
for (i in 1:nrow(m)) {
dist[n + i, 1] = (dist[m[i, 1], 1] + dist[m[i, 2], 1])/2
dist[n + i, 2] = h[i]
}
draw_connection = function(x1, x2, y1, y2, y) {
res = list(x = c(x1, x1, x2, x2), y = c(y1, y, y, y2))
return(res)
}
x = rep(NA, nrow(m) * 4)
y = rep(NA, nrow(m) * 4)
id = rep(1:nrow(m), rep(4, nrow(m)))
for (i in 1:nrow(m)) {
c = draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1],
dist[m[i, 1], 2], dist[m[i, 2], 2], h[i])
k = (i - 1) * 4 + 1
x[k:(k + 3)] = c$x
y[k:(k + 3)] = c$y
}
x = pheatmap:::find_coordinates(n, gaps, x * n)$coord
y = unit(y, "npc")
if (!horizontal) {
a = x
x = unit(1, "npc") - y
y = unit(1, "npc") - a
}
res = polylineGrob(x = x, y = y, id = id)
return(res)
}
# Replace the non-exported function `draw_dendrogram` in `pheatmap`:
assignInNamespace(x="draw_dendrogram", value=draw_dendrogram, ns="pheatmap")
pheatmap(test)
The result is:

Resources