stat_bin() can only have an x or y aesthetic

stat_bin() can only have an x or y aesthetic - r

library(ggplot2)
dev.new()
n = 0
set.seed(1684)
x = seq(15, 33, by = 0.1)
f <- function(x) {
out <- ifelse(
x < 15 | 33 < x,
0,
ifelse(
15 <= x & x <= 24,
(2*(x-15))/((33-15)*(24-15)),
ifelse(
24 < x & x <= 33,
(2*(33-x))/((33-15)*(33-24)),
NA_real_
)))
if (any((is.na(out) | is.nan(out)) & (!is.na(x) & !is.nan(x)))) {
warning("f(x) undefined for some input values")
}
out
}
while (n != 105) {
n = n + 1
x1 = runif(1, min = 15 , max = 33)
num = runif(1, min = 0 , max = 1)
if ((num < (f(x1)/2/(33-15))) && (num == (18*(f(x1)/2)))) {
x_accept = c(x_accept, x1)
}
}
histo <- data.frame(x = x_accept)
dat <- data.frame(x = x, y = f(x))
ggplot(dat, aes(x, y)) +
geom_line(aes(x, y)) +
geom_histogram(aes(x))
From the previous code it gives me this error:
Error in f():
! stat_bin() can only have an x or y aesthetic.
Data from the dput(head(dat)):
structure(list(x = c(15, 15.1, 15.2, 15.3, 15.4, 15.5), y = c(0,
0.00123456790123456, 0.00246913580246913, 0.00370370370370371,
0.00493827160493828, 0.00617283950617284)), row.names = c(NA,
6L), class = "data.frame")
I would appreciate some insight into why it's doing this

Related

While loop using a probability density function

I want to make a while loop that, until n is not equal to 105, it takes a random value x, and if the value x respects some restrictions, it store that value.
The x has to be between 15 and 33
It also simulates a variable num that has to be a random variable from a uniform distribution betwenn 0 and 1.
library(ggplot2)
dev.new()
n = 0
set.seed(1684)
x = seq(15, 33, by = 0.1)
f <- function(x) {
out <- ifelse(
x < 15 | 33 < x,
0,
ifelse(
15 <= x & x <= 24,
(2*(x-15))/((33-15)*(24-15)),
ifelse(
24 < x & x <= 33,
(2*(33-x))/((33-15)*(33-24)),
NA_real_
)))
if (any((is.na(out) | is.nan(out)) & (!is.na(x) & !is.nan(x)))) {
warning("f(x) undefined for some input values")
}
out
}
while (n != 105) {
n = n + 1
x1 = runif(1, min = 15 , max = 33)
num = runif(1, min = 0 , max = 1)
if ((num < (f(x1)/2/(33-15))) && (num == (18*(f(x1)/2)))) {
x_accept = c(x_accept, x1)
}
}
I've made this but it doesn't work, the x_accept is just an empty list

C stack usage 15927808 is too close to the limit

library(ggplot2)
dev.new()
n = 0
x_accept <- list()
set.seed(1684)
x = seq(15, 33, by = 0.1)
f <- function(x) {
out <- ifelse(
x < 15 | 33 < x,
0,
ifelse(
15 <= x & x <= 24,
(2*(x-15))/((33-15)*(24-15)),
ifelse(
24 < x & x <= 33,
(2*(33-x))/((33-15)*(33-24)),
NA_real_
)))
if (any((is.na(out) | is.nan(out)) & (!is.na(x) & !is.nan(x)))) {
warning("f(x) undefined for some input values")
out
}
while (n != 105) {
n = n + 1
x1 = runif(1, min = 15 , max = 33)
num = runif(1, min = 0 , max = 1)
if (num < (f(x1)/2/(33-15)) && num <- (18*(f(x1)/2)))
x_accept = list(x_accept, x1)
}
}
histo <- data.frame(x_histo = x1, y_histo = x1)
dat <- data.frame(x = x, y = f(x))
ggplot(dat, aes(x, y)) +
geom_line()
ggplot(histo, aes(x_histo, y_histo) +
geom_histogram()
It gives me this error, it's the first time I've seen it and I don't really how what it means or how to solve it. C stack usage 15927808 is too close to the limit.

Maximum likelihood estimation using a step function

I would like to fit a step function (two parameters) to some data. The code below is not doing the job. I wonder if the round() argument is the problem. However, I also tried to divide the parameters to make small (e.g. 0.001) changes in the parameters to cause significant changes. But that did not change the fit. Any idea how to properly fit this function to the data?
dat <- c(rbinom(100, 100, 0.95), rbinom(50, 100, 0.01), rbinom(100, 100, 0.95))
plot(dat/100)
stepFnc <- function(parms, t) {
par <- as.list(parms)
(c(rep(1-(1e-5), par$t1), rep(1e-5, par$t2), rep(1-(1e-5), t)))[1:t]
}
lines(stepFnc(c(t1 = 50, t2 = 50), length(dat)))
loglik <- function(t1 = 50, t2 = 50) {
fit <- snowStepCurve(parms = list(t1=round(t1,0), t2=round(t2,0)), t = length(dat))
lines(fit)
-sum(dbinom(x = dat, size = 100, prob = fit, log = T), na.rm = T)
}
mle <- bbmle::mle2(loglik)
mle#coef
lines(snowStepCurve(mle#coef, length(dat)), lwd = 2, lty = 2, col = "orange")

With discrete x data I'd do a brute-force approach:
x <- seq_along(dat)
foo <- function(x, lwr, upr) {
y <- x
y[x <= lwr | x > upr] <- mean(dat[x <= lwr | x > upr])
y[x > lwr & x <= upr] <- mean(dat[x > lwr & x <= upr])
y
}
SSE <- function(lwr, upr) {
sum((dat - foo(x, lwr, upr))^ 2)
}
limits <- expand.grid(lwr = x, upr = x)
limits <- limits[limits$lwr <= limits$upr,]
nrow(limits)
SSEvals <- mapply(SSE, limits$lwr, limits$upr)
id <- which(SSEvals == min(SSEvals))
optlims <- limits[id,]
meanouter <- mean(dat[x <= optlims$lwr | x > optlims$upr])
meaninner <- mean(dat[x > optlims$lwr & x <= optlims$upr])
bar <- function(x) {
y <- x
y[x <= optlims$lwr | x > optlims$upr] <- meanouter
y[x > optlims$lwr & x <= optlims$upr] <- meaninner
y
}
plot(dat/100)
curve(bar(x) / 100, add = TRUE)

Assign multiple results from function when grouping

I have this code which does exactly what I want but I have to call my function three times for every group which seems very inefficient.
library(data.table)
myRegr = function(x, y) {
regr = lm.fit(cbind(1, x), y)
coefs = regr$coef
k = coefs[[2]]
m = coefs[[1]]
r2 = 1 - var(regr$residuals) / var(y)
return (c(k = k, m = m, r2 = r2))
}
dt = data.table(a = c(0, 0, 0, 1, 1, 1),
x = c(12, 21, 15, 34, 32, 31),
y = c(3, 1, 6, 4, 2, 8))
result = dt[,list(minX = min(x),
minY = min(y),
k = myRegr(x, y)["k"],
m = myRegr(x, y)["m"],
r2 = myRegr(x, y)["r2"]
),
by = list(a)]
print(result)
Outputs:
a minX minY k m r2
0 12 1 -0.3095238 8.285714 0.3176692
1 31 2 -1.0000000 37.000000 0.2500000
Any idea how I can rewrite this to only call the function once?
UPDATE:
My example didn't cover the complete problem as I have a fourth column which I'm selecting on, here is a better example:
library(data.table)
myRegr = function(x, y) {
regr = lm.fit(cbind(1, x), y)
coefs = regr$coef
k = coefs[[2]]
m = coefs[[1]]
r2 = 1 - var(regr$residuals) / var(y)
return (c(k = k, m = m, r2 = r2))
}
df = data.frame(a = c(0, 0, 0, 1, 1, 1),
x = c(12, 21, 15, 34, 32, 31),
y = c(3, 1, 6, 4, 2, 8),
time = as.POSIXct(c("2019-01-01 08:12:00", "2019-01-01 08:13:00", "2019-01-01 08:14:00", "2019-01-01 08:12:00", "2019-01-01 08:13:00", "2019-01-01 08:14:00")))
dt = data.table(df)
result = dt[, list(firstX = x[time == min(time)],
firstY = y[time == min(time)],
k = myRegr(x, y)["k"],
m = myRegr(x, y)["m"],
r2 = myRegr(x, y)["r2"]
),
by = a]
print(result)
Outputs:
a firstX firstY k m r2
0 12 3 -0.3095238 8.285714 0.3176692
1 34 4 -1.0000000 37.000000 0.2500000
Tried wrapping it all in a function but it actually slowed things down:
library(data.table)
myRegrList = function(group) {
firstX = group[,x[time == min(time)]]
firstY = group[,y[time == min(time)]]
regr = lm.fit(cbind(1, group$x), group$y)
coefs = regr$coef
k = coefs[[2]]
m = coefs[[1]]
r2 = 1 - var(regr$residuals) / var(group$y)
return (list(firstX = firstX, firstY = firstY, k = k, m = m, r2 = r2))
}
result = dt[, myRegrList(.SD), by = a]
print(result)

If you make your function return a list you only need to call
dt[, myRegr(x, y), by = a]
# a minX minY k m r2
#1: 0 12 1 -0.3095238 8.285714 0.3176692
#2: 1 31 2 -1.0000000 37.000000 0.2500000
With
myRegr = function(x, y) {
regr = lm.fit(cbind(1, x), y)
coefs = regr$coef
k = coefs[[2]]
m = coefs[[1]]
r2 = 1 - var(regr$residuals) / var(y)
return (list(# minX = min(x),
# minY = min(y),
k = k,
m = m,
r2 = r2))
}
update
You might subset for x and y values and then join with the result of your function
result <- dt[dt[, .I[which.min(time)], by = a]$V1, .(a, x, y)]
result <- result[dt[, myRegr(x, y), by = a], on = .(a)]
result
# a x y k m r2
#1: 0 12 3 -0.3095238 8.285714 0.3176692
#2: 1 34 4 -1.0000000 37.000000 0.2500000

You can modify your function to return a vector and dcast final result:
library(data.table)
myRegr = function(x, y) {
regr <- lm.fit(cbind(1, x), y)
c(
regr$coef[[1]],
regr$coef[[2]],
1 - var(regr$residuals) / var(y)
)
}
result <- df[, .(minX = min(x), minY = min(y), myRegr(x, y), c("m", "k", "r2")), a]
dcast(result, a + minX + minY ~ V4, value.var = "V3")
This solution is not perfect as I have to create V4 (add c("m", "k", "r2") vector). There should be a better way to do this (perhaps even not to use dcast). Maybe more experienced data.table users could advice on this?
Data:
df <- data.table(
a = c(0, 0, 0, 1, 1, 1),
x = c(12, 21, 15, 34, 32, 31),
y = c(3, 1, 6, 4, 2, 8)
)

Using R: How to repeat a function I made thousands of times keeping a count of which number it ends on for each repeat

Here's the code I made so far:
z = vector()
for(i in 1:20){
Alkie = function(T=20, lambda=2.5, k=2, mu=3) {
t = 0
N = 0
i = 1
A.t = rexp(1, lambda)
D.t = Inf
while(t[i] < T) {
t[i+1] = min(A.t, D.t)
N[i+1] = N[i] + ifelse(A.t < D.t, 1, -1)
if(A.t < D.t) {
A.t = A.t + rexp(1,lambda)
if(N[i+1] == 1) D.t = t[i+1] + rgamma(1, k, mu)
if(N[i+1] == 6) D.t = t[i+1] + rgamma(1, 0, mu)
}
else
D.t = ifelse(N[i+1] == 0, Inf, t[i+1] + rgamma(1, k, mu))
i = i + 1
}
cbind(t=t, N=N)
}
x = Alkie(T=20, lambda=2.5, k=2, mu=3)
n = nrow(x)
plot(c(x[1,1], rep(x[-1,1], each=2), x[n,1]), rep(x[,2], each=2), type="l",
xlab="t(mins)", ylab="N(t)", col="blue")
How do I store the counts?