I am attempting to assign a number from 1 through 10 to a series of vectors based on what quantile they're in in a dataframe.
So far I have tried
quants <- quantile(Data$Avg, c(.1, .2, .3, .4, .5, .6, .7, .8, .9))
Data$quant <- for ( i in nrow(Data) ) {
ifelse(Data$Avg [i] < quants[1], Data$quant[1] = 1 ,
ifelse(Data$Avg [i] > quants[1] & Data$Avg[i] < quants[2], Data$quant[1] = 2, Data$quant = 3
))}
I get the following mistake:
Can anyone spot the mistake I am making here?
You might be better off using cut rather than a loop:
Data = data.frame(Avg = runif(100))
quantpoints <- seq(0.1, 0.9, 0.1)
quants <- quantile(Data$Avg, quantpoints)
cutpoints <- c(-Inf, quants, Inf)
cut(Data$Avg, breaks = cutpoints, labels = seq(1, length(cutpoints) - 1))
This should work:
Data$quant <- for ( i in nrow(Data) ) {
Data$quant[1] <- ifelse(Data$Avg [i] < quants[1], 1, ifelse(Data$Avg [i] > quants[1] & Data$Avg[i] < quants[2], 2, 3))
}
Or equivalently (inside the for loop):
if(Data$Avg [i] < quants[1])
Data$quant[1] <- 1
else{
if(Data$Avg [i] > quants[1] & Data$Avg[i] < quants[2])
Data$quant[1] <- 2
else
Data$quant[1] <- 3
}
You should assign the output of ifelse conditions outside of it. That is:
output <- ifelse(a > b, a, b)
Related
I am trying to apply the following for-loop to every matrices in the list per_d and create a new list called per_hole. I am not sure how to do this, should I use lapply?
Thank you very much in advance for your helps!
per_hole <- per_d
for (i in 1:S) {
for (j in 1:t){
if (per_hole [i,j] > CS) {
per_hole [i,j] <- per_hole [i,j] - rnorm (1, mean = 1, sd = 0.5)
} else {
per_hole [i,j] <- per_hole [i,j] + rnorm (1, mean = 1, sd = 0.5)
}}}
codes for reproduction
N <- 1
CS <- 10.141
S <- seq (7.72,13,0.807)
t <- 15
l <- length (S)
m0 <- 100
exps <- c(0.2, 0.5, 0.9, 1.5, 2)
sd_per <- c(0.2, 0.5, 0.8, 1.3, 1.8)
sd_noise <- 3
per <- lapply(sd_per, function(x){
per <- matrix(nrow = length(S)*N, ncol = t+1)
for (i in 1:dim(per)[1]) {
for (j in 1:t+1){
per [,1] <- replicate (n = N, S)
per [i,j] <- round (abs (rnorm (1, mean = per[i,1], sd =x)),digits=3)
colnames(per) <- c('physical',paste('t', 1:15, sep = ""))
per <- as.data.frame (per)
}
}
per <- per [,-1]
return(per)
}
)
names(per) <- paste("per", seq_along(sd_per), sep = "")
per_d <- lapply(per, function(x){
per_d <- abs (x - 10.141)
}
)
names(per_d) <- paste("per_d", seq_along(sd_per), sep = "")
You can try
per_hole <- lapply(per_d,function(x) x + ifelse(x>CS,-1,1)*rnorm(prod(dim(x)),1,0.5))
or
per_hole <- lapply(per_d, function(x) x + rnorm(prod(dim(x)), 1-2*(x > CS), 0.5))
In my attached R function, I was wondering how to solve for mdes (suppose it is unknown) which is currently one of the input values IF everything else is known?
Is it also possible to solve for mdes and power (both currently input values) IF everything else is known?
foo <- function(A = 200, As = 15, B = 100,Bs = 10,iccmax = 0.15,mdes = .25,SD = 1.2,power = 80)
{
tail <- 2
alpha <- 5
inv_d <- function(mdes) {
c(mean_dif = 1, Vmax = 2/mdes^2)
}
SDr <- 1/SD
pars <- inv_d(mdes)
mean_dif <- pars[[1]]
Vmax <- pars[[2]]
zbeta <- qnorm((power/100))
zalpha <- qnorm(1-(alpha/(100*tail)))
maxvarmean_difhat <- (mean_dif / (zbeta + zalpha))**2
ntreat <- sqrt((A/As)*((1-iccmax)/iccmax))
ncont <- sqrt((B/Bs)*((1-iccmax)/iccmax))
costpertreatcluster <- A + (As*ntreat)
costperconcluster <- B + (Bs*ncont)
gtreat <- (sqrt(A*iccmax) + sqrt(As*(1-iccmax)))**2
gcon <- (sqrt(B*iccmax) + sqrt(Bs*(1-iccmax)))**2
pratio <- sqrt(gtreat/gcon)
budgetratio <- 99999
budgetratio <- ifelse( ((pratio <= SD) & (pratio >= SDr)), pratio**2, ifelse((pratio > SD), pratio*SD, pratio*SDr))
fraction <- budgetratio/(1 + budgetratio)
mmvnumer <- 99999
mmvnumer <- ifelse( ((pratio <= SD) & (pratio >= SDr)),
gcon*Vmax*(1+(pratio**2)),
ifelse((pratio > SD),
gcon*Vmax*(((pratio*SD)+1)**2/((SD**2)+1)),
gcon*Vmax*(((pratio*SDr)+1)**2/((SDr**2) + 1))) )
budget <- mmvnumer/maxvarmean_difhat
treatbudget <- fraction*budget
conbudget <- (1-fraction)*budget
ktreat <- treatbudget/costpertreatcluster
kcont <- conbudget/costperconcluster
ktreatrup <- ceiling(ktreat)
kcontrup <- ceiling(kcont)
ktreatplus <- ifelse(pmin(ktreatrup,kcontrup) < 8, ktreatrup + 3, ktreatrup + 2)
kcontplus <- ifelse(pmin(ktreatrup,kcontrup) < 8, kcontrup + 3, kcontrup + 2)
budgetplus <- (ktreatplus*costpertreatcluster) + (kcontplus*costperconcluster)
return(c(ncont = ncont, kcont = kcontplus,
ntreat = ntreat, ktreat = ktreatplus, budget = budgetplus))
}
#--------------------------------------------------------------------------------
# EXAMPLE OF USE:
foo()
ncont kcont ntreat ktreat budget
7.527727 73.000000 8.692270 62.000000 33279.051347
Define a function of one variable as
p0 = foo()
fn1 = function(x) sum((foo(mdes=x) - p0)^2)
and find a minimum that should be 0, and which corresponds to your mdes = 0.25 input!
optimize(fn1, c(0.0, 1.0))
## $minimum
## [1] 0.2497695
## $objective
## [1] 0
For two variables, this is more difficult, as the function has many local minima and is ill-defined outside certain regions. Applying optim() you will need well-chosen starting points.
I would like to fit a step function (two parameters) to some data. The code below is not doing the job. I wonder if the round() argument is the problem. However, I also tried to divide the parameters to make small (e.g. 0.001) changes in the parameters to cause significant changes. But that did not change the fit. Any idea how to properly fit this function to the data?
dat <- c(rbinom(100, 100, 0.95), rbinom(50, 100, 0.01), rbinom(100, 100, 0.95))
plot(dat/100)
stepFnc <- function(parms, t) {
par <- as.list(parms)
(c(rep(1-(1e-5), par$t1), rep(1e-5, par$t2), rep(1-(1e-5), t)))[1:t]
}
lines(stepFnc(c(t1 = 50, t2 = 50), length(dat)))
loglik <- function(t1 = 50, t2 = 50) {
fit <- snowStepCurve(parms = list(t1=round(t1,0), t2=round(t2,0)), t = length(dat))
lines(fit)
-sum(dbinom(x = dat, size = 100, prob = fit, log = T), na.rm = T)
}
mle <- bbmle::mle2(loglik)
mle#coef
lines(snowStepCurve(mle#coef, length(dat)), lwd = 2, lty = 2, col = "orange")
With discrete x data I'd do a brute-force approach:
x <- seq_along(dat)
foo <- function(x, lwr, upr) {
y <- x
y[x <= lwr | x > upr] <- mean(dat[x <= lwr | x > upr])
y[x > lwr & x <= upr] <- mean(dat[x > lwr & x <= upr])
y
}
SSE <- function(lwr, upr) {
sum((dat - foo(x, lwr, upr))^ 2)
}
limits <- expand.grid(lwr = x, upr = x)
limits <- limits[limits$lwr <= limits$upr,]
nrow(limits)
SSEvals <- mapply(SSE, limits$lwr, limits$upr)
id <- which(SSEvals == min(SSEvals))
optlims <- limits[id,]
meanouter <- mean(dat[x <= optlims$lwr | x > optlims$upr])
meaninner <- mean(dat[x > optlims$lwr & x <= optlims$upr])
bar <- function(x) {
y <- x
y[x <= optlims$lwr | x > optlims$upr] <- meanouter
y[x > optlims$lwr & x <= optlims$upr] <- meaninner
y
}
plot(dat/100)
curve(bar(x) / 100, add = TRUE)
I am writing code for producing a variogram. For validating my result, I checked with geoR::variog() but both variograms are different.
I tried to understand the code of variog() to see what happens under the hood but there are so many things happening that I can't seem to understand it. I, in my code, am using the parameters X-coordinate, Y-coordiante, data value, number of lags, minimum lag value, lag interval, azimuth (angle in degrees; 90 corresponds to vertical direction), angle tolerance (in degrees) and maximum bandwidth.
variogram = function(xcor, ycor, data, nlag, minlag, laginv, azm, atol, maxbandw){
dl <- length(data)
lowangle <- azm - atol
upangle <- azm + atol
gamlag <- integer(nlag)
n <- integer(nlag)
dist <- pairdist(xcor, ycor)
maxd <- max(dist)
llag <- seq(minlag, minlag + (nlag-1) * laginv, by = laginv)
hlag <- llag + laginv
for(i in 1:dl){
for(j in i:dl){
if(i != j){
if(xcor[j]- xcor[i] == 0)
theta <- 90
else
theta <- 180/pi * atan((ycor[j] - ycor[i])/(xcor[j] - xcor[i]))
for(k in 1:nlag){
d <- dist[j, i]
b <- abs(d * sin(theta - azm))
if((llag[k] <= d & d < hlag[k]) & (lowangle <= theta & theta < upangle) & (b <= maxbandw)){
gamlag[k] <- gamlag[k] + (data[i] - data[j])^2;
n[k] <- n[k] + 1
}
}
}
}
}
gamlag <- ifelse(n == 0, NA, gamlag/(2*n))
tmp <- data.frame("lag" = llag, "gamma" = gamlag)
return(tmp)
}
function call for the above code
ideal_variogram_2 <- variogram(data3[,1], data3[,2], data3[,3], 18, 0, 0.025, 90, 45, 1000000)
ideal_variogram_2 <- na.omit(ideal_variogram_2)
plot(ideal_variogram_2$lag, ideal_variogram_2$gamma, main = "Using my code")
function call for variog()
geodata1 <- as.geodata(data3, coords.col = 1:2, data.col = 3)
ideal_variogram_1 <- variog(geodata1, coords = geodata1$coords, data = geodata1$data, option = "bin", uvec = seq(0, 0.45, by = 0.025), direction = pi/2, tolerance = pi/4)
df <- data.frame(u = ideal_variogram_1$u, v = ideal_variogram_1$v)
plot(df$u, df$v, main = "Using variog()")
The 2 variograms that I got are at the following link:
Variogram
I am attempting to assign a number from 1 through 10 to a series of vectors based on what quantile they're in in a dataframe.
So far I have tried
quants <- quantile(Data$Avg, c(.1, .2, .3, .4, .5, .6, .7, .8, .9))
Data$quant <- for ( i in nrow(Data) ) {
ifelse(Data$Avg [i] < quants[1], Data$quant[1] = 1 ,
ifelse(Data$Avg [i] > quants[1] & Data$Avg[i] < quants[2], Data$quant[1] = 2, Data$quant = 3
))}
I get the following mistake:
Can anyone spot the mistake I am making here?
You might be better off using cut rather than a loop:
Data = data.frame(Avg = runif(100))
quantpoints <- seq(0.1, 0.9, 0.1)
quants <- quantile(Data$Avg, quantpoints)
cutpoints <- c(-Inf, quants, Inf)
cut(Data$Avg, breaks = cutpoints, labels = seq(1, length(cutpoints) - 1))
This should work:
Data$quant <- for ( i in nrow(Data) ) {
Data$quant[1] <- ifelse(Data$Avg [i] < quants[1], 1, ifelse(Data$Avg [i] > quants[1] & Data$Avg[i] < quants[2], 2, 3))
}
Or equivalently (inside the for loop):
if(Data$Avg [i] < quants[1])
Data$quant[1] <- 1
else{
if(Data$Avg [i] > quants[1] & Data$Avg[i] < quants[2])
Data$quant[1] <- 2
else
Data$quant[1] <- 3
}
You should assign the output of ifelse conditions outside of it. That is:
output <- ifelse(a > b, a, b)