Somers D differences between R and SAS and within R - r

I am new to both R and SAS. I want to calculate somers D, following the logistic regression.my dataframe(vac1) is combination of Titer and Protection.
> vac1=structure(list(Titer = c(0.9, 0.9, 0.9, 1.51, 0.9, 0.9, 2.86,1.95,2.71, 2.56, 2.71, 3.01, 2.71, 2.41, 2.11, 1.95, 2.26, 2.71, 2.56, 2.41, 2.56, 1.95, 1.81, 2.26, 2.11, 1.81, 1.95, 1.95, 1.34, 2.56, 2.26, 2.26, 2.11, 2.41, 2.71, 2.56, 1.65, 1.95, 1.51, 1.95,1.81, 1.81, 1.81, 1.95, 2.11, 2.86,2.41, 1.95, 2.56, 2.71, 2.71,2.41, 1.81, 2.41, 1.65, 1.81, 2.11, 2.11, 1.81, 1.81,2.26, 2.41,1.65, 2.56, 2.71, 2.11, 1.81), Protection = c(0, 0, 0, 0, 0,0, 1, 0, 1, 1,1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0)), .Names = c("Titer","Protection"), row.names = c(NA, -67L), class = "data.frame").
my logistic regression formula is.
> logit=glm(Protection~Titer, data=vac1, family=binomial(link="logit")).
the resulting predicted probalities from logit model is combined with original Protection data from vac1 dataframe and created vac4 dataframe.
> vac4=cbind(vac1$Protection,logit$fit)
> colnames(vac4)=c("Protection","PredictedProb").
calculated somers D by 2 ways.
1.using InformationValue package
>library(InformationValue)
>somersD(actuals=vac4$Protection, predictedScores=vac4$PredictedProb
I got the value 0.733.
2.using function copied from a link
http://shashiasrblog.blogspot.in/2014/02/binary-logistic-regression-fast.html
OptimisedConc=function(logit)
{
Data = vac4
ones = Data[Data[,1] == 1,]
zeros = Data[Data[,1] == 0,]
conc=matrix(0, dim(zeros)[1], dim(ones)[1])
disc=matrix(0, dim(zeros)[1], dim(ones)[1])
ties=matrix(0, dim(zeros)[1], dim(ones)[1])
for (j in 1:dim(zeros)[1])
{
for (i in 1:dim(ones)[1])
{
if (ones[i,2]>zeros[j,2])
{conc[j,i]=1}
else if (ones[i,2]<zeros[j,2])
{disc[j,i]=1}
else if (ones[i,2]==zeros[j,2])
{ties[j,i]=1}
}
}
Pairs=dim(zeros)[1]*dim(ones)[1]
PercentConcordance=(sum(conc)/Pairs)*100
PercentDiscordance=(sum(disc)/Pairs)*100
PercentTied=(sum(ties)/Pairs)*100
N<-length(logit$fit)
gamma<-(sum(conc)-sum(disc))/Pairs
Somers_D<-(sum(conc)-sum(disc))/(Pairs-sum(ties))
k_tau_a<-2*(sum(conc)-sum(disc))/(N*(N-1))
return(list("Percent Concordance"=PercentConcordance,
"Percent Discordance"=PercentDiscordance,
"Percent Tied"=PercentTied,
"Pairs"=Pairs,
"Gamma"=gamma,
"Somers D"=Somers_D,
"Kendall's Tau A"=k_tau_a))
}
OptimisedConc(logit).
Here i am getting the gamma and somers D values but are reversed compared to what i got it in SAS and the somers D value calculated by 2nd method in R and SAS is different from what i obtained it using the InformationValue package of R. similarly kendalls tau is infinite showing in R and in SAS it is 0.38.
can anyone help where i am making mistake? thanking you.

Related

How do I change confidence interval calculation to log-log on R?

If I wanted to calculate confidence intervals using the coxph and confinf functions, how do I change the confidence interval calculation to log-log? My understanding is that log is the default.
I tried conf.type="log-log" but it did not work, just got an error message
library(survival)
coxph(formula = Surv(futime, fustat) ~ tx, data = tki, conf.type="log-log")
fit <- coxph(formula = Surv(futime, fustat) ~ tx, data = tki)
summary(fit)
#output provides HR CIs
confint(fit)
#coefficient CIs
exp(confint(fit))
> dput(tki) structure(list(futime = c(9.26, 11.06, 2.35, 3.75, 12.4, 10.3, 8.11, 7.29, 6.75, 6.56, 0.26, 1.9, 0.34, 1.63, 1.55, 1.6, 4.78, 2.65, 1.72, 3.63), fustat = c(1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1), tx = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L))

Coarsened Exact Matching with cem package- Error in .subset2(x, i, exact = exact)

I'm trying to perform coarsened exact matching on the following data.
> dput(head(cem_data))
structure(list(sex = c(1, 1, 1, 2, 2, 2), age = c(40, 59, 53,
60, 49, 60), edlev = c(3, 3, 3, 2, 3, 3), sw = c(44, 17, 10,
41, 26, 23), sw2 = c(15, 1, 5, 34, 5, 6), som = c(2.14, 0.14,
1.86, 3, 1.71, 2.14), som_2 = c(0.71, 0.14, 2, 2.57, 1.71, 2.14
), ap = c(3.5, 1.5, 1.33, 3.33, 2.67, 2.17), ap_2 = c(3, 0.17,
2.33, 3, 0.83, 1.67), dep = c(2.83, 0.17, 0.33, 2.83, 2.17, 2.33
), dep_2 = c(1.17, 0, 0.33, 2.33, 0.83, 1), int = c(2.86, 1.43,
1, 2, 2.29, 2.14), int_2 = c(2.29, 0.57, 0.14, 2.57, 1.71, 1.43
), pho = c(3.2, 0, 0, 3.4, 0.8, 0.4), pho_2 = c(1.6, 0, 0, 3.2,
0, 0.4), psy_b = c(2.67, 0.11, 0.83, 3.06, 1.61, 1.72), psy_b_2 = c(1.11,
0.06, 0.89, 2.67, 0.94, 1.28), s_wirk = c(4, 2.2, 1.6, 3.2, 1.4,
2.2), s_wirk_2 = c(2.8, 0.8, 1.8, 2.6, 1.6, 1.4), soz_b = c(2.75,
1.5, 1, 2.25, 1.25, 1.5), soz_b_2 = c(2.75, 1, 1, 2.25, 1.5,
1.25), soz_u = c(0.75, 0.75, 1.75, 3.25, 1, 3.25), soz_u_2 = c(1,
0.25, 1.75, 2.5, 2.5, 2), wohl = c(3.6, 1.4, 1.8, 3.4, 3, 3),
wohl_2 = c(2, 0.6, 1.4, 2.8, 2.2, 1.2), au_bei_aufn = c(1,
1, 1, 1, 1, 1), age_reha = c(40.9890410958904, 59.3945205479452,
53.372602739726, 60.2, 49.3342465753425, 60.7534246575342
), group_format = c(0, 0, 0, 0, 0, 0)), row.names = c(6L,
7L, 10L, 15L, 20L, 29L), class = "data.frame")
With the following code:
require(cem)
voll_data <- voll_data %>%
select(-c("auf_nr", "icd_10_1", "icd_10_2", "icd_10_3", "icd_10_4","icd_10_5", "bdi_date", "aufnahme", "entlassung")) %>%
mutate_if(is.factor,as.numeric) %>%
mutate_if(is.character, as.numeric)
cem_data <- data.frame(na.omit(voll_data))
#cem_data_s <- scale(cem_data[,5:26])
#cem_data <- cbind.data.frame(cem_data[, 1:4], cem_data_s, cem_data[, 27:36])
variables <- c("age", "sex", "edlev", "sw","au_bei_aufn")
ungleich2 <- imbalance(cem_data$group_format, data=cem_data)
However, following error is being shown, when calculating the "matt".
Error in .subset2(x, i, exact = exact) : attempt to select less than one element in get1index
7.
(function(x, i, exact) if (is.matrix(i)) as.matrix(x)[[i]] else .subset2(x, i, exact = exact))(x, ..., exact = exact)
6.
[[.data.frame(data, treatment)
5.
data[[treatment]]
4.
is.factor(x)
3.
as.factor(data[[treatment]])
2.
cem.main(treatment = treatment, data = data, cutpoints = cutpoints, drop = drop, k2k = k2k, method = method, mpower = mpower, verbose = verbose, baseline.group = baseline.group, keep.all = keep.all)
1.
cem(treatment = cem_data$group_format, data = cem_data, drop = "sw2", cutpoints = list(age = agecut), grouping = list(edlev_gr))
# automated coarsening
matt <- cem(cem_data$group_format, data = cem_data, drop= "sw2")
print(matt)
Does anyone have an idea what am I doing wrong?
Thanks a lot!!

How to draw a rectangle under the plot lines in plot?

This is my graph:
which I did using the following dataset:
targ_plot = structure(c(4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 3.75, 3.75, 3.75, 3.5, 3.5, 3.5, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3.25, 3.25, 3.25, 3.5, 3.5, 3.5, 3.75, 3.75, 4, 4, 4.25,
4.25, 4.5, 4.5, 4.5, 4.75, 4.75, 4.75, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5.25, 5.25, 5.25, 4.25, 3.75, 3, 3, 3, 2.5, 2.25,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
1.75, 2, 2, 2, 2.25, 2.25, 2.25, 2.25, 2, 1.75, 1.75, 1.75, 1.75,
1.75, 1.75, 1.75, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
1.5, 1, 1, 1, 1, 1, 1, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75,
0.4, 0.4, 0.4, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 3.25,
3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 3.25, 2.75,
2.75, 2.75, 2.5, 2.5, 2.5, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2.25, 2.25,
2.25, 2.5, 2.5, 2.5, 2.75, 2.75, 3, 3, 3.25, 3.25, 3.5, 3.5,
3.5, 3.75, 3.75, 3.75, 4, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25,
4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 4.25, 3.75, 3.25,
2.5, 2, 2, 1.5, 1.25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.5, 1.5, 1.5,
1.5, 1.25, 1, 1, 1, 1, 1, 1, 1, 0.75, 0.75, 0.75, 0.75, 0.75,
0.75, 0.75, 0.75, 0.75, 0.75, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.15, 0.15, 0.15, 0.05, 0.05,
0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05,
0.05, 0.05, 0.05, 0.05, 0.05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.25, 2.25, 2.25,
2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 2.25, 1.75, 1.75, 1.75,
1.5, 1.5, 1.5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.25, 1.25, 1.25, 1.5,
1.5, 1.5, 1.75, 1.75, 2, 2, 2.25, 2.25, 2.5, 2.5, 2.5, 2.75,
2.75, 2.75, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3.25, 3.25,
3.25, 3.25, 2.75, 2, 1, 1, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.5, 0.5, 0.5,
0.75, 0.75, 0.75, 0.75, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25,
0.25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, -0.1, -0.1, -0.1, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2,
-0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2, -0.3, -0.3,
-0.3, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4,
-0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4,
-0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4,
-0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.4, -0.5,
-0.5, -0.5, -0.5, -0.5, 0.499999999999989, 1.24999999999997,
-0.250000000000039, 0, 0.999999999999979, -2, 0, 1.25000000000002,
-0.249999999999995, 2.99999999999998, 8.65, -5.14999999999999,
0.499999999999989, 2.4, 8.44999999999998, 2.99999999999998, 0.950000000000006,
-4.49999999999999, 0.124999999999998, 0, 0.550000000000006, 0.499999999999989,
0, 0, 0.499999999999989, 0.299999999999967, 2.50000000000001,
3.55000000000001, 0.249999999999995, -0.249999999999995, 0, -0.100000000000033,
0.300000000000011, -0.100000000000033, -0.200000000000022, -1.2,
-0.100000000000033, -0.699999999999967, 0, -0.649999999999995,
0, 0.800000000000001, 0, 0, 0, 0, -1.00000000000002, -0.800000000000001,
-0.200000000000022, -1.2, 0.200000000000022, -0.599999999999978,
-2.49999999999999, -0.550000000000006, -1.75000000000001, 0.424999999999986,
0, -0.099999999999989, -0.999999999999979, 0.4, -0.099999999999989,
-0.800000000000001, 0.099999999999989, 0.150000000000006, 0,
-0.100000000000033, 0.150000000000006, -0.350000000000072, -1.49999999999997,
-1.18499999999999, -0.300000000000011, 0.349999999999984, -0.0999999999999446,
0.349999999999984, -0.0500000000000611, 0, -0.100000000000033,
-0.100000000000033, -1.14999999999998, 0, -0.300000000000011,
2.4, -2.6, 2.59999999999998, -6.30000000000002, 0.349999999999984,
3.95, 4.6, -0.900000000000001, 1.35000000000001, 2.3, 0.2, -0.45,
0, -0.5, -0.750000000000001, -0.35, 0, -0.1, -0.25, 0.1, 1.6,
0.4, 0.2, 0.35, 0.3, 0, -0.2, 0.0500000000000056, 0.350000000000006,
-0.0499999999999945, 0.99999999999999, -0.700000000000012, 0,
0.299999999999989, 0.600000000000001, 2.5, 12.2, -14.3, 2.8,
0.1, 0.35, 0.499999999999995, -0.2, 0.3, 1.8, -10.4, 0.700000000000001,
0.85, 0.550000000000001, 0.599999999999999, 0, 0.1, 0, 0.1, 0.1,
-0.2, 0.399999999999999, 0.1, 0, 0, 0, -0.3, 0.45, 0, 0.550000000000001,
0.999999999999998, 0.899999999999998, 2.1, 0.499999999999999,
0, -0.1, -0.8, 0, 0.4, -0.35, 0.505, 0, -1.3, 1.175, 0, 0, -0.0399999999999998,
0, 0.0150000000000011, 0, 0, 5.03, 0.88, 0, 2.405, 0, 0, 0, 0.4,
0, 0.344999999999995, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0849992036819458,
0, 0, 0, 0, 0.964999198913574, 0, 0, 0, 0, 0, -0.455000996589661,
0, 0, 0, 0.549998879432678, 0, -0.510001182556152, 0, 0, 0, 0.499999523162842,
0, 0.499999523162842, 3.41, 0, 3.440002, -0.06, 0, 0.024998,
0.009999), .Dim = c(217L, 4L), .Dimnames = list(NULL, c("MLF",
"MRO", "DFR", "Target")), .Tsp = c(2002, 2020, 12), class = c("mts",
"ts", "matrix"))
colors = c("#00366C", "#909800" , "#79ABE2", "#E16A86")
and this is the code that produced the graph above:
plot(targ_plot, plot.type = "single", ylab = "" , xlab = "" , main = "Target", col = colors, lwd = 2, xaxt = "n")
axis(1, at = seq(2002, 2020, 1), labels = seq(2002, 2020, 1))
legend("bottomright", colnames(targ_plot), col = colors, lwd = 3, bty = "n")
abline(h = 0, col = "black", lty = 2)
rect(2008,16,2009,-17, col= rgb(0,0,1, alpha=0.5), border = FALSE)
As you can see the rectangle is above the lines, I want it to be below the lines and to be gray possibly. I have been looking for similar things everywhere. I couldn't solve the problem.
Can you help me sort this out?
Thanks
Steps:
Create a blank plotting region by plot(..., type = "n").
Draw the rectangle.
Set the graphical parameter new by par(new = T).
Run your plotting code.
Reset graphical parameters.
# (1)
plot(targ_plot, type = "n", plot.type = "single", ylab = "", xlab = "", main = "Target", col = colors, lwd = 2, xaxt = "n")
# (2)
rect(2008, 16, 2009, -17, col = gray(0.5, alpha = 0.5), border = FALSE)
# (3)
op <- par(new = T)
# (4)
plot(targ_plot, plot.type = "single", ylab = "", xlab = "", main = "Target", col = colors, lwd = 2, xaxt = "n")
axis(1, at = seq(2002, 2020, 1), labels = seq(2002, 2020, 1))
legend("bottomright", colnames(targ_plot), col = colors, lwd = 3, bty = "n")
abline(h = 0, col = "black", lty = 2)
# (5)
par(op)

Loop Function running very slowly

Can anybody help make this loop function run quicker. It is taking too much time to calculate currently.
Acceleration <- c(0.16, 0.37, 0.37, 0.48, 1.05, 1.05, 1.93, 2.04, 2.04, 2.07, 2.35, 2.35, 2.03, 1.93, 1.93, 1.75, 1.82, 1.82, 1.49, 0.82, 0.82, 0.34, -1.69, -1.69, -2.62, -2.38, -2.38, -2.01, -0.86, -0.86, 1.14, 0.98, 0.98, 1.69, 1.64, 1.64, 2.16, 2.43, 2.43, 2.52, 2.89, 2.89, 2.25, 2.28, 2.28, 1.76, 1.09, 1.09, 1.56, 1.44, 1.44, 0.85, 1.35, 1.35, 0.78, 0.38, 0.38, 0.11, 0.14, 0.14, -0.78)
Velocity <- c(1.67, 1.77, 1.77, 1.91, 2.19, 2.19, 2.82, 3.05, 3.05, 3.47, 3.79, 3.79, 4.1, 4.26, 4.26, 4.55, 4.76, 4.76, 4.81, 4.8, 4.8, 4.69, 3.86, 3.86, 3.32, 2.89, 2.89, 2.8, 2.91, 2.91, 3.62, 3.67, 3.67, 4.2, 4.34, 4.34, 4.95, 5.27, 5.27, 5.8, 6.2, 6.2, 6.46, 6.69, 6.69, 6.86, 6.76, 6.76, 7.15, 7.26, 7.26, 7.3, 7.59, 7.59, 7.67, 7.59, 7.59, 7.45, 7.48, 7.48, 7.16)
Test <- data.frame(Acceleration,Velocity)
Here is the calculated column with a loop.
Test$Accels[1] <- 0
for(i in 2:nrow(Test))
{Test$Accels[i] <-
if(Test$Acceleration[i] <= 0) { 0 }
else if(Test$Acceleration[i] >= 2 & Test$Acceleration[i+1] >= 2 & Test$Acceleration[i+2] >= 2 & Test$Acceleration[i+3] >= 2 & Test$Acceleration[i+4] >= 2 &
Test$Accels[i-1] == 0) { 2 }
else if(Test$Accels[i-1] > 0) { 1 }
else 0}
Desired Output:
Test$Accels <- c(0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
Can anyone help me re-write the Test$Accels column code to make it run faster?
In another calculated column in the dataframe i sometimes use i have the following code inside:
if(Test$Velocity[i] < 1.4 | Test$Velocity[i+1] < 1.4 | Test$Velocity[i+2] < 1.4 | Test$Velocity[i+3] < 1.4 | Test$Velocity[i+4] < 1.4 ) {0}
Can anyone help me re-write this part of the code to be quicker/shorter also?
Using sapply sped it up for me. It is not shorter but quicker.
microbenchmark::microbenchmark(
Test$Accels[2:nrow(Test)] <- sapply(2:nrow(Test), function(i){
if(Test$Acceleration[i] <= 0) { 0 }
else if(Test$Acceleration[i] >= 2 & Test$Acceleration[i+1] >= 2 & Test$Acceleration[i+2] >= 2 & Test$Acceleration[i+3] >= 2 & Test$Acceleration[i+4] >= 2 &
Test$Accels[i-1] == 0) { 2 }
else if(Test$Accels[i-1] > 0) { 1 }
else 0})
)
microbenchmark::microbenchmark(
for(i in 2:nrow(Test))
{Test$Accels[i] <-
if(Test$Acceleration[i] <= 0) { 0 }
else if(Test$Acceleration[i] >= 2 & Test$Acceleration[i+1] >= 2 & Test$Acceleration[i+2] >= 2 & Test$Acceleration[i+3] >= 2 & Test$Acceleration[i+4] >= 2 &
Test$Accels[i-1] == 0) { 2 }
else if(Test$Accels[i-1] > 0) { 1 }
else 0})

Cut and quantile in R in not including zero

I am trying to bin numeric data from a column in R based on the 1st, 3rd and 4th quantile ( ie. 0-25%, 25%-75%,75%-100%). I have used the following code but the zero do not get included in the binning. They are shown as NA.
rawdata1$usage4 <- cut(rawdata1$Usage_Percentage,
breaks = quantile(rawdata1$Usage_Percentage,
probs = c(-Inf,0.25,0.75,Inf),include.lowest=T),labels=F)
Error in quantile.default(rawdata1$Usage_Percentage, probs = c(-Inf,
0.25, : 'probs' outside [0,1]
However if use the following code and divide it into 4 quantiles, the bins are just fine.
rawdata1$usage5 <- cut(rawdata1$Usage_Percentage,
breaks = quantile(rawdata1$Usage_Percentage),
include.lowest=T,labels=F)
To include zeros in the binning you can also use the cut2 function from Hmisc.
Here is an example.
data <- data.frame(vect = c(1.64, 1.5, 1.5, 1.41, 1.64, 1.64, 0, 1.45, 1.64, 1.5, 1.45, 0, 1.45, 1.64,
1.5, 1.5, 1.5, 0, 1.5, 1.41, 0.18, 0.09, 0.1, 0.09, 0.05, 0.09, 1.64, 1.5,
1.5, 0.1, 0.05, 0.09, 0, 5.82, 5.86, 5.86, 0, 5.82, 5.82, 5.82, 5.82, 5.82,
5.86, 5.86, 5.82, 0, 5.91, 9.41, 9.5, 5.91, 0, 9.45, 5.91, 9.45, 5.91, 0,
0, 9.55, 5.91, 9.55, 9.5, 9.55, 0, 5.82, 1.64))
data$bin <- factor(Hmisc::cut2(data$vect, g = 4), labels = c(1:4))
#g represents the number of quantile groups

Resources