Loop Function running very slowly - r

Can anybody help make this loop function run quicker. It is taking too much time to calculate currently.
Acceleration <- c(0.16, 0.37, 0.37, 0.48, 1.05, 1.05, 1.93, 2.04, 2.04, 2.07, 2.35, 2.35, 2.03, 1.93, 1.93, 1.75, 1.82, 1.82, 1.49, 0.82, 0.82, 0.34, -1.69, -1.69, -2.62, -2.38, -2.38, -2.01, -0.86, -0.86, 1.14, 0.98, 0.98, 1.69, 1.64, 1.64, 2.16, 2.43, 2.43, 2.52, 2.89, 2.89, 2.25, 2.28, 2.28, 1.76, 1.09, 1.09, 1.56, 1.44, 1.44, 0.85, 1.35, 1.35, 0.78, 0.38, 0.38, 0.11, 0.14, 0.14, -0.78)
Velocity <- c(1.67, 1.77, 1.77, 1.91, 2.19, 2.19, 2.82, 3.05, 3.05, 3.47, 3.79, 3.79, 4.1, 4.26, 4.26, 4.55, 4.76, 4.76, 4.81, 4.8, 4.8, 4.69, 3.86, 3.86, 3.32, 2.89, 2.89, 2.8, 2.91, 2.91, 3.62, 3.67, 3.67, 4.2, 4.34, 4.34, 4.95, 5.27, 5.27, 5.8, 6.2, 6.2, 6.46, 6.69, 6.69, 6.86, 6.76, 6.76, 7.15, 7.26, 7.26, 7.3, 7.59, 7.59, 7.67, 7.59, 7.59, 7.45, 7.48, 7.48, 7.16)
Test <- data.frame(Acceleration,Velocity)
Here is the calculated column with a loop.
Test$Accels[1] <- 0
for(i in 2:nrow(Test))
{Test$Accels[i] <-
if(Test$Acceleration[i] <= 0) { 0 }
else if(Test$Acceleration[i] >= 2 & Test$Acceleration[i+1] >= 2 & Test$Acceleration[i+2] >= 2 & Test$Acceleration[i+3] >= 2 & Test$Acceleration[i+4] >= 2 &
Test$Accels[i-1] == 0) { 2 }
else if(Test$Accels[i-1] > 0) { 1 }
else 0}
Desired Output:
Test$Accels <- c(0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
Can anyone help me re-write the Test$Accels column code to make it run faster?
In another calculated column in the dataframe i sometimes use i have the following code inside:
if(Test$Velocity[i] < 1.4 | Test$Velocity[i+1] < 1.4 | Test$Velocity[i+2] < 1.4 | Test$Velocity[i+3] < 1.4 | Test$Velocity[i+4] < 1.4 ) {0}
Can anyone help me re-write this part of the code to be quicker/shorter also?

Using sapply sped it up for me. It is not shorter but quicker.
microbenchmark::microbenchmark(
Test$Accels[2:nrow(Test)] <- sapply(2:nrow(Test), function(i){
if(Test$Acceleration[i] <= 0) { 0 }
else if(Test$Acceleration[i] >= 2 & Test$Acceleration[i+1] >= 2 & Test$Acceleration[i+2] >= 2 & Test$Acceleration[i+3] >= 2 & Test$Acceleration[i+4] >= 2 &
Test$Accels[i-1] == 0) { 2 }
else if(Test$Accels[i-1] > 0) { 1 }
else 0})
)
microbenchmark::microbenchmark(
for(i in 2:nrow(Test))
{Test$Accels[i] <-
if(Test$Acceleration[i] <= 0) { 0 }
else if(Test$Acceleration[i] >= 2 & Test$Acceleration[i+1] >= 2 & Test$Acceleration[i+2] >= 2 & Test$Acceleration[i+3] >= 2 & Test$Acceleration[i+4] >= 2 &
Test$Accels[i-1] == 0) { 2 }
else if(Test$Accels[i-1] > 0) { 1 }
else 0})

Related

How to use complete.cases in gtsummary for each variable for doing a paired t.test instead doing complete.cases for full data frame?

I am trying to do a paired t.test on my data for pre-post analysis and uses gtsummary package to create the table. As I have missing data I filter the dataframe by complete.cases(.) but as it filter for all the columns I am loosing much data. Instead of that I want filter complete.cases() only for the particular variable it test for each time. Eg: if it is doing the test for variable1 it should check the complete.cases() for only variable1. Can someone please help me how to accomplish it? Following is the code I am using now.
trial_paired <-
df %>% filter(OSAclass == 'OSA') %>% select(c('time1', 'CPAP','Cholesterol', 'Triglyceride','HDL_chol','LDL_chol'))%>%
group_by(time1) %>%
mutate(id = row_number()) %>%
ungroup()
t2 <-
trial_paired %>%
# delete missing values
filter(complete.cases(.)) %>%
# keep IDs with both measurements
group_by(id) %>%
filter(n() == 2) %>%
ungroup() %>%
# summarize data
tbl_summary(by = time1 , include = -id, type = all_continuous() ~ "continuous2", statistic = all_continuous() ~ c("{median} ({p25}, {p75})", "{min}, {max}", "{mean} ({sd})")) %>%
add_p(test = list(all_continuous() ~ "paired.t.test",
all_categorical() ~ "mcnemar.test"),
group = id)
structure(list(time1 = c("first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second", "first", "second", "first",
"second", "first", "second", "first", "second", "first", "second",
"first", "second", "first", "second"), CPAP = c(1, 1, 1, 1, 0,
0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 0, 1, 1, 1, 1, 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0), Cholesterol = c(4.83, 4.83, 4.81, 4.81, 4.48, 4.48,
4.25, 4.25, 4.93, 4.93, 5.57, 5.57, 5.52, 5.52, 5.47, 5.47, 4.61,
4.61, 5.4, 5.4, 5.31, 5.31, 4.89, 4.89, 6.62, 6.62, 5.15, 5.15,
4.7, 4.7, 4.62, 4.62, 4.66, 4.66, 5.17, 5.17, 4.78, 4.78, 8.82,
8.82, 4.28, 4.28, 4.9, 4.9, 2.9, 2.9, 5.92, 5.92, 5.39, 5.39,
4.92, 4.92, 3.75, 3.75, 3.87, 3.87, 6.1, 6.1, 6.05, 6.05, 5.18,
5.18, 4.57, 4.57, 5.42, 5.42, 6.08, 6.08, 5.48, 5.48, 4.78, 4.78,
3.89, 3.89, 4.62, 4.62, 4.6, 4.6, 6.02, 6.02, 3.67, 3.67, 6.06,
6.06, 6.12, 6.12, 4.84, 4.84, 5.86, 5.86, 5.9, 5.9, 6.27, 6.27,
3.87, 3.87, 7.4, 7.4, 5.55, 5.55, 4.45, 4.45, 5.26, 5.26, 4.62,
4.62, 7.17, 7.17, 5.35, 5.35, 5.99, 5.99, 5.94, 5.94, 4.38, 4.38,
5.2, 5.2, 4.68, 4.68, 3.29, 3.29, 4.85, 4.85, 4.83, 4.83, 5.21,
5.21, 6.61, 6.61, 6.33, 6.33, 5.59, 5.59, 7.14, 7.14, 4.8, 4.8,
4.22, 4.22, 5.45, 5.45, 4.87, 4.87, 5.89, 5.89, 5.1, 5.1, 4.18,
4.18, 5.58, 5.58, 6.41, 6.41, 4.26, 4.26, 4.88, 4.88, 4.3, 4.3,
6.51, 6.51, 5.19, 5.19, 6, 6, 4.39, 4.39, 6, 6, 4.73, 4.73, 6.23,
6.23, 4.51, 4.51), Triglyceride = c(4.62, 4.62, 1.16, 1.16, 2.29,
2.29, 2.41, 2.41, 2.88, 2.88, 2.89, 2.89, 5.22, 5.22, 2.3, 2.3,
0.95, 0.95, 2.21, 2.21, 2.54, 2.54, 1.98, 1.98, 3.4, 3.4, 1.77,
1.77, 1.95, 1.95, 3.53, 3.53, 1.17, 1.17, 1.04, 1.04, 2.53, 2.53,
2.69, 2.69, 0.71, 0.71, 1.32, 1.32, 0.82, 0.82, 2.75, 2.75, 1.76,
1.76, 3.59, 3.59, 2.38, 2.38, 1.87, 1.87, 2.06, 2.06, 15.53,
15.53, 1.66, 1.66, 1.57, 1.57, 1.23, 1.23, 1.99, 1.99, 1.98,
1.98, 2, 2, 1.52, 1.52, 0.92, 0.92, 1.49, 1.49, 3.4, 3.4, 1.39,
1.39, 1.06, 1.06, 3.37, 3.37, 0.9, 0.9, 1.49, 1.49, 1.8, 1.8,
1.45, 1.45, 1.44, 1.44, 3.9, 3.9, 0.95, 0.95, 0.89, 0.89, 0.74,
0.74, 2.42, 2.42, 3.99, 3.99, 1.32, 1.32, 2.27, 2.27, 2.09, 2.09,
1.53, 1.53, 2.02, 2.02, 2.38, 2.38, 1.06, 1.06, 1.71, 1.71, 1.16,
1.16, 1.41, 1.41, 2.9, 2.9, 1.17, 1.17, 1.41, 1.41, 2.84, 2.84,
2.94, 2.94, 0.67, 0.67, 1.83, 1.83, 2.33, 2.33, 2.82, 2.82, 1.47,
1.47, 0.82, 0.82, 2.96, 2.96, 2.84, 2.84, 2.04, 2.04, 3.14, 3.14,
1.44, 1.44, 2.14, 2.14, 0.85, 0.85, 2.39, 2.39, 1.1, 1.1, 1.52,
1.52, 1.41, 1.41, 2.64, 2.64, 1.06, 1.06), HDL_chol = c(0.81,
0.81, 0.86, 0.86, 1.3, 1.3, 0.99, 0.99, 1.06, 1.06, 1.31, 1.31,
1.01, 1.01, 1.02, 1.02, 1.38, 1.38, 1.31, 1.31, 1.63, 1.63, 1.63,
1.63, 1.27, 1.27, 1.28, 1.28, 0.99, 0.99, 0.94, 0.94, 1.14, 1.14,
2.14, 2.14, 1.74, 1.74, 1.19, 1.19, 1.03, 1.03, 1.19, 1.19, 1.75,
1.75, 0.93, 0.93, 1.85, 1.85, 0.88, 0.88, 1.02, 1.02, 1.05, 1.05,
1.1, 1.1, 0.38, 0.38, 0.95, 0.95, 1.15, 1.15, 1.38, 1.38, 1.34,
1.34, 0.86, 0.86, 1.02, 1.02, 1.19, 1.19, 1.89, 1.89, 1.22, 1.22,
1.37, 1.37, 0.92, 0.92, 1.33, 1.33, 1.44, 1.44, 1.28, 1.28, 1.28,
1.28, 1.18, 1.18, 1.32, 1.32, 1.98, 1.98, 1.23, 1.23, 1.93, 1.93,
0.76, 0.76, 1.72, 1.72, 1.24, 1.24, 1.13, 1.13, 1.88, 1.88, 1.27,
1.27, 1.34, 1.34, 1.28, 1.28, 0.9, 0.9, 1.07, 1.07, 1.25, 1.25,
1.41, 1.41, 1.59, 1.59, 1.35, 1.35, 1.47, 1.47, 1.41, 1.41, 2.37,
2.37, 1.17, 1.17, 1.35, 1.35, 1.02, 1.02, 1.32, 1.32, 0.86, 0.86,
1.62, 1.62, 1.11, 1.11, 1.17, 1.17, 1, 1, 1.28, 1.28, 1.16, 1.16,
0.93, 0.93, 1.13, 1.13, 1.24, 1.24, 1.76, 1.76, 0.89, 0.89, 1.55,
1.55, 1.76, 1.76, 1.34, 1.34, 1.86, 1.86, 1.29, 1.29), LDL_chol = c(2.49,
2.49, 3.58, 3.58, 2.7, 2.7, 2.42, 2.42, 3.25, 3.25, 3.58, 3.58,
3.15, 3.15, 3.78, 3.78, 3.06, 3.06, 3.56, 3.56, 2.97, 2.97, 2.74,
2.74, 4.72, 4.72, 3.34, 3.34, 3.17, 3.17, 2.87, 2.87, 3.09, 3.09,
2.87, 2.87, 2.56, 2.56, 7.19, 7.19, 2.87, 2.87, 3.28, 3.28, 1.2,
1.2, 4.2, 4.2, 3.22, 3.22, 3.1, 3.1, 2.27, 2.27, 2.43, 2.43,
4.49, 4.49, 1.52, 1.52, 3.67, 3.67, 2.97, 2.97, 3.67, 3.67, 4.3,
4.3, 3.96, 3.96, 3.2, 3.2, 2.41, 2.41, 2.64, 2.64, 3.03, 3.03,
3.82, 3.82, 2.28, 2.28, 4, 4, 3.91, 3.91, 3.27, 3.27, 4.07, 4.07,
4.11, 4.11, 4.47, 4.47, 2.39, 2.39, 5.23, 5.23, 3.43, 3.43, 3.13,
3.13, 3.13, 3.13, 2.55, 2.55, 4.99, 4.99, 3.16, 3.16, 4.05, 4.05,
4.15, 4.15, 2.6, 2.6, 3.54, 3.54, 2.74, 2.74, 1.59, 1.59, 2.79,
2.79, 2.77, 2.77, 3.32, 3.32, 4.3, 4.3, 4.56, 4.56, 2.87, 2.87,
5.29, 5.29, 2.7, 2.7, 2.85, 2.85, 3.55, 3.55, 3.26, 3.26, 3.4,
3.4, 3.49, 3.49, 2.59, 2.59, 3.74, 3.74, 4.24, 4.24, 2.73, 2.73,
2.98, 2.98, 2.87, 2.87, 4.89, 4.89, 3.38, 3.38, 4.35, 4.35, 2.51,
2.51, 4.16, 4.16, 2.99, 2.99, 3.92, 3.92, 2.77, 2.77), ANGPTL8 = c(3337.5,
3962.5, 2737.5, 962.5, 1775, 3737.5, 1025, 962.5, 1175, 912.5,
1662.5, 2075, 2862.5, 1950, 2337.5, 1875, 350, 14412.5, 962.5,
787.5, 1650, 2150, 3250, 1150, 1425, 1162.5, 975, 762.5, 5562.5,
2662.5, 1450, 787.5, 387.5, 475, 1037.5, 1125, 1462.5, 1750,
1137.5, 800, 812.5, 1637.5, 750, 4850, 1112.5, 1187.5, 662.5,
462.5, 4125, 1825, 1275, 750, 6275, 1062.5, 737.5, 3650, 1650,
1425, 2925, 1512.5, 1100, 887.5, 662.5, 825, 487.5, 662.5, 400,
600, 1077.77777777778, 1211.11111111111, 555.555555555556, 511.111111111111,
1066.66666666667, 1311.11111111111, 277.777777777778, 1822.22222222222,
1000, 1055.55555555556, 1255.55555555556, 1000, 1555.55555555556,
1266.66666666667, 1233.33333333333, 1422.22222222222, 1655.55555555556,
800, 555.555555555556, 677.777777777778, 411.111111111111, 344.444444444445,
766.666666666667, 800, 333.333333333333, 1011.11111111111, 455.555555555555,
955.555555555556, 833.333333333333, 777.777777777778, 844.444444444444,
866.666666666667, 755.555555555556, 1011.11111111111, 722.222222222222,
888.888888888889, 255.555555555556, 244.444444444445, 1433.33333333333,
1033.33333333333, 488.888888888889, 477.777777777778, 1600, 1022.22222222222,
1077.77777777778, 988.888888888889, 622.222222222222, 2500, 2077.77777777778,
688.888888888889, 788.888888888889, 1155.55555555556, 1288.88888888889,
1633.33333333333, 1744.44444444445, 2011.11111111111, 366.666666666667,
466.666666666667, 522.222222222222, 1222.22222222222, 477.777777777778,
788.888888888889, 994.444444444445, 1383.33333333333, 2183.33333333333,
661.111111111111, 2350, 1772.22222222222, 672.222222222222, 1183.33333333333,
494.444444444445, 883.333333333333, 416.666666666667, 338.888888888889,
2005.55555555555, 594.444444444444, NA, 305.555555555555, 961.111111111111,
1138.88888888889, 616.666666666667, 583.333333333333, 1405.55555555556,
705.555555555555, 1605.55555555556, 1594.44444444445, 1094.44444444444,
1272.22222222222, 3127.77777777778, 961.111111111111, 750, 661.111111111111,
916.666666666667, 572.222222222222, 1150, 1094.44444444444, 683.333333333333,
827.777777777778, 972.222222222222, 238.888888888889, NA, 327.777777777778,
850, 750, 672.222222222222, 827.777777777778, 983.333333333333,
1038.88888888889), BMP_2 = c(23, 26.92, 25.62, 26.27, 25.62,
26.92, 24.97, 26.92, 25.62, 28.2, NA, 26.92, 22.34, 23, 26.92,
24.32, 24.32, 25.62, 24.32, 25.62, 24.32, 23, 25.62, 28.2, 25.62,
24.32, 23, 26.92, 25.62, 28.2, 24.32, 26.92, 18.95, 23, 23, 25.62,
23, 24.32, 24.32, 23, 25.62, 25.62, 21.67, 26.92, 24.32, 25.62,
21.67, 23, 23, 26.92, 28.2, 24.32, 28.2, 28.2, 26.92, 26.92,
25.62, 25.62, 24.32, 24.32, 24.32, 24.32, 25.62, 23, 17.57, 20.32,
30.61, 27.33, 20.94, 26.16, 23.68, 26.16, 26.16, 28.46, 23.68,
26.16, 20.94, 32.65, 26.16, 28.46, 28.46, 30.61, 26.16, 32.65,
23.68, 28.46, 23.68, 28.46, 19.43, 22.35, 26.16, 28.46, 23.68,
28.46, 26.16, 30.61, 26.16, 28.46, 23.68, 23.68, 28.46, 30.61,
30.61, 30.61, 26.16, 28.46, 20.94, 26.16, 23.68, 30.61, 26.16,
28.46, 20.94, 23.68, 31.64, 26.16, 23.68, 30.61, 23.68, 28.46,
26.16, 30.61, 20.94, 26.16, 14.02, 26.16, 20.94, 23.68, 30.61,
34.58, 23.39, 26.67, 19.74, 19.74, 3, 15.48, 15.48, 23.39, 17.71,
15.48, 15.48, 19.74, 3, 10, NA, 23.39, 19.74, 26.67, 19.74, 19.74,
19.74, 23.39, 17.71, 23.39, 23.39, 26.67, 3, 3, 3, 23.39, 19.74,
19.74, 19.74, 29.69, 33.85, 23.39, 10, 10, 15.48, 23.39, 10,
19.74, 15.48, 15.48, 19.74, 19.74), IGFBP_3_1 = c(441353.12,
NA, 393869.87, NA, NA, NA, 579939.36, NA, 456112.02, NA, NA,
610080.87, NA, NA, 533744.22, 628064.64, 523351.47, NA, 517877.29,
NA, 486315.82, NA, NA, 542659.7, 508437.67, 589967.34, 536282.89,
512564.26, 436271.69, 601179.52, 504448.47, 506264.97, 420330.98,
NA, 538394.66, NA, NA, NA, NA, NA, 495111.88, 549340.97, 672083.18,
NA, 591978.44, NA, NA, 571958.24, 507324.12, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 475288.45, NA, 536037.9, 548109.89,
559995.14, NA, 473616.64, 542571.78, 465343.85, 1127900, 714496.84,
NA, 646959.05, 4856100, 443062.73, 542179.38, 579299.18, 1142900,
564875.53, 1037100, 1174200, NA, 548298.03, 874608.37, 902414.03,
1471500, NA, NA, 1668200, NA, 3153500, 1527000, 534397.71, 556715.71,
1016800, 703025.17, NA, NA, 161911.33, 126486.58, 682462.8, NA,
1365000, NA, 977538.37, NA, 3348600, NA, 1022700, 783787.11,
NA, NA, 859094.87, NA, 1056900, 953743.93, 363547.86, 422392.66,
796697.33, 804929.76, 686250.79, 859712.77, 726741.92, 2091000,
568594.78, 644119.63, 1139000, NA, 802047.77, NA, 1256800, 1442100,
1058500, 974033.9, 967920.77, 981304.96, 1107000, 1197400, 1019800,
1346600, 1135800, 1261900, 1203600, 1352600, NA, 1335400, 1100400,
1398300, 924378.25, 1194500, 1384400, 1186500, 1360700, 1222800,
843925.82, 1232900, 1600800, 1489200, 1133700, 1451700, 1182700,
1445100, 1732100, 1528500, 1321900, 1313500, 1101500, 1422500,
1344700, 1460200, 1224900, 1225100, 1167800, 1155800, 1149200,
1278700)), row.names = c(NA, -176L), class = c("tbl_df", "tbl",
"data.frame"))
You can use !is.na(variable) to drop rows with NA values only for specific variable.
library(dplyr)
library(gtsummary)
t2 <-
trial_paired %>%
# delete missing values in variable1
filter(!is.na(variable1)) %>%
# keep IDs with both measurements
group_by(id) %>%
filter(n() == 2) %>%
ungroup() %>%
# summarize data
tbl_summary(by = time1 , include = -id, type = all_continuous() ~ "continuous2", statistic = all_continuous() ~ c("{median} ({p25}, {p75})", "{min}, {max}", "{mean} ({sd})")) %>%
add_p(test = list(all_continuous() ~ "paired.t.test",
all_categorical() ~ "mcnemar.test"),
group = id)
To do this dynamically we can create a function.
summary_data <- function(data, var) {
data %>%
# delete missing values
filter(!is.na(.data[[var]])) %>%
# keep IDs with both measurements
group_by(id) %>%
filter(n() == 2) %>%
ungroup() %>%
# summarize data
tbl_summary(by = time1 , include = -id, type = all_continuous() ~ "continuous2", statistic = all_continuous() ~ c("{median} ({p25}, {p75})", "{min}, {max}", "{mean} ({sd})")) %>%
add_p(test = list(all_continuous() ~ "paired.t.test",
all_categorical() ~ "mcnemar.test"),
group = id)
}
#apply function to single column
summary_data(trial_paired, 'Cholesterol')
summary_data(trial_paired, 'Triglyceride')
#apply function to multiple column
cols <- c('Cholesterol', 'Triglyceride', 'HDL_chol')
#Or drop only the first column
#cols <- names(trial_paired)[-1]
res <- lapply(cols, summary_data, data = trial_paired)

How do I plot a linear regression line in a specified bin in a histogram?

So we are trying to determine speciation rate as a function of animal weight. Animal weight follows a gaussian distribution when they are plotted altogether; hence, we only want to fit the regression line in the decreasing trend of the histogram. Specifically, the line should start from x = 2.1 and y = 3.0. Fig. 1 is my current plot using the code below, while Fig. 2 is the outcome I would like to acquire (superimposed line via paint), which I don't know how to do. Any help on the matter will be greatly appreciated.
Attached is my code:
x.log = c(-2.9, -2.7, -2.5, -2.3, -2.1, -1.9, -1.7, -1.5, -1.3, -1.1,
-0.9,-0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.5, 0.7, 0.9, 1.1,
1.3, 1.5, 1.7, 1.9, 2.1, 2.3, 2.5, 2.7, 2.9, 3.1, 3.3, 3.5, 3.7,
3.9, 4.1, 4.3, 4.5, 4.7, 4.9, 5.1, 5.3, 5.5, 5.7, 5.9, 6.1,
6.3, 6.5,6.9, 7.1, 7.3, 7.5, 7.7, 7.9)
y.log = c(0, 0, 0, 0.47, 0.60, 0.95, 1.14, 1.38, 1.68, 1.79, 2.10, 2.26,
2.29, 2.39, 2.48, 2.52, 2.79, 2.68, 2.80, 2.84, 2.96, 2.92,
2.91, 3.01, 2.95, 3.05, 2.94, 2.96, 2.98, 2.83, 2.85, 2.83,
2.71, 2.63, 2.61, 2.57, 2.37, 2.26, 2.17, 1.99, 1.87, 1.74,
1.62, 1.36, 1.30, 1.07, 1.20, 0.90, 0.30, 0.69, 0.30, 0.47, 0
0.30, 0)
# plot the histogram
names(log.nspecies) = logbio
log.nspecies = log.nspecies[order (as.numeric(names(log.nspecies)))]
xpos = barplot(log.nspecies, las = 2, space = 0, col = 'red',
xlab = 'ln Weight', ylab = 'ln Number of species')

Cut and quantile in R in not including zero

I am trying to bin numeric data from a column in R based on the 1st, 3rd and 4th quantile ( ie. 0-25%, 25%-75%,75%-100%). I have used the following code but the zero do not get included in the binning. They are shown as NA.
rawdata1$usage4 <- cut(rawdata1$Usage_Percentage,
breaks = quantile(rawdata1$Usage_Percentage,
probs = c(-Inf,0.25,0.75,Inf),include.lowest=T),labels=F)
Error in quantile.default(rawdata1$Usage_Percentage, probs = c(-Inf,
0.25, : 'probs' outside [0,1]
However if use the following code and divide it into 4 quantiles, the bins are just fine.
rawdata1$usage5 <- cut(rawdata1$Usage_Percentage,
breaks = quantile(rawdata1$Usage_Percentage),
include.lowest=T,labels=F)
To include zeros in the binning you can also use the cut2 function from Hmisc.
Here is an example.
data <- data.frame(vect = c(1.64, 1.5, 1.5, 1.41, 1.64, 1.64, 0, 1.45, 1.64, 1.5, 1.45, 0, 1.45, 1.64,
1.5, 1.5, 1.5, 0, 1.5, 1.41, 0.18, 0.09, 0.1, 0.09, 0.05, 0.09, 1.64, 1.5,
1.5, 0.1, 0.05, 0.09, 0, 5.82, 5.86, 5.86, 0, 5.82, 5.82, 5.82, 5.82, 5.82,
5.86, 5.86, 5.82, 0, 5.91, 9.41, 9.5, 5.91, 0, 9.45, 5.91, 9.45, 5.91, 0,
0, 9.55, 5.91, 9.55, 9.5, 9.55, 0, 5.82, 1.64))
data$bin <- factor(Hmisc::cut2(data$vect, g = 4), labels = c(1:4))
#g represents the number of quantile groups

How to calculate the average slope within a moving window in R

My dataset contains 2 variables y and t [05s]. y was measured every 05 seconds.
I am trying to calculate the average slope within a moving 20-second-window, i.e. after calculating the first 20-second slope value the window moves forward one time unit (05 seconds) and calculates the next 20-second-window, producing successive 20-second slope values at 05-second increments.
I thought that calculating a rolling regression with rollapply (zoo package) would do the trick, but I get the same intercept and slope values for each window over and over again. What can I do?
My data:
dput(DataExample)
structure(list(t = c(0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35,
0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95,
1, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.55,
1.6, 1.65, 1.7, 1.75, 1.8, 1.85, 1.9, 1.95, 2, 2.05, 2.1, 2.15,
2.2, 2.25, 2.3, 2.35, 2.4, 2.45, 2.5, 2.55, 2.6, 2.65, 2.7, 2.75,
2.8, 2.85, 2.9, 2.95, 3, 3.05, 3.1, 3.15, 3.2, 3.25, 3.3, 3.35,
3.4, 3.45, 3.5, 3.55, 3.6, 3.65, 3.7, 3.75, 3.8, 3.85, 3.9, 3.95,
4, 4.05, 4.1, 4.15, 4.2, 4.25, 4.3, 4.35, 4.4, 4.45, 4.5, 4.55,
4.6, 4.65, 4.7, 4.75, 4.8, 4.85, 4.9, 4.95, 5, 5.05, 5.1, 5.15,
5.2, 5.25, 5.3, 5.35, 5.4, 5.45, 5.5, 5.55, 5.6, 5.65, 5.7, 5.75,
5.8, 5.85, 5.9, 5.95, 6, 6.05, 6.1, 6.15, 6.2, 6.25, 6.3, 6.35,
6.4, 6.45, 6.5, 6.55, 6.6, 6.65, 6.7, 6.75, 6.8, 6.85, 6.9, 6.95,
7, 7.05, 7.1, 7.15, 7.2, 7.25, 7.3, 7.35, 7.4, 7.45, 7.5, 7.55,
7.6, 7.65, 7.7, 7.75, 7.8, 7.85, 7.9, 7.95, 8, 8.05, 8.1, 8.15,
8.2, 8.25, 8.3, 8.35, 8.4, 8.45, 8.5, 8.55, 8.6, 8.65, 8.7, 8.75,
8.8, 8.85, 8.9, 8.95, 9, 9.05, 9.1, 9.15, 9.2, 9.25, 9.3, 9.35,
9.4, 9.45, 9.5, 9.55, 9.6, 9.65, 9.7, 9.75, 9.8, 9.85, 9.9, 9.95,
10, 10.05, 10.1, 10.15, 10.2, 10.25, 10.3), y = c(3.05, 3.04,
3.02, 3.05, 3.01, 3.02, 3.02, 3.05, 3.02, 3.01, 3.04, 3.04, 3.03,
3.03, 3.03, 3.02, 3.02, 3.03, 3.03, 3.03, 3.04, 3.03, 3.03, 3.03,
3.03, 3.02, 3.02, 3.02, 3.01, 3.03, 3.03, 3.03, 3.03, 3.03, 3.02,
3.01, 3.02, 3.02, 3.01, 3.02, 3.02, 3.02, 3.03, 3.02, 3.02, 3.01,
3.01, 3.02, 3.01, 3.02, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01,
3.02, 3, 3.01, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01, 3.02,
3, 3.01, 3.01, 3.01, 3.01, 3.01, 3.01, 3, 3, 3.01, 3, 3, 3.01,
3.01, 3.01, 3.01, 3, 3, 3, 3.01, 3, 3, 3.01, 3.01, 3.01, 3.01,
3.01, 3.01, 3, 3.02, 3, 3.01, 3.02, 3.04, 3.05, 3.08, 3.04, 3.06,
3.08, 3.06, 3.08, 3.09, 3.04, 3.05, 3.07, 3.08, 3.06, 3.08, 3.08,
3.07, 3.08, 3.08, 3.05, 3.06, 3.07, 3.07, 3.06, 3.08, 3.08, 3.08,
3.08, 3.08, 3.05, 3.06, 3.08, 3.08, 3.06, 3.09, 3.07, 3.08, 3.08,
3.08, 3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.08, 3.08,
3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.07, 3.08, 3.07,
3.07, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07,
3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.06, 3.07, 3.06,
3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.07, 3.06, 3.07, 3.06, 3.07,
3.06, 3.07, 3.06, 3.06, 3.06, 3.07, 3.04, 3.04, 3.04, 3.06, 3.06,
3.04, 3.04)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-207L), .Names = c("t", "y"))
R-Code:
require(zoo)
library("zoo", lib.loc="~/R/win-library/3.3")
rollapply(zoo(DataExample),
width=5,
FUN = function(Z)
{
z = lm(formula=y~t, data = as.data.frame(DataExample));
return(z$coef)
}, by=1,
by.column=FALSE, align="right")
The comment seems to have been deleted but it was pointed out that the function in rollapply in the code in the question was not using the argument passed to it. After fixing that and making some other minor improvements, this returns the intercept and the slope in columns 1 and 2 respectively.
library(zoo)
Coef <- function(Z) coef(lm(y ~ t, as.data.frame(Z)))
rollapplyr(zoo(DataExample), 5, Coef, by.column = FALSE)
Here a complete code to illustrate what I was meaning with the speed of .lm.fit and lm.
As well as a usage with data.table.
library(zoo)
library(data.table)
library(ggplot2)
theme_set(theme_bw())
library(microbenchmark)
# function for linear regression and find the slope coefficient
rollingSlope.lm <- function(vector) {
a <- coef(lm(vector ~ seq(vector)))[2]
return(a)
}
rollingSlope.lm.fit <- function(vector) {
a <- coef(.lm.fit(cbind(1, seq(vector)), vector))[2]
return(a)
}
# create data example
test <- data.table(x = seq(100), y = dnorm(seq(100), mean=75, sd=30))
ggplot(test, aes(x, y))+ geom_point()
# graphics about the slope calculated
test[, ':=' (Slope.lm.fit = rollapply(y, width=5, FUN=rollingSlope.lm.fit, fill=NA),
Slope.lm = rollapply(y, width=5, FUN=rollingSlope.lm, fill=NA))]
# change the width size
test[, ':=' (Slope.lm.fit.50 = rollapply(y, width=50, FUN=rollingSlope.lm.fit, fill=NA),
Slope.lm.50 = rollapply(y, width=50, FUN=rollingSlope.lm, fill=NA))]
# melt data for plotting
test2 <- melt.data.table(test, measure.vars=c("Slope.lm.fit", "Slope.lm", "Slope.lm.fit.50", "Slope.lm.50"))
ggplot(test2, aes(x, value))+ geom_point(aes(color=variable))
# efficiency of the 2 lm
mb <- microbenchmark(lm.fit = a <- rollapply(test$y, 5, rollingSlope.lm.fit, fill=NA),
lm = b <- rollapply(test$y, 5, rollingSlope.lm, fill=NA))
# check if they equal
all.equal(a, b, check.attributes=FALSE)
# TRUE
# plot results
boxplot(mb, unit="ms", notch=TRUE)
This is how I would go about doing it without the zoo library
## Modified version of your function that does not rely on accessing
## variables that is external to its environment.
slopes<-function(data) {
z = lm(formula=y~t, data=data );
z$coef ## Implicit return of last variable
}
## The number of frames to take the windowed slope of
windowsize<-4
do.call(rbind,lapply(seq(dim(data)[1]-windowsize),
function(x) slopes(data[x:(x+windowsize),])))
It iterates over a list from 1 to length data - windowsize subsetting data into overlapping window sizes of 4. The subsetted data is then passed to your slopes function before being bound into a single array.
I've tried to plot slopes as geom_segment() but I failed. At least I've got the df with different values for slope:
slope <- function(dat){
return(data.frame(t = sprintf("[%f,%f]", min(dat$t), max(dat$t)),
slope = lm(y~t-1, data = dat)$coef,
row.names = NULL)
)
}
mw <- function(dtf, wdth = 0.2, incr = 0.05){
if(!nrow(dtf)){
return(data.frame())
}
return(rbind(slope(dtf[dtf$t <= min(dtf$t) + wdth,]),
mw(dtf[dtf$t >= min(dtf$t) + incr,])
)
)
}
slp <- mw(dtf)
head(slp)
tail(slp)
# t slope
# 1 [0.000000,0.200000] 20.180000
# 2 [0.050000,0.250000] 16.498182
# 3 [0.100000,0.300000] 13.433333
# 4 [0.200000,0.400000] 9.554737
# 5 [0.250000,0.450000] 8.299608
# 6 [0.300000,0.500000] 7.340606
# ...
#175 [9.900000,10.100000] 0.3049778
#176 [10.000000,10.200000] 0.3017733
#177 [10.050000,10.250000] 0.3002829
#178 [10.150000,10.300000] 0.2982748
#179 [10.250000,10.300000] 0.2958620
#180 [10.300000,10.300000] 0.2951456

Somers D differences between R and SAS and within R

I am new to both R and SAS. I want to calculate somers D, following the logistic regression.my dataframe(vac1) is combination of Titer and Protection.
> vac1=structure(list(Titer = c(0.9, 0.9, 0.9, 1.51, 0.9, 0.9, 2.86,1.95,2.71, 2.56, 2.71, 3.01, 2.71, 2.41, 2.11, 1.95, 2.26, 2.71, 2.56, 2.41, 2.56, 1.95, 1.81, 2.26, 2.11, 1.81, 1.95, 1.95, 1.34, 2.56, 2.26, 2.26, 2.11, 2.41, 2.71, 2.56, 1.65, 1.95, 1.51, 1.95,1.81, 1.81, 1.81, 1.95, 2.11, 2.86,2.41, 1.95, 2.56, 2.71, 2.71,2.41, 1.81, 2.41, 1.65, 1.81, 2.11, 2.11, 1.81, 1.81,2.26, 2.41,1.65, 2.56, 2.71, 2.11, 1.81), Protection = c(0, 0, 0, 0, 0,0, 1, 0, 1, 1,1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0)), .Names = c("Titer","Protection"), row.names = c(NA, -67L), class = "data.frame").
my logistic regression formula is.
> logit=glm(Protection~Titer, data=vac1, family=binomial(link="logit")).
the resulting predicted probalities from logit model is combined with original Protection data from vac1 dataframe and created vac4 dataframe.
> vac4=cbind(vac1$Protection,logit$fit)
> colnames(vac4)=c("Protection","PredictedProb").
calculated somers D by 2 ways.
1.using InformationValue package
>library(InformationValue)
>somersD(actuals=vac4$Protection, predictedScores=vac4$PredictedProb
I got the value 0.733.
2.using function copied from a link
http://shashiasrblog.blogspot.in/2014/02/binary-logistic-regression-fast.html
OptimisedConc=function(logit)
{
Data = vac4
ones = Data[Data[,1] == 1,]
zeros = Data[Data[,1] == 0,]
conc=matrix(0, dim(zeros)[1], dim(ones)[1])
disc=matrix(0, dim(zeros)[1], dim(ones)[1])
ties=matrix(0, dim(zeros)[1], dim(ones)[1])
for (j in 1:dim(zeros)[1])
{
for (i in 1:dim(ones)[1])
{
if (ones[i,2]>zeros[j,2])
{conc[j,i]=1}
else if (ones[i,2]<zeros[j,2])
{disc[j,i]=1}
else if (ones[i,2]==zeros[j,2])
{ties[j,i]=1}
}
}
Pairs=dim(zeros)[1]*dim(ones)[1]
PercentConcordance=(sum(conc)/Pairs)*100
PercentDiscordance=(sum(disc)/Pairs)*100
PercentTied=(sum(ties)/Pairs)*100
N<-length(logit$fit)
gamma<-(sum(conc)-sum(disc))/Pairs
Somers_D<-(sum(conc)-sum(disc))/(Pairs-sum(ties))
k_tau_a<-2*(sum(conc)-sum(disc))/(N*(N-1))
return(list("Percent Concordance"=PercentConcordance,
"Percent Discordance"=PercentDiscordance,
"Percent Tied"=PercentTied,
"Pairs"=Pairs,
"Gamma"=gamma,
"Somers D"=Somers_D,
"Kendall's Tau A"=k_tau_a))
}
OptimisedConc(logit).
Here i am getting the gamma and somers D values but are reversed compared to what i got it in SAS and the somers D value calculated by 2nd method in R and SAS is different from what i obtained it using the InformationValue package of R. similarly kendalls tau is infinite showing in R and in SAS it is 0.38.
can anyone help where i am making mistake? thanking you.

Resources