Creating and adding median, mean etc. in ggplot - r

I have a data frame R_m which looks like this:
data frame R_m
I have used pivot_longer to modify the data for ggplot and then print it:
R_m2 <- R_m %>%
pivot_longer(names_to = "per", values_to="ind", cols=-sim, names_ptypes=list(per=integer()))
ggplot(R_m2, aes(x=per, y=ind,color=sim, group=sim))+geom_line() +
theme(legend.position = "none")
Now, I would like to add a line for mean, median and some quantiles in the graph. Before, I got this data from elsewhere and imported it as a data frame 'stat' into R. It looks like this:
median, mean and quantile
which plotted nicely by adding
geom_line(data=subset(stat,sim=="Median"),colour="black", size = 1)
Outcome looks like this:
Now I would like to achieve the same in R. Creating an array of medians (where median is taken across each column in R_m) was straightforward to do. But then my first stumbling block was that using rbind, cbind etc. I could not create a data frame that looks like 'stat'. I could not rename the zero column to "sim", so the geom_line command above no longer works. Anyway, this requires a lot of data manipulation. Is there a more efficient way of adding a median, mean and percentile lines to a graph?
structure(list(sim = c(166, 37, 163, 65, 95, 92, 98, 168, 19,
157, 177, 200, 115, 177, 149, 130, 66, 114, 96, 12, 138, 39,
80, 33, 157, 107, 180, 159, 166, 14, 126, 67, 190, 86, 147, 182,
43, 5, 109, 141, 53, 186, 49, 68, 168, 107, 67, 28, 158, 178),
per = c(407L, 1763L, 2158L, 1608L, 836L, 1638L, 285L, 1978L,
45L, 1927L, 192L, 1517L, 163L, 789L, 1989L, 2478L, 2410L,
2445L, 1532L, 181L, 1489L, 1434L, 2515L, 676L, 1503L, 2458L,
732L, 1266L, 1705L, 1852L, 1543L, 1568L, 41L, 1992L, 600L,
1314L, 33L, 199L, 370L, 46L, 1171L, 1173L, 2048L, 994L, 836L,
372L, 2374L, 1414L, 1628L, 1188L), ind = c(97.7428137181456,
100.462039003802, 95.2793483563514, 98.3721036305918, 99.0584691732282,
103.301132288618, 102.428408453689, 100.387198613893, 99.7888039221544,
101.017059784079, 106.12288506898, 102.636429823681, 93.8144062244855,
104.280572544198, 97.3182467653953, 96.5603916025096, 96.3529141792467,
98.3149638711415, 98.3629878947972, 94.6106342501915, 99.6835722307572,
98.1716050345778, 103.055895201755, 100.054976695486, 96.1369802984859,
98.5257212288309, 98.8568719059079, 102.900859147552, 99.37215427561,
102.623437273663, 104.128600607447, 102.673062489082, 100.368131206055,
98.3487549118012, 96.4401682804699, 96.4407823981984, 97.9413312935541,
102.122624393907, 98.2979203190445, 101.018531709501, 100.444354410774,
101.118257199515, 100.867412455804, 98.9923953588876, 100.417977446024,
102.21423103019, 102.296794518966, 99.9367239162818, 102.314273028354,
100.80711113148)), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))

Related

Creating window intervals of the positions in R

I am creating an interval list of the positions, by window 4 and step 1.
pos <- subs$variable
intervals <- paste0(pos[seq(1, n, by=1)],":", pos[seq(4, n, by=1)])
intervals I get are:
[1] "92:107" "101:120" "106:132" "107:136" "120:140" "132:146" "136:147" "140:152" "146:166" "147:167" "152:174"
[12] "166:186" "167:187" "174:189" "186:204" "187:228" "189:229" "204:107" "228:120" "229:132"
subs <- structure(list(variable = c(92, 101, 106, 107, 120, 132, 136,
140, 146, 147, 152, 166, 167, 174, 186, 187, 189, 204, 228, 229
), covMean = c(11355.658, 11450.079, 11479.711, 11495.132, 11612.053,
11580.158, 11421.684, 11288.105, 11278, 11239.763, 11236.895,
10425.526, 10386.789, 10233.816, 9523.132, 9503.316, 9450.158,
8532.763, 7795.368, 7656.895), emboss = c(1.3717151, 1.3828546,
1.3880071, 1.3879077, 1.3959816, 1.3830276, 1.3724465, 1.363247,
1.337794, 1.3334625, 1.3097811, 1.2826296, 1.2811749, 1.2676601,
1.2193303, 1.2136416, 1.2044259, 1.1649542, 1.1163399, 1.1125204
)), row.names = c(56L, 62L, 65L, 67L, 77L, 82L, 87L, 95L, 97L,
114L, 119L, 133L, 154L, 156L, 169L, 173L, 186L, 190L, 212L, 214L
), class = "data.frame")
Why it starts iterating from the beginning, I want it to stop at the last position.
seq(1, n, by=1) and seq(4, n, by=1) are not of same length, the values are recycled. Try :
n <- length(pos)
intervals <- paste(pos[1:(n-3)],pos[4:n], sep =':')
intervals
# [1] "92:107" "101:120" "106:132" "107:136" "120:140" "132:146" "136:147" "140:152" "146:166"
#[10] "147:167" "152:174" "166:186" "167:187" "174:189" "186:204" "187:228" "189:229"

How do I efficiently summarize summary output from multiple GAM models?

I am running multiple GAM models and need to view and compare the summary output from these. I'd like a quick and efficient way to extract and compile summary statistics from the models but have not found a way to do so.
A example data set is provided below:
example.data <- structure(list(response = c(1.47, 0.84, 1.99, 2.29, 4.14, 4.47,
2.71, 1.67, 4.12, 1.67, 2.03, 1.74, 0.98, 0.96, 0.56, 2.45, 1.31,
3.06, 2.35, 3.2, 1.16, 2.07, 0.99, 1.35, 1.02, 2.92, 1.8, 2.17,
2.56, 1.56, 2.33, 3.19, 1.53, 2.94, 3.28, 1.53, 2.8, 5.53, 1.26,
2.43, 3.5, 2.22, 3.73, 2.46, 2.16, 1.99, 3.34, 2.63, 2.51, 1.78
), predictor1 = c(17, 14.4, 99.45, 10.8, 54.25, 55.1, 40, 9,
54.25, 14.4, 14.4, 17, 14.4, 17, 10.8, 54.25, 54.25, 15.3, 55.1,
54.25, 14.4, 58, 17, 53.425, 58, 40.45, 14.4, 12.75, 91.05, 6.24,
100.25, 77.25, 43.4, 183.6, 91.05, 9.84, 100.25, 64, 10, 10,
91.05, 8.25, 100.25, 54.25, 89.4, 9.84, 10.8, 54.25, 10.8, 54.25
), predictor2 = c(165.7, 177.3, 594.2, 192.5, 426.2, 270.8, 244,
236.1, 416, 175.8, 258.6, 233.5, 115.8, 141, 153.5, 414.2, 438.9,
203, 261.4, 357.8, 148, 205.5, 137.4, 214.7, 167.8, 371.4, 179.9,
273.7, 567.2, 231.5, 355.3, 270, 319.5, 301.9, 301.9, 215.5,
256.5, 417, 231.8, 284.6, 396.3, 323, 458.4, 290, 203, 198, 350.8,
338, 323.5, 264.7), predictor3 = c(829.8, 841, 903.6, 870.3,
794, 745, 845.2, 906.5, 890.3, 874.2, 805.4, 828.8, 872, 854.7,
912.2, 790.8, 759.2, 855.1, 741.6, 961.8, 839.9, 805.1, 885.2,
887.8, 833.9, 1050.9, 787.5, 837, 731.9, 774.4, 820.8, 995.8,
916.3, 1032.1, 1014.3, 773.7, 846.4, 723.7, 764.2, 708.3, 1009.3,
1053.7, 751.7, 901.1, 848.7, 796.5, 697.1, 733.6, 725.6, 856.6
)), row.names = c(50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L,
86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L,
99L, 100L), class = "data.frame")
Right now, the unsophisticated and inefficient way I do it is something like this:
library(mgcv)
mod1 = gam(response ~ s(predictor1), data=example.data)
mod2 = gam(response ~ s(predictor2), data=example.data)
mod3 = gam(response ~ s(predictor3), data=example.data)
mod.names <- c("mod1", "mod2", "mod3")
mod.predictors <- c("predictor1", "predictor2", "predictor3")
mod.rsq <- c(summary(mod1)$r.sq, summary(mod2)$r.sq, summary(mod3)$r.sq)
mod.AIC <- c(AIC(mod1), AIC(mod2), AIC(mod3))
summary.data <- data.frame(mod.names,
mod.rsq,
mod.AIC,
mod.predictors)
summary.data
I can then select models accordingly from the summary table.
I have over one hundred potential predictors in the actual data, and it's obviously laborious to manually specify all the models and their output so a more automated alternative would be desirable.
The hard part of this question is choosing which models to run: that's a hard statistical question, and depending on what you choose, a less hard programming problem.
I'll assume that you are only interested in models like the ones in your example. Then this should work:
library(mgcv)
#> Loading required package: nlme
#> This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
predictors <- setdiff(names(example.data), "response")
result <- data.frame(predictors = predictors, rsq = NA, AIC = NA)
model <- response ~ predictor
for (i in seq_len(nrow(result))) {
pred <- result$predictors[i]
model[[3]] <- bquote(s(.(as.name(pred))))
mod <- gam(model, data = example.data)
result$rsq[i] <- summary(mod)$r.sq
result$AIC[i] <- AIC(mod)
}
result
#> predictors rsq AIC
#> 1 predictor1 0.2011252 138.0875
#> 2 predictor2 0.4666861 118.7270
#> 3 predictor3 0.1959123 139.0365
The tricky part is computing the model formula. I start with a simple model response ~ predictor, then replace the 3rd part (predictor) with code produced by bquote(s(.(as.name(pred)))). That function produces unevaluated code like s(predictor1) when pred holds "predictor1".

R From if-else and for-loops to a more efficient function

This question aims to receive feedback to make a function more efficient. Apologies for a long, case specific post.
I created a function that calculates percentages in estimates of the American Community Survey (ACS). Because estimates in the ACS have margins of error, calculating percentages (e.g. % of the total population being below 17 years old) requires the recalculation of the error that results from dividing the estimate of both variables (population below 17 / total population).
So to calculate the new margin of error for a proportion calculated as p = estimate_a/estimate_b, the formula to use is MOE(p) = (1/estimate_b)*sqrt((MOE_b^2)-(p^2*MOE_a^2)). If the value inside of the square root was negative, then the substraction should be changed to a sum, with the formula becoming MOE(p) = (1/estimate_b)*sqrt((MOE_b^2)+(p^2*MOE_a^2)). If the result of p = estimate_a/estimate_b is 1, the documentation suggest calculating MOE using another formula: MOE(p) = MOE_a/estimate_b
To make these calculations, I created a function that takes a data frame with estimates and their MOEs, calculates the proportion between two specified variables, and writes two new columns in the original dataframe - one with the proportion, and another one with its margin of error. The function loops through the rows of the data frame carrying out if-else checks to determine what formula to apply, including skipping rows that might have NA values. The original data on which I apply this function is considerably long - ~250000 rows, and the structure of this function makes it go very slowly. Hence, the question is whether there are ways to improve the quality of this code to improve its speed. The function and dummy data are provided below:
percent_calculator <- function(DF, A_e, B_e, A_se, B_se, New_fn){
# arguments legend >> DF = data frame; A_e = estimate_a (string of the fieldname); B_e = estimate_b (string of the fieldname);
# A_se = MOE_a (string of the fieldname); B_se = MOE_b (string of the fieldname); New_fn = root for new fieldname in the data frame (string)
pb<- txtProgressBar(min = 0, max = nrow(DF), initial = 0) # progress bar initialization
for (i in 1:nrow(DF)){ # for loop that iterates through the rows of the DF
setTxtProgressBar(pb,i)
if(is.na(DF[[A_e]][i])==FALSE & is.na(DF[[B_e]][i])==FALSE){ # check if any of the estimates used to calculate the proportion is NA (if so, skip)
if (DF[[B_e]][i]!= 0){ # check if estimate_b is not 0, to avoid creating inf values from A_e/B-e
DF[[paste0(New_fn, "_e")]][i] <- (DF[[A_e]][i]/DF[[B_e]][i])
if(DF[[paste0(New_fn, "_e")]][i] == 1){ # check if P==1 to then use the appropiate formula for MOE
DF[[paste0(New_fn, "_se")]][i] <- (DF[[A_se]][i]/DF[[B_e]][i])
} else {
if((DF[[A_se]][i]^2)-(DF[[paste0(New_fn, "_e")]][i]^2)*(DF[[B_se]][i]^2)>= 0){ # check for the sign of the value inside of the square root
DF[[paste0(New_fn, "_se")]][i] <- (1/DF[[B_e]][i])*sqrt((DF[[A_se]][i]^2)-(DF[[paste0(New_fn, "_e")]][i]^2)*(DF[[B_se]][i]^2))
} else {
DF[[paste0(New_fn, "_se")]][i] <- (1/DF[[B_e]][i])*sqrt((DF[[A_se]][i]^2)+(DF[[paste0(New_fn, "_e")]][i]^2)*(DF[[B_se]][i]^2))
}
}
} else { # assign 0 value if B_e was 0
DF[[paste0(New_fn, "_e")]][i] <- 0
DF[[paste0(New_fn, "_se")]][i] <- 0
}
} else { # assign NA if any of the estimates was NA
DF[[paste0(New_fn, "_e")]][i] <- NA
DF[[paste0(New_fn, "_se")]][i] <- NA
}
DF[[paste0(New_fn, "_e")]][i] <- DF[[paste0(New_fn, "_e")]][i]*100 # switch from proportion to percentage in the estimate value
DF[[paste0(New_fn, "_se")]][i] <- DF[[paste0(New_fn, "_se")]][i]*100 # switch from proportion to percentage in the MOE value
}
return(DF)
}
Dummy <- structure(list(TotPop_e = c(636L, 1287L, 810L, 1218L, 2641L,
835L, 653L, 1903L, 705L, 570L, 2150L, 6013L, 1720L, 2555L, 1150L,
2224L, 1805L, 728L, 2098L, 3099L, 4194L, 1909L, 2401L, 1446L,
1345L, 1573L, 2037L, 634L, 1916L, 1522L, 592L, 831L, 577L, 2196L,
1482L, 1436L, 1668L, 3095L, 3677L, 2641L, 1285L, 932L, 2461L,
1609L, 1143L, 1617L, 1075L, 1280L, 838L, 1447L, 3941L, 2402L,
1130L, 851L, 10316L, 9576L, 2396L, 3484L, 5688L, 2200L, 1856L,
1441L, 2539L, 3056L, 1325L, 2454L, 2010L, 2340L, 1448L, 2435L,
2782L, 3633L, 1766L, 2564L, 1473L, 1214L, 1951L, 2561L, 4262L,
2576L, 4257L, 2314L, 2071L, 3182L, 1839L, 2214L, 1101L, 1898L,
790L, 867L, 1764L, 970L, 1320L, 2850L, 1019L, 1483L, 3720L, 2215L,
3581L, 3391L), TotPop_se = c(132.522796352584, 149.544072948328,
127.051671732523, 130.091185410334, 232.826747720365, 135.562310030395,
100.303951367781, 176.29179331307, 114.285714285714, 96.6565349544073,
339.817629179331, 438.297872340425, 245.592705167173, 324.012158054711,
333.130699088146, 224.924012158055, 321.580547112462, 169.604863221885,
175.075987841945, 469.908814589666, 375.075987841945, 411.550151975684,
378.115501519757, 235.258358662614, 241.337386018237, 291.793313069909,
337.386018237082, 138.601823708207, 145.896656534954, 193.920972644377,
135.562310030395, 117.325227963526, 244.984802431611, 318.54103343465,
207.90273556231, 200, 279.635258358663, 657.750759878419, 401.215805471125,
401.823708206687, 229.787234042553, 139.817629179331, 303.951367781155,
201.215805471125, 200, 252.887537993921, 356.838905775076, 241.945288753799,
238.297872340426, 267.477203647416, 320.9726443769, 255.31914893617,
178.115501519757, 116.109422492401, 891.793313069909, 766.565349544073,
255.31914893617, 463.22188449848, 448.632218844985, 367.781155015198,
269.300911854103, 261.398176291793, 286.93009118541, 446.808510638298,
224.316109422492, 212.158054711246, 233.434650455927, 304.559270516717,
356.231003039514, 275.379939209726, 330.699088145897, 368.996960486322,
248.024316109423, 310.030395136778, 153.799392097264, 243.768996960486,
265.65349544073, 337.386018237082, 436.474164133739, 359.270516717325,
344.072948328268, 196.960486322188, 231.003039513678, 356.231003039514,
212.158054711246, 348.328267477204, 206.079027355623, 240.729483282675,
196.352583586626, 141.033434650456, 215.80547112462, 127.659574468085,
248.024316109423, 589.057750759878, 231.61094224924, 486.93009118541,
605.471124620061, 713.06990881459, 488.753799392097, 382.370820668693
), Under17_se = c(35.8095476596307, 50.9877853224243, 50.0994474845873,
44.7376765786604, 113.994325548832, 59.7386237841673, 22.7862186188344,
95.1285234870203, 42.3093316505904, 35.4621507988699, 143.021311606928,
205.334390935311, 102.292167403598, 115.712493289527, 88.9617416652971,
98.0345650964952, 149.50823698925, 40.0016629212452, 86.7428425216985,
158.047696828218, 173.225615182675, 144.710221534209, 121.094774232467,
76.9999466678128, 88.9160360898593, 97.7665610480423, 133.02517642826,
30.4983051540691, 83.3625069421341, 75.7125713164268, 50.3826325227805,
37.5622898620679, 7.29483282674772, 122.185425418875, 83.4644035953588,
63.8384709681463, 99.5458131127046, 208.446825330589, 150.282359742524,
206.017151858922, 87.7761872483956, 56.194023821941, 120.701992909334,
50.6423479626955, 55.4225960853081, 93.2888100499867, 126.879946773287,
143.069104861932, 86.7747884744339, 79.4517480028886, 140.260959630942,
125.115775875384, 52.187662082273, 38.1819057688564, 365.828168907497,
380.635956883794, 135.735302000757, 213.321896356121, 198.507936644685,
126.535797699776, 141.516048792542, 114.238818548927, 117.737122860635,
165.644292987747, 71.238834852709, 93.0825940979755, 41.8438489710712,
97.0666682368976, 86.5060758100772, 92.8659724484427, 76.6536183156139,
192.822109819002, 101.83958502542, 139.341067042001, 55.3992539361667,
92.106793773051, 78.2330906844691, 115.177918141833, 207.546042154974,
139.609995160777, 153.568552211039, 73.5738128652025, 112.249861520572,
171.38868664475, 66.0687084216098, 181.939713349267, 28.4417934718288,
90.1132509720827, 57.4202669424023, 46.8440239496863, 80.4799857926917,
42.6875862955885, 81.3500156027725, 142.669475129055, 23.4653605661019,
191.159072511375, 159.615857998832, 191.592580855392, 184.123292172321,
125.375425911215), Under17_e = c(123, 284, 189, 228, 661, 180,
49, 500, 121, 115, 686, 1456, 385, 578, 302, 476, 738, 124, 527,
803, 1219, 459, 614, 218, 229, 422, 543, 69, 536, 306, 149, 80,
0, 520, 281, 270, 454, 669, 905, 978, 282, 178, 630, 187, 145,
367, 327, 577, 225, 246, 966, 629, 211, 65, 2857, 3051, 592,
1162, 1322, 464, 490, 264, 576, 617, 326, 695, 169, 381, 309,
476, 355, 915, 431, 869, 269, 358, 335, 650, 1443, 561, 900,
411, 759, 1265, 171, 833, 45, 255, 134, 144, 339, 203, 388, 413,
66, 416, 654, 565, 700, 362)), row.names = c(NA, 100L), class = "data.frame")
# example run to calculate pct of people below 17
Dummy <- percent_calculator(Dummy , "Under17_e", "TotPop_e", "Under17_se", "TotPop_se", "P_Bel17")
You don't need that loop at all. All your operations are simple arithmetic that can take vectors, instead of single values. This is called vectorization. You then implement your logic tree with a nested ifelse. ifelse does compute all three possible outcomes (which is a bit unnecessary), but that is very much worth it in this case. If you want to optimize further have a look here: Is `if` faster than ifelse?
Timings at the bottom.
percent_calculator_vectorized <- function(DF, A_e, B_e, A_se, B_se, New_fn){
# arguments legend >> DF = data frame; A_e = estimate_a (string of the fieldname); B_e = estimate_b (string of the fieldname);
# A_se = MOE_a (string of the fieldname); B_se = MOE_b (string of the fieldname); New_fn = root for new fieldname in the data frame (string)
e_name <- paste0(New_fn, "_e")
se_name <- paste0(New_fn, "_se")
DF[[e_name]] <- DF[[A_e]] / DF[[B_e]]
DF[[se_name]] <- ifelse(
DF[[e_name]] == 1, # check if P==1 to then use the appropriate formula for MOE
DF[[A_se]] / DF[[B_e]],
ifelse(
(DF[[A_se]]^2)-(DF[[e_name]]^2)*(DF[[B_se]]^2)>= 0, # check for the sign of the value inside of the square root
(1/DF[[B_e]])*sqrt((DF[[A_se]]^2)-(DF[[e_name]]^2)*(DF[[B_se]]^2)),
(1/DF[[B_e]])*sqrt((DF[[A_se]]^2)+(DF[[e_name]]^2)*(DF[[B_se]]^2))
)
)
# assign 0 value if B_e was 0
DF[DF[[B_e]] == 0 & !is.na(DF[[B_e]]), c(e_name, se_name)] <- 0
# switch from proportion to percentage in the estimate value
DF[, c(e_name, se_name)] <- DF[, c(e_name, se_name)] * 100
return(DF)
}
Dummy2 <- percent_calculator(Dummy , "Under17_e", "TotPop_e", "Under17_se", "TotPop_se", "P_Bel17")
Dummy3 <- percent_calculator_vectorized(Dummy , "Under17_e", "TotPop_e", "Under17_se", "TotPop_se", "P_Bel17")
all.equal(Dummy2, Dummy3) #TRUE
Timings:
bench::mark(
orig = percent_calculator(Dummy , "Under17_e", "TotPop_e", "Under17_se", "TotPop_se", "P_Bel17"),
vect = percent_calculator_vectorized(Dummy , "Under17_e", "TotPop_e", "Under17_se", "TotPop_se", "P_Bel17"),
)
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <lis> <list>
1 orig 17.2ms 18.5ms 53.1 331.2KB 14.8 18 5 339ms <df[,6] … <df[,3] … <bch… <tibble …
2 vect 157.4µs 168µs 5700. 19.4KB 11.6 2450 5 430ms <df[,6] … <df[,3] … <bch… <tibble …
Speed up for this small dataset is ~100x, also with a ~10x smaller memory footprint.
I see you have a working solution but for my own reasons I wanted to try a tidyverse solution. This one is just about as fast as the base R solution and for me would be easier to maintain. I also added some more oddities to your toy data to make sure I caught the edge cases.
library(dplyr)
ACS_recalculator <- function(DF, A_e, B_e, A_se, B_se, New_fn){
e_name <- paste0(New_fn, "_e")
se_name <- paste0(New_fn, "_se")
A_ex <- ensym(A_e)
B_ex <- ensym(B_e)
A_sex <- ensym(A_se)
B_sex <- ensym(B_se)
DF <-
DF %>%
mutate(e_value = ifelse(!!B_ex != 0, !!A_ex / !!B_ex, 0),
se_value = case_when(
!!B_ex == 0 ~ 0,
e_value == 1 ~ !!A_sex / !!B_ex,
((!!A_sex)^2) - (e_value^2 * ((!!B_sex)^2)) >= 0 ~ (1/!!B_ex) * sqrt(((!!A_sex)^2) - (e_value^2) * ((!!B_sex)^2)),
((!!A_sex)^2) - (e_value^2 * ((!!B_sex)^2)) < 0 ~ (1/!!B_ex) * sqrt(((!!A_sex)^2) + (e_value^2) * ((!!B_sex)^2)),
TRUE ~ NA_real_),
e_value = e_value * 100,
se_value = se_value * 100) %>%
rename(!!e_name := e_value,
!!se_name := se_value)
return(DF)
}
Dummy2 <- ACS_recalculator(Dummy2, "Under17_e", "TotPop_e", "Under17_se", "TotPop_se", "P_Bel17")
head(Dummy2)
#> TotPop_e TotPop_se Under17_se Under17_e P_Bel17_e P_Bel17_se
#> 1 636 132.5228 35.80955 123 19.33962 3.932255
#> 2 1287 149.5441 50.98779 284 22.06682 3.020104
#> 3 810 127.0517 50.09945 189 23.33333 4.986043
#> 4 1218 130.0912 44.73768 228 18.71921 3.081212
#> 5 2641 232.8267 113.99433 661 25.02840 3.709747
#> 6 835 135.5623 59.73862 180 21.55689 6.239876
Your original example data with more missings and zeros
Dummy2 <- structure(list(TotPop_e = c(636L, 1287L, 810L, 1218L, 2641L,
835L, 653L, 1903L, 0L, 570L, 2150L, 6013L, 1720L, 2555L, 1150L,
2224L, 1805L, 728L, 2098L, 3099L, 4194L, 1909L, 2401L, 1446L,
1345L, 1573L, 2037L, 634L, 1916L, 1522L, 592L, 831L, 577L, 2196L,
1482L, 1436L, 1668L, 3095L, 3677L, 2641L, 1285L, 932L, 2461L,
1609L, 1143L, 1617L, 1075L, 1280L, 838L, 1447L, 3941L, 2402L,
1130L, 851L, 10316L, 9576L, 2396L, 3484L, 5688L, 2200L, 1856L,
1441L, 2539L, 3056L, 1325L, 2454L, 2010L, 2340L, 1448L, 2435L,
2782L, 3633L, 1766L, 2564L, 1473L, 1214L, 1951L, 2561L, 4262L,
2576L, 4257L, 2314L, 2071L, 3182L, 1839L, 2214L, NA, 1898L,
790L, 867L, 1764L, 970L, 1320L, 2850L, 1019L, 1483L, 3720L, 2215L,
3581L, 3391L), TotPop_se = c(132.522796352584, 149.544072948328,
127.051671732523, 130.091185410334, 232.826747720365, 135.562310030395,
100.303951367781, 176.29179331307, 114.285714285714, 0,
339.817629179331, 438.297872340425, 245.592705167173, 324.012158054711,
333.130699088146, 224.924012158055, 321.580547112462, 169.604863221885,
175.075987841945, 469.908814589666, 375.075987841945, 411.550151975684,
378.115501519757, 235.258358662614, 241.337386018237, 291.793313069909,
337.386018237082, 138.601823708207, 145.896656534954, 193.920972644377,
135.562310030395, 117.325227963526, 244.984802431611, 318.54103343465,
207.90273556231, 200, 279.635258358663, 657.750759878419, 401.215805471125,
401.823708206687, 229.787234042553, 139.817629179331, 303.951367781155,
201.215805471125, 200, 252.887537993921, 356.838905775076, 241.945288753799,
238.297872340426, 267.477203647416, 320.9726443769, 255.31914893617,
178.115501519757, 116.109422492401, NA, 766.565349544073,
255.31914893617, 463.22188449848, 448.632218844985, 367.781155015198,
269.300911854103, 261.398176291793, 286.93009118541, 446.808510638298,
224.316109422492, 212.158054711246, 233.434650455927, 304.559270516717,
356.231003039514, 275.379939209726, 330.699088145897, 368.996960486322,
248.024316109423, 310.030395136778, 153.799392097264, 243.768996960486,
265.65349544073, 337.386018237082, 436.474164133739, 359.270516717325,
344.072948328268, 196.960486322188, 231.003039513678, 356.231003039514,
212.158054711246, 348.328267477204, 206.079027355623, 240.729483282675,
196.352583586626, 141.033434650456, 215.80547112462, 127.659574468085,
248.024316109423, 589.057750759878, 231.61094224924, 486.93009118541,
605.471124620061, 713.06990881459, 488.753799392097, 382.370820668693
), Under17_se = c(35.8095476596307, 50.9877853224243, 50.0994474845873,
44.7376765786604, 113.994325548832, 59.7386237841673, 22.7862186188344,
95.1285234870203, 42.3093316505904, 35.4621507988699, 143.021311606928,
205.334390935311, 102.292167403598, 115.712493289527, 88.9617416652971,
98.0345650964952, 149.50823698925, 40.0016629212452, 86.7428425216985,
158.047696828218, 173.225615182675, 144.710221534209, 121.094774232467,
76.9999466678128, 88.9160360898593, 97.7665610480423, 133.02517642826,
30.4983051540691, 83.3625069421341, 75.7125713164268, 50.3826325227805,
37.5622898620679, 7.29483282674772, 122.185425418875, 83.4644035953588,
63.8384709681463, 99.5458131127046, 208.446825330589, 150.282359742524,
206.017151858922, 87.7761872483956, 56.194023821941, 120.701992909334,
50.6423479626955, 55.4225960853081, 93.2888100499867, 126.879946773287,
143.069104861932, 86.7747884744339, 79.4517480028886, 140.260959630942,
125.115775875384, 52.187662082273, 38.1819057688564, 365.828168907497,
380.635956883794, 135.735302000757, 213.321896356121, 198.507936644685,
126.535797699776, 141.516048792542, 114.238818548927, 117.737122860635,
165.644292987747, 71.238834852709, 93.0825940979755, 41.8438489710712,
97.0666682368976, 86.5060758100772, 92.8659724484427, 76.6536183156139,
192.822109819002, 101.83958502542, 139.341067042001, 55.3992539361667,
92.106793773051, 78.2330906844691, 115.177918141833, 207.546042154974,
139.609995160777, 153.568552211039, 73.5738128652025, 112.249861520572,
171.38868664475, 66.0687084216098, 181.939713349267, 28.4417934718288,
90.1132509720827, 57.4202669424023, 46.8440239496863, 80.4799857926917,
42.6875862955885, 81.3500156027725, 142.669475129055, 23.4653605661019,
191.159072511375, 159.615857998832, 191.592580855392, 184.123292172321,
125.375425911215), Under17_e = c(123, 284, 189, 228, 661, 180,
49, 500, 121, 115, 686, 1456, 385, 578, 302, 476, 738, 124, 527,
803, 1219, 459, 614, 218, 229, 422, 543, 69, 536, 306, 149, 80,
0, 520, 281, 270, 454, 669, 905, 978, 282, 178, 630, 187, 145,
367, 327, 577, 225, 246, 966, 629, 211, 65, 2857, 3051, 592,
1162, 1322, 464, 490, 264, 576, 617, 326, 695, 169, 381, 309,
476, 355, 915, 431, 869, 269, 358, 335, 650, 1443, 561, 900,
411, 759, 1265, 171, 833, 45, 255, 134, 144, 339, 203, 388, 413,
66, 416, 654, 565, 700, 362)), row.names = c(NA, 100L), class = "data.frame")

gvisMotionChart blank when animated

Problem:
My gvisMotionChart works fine when paused, but is blank when animated.
Code:
Motion=gvisMotionChart(mydat,
idvar="Time_Period_Year_Cd",
timevar="Year",
xvar="Total_Customers",
yvar="Percent_Happy",
colorvar = "New_Product_Count",
sizevar = "Group_A_Count"
)
plot(Motion)
Result (when paused):
Result (when animated):
I imagine it must be something with my code or data, because I was able to get the example in the documentation to work properly. I tried several variations, like not specifying the xvar then plotting "year" as the x-axis, etc.
Data:
mydat <- structure(list(Time_Period_Year_Cd = c(201220L, 201320L, 201340L,
201360L, 201420L, 201440L, 201460L, 201480L, 201520L, 201540L,
201560L, 201580L, 201620L, 201640L, 201660L, 201680L, 201720L
), New_Product_Count = c(1606L, 1834L, 1205L, 1204L, 1645L, 704L,
651L, 473L, 692L, 559L, 535L, 531L, 911L, 663L, 599L, 702L, 512L
), Group_A_Count = c(616, 670, 512, 520, 594, 265, 215, 148,
235, 171, 160, 166, 231, 220, 148, 138, 101), Group_B_Count = c(267,
288, 177, 194, 320, 122, 156, 103, 121, 108, 105, 105, 187, 146,
134, 152, 103), Group_C_Count = c(365, 420, 293, 269, 373, 172,
151, 120, 192, 132, 135, 148, 225, 150, 191, 205, 177), Group_D_Count = c(333,
429, 202, 204, 335, 132, 121, 97, 133, 143, 131, 107, 264, 139,
119, 196, 129), Number_Bought_Per_Customer = c(5.46637608966376,
6.4432933478735, 6.79668049792531, 7.04734219269103, 7.2468085106383,
7.41193181818182, 7.44086021505376, 6.48625792811839, 6.91329479768786,
7.16994633273703, 6.49906542056075, 5.30885122410546, 4.78155872667398,
4.09049773755656, 3.80801335559265, 3.04415954415954, 2.826171875
), Total_Customers = c(5038L, 5940L, 5557L, 5472L, 6052L, 5164L,
4544L, 3954L, 4473L, 4948L, 3884L, 3723L, 4011L, 4303L, 3413L,
3421L, 2964L), Percent_Happy = c(0.797988105101756, 0.83794901700776,
0.773512106024391, 0.775157532067893, 0.834237370911927, 0.8306291015089,
0.820150552373225, 0.824696031621165, 0.776615269241095, 0.848073917652629,
0.841092657179119, 0.781823675677749, 0.840457049668049, 0.763698900181159,
0.872781703430453, 0.896473511416122, 0.787873482140602), Year = c(2012,
2013, 2013, 2013, 2014, 2014, 2014, 2014, 2015, 2015, 2015, 2015,
2016, 2016, 2016, 2016, 2017)), .Names = c("Time_Period_Year_Cd",
"New_Product_Count", "Group_A_Count", "Group_B_Count", "Group_C_Count",
"Group_D_Count", "Number_Bought_Per_Customer", "Total_Customers",
"Percent_Happy", "Year"), row.names = c(NA, 17L), class = "data.frame")
Each idvar value has only one timevar value:
mydat %>% count(Time_Period_Year_Cd) %>% head
# Time_Period_Year_Cd n
# <int> <int>
# 1 201220 1
# 2 201320 1
# 3 201340 1
# 4 201360 1
# 5 201420 1
# 6 201440 1
So there cannot be any transition. In contrast to e.g.
df <- mydat %>% mutate(idvar = substr(Time_Period_Year_Cd, 1, 4)) %>% group_by(idvar) %>% mutate(timevar = 1:n()) %>% ungroup
Motion=gvisMotionChart(df,
idvar="idvar",
timevar="timevar",
xvar="Total_Customers",
yvar="Percent_Happy",
colorvar = "New_Product_Count",
sizevar = "Group_A_Count"
)
plot(Motion)

us variable names to draw charts in ggplot

This is the my data frame:
dput(head(df,10))
structure(list(Users = c(200L, 305L, 358L, 419L, 492L, 576L,
675L, 791L, 928L, 1087L), Transactions = c(460, 702.4, 823.2,
964.8, 1130.8, 1325.3, 1553.2, 1820.4, 2133.5, 2500.4), Workload = c(100,
109.9, 109.9, 110, 110, 110.1, 110.1, 110.2, 110.2, 110.3), HeapUsage = c(25,
35.83, 36.47, 37.13, 37.79, 38.47, 39.17, 39.87, 40.59, 41.32
)), .Names = c("Users", "Transactions", "Workload", "HeapUsage"
), row.names = c(NA, 10L), class = "data.frame")
I need to use the variable names. For example,
yv=c("HeapUsage") and xv=c("Users", "Transactions", "Workload")
I need to be able to create however many entries in xv vector against yv variable.
I tried this:
ggplot(df)+geom_point()+facet_wrap(yv~xv)
does not seem to be working. In this case, I need 3 different charts yaxis being yv and xaxis being 1. Users, 2. Transactions 3. Workload
Is there an easy way to do it?

Resources