I know that polr does not give p-values because they are not very reliable. Nevertheless, I would like to add them to my modelsummary (Vignette) output. I know to get the values as follows:
library(MASS)
polr_res <- polr(as.ordered(rep77) ~ foreign + length + mpg, Hess=TRUE, data=fullauto);summary(polr_res)
Call:
polr_res(formula = as.ordered(rep77) ~ foreign + length + mpg, data = fullauto,
Hess = TRUE)
## coefficient test
library("AER")
coeftest(polr_res)
modelsummary
Because polr has no p-values, I cannot call modelsummary(models, stars=TRUE) on my models (which includes other models which do have p-values and for which I want to show stars).
library(modelsummary)
models <- list(
"Ordinal Probit" = polr_res,
)
# model_names <- c("OLS", "")
modelsummary(models, stars=TRUE)
I tried first to simply add the p-values to the tidy object, but I cannot add that object to the list of models.
polr_pval <- coeftest(polr)[,4]
polr_pval <- as.data.frame(polr_pval)
tidy_polr <- tidy(polr)
tidy_polr[,5] <- polr_pval
The vignette describes that I can make a custom class which adapts the polr, but I do not understand how:
https://vincentarelbundock.github.io/modelsummary/articles/modelsummary.html#customizing-existing-models-part-i-
https://vincentarelbundock.github.io/modelsummary/articles/modelsummary.html#customizing-existing-models-part-ii-
Could anyone help me figure this out?
EDIT:
I am posting an edit showing the problem I was having when using Vincent's answer, with R version 3.6.1 (2019-07-05). If you are encountering this issue, (preferably) update to R version 4.0.0 or download an update for modelsummary from Github (see also Vincent's comments below).:
library(remotes)
remotes::install_github('vincentarelbundock/modelsummary')
Output:
DATA for R
fullauto <- structure(list(make = structure(c(1, 1, 1, 2, 2, 3, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
9, 10, 10, 11, 11, 12, 12, 12, 13, 14, 14, 14, 14, 14, 14, 15,
15, 15, 15, 15, 15, 15, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18,
18, 18, 19, 20, 21, 21, 21, 22, 22, 22, 22, 23), label = "Make", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(AMC = 1, Audi = 2, BMW = 3,
Buick = 4, Cad. = 5, Chev. = 6, Datsun = 7, Dodge = 8, Fiat = 9,
Ford = 10, Honda = 11, Linc. = 12, Mazda = 13, Merc. = 14, Olds = 15,
Peugeot = 16, Plym. = 17, Pont. = 18, Renault = 19, Subaru = 20,
Toyota = 21, VW = 22, Volvo = 23)), model = structure(c(1, 2,
3, 4, 5000, 320, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 200, 210, 510, 810, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 98, 604, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 260), label = "Model", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Concord = 1, Pacer = 2, Spirit = 3,
Fox = 4, Century = 5, Electra = 6, LeSabre = 7, Opel = 8, Regal = 9,
Riviera = 10, Skylark = 11, Deville = 12, Eldrado = 13, Seville = 14,
Chevette = 15, Impala = 16, Malibu = 17, MCarlo = 18, Monza = 19,
Nova = 20, Colt = 21, Diplomat = 22, Magnum = 23, StRegis = 24,
Strada = 25, Fiesta = 26, Mustang = 27, Accord = 28, Civic = 29,
Cntntl = 30, `Mark V` = 31, Vrsills = 32, GLC = 33, Bobcat = 34,
Cougar = 35, `XR-7` = 36, Marquis = 37, Monarch = 38, Zephyr = 39,
Cutlass = 40, CutlSupr = 41, `Delta 88` = 42, Omega = 43, Starfire = 44,
Toronado = 45, Arrow = 46, Champ = 47, Horizon = 48, Sapporo = 49,
Volare = 50, Catalina = 51, Firebird = 52, GranPrix = 53, `Le Mans` = 54,
Phoenix = 55, Sunbird = 56, `Le Car` = 57, Subaru = 58, Celica = 59,
Corolla = 60, Corona = 61, Rabbit = 62, Diesel = 63, Scirocco = 64,
Dasher = 65)), price = structure(c(4099, 4749, 3799, 6295, 9690,
9735, 4816, 7827, 5788, 4453, 5189, 10372, 4082, 11385, 14500,
15906, 3299, 5705, 4504, 5104, 3667, 3955, 6229, 4589, 5079,
8129, 3984, 4010, 5886, 6342, 4296, 4389, 4187, 5799, 4499, 11497,
13594, 13466, 3995, 3829, 5379, 6303, 6165, 4516, 3291, 4733,
5172, 4890, 4181, 4195, 10371, 8814, 12990, 4647, 4425, 4482,
6486, 4060, 5798, 4934, 5222, 4723, 4424, 4172, 3895, 3798, 5899,
3748, 5719, 4697, 5397, 6850, 7140, 11995), label = "Price", format.stata = "%8.0g"),
mpg = structure(c(22, 17, 22, 23, 17, 25, 20, 15, 18, 26,
20, 16, 19, 14, 14, 21, 29, 16, 22, 22, 24, 19, 23, 35, 24,
21, 30, 18, 16, 17, 21, 28, 21, 25, 28, 12, 12, 14, 30, 22,
14, 14, 15, 18, 20, 19, 19, 18, 19, 24, 16, 21, 14, 38, 34,
25, 26, 18, 18, 18, 19, 19, 19, 24, 26, 35, 18, 31, 18, 25,
41, 25, 23, 17), label = "Mileage (mpg)", format.stata = "%8.0g"),
rep78 = structure(c(3, 3, NA, 3, 5, 4, 3, 4, 3, NA, 3, 3,
3, 3, 2, 3, 3, 4, 3, 2, 2, 3, 4, 5, 4, 4, 5, 2, 2, 2, 3,
4, 3, 5, 4, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 4, 3, 1,
3, 4, NA, 3, 5, 3, NA, 2, 4, 1, 3, 3, NA, 2, 3, 5, 5, 5,
5, 4, 5, 4, 4, 5), label = "Repair Record 1978", format.stata = "%9.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Poor = 1, Fair = 2, Average = 3,
Good = 4, Excellent = 5)), rep77 = structure(c(2, 1, NA,
3, 2, 4, 3, 4, 4, NA, 3, 4, 3, 3, 2, 3, 3, 4, 3, 3, 2, 3,
3, 5, 4, 4, 4, 2, 2, 2, 1, NA, 3, 5, 4, 4, 4, 3, 4, 3, 3,
4, 2, NA, 3, 3, 4, 4, 3, 1, 3, 4, NA, 3, 4, NA, NA, 2, 4,
2, 3, 3, NA, 2, 3, 4, 5, 5, 5, 3, 4, 3, 3, 3), label = "Repair Record 1977", format.stata = "%9.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Poor = 1, Fair = 2, Average = 3,
Good = 4, Excellent = 5)), hdroom = structure(c(2.5, 3, 3,
2.5, 3, 2.5, 4.5, 4, 4, 3, 2, 3.5, 3.5, 4, 3.5, 3, 2.5, 4,
3.5, 2, 2, 3.5, 1.5, 2, 2.5, 2.5, 2, 4, 4, 4.5, 2.5, 1.5,
2, 3, 2.5, 3.5, 2.5, 3.5, 3.5, 3, 3.5, 3, 3.5, 3, 3.5, 4.5,
2, 4, 4.5, 2, 3.5, 4, 3.5, 2, 2.5, 4, 1.5, 5, 4, 1.5, 2,
3.5, 3.5, 2, 3, 2.5, 2.5, 3, 2, 3, 3, 2, 2.5, 2.5), label = "Headroom (in.)", format.stata = "%6.1f"),
rseat = structure(c(27.5, 25.5, 18.5, 28, 27, 26, 29, 31.5,
30.5, 24, 28.5, 30, 27, 31.5, 30, 30, 26, 29.5, 28.5, 28.5,
25, 27, 21, 23.5, 22, 27, 24, 29, 29, 28, 26.5, 26, 23, 25.5,
23.5, 30.5, 28.5, 27, 25.5, 25.5, 29.5, 25, 30.5, 27, 29,
28, 28, 29, 27, 25.5, 30, 31.5, 30.5, 21.5, 23, 25, 22, 31,
29, 23.5, 28.5, 28, 27, 25, 23, 25.5, 22, 24.5, 23, 25.5,
25.5, 23.5, 37.5, 29.5), label = "Rear Seat (in.)", format.stata = "%6.1f"),
trunk = structure(c(11, 11, 12, 11, 15, 12, 16, 20, 21, 10,
16, 17, 13, 20, 16, 13, 9, 20, 17, 16, 7, 13, 6, 8, 8, 8,
8, 17, 17, 21, 16, 9, 10, 10, 5, 22, 18, 15, 11, 9, 16, 16,
23, 15, 17, 16, 16, 20, 14, 10, 17, 20, 14, 11, 11, 17, 8,
16, 20, 7, 16, 17, 13, 7, 10, 11, 14, 9, 11, 15, 15, 16,
12, 14), label = "Trunk space (cu. ft.)", format.stata = "%8.0g"),
weight = structure(c(2930, 3350, 2640, 2070, 2830, 2650,
3250, 4080, 3670, 2230, 3280, 3880, 3400, 4330, 3900, 4290,
2110, 3690, 3180, 3220, 2750, 3430, 2370, 2020, 2280, 2750,
2120, 3600, 3600, 3740, 2130, 1800, 2650, 2240, 1760, 4840,
4720, 3830, 1980, 2580, 4060, 4130, 3720, 3370, 2830, 3300,
3310, 3690, 3370, 2730, 4030, 4060, 3420, 3260, 1800, 2200,
2520, 3330, 3700, 3470, 3210, 3200, 3420, 2690, 1830, 2050,
2410, 2200, 2670, 1930, 2040, 1990, 2160, 3170), label = "Weight (lbs.)", format.stata = "%8.0g"),
length = structure(c(186, 173, 168, 174, 189, 177, 196, 222,
218, 170, 200, 207, 200, 221, 204, 204, 163, 212, 193, 200,
179, 197, 170, 165, 170, 184, 163, 206, 206, 220, 161, 147,
179, 172, 149, 233, 230, 201, 154, 169, 221, 217, 212, 198,
195, 198, 198, 218, 200, 180, 206, 220, 192, 170, 157, 165,
182, 201, 214, 198, 201, 199, 203, 179, 142, 164, 174, 165,
175, 155, 155, 156, 172, 193), label = "Length (in.)", format.stata = "%8.0g"),
turn = structure(c(40, 40, 35, 36, 37, 34, 40, 43, 43, 34,
42, 43, 42, 44, 43, 45, 34, 43, 31, 41, 40, 43, 35, 32, 34,
38, 35, 46, 46, 46, 36, 33, 43, 36, 34, 51, 48, 41, 33, 39,
48, 45, 44, 41, 43, 42, 42, 42, 43, 40, 43, 43, 38, 37, 37,
36, 38, 44, 42, 42, 45, 40, 43, 41, 34, 36, 36, 35, 36, 35,
35, 36, 36, 37), label = "Turn Circle (ft.) ", format.stata = "%8.0g"),
displ = structure(c(121, 258, 121, 97, 131, 121, 196, 350,
231, 304, 196, 231, 231, 425, 350, 350, 231, 250, 200, 200,
151, 250, 119, 85, 119, 146, 98, 318, 318, 225, 105, 98,
140, 107, 91, 400, 400, 302, 86, 140, 302, 302, 302, 250,
140, 231, 231, 231, 231, 151, 350, 350, 163, 156, 86, 105,
119, 225, 231, 231, 231, 231, 231, 151, 79, 97, 134, 97,
134, 89, 90, 97, 97, 163), label = "Displacement (cu. in.)", format.stata = "%8.0g"),
gratio = structure(c(3.57999992370605, 2.52999997138977,
3.07999992370605, 3.70000004768372, 3.20000004768372, 3.64000010490417,
2.9300000667572, 2.41000008583069, 2.73000001907349, 2.86999988555908,
2.9300000667572, 2.9300000667572, 3.07999992370605, 2.27999997138977,
2.19000005722046, 2.24000000953674, 2.9300000667572, 2.55999994277954,
2.73000001907349, 2.73000001907349, 2.73000001907349, 2.55999994277954,
3.89000010490417, 3.70000004768372, 3.53999996185303, 3.54999995231628,
3.53999996185303, 2.47000002861023, 2.47000002861023, 2.94000005722046,
3.36999988555908, 3.15000009536743, 3.07999992370605, 3.04999995231628,
3.29999995231628, 2.47000002861023, 2.47000002861023, 2.47000002861023,
3.73000001907349, 2.73000001907349, 2.75, 2.75, 2.25999999046326,
2.4300000667572, 3.07999992370605, 2.9300000667572, 2.9300000667572,
2.73000001907349, 3.07999992370605, 2.73000001907349, 2.41000008583069,
2.41000008583069, 3.57999992370605, 3.04999995231628, 2.97000002861023,
3.36999988555908, 3.53999996185303, 3.23000001907349, 2.73000001907349,
3.07999992370605, 2.9300000667572, 2.9300000667572, 3.07999992370605,
2.73000001907349, 3.72000002861023, 3.80999994277954, 3.05999994277954,
3.21000003814697, 3.04999995231628, 3.77999997138977, 3.77999997138977,
3.77999997138977, 3.74000000953674, 2.98000001907349), label = "Gear Ratio", format.stata = "%6.2f"),
order = structure(c(1, 2, 3, 5, 4, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 47, 48, 49, 50, 51, 52, 46, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74), label = "Original order", format.stata = "%8.0g"),
foreign = structure(c(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1), label = "Foreign", format.stata = "%8.0g", class = c("haven_labelled",
"vctrs_vctr", "double"), labels = c(Domestic = 0, Foreign = 1
)), wgtd = structure(c(2930, 3350, 2640, NA, NA, NA, 3250,
4080, 3670, 2230, 3280, 3880, 3400, 4330, 3900, 4290, 2110,
3690, 3180, 3220, 2750, 3430, NA, NA, NA, NA, 2120, 3600,
3600, 3740, NA, 1800, 2650, NA, NA, 4840, 4720, 3830, NA,
2580, 4060, 4130, 3720, 3370, 2830, 3300, 3310, 3690, 3370,
2730, 4030, 4060, NA, 3260, 1800, 2200, 2520, 3330, 3700,
3470, 3210, 3200, 3420, 2690, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), format.stata = "%9.0g"), wgtf = structure(c(NA,
NA, NA, 2070, 2830, 2650, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 2370, 2020, 2280, 2750, NA,
NA, NA, NA, 2130, NA, NA, 2240, 1760, NA, NA, NA, 1980, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3420, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1830, 2050, 2410,
2200, 2670, 1930, 2040, 1990, 2160, 3170), format.stata = "%9.0g")), label = "Automobile Models", row.names = c(NA,
-74L), class = c("tbl_df", "tbl", "data.frame"))
I think the easiest way to achieve this is to define a tidy_custom.polr method as described here in the documentation.. For instance, you could do:
library(MASS)
library(AER)
library(modelsummary)
tidy_custom.polr <- function(x, ...) {
s <- coeftest(x)
out <- data.frame(
term = row.names(s),
p.value = s[, "Pr(>|z|)"])
out
}
mod = list(
"LM" = lm(gear ~ hp + mpg, data = mtcars),
"POLR" = polr(as.ordered(gear) ~ hp + mpg, data = mtcars))
modelsummary(mod, stars = TRUE)
The below details a simplified version of an optimization problem that I am having trouble solving.
The objective is to minimize a cost function for an organization that delivers water via trucks and use that equation to produce a truck delivery schedule that minimizes costs.
The organization delivers water to ~10,000 household tanks throughout the year.
The tanks have a maximum capacity of 300 gallons and minimum desired limit of 100 gallons -- that is, the tanks should be topped up to 300 before they go below 100.
For example, if the tank is 115 gallons on week 2 and is estimated to use 20 gallons in week 3, it needs to be refilled in week 3.
The costs include:
A per-delivery fee of $10
The weekly cost of trucks. The weekly cost of a truck is $1,000. So if 200 deliveries are in a single week the cost is $3,000 (200 * 10 + 1000 * 1).If 201 deliveries are made, the cost jumps significantly to $4,010 (201 * 10 + 1000 * 2).
Water usage varies across households and across weeks. Peak water usage is in the summer. If we blindly followed the rule to refill just before hitting the 100 gallon minimum limit, then it is likely that the peak number of trucks would be higher than needed if the deliveries were spread out into the "shoulders" of the summer.
I have created estimates of weekly water usage for each week for each household. Furthermore, I have grouped like households to reduce the size of the optimization problem (~10k households down to 8 groups).
To restate the goal: The output of this optimizer should be: deliver or not, for each household group, for each of the 52 weeks in a year.
Simplified data (i.e., for 8 groups and 12 weeks):
df.usage <- structure(list(reduction.group = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8), week = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12), water_usage = c(46, 50, 42, 47, 43, 39,
38, 32, 42, 36, 42, 30, 46, 50, 42, 47, 43, 39, 38, 32, 42, 36,
42, 30, 46, 50, 43, 47, 43, 39, 38, 32, 42, 36, 42, 30, 46, 50,
43, 47, 43, 39, 38, 32, 42, 36, 42, 30, 29, 32, 27, 30, 27, 25,
24, 20, 26, 23, 27, 19, 29, 32, 27, 30, 27, 25, 24, 20, 26, 23,
27, 19, 29, 32, 27, 30, 28, 25, 25, 21, 27, 23, 27, 19, 29, 32,
27, 30, 28, 25, 25, 21, 27, 23, 27, 20), tank.level.start = c(115,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 165, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 200, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 215, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 225, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 230,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 235, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 240, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA)), row.names = c(NA, 96L), class = "data.frame")
Tank level refill rules
Here is a nested set of loops for determining tank levels over time with a "refill" logic:
library(dplyr)
reduction.groups <- unique(df.usage$reduction.group)
df.after.refill.logic <- list()
for (i in reduction.groups) {
temp <- df.usage %>% filter(reduction.group == i)
temp$refilled <- 0
temp$level <- temp$tank.level.start
n <- nrow(temp)
if (n > 1) for (j in 2:n) {
temp$level[j] <- ( temp$level[j-1] - temp$water_usage[j] )
if(temp$level[j] < 100) {
temp$level[j] <- 300
temp$refilled[j] <- 1
}
}
df.after.refill.logic <- bind_rows(df.after.refill.logic, temp)
}
Decision Variables
Delivery or not to each group, each week of the year (Binary)
Constraints
No partial trucks: number of trucks must be integers
Truck capacity: truck deliveries/week <= 200
Tanks can't go below 100 gallons: level >= 100
Delivery must be binary
Constants
1600 # truck_weekly_costs
10 # cost_per_delivery
200 # weekly_delivery_capacity_per_truck
Example Cost Function
weekly_cost_function <- function(i){
cost <- (ceiling(sum(i)/200)) * 1600 + (sum(i) * 10)
cost
}
**example cost for one week with i = 199 deliveries:**
weekly_cost_function(i = 199)
[1] 3590
Attempt to Model the Problem using OMPR
Below is the beginning of a model created with the OMPR package (although using another package would be okay):
I am confused about how to set this up using the data above.
Three obvious problems:
How can I include the ceiling logic expressed in the Example Cost Function in the OMPR code?
The model below isn't incorporating the data in the dataframe above (df.usage). The goal is for an optimizer to generate values for the "refilled" and "level" variables based on the four variables (reduction.group, week, water_usage, tank_level_start), along with the constants.
The refill logic I wrote in the "determining tank levels" loop above isn't incorporated. Should that be added as a constraint? If so, how?
num_groups <- length(unique(df.usage$reduction.group))
num_weeks <- length(unique(df.usage$week))
MIPModel() %>%
add_variable(x[i,w], # create decision variable: deliver or not by...
i = 1:num_groups, # group,
w = 1:num_weeks, # in week.
type = "integer", # Integers only
lb = 0, ub = 1) %>% # between 0 and 1, inclusive
set_objective(sum_expr( x[i,w]/200 * 1600 + x[i,w] * 10,
i = 1:num_groups,
w = 1:num_weeks),
sense = "min") %>%
# add constraint to achieve ceiling(x[i,w]/200), or should this be in the set_objective call?
add_constraint(???) %>%
solve_model(with_ROI("glpk"))
Desired Output
Here is what an example head() output would look like:
reduction.group week water.usage refill level
1 1 46 0 115
1 2 50 1 300
1 3 42 0 258
1 4 47 0 211
1 5 43 0 168
1 6 39 0 129
Importantly, the refill values would be whatever minimizes the cost function and keeps the level above 100.
The ceiling function is a difficult non-linear function (non-differentiable, not continuous), and should be avoided at all cost. However it can be modeled quite easily with general integer variables. For non-negative variables x >= 0 we can formulate
y = ceiling(x)
as
x <= y <= x+1
y integer
This is completely linear and is trivial to implement in OMPR (or in any other LP/MIP tool).
Detail note. This formulation will allow the model to choose y=x or y=x+1 in the special case where x assumes an integer value. If you want to be picky about this case, you can do:
x+0.0001 <= y <= x+1
y integer
I would not worry about this.
With the ceiling function, this seems like a difficult problem for a hill-climbing optimizer. I think genetic algorithms are a better fit. The matrix of deliver-or-not for each house each week makes a nice genome.
library(dplyr)
# Original given sample input data.
df.usage <- structure(list(reduction.group = c(1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8), week = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12), water_usage = c(46, 50, 42, 47, 43, 39,
38, 32, 42, 36, 42, 30, 46, 50, 42, 47, 43, 39, 38, 32, 42, 36,
42, 30, 46, 50, 43, 47, 43, 39, 38, 32, 42, 36, 42, 30, 46, 50,
43, 47, 43, 39, 38, 32, 42, 36, 42, 30, 29, 32, 27, 30, 27, 25,
24, 20, 26, 23, 27, 19, 29, 32, 27, 30, 27, 25, 24, 20, 26, 23,
27, 19, 29, 32, 27, 30, 28, 25, 25, 21, 27, 23, 27, 19, 29, 32,
27, 30, 28, 25, 25, 21, 27, 23, 27, 20), tank.level.start = c(115,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 165, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 200, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 215, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 225, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 230,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 235, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 240, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA)), row.names = c(NA, 96L), class = "data.frame")
# Orginal given delivery cost function.
weekly_cost_function <- function(i){
cost <- (ceiling(sum(i)/200)) * 1600 + (sum(i) * 10)
cost
}
# Calculate the list of houses (reduction.groups) and number of delivery weeks (weeks).
reduction.groups <- unique(df.usage$reduction.group)
temp <- df.usage %>% filter(reduction.group == 1)
weeks <- nrow(temp)
# The genome consists of a matrix representing deliver-or-not to each house each week.
create_random_delivery_schedule <- function(number_of_houses, number_of_weeks, prob = NULL) {
matrix(sample(c(0, 1), number_of_houses * number_of_weeks, replace = TRUE, prob = prob), number_of_houses)
}
# Generate a population of random genes.
population_size <- 100
schedules <- replicate(population_size, create_random_delivery_schedule(length(reduction.groups), weeks), simplify = FALSE)
# Calculate fitness of an individual.
fitness <- function(schedule) {
# Fitness is related to delivery cost.
delivery_cost <- sum(apply(schedule, 2, weekly_cost_function))
# If the schedule allows a tank level to drop below 100, apply a fitness penalty.
# Don't make the fitness penalty too large.
# If the fitness penalty is large enough to be catastrophic (essentially zero children)
# then solutions that are close to optimal will also be likely to generate children
# who fall off the catastropy cliff so there will be a selective pressure away from
# close to optimal solutions.
# However, if your optimizer generates a lot of infeasible solutions raise the penalty.
for (i in reduction.groups) {
temp <- df.usage %>% filter(reduction.group == i)
temp$level <- temp$tank.level.start
if (weeks > 1) for (j in 2:weeks) {
if (1 == schedule[i,j]) {
temp$level[j] <- 300
} else {
temp$level[j] <- ( temp$level[j-1] - temp$water_usage[j] )
if (100 > temp$level[j]) {
# Fitness penalty.
delivery_cost <- delivery_cost + 10 * (100 - temp$level[j])
}
}
}
}
# Return one over delivery cost so that lower cost is higher fitness.
1 / delivery_cost
}
# Generate a new schedule by combining two parents chosen randomly weighted by fitness.
make_baby <- function(population_fitness) {
# Choose some parents.
parents <- sample(length(schedules), 2, prob = population_fitness)
# Get DNA from mommy.
baby <- schedules[[parents[1]]]
# Figure out what part of the DNA to get from daddy.
house_range <- sort(sample(length(reduction.groups), 2))
week_range <- sort(sample(weeks, 2))
# Get DNA from daddy.
baby[house_range[1]:house_range[2],week_range[1]:week_range[2]] <- schedules[[parents[2]]][house_range[1]:house_range[2],week_range[1]:week_range[2]]
# Mutate, 1% chance of flipping each bit.
changes <- create_random_delivery_schedule(length(reduction.groups), weeks, c(0.99, 0.01))
baby <- apply(xor(baby, changes), c(1, 2), as.integer)
}
lowest_cost <<- Inf
# Loop creating and evaluating generations.
for (ii in 1:100) {
population_fitness <- lapply(schedules, fitness)
lowest_cost_this_generation <- 1 / max(unlist(population_fitness))
print(sprintf("lowest cost = %f", lowest_cost_this_generation))
if (lowest_cost_this_generation < lowest_cost) {
lowest_cost <<- lowest_cost_this_generation
best_baby <<- schedules[[which.max(unlist(population_fitness))]]
}
schedules <<- replicate(population_size, make_baby(population_fitness), simplify = FALSE)
}