My problem boils down to finding a vector X that best solves the following problem:
L is the vector of profits,
R is the vector of restrictions (right-hand sides),
P is the matrix of constraint coefficients.
Maximize SUM_i (x_i * l_i), i.e. max t(L) %*% X,
subject to
SUM_i (x_i * p_ij) <= r_j for every j, i.e. P %*% X <= R.
I do find a solution for X, but not the best one, which would be
fb = c(.217, 0, 0, 23.2865, 0, 13.427).
How do I find the best solution?
code:
X<-matrix(rep(1,6),6,1)
P<-matrix(c(
1, 1, 1, 2, 0, 0,
0, 1, 1, 2, 1, 1,
99.4, 37.75, 19.75, 54.40, 74.75, 53,
2.400, 1.540, 0, 0, 0, 0,
2.400, 1.960, 0, 0, 0, 0,
1.800, 3.300, 5.330, 0, 0, 0,
0, 0, 2.070, 0, 8.700, 0,
0, 0, .436, 0, 19.100, 12.363,
0, 3.000, .364, 0, 9.100, 26.737 ),
9, 6, byrow = TRUE)
L <- matrix(c(83.4, 72.35, 27.3, 72.05, 217.25, 455), 6,1)
R <- matrix(c(60,60,2000,351,448,479,338,424,359),9,1)
farm <- function(par, P, R, L) {
  # feasible if all constraints hold and all entries of par are non-negative
  trues <- P %*% par <= R
  if (min(trues) == 1 && min(par) >= 0) {
    return(-t(L) %*% par)  # optim() minimises, so return the negative profit
  } else {
    return(0)              # flat penalty for infeasible points
  }
}
mtds = c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN","Brent")
out <- optim(par = X,   # initial guess
             fn = farm,
             P = P,
             R = R,
             L = L,
             method = mtds[5])  # "SANN"
# my result
t(L)%*%out$par
#A matrix: 1 × 1 of type dbl
#7419.596
# the first best
fb<- matrix(c(.217,0,0,23.2865,0,13.427),6,1)
t(L)%*%fb
#A matrix: 1 × 1 of type dbl
#7805.175
I think you can try fmincon from the pracma package:
library(pracma)
objfun <- function(x) -t(L)%*%x
res <- fmincon(x0 = X, fn = objfun, A = P, b = R, lb = rep(0, length(X)))
and you will see that
> res$par
[1] 4.201711e-16 -1.239088e-15 1.863081e-17 2.310286e+01
[5] 5.566620e-01 1.323762e+01
> -res$value
[,1]
[1,] 7808.615
That looks very much like a model that could be solved by a linear programme.
library("Rglpk")
Rglpk_solve_LP(obj = L,
mat = P,
dir = rep("<=", 9),
rhs = R,
max = TRUE)
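By default Rglpk_solve_LP bounds every variable below by zero, so the non-negativity requirement is already covered. A small sketch of capturing and inspecting the result (I pass obj and rhs through c() because the documentation expects plain numeric vectors, while L and R above are one-column matrices):
res <- Rglpk_solve_LP(obj = c(L),
                      mat = P,
                      dir = rep("<=", 9),
                      rhs = c(R),
                      max = TRUE)
res$optimum   # the maximal profit
res$solution  # the corresponding X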
I am trying to run an interval regression using the survival R package (as described here: https://stats.oarc.ucla.edu/r/dae/interval-regression/), but I am running into difficulties when trying to pool results across multiply imputed datasets. Specifically, although estimates are returned, I get the following warning: In log(1 - 2 * pnorm(width/2)) : NaNs produced. The estimates seem reasonable at face value (no NaNs, and no implausibly large or small SEs).
I ran the same model on the stacked dataset (ignoring imputations) and on individual imputed datasets, and in neither case do I get the warning. Would someone be able to explain to me what is going on? Is this an ignorable warning? If not, is there a workaround that avoids it?
Thanks so much!
# A Reproducible Example
require(survival)
require(mice)
require(car)
# Create DF
dat <- data.frame(dv = c(1, 1, 2, 1, 0, NA, 1, 4, NA, 0, 3, 1, 3, 0, 2, 1, 4, NA, 2, 4),
catvar1 = factor(c(0, 0, 0, 0, 0, 1, 0, 0, 0, NA, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0)),
catvar2 = factor(c(1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, NA, 0)))
dat_imp <- mice(data = dat)
# Transform Outcome Var for Interval Reg
dat_imp_long <- complete(dat_imp, action = "long", include=TRUE)
# 1-4 correspond to ranges (e.g., 1 = 1 to 2 times...4 = 10 or more)
# create variables that reflect this range
dat_imp_long$dv_low <- car::recode(dat_imp_long$dv, "0 = 0; 1 = 1; 2 = 3; 3 = 6; 4 = 10")
dat_imp_long$dv_high <- car::recode(dat_imp_long$dv, "0 = 0; 1 = 2; 2 = 5; 3 = 9; 4 = 999")
dat_imp_long$dv_high[dat_imp_long$dv_high > 40] <- Inf
# Convert back to mids
dat_mids <- as.mids(dat_imp_long)
# Run Interval Reg
model1 <- with(dat_mids, survreg(Surv(dv_low, dv_high, type = "interval2") ~
catvar1 + catvar2, dist = "gaussian"))
# Warning message for both calls: In log(1 - 2 * pnorm(width/2)) : NaNs produced
# The warning occurs not only with pool(), but also with summary()
summary(model1)
summary(pool(model1))
# Run Equivalent Model on Individual Datasets
# No errors produced
imp1 <- subset(dat_imp_long, .imp == 1)
model2 <- survreg(Surv(dv_low, dv_high, type = "interval2") ~
catvar1 + catvar2, dist = "gaussian", data = imp1)
summary(model2)
imp2 <- subset(dat_imp_long, .imp == 2)
model3 <- survreg(Surv(dv_low, dv_high, type = "interval2") ~
catvar1 + catvar2, dist = "gaussian", data = imp2)
summary(model3)
# Equivalent Analysis on Stacked Dataset
# No error
model <- with(dat_imp_long, survreg(Surv(dv_low, dv_high, type = "interval2") ~
catvar1 + catvar2, dist = "gaussian"))
summary(model)
I have the following data:
dat<- structure(list(Pentad = 1:73, RR = c(0, 0.014285714, 0, 0.088571429,
0.071428571, 0, 0.065714286, 0.028571429, 0.094285714, 0.011428571,
0, 0, 0, 0, 0, 0, 0.04, 0, 0.814285714, 0.285714286, 1.14, 5.334285714,
2.351428571, 1.985714286, 1.494285714, 2.005714286, 20.04857143,
25.00857143, 16.32, 11.06857143, 8.965714286, 3.985714286, 5.202857143,
7.802857143, 4.451428571, 9.22, 32.04857143, 19.50571429, 3.148571429,
2.434285714, 9.057142857, 28.70857143, 34.15142857, 33.02571429,
46.50571429, 70.61714286, 3.168571429, 1.928571429, 7.031428571,
0.902857143, 5.377142857, 11.35714286, 15.04571429, 11.66285714,
21.24, 11.43714286, 11.69428571, 2.977142857, 4.337142857, 0.871428571,
1.391428571, 0.871428571, 1.145714286, 2.317142857, 0.182857143,
0.282857143, 0.348571429, 0, 0.345714286, 0.142857143, 0.18,
4.894285714, 0.037142857), YY = c(0.577142857, 0, 1.282857143,
1.445714286, 0.111428571, 0.36, 0, 0, 0, 1, 0.011428571, 0.008571429,
0.305714286, 0, 0, 0, 0, 0.8, 0.062857143, 0, 0, 0, 0, 0.013333333,
0.043333333, 1.486666667, 0, 2.486666667, 1.943333333, 0.773333333,
8.106666667, 7.733333333, 0.5, 4.356666667, 2.66, 6.626666667,
4.404285714, 7.977142857, 12.94285714, 18.49428571, 7.357142857,
11.08285714, 9.034285714, 14.29142857, 34.61428571, 45.30285714,
6.66, 6.702857143, 5.962857143, 14.85428571, 2.1, 2.837142857,
7.391428571, 32.03714286, 9.005714286, 3.525714286, 12.32, 2.32,
7.994285714, 6.565714286, 4.771428571, 2.354285714, 0.005714286,
2.508571429, 0.817142857, 2.885714286, 0.897142857, 0, 0, 0,
0, 0.145714286, 0.434285714)), class = "data.frame", row.names = c(NA,
-73L))
There are three columns: Pentad, RR, and YY.
I would like to get the following:
(a) Get the first pentad at which the precipitation exceeds the "annual mean" for at least three consecutive pentads.
(b) Get the last pentad of the final run of at least three consecutive pentads in which precipitation exceeds the annual mean, i.e. the last pentad before it drops back below the annual mean.
I was able to do (a) using the following script:
first_exceed_seq <- function(x, thresh = mean(x), len = 3)
{
# Logical vector, does x exceed the threshold
exceed_thresh <- x > thresh
# Indices of transition points; where exceed_thresh[i - 1] != exceed_thresh[i]
transition <- which(diff(c(0, exceed_thresh)) != 0)
# Reference index, grouping observations after each transition
index <- vector("numeric", length(x))
index[transition] <- 1
index <- cumsum(index)
# Break x into groups following the transitions
exceed_list <- split(exceed_thresh, index)
# Get the number of values exceeded in each index period
num_exceed <- vapply(exceed_list, sum, numeric(1))
# Get the starting index of the first sequence where at least len values exceed thresh
transition[as.numeric(names(which(num_exceed >= len))[1])]
}
first_exceed_seq(dat$RR)
Here's the plot of the time series:
The correct answer in (a) is 27.
I would like to ask how can I do this for (b). The correct answer for (b) should be 57.
I'll appreciate any help with this in R.
I don't know if I got your problem right.
This is what I tried:
library(dplyr)

dat %>%
  mutate(
    annual_mean = mean(RR),
    exceed_thresh = RR > annual_mean,
    lag1 = lag(exceed_thresh, 1),
    lag2 = lag(exceed_thresh, 2),
    pick_3 = ifelse(exceed_thresh & lag1 & lag2, RR, NA)
  )
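If you prefer base R, the same "at least three consecutive pentads above the mean" logic can be expressed with rle(); here is a minimal sketch, assuming the threshold is mean(dat$RR):
exceed <- dat$RR > mean(dat$RR)              # pentads above the annual mean
r      <- rle(exceed)                        # runs of TRUE/FALSE
ends   <- cumsum(r$lengths)                  # last pentad of each run
starts <- ends - r$lengths + 1               # first pentad of each run
runs   <- which(r$values & r$lengths >= 3)   # runs of >= 3 exceedances

starts[runs[1]]            # (a) first pentad of the first qualifying run, 27 here
ends[runs[length(runs)]]   # (b) last pentad of the final qualifying run, 57 here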
From the answer to this question here 3 years ago,
it seems this is not possible. The Gurobi documentation is not clear to me:
The documentation for the model argument states:
quadcon (optional)
...
The optional sense string defines the sense of the quadratic constraint. Allowed values are <, = or >. If not present, the default sense is <. It is stored in model$quadcon[[i]]$sense.
The documentation on constraints states:
Quadratic Constraints
...
Quadratic equality constraints are always
non-convex; they will give a GRB_ERROR_QCP_EQUALITY_CONSTRAINT
error with default settings.
[...] If you set the NonConvex parameter to 2, however, then Gurobi will accept arbitrary quadratic
constraints and attempt to solve the resulting model.
But setting NonConvex throws Error 10007: Unknown parameter: 'NonConvex' in R.
Any help is appreciated; a reproducible example can be found below:
library(Matrix)
model <- list(
modelsense = "min",
Q = structure(c(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1), .Dim = c(4L, 4L)),
A = structure(c(36, 0, 24, 0, -23, 0, -49, 1), .Dim = c(2L, 4L)),
rhs = c(0, 1),
sense = c("=", ">="),
vtype = "I",
quadcon = list(list(Qc = new("dgTMatrix", i = 0:3, j = 0:3,
Dim = c(4L, 4L),
Dimnames = list(NULL, NULL),
x = c(1, 1, 1, -2),
factors = list()),
# sense = "<=", # works fine
sense = ">=", # Error 10020: Q matrix is not positive semi-definite (PSD)
# sense = "=", # Error 10021: Quadratic equality constraints
rhs = 0)))
params <- list(OutputFlag = 0)
result <- gurobi::gurobi(model, params)
print(result$x)
As mattmilten already said in the comments, it is necessary to upgrade to Gurobi version 9.0. Then it should work with
params <- list(OutputFlag = 0, NonConvex = 2)
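With a new enough installation (the gurobi R package version tracks the Gurobi release, so packageVersion("gurobi") should report 9.0.0 or later), the reproducible example above then runs with nothing else changed:
packageVersion("gurobi")   # should be 9.0.0 or later for NonConvex
result <- gurobi::gurobi(model, params)
print(result$x)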
I've been working on this loop and keep getting the error below. I'm not sure if I can provide the data; if needed I'll do my best to obfuscate it. Here is the loop I am trying to use; any tips on what I'm doing wrong would be greatly appreciated, as I haven't found a viable solution yet. The exact error is below the code.
decay_function = function(df)
{
  df <- df[order(df$department, df$product, df$region, df$monthnum), ]
  for (mk in 1:ncol(levels_department)) {
    newdata <- df[which(df$department == as.character(levels_department[, mk])), ]
    levels_product <- as.data.frame(t(levels(as.factor(newdata$product))))
    for (md in 1:ncol(levels_product)) {
      newdata <- newdata[which(newdata$product == as.character(levels_product[, md])), ]
      levels_region <- as.data.frame(t(levels(as.factor(newdata$region))))
      for (dm in 1:ncol(levels_region)) {
        newdata <- newdata[which(newdata$region == as.character(levels_region[, dm])), ]
        for (i in 1:(nrow(newdata) - 1)) {
          start_month = newdata$monthnum[i]
          end_month = newdata$monthnum[nrow(newdata)]
          row_vector = c()
          decay_vector = c()
          for (j in 5:ncol(newdata)) {
            k = 0
            for (l in start_month:end_month) {
              distance_initial = (l - start_month)
              vector_increment = (l - (start_month - 1))
              decay_rate = (0.5)^((1/halflife) * distance_initial)
              decay_value = (decay_rate) * (newdata[[i, j]])
              k = k + decay_value
            }
            df2[i, j] = k
          }
          print(df2)
        }
        if (mk == '1' & md == '1' & dm == '1') {
          outdata <- df2
        } else {
          outdata <- rbind(outdata, df2)
        }
      }
    }
  }
}
output_data = decay_function(tempone)
Error in start_month:end_month : argument of length 0
> dput(head(df))
structure(list(monthnum = c(33, 33, 33, 33, 33, 33), Region = c(2251,
2251, 2251, 2251, 2251, 2251), Department = c("Softlines", "Softlines",
"Softlines", "Softlines", "Softlines", "Softlines"), Product = c("T-Shirt",
"Jacket", "Sweat Shirt", "Tank Top", "Sweat Pants", "Mens Jeans"
), Incentive_Amount = c(5742.43, 108006.61, 459076.67, 34006,
141632.42, 29580.38), Leads_T1 = c(0, 0, 0, 0, 0, 0), DCLeads = c(0,
1, 0, 0, 0, 0), PhoneLeads = c(0, 0, 0, 0, 0, 0), T3_CRM_Leads = c(0,
0, 0, 0, 0, 0), Leads_Third = c(0, 1, 0, 0, 0, 0)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L))
I have two character vectors of equal length; where position one in vector.x matches position one in vector.y and so on. The elements refer to column names in a data frame (wide format). I would like to somehow loop through these vectors to produce xy scatter graphs for each pair in the vector, preferably in a faceted plot. Here is a (hopefully) reproducible example. To be clear, with this example, I would end up with 10 scatter graphs.
vector.x <- c("Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Bathycoccus", "Brockmanniella", "Brockmanniella", "Caecitellus_paraparvulus", "Caecitellus_paraparvulus")
vector.y <- c("Aliiroseovarius", "Neptuniibacter", "Pseudofulvibacter", "Thalassobius", "unclassified_Porticoccus", "Tenacibaculum", "Pseudomonas", "unclassified_GpIIa", "Marinobacter", "Thalassobius")
structure(list(Aliiroseovarius = c(0, 0, 0, 0.00487132352941176,
0.0108639420589757), Marinobacter = c(0, 0.00219023779724656,
0, 0.00137867647058824, 0.00310398344542162), Neptuniibacter = c(0.00945829750644884,
0.00959532749269921, 0.0171310629514964, 0.2796875, 0.345835488877393
), Pseudofulvibacter = c(0, 0, 0, 0.00284926470588235, 0.00362131401965856
), Pseudomonas = c(0.00466773123694878, 0.00782227784730914,
0.0282765737874097, 0.00707720588235294, 0.00400931195033627),
Tenacibaculum = c(0, 0, 0, 0.00505514705882353, 0.00362131401965856
), Thalassobius = c(0, 0.00166875260742595, 0, 0.0633272058823529,
0.147697878944646), unclassified_GpIIa = c(0, 0.000730079265748853,
0, 0.003125, 0.00103466114847387), unclassified_Porticoccus = c(0,
0, 0, 0.00119485294117647, 0.00569063631660631), Aplanochytrium = c(0,
0, 0, 0.000700770847932726, 0.0315839846865529), Bathycoccus = c(0.000388802488335925,
0, 0, 0.0227750525578136, 0.00526399744775881), Brockmanniella = c(0,
0.00383141762452107, 0, 0.000875963559915907, 0), Caecitellus_paraparvulus = c(0,
0, 0, 0.000875963559915907, 0.00797575370872547)), row.names = c("B11",
"B13", "B22", "DI5", "FF6"), class = "data.frame")
As Rui Barradas shows, it's possible to get a very nice plot from ggplot and gridExtra. If you wanted to stick to base R, here's how you'd do that (assuming your data set is called df1):
# set plot sizes
par(mfcol = c(floor(sqrt(length(vector.x))), ceiling(sqrt(length(vector.x)))))
# loop through plots
for (i in 1:length(vector.x)) {
plot(df1[[vector.x[i]]], df1[[vector.y[i]]], xlab = vector.x[i], ylab = vector.y[i])
}
# reset plot size
par(mfcol = c(1,1))
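A slightly safer variant of the same loop captures the old graphics settings and restores them at the end, so the layout reset happens even if you tweak other par() options:
op <- par(mfcol = c(floor(sqrt(length(vector.x))), ceiling(sqrt(length(vector.x)))))
for (i in seq_along(vector.x)) {
  plot(df1[[vector.x[i]]], df1[[vector.y[i]]], xlab = vector.x[i], ylab = vector.y[i])
}
par(op)  # restore the previous layout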
This is a bit long and convoluted but it works.
library(tidyverse)
library(gridExtra)
df_list <- apply(data.frame(vector.x, vector.y), 1, function(x){
  DF <- df1[which(names(df1) %in% x)]
  i <- which(names(DF) %in% vector.x)
  if (i == 2) DF[2:1] else DF
})
gg_list <- lapply(df_list, function(DF){
  ggplot(DF, aes(x = get(names(DF)[1]), y = get(names(DF)[2]))) +
    geom_point() +
    xlab(label = names(DF)[1]) +
    ylab(label = names(DF)[2])
})
g <- do.call(grid.arrange, gg_list)
g
Not too elegant, but should get you going:
vector.x <- c("Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Aplanochytrium", "Bathycoccus", "Brockmanniella", "Brockmanniella", "Caecitellus_paraparvulus", "Caecitellus_paraparvulus")
vector.y <- c("Aliiroseovarius", "Neptuniibacter", "Pseudofulvibacter", "Thalassobius", "unclassified_Porticoccus", "Tenacibaculum", "Pseudomonas", "unclassified_GpIIa", "Marinobacter", "Thalassobius")
df1 = structure(
list(Aliiroseovarius = c(0, 0, 0, 0.00487132352941176, 0.0108639420589757),
Marinobacter = c(0, 0.00219023779724656, 0, 0.00137867647058824, 0.00310398344542162),
Neptuniibacter = c(0.00945829750644884, 0.00959532749269921, 0.0171310629514964, 0.2796875, 0.345835488877393),
Pseudofulvibacter = c(0, 0, 0, 0.00284926470588235, 0.00362131401965856),
Pseudomonas = c(0.00466773123694878, 0.00782227784730914, 0.0282765737874097, 0.00707720588235294, 0.00400931195033627),
Tenacibaculum = c(0, 0, 0, 0.00505514705882353, 0.00362131401965856),
Thalassobius = c(0, 0.00166875260742595, 0, 0.0633272058823529, 0.147697878944646),
unclassified_GpIIa = c(0, 0.000730079265748853, 0, 0.003125, 0.00103466114847387),
unclassified_Porticoccus = c(0, 0, 0, 0.00119485294117647, 0.00569063631660631),
Aplanochytrium = c(0, 0, 0, 0.000700770847932726, 0.0315839846865529),
Bathycoccus = c(0.000388802488335925, 0, 0, 0.0227750525578136, 0.00526399744775881),
Brockmanniella = c(0, 0.00383141762452107, 0, 0.000875963559915907, 0),
Caecitellus_paraparvulus = c(0, 0, 0, 0.000875963559915907, 0.00797575370872547)),
row.names = c("B11", "B13", "B22", "DI5", "FF6"),
class = "data.frame"
)
df2 = NULL
for(i in 1:10) {
df.tmp = data.frame(
plot = paste0(vector.x[i], ":", vector.y[i]),
x = df1[[vector.x[i]]],
y = df1[[vector.y[i]]]
)
if(is.null(df2)) df2=df.tmp else df2 = rbind(df2, df.tmp)
}
library(ggplot2)  # not loaded above, needed for the plot

ggplot(data = df2, aes(x, y)) +
  geom_point() +
  facet_grid(cols = vars(plot))
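If you prefer to avoid the explicit loop, the same long data frame can be built with Map() (a sketch under the same assumptions; facet_wrap() is used instead of facet_grid() so the ten panels wrap onto several rows rather than one long row):
df2 <- do.call(rbind, Map(function(xv, yv) {
  data.frame(plot = paste0(xv, ":", yv), x = df1[[xv]], y = df1[[yv]])
}, vector.x, vector.y))

ggplot(df2, aes(x, y)) +
  geom_point() +
  facet_wrap(vars(plot))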