Related
When trying to predict I am getting this error
error in evaluating the argument 'x' in selecting a method for function 'as.matrix': Cholmod error 'X and/or Y have wrong dimensions' at file ../MatrixOps/cholmod_sdmult.c, line 90
Here is my code so far
library(data.table)
library(caret)
library(Metrics)
library(glmnet)
library(plotmo)
library(lubridate)
# Read the inputs needed for this project.
train <- fread("project/volume/data/processed/start_train.csv")
test <- fread("project/volume/data/processed/start_test.csv")
example_sub <- fread("project/volume/data/processed/example_submission.csv")
card_tab <- fread("project/volume/data/processed/card_tab.csv")

# Merge the card attributes onto both the train and test sets.
train <- merge(train, card_tab, by = "id")
test <- merge(test, card_tab, by = "id")

# Coerce power/toughness to numeric; entries that cannot be parsed become NA
# and are zero-filled further down.
train$power <- as.numeric(train$power)
train$toughness <- as.numeric(train$toughness)
test$power <- as.numeric(test$power)
test$toughness <- as.numeric(test$toughness)

# Engineered feature: power relative to toughness.
train$powerovertough <- train$power / train$toughness
test$powerovertough <- test$power / test$toughness

# Parse the dates and sort both sets newest-first.
train$current_date <- as_date(train$current_date)
train <- train[order(-current_date)]
test$current_date <- as_date(test$current_date)
test <- test[order(-current_date)]

# Handle NA values in both train and test by setting them to 0.
train[is.na(train)] <- 0
test[is.na(test)] <- 0

# Columns that are not used as predictors; drop them from both sets.
drops <- c(
  "id", "future_date", "current_date", "card_name", "power", "loyalty",
  "cmc", "type", "colors", "mana_cost", "subtypes", "text", "set", "set_name"
)
train <- train[, !drops, with = FALSE]
test <- test[, !drops, with = FALSE]

# Save the response variable; test gets a placeholder so the dummyVars
# formula can be applied to it as well.
train_y <- train$future_price
test$future_price <- 0

# One-hot encode the categorical predictors using the encoding learned on
# train.
dummies <- dummyVars(future_price ~ ., data = train)
train <- as.matrix(predict(dummies, newdata = train))
test <- as.matrix(predict(dummies, newdata = test))

# Category levels that occur in only one of the two sets leave the encoded
# matrices with different columns, and predict() on the fitted glmnet model
# then fails with "X and/or Y have wrong dimensions". Give test exactly the
# columns of train, in the same order: levels absent from test become all-zero
# columns, and levels never seen in training are dropped because the model has
# no coefficient for them.
missing_cols <- setdiff(colnames(train), colnames(test))
if (length(missing_cols) > 0) {
  zero_pad <- matrix(
    0,
    nrow = nrow(test),
    ncol = length(missing_cols),
    dimnames = list(NULL, missing_cols)
  )
  test <- cbind(test, zero_pad)
}
test <- test[, colnames(train), drop = FALSE]

# Cross-validate the lasso (alpha = 1) to find the best lambda value.
gl_model <- cv.glmnet(train, train_y, alpha = 1, family = "gaussian")
bestlam <- gl_model$lambda.min

# Fit the full model over the whole lambda path, then plot and save it.
gl_model <- glmnet(train, train_y, alpha = 1, family = "gaussian")
plot_glmnet(gl_model)
saveRDS(gl_model, "./project/volume/models/gl_model.model")

# Use the full model to predict at the cross-validated lambda.
pred <- predict(gl_model, s = bestlam, newx = test)
I am trying to predict future_price for my test set. The error is saying my dimensions are wrong but I don't know what could be causing them to be different. I have tried observing the data sets as it runs through the code and they seem to have the same variables.
Here is the dput
> dput(head(train))
structure(c(0.25, 0.1, 0.1, 0.1, 0.25, 0.25, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 3, 0, 2, 0, 0, 0, 0.333333333333333,
0, 1, 0), .Dim = c(6L, 20L), .Dimnames = list(NULL, c("current_price",
"typesArtifact", "typesArtifact Creature", "typesCreature", "typesEnchantment",
"typesEnchantment Artifact", "typesEnchantment Creature", "typesInstant",
"typesLand", "typesPlaneswalker", "typesSorcery", "supertypes",
"supertypesBasic", "supertypesLegendary", "rarityCommon", "rarityMythic",
"rarityRare", "rarityUncommon", "toughness", "powerovertough"
)))
> dput(head(test))
structure(c(0.15, 0.16, 2, 0.39, 0.16, 0.19, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0), .Dim = c(6L, 18L), .Dimnames = list(
NULL, c("current_price", "typesArtifact", "typesArtifact Creature",
"typesCreature", "typesEnchantment", "typesInstant", "typesLand",
"typesPlaneswalker", "typesSorcery", "supertypes", "supertypesBasic",
"supertypesLegendary", "rarityCommon", "rarityMythic", "rarityRare",
"rarityUncommon", "toughness", "powerovertough")))
I'm looking to condense the steps in my script, but I'm having issues with lapply(). It looks to be an issue with my code as usual. Any help would be much appreciated!
library(iNEXT)
# Example data: a named list with one element per taxon (Bird, Butterfly,
# Mammal, Reptile). Each element is itself a one-element list wrapping a 0/1
# matrix whose rows are named by Scientific_name and whose columns are named
# by Sampling_Point. Dimensions: Bird 26 x 6, Butterfly 10 x 4, Mammal 2 x 3,
# Reptile 1 x 1.
sa4 <- list(Bird = list(structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0), .Dim = c(26L,
6L), .Dimnames = list(Scientific_name = c(" Pycnonotus plumosus",
"Acridotheres javanicus", "Aegithina tiphia", "Aethopyga siparaja",
"Anthreptes malacensis", "Aplonis panayensis", "Cacatua goffiniana",
"Callosciurus notatus", "Cinnyris jugularis", "Copsychus malabaricus",
"Copsychus saularis", "Dicaeum cruentatum", "Dicrurus paradiseus",
"Gorsachius melanolophus", "Larvivora cyane", "Macronus gularis",
"Oriolus chinensis", "Orthotomus atrogularis", "Otus lempiji",
"Pitta moluccensis", "Pycnonotus goiavier", "Pycnonotus plumosus",
"Pycnonotus zeylanicus", "Spilopelia chinensis", "Todiramphus chloris",
"Zosterops simplex"), Sampling_Point = c("SA_01", "SA_02", "SA_03",
"SA_04", "SA_05", "SA_06")))), Butterfly = list(structure(c(0,
0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0), .Dim = c(10L,
4L), .Dimnames = list(Scientific_name = c("Burara harisa consobrina",
"Catopsilia pyranthe pyranthe", "Catopsilia scylla cornelia",
"Delias hyparete metarete", "Eurema sp", "Idea leuconoe clara",
"Pachliopta aristolochiae asteris", "Phalanta phalantha phalantha",
"Troides helena cerberus", "Zizula hylax pygmaea"), Sampling_Point = c("SA_01",
"SA_02", "SA_04", "SA_06")))), Mammal = list(structure(c(0, 1,
1, 1, 1, 0), .Dim = 2:3, .Dimnames = list(Scientific_name = c("Callosciurus notatus",
"Unidentified Fruit Bat sp"), Sampling_Point = c("SA_03", "SA_04",
"SA_05")))), Reptile = list(structure(1, .Dim = c(1L, 1L), .Dimnames = list(
Scientific_name = "Hemidactylus frenatus", Sampling_Point = "SA_05"))))
I've been doing it the longer way:
# Sampling coverage, computed with one explicit call per taxon.
estimateD(sa4[["Butterfly"]], datatype = "incidence_raw")  # butterflies
estimateD(sa4[["Mammal"]], datatype = "incidence_raw")     # mammals
estimateD(sa4[["Bird"]], datatype = "incidence_raw")       # birds
estimateD(sa4[["Reptile"]], datatype = "incidence_raw")    # reptiles
Note that estimateD(sa4$Reptile, datatype="incidence_raw") generates an error since it only has one species.
Is it possible to condense the steps above via lapply? In this situation I only have 4 taxa, but for other projects there might be a lot more. I tried the following and it gives me a warning message, which is actually the same warning message as the one above. I'm wondering if lapply stops working if one component gives an error?
> (lapply(sa4, function(x) estimateD(x, datatype="incidence_raw")) )
Error in `[.data.frame`(tmp, , c(1, 2, 3, 7, 4, 5, 6)) :
undefined columns selected
In addition: Warning messages:
1: In FUN(X[[i]], ...) :
Invalid data type, the element of species by sites presence-absence matrix should be 0 or 1. Set nonzero elements as 1.
2: In log(b/Ub) : NaNs produced
Please let me know if I need to provide more information? Thank you!
This is a simple error trapping issue. Wrap tryCatch around your problem function call and have the error function return information on what happened.
# Run estimateD() on every taxon. When a call fails, the error condition
# object is stored in that slot instead of aborting the whole lapply().
results <- lapply(
  sa4,
  function(taxon) {
    tryCatch(
      estimateD(taxon, datatype = "incidence_raw"),
      error = identity
    )
  }
)
Now determine which ran alright.
# TRUE for every element of `results` that is not an error condition.
# vapply() guarantees a logical vector (even for an empty `results`), whereas
# sapply() would return list() there and make `!` fail.
ok <- !vapply(results, inherits, logical(1), what = "error")
ok
# Bird Butterfly Mammal Reptile
# TRUE TRUE TRUE FALSE
And check those that did.
# Keep only the taxa whose estimateD() call succeeded.
results[which(ok)]
It is the issue with the 'Reptiles', so if we select the first 3 elements of the list, it should work
# Restrict the run to the first three taxa, leaving out the failing fourth.
lapply(
  sa4[seq_len(3)],
  function(taxon) estimateD(taxon, datatype = "incidence_raw")
)
I am trying to calculate robustness, a graph theory measure using R (braingraph package).
# robustness() orders the vertices by the vertex attribute named by `measure`
# (the reported error comes from order(vertex_attr(g, measure), ...)), so that
# attribute has to exist on the graph before the call. A graph built straight
# from an adjacency matrix carries no such attributes; add them first.
my_networkgraph <- igraph::set_vertex_attr(
  my_networkgraph, "degree",
  value = igraph::degree(my_networkgraph)
)
my_networkgraph <- igraph::set_vertex_attr(
  my_networkgraph, "btwn.cent",
  value = igraph::centr_betw(my_networkgraph)$res
)
Robustness <- robustness(
  my_networkgraph,
  type = "vertex",
  measure = "btwn.cent"
)
I get the following error, when I use the above robustness function:
Error in order(vertex_attr(g, measure), decreasing = TRUE) : argument 1 is not a vector
Any idea, what I am doing wrong here?
My network, which is stored as a matrix, was converted to an igraph object, and robustness was then calculated on that object.
My network as a matrix:
# 8 x 8 symmetric 0/1 adjacency matrix, built from its edge list.
# Each row of `edge_list` is one undirected edge (i, j); both [i, j] and
# [j, i] are set to 1, every other cell stays 0.
mynetwork <- local({
  edge_list <- rbind(
    c(1, 2), c(1, 4), c(2, 3), c(4, 6),
    c(5, 6), c(6, 7), c(6, 8)
  )
  adjacency <- matrix(0, nrow = 8, ncol = 8)
  adjacency[edge_list] <- 1
  adjacency[edge_list[, 2:1]] <- 1
  adjacency
})
This matrix was converted as igraph using the following code:
# Convert the adjacency matrix into an undirected igraph object.
my_networkgraph <- graph_from_adjacency_matrix(
  mynetwork,
  mode = "undirected",
  weighted = NULL,
  diag = TRUE,
  add.colnames = NULL,
  add.rownames = NA
)
Please help me to understand the above error
Thanks
Priya
There was a bug in the above function. To run the robustness code, you will need to supply the vertex attributes to your network first: `V(network)$degree <- degree(network)` and `V(network)$btwn.cent <- centr_betw(network)$res`.
Apologies if this is a repeat question. Many have posted looking for a way to do post-hoc analyses on the conditional model (fixed factors) in glmmTMB. I want to do planned contrasts between certain groups, not test every pairwise comparison (e.g. Tukey).
The code below worked well with nlme::lme for an LMM. However, with glmmTMB it returns the following error.
Error in modelparm.default(model, ...) :
dimensions of coefficients and covariance matrix don't match
Is there a way to do planned contrasts on a glmmTMB?
# filtdens is a data frame; TRT, DATE, BURN and VEG are factors.
# Keep the rows matching every condition below (comma-separated filter()
# conditions are combined with AND).
filtdens <- merged %>%
  filter(
    !BLOCK %in% c("JB2", "JB4", "JB5"),
    MEAS == "DENS",
    group == "TOT",
    BURN == "N",
    VEG == "C"
  )
# TD combines TRT and DATE into a single interaction factor.
filtdens$TD <- with(filtdens, interaction(TRT, DATE))
# Zero-inflated negative binomial (nbinom1, log link) mixed model:
# fixed effect TD, random intercept per BLOCK, intercept-only zero-inflation.
# The zero-inflation argument is spelled out in full (`ziformula`) instead of
# relying on partial matching of `zi`.
mod2 <- glmmTMB(
  count ~ TD + (1 | BLOCK),
  data = filtdens,
  ziformula = ~1,
  family = nbinom1(link = "log")
)
# Planned-contrast matrix: one contrast per row, one model coefficient per
# column (12 x 12, filled row by row). The first column is zero in every row.
# Rows 1-2 each pick out a single coefficient; rows 3-12 are pairwise
# differences (-1 / +1) between two coefficients.
# `byrow = TRUE` is written out because `T` is an ordinary variable that can
# be reassigned.
k1 <- matrix(
  c(0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0, -1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0, -1,  1,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0, -1,  0,  1,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0, -1,  1,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0, -1,  1,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0, -1,  0,  1,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0, -1,  1,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  1,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  1,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  1),
  byrow = TRUE,
  ncol = 12
)
# Test the planned contrasts in k1 with Bonferroni-adjusted p-values.
summary(
  glht(mod2, linfct = k1),
  test = adjusted("bonferroni")
)
A reproducible example would be helpful, but: this vignette in the development version offers code that ought to enable multcomp::linfct, i.e.:
# Wrapper around glht() for glmmTMB fits.
#
# model:     the fitted model, passed on as glht()'s first argument.
# ...:       further arguments forwarded to glht() (e.g. linfct).
# component: name of the element to pull out of fixef(model) and vcov(model);
#            defaults to "cond".
#
# Returns whatever glht() returns.
glht_glmmTMB <- function(model, ..., component = "cond") {
  # Extractors handed to glht(); both select the requested component.
  component_coef <- function(x) {
    fixef(x)[[component]]
  }
  component_vcov <- function(x) {
    vcov(x)[[component]]
  }
  glht(
    model,
    ...,
    coef. = component_coef,
    vcov. = component_vcov,
    df = NULL
  )
}
# modelparm method for glmmTMB fits (named for S3 dispatch on class
# "glmmTMB"); it delegates to multcomp's default method.
#
# model:     the fitted model.
# coef.:     function returning the coefficients; by default the `component`
#            element of fixef(model).
# vcov.:     function returning the covariance matrix; by default the
#            `component` element of vcov(model).
# df:        forwarded unchanged to the default method.
# component: which element the default extractors select ("cond" by default).
# ...:       further arguments forwarded to the default method.
modelparm.glmmTMB <- function(model,
                              coef. = function(x) fixef(x)[[component]],
                              vcov. = function(x) vcov(x)[[component]],
                              df = NULL, component = "cond", ...) {
  # modelparm.default is not exported by multcomp, hence the `:::` access.
  default_method <- multcomp:::modelparm.default
  default_method(model, coef. = coef., vcov. = vcov., df = df, ...)
}
Test (this example is with Tukey, but I don't see why it shouldn't work more generally ...)
library(glmmTMB)
# Worked example on the cbpp data set shipped with lme4: binomial GLMM with a
# random intercept per herd, followed by Tukey contrasts on `period`.
data("cbpp", package = "lme4")
cbpp_b1 <- glmmTMB(
  incidence / size ~ period + (1 | herd),
  weights = size,
  family = binomial,
  data = cbpp
)
g1 <- glht(
  cbpp_b1,
  linfct = mcp(period = "Tukey")
)
summary(g1)
This works with the current CRAN version, but the current development version of glmmTMB offers more options (e.g. emmeans(); see the above-linked vignette). You'll need to install via devtools::install_github("glmmTMB/glmmTMB/glmmTMB") (you'll need compilation tools installed as well).
I'm using R to complete some GA driven searches.
Returned from my GA script is the resulting chromosome, returned as a binary numeric of length 40.
An example is: c(0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0).
I also have a corresponding data frame with 40 columns.
Using the data in the numeric vector, how do I efficiently build a (or re-build the) data frame so that it contains only those columns represented by the 1's in my numeric vector?
Building a sample data.frame and assigning your sample vector to x:
# Sample data frame: 10 rows x 40 columns of random integers drawn from 1..100.
# `replace = TRUE` is written out because `T` is an ordinary variable that can
# be reassigned.
df <- as.data.frame(matrix(sample(1:100, 400, replace = TRUE), ncol = 40))
# The example chromosome: a 0/1 vector with one entry per column of df.
x <- c(
  0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
  0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
  0, 1, 1, 0, 0, 0, 0, 0, 0, 0
)
I can subset:
# Keep the columns whose entry in x equals 1.
df[, x == 1]
or:
# Same selection, with the 0/1 vector converted to a logical column mask.
df[, as.logical(x)]