How to predict using VAR with exogenous variables in R - r
I have the following data:
library(data.table)
modelling_dt_train <- structure(list(`1` = c(54593L, 74481L, 85566L, 97637L, 101081L,
184089L, 158895L, 153780L, 153681L, 157188L, 142216L, 136437L,
135501L, 111264L, 123259L, 110397L, 146034L, 162900L, 132499L,
121516L, 119651L, 114045L, 112551L, 123209L, 134930L, 132147L,
151327L, 155666L, 158538L, 205766L, 200407L, 219588L, 231954L,
179884L, 159121L, 156148L, 136191L, 132956L, 202086L, 141047L,
118490L, 116595L, 127620L, 135962L, 137419L, 127334L, 158804L,
139142L, 181773L, 228278L, 272373L, 186666L, 148791L, 143608L,
169634L, 188149L, 239867L, 332543L, 253463L, 240574L, 237245L,
275466L, 262755L, 241538L, 303377L),
`2` = c(148181L, 186894L,
243357L, 227298L, 195640L, 412137L, 363152L, 355169L, 296208L,
328993L, 281652L, 308027L, 316254L, 249293L, 320821L, 220521L,
284411L, 263807L, 258093L, 261060L, 320153L, 311547L, 279734L,
258453L, 269697L, 313700L, 255285L, 232495L, 305346L, 393256L,
390655L, 527039L, 529056L, 450689L, 425190L, 372144L, 303765L,
324658L, 365035L, 285178L, 230985L, 251308L, 290378L, 279595L,
294676L, 391377L, 445682L, 364056L, 441207L, 516852L, 673401L,
415677L, 304000L, 266365L, 311924L, 314192L, 407313L, 664519L,
456920L, 384978L, 351644L, 432627L, 409624L, 386330L, 487679L
),
`3` = c(60217L, 66492L, 66675L, 76400L, 117252L, 264527L,
256384L, 241815L, 187115L, 193106L, 177620L, 140833L, 188291L,
110069L, 163581L, 107650L, 118319L, 118821L, 122383L, 117267L,
134962L, 121227L, 124952L, 111740L, 137493L, 163895L, 60653L,
69311L, 88810L, 128620L, 132077L, 153399L, 162989L, 151866L,
127325L, 122813L, 115284L, 103765L, 113185L, 101607L, 92379L,
98646L, 94376L, 98069L, 98972L, 103074L, 142199L, 123497L, 141823L,
205582L, 251187L, 109603L, 80711L, 80799L, 84175L, 104965L, 181221L,
245377L, 201378L, 235504L, 188925L, 214614L, 220312L, 191591L,
203292L)),
.Names = c("1", "2", "3"), class = c("data.table",
"data.frame"), row.names = c(NA, -65L))
modelling_x_train <- structure(list(`1` = c(1982134L, 1968327L, 2019222L, 2025126L,
2033065L, 2188202L, 2066808L, 2070103L, 2041154L, 2201142L, 2105848L,
2067669L, 2005707L, 2239632L, 2435928L, 2363759L, 2444016L, 2556139L,
2807283L, 2674632L, 2687984L, 2889011L, 2839239L, 2712064L, 2928420L,
2889533L, 3106868L, 2746471L, 2953436L, 3225171L, 2926874L, 2914124L,
3210355L, 2847523L, 2890636L, 3268445L, 2941468L, 2931027L, 2906610L,
3222324L, 2833093L, 2978953L, 3196315L, 3055240L, 3210672L, 3368890L,
3046191L, 2960181L, 3341146L, 3227672L, 3062702L, 3197227L, 3445476L,
3441273L, 3651232L, 3566179L, 3619685L, 3716756L, 3600666L, 3732533L,
3695464L, 3857145L, 3700072L, 3608183L, 3904237L),
`2` = c(4082316L,
4644387L, 5230567L, 5115720L, 4729153L, 5658227L, 5492034L, 5443022L,
5094415L, 5939637L, 5354626L, 5509783L, 5438960L, 4912936L, 5736293L,
5167632L, 5244341L, 5580274L, 5750346L, 5358527L, 5916955L, 6129790L,
5245982L, 5801479L, 5683117L, 5721551L, 6972176L, 7072498L, 7979325L,
8324202L, 7434885L, 8189438L, 8062609L, 7658496L, 8066643L, 8528136L,
7515745L, 8276800L, 8227022L, 6523804L, 5780869L, 6481060L, 6912797L,
6276934L, 6592158L, 6908732L, 6067945L, 6459707L, 6910377L, 6645470L,
6538196L, 6694136L, 7484290L, 7299620L, 8532078L, 7713988L, 7256825L,
8237839L, 7834919L, 7725377L, 7291804L, 8224205L, 7784470L, 7514557L,
8164590L),
`3` = c(3181556L, 3232260L, 3272852L, 3233534L, 2876956L,
2979204L, 3275916L, 3345278L, 2951867L, 2976889L, 3289397L, 2955148L,
3306653L, 1861934L, 2239827L, 2207356L, 2335514L, 2387791L, 2592206L,
2371527L, 2586856L, 2447660L, 2322218L, 2342827L, 2666258L, 2627928L,
2525534L, 2521129L, 2573991L, 2752528L, 2538251L, 2676848L, 2802139L,
2702108L, 2630417L, 2778233L, 2725544L, 2723849L, 2795745L, 1954820L,
1842684L, 2132844L, 2182141L, 2041725L, 2355857L, 2414334L, 2350885L,
2367547L, 2436918L, 2328244L, 2390647L, 2460700L, 3081623L, 2877487L,
3025104L, 3108909L, 3172441L, 3267766L, 3354357L, 3273165L, 3322516L,
3342817L, 3413854L, 3217624L, 2736617L)),
.Names = c("1", "2",
"3"), class = c("data.table", "data.frame"), row.names = c(NA,
-65L))
where modelling_dt_train is the time series of 3 products and modelling_x_train is an exogenous variable (which is also a time series) for the same products.
I am estimating a VAR model using the following code:
library(vars)
x <- log(modelling_dt_train)
x <- x[,lapply(.SD,function(x){ifelse(is.infinite(x),0,x)})]
modelling_x_train <- log(modelling_x_train)
modelling_x_train <- modelling_x_train[,lapply(.SD,function(x){ifelse(is.infinite(x),0,x)})]
x_mat <- as.matrix(x)
dx <- x_mat
var = VAR(dx, p=p, exogen = modelling_x_train, season = 18)
So far so good, but when I want to predict the values for 12 periods using
predict(var, newdata = modelling_dt_test, dumvar = modelling_x_test, n.ahead = 12)
I get an error:
Error in predict.varest(var, newdata = modelling_dt_test, dumvar = modelling_x_test, :
Column names of dumvar do not coincide with exogen.
The newdata and the dumvar that I am using are future values for the same products, i.e. observations from later in time:
modelling_x_test <- structure(list(`1` = c(4447896L, 4779229L, 4628391L, 4737933L,
5102152L, 4838918L, 4955183L, 5258605L, 5084001L, 4798945L, 5204015L,
5129690L, 5101568L),
`2` = c(6108187L, 6733956L, 7065148L, 7111155L,
6513151L, 7622806L, 7062042L, 7206067L, 7144091L, 7412266L, 6752614L,
7705255L, 7487054L),
`3` = c(1716975L, 2022198L, 2122109L, 2155489L,
2428639L, 2433860L, 2717315L, 2471655L, 2795100L, 2908946L, 2581813L,
2633578L, 2666302L)),
.Names = c("1", "2", "3"), class = c("data.table",
"data.frame"), row.names = c(NA, -13L))
modelling_dt_test <- structure(list(`1` = c(244876L, 275993L, 256180L, 321256L, 316042L,
275097L, 250842L, 245543L, 233386L, 218958L, 254270L, 238804L,
234079L),
`2` = c(375278L, 429496L, 478816L, 532311L, 442922L,
485787L, 460750L, 501956L, 454178L, 425800L, 413112L, 434328L,
446069L),
`3` = c(119577L, 139870L, 127951L, 125017L, 138176L,
114517L, 129880L, 120941L, 159176L, 157890L, 149554L, 144210L,
165979L)),
.Names = c("1", "2", "3"), class = c("data.table",
"data.frame"), row.names = c(NA, -13L))
EDIT
In the source code here, at line 58, there is this check. But what this line checks is whether colnames(data.all) (which are "X1" "X2" "X3" "X1.l1" "X2.l1" "X3.l1" "const" "sd1" "sd2" "sd3" "sd4" "sd5" "sd6" "sd7" "sd8" "sd9" "sd10" "sd11" "sd12" "sd13" "sd14" "sd15" "sd16" "sd17" "X1.1" "X2.1" "X3.1", so they look to me like the coefficient names) are equal to colnames(modelling_x_test), which are "1" "2" "3" (the products). How can these ever be equal?
Related
Filtering or matching by exact time
When I try and filter a df by an exact time, not between two different times it produces a NA. I am trying to create a variable in df1 that is based on information in df2. Here is my data dput(df2) structure(list(Time = structure(c(1647531450.72, 1647531451.757, 1647531452.794, 1647531453.83, 1647531454.867, 1647531455.818, 1647531456.854, 1647531457.891, 1647531458.928, 1647531459.878, 1647531460.915, 1647531461.952, 1647531462.902, 1647531463.939, 1647531464.976, 1647531466.013, 1647531467.05, 1647531468, 1647531469.037, 1647531470.074, 1647531471.11, 1647531472.147, 1647531473.098, 1647531474.134, 1647531475.171, 1647531476.208, 1647531477.245, 1647531478.195, 1647531479.232, 1647531480.269, 1647531481.306, 1647531482.342, 1647531483.293, 1647531484.33, 1647531485.366, 1647531486.317, 1647531487.354, 1647531488.39, 1647531489.427, 1647531490.378, 1647531491.414, 1647531492.451, 1647531493.488, 1647531494.438, 1647531495.475, 1647531496.512, 1647531497.549, 1647531498.586, 1647531499.536, 1647531500.573, 1647531501.61, 1647531502.56, 1647531503.597, 1647531504.634, 1647531505.67, 1647531506.621, 1647531507.658, 1647531508.694, 1647531509.645 ), tzone = "", class = c("POSIXct", "POSIXt")), LAT = c(17.8799454, 17.8799729, 17.8799952, 17.8800159, 17.8800416, 17.8800708, 17.8801, 17.8801292, 17.8801567, 17.8801877, 17.8802237, 17.8802581, 17.8802873, 17.8803148, 17.8803474, 17.8803818, 17.8804161, 17.8804471, 17.8804763, 17.8805089, 17.8805381, 17.880569, 17.8806034, 17.880636, 17.8806721, 17.8807048, 17.8807374, 17.8808061, 17.8808405, 17.8808783, 17.8808783, 17.8809556, 17.8809968, 17.8810346, 17.8810724, 17.8810724, 17.8811497, 17.8811892, 17.8812288, 17.88127, 17.8813112, 17.8813524, 17.8813954, 17.8814383, 17.8814813, 17.8815208, 17.8815603, 17.8815964, 17.8816359, 17.8816737, 17.8817132, 17.8817562, 17.8817974, 17.8818438, 17.8818885, 17.8819314, 17.8819726, 17.8820104, 17.88205), LON = c(-62.8613544, -62.8613338, -62.8613063, -62.8612857, -62.861265, 
-62.8612513, -62.8612307, -62.8612101, -62.8611894, -62.8611757, -62.8611688, -62.8611482, -62.8611276, -62.8611139, -62.8611001, -62.8610795, -62.8610658, -62.861052, -62.8610314, -62.8610176, -62.860997, -62.8609833, -62.8609627, -62.8609489, -62.8609352, -62.8609214, -62.8609008, -62.8608733, -62.8608665, -62.8608596, -62.8608596, -62.8608459, -62.860839, -62.8608321, -62.8608252, -62.8608252, -62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608115, -62.8608046, -62.8607977, -62.8607909, -62.860784, -62.8607771, -62.8607771, -62.8607771, -62.8607771, -62.8607771, -62.8607771, -62.8607703, -62.8607634, -62.8607496, -62.8607428)), class = "data.frame", row.names = c(NA, -59L)) and the df containing the information I want to filter by dput(df1) structure(list(date = structure(19068, class = "Date"), RaceStartTime = structure(1647531480, tzone = "", class = c("POSIXct", "POSIXt"))), class = "data.frame", row.names = "event.2") I have tried the following df1$lon <- df2$LON[match(df1$RaceStartTime, df2$Time)] I have also tried df1$lon <- df2%>% filter(Time == df1$RaceStartTime) Both of these produce empty rows, can some one point out the obvious mistake?! EDIT: The structure appears the same str(df1$RaceStartTime) POSIXct[1:1], format: "2022-03-17 15:38:00" str(df2$Time) POSIXct[1:59], format: "2022-03-17 15:37:30" Thanks
POSIXct format by default only prints whole seconds, but its underlying representation can contain fractional seconds (as your data in df2 does), so no value in df2$Time is exactly equal to df1$RaceStartTime (the closest one is 1647531480.269, not 1647531480). You can remove the fractional seconds by doing: df2$Time <- lubridate::floor_date(df2$Time) So now you get: df2%>% filter(Time == df1$RaceStartTime) #> Time LAT LON #> 1 2022-03-17 15:38:00 17.88088 -62.86086
Keep only letters in all rows of specific column - remove all other characters
This is how example data look like: exp_data # Name Greg Matt # 1 Y.L[+12,000]STISKDLITY.M NA L[+12] # 2 Y.L[+12,000]STISKDLITY.M NA L[+12] dput: exp_data <- structure(list(Name = structure(c(71L,71L), .Label = c("F.AM[+15,995]KTKAAL.A", "F.AMKTKAAL.A", "F.EKIKAAY.L", "F.EKIKAAYL.S", "F.NPTAGC[+58,005]ASL[+12,000]AKEM[+12,000]F[+1151,607].A", "F.QGRVTM[+15,995].T", "F.SGSNSGNTATL.T", "F.TGYY.M", "F.TNC[+58,005]DF[+1151,607]EKIKAAY.L", "L.DKSITSL[+370,222]Y.A", "L.DY[+12,000]WGQGTL.V", "L.DYWGQGTL.V", "L.EQVSQL.Q", "L.EQVSQLQGLW.R", "L.EWMGW.I", "L.ITY[+1151,607]M[+15,995]SGTKSTEF.N", "L.KQQGGGLEVL.F", "L.KQQGGGLEVLF.Q", "L.L[+504,270]KQQGGGLEVL.F", "L.LKQQGGGL.E", "L.LKQQGGGLEVL.F", "L.QGLW.R", "L.RSDDTAVY.Y", "L.RSDDTAVYY.C", "L.SRLRSDDTAVY.Y", "L.SRLRSDDTAVYY.C", "L.STISKDL[+12,000]ITY.M", "L.STISKDLITY.M", "L.STISKDLITY[+1012,607]M[+15,995].S", "L.STISKDLITY[+12,000].M", "L.STISKDLITY[+12,000]M[+386,228].S", "L.STISKDLITY[+2918,448].M", "L.STISKDLITY[+762,322]M[+15,995].S", "L.STISKDLITYM.S", "L.STISKDLITYM[+1282,648].S", "L.STISKDLITYM[+1456,695].S", "L.STISKDLITYM[+1490,759].S", "L.STISKDLITYM[+371,206].S", "L.TEIQSL.T", "L.TISRVEAGDEADY.Y", "L.TISRVEAGDEADY[+12,000]Y.C", "L.TISRVEAGDEADYY.C", "L.TISRVEAGDEADYY[+12,000].C", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY.V", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[+1239,661].V", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[+1987,847].V", "L.VVY[+1501,680]DDSDRPSGIPERF.S", "L.VVYDDSDRPSGIPERF.S", "M.KKARKSKVTTNKC[+58,005]L[+2909,467]EQVSQLQGL.W", "M.SGTKSTEF.N", "M.TELDYW.G", "M.TRDTSISTAY.M", "M.TRDTSISTAY[+12,000].M", "M.TRDTSISTAYM.E", "M.TRDTSISTAYMEL.S", "W.GQGTL.V", "W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY.V", "W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY[+1239,661].V", "W.INPNSGGTNY.A", "W.INPNSGGTNY[+12,000].A", "W.VRQAPGQGL.E", "W.VRQAPGQGLEW.M", "W.VRQAPGQGLEW[+12,000]M[+486,244].G", "W.VRQAPGQGLEWM.G", "W.Y[+12,000]QQKPGQAPVLVVY.D", "W.YQQKPGQAPVL.V", "W.YQQKPGQAPVL[+12,000]VVY.D", "W.YQQKPGQAPVLVVY.D", "Y.AQKF.Q", "Y.DDSDRPSGIPERF.S", 
"Y.L[+12,000]STISKDLITY.M", "Y.LSTISKDL.I", "Y.LSTISKDL[+12,000]ITY.M", "Y.LSTISKDLITY.M", "Y.M[+12,000]SGTKSTEF.N", "Y.M[+15,995]EL.S", "Y.M[+15,995]ELSRL.R", "Y.M[+15,995]SGTKSTEF.N", "Y.MELSRL.R", "Y.MSGTKSTEF.N", "Y.QQKPGQAPVL.V", "Y.QQKPGQAPVL[+12,000]VVY.D", "Y.QQKPGQAPVL[+12,000]VVYDDSDRPSGIPERF.S", "Y.QQKPGQAPVLVVY.D", "Y.QQKPGQAPVLVVYDDSDRPSGIPERF.S", "Y.TFTGY.Y", "Y.TFTGYY.M", "Y.TILDKSITSL.Y", "Y.VLTQPPSVSVAPGQTARITC[+58,005]GGNNIGSKSVHW.Y", "Y.WGQGTL.V", "Y.YMHW.V"), class = "factor"), Greg = c(NA, NA), Matt = structure(c(6L, 6L), .Label = c("","C[+58]", "C[+58], F[+1152]", "C[+58], F[+1152], L[+12], M[+12]", "C[+58], L[+2909]", "L[+12]", "L[+370]", "L[+504]", "M[+12]", "M[+1283]", "M[+1457]", "M[+1491]", "M[+16]", "M[+16], Y[+1013]", "M[+16], Y[+1152]", "M[+16], Y[+762]", "M[+371]", "M[+386], Y[+12]", "M[+486], W[+12]", "Y[+12]", "Y[+1240]", "Y[+1502]", "Y[+1988]", "Y[+2918]"), class = "factor")), row.names = 1:2, class = "data.frame") I would like to focus on column named Name and keep only letters in all of the rows. Data frame is extremely long and rows contains all type of characters (numbers, dots, question marks, etc) at the begining of the string, in the middle, in the end, between specific letters. I would like to keep only letters in all of these rows.
Using gsub: exp_data$Name <- gsub("[^A-Za-z]+", "", exp_data$Name)
exp_data$clean_name = gsub(x = exp_data$Name, pattern = "[^a-zA-Z]", replacement = "") exp_data # Name Greg Matt clean_name # 1 Y.L[+12,000]STISKDLITY.M NA L[+12] YLSTISKDLITYM # 2 Y.L[+12,000]STISKDLITY.M NA L[+12] YLSTISKDLITYM
summing integer64 columns not doing what I expect
I do not understand what is going on here. Why does sum work outside of data.table and not inside it? data.table version is 1.94 and bit64 is loaded. dput(dt) structure(list(Date = c(20150422L, 20150422L, 20150422L, 20150422L, 20150423L, 20150423L, 20150423L, 20150423L, 20150424L, 20150424L, 20150424L, 20150424L), totcap = structure(c(5.30519039464278e-314, 5.34352625144878e-314, 5.21151503979773e-314, 5.18159473949947e-314, 5.36659973716195e-314, 5.3767197559193e-314, 5.31749562227391e-314, 5.48717086915892e-314, 5.34891674084389e-314, 5.22243170680067e-314, 5.22969347328787e-314, 5.23636617172838e-314), class = "integer64")), .Names = c("Date", "totcap"), class = c("data.table", "data.frame"), row.names = c(NA, -12L), .internal.selfref = ) > sum(dt$totcap) integer64 [1] 128782928014 > dt[,sum(totcap),by=Date] Date V1 1: 20150422 2.104183e-313 2: 20150423 2.154799e-313 3: 20150424 2.103741e-313
generating and filling new data frames in lapply-"do not know how to convert x to class POSIXlt"
I am trying to generate a new data frame containing weekly encounter histories for an animal based on one row in a data frame that contains the animal ID (BandNo) first and last day we tracked it (FDay, Lday), and the fate of the animal when we stopped tracking it (fate) as well as other covariates. here is an example data frame for one individual, object "a" structure(list(BandNo = structure(1L, .Label = c("1234", "4201", "4203", "4205", "4207", "4208", "4209", "4213", "4214", "4215", "4216", "4217", "4219", "4221", "4223", "4224", "4226", "4227", "4228", "4229", "4230", "4231", "4232", "4233", "4234", "4236", "4237", "4238", "4239", "4241", "4242", "4245", "4247", "4248", "4249", "4253", "4254", "4256", "4257", "4258", "4259", "4261", "4262", "4263", "4264", "4271", "4272", "4273", "4276", "4277", "4280", "4282", "4284", "4288", "4289", "4292", "4293", "4296", "4298", "4299", "4501", "4502", "4503", "4504", "4505", "4507", "4508", "4509", "4510", "4511", "4512", "4513", "4514", "4515", "4516", "4517", "4518", "4519", "4520", "4521", "4525", "4526", "4527", "4529", "4530", "4532", "4535", "4539", "4596", "4598", "4599", "6101", "6102", "6104", "6105", "6106", "6107", "6108", "6109", "6111", "6112", "6113", "6114", "6115", "6116", "6118", "6119", "8002", "8003", "8004", "8005", "8006", "8007", "8008", "8009", "8010", "8011", "8012", "8013", "8014", "8015", "8017", "8018", "8019", "8020", "8021", "8097", "8098", "8099", "8402", "8403", "8404", "8405", "8406", "8408", "8409", "8410", "8411", "8412", "8413", "8414", "8416", "8417", "8418", "8419", "8422", "8423", "8426", "8427", "8429", "8430", "8431", "8432", "8433", "8458", "8497", "8498"), class = "factor"), FDay = structure(1380171600, class = c("POSIXct", "POSIXt"), tzone = "America/Bogota"), Lday = structure(1392094800, class = c("POSIXct", "POSIXt"), tzone = "America/Bogota"), ObsLength = 138, Fate = "Predation", FieldName = structure(7L, .Label = c("Bryan", "Dassow", "H1", "H2", "NARD", "SAY160", "SAY320", 
"SAY40A", "Schaeffer", "SIB", "Wessels"), class = "factor"), Landscape = structure(2L, .Label = c("CHW", "SAY", "SIB"), class = "factor"), Sex = structure(1L, .Label = c("F", "M"), class = "factor")), .Names = c("BandNo", "FDay", "Lday", "ObsLength", "Fate", "FieldName", "Landscape", "Sex"), row.names = 1L, class = "data.frame") I can successfully create the new data frame I want (y) using this code : library(lubridate) mydate<-seq(from=a$FDay,to=a$Lday,by='week') newband<-rep(a$BandNo,length(mydate)) newfate<-rep("Survive",length(mydate)) newfate[length(mydate)]<-a$Fate y<-data.frame(newband,mydate,newfate) y$FieldName<-a$FieldName y$Sex<-a$Sex y$Landscape<-a$Landscape y$WeekID<-week(a$mydate) y$Year<-year(a$mydate)) but when I try to apply it over a list of one-row data frames using the following code, I get the error message "do not know how to convert x to class POSIXlt" the previous steps, used in lapply now, giving the error b<-list(a) d<-lapply(b,function(x){ mydate<-seq(from=x$FDay,to=x$Lday,by='week') newband<-rep(x$BandNo,length(mydate)) newfate<-rep("Survive",length(mydate)) newfate[length(mydate)]<-x$Fate y<-data.frame(newband,mydate,newfate) y$FieldName<-x$FieldName y$Sex<-x$Sex y$Landscape<-x$Landscape y$WeekID<-week(x$mydate) y$Year<-year(x$mydate)}) Thanks for any help!
I made a real bonehead mistake... I should have been referencing y$mydate (not x$mydate) at the end, and the function also needs to return y. Here is what that lapply function should look like: d<-lapply(adult2,function(x){ mydate<-seq(from=x$FDay,to=x$Lday,by='week') newband<-rep(x$BandNo,length(mydate)) newfate<-rep("Survive",length(mydate)) newfate[length(mydate)]<-x$Fate y<-data.frame(newband,mydate,newfate) y$FieldName<-x$FieldName y$Sex<-x$Sex y$Landscape<-x$Landscape y$WeekID<-week(y$mydate) y$Year<-year(y$mydate) return(y)} )
"Subscript out of bounds" when running apply() in TERR, but works in plain R
When I try running the following piece of code in Spotfire Professional as a "R Script - Tibco Enterprise Runtime for R": mydata_broken <- structure( list( Var1 = list(3.99083333270391, 3.99083333270391, 3.99083333270391, 3.99083333270391), Var2 = list(3.99083333270391, 3.99083333270391, 3.99083333270391, 3.99083333270391)), row.names = c("1", "2", "3", "4"), class = "data.frame", out.attrs = list(dim = c(2L, 2L), dimnames = list( Var1 = c("Var1=3.99083333270391", "Var1=3.99083333270391"), Var2 = c("Var2=3.99083333270391", "Var2=3.99083333270391") ) ) ) mydata_ok <- structure( list( Var1 = list(3.99083333270391), Var2 = list(3.99083333270391)), row.names = "1", class = "data.frame", out.attrs = list(dim = c(1L, 1L), dimnames = list( Var1 = "Var1=3.99083333270391", Var2 = "Var2=3.99083333270391") ) ) out <- apply(mydata_broken, 1, function(y) mean(as.numeric(y))) I get the following error message: TIBCO Enterprise Runtime for R returned an error: 'Error in expand.names(x) : subscript out of bounds'. at Spotfire.Dxp.Data.DataFunctions.Executors.LocalFunctionClient.OnExecuting(FunctionClient funcClient) (rest of stack trace omitted) However, the same code works flawlessly in plain R. If I replace mydata_broken with mydata_ok in the call to apply(), everything works as expected (both in TERR and plain R). Things I've tried so far: use yy instead of y in the anonymous function provided to apply() (to rule out some stupid naming issues regarding y) put everything in a local({...}) block and check it in R, as was suggested in why a "subscript out of bounds" error in Shiny, but not R? Version & configuration information Spotfire 5.5.0, build version 5.5.0.31, build date: 22.05.2013 R version 3.0.2, 64bit (2013-09-25) Windows 7, 64bit So, my question is: Am I making some stupid mistake here? Or is this a bug in the Spotfire R runtime? UPDATE I'd like to reopen the question, because I got a viable workaround from Spotfire support, and I'd like to add it as an answer.
Here's a short summary of the response I got from Spotfire support: (1) it's indeed a bug in TERR (apparently, TERR is not able to read the list() structure properly, causing a fault in the dimensions of the matrix it was supposed to create); (2) they're currently working on fixing it; (3) as a workaround, you can use c() instead of list() in the data definition. Modified definition of data that works in TERR: mydata_working <- structure( list( Var1 = c(3.99083333270391, 3.99083333270391, 3.99083333270391, 3.99083333270391), Var2 = c(3.99083333270391, 3.99083333270391, 3.99083333270391, 3.99083333270391)), row.names = c("1", "2", "3", "4"), class = "data.frame", out.attrs = list(dim = c(2L, 2L), dimnames = list( Var1 = c("Var1=3.99083333270391", "Var1=3.99083333270391"), Var2 = c("Var2=3.99083333270391", "Var2=3.99083333270391") ) ) )