Merging two dataframes on a date range in R - r

In R I want to merge two dataframes on a range of dates, taking all rows from the second dataframe which fall on and between two columns of dates from the first dataframe. I couldn't find a strictly R function or version of the merge function that could do this, but I know there's a 'between' function in sql and I was thinking of trying the sqldf package (although I'm not well versed in sql). If there's a more R-ish way to do this, that would be preferable. Thank you in advance for your help!
df1 <- structure(list(ID = 1:2, PtID = structure(c(1L, 1L), .Label = c("T031", "T040", "T045", "T064", "T074", "T081", "T092", "T094", "T096", "T105", "T107", "T108", "T115", "T118", "T120", "T124", "T125", "T128", "T130", "T132", "T138", "T140", "T142", "T142_R1", "T146", "T158", "T159", "T160", "T164", "T166", "T169", "T171", "T173", "T197", "T208", "T214", "T221"), class = "factor"), StartDateTime = structure(list(sec = c(0, 0), min = c(11L, 35L), hour = c(17L, 17L), mday = c(23L, 23L), mon = c(9L, 9L), year = c(112L, 112L), wday = c(2L, 2L), yday = c(296L, 296L), isdst = c(1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt")), EndDateTime = structure(list(sec = c(0, 0), min = c(16L, 37L), hour = c(17L, 17L), mday = c(23L, 23L), mon = c(9L, 9L), year = c(112L, 112L), wday = c(2L, 2L), yday = c(296L, 296L), isdst = c(1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt"))), .Names = c("ID", "PtID", "StartDateTime", "EndDateTime"), row.names = 1:2, class = "data.frame")
df1
ID PtID StartDateTime EndDateTime
1 1 T031 2012-10-23 17:11:00 2012-10-23 17:16:00
2 2 T031 2012-10-23 17:35:00 2012-10-23 17:37:00
The second dataframe has several IDs (which match the first dataframe) and timestamps on the minute level.
df2
df2 <- structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), dateTime = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = 2:44, hour = c(17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L), mday = c(23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L), mon = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), year = c(112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L), wday = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), yday = c(296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt")), lat = 
c(33.06621406, 33.06616621, 33.06617305, 33.06617624, 33.06617932, 33.06618161, 33.06618326, 33.06618604, 33.06615089, 33.06628004, 33.06618461, 33.06615113, 33.0661362, 33.06620301, 33.0662218, 33.06624283, 33.06622268, 33.06622425, 33.06622787, 33.06623042, 33.06623318, 33.06623654, 33.06623826, 33.06623919, 33.06623907, 33.06624009, 33.06623804, 33.06624255, 33.06624377, 33.06624446, 33.06624242, 33.06624254, 33.06624513, 33.06624582, 33.06615573, 33.06625534, 33.06618541, 33.06613825, 33.06613624, 33.06614027, 33.06614551, 33.06614844, 33.06615393), lon = c(-116.6105531, -116.6105651,-116.6105613, -116.6105553, -116.610551, -116.610549, -116.6105484, -116.6105512, -116.6105712, -116.6104996, -116.6104711, -116.6104854, -116.6105596, -116.6104509, -116.610524, -116.6105535, -116.6105461, -116.6105461, -116.6105477, -116.6105498, -116.6105478, -116.6105473, -116.6105473, -116.6105488, -116.6105497, -116.6105479, -116.610545, -116.6105461, -116.6105448, -116.610543, -116.6105409, -116.6105395, -116.6105367, -116.6105337, -116.6105344, -116.6104779, -116.6104953,-116.6105222, -116.610526, -116.6105255, -116.6105282, -116.6105265,-116.6105282)), .Names = c("ID", "dateTime", "lat", "lon"), row.names = 1023:1065, class = "data.frame")
So the desired output would look like this:
ID PtID DateTime lat lon
1 T031 2012-10-23 17:11:00 33.06628 -116.6105
1 T031 2012-10-23 17:12:00 33.06618 -116.6105
1 T031 2012-10-23 17:13:00 33.06615 -116.6105
1 T031 2012-10-23 17:14:00 33.06614 -116.6106
1 T031 2012-10-23 17:15:00 33.06620 -116.6105
1 T031 2012-10-23 17:16:00 33.06622 -116.6105
2 T031 2012-10-23 17:35:00 33.06625 -116.6105
2 T031 2012-10-23 17:36:00 33.06616 -116.6105
2 T031 2012-10-23 17:37:00 33.06626 -116.6105
So with sqldf maybe something like this?
sqldf("SELECT df2.ID, df2.lon, df2.lat, FROM df1
INNER JOIN df2 ON df1.ID = df2.ID
WHERE df2.DateTime BETWEEN df1.StartDateTime AND df1.EndDateTime")

In general, it's not a good idea to use POSIXlt in data frames. Use POSIXct instead. Also your SQL statement is ok except the comma before FROM needs to be removed:
# Build copies of df1 and df2 whose date-time columns are POSIXct:
# every column that is converted here is passed through as.POSIXct().
df1a <- transform(df1,
StartDateTime = as.POSIXct(StartDateTime),
EndDateTime = as.POSIXct(EndDateTime))
df2a <- transform(df2, dateTime = as.POSIXct(dateTime))
The SQL statement in the question has an extraneous comma before FROM.
Here is a slightly simplified statement. This one uses a left join instead to ensure that all IDs from df1a are included even if they have no matches in df2a.
# Range join of df1a with df2a: rows are matched on ID and on dateTime lying
# BETWEEN StartDateTime and EndDateTime. A LEFT JOIN is used with df1a on the
# left-hand side.
sqldf("SELECT df1a.ID, PtID, dateTime, lat, lon
FROM df1a LEFT JOIN df2a
ON df1a.ID = df2a.ID AND dateTime BETWEEN StartDateTime AND EndDateTime")

You may want to look into defining your data as zoo objects. merge.zoo does something very close to what you ask. Refer to this question for more: R: merge two irregular time series

Related

Plotting Piecewise growth curves

I am trying to plot a piecewise growth curve similar to this first plot. I used the separate slopes coding scheme and placed a breakpoint at time 2
| time | 0 | 1 | 2 | 5 | 10 | 15 | 20|
| time1 | 0 | 1 | 2 | 2 | 2 | 2 | 2 |
| time2 | 0 | 0 | 0 | 1 | 2 | 3 | 4 |
I used the following code to create my growth model
m1 <- lmer(sdmtwr ~ time1 + time2 + (time1 | id) + (0 + time2 | id), data = SDMT, REML = FALSE)
I'm also exploring an interaction with a 2-level categorical predictor with the following code
m2 <- lmer(sdmtwr ~ (time1 + time2)*edu + (time1 | id) + (0 + time2 | id), data = SDMT, REML = FALSE)
I've attempted to create the plots with the ggplot2, sjPlot, and effects packages to no avail, and I am at a loss due to limited programming experience. I have only ever been able to plot segments separately for both the baseline and interaction models.
If anyone could provide assistance on the appropriate code, I would appreciate it!
Edit: Here is the dput summary (edited for length to show edu, time1, and time2)
> dput(sdmt)
structure(list(id = c(3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 6L,
6L, 6L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 62L, 62L, 62L, 62L,
108L, 108L, 108L, 108L, 119L, 119L, 120L, 120L, 120L, 120L, 132L,
132L, 132L, 132L, 132L, 148L, 148L, 148L, 148L, 148L, 148L, 175L,
175L, 175L, 178L, 178L, 178L, 178L, 201L, 201L, 201L, 201L, 201L,
201L, 201L, 253L, 253L, 253L, 253L, 327L, 327L, 327L, 327L, 336L,
336L, 336L, 336L, 336L, 336L, 343L, 343L, 360L, 360L, 360L, 366L,
366L, 366L), time = c(0L, 2L, 10L, 15L, 20L, 5L, 10L, 15L, 2L,
2L, 15L, 20L, 0L, 1L, 2L, 5L, 10L, 15L, 20L, 5L, 10L, 15L, 20L,
0L, 2L, 15L, 20L, 0L, 2L, 0L, 10L, 15L, 20L, 0L, 1L, 5L, 10L,
20L, 1L, 2L, 5L, 10L, 15L, 20L, 0L, 1L, 2L, 0L, 1L, 2L, 5L, 0L,
1L, 2L, 5L, 10L, 15L, 20L, 0L, 1L, 5L, 15L, 0L, 1L, 10L, 20L,
0L, 1L, 5L, 10L, 15L, 20L, 0L, 10L, 1L, 5L, 10L, 0L, 10L, 15L
), sdmtwr = c(20L, 24L, 18L, 19L, 9L, 17L, 24L, 17L, 41L, 33L,
27L, 29L, 31L, 29L, 26L, 29L, 32L, 20L, 19L, 40L, 42L, 46L, 38L,
14L, 25L, 24L, 29L, 46L, 45L, 29L, 26L, 34L, 38L, 30L, 33L, 71L,
52L, 51L, 29L, 33L, 50L, 55L, 40L, 39L, 32L, 34L, 35L, 28L, 37L,
37L, 36L, 37L, 29L, 52L, 51L, 50L, 44L, 42L, 30L, 43L, 43L, 41L,
33L, 46L, 49L, 38L, 52L, 50L, 48L, 49L, 49L, 50L, 40L, 39L, 18L,
NA, 3L, 31L, 43L, 47L), time_seg1 = c(0, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2,
0, 2, 2, 2, 0, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 1, 2, 0, 1, 2,
2, 0, 1, 2, 2, 2, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 2,
2, 0, 2, 1, 2, 2, 0, 2, 2), time_seg2 = c(0, 0, 2, 3, 4, 1, 2,
3, 0, 0, 3, 4, 0, 0, 0, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 3, 4, 0,
0, 0, 2, 3, 4, 0, 0, 1, 2, 4, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 2, 3, 4, 0, 0, 1, 3, 0, 0, 2, 4, 0, 0, 1, 2,
3, 4, 0, 2, 0, 1, 2, 0, 2, 3), ed_dich = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, NA, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L), .Label = c("< HS",
">= HS"), class = "factor")), row.names = c(NA, -80L), class = "data.frame")
What I think you want is a piecewise linear spline. You can do this with a truncated power basis function. In your model, you would include time and a function that is time-2 if time is greater than 2 and 0 otherwise. This makes a piecewise linear function whose two segments meet at time=2. You can do this in the model as follows:
library(lme4)
# Fixed effects: `time` plus a second term that is 0 while time <= 2 and
# time - 2 afterwards (the ifelse() inside I()). Random part: an intercept
# per id.
# NOTE(review): `tmp` is not defined in the visible text; presumably it is the
# answerer's copy of the question's data -- confirm before running.
mod <- lmer(sdmtwr ~ time + I(ifelse(time > 2, time-2, 0)) +
(1 |id), data=tmp, REML=TRUE)
Then, you could use the ggpredict() function from the ggeffects package to produce the plot:
library(ggeffects)
# ggpredict() is called on the fitted model `mod` for the term "time"; its
# result `g` is then handed to plot().
g <- ggpredict(mod, "time")
plot(g)
Note: I couldn't get it to run with random effects on the time variables, but with more data perhaps you'll be able to get it to work.

R function keeps returning empty data frame

I'm trying to write an R function that loops through a given dataframe to filter it a bit. The data in the dataframe consists of travel information between two lines in the London subway and I'd like to cut off the top percent. Here's the output of the str() function for the input data:
'data.frame': 71748 obs. of 9 variables:
$ depart : Factor w/ 52 levels "Bank","Barkingside",..: 22 22 22 22 22 25 25 25 25 25 ...
$ arrival : Factor w/ 48 levels "Bank","Barkingside",..: 48 43 38 5 8 1 42 48 41 43 ...
$ traveltime : num 433 1102 161 584 891 ...
$ departuretime: POSIXlt, format: "2014-03-24 18:17:20" "2014-03-24 18:17:20" "2014-03-24 18:17:20" ...
$ arrivaltime : POSIXlt, format: "2014-03-24 18:24:33" "2014-03-24 18:35:42" "2014-03-24 18:20:01" ...
$ lcid : Factor w/ 28 levels "1000001","1000002",..: 1 1 1 1 1 1 1 1 1 1 ...
$ tripno : Factor w/ 25 levels "1","10","11",..: 2 2 2 2 2 2 2 2 2 2 ...
$ destination : Factor w/ 18 levels "Debden","Ealing Broadway",..: 3 3 3 3 3 3 3 3 3 3 ...
$ line : Factor w/ 1 level "C": 1 1 1 1 1 1 1 1 1 1 ...
Here are the functions I wrote:
# Cut off the top share of travel times for each combination of departure and
# arrival station in order to remove outliers.
#
# Arguments:
#   data       - data frame with the columns `depart`, `arrival` and
#                `traveltime`.
#   percentage - quantile (between 0 and 1) that serves as the upper border for
#                each station pair; defaults to 0.99.
#
# Returns the rows of `data` whose travel time does not exceed the border of
# their station pair, grouped by departure station and then arrival station.
# A 0-row data frame is returned when nothing qualifies.
cutOffTopPercent <- function(data, percentage = 0.99) {
  # levels(factor(.)) yields the stations that actually occur, in level order;
  # unused factor levels cannot contribute rows, so skipping them is safe.
  departureStations <- levels(factor(data$depart))
  arrivalStations <- levels(factor(data$arrival))

  # Collect the row positions to keep instead of rbind()-ing data frames
  # inside the loop; the result is extracted from `data` in one step.
  keptRows <- vector("list", length(departureStations) * length(arrivalStations))
  slot <- 0L

  for (departureStation in departureStations) {
    leavesHere <- data$depart == departureStation
    for (arrivalStation in arrivalStations) {
      # which() drops NA comparisons so they never turn into all-NA rows.
      rowIds <- which(leavesHere & data$arrival == arrivalStation)
      if (length(rowIds) == 0L) {
        next
      }
      dataSubset <- data[rowIds, , drop = FALSE]
      # Highest travel time that is still allowed for this station pair.
      upperBorder <- getTopPercentileBottom(dataSubset, percentage)
      # Remove records with values higher than allowed. The comparison must
      # be `<=`: with `<`, a station pair that has a single trip (or ties at
      # the border) loses those rows, because the border equals their time.
      slot <- slot + 1L
      keptRows[[slot]] <- rowIds[which(dataSubset$traveltime <= upperBorder)]
    }
  }

  data[as.integer(unlist(keptRows)), , drop = FALSE]
}
# Travel time at which the requested top share of travel times begins, i.e.
# the `percentile` quantile of data$traveltime as computed by quantile().
getTopPercentileBottom <- function(data, percentile) {
  quantile(data$traveltime, probs = percentile)
}
The cutOffTopPercent() function always returns an empty data frame however. I can't find my error. I've been trying to go through the steps manually, but when I do so, all the data subsets get appended to the res dataframe correctly.
Can anyone see what I did wrong, or suggest a better approach to what I'm trying to do?
EDIT:
a dput of the first 30 records in my input data:
structure(list(depart = structure(c(22L, 22L, 22L, 22L, 22L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 25L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L), .Label = c("Bank",
"Barkingside", "Bethnal Green", "Bond Street", "Buckhurst Hill",
"Chancery Lane", "Chigwell", "Debden", "Ealing Broadway", "East Acton",
"Epping", "Fairlop", "Gants Hill", "Grange Hill", "Greenford",
"Hainault", "Hanger Lane", "Holborn", "Holland Park", "Lancaster Gate",
"Leyton", "Leytonstone", "Liverpool Street", "Loughton", "Marble Arch",
"Mile End", "Newbury Park", "Newbury Park Loop", "North Acton",
"North Acton Junction", "Northolt", "Notting Hill Gate", "Oxford Circus",
"Perivale", "Queensway", "Redbridge", "Roding Valley", "Ruislip Gardens",
"Shepherd's Bush", "Shepherds Bush (Central Line)", "Snaresbrook",
"South Ruislip", "South Woodford", "St. Paul's", "Stratford",
"Theydon Bois", "Tottenham Court Road", "Wanstead", "West Acton",
"West Ruislip", "White City", "Woodford"), class = "factor"),
arrival = structure(c(48L, 43L, 38L, 5L, 8L, 1L, 42L, 48L,
41L, 43L, 6L, 38L, 5L, 4L, 16L, 30L, 44L, 20L, 8L, 3L, 24L,
19L, 1L, 42L, 48L, 41L, 43L, 6L, 38L, 5L), .Label = c("Bank",
"Barkingside", "Bethnal Green", "Bond Street", "Buckhurst Hill",
"Chancery Lane", "Chigwell", "Debden", "East Acton", "Fairlop",
"Gants Hill", "Grange Hill", "Greenford", "Hainault", "Hanger Lane",
"Holborn", "Holland Park", "Lancaster Gate", "Leyton", "Leytonstone",
"Liverpool Street", "Loughton", "Marble Arch", "Mile End",
"Newbury Park", "North Acton", "North Acton Junction", "Northolt",
"Notting Hill Gate", "Oxford Circus", "Perivale", "Queensway",
"Redbridge", "Roding Valley", "Ruislip Gardens", "Shepherd's Bush",
"Shepherds Bush (Central Line)", "Snaresbrook", "South Ruislip",
"South Woodford", "St. Paul's", "Stratford", "Theydon Bois",
"Tottenham Court Road", "Wanstead", "West Acton", "White City",
"Woodford"), class = "factor"), traveltime = c(433, 1102,
161, 584, 891, 829, 1473, 2273, 629, 2942, 467, 2001, 2424,
75, 351, 165, 249, 1840, 2731, 1148, 1289, 1653, 580, 1224,
2024, 380, 2693, 218, 1752, 2175), departuretime = structure(list(
sec = c(20, 20, 20, 20, 20, 40, 40, 40, 40, 40, 40, 40,
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 49, 49, 49, 49,
49, 49, 49, 49), min = c(17L, 17L, 17L, 17L, 17L, 46L,
46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L,
46L, 46L, 46L, 46L, 46L, 50L, 50L, 50L, 50L, 50L, 50L,
50L, 50L), hour = c(18L, 18L, 18L, 18L, 18L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L), mday = c(24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L
), mon = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), year = c(114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L), wday = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), yday = c(82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L
), isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("sec", "min", "hour",
"mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt",
"POSIXt"), tzone = "GMT"), arrivaltime = structure(list(sec = c(33,
42, 1, 4, 11, 29, 13, 33, 9, 42, 27, 1, 4, 55, 31, 25, 49,
20, 11, 48, 9, 13, 29, 13, 33, 9, 42, 27, 1, 4), min = c(24L,
35L, 20L, 27L, 32L, 0L, 11L, 24L, 57L, 35L, 54L, 20L, 27L,
47L, 52L, 49L, 50L, 17L, 32L, 5L, 8L, 14L, 0L, 11L, 24L,
57L, 35L, 54L, 20L, 27L), hour = c(18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 17L, 18L, 17L, 18L, 18L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 17L, 18L,
18L), mday = c(24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L,
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L), mon = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
year = c(114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L,
114L, 114L, 114L, 114L), wday = c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), yday = c(82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L, 82L,
82L, 82L, 82L, 82L, 82L, 82L, 82L), isdst = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
)), .Names = c("sec", "min", "hour", "mday", "mon", "year",
"wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt"), tzone = "GMT"),
lcid = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("1000001", "1000002", "1000003",
"1000004", "1000005", "1000006", "1000007", "1000008", "1000009",
"1000010", "1000045", "1000054", "1000070", "1000088", "1000089",
"1000090", "1000097", "1000098", "1000099", "1000100", "1000101",
"1000102", "1000103", "1000104", "1000105", "1000106", "1000107",
"1000109"), class = "factor"), tripno = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"2", "20", "21", "22", "23", "24", "3", "4", "5", "6", "7",
"8", "81", "9"), class = "factor"), destination = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Debden",
"Ealing Broadway", "Epping", "Grange Hill via Woodford",
"Hainault", "Hainault via Newbury Park", "Hainault via Woodford",
"Leytonstone", "Loughton", "Marble Arch", "Newbury Park",
"North Acton", "Northolt", "Ruislip Gardens", "West Ruislip",
"White City", "Woodford", "Woodford Via Hainault"), class = "factor"),
line = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "C", class = "factor")), .Names = c("depart",
"arrival", "traveltime", "departuretime", "arrivaltime", "lcid",
"tripno", "destination", "line"), row.names = c(NA, 30L), class = "data.frame")
Here is a vectorized version of your code. Basically I used Map to avoid double loops and filling the result manually (using rbind, which is very slow).
# Map-based variant of cutOffTopPercent().
#
# Arguments:
#   data    - data frame with the columns `depart`, `arrival` and `traveltime`.
#   percent - quantile (between 0 and 1) used as the upper border for each
#             station pair; defaults to 0.99.
#
# Returns an unnamed list with one data frame per distinct (depart, arrival)
# pair found in `data`, in order of first appearance. Each element holds the
# rows of that pair whose travel time is <= the pair's border.
cutOffTopPercent <-
  function(data, percent = 0.99) {
    cut_off_dep_arr <-
      function(dep, arr) {
        # which() keeps NA comparisons from producing all-NA rows.
        dataSubset <- data[which(data$depart == dep & data$arrival == arr), ,
                           drop = FALSE]
        upperBorder <- getTopPercentileBottom(dataSubset, percent)
        dataSubset[which(dataSubset$traveltime <= upperBorder), ,
                   drop = FALSE] ## <= not <
      }
    # Work on the function's own argument (not a global `df`) and visit every
    # station pair once; mapping over all rows would repeat each pair's subset
    # once per trip.
    stationPairs <- unique(data.frame(
      dep = as.character(data$depart),
      arr = as.character(data$arrival),
      stringsAsFactors = FALSE
    ))
    stationPairs <- stationPairs[complete.cases(stationPairs), , drop = FALSE]
    # Map() would name the list after a character first argument; strip the
    # names so the list stays unnamed.
    unname(Map(cut_off_dep_arr, stationPairs$dep, stationPairs$arr))
  }
cutOffTopPercent(data=df)

Is `format` more secure than `$` when extracting hours from a POSIXlt vector?

Starting R with a bare-bone
l#np350v5c:~$ R --vanilla
> search()
[1] ".GlobalEnv" "package:stats" "package:graphics"
[4] "package:grDevices" "package:utils" "package:datasets"
[7] "package:methods" "Autoloads" "package:base"
.. this is a dump of data (emergency accesses hours in a northern Italy hospital) which gave a strange (to me) behaviour:
times <- structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), min = c(5L, 43L, 2L, 47L, 15L, 18L, 46L, 50L, 58L,
26L, 14L, 54L, 28L, 11L, 32L, 17L, 51L, 40L, 17L, 47L, 21L, 57L,
59L, 34L, 45L, 15L, 10L, 25L, 27L, 31L, 5L, 34L, 5L, 36L, 16L,
2L, 20L, 0L, 24L, 1L, 54L, 59L, 28L, 24L, 24L, 19L, 26L, 1L,
48L, 0L, 10L, 18L, 43L, 38L, 24L, 21L, 37L, 36L, 54L, 11L, 27L,
29L, 34L, 32L, 33L, 43L, 40L, 53L, 56L, 48L, 47L, 54L, 11L, 37L,
14L, 46L, 30L, 54L, 0L, 38L, 27L, 57L, 21L, 31L, 21L, 37L, 17L,
41L, 21L, 14L, 33L, 33L, 31L, 6L, 30L, 48L, 49L, 26L, 9L, 0L,
19L, 45L, 5L, 9L, 29L, 15L, 34L, 48L, 20L, 25L, 1L, 49L, 48L,
46L, 47L, 18L, 48L, 35L, 56L, 24L, 41L, 13L, 37L, 53L, 57L, 11L,
9L, 43L, 30L, 11L, 55L, 56L, 12L, 35L, 14L, 48L, 22L, 44L, 25L,
51L, 51L, 27L, 58L, 23L, 17L, 42L, 21L, 54L, 59L, 40L, 37L, 43L,
15L, 12L, 22L, 15L, 55L, 7L, 21L, 59L, 34L, 38L, 15L, 8L, 57L,
49L, 6L, 1L, 51L, 46L, 49L, 20L, 46L, 56L, 32L, 36L, 56L, 47L,
58L, 23L, 14L, 56L, 4L, 44L, 25L, 44L, 22L, 21L, 36L, 35L, 58L,
27L, 22L, 44L, 16L, 5L, 34L, 46L, 52L, 18L, 0L, 32L, 49L, 3L,
16L, 53L, 57L, 58L, 35L, 21L, 32L, 57L, 7L, 20L, 29L, 26L, 48L,
53L, 9L, 59L, 58L, 30L, 57L, 34L, 6L, 29L, 57L, 10L, 25L, 15L,
26L, 29L, 20L, 24L, 36L, 54L, 46L, 24L, 14L, 10L, 48L, 22L, 17L,
39L, 59L, 33L, 12L, 0L, 29L, 36L, 31L, 57L, 38L, 10L, 29L, 42L,
36L, 16L, 2L, 21L, 35L, 4L, 16L, 33L, 35L, 14L, 37L, 25L, 51L,
12L, 45L, 15L, 7L, 33L, 42L, 28L, 19L, 40L, 5L, 39L, 13L, 23L,
47L, 31L, 7L, 12L, 8L, 7L, 24L, 37L, 51L, 49L, 11L, 0L, 23L,
30L, 37L, 48L, 26L, 42L, 33L, 8L, 17L, 4L, 51L, 26L, 48L, 17L,
43L, 35L, 35L, 27L, 27L, 47L, 17L, 24L, 43L, 55L, 20L, 54L, 38L,
58L, 2L, 37L, 26L, 3L, 25L, 18L, 0L, 58L, 57L, 12L, 10L, 51L,
37L, 23L, 57L, 14L, 7L, 22L, 50L, 14L, 24L, 27L, 42L, 53L, 6L,
21L, 56L, 17L, 4L, 6L, 30L, 47L, 42L, 20L, 17L, 0L, 35L, 59L,
46L, 50L, 16L, 15L, 42L, 26L, 36L, 8L, 35L, 2L, 59L, 12L, 14L,
58L, 3L, 0L, 37L, 36L, 23L, 29L, 45L, 44L, 32L, 25L, 1L, 50L,
17L, 56L, 58L, 53L, 35L, 17L, 14L, 38L, 27L, 27L, 8L, 14L, 7L,
24L, 13L, 42L, 21L, 12L, 38L, 24L, 30L, 27L, 55L, 23L, 31L, 43L,
22L, 47L, 50L, 27L, 56L, 22L, 54L, 23L, 46L, 17L, 30L, 41L, 54L,
41L, 51L, 44L, 34L, 42L, 3L, 57L, 9L, 51L, 54L, 58L, 53L, 58L,
4L, 12L, 12L, 35L, 55L, 5L, 4L, 15L, 56L, 14L, 48L, 57L, 13L,
19L, 25L, 24L, 24L, 2L, 54L), hour = c(-3, -4, -3, -2, -4, -1,
-5, -4, -5, -5, -5, -4, -3, -2, -4, -2, -2, -4, -4, -1, -2, -5,
-5, -2, -2, -2, -5, -1, -1, -4, -3, -4, -4, -3, -4, -3, -1, -2,
-2, -1, -2, -5, -5, -3, -2, -2, -3, -3, -4, -1, -4, -3, -4, -2,
-5, -2, -4, -5, -4, -2, -5, -1, -5, -3, -2, -1, -3, -5, -1, -3,
-5, -1, -5, -1, -3, -1, -2, -5, -3, -1, -5, -1, -1, -3, -5, -1,
-2, -4, -4, -5, -3, -5, -4, -1, -5, -2, -5, -3, -5, -5, -2, -1,
-5, -3, -5, -3, -2, -4, -3, -1, -1, -2, -3, -1, -4, -3, -4, -5,
-1, -5, -3, -3, -1, -3, -3, -4, -4, -2, -5, -5, -1, -3, -5, -2,
-3, -2, -1, -5, -3, -5, -1, -1, -1, -3, -3, -5, -1, -2, -4, -2,
-4, -1, -4, -5, -1, -5, -1, -1, -4, -2, -5, -5, -3, -1, -5, -3,
-4, -5, -4, -5, -3, -5, -5, -5, -2, -5, -3, -5, -3, -4, -4, -5,
-5, -1, -4, -4, -1, -3, -1, -3, -3, -4, -2, -2, -4, -3, -1, -4,
-5, -3, -1, -3, -4, -3, -5, -1, -3, -5, -4, -5, -2, -4, -1, -3,
-5, -2, -5, -3, -4, -2, -5, -4, -1, -5, -3, -5, -1, -2, -2, -4,
-3, -4, -2, -4, -3, -4, -2, -5, -1, -1, -2, -1, -3, -5, -1, -1,
-2, -4, -4, -5, -3, -3, -3, -4, -4, -4, -4, -3, -4, -2, -5, -4,
-1, -4, -5, -4, -3, -3, -5, -2, -3, -1, -4, -1, -5, -2, -1, -1,
-4, -3, -2, -5, -4, -3, -4, -1, -3, -4, -5, -3, -2, -4, -1, -4,
-4, -2, -5, -3, -5, -1, -3, -4, -2, -1, -2, -3, -5, -3, -1, -1,
-3, -4, -4, -2, -2, -1, -2, -1, -4, -2, -5, -2, -1, -3, -5, -1,
-5, -3, -3, -5, -2, -1, -1, -4, -5, -5, -4, -1, -3, -5, -2, -4,
-1, -2, -4, -5, -5, -1, -5, -5, -4, -2, -5, -2, -3, -2, -2, -2,
-3, -2, -4, -4, -5, -1, -2, -5, -3, -1, -1, -4, -1, -5, -3, -5,
-4, -2, -4, -3, -4, -4, -3, -2, -2, -5, -2, -1, -1, -1, -3, -5,
-4, -5, -1, -1, -3, -2, -4, -2, -2, -1, -2, -4, -3, -5, -2, -1,
-4, -4, -1, -4, -2, -3, -2, -1, -5, -5, -4, -2, -1, -5, -3, -3,
-4, -5, -3, -4, -1, -3, -2, -2, -2, -4, -1, -2, -2, -2, -5, -1,
-4, -2, -4, -2, -5, -4, -2, -3, -2, -1, -1, -1, -3, -2, -5, -3,
-5, -2, -1), mday = c(24L, 30L, 13L, 17L, 11L, 17L, 1L, 26L,
21L, 1L, 9L, 6L, 7L, 17L, 17L, 4L, 24L, 23L, 31L, 2L, 22L, 19L,
12L, 17L, 26L, 13L, 12L, 26L, 14L, 20L, 22L, 14L, 26L, 29L, 7L,
16L, 19L, 10L, 19L, 17L, 15L, 22L, 4L, 22L, 6L, 22L, 6L, 24L,
18L, 11L, 13L, 26L, 5L, 2L, 1L, 12L, 15L, 21L, 22L, 24L, 25L,
18L, 4L, 18L, 28L, 4L, 21L, 25L, 18L, 4L, 8L, 10L, 21L, 11L,
11L, 20L, 23L, 14L, 16L, 2L, 31L, 3L, 21L, 3L, 1L, 13L, 26L,
20L, 17L, 4L, 3L, 13L, 10L, 23L, 16L, 1L, 28L, 27L, 16L, 29L,
6L, 15L, 6L, 14L, 4L, 17L, 15L, 4L, 19L, 26L, 20L, 22L, 24L,
1L, 16L, 18L, 12L, 21L, 26L, 11L, 30L, 19L, 26L, 4L, 3L, 2L,
26L, 30L, 14L, 16L, 21L, 20L, 29L, 26L, 17L, 23L, 8L, 19L, 23L,
14L, 14L, 5L, 28L, 6L, 15L, 13L, 8L, 6L, 1L, 2L, 3L, 5L, 16L,
17L, 3L, 23L, 20L, 27L, 28L, 1L, 31L, 26L, 14L, 30L, 22L, 9L,
31L, 5L, 19L, 9L, 27L, 26L, 24L, 12L, 27L, 20L, 9L, 4L, 9L, 4L,
18L, 9L, 13L, 10L, 23L, 27L, 11L, 21L, 6L, 6L, 6L, 9L, 23L, 14L,
27L, 23L, 17L, 19L, 29L, 16L, 18L, 4L, 5L, 29L, 14L, 16L, 19L,
25L, 14L, 16L, 27L, 12L, 11L, 26L, 2L, 17L, 1L, 20L, 2L, 3L,
5L, 7L, 27L, 27L, 17L, 6L, 4L, 11L, 5L, 15L, 13L, 19L, 1L, 29L,
18L, 29L, 17L, 23L, 31L, 26L, 19L, 17L, 14L, 21L, 17L, 13L, 5L,
13L, 4L, 27L, 13L, 18L, 4L, 24L, 23L, 21L, 25L, 25L, 2L, 24L,
25L, 28L, 6L, 10L, 15L, 9L, 7L, 8L, 9L, 22L, 17L, 11L, 15L, 24L,
14L, 23L, 18L, 28L, 3L, 20L, 25L, 5L, 17L, 21L, 24L, 21L, 24L,
3L, 31L, 21L, 18L, 27L, 30L, 25L, 13L, 8L, 21L, 16L, 22L, 24L,
3L, 16L, 4L, 22L, 15L, 30L, 2L, 16L, 28L, 24L, 26L, 20L, 9L,
3L, 3L, 4L, 11L, 5L, 30L, 19L, 24L, 3L, 24L, 5L, 14L, 4L, 23L,
18L, 7L, 16L, 24L, 3L, 27L, 4L, 30L, 22L, 28L, 17L, 25L, 3L,
19L, 18L, 26L, 8L, 24L, 18L, 17L, 6L, 17L, 25L, 6L, 23L, 14L,
4L, 5L, 15L, 5L, 4L, 19L, 4L, 7L, 24L, 28L, 23L, 28L, 9L, 7L,
27L, 26L, 25L, 4L, 19L, 24L, 18L, 18L, 7L, 16L, 11L, 10L, 21L,
6L, 30L, 15L, 1L, 16L, 16L, 21L, 17L, 8L, 19L, 1L, 23L, 10L,
18L, 2L, 8L, 20L, 28L, 25L, 28L, 25L, 23L, 5L, 4L, 31L, 2L, 21L,
30L, 1L, 4L, 18L, 8L, 25L, 1L, 25L, 2L, 5L, 20L, 2L, 17L, 5L,
5L, 30L, 30L, 17L, 5L, 18L, 21L, 24L, 20L, 26L, 31L, 15L, 30L,
16L, 6L, 18L, 28L, 7L, 25L, 24L, 7L, 23L, 9L, 8L, 25L, 11L, 20L,
19L, 24L, 5L, 5L, 26L, 26L, 7L, 29L, 22L), mon = c(10L, 4L, 7L,
7L, 4L, 10L, 11L, 5L, 5L, 5L, 1L, 5L, 10L, 9L, 1L, 6L, 7L, 7L,
0L, 5L, 7L, 10L, 6L, 4L, 4L, 6L, 11L, 10L, 8L, 3L, 6L, 1L, 5L,
6L, 11L, 8L, 4L, 5L, 2L, 8L, 0L, 4L, 1L, 1L, 11L, 0L, 2L, 11L,
6L, 1L, 4L, 6L, 9L, 6L, 4L, 10L, 0L, 9L, 5L, 1L, 8L, 1L, 6L,
6L, 4L, 3L, 8L, 11L, 7L, 4L, 11L, 9L, 5L, 4L, 6L, 0L, 7L, 0L,
1L, 10L, 11L, 4L, 7L, 7L, 9L, 9L, 9L, 10L, 3L, 1L, 9L, 3L, 5L,
11L, 6L, 10L, 10L, 0L, 11L, 3L, 9L, 10L, 6L, 8L, 5L, 7L, 7L,
8L, 1L, 9L, 2L, 11L, 1L, 6L, 7L, 10L, 2L, 8L, 8L, 8L, 8L, 4L,
1L, 0L, 0L, 5L, 6L, 6L, 3L, 5L, 7L, 7L, 11L, 6L, 1L, 8L, 10L,
9L, 2L, 10L, 10L, 0L, 3L, 9L, 9L, 7L, 7L, 1L, 9L, 2L, 2L, 0L,
7L, 0L, 7L, 10L, 7L, 5L, 7L, 5L, 7L, 11L, 4L, 10L, 7L, 11L, 6L,
11L, 10L, 6L, 2L, 6L, 0L, 7L, 10L, 2L, 9L, 4L, 1L, 2L, 7L, 8L,
3L, 10L, 10L, 8L, 0L, 9L, 3L, 11L, 6L, 11L, 5L, 2L, 8L, 2L, 11L,
11L, 1L, 8L, 1L, 6L, 8L, 4L, 4L, 3L, 1L, 1L, 8L, 10L, 7L, 3L,
8L, 5L, 4L, 1L, 7L, 7L, 6L, 2L, 6L, 9L, 6L, 11L, 8L, 6L, 10L,
2L, 1L, 7L, 6L, 10L, 5L, 4L, 1L, 0L, 1L, 0L, 11L, 2L, 6L, 9L,
11L, 11L, 10L, 11L, 7L, 8L, 4L, 6L, 9L, 4L, 8L, 9L, 9L, 10L,
10L, 3L, 7L, 9L, 4L, 8L, 2L, 10L, 10L, 4L, 3L, 1L, 9L, 7L, 9L,
3L, 5L, 0L, 8L, 9L, 7L, 8L, 5L, 7L, 8L, 8L, 10L, 1L, 7L, 2L,
9L, 8L, 2L, 5L, 0L, 10L, 5L, 6L, 2L, 10L, 1L, 8L, 7L, 0L, 1L,
3L, 9L, 3L, 6L, 4L, 10L, 0L, 3L, 5L, 4L, 10L, 9L, 7L, 4L, 3L,
0L, 3L, 3L, 1L, 9L, 5L, 3L, 3L, 8L, 11L, 10L, 4L, 11L, 0L, 7L,
1L, 0L, 4L, 2L, 2L, 0L, 0L, 7L, 4L, 4L, 10L, 8L, 3L, 8L, 11L,
8L, 0L, 0L, 6L, 6L, 1L, 0L, 3L, 4L, 2L, 9L, 1L, 6L, 4L, 3L, 1L,
0L, 0L, 11L, 1L, 4L, 3L, 7L, 10L, 2L, 1L, 0L, 0L, 5L, 4L, 8L,
10L, 7L, 10L, 8L, 8L, 1L, 8L, 11L, 8L, 10L, 7L, 11L, 4L, 8L,
1L, 10L, 3L, 10L, 5L, 10L, 7L, 9L, 9L, 2L, 10L, 0L, 9L, 4L, 7L,
7L, 11L, 1L, 11L, 1L, 1L, 4L, 2L, 3L, 3L, 5L, 10L, 0L, 7L, 9L,
7L, 10L, 10L, 4L, 2L, 0L, 0L, 1L, 7L, 8L, 6L, 9L, 9L, 11L, 4L,
6L, 8L, 9L, 0L, 8L, 6L, 4L, 6L, 7L, 4L, 0L, 0L, 9L, 1L, 4L, 0L,
1L, 8L, 1L, 3L, 7L), year = c(112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L
), wday = c(6L, 3L, 1L, 5L, 5L, 6L, 6L, 2L, 4L, 5L, 4L, 3L, 3L,
3L, 5L, 3L, 5L, 4L, 2L, 6L, 3L, 1L, 4L, 4L, 6L, 5L, 3L, 1L, 5L,
5L, 0L, 2L, 2L, 0L, 5L, 0L, 6L, 0L, 1L, 1L, 0L, 2L, 6L, 3L, 4L,
0L, 2L, 1L, 3L, 6L, 0L, 4L, 5L, 1L, 2L, 1L, 0L, 0L, 5L, 5L, 2L,
6L, 3L, 3L, 1L, 3L, 5L, 2L, 6L, 5L, 6L, 3L, 4L, 5L, 3L, 5L, 4L,
6L, 4L, 5L, 1L, 4L, 2L, 5L, 1L, 6L, 5L, 2L, 2L, 6L, 3L, 5L, 0L,
0L, 1L, 4L, 3L, 5L, 0L, 0L, 6L, 4L, 5L, 5L, 1L, 5L, 3L, 2L, 0L,
5L, 2L, 6L, 5L, 0L, 4L, 0L, 1L, 5L, 3L, 2L, 0L, 6L, 0L, 3L, 2L,
6L, 4L, 1L, 6L, 6L, 2L, 1L, 6L, 4L, 5L, 0L, 4L, 5L, 5L, 3L, 3L,
4L, 6L, 6L, 1L, 1L, 3L, 1L, 1L, 5L, 6L, 4L, 4L, 2L, 5L, 5L, 1L,
3L, 2L, 5L, 5L, 3L, 1L, 5L, 3L, 0L, 2L, 3L, 1L, 1L, 2L, 4L, 2L,
0L, 2L, 2L, 2L, 5L, 4L, 0L, 6L, 0L, 5L, 6L, 5L, 4L, 3L, 0L, 5L,
4L, 5L, 0L, 6L, 3L, 4L, 5L, 1L, 3L, 3L, 0L, 6L, 3L, 3L, 2L, 1L,
1L, 0L, 6L, 5L, 5L, 1L, 4L, 2L, 2L, 3L, 5L, 3L, 1L, 1L, 6L, 4L,
0L, 5L, 4L, 1L, 5L, 0L, 0L, 0L, 3L, 5L, 1L, 5L, 2L, 6L, 0L, 5L,
1L, 1L, 1L, 4L, 3L, 5L, 5L, 6L, 4L, 0L, 4L, 5L, 5L, 6L, 5L, 2L,
3L, 2L, 3L, 0L, 3L, 4L, 3L, 5L, 5L, 2L, 6L, 4L, 3L, 6L, 3L, 2L,
3L, 3L, 3L, 5L, 2L, 5L, 2L, 6L, 5L, 0L, 1L, 2L, 3L, 6L, 2L, 5L,
3L, 3L, 1L, 6L, 4L, 3L, 2L, 6L, 3L, 2L, 4L, 2L, 0L, 3L, 2L, 5L,
1L, 4L, 0L, 0L, 3L, 5L, 1L, 6L, 0L, 6L, 2L, 2L, 5L, 4L, 3L, 3L,
4L, 1L, 0L, 3L, 0L, 2L, 4L, 5L, 2L, 5L, 5L, 5L, 1L, 5L, 5L, 5L,
5L, 5L, 4L, 6L, 2L, 6L, 4L, 6L, 0L, 3L, 0L, 1L, 2L, 1L, 5L, 2L,
3L, 5L, 4L, 6L, 3L, 6L, 4L, 5L, 6L, 4L, 5L, 6L, 5L, 6L, 1L, 5L,
4L, 1L, 5L, 0L, 0L, 0L, 0L, 2L, 3L, 1L, 1L, 0L, 0L, 5L, 3L, 4L,
0L, 3L, 6L, 0L, 0L, 3L, 5L, 6L, 6L, 6L, 4L, 6L, 3L, 5L, 5L, 2L,
2L, 4L, 0L, 0L, 5L, 4L, 4L, 4L, 4L, 2L, 0L, 3L, 2L, 6L, 3L, 5L,
4L, 3L, 1L, 2L, 2L, 1L, 5L, 5L, 0L, 5L, 5L, 4L, 1L, 3L, 6L, 5L,
1L, 3L, 2L, 1L, 2L, 0L, 0L, 3L, 5L, 0L, 3L, 1L, 6L, 3L, 1L, 3L,
5L, 3L, 5L, 5L, 5L, 6L, 4L, 0L, 3L, 2L, 0L, 3L), yday = c(328L,
150L, 225L, 229L, 131L, 321L, 335L, 177L, 172L, 152L, 39L, 157L,
311L, 290L, 47L, 185L, 236L, 235L, 30L, 153L, 234L, 323L, 193L,
137L, 146L, 194L, 346L, 330L, 257L, 110L, 203L, 44L, 177L, 210L,
341L, 259L, 139L, 161L, 78L, 260L, 14L, 142L, 34L, 52L, 340L,
21L, 65L, 358L, 199L, 41L, 133L, 207L, 278L, 183L, 121L, 316L,
14L, 294L, 173L, 54L, 268L, 48L, 185L, 199L, 148L, 94L, 264L,
359L, 230L, 124L, 342L, 283L, 172L, 131L, 192L, 19L, 235L, 13L,
46L, 306L, 365L, 123L, 233L, 215L, 274L, 286L, 299L, 324L, 107L,
34L, 276L, 103L, 161L, 357L, 197L, 305L, 332L, 26L, 350L, 119L,
279L, 319L, 187L, 257L, 155L, 229L, 227L, 247L, 49L, 299L, 79L,
356L, 54L, 182L, 228L, 322L, 71L, 264L, 269L, 254L, 273L, 139L,
56L, 3L, 2L, 153L, 207L, 211L, 104L, 167L, 233L, 232L, 363L,
207L, 47L, 266L, 312L, 292L, 82L, 318L, 318L, 4L, 118L, 279L,
288L, 225L, 220L, 36L, 274L, 61L, 62L, 4L, 228L, 16L, 215L, 327L,
232L, 178L, 240L, 152L, 243L, 360L, 134L, 334L, 234L, 343L, 212L,
339L, 323L, 190L, 86L, 207L, 23L, 224L, 331L, 79L, 282L, 124L,
39L, 63L, 230L, 252L, 103L, 314L, 327L, 270L, 10L, 294L, 96L,
340L, 187L, 343L, 174L, 73L, 270L, 82L, 351L, 353L, 59L, 259L,
48L, 185L, 248L, 149L, 134L, 106L, 49L, 55L, 257L, 320L, 239L,
102L, 254L, 177L, 122L, 47L, 213L, 232L, 183L, 62L, 186L, 280L,
208L, 361L, 260L, 187L, 308L, 70L, 35L, 227L, 194L, 323L, 152L,
149L, 48L, 28L, 47L, 22L, 365L, 85L, 200L, 290L, 348L, 355L,
321L, 347L, 217L, 256L, 124L, 208L, 286L, 138L, 247L, 297L, 296L,
325L, 329L, 115L, 214L, 297L, 145L, 271L, 65L, 314L, 319L, 129L,
97L, 38L, 282L, 234L, 290L, 101L, 166L, 23L, 257L, 296L, 230L,
271L, 154L, 232L, 268L, 248L, 321L, 51L, 236L, 80L, 297L, 246L,
90L, 172L, 17L, 331L, 181L, 206L, 72L, 312L, 51L, 259L, 234L,
23L, 33L, 106L, 277L, 112L, 196L, 150L, 306L, 15L, 118L, 175L,
146L, 324L, 282L, 215L, 123L, 94L, 10L, 95L, 120L, 49L, 297L,
154L, 114L, 95L, 257L, 338L, 327L, 138L, 341L, 15L, 236L, 33L,
26L, 124L, 89L, 81L, 27L, 16L, 237L, 123L, 139L, 322L, 269L,
98L, 267L, 352L, 260L, 5L, 16L, 206L, 187L, 53L, 13L, 94L, 125L,
74L, 278L, 34L, 200L, 124L, 97L, 54L, 27L, 22L, 362L, 39L, 127L,
117L, 238L, 329L, 63L, 49L, 23L, 17L, 169L, 127L, 259L, 315L,
222L, 325L, 249L, 273L, 45L, 244L, 350L, 259L, 325L, 229L, 342L,
139L, 244L, 53L, 314L, 108L, 306L, 159L, 324L, 240L, 298L, 301L,
84L, 327L, 4L, 277L, 151L, 214L, 233L, 364L, 31L, 338L, 48L,
38L, 145L, 60L, 115L, 92L, 156L, 324L, 1L, 229L, 278L, 217L,
334L, 334L, 137L, 64L, 17L, 20L, 54L, 232L, 269L, 212L, 288L,
303L, 350L, 126L, 199L, 271L, 280L, 24L, 267L, 188L, 143L, 190L,
220L, 145L, 10L, 19L, 292L, 54L, 125L, 4L, 56L, 269L, 37L, 119L,
234L), isdst = c(0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L,
0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L,
1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt"))
Then, trying to extract hours in different ways
# Put the timestamps in a data frame, then attach the hour of each one
# obtained in two different ways so the results can be compared row by row.
df <- data.frame(times)
# Hour read directly from the "hour" field of the POSIXlt object.
df$with.dollar <- times$hour
# Hour obtained by formatting the timestamp as "%H" and converting to numeric.
df$with.format <- as.numeric(format(times, "%H"))
head(df)
and my results are
times with.dollar with.format
1 2012-11-23 21:05:00 -3 21
2 2012-05-29 20:43:00 -4 20
3 2012-08-12 21:02:00 -3 21
4 2012-08-16 22:47:00 -2 22
5 2012-05-10 20:15:00 -4 20
6 2012-11-16 23:18:00 -1 23
Another test (not in a data.frame... simple vectors)
> any(times$hour == as.numeric(format(times, "%H")))
[1] FALSE
With times$hour, the hours seem to be counted backwards from the following day in some cases (in all of the cases reported here).
Could you reproduce that? any idea why?
Looking at ?POSIXlt this could be a bug because not all hours are within 0:23 range.
If so, for the moment it would be safer to use format rather than $ for POSIXlt vectors.
> R.version
_
platform x86_64-pc-linux-gnu
arch x86_64
os linux-gnu
system x86_64, linux-gnu
status
major 3
minor 0.3
year 2014
month 03
day 06
svn rev 65126
language R
version.string R version 3.0.3 (2014-03-06)
nickname Warm Puppy

plotting x-axes with custom label in R

I've to plot these data:
day temperature
02/01/2012 13:30:00 10
10/01/2012 20:30:00 8
15/01/2012 13:30:00 12
25/01/2012 20:30:00 6
02/02/2012 13:30:00 5
10/02/2012 20:30:00 3
15/02/2012 13:30:00 6
25/02/2012 20:30:00 -1
02/03/2012 13:30:00 4
10/03/2012 20:30:00 -2
15/03/2012 13:30:00 7
25/03/2012 20:30:00 1
in the x-axis I want to label only the month and the day (e.g. Jan 02 ). How can I do this using the command plot() and axis()?
First, you will need to put your date text into a date class (e.g. as.POSIXct):
# Example data as a literal structure() definition: a data frame with 12 rows,
# a `day` column of class POSIXlt and an integer `temperature` column.
df <- structure(list(day = structure(list(sec = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), min = c(30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L), hour = c(13L, 20L, 13L, 20L, 13L, 20L,
13L, 20L, 13L, 20L, 13L, 20L), mday = c(2L, 10L, 15L, 25L, 2L,
10L, 15L, 25L, 2L, 10L, 15L, 25L), mon = c(0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L), year = c(112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L), wday = c(1L,
2L, 0L, 3L, 4L, 5L, 3L, 6L, 5L, 6L, 4L, 0L), yday = c(1L, 9L,
14L, 24L, 32L, 40L, 45L, 55L, 61L, 69L, 74L, 84L), isdst = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt")), temperature = c(10L, 8L,
12L, 6L, 5L, 3L, 6L, -1L, 4L, -2L, 7L, 1L)), .Names = c("day",
"temperature"), row.names = c(NA, -12L), class = "data.frame")
# Show the data frame.
df
# Store `day` as POSIXct, the date-time class used for plotting below.
# NOTE(review): `day` is already POSIXlt in the literal above, so the `format`
# argument is presumably only relevant when `day` is character text -- confirm.
df$day <- as.POSIXct(df$day, format="%d/%m/%Y %H:%M:%S")
Your dates should now plot correctly. Don't apply the x-axis, by using the argument xaxt="n". Afterwards, you can create a sequence of dates where you would like your axis labeled, and apply this with axis.POSIXct:
# Draw the temperature series as a line, suppressing the default x-axis
# (xaxt = "n") so that a custom date axis can be added afterwards.
plot(df$day, df$temperature, type = "l", ylab = "Temperature", xlab = "Date",
     xaxt = "n")
# Tick positions: one per month, starting from the first observation.
SEQ <- seq(min(df$day), max(df$day), by = "months")
# Bottom axis (side = 1) labelled at those positions as "Mon YYYY".
# Arguments are named in full rather than relying on positional/partial matching.
axis.POSIXct(side = 1, x = SEQ, at = SEQ, format = "%b %Y")
Similarly, to get a daily axis, simply modify the SEQ and axis.POSIXct code accordingly. For example, you may try:
# Draw the temperature series as a line, suppressing the default x-axis
# (xaxt = "n") so that a custom date axis can be added afterwards.
plot(df$day, df$temperature, type = "l", ylab = "Temperature", xlab = "Date",
     xaxt = "n")
# Tick positions: one per day, starting from the first observation.
SEQ <- seq(min(df$day), max(df$day), by = "days")
# Bottom axis (side = 1) labelled at those positions as "Mon DD".
# Arguments are named in full rather than relying on positional/partial matching.
axis.POSIXct(side = 1, x = SEQ, at = SEQ, format = "%b %d")

R tells me " object 'train' not found "

In my customized function I met a strange problem.
I'm writing a function to do cross-validation with logistic and clogit (in survival) regression. Thus I need to generate a training set and a testing set. I've marked the part that does it.
I need to compare the classic logistic regression and the conditional logistic regression, so I use an 'if' statement to distinguish those two functions.
Here's the problem. It seems that the glm function can find the train vector and works well, but clogit can't find it, even though the train vector is output correctly!
When I test each line out of my function gcv,clogit works again.
Can somebody tell me why is clogit not working with train?
I called this function as:
gcv(as.numeric(FNDX)~HIGD+DEG+CHK+AGP1+AGMN+NLV+LIV+WT+AGLP+MST+strata(STR),bbdm,method="clogit")
and the error message is
Error in `[.data.frame`(bbdm, train, ) : object 'train' not found
Do you need traceback() information?
and the data set is bbdm13 in http://www.umass.edu/statdata/statdata/stat-logistic.html.
There are NAs in the original data; alternatively, use the sample given after the code :)
Related codes are as following:
# gcv: cross-validation of a regression model, accumulating train/test
# confusion tables over the folds.
#
# Arguments:
#   formula - model formula; passed to glm()/clogit() and used to build the
#             model frame from which the response is taken
#   data    - data frame with the variables (copied to the local `set`)
#   method  - "logistic" and "clogit" select the two branches below; any other
#             value (including the default "rpart") matches no branch shown here
#   cross   - number of folds
#   times, k, layer, seed - not referenced in the code shown
#
# Returns a list with elements Train, TrainRate, Test and TestRate.
gcv<-function(formula,data=NULL,method="rpart",cross=5,times=10,k=7,layer=5,seed=0)
{
set=data;
n=nrow(set);
# The RNG is seeded from the current time; the `seed` argument is not used.
set.seed(as.vector(Sys.time()));
# bb1 is not referenced again in the code shown.
bb1=1:n;
# bb2 holds a fold label (1..cross) for every row, in shuffled order.
bb2=rep(1:cross,ceiling(n/cross))[1:n];
bb2=sample(bb2,n);
# samp is not referenced again in the code shown.
samp=sample(c(1:n),size=n);
# m: number of columns of the fold matrix, ceiling(n/cross).
m=ceiling(n/cross);
# smp: one row per fold, to be filled with the row indices of that fold.
smp<-mat.or.vec(cross,m);
# j[f]: column position used for the next index written into fold f.
j=rep(0,cross)
# NOTE(review): j starts at 0 and is used as the column index before it is
# incremented, so the first index assigned to each fold is written to column 0
# (which appears to store nothing) and the last column keeps its initial 0 --
# confirm whether j should start at 1 / be incremented first.
for (i in 1:n)
{
smp[bb2[i],j[bb2[i]]]=i
j[bb2[i]]=j[bb2[i]]+1
}
# Here we separate the original set into 5(variable cross)sets,
# each time we take one out and treat it as the testing set
# Rebuild the call to gcv() as a call to model.frame() keeping only the
# formula and data arguments, and evaluate it in the caller's frame.
mf <- match.call(expand.dots = FALSE)
# Note: m is reused here and no longer holds the fold size.
m <- match(c("formula","data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response<-model.response(mf)
#code copied from function.lm
# reslvl: number of factor levels of the response; tra/tes are the
# reslvl x reslvl accumulators for the training and testing tables.
reslvl<-length(levels(response))
tra<-mat.or.vec(reslvl,reslvl);
tes<-mat.or.vec(reslvl,reslvl);
for (i in 1:cross)
{
# Row indices of fold i are the test set; the rest form the training set.
test<-smp[i,];
# NOTE(review): the upper bound 200 is hard-coded; presumably this should be n.
train<-setdiff(1:200,test);
show(train); #The 'train' set can be shown here.
#some "if" and "else"statements are hidden
if (method=="logistic")#logistic is running well
{
# NOTE(review): the model is fitted on the full `set`, not on set[train,].
# `bin` is not defined in this function; it must come from elsewhere.
bb.log<-step(glm(formula,set,family=binomial),trace=FALSE)
tra<-tra+as.vector(t(table(response[train],
bin(predict.glm(bb.log,set[train,],type="response")))))
tes<-tes+as.vector(t(table(response[test],
bin(predict.glm(bb.log,set[test,],type="response")))))
}
else if (method=="clogit")#clogit is meeting a problem.
{
library("survival")
# NOTE(review): this fits on `bbdm`, which is neither an argument nor a local
# of gcv (the data argument is available as `set`). step() presumably
# re-evaluates the stored call outside gcv's frame, where the local `train`
# is not visible -- confirm.
bb.clog<-step(clogit(formula,bbdm[train,]),trace=FALSE)
tra<-tra+as.vector(t(table( response[train],
bin(predict(bb.clog,set[train,])))))
tes<-tes+as.vector(t(table( response[test],
bin(predict(bb.clog,set[test,])))))
}
}
# Average the accumulated tables over the folds.
tra<-tra/cross;
tes<-tes/cross;
# Rates are 1 minus the diagonal share of each averaged table.
trainrate=1-sum(diag(tra))/sum(tra)
testrate=1-sum(diag(tes))/sum(tes)
result<-list(Train=tra,TrainRate=trainrate,Test=tes,TestRate=testrate)
result
}
Sample Data:
STR OBS AGMT FNDX HIGD DEG CHK AGP1 AGMN NLV LIV WT AGLP MST
1 1 1 39 1 9 0 1 23 13 0 5 118 39 1
2 1 2 39 0 10 0 2 16 11 1 3 175 39 3
3 1 3 39 0 11 0 2 20 12 1 3 135 39 2
4 1 4 39 0 12 1 1 21 11 0 3 125 40 1
5 2 1 38 1 14 2 1 24 14 1 3 118 39 1
6 2 2 38 0 12 1 2 20 15 0 2 183 38 1
7 2 3 38 0 9 0 2 19 11 0 5 218 38 1
8 2 4 38 0 13 1 1 23 13 0 2 192 37 1
9 3 1 38 1 9 0 1 22 15 2 2 125 38 1
10 3 2 38 0 10 0 2 20 14 0 2 123 38 1
11 3 3 38 0 15 1 1 19 13 3 2 140 37 1
12 3 4 38 0 12 1 1 18 13 0 2 160 38 1
13 4 1 38 1 15 1 1 24 14 2 3 150 38 5
14 4 2 38 0 15 2 1 26 13 1 1 130 38 2
15 4 3 38 0 12 1 2 23 14 0 4 140 38 1
16 4 4 38 0 12 1 1 25 16 0 2 130 38 1
17 5 1 38 1 12 1 1 21 17 0 2 150 38 2
18 5 2 38 0 12 1 2 20 12 1 2 148 38 1
19 5 3 38 0 14 2 1 22 13 0 2 134 39 1
20 5 4 38 0 13 1 1 16 14 0 6 138 38 4
21 6 1 38 1 13 1 1 24 12 1 3 116 39 1
22 6 2 38 0 12 1 2 19 12 0 2 145 35 2
23 6 3 38 0 14 2 2 21 10 4 3 195 35 1
24 6 4 38 0 14 4 1 25 8 0 1 180 38 2
25 7 1 37 1 17 4 1 26 13 1 4 137 37 5
26 7 2 37 0 15 2 1 20 11 2 2 135 37 2
27 7 3 37 0 9 0 1 18 10 2 3 155 37 1
28 7 4 37 0 12 1 2 22 13 2 2 120 38 1
29 8 1 36 1 12 1 1 23 14 0 2 126 36 2
30 8 2 36 0 10 0 1 20 12 1 2 191 36 1
31 8 3 36 0 10 0 2 17 10 1 3 185 37 1
32 8 4 36 0 12 1 2 23 12 0 2 119 37 1
33 9 1 35 1 12 1 1 23 14 0 3 129 36 1
34 9 2 35 0 14 1 2 21 11 0 3 170 34 2
35 9 3 36 0 12 1 1 22 14 0 4 110 36 1
36 9 4 35 0 14 2 2 24 11 0 2 155 35 1
37 10 1 35 1 12 1 2 21 12 0 2 105 29 1
38 10 2 36 0 17 3 1 26 13 1 2 115 36 1
39 10 3 36 0 12 1 2 22 12 2 3 120 36 1
40 10 4 36 0 12 1 1 33 16 0 1 150 36 1
Structure:
structure(list(STR = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L,
23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L,
26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L,
30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 36L, 36L,
36L, 36L, 37L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L,
43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 45L, 46L,
46L, 46L, 46L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 49L, 49L,
49L, 49L, 50L, 50L, 50L, 50L), .Label = c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50"), class = "factor"), OBS = structure(c(1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"),
AGMT = c(39L, 39L, 39L, 39L, 38L, 38L, 38L, 38L, 38L, 38L,
38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L, 38L,
38L, 38L, 37L, 37L, 37L, 37L, 36L, 36L, 36L, 36L, 35L, 35L,
36L, 35L, 35L, 36L, 36L, 36L, 35L, 35L, 35L, 35L, 34L, 35L,
34L, 34L, 33L, 33L, 32L, 33L, 33L, 33L, 33L, 33L, 32L, 32L,
32L, 32L, 31L, 30L, 31L, 31L, 68L, 68L, 68L, 68L, 64L, 64L,
64L, 64L, 63L, 63L, 63L, 63L, 62L, 62L, 62L, 62L, 61L, 61L,
61L, 61L, 61L, 62L, 62L, 61L, 61L, 62L, 61L, 61L, 61L, 61L,
61L, 61L, 60L, 60L, 60L, 60L, 58L, 58L, 58L, 58L, 55L, 55L,
55L, 55L, 55L, 55L, 55L, 55L, 52L, 52L, 52L, 52L, 52L, 52L,
52L, 52L, 51L, 51L, 51L, 51L, 49L, 49L, 49L, 49L, 48L, 48L,
48L, 48L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 47L, 46L, 46L,
46L, 46L, 46L, 46L, 46L, 46L, 45L, 45L, 45L, 45L, 45L, 45L,
45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 45L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 43L, 43L, 43L, 43L, 28L, 27L,
28L, 28L, 53L, 53L, 53L, 53L, 56L, 56L, 56L, 56L, 41L, 41L,
41L, 41L, 41L, 41L, 40L, 41L, 41L, 42L, 41L, 41L), FNDX = structure(c(2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
HIGD = c(9L, 10L, 11L, 12L, 14L, 12L, 9L, 13L, 9L, 10L, 15L,
12L, 15L, 15L, 12L, 12L, 12L, 12L, 14L, 13L, 13L, 12L, 14L,
14L, 17L, 15L, 9L, 12L, 12L, 10L, 10L, 12L, 12L, 14L, 12L,
14L, 12L, 17L, 12L, 12L, 20L, 10L, 12L, 14L, 12L, 18L, 12L,
12L, 20L, 15L, 12L, 14L, 18L, 12L, 13L, 18L, 12L, 12L, 15L,
12L, 17L, 10L, 13L, 13L, 14L, 8L, 16L, 12L, 12L, 20L, 13L,
12L, 10L, 12L, 5L, 12L, 12L, 12L, 16L, 10L, 8L, 13L, 8L,
16L, 11L, 9L, 15L, 14L, 12L, 18L, 6L, 12L, 10L, 8L, 12L,
8L, 13L, 12L, 11L, 13L, 12L, 12L, 13L, 12L, 14L, 12L, 12L,
11L, 12L, 12L, 12L, 10L, 12L, 14L, 8L, 12L, 12L, 14L, 9L,
12L, 7L, 16L, 15L, 15L, 20L, 12L, 12L, 14L, 17L, 12L, 12L,
12L, 17L, 15L, 12L, 10L, 12L, 10L, 11L, 17L, 10L, 12L, 14L,
8L, 12L, 12L, 12L, 11L, 12L, 12L, 8L, 13L, 12L, 12L, 12L,
19L, 12L, 12L, 13L, 12L, 17L, 12L, 16L, 14L, 16L, 18L, 12L,
12L, 12L, 12L, 12L, 12L, 16L, 16L, 12L, 12L, 16L, 11L, 12L,
12L, 16L, 12L, 12L, 11L, 12L, 12L, 16L, 12L, 12L, 12L, 12L,
16L, 10L, 11L, 15L, 12L, 14L, 10L, 15L, 13L), DEG = structure(c(1L,
1L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 3L, 2L, 2L,
2L, 2L, 3L, 2L, 2L, 2L, 3L, 5L, 5L, 3L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 2L, 4L, 2L, 2L, 5L, 1L, 2L, 2L, 2L, 5L,
2L, 2L, 5L, 2L, 2L, 3L, 5L, 2L, 2L, 5L, 2L, 2L, 2L, 2L, 4L,
1L, 2L, 2L, 3L, 1L, 4L, 2L, 2L, 5L, 2L, 2L, 1L, 2L, 1L, 2L,
2L, 2L, 4L, 1L, 1L, 2L, 1L, 4L, 1L, 1L, 3L, 2L, 2L, 5L, 1L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 3L, 2L, 2L, 3L, 2L, 3L, 2L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 2L, 2L, 3L, 1L, 2L, 1L,
4L, 3L, 3L, 5L, 2L, 2L, 3L, 5L, 2L, 2L, 2L, 5L, 2L, 2L, 1L,
2L, 1L, 1L, 4L, 1L, 2L, 3L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 5L, 2L, 2L, 2L, 2L, 5L, 2L, 4L, 2L, 4L, 5L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 2L, 4L, 1L, 2L, 2L, 4L,
2L, 2L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 4L, 1L, 1L, 2L, 2L,
2L, 1L, 2L, 2L), .Label = c("0", "1", "2", "3", "4"), class = "factor"),
CHK = structure(c(1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L,
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L), .Label = c("1",
"2"), class = "factor"), AGP1 = c(23, 16, 20, 21, 24, 20,
19, 23, 22, 20, 19, 18, 24, 26, 23, 25, 21, 20, 22, 16, 24,
19, 21, 25, 26, 20, 18, 22, 23, 20, 17, 23, 23, 21, 22, 24,
21, 26, 22, 33, 26, 18, 19, 21, 25, 27, 20, 25, 26, 21, 24,
25, 28, 21, 20, 21, 30, 25, 20, 23, 30, 21, 23, 24, 22, 34,
23, 19, 30, 28, 26, 25, 21, 24, 24, 24, 26, 26, 32, 22, 28,
26, 28, 27, 22, 30, 25, 26, 26, 33, 25, 29, 21, 18, 22, 23,
28, 25, 24, 33, 20, 25, 24, 24, 30, 30, 30, 24, 24, 23, 16,
26, 24, 28, 20, 25, 23, 21, 23, 20, 24, 24, 22, 24, 25, 25,
24, 25, 22, 22, 23, 19, 26, 20, 24, 22, 19, 23, 23, 21, 27,
19, 26, 15, 27, 23, 22, 17, 33, 25, 20, 22, 24, 23, 20, 30,
18, 22, 30, 22, 25, 23, 23, 23, 25, 27, 27, 25, 24, 22, 23,
18, 27, 31, 14, 20, 29, 22, 20, 23, 29, 28, 23, 26, 21, 27,
26, 25, 25, 20, 21, 22, 40, 21, 21, 26, 34, 21, 30, 21),
AGMN = c(13L, 11L, 12L, 11L, 14L, 15L, 11L, 13L, 15L, 14L,
13L, 13L, 14L, 13L, 14L, 16L, 17L, 12L, 13L, 14L, 12L, 12L,
10L, 8L, 13L, 11L, 10L, 13L, 14L, 12L, 10L, 12L, 14L, 11L,
14L, 11L, 12L, 13L, 12L, 16L, 11L, 13L, 11L, 12L, 10L, 13L,
11L, 16L, 14L, 11L, 12L, 12L, 14L, 12L, 13L, 13L, 13L, 11L,
9L, 16L, 14L, 14L, 11L, 13L, 12L, 14L, 13L, 12L, 14L, 14L,
11L, 10L, 15L, 12L, 14L, 11L, 16L, 15L, 12L, 12L, 14L, 13L,
15L, 14L, 16L, 11L, 15L, 13L, 17L, 11L, 13L, 13L, 15L, 13L,
17L, 15L, 17L, 11L, 13L, 15L, 12L, 16L, 12L, 10L, 16L, 13L,
12L, 14L, 14L, 14L, 12L, 15L, 12L, 12L, 14L, 13L, 14L, 12L,
11L, 11L, 16L, 12L, 13L, 13L, 14L, 12L, 13L, 13L, 11L, 11L,
12L, 11L, 14L, 12L, 14L, 13L, 12L, 15L, 13L, 12L, 15L, 11L,
13L, 13L, 12L, 12L, 11L, 13L, 14L, 13L, 11L, 11L, 12L, 11L,
12L, 12L, 15L, 17L, 13L, 10L, 16L, 12L, 13L, 12L, 12L, 13L,
14L, 13L, 15L, 15L, 12L, 17L, 15L, 12L, 12L, 14L, 12L, 12L,
11L, 16L, 12L, 11L, 12L, 11L, 17L, 11L, 13L, 12L, 16L, 13L,
14L, 12L, 15L, 16L, 12L, 14L, 13L, 13L, 12L, 12L), NLV = c(0,
1, 1, 0, 1, 0, 0, 0, 2, 0, 3, 0, 2, 1, 0, 0, 0, 1, 0, 0,
1, 0, 4, 0, 1, 2, 2, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 2,
0, 0, 2, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 2, 2, 1, 0, 2,
0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 4, 0, 0, 0, 0, 1, 1, 0, 1,
0, 0, 0, 4, 1, 0, 0, 1, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 1, 0,
0, 0, 0, 0, 2, 1, 1, 1, 0), LIV = c(5, 3, 3, 3, 3, 2, 5,
2, 2, 2, 2, 2, 3, 1, 4, 2, 2, 2, 2, 6, 3, 2, 3, 1, 4, 2,
3, 2, 2, 2, 3, 2, 3, 3, 4, 2, 2, 2, 3, 1, 4, 2, 3, 2, 1,
4, 3, 1, 4, 1, 2, 2, 5, 2, 2, 1, 1, 2, 2, 2, 0, 3, 2, 3,
3, 3, 3, 7, 3, 3, 5, 2, 5, 2, 3, 3, 3, 2, 2, 3, 3, 1, 3,
2, 4, 1, 4, 3, 2, 1, 3, 2, 3, 5, 2, 3, 2, 2, 2, 3, 5, 3,
3, 0, 2, 2, 2, 6, 4, 3, 3, 4, 2, 2, 6, 3, 3, 3, 2, 5, 5,
4, 2, 5, 4, 2, 3, 3, 3, 1, 2, 0, 4, 5, 2, 3, 1, 3, 2, 5,
11, 3, 7, 1, 4, 4, 6, 3, 2, 1, 1, 3, 3, 2, 1, 3, 4, 2, 2,
5, 4, 3, 3, 4, 3, 3, 1, 2, 1, 1, 5, 7, 2, 1, 2, 6, 3, 1,
2, 2, 4, 3, 4, 1, 6, 4, 4, 2, 3, 4, 5, 4, 1, 3, 4, 3, 2,
2, 2, 2), WT = c(118L, 175L, 135L, 125L, 118L, 183L, 218L,
192L, 125L, 123L, 140L, 160L, 150L, 130L, 140L, 130L, 150L,
148L, 134L, 138L, 116L, 145L, 195L, 180L, 137L, 135L, 155L,
120L, 126L, 191L, 185L, 119L, 129L, 170L, 110L, 155L, 105L,
115L, 120L, 150L, 135L, 110L, 170L, 145L, 170L, 140L, 240L,
100L, 92L, 160L, 155L, 132L, 110L, 145L, 155L, 110L, 129L,
131L, 218L, 115L, 110L, 130L, 97L, 120L, 130L, 150L, 123L,
145L, 135L, 132L, 205L, 127L, 120L, 145L, 175L, 144L, 123L,
170L, 134L, 155L, 125L, 140L, 120L, 134L, 150L, 117L, 147L,
124L, 129L, 170L, 153L, 130L, 145L, 140L, 155L, 116L, 115L,
175L, 179L, 119L, 153L, 185L, 280L, 140L, 126L, 193L, 140L,
116L, 140L, 138L, 175L, 155L, 125L, 113L, 110L, 190L, 114L,
126L, 159L, 170L, 156L, 161L, 150L, 115L, 95L, 235L, 145L,
123L, 145L, 155L, 115L, 190L, 120L, 110L, 148L, 120L, 132L,
115L, 125L, 120L, 155L, 170L, 180L, 179L, 137L, 107L, 144L,
189L, 80L, 142L, 150L, 154L, 90L, 150L, 102L, 110L, 101L,
109L, 210L, 198L, 124L, 133L, 120L, 165L, 130L, 240L, 125L,
183L, 130L, 105L, 123L, 180L, 130L, 104L, 158L, 160L, 108L,
127L, 145L, 127L, 132L, 140L, 178L, 130L, 130L, 265L, 195L,
125L, 105L, 161L, 135L, 185L, 115L, 140L, 145L, 195L, 138L,
118L, 129L, 180L), AGLP = c(39L, 39L, 39L, 40L, 39L, 38L,
38L, 37L, 38L, 38L, 37L, 38L, 38L, 38L, 38L, 38L, 38L, 38L,
39L, 38L, 39L, 35L, 35L, 38L, 37L, 37L, 37L, 38L, 36L, 36L,
37L, 37L, 36L, 34L, 36L, 35L, 29L, 36L, 36L, 36L, 35L, 35L,
36L, 36L, 34L, 35L, 34L, 35L, 33L, 33L, 32L, 33L, 33L, 29L,
29L, 33L, 32L, 32L, 26L, 32L, 30L, 30L, 31L, 31L, 50L, 53L,
35L, 46L, 53L, 44L, 42L, 50L, 52L, 46L, 51L, 50L, 33L, 39L,
53L, 39L, 53L, 50L, 41L, 45L, 56L, 36L, 52L, 52L, 34L, 54L,
50L, 55L, 53L, 56L, 55L, 43L, 51L, 42L, 50L, 47L, 53L, 55L,
42L, 25L, 44L, 50L, 55L, 47L, 52L, 50L, 47L, 50L, 36L, 45L,
40L, 48L, 50L, 43L, 42L, 42L, 52L, 50L, 45L, 51L, 49L, 44L,
44L, 49L, 48L, 48L, 48L, 29L, 47L, 47L, 45L, 45L, 47L, 29L,
47L, 39L, 46L, 45L, 46L, 40L, 46L, 46L, 46L, 39L, 45L, 38L,
45L, 46L, 45L, 45L, 28L, 45L, 45L, 40L, 40L, 33L, 45L, 45L,
46L, 35L, 44L, 45L, 44L, 44L, 44L, 44L, 33L, 44L, 43L, 43L,
21L, 39L, 29L, 27L, 27L, 29L, 50L, 49L, 43L, 49L, 47L, 42L,
50L, 47L, 27L, 31L, 36L, 41L, 41L, 41L, 40L, 41L, 42L, 41L,
41L, 41L), MST = structure(c(1L, 3L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 5L, 2L, 1L, 1L, 2L, 1L, 1L, 4L, 1L, 2L,
1L, 2L, 5L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L,
1L, 5L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 5L,
4L, 1L, 5L, 4L, 4L, 1L, 5L, 3L, 1L, 5L, 1L, 4L, 4L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 4L, 1L, 4L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 4L, 5L, 1L, 1L, 1L, 1L, 3L,
5L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 4L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 4L, 1L, 1L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 4L, 1L, 1L, 1L,
1L, 3L, 4L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5"), class = "factor")), .Names = c("STR",
"OBS", "AGMT", "FNDX", "HIGD", "DEG", "CHK", "AGP1", "AGMN",
"NLV", "LIV", "WT", "AGLP", "MST"), row.names = c(NA, -200L), class = "data.frame")
Could it be bbdm[train] that it can't find, rather than train itself? What error message do you get?
You can use the browser command to debug here. i.e.
# gcv: cross-validation of a regression model, accumulating train/test
# confusion tables over the folds. This copy has a browser() call inserted
# at the start of the "clogit" branch for interactive debugging.
#
# Arguments:
#   formula - model formula; passed to glm()/clogit() and used to build the
#             model frame from which the response is taken
#   data    - data frame with the variables (copied to the local `set`)
#   method  - "logistic" and "clogit" select the two branches below; any other
#             value (including the default "rpart") matches no branch shown here
#   cross   - number of folds
#   times, k, layer, seed - not referenced in the code shown
#
# Returns a list with elements Train, TrainRate, Test and TestRate.
gcv<-function(formula,data=NULL,method="rpart",cross=5,times=10,k=7,layer=5,seed=0)
{
set=data;
n=nrow(set);
# The RNG is seeded from the current time; the `seed` argument is not used.
set.seed(as.vector(Sys.time()));
# bb1 is not referenced again in the code shown.
bb1=1:n;
# bb2 holds a fold label (1..cross) for every row, in shuffled order.
bb2=rep(1:cross,ceiling(n/cross))[1:n];
bb2=sample(bb2,n);
# samp is not referenced again in the code shown.
samp=sample(c(1:n),size=n);
# m: number of columns of the fold matrix, ceiling(n/cross).
m=ceiling(n/cross);
# smp: one row per fold, to be filled with the row indices of that fold.
smp<-mat.or.vec(cross,m);
# j[f]: column position used for the next index written into fold f.
j=rep(0,cross)
# NOTE(review): j starts at 0 and is used as the column index before it is
# incremented, so the first index assigned to each fold is written to column 0
# (which appears to store nothing) and the last column keeps its initial 0 --
# confirm whether j should start at 1 / be incremented first.
for (i in 1:n)
{
smp[bb2[i],j[bb2[i]]]=i
j[bb2[i]]=j[bb2[i]]+1
}
# Here we separate the original set into 5(variable cross)sets,
# each time we take one out and treat it as the testing set
# Rebuild the call to gcv() as a call to model.frame() keeping only the
# formula and data arguments, and evaluate it in the caller's frame.
mf <- match.call(expand.dots = FALSE)
# Note: m is reused here and no longer holds the fold size.
m <- match(c("formula","data"), names(mf), 0L)
mf <- mf[c(1L, m)]
mf$drop.unused.levels <- TRUE
mf[[1L]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
response<-model.response(mf)
#code copied from function.lm
# reslvl: number of factor levels of the response; tra/tes are the
# reslvl x reslvl accumulators for the training and testing tables.
reslvl<-length(levels(response))
tra<-mat.or.vec(reslvl,reslvl);
tes<-mat.or.vec(reslvl,reslvl);
for (i in 1:cross)
{
# Row indices of fold i are the test set; the rest form the training set.
test<-smp[i,];
# NOTE(review): the upper bound 200 is hard-coded; presumably this should be n.
train<-setdiff(1:200,test);
show(train); #The 'train' set can be shown here.
#some "if" and "else"statements are hidden
if (method=="logistic")#logistic is running well
{
# NOTE(review): the model is fitted on the full `set`, not on set[train,].
# `bin` is not defined in this function; it must come from elsewhere.
bb.log<-step(glm(formula,set,family=binomial),trace=FALSE)
tra<-tra+as.vector(t(table(response[train],
bin(predict.glm(bb.log,set[train,],type="response")))))
tes<-tes+as.vector(t(table(response[test],
bin(predict.glm(bb.log,set[test,],type="response")))))
}
else if (method=="clogit")#clogit is meeting a problem.
{
##### BROWSER() CALL ##########
# Execution pauses here with an interactive prompt inside gcv's frame, so
# locals such as `train` and the global `bbdm` can be inspected (e.g. via ls()).
browser()
library("survival")
# NOTE(review): this fits on `bbdm`, which is neither an argument nor a local
# of gcv (the data argument is available as `set`). step() presumably
# re-evaluates the stored call outside gcv's frame, where the local `train`
# is not visible -- confirm.
bb.clog<-step(clogit(formula,bbdm[train,]),trace=FALSE)
tra<-tra+as.vector(t(table( response[train],
bin(predict(bb.clog,set[train,])))))
tes<-tes+as.vector(t(table( response[test],
bin(predict(bb.clog,set[test,])))))
}
}
# Average the accumulated tables over the folds.
tra<-tra/cross;
tes<-tes/cross;
# Rates are 1 minus the diagonal share of each averaged table.
trainrate=1-sum(diag(tra))/sum(tra)
testrate=1-sum(diag(tes))/sum(tes)
result<-list(Train=tra,TrainRate=trainrate,Test=tes,TestRate=testrate)
result
}
Browser can be used to debug functions like this. Essentially, when you run the code, you'll enter into the environment at the moment browser was called. This will allow you to explore and see if the variables are what you thought they were. You can do an ls() to see which objects are defined, or try to find the value of train or (my suspicion) bbdm to see that they're all properly defined.

Resources