Group dates by bimester - r

Here is a sample of the data I'm currently working on:
x <- structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
min = c(59L, 32L, 23L, 26L, 20L, 14L, 1L, 5L, 32L, 2L),
hour = c(10L, 15L, 12L, 12L, 16L, 18L, 18L, 9L, 14L, 12L),
mday = c(9L, 15L, 2L, 15L, 20L, 26L, 11L, 22L, 9L, 16L),
mon = c(4L, 11L, 10L, 7L, 9L, 8L, 10L, 8L, 8L, 4L),
year = c(111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L),
wday = c(1L, 4L, 3L, 1L, 4L, 1L, 5L, 4L, 5L, 1L),
yday = c(128L, 348L, 305L, 226L, 292L, 268L, 314L, 264L, 251L, 135L),
isdst = c(0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L)),
.Names = c("sec", "min", "hour", "mday", "mon", "year",
"wday", "yday", "isdst"),
class = c("POSIXlt", "POSIXt"))
So that
> x
[1] "2011-05-09 10:59:00" "2011-12-15 15:32:00" "2011-11-02 12:23:00"
[4] "2011-08-15 12:26:00" "2011-10-20 16:20:00" "2011-09-26 18:14:00"
[7] "2011-11-11 18:01:00" "2011-09-22 09:05:00" "2011-09-09 14:32:00"
[10] "2011-05-16 12:02:00"
Say I want to tabulate the distribution of x by month. This is how I accomplish it:
> table(strftime(x, '%m'))
05 08 09 10 11 12
2 1 3 1 2 1
Now I want to do a similar tabulation, but this time I want to group the data by bimester (and possibly by trimester or semester, later on). I've taken a look at the help page for strptime, but couldn't find an appropriate separator.
This is the best I have come up with so far:
> table(cut(x = as.numeric(strftime(x, '%m')),
breaks = c(1, 3, 5, 7, 9, 11, 13),
labels = c('1-2', '3-4', '5-6', '7-8', '9-10', '11-12'),
right = FALSE))
1-2 3-4 5-6 7-8 9-10 11-12
0 0 2 1 4 3
It is a convoluted way of reaching this, but it's OK for a simple example and a single case. However, this approach will give me headaches down the road, since I'll want those data to remain POSIX (not to mention it makes my code scarier than it should). Is there an elegant solution for this?

If you're sticking with table and vectors (as opposed to having rectangular data/output, in which case I'd use data.table), you could do:
# x$mon is the 0-based month (May is stored as 4 in the sample data), so
# integer division by 2 pairs the months up, and 2 * k + 1 labels each pair by
# its first month counted from 1 (1, 3, 5, 7, 9 or 11).
table(2*(x$mon %/% 2) + 1)
#
# 5 7 9 11
# 2 1 4 3

You could do away with any formatting of the date values themselves and just create a lookup vector for your groupings. This would also allow total flexibility in specifying which months fit into which categories. E.g.:
# Lookup vector with one entry per calendar month: each bimester label is
# repeated twice, so two consecutive months map to the same label.
src <- factor(rep(c('01-02','03-04','05-06','07-08','09-10','11-12'),each=2))
# x$mon is 0-based, so add 1 to index into the 12-element lookup.
src[x$mon+1]
#[1] 05-06 11-12 11-12 07-08 09-10 09-10 11-12 09-10 09-10 05-06
#Levels: 01-02 03-04 05-06 07-08 09-10 11-12
# Because src is a factor, bimesters with no observations are still tabulated
# (with a count of 0).
table(src[x$mon+1])
#01-02 03-04 05-06 07-08 09-10 11-12
# 0 0 2 1 4 3

Related

How to insert new rows for missing data with intervals that could vary by a few minutes in R

I would like to insert rows when there are missing data within a 5 minute interval glucose sensor dataset. I have managed to complete this using the tsibble package but there can be time drifts in the data e.g. the sensor records a value at 4 minutes instead of 5. This causes the inserted time stamps to become unsynchronised throughout the remainder of the data frame.
Is there a way to complete this for a time interval that should be 5 minutes, but could be between 4 and 6 minutes? The dataset also includes multiple different IDs.
The ultimate aim is then to fill in the missing data gaps based upon a set criteria (i.e. max fill <= 3 rows) using the existing data.
Reprex pasted below.
library(tsibble, warn.conflicts = FALSE)
#> Warning: package 'tsibble' was built under R version 4.1.1
Data <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L),
gl = c(125L, 133L, 132L, 130L, 133L, 135L, 166L, 161L, 67L, 66L, 67L, 69L, 67L),
time = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
min = c(42L, 47L, 51L, 56L, 6L, 11L, 11L, 16L, 2L, 17L, 22L, 27L, 32L),
hour = c(9L, 9L, 9L, 9L, 10L, 10L, 11L, 11L, 0L, 0L, 0L, 0L, 0L),
mday = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L),
mon = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L),
year = c(121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L, 121L,121L),
wday = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 0L, 0L, 0L, 0L,0L),
yday = c(92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 93L, 93L,93L, 93L, 93L),
isdst = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,0L, 0L, 0L, 0L)),
class = c("POSIXlt", "POSIXt"), tzone = "GMT"),
dif = structure(c(NA, 5, 4, 5, 10, 5, 60, 5, NA, 15, 5, 5, 5),
units = "mins", class = "difftime")),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
row.names = c(NA, -13L), groups = structure(list(id = 1:2, .rows = structure(list(1:8, 9:13),
ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -2L), .drop = TRUE))
x <- new_interval(minute = 5)
tsdata <- build_tsibble(Data, key = id, index = time, interval = x)
tsdata <- fill_gaps(tsdata, .full = FALSE)
This is probably not a final answer to what you are looking for, but it might get you started towards what you want.
library(data.table)
library(zoo)
# Build the data.table the steps below operate on from the question's `Data`.
# `Data$time` is stored as POSIXlt, which data.table does not support as a
# column type, so it is converted to POSIXct here.
DT <- data.table::data.table(
  id = Data$id,
  gl = Data$gl,
  time = as.POSIXct(Data$time)
)
# Split to list by id
L <- split(DT, by = "id")
# Interpolate gl based on time
ans <- lapply(L, function(x) {
  # build time series by minute (by = 60 is in seconds for date-times)
  temp <- data.table::data.table(
    id = unique(x$id),
    time = seq(min(x$time), max(x$time), by = 60)
  )
  # join in measured data; minutes without a measurement stay NA
  temp[x, gl_measured := i.gl, on = .(time)]
  # interpolate gl-values linearly between the measured points
  temp[, gl_approx := zoo::na.approx(gl_measured)]
})
# Bind list together again
final <- data.table::rbindlist(ans)

R: how to display a table with a heat map-type representation of percentage values

R: how to display a table with a heat map-type representation of percentage values, as in Excel (the same as shown in the screenshot).
In heat-map table/plot, I want all columns shown below except Total(%), with conditional formatting such that lower values are displayed in green while higher values are displayed in red.
The 0 or early(%) column should not be highlighted in heat map.
Check attached screenshot of excel to understand what I am looking for.
I am unable to understand what to do in this type of excel to R conversion.
The table contains the columns displayed below.
User 0 or early(%) <=5(%) <=10(%) <=15(%) <=20(%) <=25(%) TOTAL (%)
A 57 15 18 5 5 0 100
B 64 22 12 2 0 0 100
C 73 12 10 3 2 0 100
D 45 37 7 4 3 5 100
E 87 4 2 2 1 4 100
F 44 39 3 0 1 13 100
G 84 7 2 5 2 0 100
H 90 3 0 7 0 0 100
I 88 2 2 7 2 0 100
J 43 17 0 34 6 0 100
K 69 4 2 20 2 2 100
L 37 5 5 0 5 49 100
M 69 18 0 10 3 0 100
N 59 8 3 30 0 0 100
O 91 6 3 0 0 0 100
P 50 7 10 27 3 3 100
Q 40 23 7 13 10 7 100
If you want to reproduce the same "heatmap" as the one you obtained with Excel, I would rather consider using the formattable package instead of ggplot2. formattable allows data frames to be rendered as HTML tables with formatter functions applied, which resembles conditional formatting in Microsoft Excel (https://cran.r-project.org/web/packages/formattable/vignettes/formattable-data-frame.html).
I took inspiration from @MrFlick's answer on this post: Is it possible to use more than 2 colors in the color_tile function? to build the following answer.
First, we are creating a function that will make the color pattern for the heatmap. Based on your excel output, 0% values are green and then you have a gradient from yellow to orange to red.
library(formattable)
# Create a formattable "span" formatter that shades each cell's background.
#
# Cells equal to 0 get a plain "green" background. Every other cell is coloured
# along the ramp described by `...`, which is passed unchanged to colorRamp();
# the position on the ramp is the cell's value after normalize(). The cell text
# is shown as percent(x / 100).
color_tile2 <- function(...) {
  # CSS colours for the ramp. colorRamp() yields one row per value with red,
  # green and blue columns; the values are reshaped into a 3-row matrix (one
  # row per channel) before being handed to csscolor().
  ramp_css <- function(values) {
    rgb_by_value <- colorRamp(...)(normalize(as.numeric(values)))
    csscolor(matrix(
      as.integer(rgb_by_value),
      byrow = TRUE,
      dimnames = list(c("red", "green", "blue"), NULL),
      nrow = 3
    ))
  }

  cell_style <- function(x) {
    style(
      display = "block",
      padding = "0 4px",
      `border-radius` = "4px",
      # ramp_css(x) sits in the `no` slot so it is only evaluated when at
      # least one cell is non-zero.
      `background-color` = ifelse(x == 0, "green", ramp_css(x))
    )
  }

  formatter("span", style = cell_style, x ~ percent(x / 100))
}
Here, we apply the function defined above to the dataframe, so that particular columns are colored and others are not:
library(formattable)
formattable(df, align = "c", list(
area(col = `<=5(%)`:`<=25(%)`) ~color_tile2(c("yellow","orange","red")),
User = FALSE,
`TOTAL_(%)` = FALSE,
`0_or_early(%)` = formatter("span", style = ~style(color = "darkgreen"), x ~ percent(x/100)))
)
Does it look like what you are trying to get?
Reproducible example
structure(list(User = c("A", "B", "C", "D", "E", "F", "G", "H",
"I", "J", "K", "L", "M", "N", "O", "P", "Q"), `0_or_early(%)` = c(57L,
64L, 73L, 45L, 87L, 44L, 84L, 90L, 88L, 43L, 69L, 37L, 69L, 59L,
91L, 50L, 40L), `<=5(%)` = c(15L, 22L, 12L, 37L, 4L, 39L, 7L,
3L, 2L, 17L, 4L, 5L, 18L, 8L, 6L, 7L, 23L), `<=10(%)` = c(18L,
12L, 10L, 7L, 2L, 3L, 2L, 0L, 2L, 0L, 2L, 5L, 0L, 3L, 3L, 10L,
7L), `<=15(%)` = c(5L, 2L, 3L, 4L, 2L, 0L, 5L, 7L, 7L, 34L, 20L,
0L, 10L, 30L, 0L, 27L, 13L), `<=20(%)` = c(5L, 0L, 2L, 3L, 1L,
1L, 2L, 0L, 2L, 6L, 2L, 5L, 3L, 0L, 0L, 3L, 10L), `<=25(%)` = c(0L,
0L, 0L, 5L, 4L, 13L, 0L, 0L, 0L, 0L, 2L, 49L, 0L, 0L, 0L, 3L,
7L), `TOTAL_(%)` = c(100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L)), row.names = c(NA,
-17L), class = c("data.table", "data.frame"))

inconsistent datetime difference output

I am simply trying to calculate difference between observations for each group.
Dataset:
structure(list(IDYEAR = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "A0712002", class = "factor"),
MONTH = c(12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), DAY = c(5L,
6L, 6L, 7L, 8L, 8L, 9L, 9L, 10L, 12L, 12L, 13L, 13L, 13L,
14L, 14L, 14L, 15L, 15L), YEAR = c(2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L), HOUR = c(9L, 19L,
23L, 1L, 1L, 3L, 19L, 21L, 17L, 17L, 19L, 17L, 19L, 23L,
3L, 9L, 19L, 3L, 11L), MINUTE = c(43L, 43L, 43L, 42L, 42L,
42L, 42L, 43L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L,
42L, 42L), SECOND = c(24L, 13L, 13L, 41L, 54L, 54L, 54L,
12L, 54L, 54L, 48L, 43L, 59L, 55L, 43L, 44L, 54L, 43L, 55L
), DATETIME = structure(c(12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 7L, 11L, 10L), .Label = c("12/10/2002 17:42",
"12/12/2002 17:42", "12/12/2002 19:42", "12/13/2002 17:42",
"12/13/2002 19:42", "12/13/2002 23:42", "12/14/2002 19:42",
"12/14/2002 3:42", "12/14/2002 9:42", "12/15/2002 11:42",
"12/15/2002 3:42", "12/5/2002 9:43", "12/6/2002 19:43", "12/6/2002 23:43",
"12/7/2002 1:42", "12/8/2002 1:42", "12/8/2002 3:42", "12/9/2002 19:42",
"12/9/2002 21:43"), class = "factor"), GRP1700 = c(873L,
873L, 874L, 875L, 875L, 876L, 876L, 876L, 876L, 876L, 877L,
877L, 877L, 877L, 877L, 878L, 878L, 878L, 879L), ID1700 = structure(c(1L,
1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 7L), .Label = c("A0712002873", "A0712002874", "A0712002875",
"A0712002876", "A0712002877", "A0712002878", "A0712002879"
), class = "factor")), .Names = c("IDYEAR", "MONTH", "DAY",
"YEAR", "HOUR", "MINUTE", "SECOND", "DATETIME", "GRP1700", "ID1700"
), class = "data.frame", row.names = c(NA, -19L))
Code
rm(list = ls())
dfa1<-read.csv("test.csv")
head(dfa1)
dput(dfa1)
dfa1[["TESTDATE"]]<-as.POSIXct(dfa1$DATETIME,format="%m/%d/%Y %H:%M",tz="GMT")
dfa1$ID1700<-as.factor(dfa1$ID1700)
dfa1<-dfa1 %>%
arrange(IDYEAR, GRP1700, TESTDATE) %>%
group_by(ID1700) %>%
mutate(TIME1700 = TESTDATE - lag (TESTDATE))
write.csv(dfa1, "test2.csv")
Output:
TESTDATE TIME1700
1 2002-12-05 09:43:00 NA days
2 2002-12-06 19:43:00 1.416667 days
3 2002-12-06 23:43:00 NA days
4 2002-12-07 01:42:00 NA days
5 2002-12-08 01:42:00 1.000000 days
6 2002-12-08 03:42:00 NA days
7 2002-12-09 19:42:00 40.000000 days
8 2002-12-09 21:43:00 2.016667 days
9 2002-12-10 17:42:00 19.983333 days
10 2002-12-12 17:42:00 48.000000 days
11 2002-12-12 19:42:00 NA days
12 2002-12-13 17:42:00 22.000000 days
13 2002-12-13 19:42:00 2.000000 days
14 2002-12-13 23:42:00 4.000000 days
15 2002-12-14 03:42:00 4.000000 days
16 2002-12-14 09:42:00 NA days
17 2002-12-14 19:42:00 10.000000 days
18 2002-12-15 03:42:00 8.000000 days
19 2002-12-15 11:42:00 NA days
I noticed that some of the output is in hours (line 8 - line 7) whereas some of the output is in days (line 5 - line 4). How can I receive consistent output (hours would be preferred)? Thanks in advance.

Merging two dataframes on a date range in R

In R I want to merge two dataframes on a range of dates, taking all rows from the second dataframe which fall on and between two columns of dates from the first dataframe. I couldn't find a strictly R function or version of the merge function that could do this, but I know there's a 'between' function in sql and I was thinking of trying the sqldf package (although I'm not well versed in sql). If there's a more R-ish way to do this, that would be preferable. Thank you in advance for your help!
df1 <- structure(list(ID = 1:2, PtID = structure(c(1L, 1L), .Label = c("T031", "T040", "T045", "T064", "T074", "T081", "T092", "T094", "T096", "T105", "T107", "T108", "T115", "T118", "T120", "T124", "T125", "T128", "T130", "T132", "T138", "T140", "T142", "T142_R1", "T146", "T158", "T159", "T160", "T164", "T166", "T169", "T171", "T173", "T197", "T208", "T214", "T221"), class = "factor"), StartDateTime = structure(list(sec = c(0, 0), min = c(11L, 35L), hour = c(17L, 17L), mday = c(23L, 23L), mon = c(9L, 9L), year = c(112L, 112L), wday = c(2L, 2L), yday = c(296L, 296L), isdst = c(1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt")), EndDateTime = structure(list(sec = c(0, 0), min = c(16L, 37L), hour = c(17L, 17L), mday = c(23L, 23L), mon = c(9L, 9L), year = c(112L, 112L), wday = c(2L, 2L), yday = c(296L, 296L), isdst = c(1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt"))), .Names = c("ID", "PtID", "StartDateTime", "EndDateTime"), row.names = 1:2, class = "data.frame")
df1
ID PtID StartDateTime EndDateTime
1 1 T031 2012-10-23 17:11:00 2012-10-23 17:16:00
2 2 T031 2012-10-23 17:35:00 2012-10-23 17:37:00
The second dataframe has several IDs (which match the first dataframe) and timestamps on the minute level.
df2
df2 <- structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), dateTime = structure(list(sec = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = 2:44, hour = c(17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L), mday = c(23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L), mon = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), year = c(112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L), wday = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), yday = c(296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L, 296L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"), class = c("POSIXlt", "POSIXt")), lat = 
c(33.06621406, 33.06616621, 33.06617305, 33.06617624, 33.06617932, 33.06618161, 33.06618326, 33.06618604, 33.06615089, 33.06628004, 33.06618461, 33.06615113, 33.0661362, 33.06620301, 33.0662218, 33.06624283, 33.06622268, 33.06622425, 33.06622787, 33.06623042, 33.06623318, 33.06623654, 33.06623826, 33.06623919, 33.06623907, 33.06624009, 33.06623804, 33.06624255, 33.06624377, 33.06624446, 33.06624242, 33.06624254, 33.06624513, 33.06624582, 33.06615573, 33.06625534, 33.06618541, 33.06613825, 33.06613624, 33.06614027, 33.06614551, 33.06614844, 33.06615393), lon = c(-116.6105531, -116.6105651,-116.6105613, -116.6105553, -116.610551, -116.610549, -116.6105484, -116.6105512, -116.6105712, -116.6104996, -116.6104711, -116.6104854, -116.6105596, -116.6104509, -116.610524, -116.6105535, -116.6105461, -116.6105461, -116.6105477, -116.6105498, -116.6105478, -116.6105473, -116.6105473, -116.6105488, -116.6105497, -116.6105479, -116.610545, -116.6105461, -116.6105448, -116.610543, -116.6105409, -116.6105395, -116.6105367, -116.6105337, -116.6105344, -116.6104779, -116.6104953,-116.6105222, -116.610526, -116.6105255, -116.6105282, -116.6105265,-116.6105282)), .Names = c("ID", "dateTime", "lat", "lon"), row.names = 1023:1065, class = "data.frame")
So the desired output would look like this:
ID PtID DateTime lat lon
1 T031 2012-10-23 17:11:00 33.06628 -116.6105
1 T031 2012-10-23 17:12:00 33.06618 -116.6105
1 T031 2012-10-23 17:13:00 33.06615 -116.6105
1 T031 2012-10-23 17:14:00 33.06614 -116.6106
1 T031 2012-10-23 17:15:00 33.06620 -116.6105
1 T031 2012-10-23 17:16:00 33.06622 -116.6105
2 T031 2012-10-23 17:35:00 33.06625 -116.6105
2 T031 2012-10-23 17:36:00 33.06616 -116.6105
2 T031 2012-10-23 17:37:00 33.06626 -116.6105
So with sqldf maybe something like this?
sqldf("SELECT df2.ID, df2.lon, df2.lat, FROM df1
INNER JOIN df2 ON df1.ID = df2.ID
WHERE df2.DateTime BETWEEN df1.StartDateTime AND df1.EndDateTime")
In general, it's not a good idea to use POSIXlt in data frames. Use POSIXct instead. Also, your SQL statement is OK except that the comma before FROM needs to be removed:
df1a <- transform(df1,
StartDateTime = as.POSIXct(StartDateTime),
EndDateTime = as.POSIXct(EndDateTime))
df2a <- transform(df2, dateTime = as.POSIXct(dateTime))
The SQL statement in the question has an extraneous comma before FROM.
Here is a slightly simplified statement. This one uses a left join instead to ensure that all ID's from df1a are included even if they have no matches in df2a.
sqldf("SELECT df1a.ID, PtID, dateTime, lat, lon
FROM df1a LEFT JOIN df2a
ON df1a.ID = df2a.ID AND dateTime BETWEEN StartDateTime AND EndDateTime")
You may want to look into defining your data as zoo objects. merge.zoo does something very close to what you ask. Refer to this question for more: R: merge two irregular time series

plotting x-axes with custom label in R

I've to plot these data:
day temperature
02/01/2012 13:30:00 10
10/01/2012 20:30:00 8
15/01/2012 13:30:00 12
25/01/2012 20:30:00 6
02/02/2012 13:30:00 5
10/02/2012 20:30:00 3
15/02/2012 13:30:00 6
25/02/2012 20:30:00 -1
02/03/2012 13:30:00 4
10/03/2012 20:30:00 -2
15/03/2012 13:30:00 7
25/03/2012 20:30:00 1
in the x-axis I want to label only the month and the day (e.g. Jan 02 ). How can I do this using the command plot() and axis()?
First, you will need to put your date text into a date class (e.g. as.POSIXct):
df <- structure(list(day = structure(list(sec = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), min = c(30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L), hour = c(13L, 20L, 13L, 20L, 13L, 20L,
13L, 20L, 13L, 20L, 13L, 20L), mday = c(2L, 10L, 15L, 25L, 2L,
10L, 15L, 25L, 2L, 10L, 15L, 25L), mon = c(0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L), year = c(112L, 112L, 112L, 112L,
112L, 112L, 112L, 112L, 112L, 112L, 112L, 112L), wday = c(1L,
2L, 0L, 3L, 4L, 5L, 3L, 6L, 5L, 6L, 4L, 0L), yday = c(1L, 9L,
14L, 24L, 32L, 40L, 45L, 55L, 61L, 69L, 74L, 84L), isdst = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt")), temperature = c(10L, 8L,
12L, 6L, 5L, 3L, 6L, -1L, 4L, -2L, 7L, 1L)), .Names = c("day",
"temperature"), row.names = c(NA, -12L), class = "data.frame")
df
df$day <- as.POSIXct(df$day, format="%d/%m/%Y %H:%M:%S")
Your dates should now plot correctly. Suppress the default x-axis by using the argument xaxt="n". Afterwards, you can create a sequence of dates where you would like your axis labeled, and apply this with axis.POSIXct:
# Draw the temperature line without the default x-axis (xaxt = "n") so that a
# custom date axis can be added afterwards. `type` is spelled out in full
# rather than relying on partial matching of `t`.
plot(df$day, df$temperature, type = "l", ylab = "Temperature", xlab = "Date",
     xaxt = "n")
# Tick positions: one per month, starting at the first observation.
SEQ <- seq(min(df$day), max(df$day), by = "months")
# All arguments are named: the first formal of axis.POSIXct() is `side`, not
# the dates, so a bare positional SEQ is easy to misread.
axis.POSIXct(side = 1, x = SEQ, at = SEQ, format = "%b %Y")
Similarly, to get a daily axis, simply modify the SEQ and axis.POSIXct code accordingly. For example, you may try:
# Same plot as before, again without the default x-axis; `type` is spelled out
# in full rather than relying on partial matching of `t`.
plot(df$day, df$temperature, type = "l", ylab = "Temperature", xlab = "Date",
     xaxt = "n")
# Tick positions: one per day between the first and last observation.
SEQ <- seq(min(df$day), max(df$day), by = "days")
# Label each tick as abbreviated month plus day of month (e.g. "Jan 02").
# All arguments are named because the first formal of axis.POSIXct() is `side`.
axis.POSIXct(side = 1, x = SEQ, at = SEQ, format = "%b %d")

Resources