Updating specific fields in MongoDB subdocuments (rmongodb) - r

I have some problems understanding how specific fields of a subdocument (as opposed to the entire subdocument) can be updated.
I seem to have understood how to query for certain field values in subdocuments, but I'm lost with respect to how a BSON document needs to be structured so that it only changes the fields queried.
I still feel like I don't fully understand how "plain MongoDB syntax" translates into R syntax and how exactly the update operators work. Any hints in that respect would be greatly appreciated.
Preliminaries
pkg <- "rmongodb"
lib <- file.path(R.home(), "library")
# Install the package into `lib` only when it cannot be found there.
# requireNamespace() is used for the availability check because it returns
# FALSE (instead of raising an error) when the package is missing.
if (!requireNamespace(pkg, lib.loc = lib, quietly = TRUE)) {
  install.packages(pkg, lib = lib)
}
# library() stops with an error if the package still cannot be loaded, whereas
# require() would only return FALSE and let the script continue without it.
library(pkg, lib.loc = lib, character.only = TRUE)
db <- "__test"
ns.0 <- "user"
ns <- paste(db, ns.0, sep=".")
con <- mongo.create(db=db)
Ensuring empty DB
mongo.remove(mongo=con, ns=ns)
Inserting documents
This section simply ensures some example data in the DB. It's just an auxiliary part which you can skip mentally!! Continue with section “Querying“ and see "Actual querying" to get an idea of the document structure which might be hard to grasp from the R code below.
BSON for document 1
blist <- NULL
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.append(buf, name="host",
value="unittest.com")
mongo.bson.buffer.start.array(buf, "paths")
mongo.bson.buffer.start.object(buf, "1")
mongo.bson.buffer.append(buf, name="path",
value="home")
mongo.bson.buffer.append(buf, name="url",
value="www.unittest.com/home")
mongo.bson.buffer.start.array(buf, "queries")
mongo.bson.buffer.start.object(buf, "1")
mongo.bson.buffer.append(buf, name="query",
value="?somequery")
mongo.bson.buffer.append(buf, name="url",
value="www.unittest.com/home?somequery")
mongo.bson.buffer.finish.object(buf) # finish query:1
mongo.bson.buffer.start.object(buf, "2")
mongo.bson.buffer.append(buf, name="query",
value="?someotherquery")
mongo.bson.buffer.append(buf, name="url",
value="www.unittest.com/home?someotherquery")
mongo.bson.buffer.finish.object(buf) # finish query:2
mongo.bson.buffer.finish.object(buf) # finish queries
mongo.bson.buffer.finish.object(buf) # finish path:1
mongo.bson.buffer.start.object(buf, "2")
mongo.bson.buffer.append(buf, name="path",
value="somepage")
mongo.bson.buffer.append(buf, name="url",
value="www.unittest.com/somepage")
mongo.bson.buffer.start.array(buf, "queries")
mongo.bson.buffer.start.object(buf, "1")
mongo.bson.buffer.append(buf, name="query",
value="?somequery")
mongo.bson.buffer.append(buf, name="url",
value="www.unittest.com/somepage?somequery")
mongo.bson.buffer.finish.object(buf) # finish query:1
mongo.bson.buffer.start.object(buf, "2")
mongo.bson.buffer.append(buf, name="query",
value="?someotherquery")
mongo.bson.buffer.append(buf, name="url",
value="www.unittest.com/somepage?someotherquery")
mongo.bson.buffer.finish.object(buf) # finish query:2
mongo.bson.buffer.finish.object(buf) # finish queries
mongo.bson.buffer.finish.object(buf) # finish path:2
mongo.bson.buffer.finish.object(buf) # finish paths
mongo.bson.buffer.finish.object(buf) # finish buf
b <- mongo.bson.from.buffer(buf)
blist <- c(blist, list(b))
BSON for document 2
EDIT 2012-01-23
I removed this section to make the question a bit easier to grasp.
Actual insert
sapply(blist, function(ii) {
mongo.insert(mongo=con, ns=ns, b=ii)
})
Querying
BSON for query
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.start.object(buf, "paths")
mongo.bson.buffer.start.object(buf, "$elemMatch")
mongo.bson.buffer.start.object(buf, "queries")
mongo.bson.buffer.start.object(buf, "$elemMatch")
mongo.bson.buffer.append(buf, name="query", value="?somequery")
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
query <- mongo.bson.from.buffer(buf)
> query
paths : 3
$elemMatch : 3
queries : 3
$elemMatch : 3
query : 2 ?somequery
Actual query
> mongo.find.one(mongo=con, ns=ns, query=query)
_id : 7 50feff31ba54a032514b6181
host : 2 unittest.com
paths : 4
1 : 3
path : 2 home
url : 2 www.unittest.com/home
queries : 4
1 : 3
query : 2 ?somequery
url : 2 www.unittest.com/home?somequery
2 : 3
query : 2 ?someotherquery
url : 2 www.unittest.com/home?someotherquery
2 : 3
path : 2 somepage
url : 2 www.unittest.com/somepage
queries : 4
1 : 3
query : 2 ?somequery
url : 2 www.unittest.com/somepage?somequery
2 : 3
query : 2 ?someotherquery
url : 2 www.unittest.com/somepage?someotherquery
Updating
BSON for update
I would like to set the value of the query field in query subdocuments. I had a look at the MongoDB Manual and tried something like this (using the $set and $ operators because there are arrays involved):
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.start.object(buf, "$set")
mongo.bson.buffer.start.object(buf, "paths")
mongo.bson.buffer.start.object(buf, "$")
mongo.bson.buffer.start.object(buf, "queries")
mongo.bson.buffer.start.object(buf, "$")
mongo.bson.buffer.append(
buf,
name="name",
value="abcd"
)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
bnew <- mongo.bson.from.buffer(buf)
> bnew
$set : 3
paths : 3
$ : 3
queries : 3
$ : 3
name : 2 abcd
Actual update
Apparently, this wasn't a good choice ;-)
res <- mongo.update(mongo=con, ns=ns, criteria=query,
objNew=bnew, flags=mongo.update.multi)
> res
[1] FALSE
2: http://docs.mongodb.org/manual/applications/update/#update-operators zU

Try this for bnew:
# Build the update document
#   { "$set" : { "paths.0.queries.1.query" : "?newquery" } }
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.start.object(buf, "$set")
# A single dotted field name addresses the nested field directly, instead of
# nesting one sub-object per level.
mongo.bson.buffer.append(buf, "paths.0.queries.1.query", "?newquery")
mongo.bson.buffer.finish.object(buf)
bnew <- mongo.bson.from.buffer(buf)
This will replace the query field of the 2nd element of queries in the 1st element of paths.

Related

Extracting All Emails Using GmailR

I'm trying to extract all the emails from my gmail account to do some analysis. The end goal is a dataframe of emails. I'm using the gmailR package.
So far I've extracted all the email threads and "expanded" them by mapping all the thread IDs to gm_thread(). Here's the code for that:
threads <- gm_threads(num_results = 5)
thread_ids <- gm_id(threads)
#extract all the thread ids
threads_expanded <- map(thread_ids, gm_thread)
This returns a list of all the threads. The structure of this is a list of gmail_thread objects. When you drill down one level into the list of thread objects, str(threads_expanded[[1]], max.level = 1), you get a single thread object which looks like:
List of 3
$ id : chr "xxxx"
$ historyId: chr "yyyy"
$ messages :List of 3
- attr(*, "class")= chr "gmail_thread"
Then, if you drill down further into the messages composing the threads, you start to get the useful info. str(threads_expanded[[1]]$messages, max.level = 1) gets you a list of the gmail_message objects for that thread:
List of 3
$ :List of 8
..- attr(*, "class")= chr "gmail_message"
$ :List of 8
..- attr(*, "class")= chr "gmail_message"
$ :List of 8
..- attr(*, "class")= chr "gmail_message"
Where I'm stuck is actually extracting all the useful information from each email within all the threads. The end goal is a dataframe with a column for the message_id, thread_id, to, from, etc. I'm imagining something like this:
message_id | thread_id | to | from | ... |
-------------------------------------------------------------------------
1234 | abcd | me@gmail.com | pam@gmail.com | ... |
1235 | abcd | pam@gmail.com | me@gmail.com | ... |
1236 | abcf | me@gmail.com | tim@gmail.com | ... |
It's not the prettiest answer, but it works. I'm going to work on vectorizing it later:
threads <- gm_threads(num_results = 5)
thread_ids <- gm_id(threads)
# Expand every thread id into its full gmail_thread object.
threads_expanded <- map(thread_ids, gm_thread)

# Extract all the individual messages from each thread into one flat list.
# Concatenating the per-thread message lists in one call avoids growing the
# list inside a loop and also copes with an empty set of threads.
msgs <- do.call(
  c,
  unname(lapply(threads_expanded, function(thread) thread$messages))
)

# Get the message id for each message.
msg_ids <- unlist(map(msgs, gm_id))

# Get the message body of each message, one slot per message.
msg_body <- vector("list", length(msgs))
for (i in seq_along(msgs)) {
  body <- gm_body(msgs[[i]])
  n_attachments <- nrow(gm_attachments(msgs[[i]]))
  # gm_body() does not return a NULL value for a missing body, rather an
  # empty list (length 0), so the length is what gets checked here.
  if (length(body) != 0 && n_attachments == 0) {
    # There is a body and there are no attachments: keep the body.
    msg_body[[i]] <- body
  } else {
    # Empty body or a message with attachments: store "" as a placeholder so
    # that this message still occupies a position in the result.
    msg_body[[i]] <- ""
  }
}
msg_body <- unlist(msg_body)

# Get the datetime info of each message and parse it into date-times.
msg_datetime <- msgs %>%
  map(gm_date) %>%
  unlist() %>%
  dmy_hms()

message_df <- tibble(msg_ids, msg_datetime, msg_body)
# All the other possible categories, e.g., to, from, cc, subject, etc.,
# either use a similar for loop or a map call.

Export XML data (from URL) to data.frame or CSV in R

I'm trying to load XML from a URL and save it as a CSV.
Here is the script:
query <-"https://commission-detail.api.cj.com/v3/commissions?date-type=posting&start-date=2016-03-01&end-date=2016-03-30"
token <-"xxxx"
xmlfile <- xmlTreeParse(GET(url=query, add_headers(Authorization=token)))
But I received the results in the following structure:
$doc
$file
[1] "<buffer>"
$version
[1] "1.0"
$children
$children$`cj-api`
<cj-api>
<commissions total-matched="4">
<commission>
<action-status>closed</action-status>
<action-type>advanced sale</action-type>
<aid>10789406</aid>
<commission-id>1965209327</commission-id>
<country>US</country>
<event-date>2016-03-02T04:22:04-0800</event-date>
<locking-date>2016-05-10</locking-date>
<order-id>1786924</order-id>
<original>true</original>
<original-action-id>1691086180</original-action-id>
<posting-date>2016-03-02T05:03:45-0800</posting-date>
<website-id>7991782</website-id>
<action-tracker-id>337452</action-tracker-id>
<action-tracker-name>JimmyJazz Sale</action-tracker-name>
<cid>3010924</cid>
<advertiser-name>Jimmy Jazz</advertiser-name>
<commission-amount>0.16</commission-amount>
<order-discount>0.00</order-discount>
<sid/>
<sale-amount>1.99</sale-amount>
</commission>
<commission>
<action-status>locked</action-status>
<action-type>advanced sale</action-type>
<aid>12378040</aid>
<commission-id>1969836131</commission-id>
<country>IL</country>
<event-date>2016-03-14T05:53:36-0700</event-date>
<locking-date>2016-05-13</locking-date>
<order-id>27307042</order-id>
<original>true</original>
<original-action-id>1695118411</original-action-id>
<posting-date>2016-03-14T06:30:52-0700</posting-date>
<website-id>7991782</website-id>
<action-tracker-id>361197</action-tracker-id>
<action-tracker-name>Sale</action-tracker-name>
<cid>3848495</cid>
<advertiser-name>boohoo.com</advertiser-name>
<commission-amount>0.40</commission-amount>
<order-discount>0.00</order-discount>
<sid/>
<sale-amount>2.88</sale-amount>
</commission>
<commission>
<action-status>locked</action-status>
<action-type>advanced sale</action-type>
<aid>12378040</aid>
<commission-id>1970220452</commission-id>
<country>GB</country>
<event-date>2016-03-15T03:15:11-0700</event-date>
<locking-date>2016-05-14</locking-date>
<order-id>27330813</order-id>
<original>true</original>
<original-action-id>1695483653</original-action-id>
<posting-date>2016-03-15T04:01:28-0700</posting-date>
<website-id>7991782</website-id>
<action-tracker-id>361197</action-tracker-id>
<action-tracker-name>Sale</action-tracker-name>
<cid>3848495</cid>
<advertiser-name>boohoo.com</advertiser-name>
<commission-amount>0.60</commission-amount>
<order-discount>0.00</order-discount>
<sid>DnoAwoTTYtLs</sid>
<sale-amount>4.31</sale-amount>
</commission>
<commission>
<action-status>locked</action-status>
<action-type>advanced sale</action-type>
<aid>12378040</aid>
<commission-id>1972164361</commission-id>
<country>IL</country>
<event-date>2016-03-20T06:15:41-0700</event-date>
<locking-date>2016-05-19</locking-date>
<order-id>27439097</order-id>
<original>true</original>
<original-action-id>1697317694</original-action-id>
<posting-date>2016-03-20T07:00:46-0700</posting-date>
<website-id>7991782</website-id>
<action-tracker-id>361197</action-tracker-id>
<action-tracker-name>Sale</action-tracker-name>
<cid>3848495</cid>
<advertiser-name>boohoo.com</advertiser-name>
<commission-amount>1.01</commission-amount>
<order-discount>0.00</order-discount>
<sid>9rftdVKxGwud</sid>
<sale-amount>7.24</sale-amount>
</commission>
</commissions>
</cj-api>
attr(,"class")
[1] "XMLDocumentContent"
$dtd
$external
NULL
$internal
NULL
attr(,"class")
[1] "DTDList"
attr(,"class")
[1] "XMLDocument" "XMLAbstractDocument"
Another option I've tried is:
xmlfile2 <-
read.csv(text=rawToChar(
GET(url=query, add_headers(Authorization=token))
[["content"]]), header = TRUE, sep =',')
But then I received the data in the following form:
X..xml.version.1.0.encoding.UTF.8..
1 <cj-api><commissions total-matched=4><commission><action-status>closed</action-status><action-type>advanced sale</action-type><aid>10789406</aid><commission-id>1965209327</commission-id><country>US</country><event-date>2016-03-02T04:22:04-0800</event-date><locking-date>2016-05-10</locking-date><order-id>1786924</order-id><original>true</original><original-action-id>1691086180</original-action-id><posting-date>2016-03-02T05:03:45-0800</posting-date><website-id>7991782</website-id><action-tracker-id>337452</action-tracker-id><action-tracker-name>JimmyJazz Sale </action-tracker-name><cid>3010924</cid><advertiser-name>Jimmy Jazz</advertiser-name><commission-amount>0.16</commission-amount><order-discount>0.00</order-discount><sid></sid><sale-amount>1.99</sale-amount></commission><commission><action-status>locked</action-status><action-type>advanced sale</action-type><aid>12378040</aid><commission-id>1969836131</commission-id><country>IL</country><event-date>2016-03-14T05:53:36-0700</event-date><locking-date>2016-05-13</locking-date><order-id>27307042</order-id><original>true</original><original-action-id>1695118411</original-action-id><posting-date>2016-03-14T06:30:52-0700</posting-date><website-id>7991782</website-id><action-tracker-id>361197</action-tracker-id><action-tracker-name>Sale</action-tracker-name><cid>3848495</cid><advertiser-name>boohoo.com</advertiser-name><commission-amount>0.40</commission-amount><order-discount>0.00</order-discount><sid></sid><sale-amount>2.88</sale-amount></commission><commission><action-status>locked</action-status><action-type>advanced 
sale</action-type><aid>12378040</aid><commission-id>1970220452</commission-id><country>GB</country><event-date>2016-03-15T03:15:11-0700</event-date><locking-date>2016-05-14</locking-date><order-id>27330813</order-id><original>true</original><original-action-id>1695483653</original-action-id><posting-date>2016-03-15T04:01:28-0700</posting-date><website-id>7991782</website-id><action-tracker-id>361197</action-tracker-id><action-tracker-name>Sale</action-tracker-name><cid>3848495</cid><advertiser-name>boohoo.com</advertiser-name><commission-amount>0.60</commission-amount><order-discount>0.00</order-discount><sid>DnoAwoTTYtLs</sid><sale-amount>4.31</sale-amount></commission><commission><action-status>locked</action-status><action-type>advanced sale</action-type><aid>12378040</aid><commission-id>1972164361</commission-id><country>IL</country><event-date>2016-03-20T06:15:41-0700</event-date><locking-date>2016-05-19</locking-date><order-id>27439097</order-id><original>true</original><original-action-id>1697317694</original-action-id><posting-date>2016-03-20T07:00:46-0700</posting-date><website-id>7991782</website-id><action-tracker-id>361197</action-tracker-id><action-tracker-name>Sale</action-tracker-name><cid>3848495</cid><advertiser-name>boohoo.com</advertiser-name><commission-amount>1.01</commission-amount><order-discount>0.00</order-discount><sid>9rftdVKxGwud</sid><sale-amount>7.24</sale-amount></commission></commissions></cj-api>
running the following
xmlfile <- xmlTreeParse(GET(url=query, add_headers(Authorization=token)), useInternalNodes = T)
xmlToDataFrame(xmlfile)
i received
commission
1 closedadvanced sale107894061965209327US2016-03-02T04:22:04-08002016-05-101786924true16910861802016-03-02T05:03:45-08007991782337452JimmyJazz Sale 3010924Jimmy Jazz0.160.001.99
NA
1 lockedadvanced sale123780401969836131IL2016-03-14T05:53:36-07002016-05-1327307042true16951184112016-03-14T06:30:52-07007991782361197Sale3848495boohoo.com0.400.002.88
NA
1 lockedadvanced sale123780401970220452GB2016-03-15T03:15:11-07002016-05-1427330813true16954836532016-03-15T04:01:28-07007991782361197Sale3848495boohoo.com0.600.00DnoAwoTTYtLs4.31
NA
1 lockedadvanced sale123780401972164361IL2016-03-20T06:15:41-07002016-05-1927439097true16973176942016-03-20T07:00:46-07007991782361197Sale3848495boohoo.com1.010.009rftdVKxGwud7.24
How can I save this XML as a data frame?
Thanks

r mongolite - date query

Question
Using the mongolite package in R, how do you query a database for a given date?
Example Data
Consider a test collection with two entries
library(mongolite)
## create dummy data
df <- data.frame(id = c(1,2),
dte = as.POSIXct(c("2015-01-01","2015-01-02")))
> df
id dte
1 1 2015-01-01
2 2 2015-01-02
## insert into database
mong <- mongo(collection = "test", db = "test", url = "mongodb://localhost")
mong$insert(df)
Mongo shell query
To find the entries after a given date I would use
db.test.find({"dte" : {"$gt" : new ISODate("2015-01-01")}})
How can I reproduce this query in R using mongolite?
R attempts
So far I have tried
qry <- paste0('{"dte" : {"$gt" : new ISODate("2015-01-01")}}')
mong$find(qry)
Error: Invalid JSON object: {"dte" : {"$gt" : new ISODate("2015-01-01")}}
qry <- paste0('{"dte" : {"$gt" : "2015-01-01"}}')
mong$find(qry)
Imported 0 records. Simplifying into dataframe...
data frame with 0 columns and 0 rows
qry <- paste0('{"dte" : {"gt" : ', as.POSIXct("2015-01-01"), '}}')
mong$find(qry)
Error: Invalid JSON object: {"dte" : {"gt" : 2015-01-01}}
qry <- paste0('{"dte" : {"gt" : new ISODate("', as.POSIXct("2015-01-01"), '")}}')
mong$find(qry)
Error: Invalid JSON object: {"dte" : {"gt" : new ISODate("2015-01-01")}}
@user2754799 has the correct method, but I've made a couple of small changes so that it answers my question. If they want to edit their answer with this solution I'll accept it.
# Milliseconds since the Unix epoch for the cut-off date.
d <- as.integer(as.POSIXct(strptime("2015-01-01", "%Y-%m-%d"))) * 1000
## or more concisely
## d <- as.integer(as.POSIXct("2015-01-01")) * 1000
# `d` is a double, and pasting a double directly can render it in scientific
# notation (e.g. "1.42e+12") for some dates. format(scientific = FALSE) forces
# plain digits regardless of the session's `scipen` option.
data <- mong$find(paste0('{"dte":{"$gt": { "$date" : { "$numberLong" : "', format(d, scientific = FALSE), '" } } } }'))
as this question keeps showing up at the top of my google results when i forget AGAIN how to query dates in mongolite and am too lazy to go find the documentation:
the above Mongodb shell query,
db.test.find({"dte" : {"$gt" : new ISODate("2015-01-01")}})
now translates to
mong$find('{"dte":{"$gt":{"$date":"2015-01-01T00:00:00Z"}}}')
optionally, you can add millis:
mong$find('{"dte":{"$gt":{"$date":"2015-01-01T00:00:00.000Z"}}}')
if you use the wrong datetime format, you get a helpful error message pointing you to the correct format: use ISO8601 format yyyy-mm-ddThh:mm plus timezone, either "Z" or like "+0500"
of course, this is also documented in the mongolite manual
try mattjmorris's answer from github
library(GetoptLong)
# Milliseconds since the Unix epoch, rendered as plain digits. Without
# format(scientific = FALSE) the double may be interpolated in scientific
# notation (e.g. "1.42e+12") for some dates.
datemillis <- format(as.integer(as.POSIXct("2015-01-01")) * 1000, scientific = FALSE)
# qq() interpolates #{datemillis} into the JSON query string.
data <- data_collection$find(qq('{"createdAt":{"$gt": { "$date" : { "$numberLong" : "#{datemillis}" } } } }'))
reference: https://github.com/jeroenooms/mongolite/issues/5#issuecomment-160996514
Before converting your date by multiplying it by 1000, set options(scipen=1000); without this workaround, certain dates are written in scientific notation when pasted into the query.
This is explained here:

Debugging SQLite R*tree

I have an SQLite database containing an R*tree virtual table. This table is behaving rather oddly and I'm at a loss as to what is wrong. I would appreciate any pointers to aspects I could investigate!
> dbGetQuery(con, 'PRAGMA integrity_check')
integrity_check
1 ok
It seems fine...
> dbGetQuery(con, 'SELECT * FROM peakLoc LIMIT 5')
peakID scanStart scanEnd mzMin mzMax
1 18481 5540 5904 435.1880 435.2095
2 18429 5555 5644 408.7411 408.7459
3 18251 5621 5710 432.7190 432.7285
4 16415 6081 6173 432.2292 432.2470
5 16391 6089 6351 454.1823 454.1960
The general look of the R*tree table
> dbGetQuery(con, 'SELECT MIN(scanEnd), MAX(scanEnd) FROM peakLoc')
MIN(scanEnd) MAX(scanEnd)
1 51 19369
The bounds of scanEnd
> dbGetQuery(con, 'SELECT * FROM peakLoc WHERE scanEnd > 5000 LIMIT 5')
peakID scanStart scanEnd mzMin mzMax
1 20987 4839 6284 410.1729 410.2035
2 6705 9827 10132 738.8564 738.8674
3 15190 6482 6756 615.3235 615.3395
4 15189 6482 6756 509.2193 509.2258
5 12001 7449 7710 855.4534 855.4631
So far so good...
> dbGetQuery(con, 'SELECT * FROM peakLoc WHERE scanEnd > 6000 LIMIT 5')
[1] peakID scanStart scanEnd mzMin mzMax
<0 rows> (or 0-length row.names)
Where are the records?
The same happens for the other columns with greater-than comparisons once the comparison value reaches some arbitrary large number. This behaviour is only present in the R*tree table - the regular tables work fine...
Have I stumbled upon a constraint in the R*tree module that I do not know about? All records in the R*tree come from one big insert and I have not touched the underlying tables that the R*tree relies on...
edit:
On request from CL I've tried to create a reproducible example. At least on my system the following produces an R*tree with the same behaviour:
# Reproducible example: build an in-memory R*tree table, fill it with random
# boxes and run the same kind of greater-than filter as in the question.
set.seed(1)
library(RSQLite)
con <- dbConnect(dbDriver('SQLite'), ':memory:')
dbGetQuery(con, 'CREATE VIRTUAL TABLE test USING rtree(id, xmin, xmax, ymin, ymax)')
# 100 random boxes: each one spans 2 units in x and 3 units in y.
x <- abs(rnorm(100))
y <- abs(rnorm(100))
data <- data.frame(id=1:100, xmin=x, xmax=x+2, ymin=y, ymax=y+3)
# Insert all rows through one prepared statement; the $name placeholders are
# filled from the columns of `data`.
dbGetPreparedQuery(con, 'INSERT INTO test VALUES ($id, $xmin, $xmax, $ymin, $ymax)', bind.data=data)
# Largest xmax stored in the table, for comparison with the filters below.
dbGetQuery(con, 'SELECT max(xmax) FROM test')
# The same filter twice; the second query applies a unary + to the column.
# NOTE(review): presumably the unary + keeps SQLite from using the R*tree
# index for this constraint - confirm against the SQLite documentation.
dbGetQuery(con, 'SELECT * FROM test WHERE xmax > 4 LIMIT 5')
dbGetQuery(con, 'SELECT * FROM test WHERE +xmax > 4 LIMIT 5')
edit 2:
A database created with the commands given in the first edit can be downloaded from this link:
https://dl.dropboxusercontent.com/u/2323585/testdb.sqlite

Using $or array in query

I'm trying to query a MongoDB via the R driver rmongodb. The following query works on the cmd line (result: 204,915):
db.col1.count(
{
$or: [
{'status.time':{$gt: ISODate('2013-09-10 00:00:00')}},
{'editings.time':{$gt: ISODate('2013-09-10 00:00:00')}}
]
} );
Translating this into R, I tried:
d<-strptime('2013-09-10', format='%Y-%m-%d')
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.start.array(buf, "$or")
mongo.bson.buffer.start.object(buf, 'status.time')
mongo.bson.buffer.append(buf, "$gt", d)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.start.object(buf, 'editings.time')
mongo.bson.buffer.append(buf, "$gt", d)
mongo.bson.buffer.finish.object(buf)
EDIT: This is what the query prints in R:
>mongo.bson.from.buffer(buf)
$or : 4
status.time : 3
$gt : 9 79497984
editings.time : 3
$gt : 9 79497984
Executing the query using...
mongo.count(mongo, db1.col1, query=mongo.bson.from.buffer(buf))
...gives me "-1". I tried several variants of the BSON, all with the same result. Using only one of the conditions (without the $or array) works, however. Does anyone see my mistake?
BTW: I'm aware of the thread rmongodb: using $or in query, however, the suggested answer to use the alternative driver RMongo does not satisfy other requirements of my code.
Your way of creating a mongo BSON array is wrong. You are missing the parts
mongo.bson.buffer.start.object(buf, "0")
...
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.start.object(buf, "1")
...
mongo.bson.buffer.finish.object(buf)
For a working example please check the latest comment on:
https://github.com/mongosoup/rmongodb/issues/17
I hope this works for now. I am working on an easier solution!
To avoid having to compose the sequence of mongo.bson.buffer-statements I wrote a package (rmongodbHelper) that will translate a JSON or a list() to a BSON object which can then be used with rmongodb.
First let's setup the environment:
library(rmongodb)
# install rmongodbHelper package from GitHub
library(devtools)
devtools::install_github("joyofdata/rmongodbHelper")
library(rmongodbHelper)
# the MongoDB instance
ns <- "dbx.collx"
M <- mongo.create()
mongo.is.connected(M)
mongo.remove(M, ns, json_to_bson("{}"))
# inserting a number of dummy objects
# JSON keys currently are expected to be wrapped in double quotes!
objs <- c(
'{"_id":"__int(0)", "dates":{}}',
'{"_id":"__int(1)", "dates":{"a":"__time(2013-01-01)", "b":"__time(2013-01-01)"}}',
'{"_id":"__int(2)", "dates":{"a":"__time(2013-01-01)", "b":"__time(2014-01-01)"}}',
'{"_id":"__int(3)", "dates":{"a":"__time(2014-01-01)", "b":"__time(2013-01-01)"}}',
'{"_id":"__int(4)", "dates":{"a":"__time(2014-01-01)", "b":"__time(2014-01-01)"}}'
)
for(obj in objs) {
mongo.insert(M, ns, json_to_bson(obj))
}
Let's see via MongoDB shell if they were successfully inserted:
> use dbx
switched to db dbx
> db.collx.find().pretty()
{ "_id" : 0, "dates" : { } }
{
"_id" : 1,
"dates" : {
"a" : ISODate("2013-01-01T00:00:00Z"),
"b" : ISODate("2013-01-01T00:00:00Z")
}
}
[...]
{
"_id" : 4,
"dates" : {
"a" : ISODate("2014-01-01T00:00:00Z"),
"b" : ISODate("2014-01-01T00:00:00Z")
}
}
Now let's search for documents with a query:
# searching for those objects
# JSON keys currently are expected to be wrapped in double quotes!
json_qry <-
'{
"$or": [
{"dates.a":{"$gt": "__time(2013-06-10)"}},
{"dates.b":{"$gt": "__time(2013-06-10)"}}
]
}'
cur <- mongo.find(M, "dbx.collx", json_to_bson(json_qry))
while(mongo.cursor.next(cur)) {
print(mongo.cursor.value(cur))
}
And this is what we get in the end:
_id : 16 2
dates : 3
a : 9 -211265536
b : 9 1259963392
_id : 16 3
dates : 3
a : 9 1259963392
b : 9 -211265536
_id : 16 4
dates : 3
a : 9 1259963392
b : 9 1259963392
keys - also operators like $or - need to be put in double quotes.
"x":3 will lead to 3 being cast as double
"x":"__int(3)" will lead to 3 being cast as integer

Resources