Question
Using the mongolite package in R, how do you query a database for a given date?
Example Data
Consider a test collection with two entries
library(mongolite)
## create dummy data
df <- data.frame(id = c(1,2),
                 dte = as.POSIXct(c("2015-01-01","2015-01-02")))
> df
  id        dte
1  1 2015-01-01
2  2 2015-01-02
## insert into database
mong <- mongo(collection = "test", db = "test", url = "mongodb://localhost")
mong$insert(df)
Mongo shell query
To find the entries after a given date I would use
db.test.find({"dte" : {"$gt" : new ISODate("2015-01-01")}})
How can I reproduce this query in R using mongolite?
R attempts
So far I have tried
qry <- paste0('{"dte" : {"$gt" : new ISODate("2015-01-01")}}')
mong$find(qry)
Error: Invalid JSON object: {"dte" : {"$gt" : new ISODate("2015-01-01")}}
qry <- paste0('{"dte" : {"$gt" : "2015-01-01"}}')
mong$find(qry)
Imported 0 records. Simplifying into dataframe...
data frame with 0 columns and 0 rows
qry <- paste0('{"dte" : {"gt" : ', as.POSIXct("2015-01-01"), '}}')
mong$find(qry)
Error: Invalid JSON object: {"dte" : {"gt" : 2015-01-01}}
qry <- paste0('{"dte" : {"gt" : new ISODate("', as.POSIXct("2015-01-01"), '")}}')
mong$find(qry)
Error: Invalid JSON object: {"dte" : {"gt" : new ISODate("2015-01-01")}}
@user2754799 has the correct method, but I've made a couple of small changes so that it answers my question. If they want to edit their answer with this solution, I'll accept it.
## date as milliseconds since the Unix epoch
d <- as.integer(as.POSIXct(strptime("2015-01-01", "%Y-%m-%d"))) * 1000
## or more concisely
## d <- as.integer(as.POSIXct("2015-01-01")) * 1000
data <- mong$find(paste0('{"dte":{"$gt": { "$date" : { "$numberLong" : "', d, '" } } } }'))
As this question keeps showing up at the top of my Google results when I forget AGAIN how to query dates in mongolite and am too lazy to go find the documentation:
The above MongoDB shell query,
db.test.find({"dte" : {"$gt" : new ISODate("2015-01-01")}})
now translates to
mong$find('{"dte":{"$gt":{"$date":"2015-01-01T00:00:00Z"}}}')
Optionally, you can add milliseconds:
mong$find('{"dte":{"$gt":{"$date":"2015-01-01T00:00:00.000Z"}}}')
If you use the wrong datetime format, you get a helpful error message pointing you to the correct one: ISO 8601 format yyyy-mm-ddThh:mm plus a timezone, either "Z" or an offset like "+0500".
Of course, this is also documented in the mongolite manual.
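Putting this together with the example collection from the question, a minimal sketch (assuming the local test database populated in the example data above):
library(mongolite)
## connect to the collection created in the example data above
mong <- mongo(collection = "test", db = "test", url = "mongodb://localhost")
## dates are passed as strict Extended JSON: {"$date": "<ISO 8601 string>"}
mong$find('{"dte": {"$gt": {"$date": "2015-01-01T00:00:00Z"}}}')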
Try mattjmorris's answer from GitHub:
library(GetoptLong)  # provides qq() for string interpolation
## date as milliseconds since the Unix epoch
datemillis <- as.integer(as.POSIXct("2015-01-01")) * 1000
data <- data_collection$find(qq('{"createdAt":{"$gt": { "$date" : { "$numberLong" : "#{datemillis}" } } } }'))
reference: https://github.com/jeroenooms/mongolite/issues/5#issuecomment-160996514
Before converting your date by multiplying it by 1000, run options(scipen=1000); without this workaround, certain dates will be printed in scientific notation and break the query.
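Alternatively, here is a sketch that sidesteps the scientific-notation issue by formatting the milliseconds with sprintf (data_collection is the collection handle from the answer above):
## date as milliseconds since the Unix epoch, kept as a plain numeric
datemillis <- as.numeric(as.POSIXct("2015-01-01")) * 1000
## "%.0f" always prints fixed notation, so no scipen tweak is needed
qry <- sprintf('{"createdAt": {"$gt": {"$date": {"$numberLong": "%.0f"}}}}', datemillis)
data <- data_collection$find(qry)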
Related
I am trying to extract data from a JSON file, and am still not clear about where the error is coming from. My data is like this:
"Tracker":{"Sep 30, 2021":{"DC":4,"DN":"0:0",
DC = {}
for day, daily_data in read_content['Tracker'].items():
    for value in daily_data['Disturbances Count']:
        DC[datetime] = value
I'm getting the following error:
---------------------------------------------------------------------------
TypeError: 'int' object is not iterable
The solution for the above code is simply to add str():
DC = {}
for day, daily_data in read_content['Tracker'].items():
    for value in str(daily_data['Disturbances Count']):
        DC[datetime] = value
Trying to move on from my troubles with RISmed (see Problems with RISmed and large(ish) data sets), I decided to use rentrez and entrez_summary to retrieve a large list of pubmed titles from a query:
set_entrez_key("######") #I did provide my real API key here
Sys.getenv("ENTREZ_KEY")
rm(list=ls())
library(rentrez)
query="(United States[AD] AND France[AD] AND 1995:2020[PDAT])"
results<-entrez_search(db="pubmed",term=query,use_history=TRUE)
results
results$web_history
for (seq_start in seq(0, results$count, 100)) {
  if (seq_start == 0) {
    summary.append.l <- entrez_summary(
      db = "pubmed",
      web_history = results$web_history,
      retmax = 100,
      retstart = seq_start
    )
  }
  Sys.sleep(0.1) # slow things down in case THAT'S a factor here....
  summary.append.l <- append(
    summary.append.l,
    entrez_summary(
      db = "pubmed",
      web_history = results$web_history,
      retmax = 100,
      retstart = seq_start
    )
  )
}
The good news: I didn't get a flat-out rejection from NCBI like I did with RISmed and EUtilsGet. The bad news: it's not completing. I get either
Error in curl::curl_fetch_memory(url, handle = handle) :
transfer closed with outstanding read data remaining
or
Error: parse error: premature EOF
(right here) ------^
I almost think there's something about using an affiliation search string in the query, because if I change the query to
query="monoclonal[Title] AND antibody[Title] AND 2010:2020[PDAT]"
it completes the run, despite having about the same number of records to deal with. So...any ideas why a particular search string would result in problems with the NCBI servers?
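Since the failures look like transient transfer errors, a hedged sketch of a retry wrapper around the same entrez_summary() call may help narrow things down (it assumes results comes from the entrez_search() call shown earlier; fetch_batch is just an illustrative name):
## retry a single 100-record batch a few times before giving up
fetch_batch <- function(seq_start, tries = 3) {
  for (i in seq_len(tries)) {
    out <- tryCatch(
      entrez_summary(db = "pubmed",
                     web_history = results$web_history,
                     retmax = 100,
                     retstart = seq_start),
      error = function(e) e
    )
    if (!inherits(out, "error")) return(out)
    Sys.sleep(2 * i)  # back off a little before retrying
  }
  stop("batch starting at ", seq_start, " failed after ", tries, " tries")
}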
I am trying to write a generic upsert for a tick database in R.
The Python code would be:
collection.update({'symbol': 'somesymbol', 'sha': 'SoM3__W3|Re|7__Sh#'},
                  {'$set': {segment: 5},
                   '$addToSet': {'parent': parent_id}},
                  upsert=True)
In R I am using rmongodb and trying to build the BSON objects:
#get the query
mtch_b<-mongo.bson.buffer.create()
mongo.bson.buffer.append(mtch_b, "symbol", "somesymbol")
mongo.bson.buffer.append(mtch_b, "sha", "SoM3__W3|Re|7__Sh#")
mtch<-mongo.bson.from.buffer(mtch_b)
#set the segment
qry_b<-mongo.bson.buffer.create()
mongo.bson.buffer.start.object(qry_b, "$set")
mongo.bson.buffer.append(qry_b, "segment", 5)
mongo.bson.buffer.start.object(qry_b, "$addToSet")
mongo.bson.buffer.append(qry_b, "parent", "Initial")
mongo.bson.buffer.finish.object(qry_b) # end of $addToSet object
mongo.bson.buffer.finish.object(qry_b) # end of $set object
qry_bsn <-mongo.bson.from.buffer(qry_b)
mongo.update(mongo, "M__test.tmp", mtch, qry_bsn, flags=mongo.update.upsert)
When I run this I get an error:
"The dollar ($) prefixed field '$addToSet' in '$addToSet' is not valid for storage."
Looking at the qry_bsn:
qry_bsn
$set : 3
segment : 4
0 : 1 1.000000
1 : 1 2.000000
2 : 1 3.000000
3 : 1 4.000000
$addToSet : 3
parent : 2 Initial
When I remove the start, append, and finish calls for the $addToSet object, the query runs fine.
Any help on how to do this would be much appreciated.
I can't find a reason not to use mongo.bson.from.list. It makes all the mongo.bson.buffer.* calls for you, and there is much less chance of producing a bug in the BSON construction.
query <- mongo.bson.from.list(list("symbol" = "somesymbol", "sha" = "SoM3__W3|Re|7__Sh#"))
upd_obj <- mongo.bson.from.list(list('$set' = list('segment' = 1:4), '$addToSet' = list('parent' = 'PARENT_ID')))
mongo.update(mongo = mongo, ns = "M__test.tmp", criteria = query, objNew = upd_obj, flags=mongo.update.upsert)
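For comparison with the mongolite package used earlier on this page, the same upsert would look roughly like this (a sketch: the "M__test.tmp" namespace is split into db and collection, and a local server is assumed):
library(mongolite)
col <- mongo(collection = "tmp", db = "M__test", url = "mongodb://localhost")
## upsert: update the matching document, or insert it if it does not exist
col$update(
  query  = '{"symbol": "somesymbol", "sha": "SoM3__W3|Re|7__Sh#"}',
  update = '{"$set": {"segment": 5}, "$addToSet": {"parent": "Initial"}}',
  upsert = TRUE
)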
I am trying to use readLines in R but I am getting the error below:
orders1<-readLines(orders,2)
# Error in readLines(orders, 2) : 'con' is not a connection
Code:
orders<-read.csv("~/orders.csv")
orders
orders1<-readLines(orders,2)
orders1
Data:
id,item,quantity_ordered,item_cost
1,playboy roll,1,12
1,rockstar roll,1,10
1,spider roll,1,8
1,keystone roll,1,25
1,salmon sashimi,6,3
1,tuna sashimi,6,2.5
1,edamame,1,6
2,salmon skin roll,1,8
2,playboy roll,1,12
2,unobtanium roll,1,35
2,tuna sashimi,4,2.5
2,yellowtail hand roll,1,7
4,california roll,1,4
4,cucumber roll,1,3.5
5,unagi roll,1,6.5
5,firecracker roll,1,9
5,unobtanium roll,1,35
,chicken teriaki hibachi,1,7.95
,diet coke,1,1.95
I'm guessing you want this:
orders1 <- readLines( file("~/orders.csv") )
It's not clear why you want to do your own parsing or substitution, but that should give readLines a valid connection object.
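A minimal sketch, assuming ~/orders.csv contains the data shown above; readLines() also accepts a file path directly, so the explicit connection is optional:
## read the first two raw lines of the file (header plus first record)
orders1 <- readLines("~/orders.csv", n = 2)
orders1
## [1] "id,item,quantity_ordered,item_cost" "1,playboy roll,1,12"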
I'm trying to query a MongoDB database via the R driver rmongodb. The following query works on the command line (result: 204,915):
db.col1.count(
{
$or: [
{'status.time':{$gt: ISODate('2013-09-10 00:00:00')}},
{'editings.time':{$gt: ISODate('2013-09-10 00:00:00')}}
]
} );
Translating this into R, I tried:
d<-strptime('2013-09-10', format='%Y-%m-%d')
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.start.array(buf, "$or")
mongo.bson.buffer.start.object(buf, 'status.time')
mongo.bson.buffer.append(buf, "$gt", d)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.start.object(buf, 'editings.time')
mongo.bson.buffer.append(buf, "$gt", d)
mongo.bson.buffer.finish.object(buf)
EDIT: This is what the query prints in R:
>mongo.bson.from.buffer(buf)
$or : 4
status.time : 3
$gt : 9 79497984
editings.time : 3
$gt : 9 79497984
Executing the query using...
mongo.count(mongo, "db1.col1", query=mongo.bson.from.buffer(buf))
...gives me "-1". I tried several variants of the BSON, all with the same result. Using only one of the conditions (without the $or array) works, however. Does anyone see my mistake?
BTW: I'm aware of the thread "rmongodb: using $or in query"; however, the suggested answer to use the alternative driver RMongo does not satisfy other requirements of my code.
Your way of creating a mongo BSON array is wrong. You are missing these parts:
mongo.bson.buffer.start.object(buf, "0")
...
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.start.object(buf, "1")
...
mongo.bson.buffer.finish.object(buf)
For a working example please check the latest comment on:
https://github.com/mongosoup/rmongodb/issues/17
I hope this works for now. I am working on an easier solution!
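Putting that fix together with the buffer code from the question, a sketch of the full $or query (same field names, date, connection handle, and "db1.col1" namespace as in the question; array elements are wrapped in objects keyed "0" and "1"):
d <- strptime('2013-09-10', format='%Y-%m-%d')
buf <- mongo.bson.buffer.create()
mongo.bson.buffer.start.array(buf, "$or")
mongo.bson.buffer.start.object(buf, "0")
mongo.bson.buffer.start.object(buf, "status.time")
mongo.bson.buffer.append(buf, "$gt", d)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.start.object(buf, "1")
mongo.bson.buffer.start.object(buf, "editings.time")
mongo.bson.buffer.append(buf, "$gt", d)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf)
mongo.bson.buffer.finish.object(buf) # closes the $or array
mongo.count(mongo, "db1.col1", query = mongo.bson.from.buffer(buf))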
To avoid having to compose the sequence of mongo.bson.buffer.* statements, I wrote a package (rmongodbHelper) that translates JSON or a list() into a BSON object, which can then be used with rmongodb.
First, let's set up the environment:
library(rmongodb)
# install rmongodbHelper package from GitHub
library(devtools)
devtools::install_github("joyofdata/rmongodbHelper")
library(rmongodbHelper)
# the MongoDB instance
ns <- "dbx.collx"
M <- mongo.create()
mongo.is.connected(M)
mongo.remove(M, ns, json_to_bson("{}"))
# inserting a number of dummy objects
# JSON keys currently are expected to be wrapped in double quotes!
objs <- c(
'{"_id":"__int(0)", "dates":{}}',
'{"_id":"__int(1)", "dates":{"a":"__time(2013-01-01)", "b":"__time(2013-01-01)"}}',
'{"_id":"__int(2)", "dates":{"a":"__time(2013-01-01)", "b":"__time(2014-01-01)"}}',
'{"_id":"__int(3)", "dates":{"a":"__time(2014-01-01)", "b":"__time(2013-01-01)"}}',
'{"_id":"__int(4)", "dates":{"a":"__time(2014-01-01)", "b":"__time(2014-01-01)"}}'
)
for(obj in objs) {
mongo.insert(M, ns, json_to_bson(obj))
}
Let's check via the MongoDB shell whether they were successfully inserted:
> use dbx
switched to db dbx
> db.collx.find().pretty()
{ "_id" : 0, "dates" : { } }
{
"_id" : 1,
"dates" : {
"a" : ISODate("2013-01-01T00:00:00Z"),
"b" : ISODate("2013-01-01T00:00:00Z")
}
}
[...]
{
"_id" : 4,
"dates" : {
"a" : ISODate("2014-01-01T00:00:00Z"),
"b" : ISODate("2014-01-01T00:00:00Z")
}
}
Now let's search for documents with a query:
# searching for those objects
# JSON keys currently are expected to be wrapped in double quotes!
json_qry <-
'{
"$or": [
{"dates.a":{"$gt": "__time(2013-06-10)"}},
{"dates.b":{"$gt": "__time(2013-06-10)"}}
]
}'
cur <- mongo.find(M, "dbx.collx", json_to_bson(json_qry))
while(mongo.cursor.next(cur)) {
print(mongo.cursor.value(cur))
}
And this is what we get in the end:
_id : 16 2
dates : 3
a : 9 -211265536
b : 9 1259963392
_id : 16 3
dates : 3
a : 9 1259963392
b : 9 -211265536
_id : 16 4
dates : 3
a : 9 1259963392
b : 9 1259963392
Keys, including operators like $or, need to be put in double quotes.
"x":3 will lead to 3 being cast as a double.
"x":"__int(3)" will lead to 3 being cast as an integer.