marklogic 8 - How to search collection wise - xquery

I have tried the XQuery below in my Query Console, but I need to restrict the search to a specific collection.
(: Look up the URIs of documents matching a cts:element-query on P that wraps
   two attribute-value queries (P/@name = $value1 and P/@value = $value2),
   load each document, and return the @value attribute of every PS/P whose
   @name is "volume".
   Fix: attribute steps are written with "@" ("#name" / "#value" are not
   valid XPath). :)
let $value1 := "antony"
let $value2 := "cse"
for $uri1 in cts:uris((), (), (
  cts:element-query(xs:QName("P"),
    cts:and-query((
      cts:element-attribute-value-query(xs:QName("P"), xs:QName("name"), $value1),
      cts:element-attribute-value-query(xs:QName("P"), xs:QName("value"), $value2)
    ))
  )
))
let $xml := doc($uri1)
return $xml//PS/P[@name eq "volume"]/@value
Please suggest how to add a collection constraint to the XQuery above.

First I would say that since you ultimately want documents rather than URIs, it would be more effective to use cts:search directly. You can add the "unfiltered" option if you want to avoid the cost of filtering, e.g.:
(: Run the same element/attribute query directly through cts:search over all
   documents, with the "unfiltered" option, then navigate from the matching
   documents to the @value of every PS/P whose @name is "volume".
   Fix: attribute steps are written with "@" (the "#" form is not valid XPath). :)
let $value1 := "antony"
let $value2 := "cse"
return
  cts:search(
    doc(),
    cts:element-query(xs:QName("P"),
      cts:and-query((
        cts:element-attribute-value-query(xs:QName("P"), xs:QName("name"), $value1),
        cts:element-attribute-value-query(xs:QName("P"), xs:QName("value"), $value2)
      ))
    ),
    "unfiltered"
  )//PS/P[@name eq "volume"]/@value
or, since you only care about /PS/P elements:
(: Search PS/P elements directly (instead of whole documents) with the same
   element/attribute query, then keep those whose @name is "volume" and return
   their @value.
   Fix: attribute steps are written with "@" (the "#" form is not valid XPath). :)
cts:search(
  doc()//PS/P,
  cts:element-query(xs:QName("P"),
    cts:and-query((
      cts:element-attribute-value-query(xs:QName("P"), xs:QName("name"), "antony"),
      cts:element-attribute-value-query(xs:QName("P"), xs:QName("value"), "cse")
    ))
  ),
  "unfiltered"
)[@name eq "volume"]/@value
To search within a collection, replace the doc() with collection("yourcollection"):
(: Same search as over doc(), but scoped to one collection by using
   collection("yourcollection") as the root of the searched expression.
   Fix: attribute steps are written with "@" (the "#" form is not valid XPath). :)
cts:search(
  collection("yourcollection")//PS/P,
  cts:element-query(xs:QName("P"),
    cts:and-query((
      cts:element-attribute-value-query(xs:QName("P"), xs:QName("name"), "antony"),
      cts:element-attribute-value-query(xs:QName("P"), xs:QName("value"), "cse")
    ))
  ),
  "unfiltered"
)[@name eq "volume"]/@value

Related

cts:search pass the node dynamically

Is there a way to have the below code working?
(: Question code: the asker wants the child step under /root to be chosen at
   run time from the string held in $node. As written, the variable is used
   directly as a path step inside the cts:search expression; per the question
   this does not work. :)
let $node := 'childnode'
return
cts:search(/root/$node,
cts:and-query((
cts:collection-query('collection')
))
)
One way to dynamically build and execute the XPath for the cts:search $expression parameter is to generate the search expression as a string and then use xdmp:value() to parse and execute:
(: Build the searchable path as a string and evaluate the complete cts:search
   call with xdmp:value.
   Changes from the original snippet:
   - the path root is "/root/", matching the expression in the question;
   - $query is referenced by name inside the evaluated string instead of being
     concatenated into it, so the query object is not round-tripped through
     its string form (xdmp:value evaluates in the caller's context, so the
     variable is in scope).
   NOTE(review): $node is spliced into evaluated code. If it can come from
   untrusted input, validate it (e.g. as an NCName) before use. :)
let $node := 'childnode'
let $expression := "/root/" || $node
let $query as cts:query :=
  cts:and-query((
    cts:collection-query('collection')
  ))
return xdmp:value("cts:search(" || $expression || ", $query)")

xdmp:spawn-function() Alternative

We have around 20 million documents in the database. We split them into batches of 10,000 and use
xdmp:spawn-function() to query over these 20 million documents and delete those that meet certain conditions. However, when run through Query Console, the query times out. Is there an alternative approach that would keep the query from timing out?
xquery version "1.0-ml";

(: Offset subtracted from a tuple's version to obtain the highest version
   that is eligible for deletion. :)
declare variable $versionToMaintain := 10;
(: Number of tuples handed to each spawned task. :)
declare variable $batchSize := 10000;

(:~
 : For every tuple in $values, deletes the documents in 'collection name 2'
 : that share the tuple's id, have a version at or below
 : (tuple version - $versionToMaintain), and satisfy the created-on bound.
 : Tuples whose computed version threshold is not positive are skipped.
 :
 : Fix: the parentheses of the last range query and of cts:and-query(( ... ))
 : were unbalanced, so the query did not parse.
 :
 : NOTE(review): $value[2] / $value[3] assume each tuple can be indexed
 : positionally; confirm against the type cts:value-tuples returns on your
 : server version.
 : NOTE(review): the xdmp:parseDateTime call is kept exactly as posted (only a
 : picture string, no value); it looks redacted and needs confirming. The
 : "c" namespace prefix is likewise not declared in this snippet.
 :
 : @param $values tuples of (uri, id, version)
 : @return the result of the xdmp:document-delete calls (empty sequence)
 :)
declare function local:delete($values) {
  for $value in $values
  let $versionToDelete := $value[3] - $versionToMaintain
  return
    if ($versionToDelete > 0) then
      let $query := cts:and-query((
        cts:collection-query('collection name 2'),
        cts:element-range-query(xs:QName('version'), '<=', xs:int($versionToDelete)),
        cts:element-value-query(xs:QName('id'), $value[2]),
        cts:element-range-query(xs:QName('c:created-on'), '<=', xs:dateTime(xdmp:parseDateTime('[Y0001]-[M01]-[D01]')))
      ))
      return (cts:uris((), (), $query) ! xdmp:document-delete(.))
    else ()
};
(: Estimate how many documents in "collection name 1" do not have version "1",
   derive the number of batches from that estimate, and for each batch read
   one page of (uri, id, version) tuples in this request before spawning a
   task that deletes based on those tuples. :)
let $batchCount :=
  fn:ceiling(
    xdmp:estimate(
      cts:search(
        collection("collection name 1"),
        cts:not-query(cts:element-value-query(xs:QName('version'), "1")),
        "unfiltered"
      )
    ) div $batchSize
  )
for $batch in (1 to $batchCount)
(: Page through the tuples: skip the earlier batches, take one batch worth. :)
let $tuples :=
  cts:value-tuples(
    (
      cts:uri-reference(),
      cts:element-reference(xs:QName('id')),
      cts:element-reference(xs:QName('version'))
    ),
    ("skip=" || ($batch - 1) * $batchSize, "truncate=" || $batchSize),
    cts:and-query((
      cts:collection-query("collection name 1"),
      cts:not-query(cts:element-value-query(xs:QName('version'), "1"))
    ))
  )
return
  xdmp:spawn-function(function() { local:delete($tuples) })
Well, I think the reason why it is taking so long and potentially timing out is that you are doing a lot of cts:value-tuples() in the for loop iterating over the batches, rather than pushing that work out into the spawned function that gets called for each batch.
Move the paginated cts:value-tuples() call inside of the local:delete(), and pass in the $x batch value, instead of the tuples.
xquery version "1.0-ml";

(: Offset subtracted from a tuple's version to obtain the highest version
   that is eligible for deletion. :)
declare variable $versionToMaintain := 10;
(: Number of tuples read and processed per spawned task. :)
declare variable $batchSize := 10000;

(:~
 : Processes batch number $x: reads that page of (uri, id, version) tuples
 : from "collection name 1" (excluding version "1"), and for each tuple deletes
 : the documents in 'collection name 2' that share the tuple's id, have a
 : version at or below (tuple version - $versionToMaintain), and satisfy the
 : created-on bound. The tuple lookup happens inside this function so that it
 : runs in the spawned task rather than in the calling request.
 :
 : Fix: the closing parentheses of cts:and-query(( ... )) were missing, so the
 : query did not parse.
 :
 : NOTE(review): $value[2] / $value[3] assume each tuple can be indexed
 : positionally; confirm against the type cts:value-tuples returns on your
 : server version.
 : NOTE(review): the xdmp:parseDateTime call is kept exactly as posted (only a
 : picture string, no value); it looks redacted and needs confirming. The
 : "c" namespace prefix is likewise not declared in this snippet.
 :
 : @param $x 1-based batch number
 : @return the result of the xdmp:document-delete calls (empty sequence)
 :)
declare function local:delete($x) {
  let $values :=
    cts:value-tuples(
      (
        cts:uri-reference(),
        cts:element-reference(xs:QName('id')),
        cts:element-reference(xs:QName('version'))
      ),
      (: skip the earlier batches, then take one batch worth of tuples :)
      ("skip=" || ($x - 1) * $batchSize, "truncate=" || $batchSize),
      cts:and-query((
        cts:collection-query("collection name 1"),
        cts:not-query(cts:element-value-query(xs:QName('version'), "1"))
      ))
    )
  for $value in $values
  let $versionToDelete := $value[3] - $versionToMaintain
  return
    if ($versionToDelete > 0) then
      let $query := cts:and-query((
        cts:collection-query('collection name 2'),
        cts:element-range-query(xs:QName('version'), '<=', xs:int($versionToDelete)),
        cts:element-value-query(xs:QName('id'), $value[2]),
        cts:element-range-query(xs:QName('c:created-on'), '<=', xs:dateTime(xdmp:parseDateTime('[Y0001]-[M01]-[D01]')))
      ))
      return (cts:uris((), (), $query) ! xdmp:document-delete(.))
    else ()
};
(: Estimate how many documents in "collection name 1" do not have version "1",
   turn that into a batch count, and spawn one task per batch number. Each
   task receives only the batch number. :)
let $batchCount :=
  fn:ceiling(
    xdmp:estimate(
      cts:search(
        collection("collection name 1"),
        cts:not-query(cts:element-value-query(xs:QName('version'), "1")),
        "unfiltered"
      )
    ) div $batchSize
  )
for $batch in (1 to $batchCount)
return
  xdmp:spawn-function(function() { local:delete($batch) })
Deleting bulk content from MarkLogic database (with a bucket assignment) is always a challenge. Like Mads suggested, you should consider using CoRB. It is easier to tune the performance with different options available.
Secondly, you can consider using a tiered storage approach - like a range partition or query partition (provided the license requirements are met) where you can archive the documents that match the required criteria to a group of forests. You can then use the forest-clear() to do the job for you.

Inserting a document and reading it in same transaction in MarkLogic

Below is the code snippet I am using for one of the functionality
(:~
 : Estimates the number of /count matches whose count/@Id equals $Id and whose
 : child/@MatchStatus equals $status. Both comparisons use the "exact" option
 : and the search is run "unfiltered".
 :
 : @param $Id     value to match against count/@Id
 : @param $status value to match against child/@MatchStatus
 : @return the estimate reported by xdmp:estimate
 :)
declare function local:matchCounts($Id as xs:string, $status as xs:string) as xs:int {
  let $criteria :=
    cts:and-query((
      cts:element-attribute-value-query(xs:QName("count"), xs:QName("Id"), $Id, "exact"),
      cts:element-attribute-value-query(xs:QName("child"), xs:QName("MatchStatus"), $status, "exact")
    ))
  return xdmp:estimate(cts:search(/count, $criteria, "unfiltered"))
};
(:~
 : Returns the pair (even count, odd count) for the given Id, each obtained
 : from local:matchCounts.
 :
 : @param $Id value passed through to local:matchCounts
 : @return a two-item sequence: the "even" estimate followed by the "odd" estimate
 :)
declare function local:saveCountsMatchC($Id as xs:string) {
  (
    local:matchCounts($Id, "even"),
    local:matchCounts($Id, "odd")
  )
};
(:~
 : Question code: spawns one insert task per batch (1000 records in batches of
 : 50, i.e. 20 tasks), each inserting <count> documents tagged with $Id, and
 : then returns the even/odd counts from local:saveCountsMatchC.
 :
 : @param $Id value written to count/@Id on every inserted document and used
 :            for the subsequent count lookup
 : @return the sequence returned by local:saveCountsMatchC($Id)
 :)
declare function local:matchingProcess($Id as xs:string) {
let $total-records := 1000
let $batch-size := 50
(: running offset; advanced with xdmp:set on every loop iteration :)
let $pagination := 0
let $bs :=
for $records in 1 to fn:ceiling($total-records div $batch-size )
(: $start/$end are the 1-based bounds of this batch, derived from the offset :)
let $start := fn:sum($pagination + 1)
let $end := fn:sum($batch-size + $pagination)
(: must run after $start/$end are bound: moves the offset to the end of this batch :)
let $_ := xdmp:set($pagination, $end)
return
(: spawn the inserts for this batch; the options element requests a result and an auto-committing update :)
xdmp:spawn-function
(
function() {
(: $pos is the position within this batch's range, so even/odd alternates per batch :)
for $each at $pos in ($start to $end)
let $id := sem:uuid-string()
let $xml := if(($pos mod 2) eq 0) then <count Id='{$Id}'><child MatchStatus='even'></child></count>
else <count Id='{$Id}'><child MatchStatus='odd'></child></count>
return xdmp:document-insert(concat("/", $id, ".xml"), $xml)
},
<options xmlns="xdmp:eval"><result>{fn:true()}</result><commit>auto</commit><update>true</update></options>
)
(: reference the spawn results before counting :)
let $_ := $bs
return local:saveCountsMatchC($Id)
};
(: entry point: run the process for Id "1" :)
local:matchingProcess("1")
The requirement over here is to iterate 1000 documents using batch size of 50, so basically I am using spawn function to create 20 batches of size 50 which inserts 1000 documents in my database.
Once those documents are inserted, I need to read those documents in same transaction. Here 500 documents have MatchStatus='odd' and 500 documents have MatchStatus='even'
The query should return (500,500) as output; Instead it returns (0,0)
I am using the <result>{fn:true()}</result> option so that my next statement waits for all spawned tasks to complete, but that is not happening.
Can anybody help me with the requirement?
Note: Need to insert 1000 documents and then read them in same function call only
Your code that executes the spawns does not perform updates itself, so will run in so-called query mode. In query mode only updates from before the start of the code are visible.
You could try running in update mode (declare option xdmp:transaction-mode "update";), but usually it is easier to just spawn or eval the counting/reading of your updates as well. E.g. wrap the xdmp:estimate in an xdmp:spawn-function with result true as well.
HTH!

Sorting multiple maps in marklogic 8

This is more of an XQuery question than a MarkLogic one. I have three map:map instances, and each map holds key-value pairs of "id" and score. I would like to sort all the distinct ids based on their scores across all of the maps.
For eg:
map1 : 1:2048, 5:2000
map2 : 2:5000, 1:1000, 4:3000
map3 : 6:100, 7:5000, 2:2000
In the above example, each map is id:score for key value (did not know how to represent here :))..
I want the sorted list of id from three maps based on score..
Is there a good way or better way of doing the sorting, or do I have to union the keys of the map and iterate the sequence of keys and sort them ?
This seems like a great use case for folding. It's part of the XQuery 3.0 spec.
Folding can go through a sequence of items and gets the result for each item as it goes through. In this example $combinedMaps is the result of the last call and $mapToMerge is the item in the sequence it is currently going through.
Here an example of what you would want to do.
(:~
 : For every key of $mapA, stores into $newMap the larger of $mapA's value and
 : $mapB's value for that key (a key missing from $mapB counts as 0; on a tie
 : $mapB's value is stored). Keys that exist only in $mapB are not visited.
 : $newMap is modified in place and also returned.
 :
 : @param $newMap map that receives the entries
 : @param $mapA   map whose keys are iterated
 : @param $mapB   map consulted for a competing value per key
 : @return $newMap
 :)
declare function local:sortMaps(
  $newMap as map:map,
  $mapA as map:map,
  $mapB as map:map
) as map:map {
  let $_ :=
    for $k in map:keys($mapA)
    let $fromB := (map:get($mapB, $k), 0)[1]
    let $fromA := map:get($mapA, $k)
    return
      map:put($newMap, $k, if ($fromA gt $fromB) then $fromA else $fromB)
  return $newMap
};
(: Build the three sample id -> score maps and fold them into one map. The fold
   starts from the first map and, for each map in turn, merges in both
   directions through local:sortMaps so keys from either side are covered. :)
let $maps := (
  map:new((map:entry("1", 2048), map:entry("5", 2000))),
  map:new((map:entry("2", 5000), map:entry("1", 1000), map:entry("4", 3000))),
  map:new((map:entry("6", 100), map:entry("7", 5000), map:entry("2", 2000)))
)
(: $combinedMaps is the result so far; $mapToMerge is the current item. :)
let $merge :=
  function($combinedMaps, $mapToMerge) {
    local:sortMaps(
      local:sortMaps(map:map(), $combinedMaps, $mapToMerge),
      $mapToMerge,
      $combinedMaps
    )
  }
return fn:fold-left($merge, $maps[1], $maps)

Combined search query for a few xml documents

I have in each books directory /books/{book_id}/ a couple of xml documents.
/books/{book_id}/basic.xml and /books/{book_id}/formats.xml.
First one is
<document book_id="{book_id}">
<title>The book</title>
</document>
and the second is
<document book_id="{book_id}">
<format>a</format>
<format>b</format>
<format>c</format>
</document>
How can I find all books in /books/ directory with format eq 'a' and title eq *'book'* by one query? I have done one variant when I first finding all books by format by cts:search() and then filter the result in "for loop" by checking title in basic.xml file.
Thank you!
This question is listed as MarkLogic as well as xQuery. For completeness, I have included a MarkLogic solution that is a single statement:
(: Join the two document types on the book_id attribute: collect the book_id
   values of documents having the wanted <format>, then search for documents
   whose book_id is in that list and whose <title> matches the wildcard.
   Fixes: a return clause is added so the FLWOR is complete, and the format
   value is 'a' as asked in the question (the snippet had 'b'). :)
let $res :=
  cts:search(doc(), cts:and-query((
    cts:element-word-query(xs:QName("title"), '*book*', ('wildcarded')),
    cts:element-attribute-range-query(
      xs:QName("document"), xs:QName("book_id"), '=',
      cts:element-attribute-values(
        xs:QName("document"), xs:QName("book_id"), (), (),
        cts:element-value-query(xs:QName("format"), 'a')
      )
    )
  )))
return $res
OK. Now lets break this down and have a look.
Note: This sample requires a single range index on the attribute book_id.
I took advantage of the fact that you have the same attribute in the same namespace in both types of documents. This allowed the following:
I could use a single index
Then I used element-attribute-values for the list of book_ids
-- This was constrained by the 'format' element
The list of book_ids above was used to filter the books (range query)
Which was then further filtered by the title
This approach joins the two documents using a range index which is super-fast - especially on the integer value of the book_id
It should be noted that in this particular case, I was able to isolate the proper documents because title elements only exist in one type of document.
Now, lets look at a cleaner example of the same query.
(: A word query is used for the title so that wildcarded matching is possible:
   the sample title is 'The book' while the search term is 'book', which
   suggests a wildcard search was intended. :)
let $title-constraint := "*book*"
(: May also be a sequence of formats. :)
let $format-constraint := "a"
(: book_id values of the documents carrying the wanted format; this becomes
   the right-hand side of the range query below. :)
let $format-filter :=
  cts:element-attribute-values(
    xs:QName("document"), xs:QName("book_id"), (), (),
    cts:element-value-query(xs:QName("format"), $format-constraint)
  )
(: Title match AND book_id contained in the list computed above. :)
let $title-query :=
  cts:element-word-query(xs:QName("title"), $title-constraint, ('wildcarded'))
let $id-query :=
  cts:element-attribute-range-query(
    xs:QName("document"), xs:QName("book_id"), '=', $format-filter
  )
let $res := cts:search(doc(), cts:and-query(($title-query, $id-query)))
return $res
Maybe stating the obvious, the best approach would be to change the model so the format is in the same document as the title and can be matched by a single query.
If that's not possible, one alternative would be to turn on the uri lexicon in the database configuration (if it's not enabled already).
Assuming that the title is more selective than the format, something along the following lines might work.
(: Two URI-lexicon lookups joined through the book directory:
   1. URIs under /books/ whose <title> contains the word "book";
   2. within those books' directories, URIs of documents with <format>a</format>;
   then map the format documents back to the matching basic.xml and read only
   those.
   Fixes: the second replace now targets "formats.xml" (the file name given in
   the question; "format\.xml" never matched, so the URIs were left unchanged),
   and the "... do something ..." placeholder is turned into a comment so the
   snippet parses. :)
let $title-uris := cts:uris((), (), cts:and-query((
  cts:directory-query("/books/", "infinity"),
  cts:element-word-query(xs:QName("title"), "book")
)))
(: /books/{id}/basic.xml -> /books/{id}/ :)
let $title-dirs :=
  for $uri in $title-uris
  return fn:replace($uri, "/basic\.xml$", "/")
let $format-uris := cts:uris((), (), cts:and-query((
  cts:directory-query($title-dirs),
  cts:element-value-query(xs:QName("format"), "a")
)))
(: /books/{id}/formats.xml -> /books/{id}/basic.xml :)
let $book-docs :=
  for $uri in $format-uris
  return fn:replace($uri, "/formats\.xml$", "/basic.xml")
for $doc in fn:doc($book-docs)
return $doc (: ... do something with the basic document ... :)
The extra cost beyond the document reads consists of two lookups in the uri lexicon and the string manipulation. The benefit is in reading only the documents that match.
In general, it's better at scale to use the indexes to match the relevant documents instead of reading the documents into memory and filtering out the irrelevant documents. The cts:uris() and cts:search() functions always match using the indexes first (and only filter when the search option is specified). XPaths optimize by matching with the indexes when possible but have to fallback to filtering for some predicates. Unless you're careful, it's usually better to limit XPaths to navigation of nodes in memory.
Hoping that helps,
How can I find all books in /books/ directory with format eq 'a' and title eq 'book' by one query?
Try:
(: Return the <title> of book X from basic.xml when it contains 'book', but
   only if formats.xml for the same book lists format 'a' (the trailing
   predicate is empty otherwise, which filters the title out).
   Fixes: attribute steps use "@" instead of "#"; the stray "]" after the
   contains() predicate is moved to close the final predicate; the second
   document is "formats.xml" as named in the question. :)
doc('basic.xml')/document[@book_id='X']/title[contains(., 'book')]
  [doc('formats.xml')/document[@book_id='X'][format = 'a']]
The last predicate, if it turns empty, will result in the title to not be found. If it exists, then title will be returned.
You should, of course, replace X with your ID. And you can set the relative path to include the ID. If you have a set of ID's you want to go over, you can do this:
(: For each book id, return the <title> from {id}/basic.xml when it contains
   'book' and {id}/formats.xml lists format 'a'.
   Fixes: attribute steps use "@" instead of "#"; the stray "]" after the
   contains() predicate is moved to close the final predicate; the second
   document is "formats.xml" as named in the question. :)
for $id in ('{book_id1}', '{book_id2}')
return
  doc(concat($id, '/basic.xml'))/document[@book_id=$id]/title[contains(., 'book')]
    [doc(concat($id, '/formats.xml'))/document[@book_id=$id][format = 'a']]
You'll get the drift ;)
PS: I'm not sure if {...} is a legal URI pathpart, but I assume you'll replace it with something sensible. Otherwise, escape it with the appropriate percent-encoding.
I think I found better solution
(: Collect, as a map, the book_id values of documents under /books/ whose
   <title> matches "book", then search for documents with one of those
   book_ids that also contain <format>a</format>.
   Fix: removed the trailing comma after the last query inside
   cts:and-query(( ... )), which is a syntax error.
   NOTE(review): cts:element-query is given the string "book" as its query
   argument, exactly as posted; confirm the server accepts a string there, or
   use a word query instead. :)
let $book_ids := cts:values(
  cts:element-attribute-reference(xs:QName("document"), xs:QName("book_id")),
  (),
  ("map"),
  cts:and-query((
    cts:directory-query(("/books/"), "infinity"),
    cts:element-query(xs:QName("title"), "book")
  ))
)
return
  cts:search(
    /,
    cts:and-query((
      cts:element-attribute-value-query(xs:QName("document"), xs:QName("book_id"), map:keys($book_ids)),
      cts:element-value-query(xs:QName("format"), "a")
    ))
  )

Resources