Inserting a document and reading it in same transaction in MarkLogic - xquery

Below is the code snippet I am using for one piece of functionality:
(:~
 : Estimates how many results an unfiltered search over /count has when both
 : attribute conditions hold.
 :
 : @param $Id      value matched exactly against count/@Id
 : @param $status  value matched exactly against child/@MatchStatus
 : @return the xdmp:estimate of the combined (and-ed) search
 :)
declare function local:matchCounts($Id as xs:string, $status as xs:string) as xs:int {
  let $by-id :=
    cts:element-attribute-value-query(xs:QName("count"), xs:QName("Id"), $Id, "exact")
  let $by-status :=
    cts:element-attribute-value-query(xs:QName("child"), xs:QName("MatchStatus"), $status, "exact")
  let $both := cts:and-query(($by-id, $by-status))
  return xdmp:estimate(cts:search(/count, $both, "unfiltered"))
};
(:~
 : Returns two estimates for $Id, in this order: the "even" count, then the
 : "odd" count, each obtained from local:matchCounts.
 :)
declare function local:saveCountsMatchC($Id as xs:string) {
  for $status in ("even", "odd")
  return local:matchCounts($Id, $status)
};
(: Spawns batched inserts of generated <count> documents (1000 documents in
   batches of 50) and then returns the even/odd estimates for $Id from
   local:saveCountsMatchC. :)
declare function local:matchingProcess($Id as xs:string) {
let $total-records := 1000
let $batch-size := 50
(: running offset into the record range; advanced with xdmp:set per batch :)
let $pagination := 0
let $bs :=
(: one iteration per batch: ceiling(1000 div 50) = 20 spawned tasks :)
for $records in 1 to fn:ceiling($total-records div $batch-size )
let $start := fn:sum($pagination + 1)
let $end := fn:sum($batch-size + $pagination)
(: move the offset to the end of this batch so the next one starts after it :)
let $_ := xdmp:set($pagination, $end)
return
xdmp:spawn-function
(
function() {
(: $pos is the 1-based position inside this batch, so the inserted documents
   alternate between MatchStatus 'odd' and 'even' :)
for $each at $pos in ($start to $end)
let $id := sem:uuid-string()
let $xml := if(($pos mod 2) eq 0) then <count Id='{$Id}'><child MatchStatus='even'></child></count>
else <count Id='{$Id}'><child MatchStatus='odd'></child></count>
return xdmp:document-insert(concat("/", $id, ".xml"), $xml)
},
<options xmlns="xdmp:eval"><result>{fn:true()}</result><commit>auto</commit><update>true</update></options>
)
let $_ := $bs
(: NOTE(review): per the answer in this thread, the calling code performs no
   updates itself and therefore runs in query mode, where only updates from
   before its start are visible -- which is why this read reports (0,0). The
   suggested remedies are update mode, or running the count in its own
   spawned/evaluated call. :)
return local:saveCountsMatchC($Id)
};
local:matchingProcess("1")
The requirement over here is to iterate 1000 documents using batch size of 50, so basically I am using spawn function to create 20 batches of size 50 which inserts 1000 documents in my database.
Once those documents are inserted, I need to read those documents in same transaction. Here 500 documents have MatchStatus='odd' and 500 documents have MatchStatus='even'
The query should return (500,500) as output; Instead it returns (0,0)
I am using the <result>{fn:true()}</result> option so that my next statement waits for all spawned tasks to complete, but that is not happening.
Can anybody help me with the requirement?
Note: Need to insert 1000 documents and then read them in same function call only

Your code that executes the spawns does not perform updates itself, so will run in so-called query mode. In query mode only updates from before the start of the code are visible.
You could try running in update mode (declare option xdmp:transaction-mode "update";), but usually it is easier to just spawn or eval the counting/reading of your updates as well. E.g. wrap the xdmp:estimate in an xdmp:spawn-function with result true as well.
HTH!

Related

xdmp:spawn-function() Alternative

We have around 20 million documents in the database. We split the work into batches of 10,000 and use
xdmp:spawn-function() to query over these 20 million documents and delete those that match certain conditions. But when it is run from Query Console, the query times out. Is there an alternative approach we can use so that the query doesn't time out?
xquery version "1.0-ml";
declare variable $versionToMaintain := 10;
declare variable $batchSize := 10000;
(:~
 : For each tuple in $values, deletes the documents in 'collection name 2'
 : that share the tuple's id and whose version is at or below
 : (tuple version - $versionToMaintain).
 :
 : @param $values  tuples as produced by the caller's cts:value-tuples call;
 :                 position 2 is read as the id and position 3 as the version
 : @return empty sequence (the results of xdmp:document-delete)
 :)
declare function local:delete($values) {
  for $value in $values
  (: highest version that may be removed while keeping $versionToMaintain versions :)
  let $versionToDelete := $value[3] - $versionToMaintain
  return
    if ($versionToDelete > 0) then
      let $query := cts:and-query((
        cts:collection-query('collection name 2'),
        cts:element-range-query(xs:QName('version'), '<=', xs:int($versionToDelete)),
        cts:element-value-query(xs:QName('id'), $value[2]),
        (: NOTE(review): the posted snippet elides the date value here.
           xdmp:parse-dateTime takes a picture AND a value, so the cut-off
           dateTime string must be supplied as the second argument. The 'c'
           prefix must also be declared in the module prolog. :)
        cts:element-range-query(xs:QName('c:created-on'), '<=',
          xs:dateTime(xdmp:parse-dateTime('[Y0001]-[M01]-[D01]')))
      ))
      return (cts:uris((), (), $query) ! xdmp:document-delete(.))
    else ()
};
(: Main expression: sizes the work from an estimate over "collection name 1"
   (excluding documents whose version is "1"), then spawns one delete task per
   batch of $batchSize tuples. :)
let $totalDocs :=
xdmp:estimate(
cts:search(
collection("collection name 1"),
cts:not-query(cts:element-value-query(xs:QName('version'), "1")),
"unfiltered"
)
)
let $totalBatches := fn:ceiling($totalDocs div $batchSize)
for $x in (1 to $totalBatches)
(: page through (uri, id, version) tuples: skip the previous batches and
   truncate to one batch.
   NOTE(review): per the answer in this thread, resolving these tuples here --
   in the dispatching loop rather than inside the spawned task -- is the likely
   cause of the timeout. :)
let $values :=
cts:value-tuples(
(
cts:uri-reference(),
cts:element-reference(xs:QName('id')),
cts:element-reference(xs:QName('version'))
),
("skip=" || ($x - 1) * $batchSize, "truncate=" || $batchSize),
cts:and-query((
cts:collection-query("collection name 1"),
cts:not-query(cts:element-value-query(xs:QName('version'), "1"))
))
)
return
(: hand the already-resolved tuples to a background task :)
xdmp:spawn-function(function(){
local:delete($values)
})
Well, I think the reason why it is taking so long and potentially timing out is that you are doing a lot of cts:value-tuples() in the for loop iterating over the batches, rather than pushing that work out into the spawned function that gets called for each batch.
Move the paginated cts:value-tuples() call inside of the local:delete(), and pass in the $x batch value, instead of the tuples.
xquery version "1.0-ml";
declare variable $versionToMaintain := 10;
declare variable $batchSize := 10000;
(:~
 : Processes batch number $x: looks up that batch's (uri, id, version) tuples
 : from "collection name 1" and, for each tuple, deletes the documents in
 : 'collection name 2' that share its id and whose version is at or below
 : (tuple version - $versionToMaintain).
 :
 : @param $x  1-based batch number; selects tuples via skip/truncate paging
 : @return empty sequence (the results of xdmp:document-delete)
 :)
declare function local:delete($x) {
  let $values :=
    cts:value-tuples(
      (
        cts:uri-reference(),
        cts:element-reference(xs:QName('id')),
        cts:element-reference(xs:QName('version'))
      ),
      (: skip the previous batches, then keep exactly one batch :)
      ("skip=" || ($x - 1) * $batchSize, "truncate=" || $batchSize),
      cts:and-query((
        cts:collection-query("collection name 1"),
        cts:not-query(cts:element-value-query(xs:QName('version'), "1"))
      ))
    )
  for $value in $values
  (: highest version that may be removed while keeping $versionToMaintain versions :)
  let $versionToDelete := $value[3] - $versionToMaintain
  return
    if ($versionToDelete > 0) then
      let $query := cts:and-query((
        cts:collection-query('collection name 2'),
        cts:element-range-query(xs:QName('version'), '<=', xs:int($versionToDelete)),
        cts:element-value-query(xs:QName('id'), $value[2]),
        (: NOTE(review): the posted snippet elides the date value here.
           xdmp:parse-dateTime takes a picture AND a value, so the cut-off
           dateTime string must be supplied as the second argument. The 'c'
           prefix must also be declared in the module prolog. :)
        cts:element-range-query(xs:QName('c:created-on'), '<=',
          xs:dateTime(xdmp:parse-dateTime('[Y0001]-[M01]-[D01]')))
      ))
      return (cts:uris((), (), $query) ! xdmp:document-delete(.))
    else ()
};
(: Main expression: derive the number of batches from an estimate over
   "collection name 1" (documents whose version is not "1"), then spawn one
   background task per batch number. :)
let $notFirstVersion := cts:not-query(cts:element-value-query(xs:QName('version'), "1"))
let $batchCount :=
  fn:ceiling(
    xdmp:estimate(
      cts:search(collection("collection name 1"), $notFirstVersion, "unfiltered")
    ) div $batchSize
  )
for $batch in 1 to $batchCount
return
  xdmp:spawn-function(function() { local:delete($batch) })
Deleting bulk content from MarkLogic database (with a bucket assignment) is always a challenge. Like Mads suggested, you should consider using CoRB. It is easier to tune the performance with different options available.
Secondly, you can consider using a tiered storage approach - like a range partition or query partition (provided the license requirements are met) where you can archive the documents that match the required criteria to a group of forests. You can then use the forest-clear() to do the job for you.

How to execute any update statement from Collector.xqy in Data Hub Framework?

My logic works in stages: FIRST I need to modify or delete documents in both the STAGING and FINAL databases, and finally I need to insert the filtered data into my FINAL database in the Data Hub Framework.
I put my code inside collector.xqy, but it fails with: Cannot apply an update function from a query
The code is as below-
(: Collector fragment: walks every document in collection "ABC", selects those
   whose envelope a/b values match, checks the FINAL database for documents
   with the same id1/id2, and replaces NODE1/NODE2 when none is found. :)
let $a :=
for $i in cts:search(doc(),cts:collection-query(("ABC")))
return
let $uri := fn:base-uri($i)
let $a := $i/*:envelope/*:a/text()
let $b := $i/*:envelope/*:b/text()
(: NOTE(review): $c is bound to base-uri($i), i.e. a URI value, yet the loop
   below applies path steps ($j/*:envelope/...) to it -- confirm whether the
   document $i itself was intended here. :)
let $c := if(($a eq "123") or ($b eq "345")) then base-uri($i) else ()
let $condition :=
for $j in $c
let $id1 := $j/*:envelope/*:id1/text()
let $id2 := $j/*:envelope/*:id2/text()
let $node1 := $j/*:envelope/*:NODE1
let $node2 := $j/*:envelope/*:NODE2
(: look in the FINAL database for "ABC" documents carrying either id :)
let $result :=
xdmp:invoke-function(
function() {
cts:search(doc(),
cts:and-query((
cts:or-query((
cts:element-value-query(xs:QName("id1"),$id1),
cts:element-value-query(xs:QName("id2"),$id2)
)),
cts:collection-query(("ABC"))
))
)
},
<options xmlns="xdmp:eval">
<database>{xdmp:database("FINAL")}</database>
</options>)
return
if(fn:exists($result) eq fn:true()) then
()
else (
(: these update calls are what the thread reports failing with
   "Cannot apply an update function from a query" when run from collector.xqy :)
xdmp:node-replace($node1,<NODE1>Replacing Node 1</NODE1>),
xdmp:node-replace($node2,<NODE2>Replacing Node 2</NODE2>)
)
return $uri
(: the collected URIs bound to $a are not returned; the fragment yields () :)
return ()
This code does not work from collector.xqy because it contains update statements. I cannot move it to writer.xqy because the initial condition, i.e. let $c := if(($a eq "123") or ($b eq "345")) then base-uri($i) else (), has to be checked against the STAGING database.
Any Suggestions ?
You could run the check against the STAGING database from the writer by invoking it against that database:
(: Evaluate the staging-side condition in the STAGING database. The "database"
   option takes a database ID, so the configured value is resolved with
   xdmp:database, as the other snippets in this thread do. :)
let $c :=
  xdmp:invoke-function(
    function() {
      if (($a eq "123") or ($b eq "345")) then base-uri($i) else ()
    },
    (: assumes $config:STAGING-DATABASE holds the database NAME -- confirm
       against the imported config library module :)
    map:entry("database", xdmp:database($config:STAGING-DATABASE))
  )
Assumes that you have imported the config library module.

How to update nodes in a different Database or how to update external nodes ?- XDMP-UPEXTNODES

I am trying to update a document in a different database than my current DB, but it gives me the error below:
XDMP-UPEXTNODES: xdmp:node-replace(fn:doc("/C:/Users/Downloads/abc.csv-0-2")/*:envelope/*:root/*:Status, <Status>1000</Status>) -- Cannot update external nodes
I am using the below code-
(: For each document in $result, tries to replace its Status and ErrorMessage
   elements from inside a function invoked against DATABASE-2. :)
let $temp :=
for $i in $result
(: both nodes are bound here, in the calling context, before the invoke :)
let $error := $i/*:envelope/*:ErrorMessage
let $status := $i/*:envelope/*:Status
return
if(fn:exists($i) eq fn:true()) then (
xdmp:invoke-function(
function() {
(: NOTE(review): per the thread, XDMP-UPEXTNODES is raised on these calls:
   $status and $error were captured outside the invoked function. The
   suggested remedy is to pass the URI and re-read the document inside it. :)
xdmp:node-replace($status,<Status>1000</Status>),
xdmp:node-replace($error,<ErrorMessage>Change Error in other Database-2</ErrorMessage>)
},
<options xmlns="xdmp:eval">
<database>{xdmp:database("DATABASE-2")}</database>
</options>))
else ()
I want to update the Error and Status node of my Database-2.
$result is the document i fetched from Database-2.
This code i am running from Database-1
Any Suggestions ?
You cannot pass database nodes as variable for updating purposes like that. Instead you should pass through the database uri, and get a fresh copy of the element you'd like to update inside the invoked function. Maybe you can push a bit more logic inside the invoked function to make that easier. Something like:
(: For every document in $result, re-read it by URI inside DATABASE-2 and
   replace its Status and ErrorMessage elements there. Only the URI crosses
   into the invoked function; the nodes are fetched fresh inside it. :)
for $i in $result
let $uri := xdmp:node-uri($i)
return
  xdmp:invoke-function(
    function() {
      let $doc := fn:doc($uri)
      return
        if (fn:empty($doc)) then ()
        else
          let $envelope := $doc/*:envelope
          return (
            xdmp:node-replace($envelope/*:Status, <Status>1000</Status>),
            xdmp:node-replace($envelope/*:ErrorMessage, <ErrorMessage>Change Error in other Database-2</ErrorMessage>)
          )
    },
    map:entry("database", xdmp:database("DATABASE-2"))
  )
Be careful though. It sounds like $i is pointing to the actual document in Database-2 as well, and it could easily result in dead-locks; the invoking query could be putting a read lock on $i, causing the invoked function to be unable to update it.
HTH!

I need some help on an XQuery sequence merge that preserves order

I am working on a function to merge a set of sequences that will preserve the order of all of the sequences as best as possible. Doing a distinct-values($sequences) on all of the sequences does not preserve the order.
I have the following MarkLogic XQuery code:
xquery version "1.0-ml";
(:~
 : Records, in the mutable $map, which value directly follows each value of
 : $list. A key that is already present keeps its existing followers and gains
 : the new one (duplicates removed); the last item of $list gets no entry.
 :
 : @param $map   map updated in place via map:put
 : @param $list  ordered values whose adjacent pairs are recorded
 :)
declare function local:map-sequence($map, $list as xs:string*) {
  for $idx in 1 to (fn:count($list) - 1)
  let $current := $list[$idx]
  let $following := $list[$idx + 1]
  return
    map:put(
      $map,
      $current,
      if (map:contains($map, $current))
      then fn:distinct-values((map:get($map, $current), $following))
      else $following
    )
};
(:~
 : Returns the distinct keys of $map that never occur as a value of any key,
 : i.e. the entries nothing points to.
 :)
declare function local:first($map) {
  let $followers := map:keys($map) ! map:get($map, .)
  let $roots := map:keys($map)[fn:not(. = $followers)]
  return fn:distinct-values($roots)
};
(: Chooses what follows $key. Returns () when $key is not in $map and the
   single recorded value when there is exactly one; otherwise returns each
   recorded value from whose descendants one of the other recorded values
   can be reached. :)
declare function local:next($map, $key as xs:string) {
if (map:contains($map, $key))
then if (fn:count(map:get($map, $key)) eq 1)
then map:get($map, $key)
else
let $children := map:get($map, $key)
return
for $next in $children
(: the sibling candidates other than the one under test :)
let $others := $children[fn:not(.=$next)]
let $descedents := local:descendents($map, $next)
(: NOTE(review): this condition takes the effective boolean value of a
   sequence of strings; confirm the behaviour when more than one descendant
   matches $others. :)
return if ($descedents[.=$others])
then $next
else ()
else ()
};
(:~
 : Lists everything reachable from $key in $map: each recorded value followed
 : immediately by its own descendants (depth-first; repeats are kept).
 :)
declare function local:descendents($map, $key as xs:string) {
  map:get($map, $key) ! (., local:descendents($map, .))
};
(:~
 : Follows the chain that starts after $key. While local:next yields exactly
 : one value, that value is emitted and the walk continues from it; when it
 : yields several values they are emitted as-is and the walk stops; when it
 : yields none the result is empty.
 :)
declare function local:sequence($map, $key as xs:string) {
  let $next := local:next($map, $key)
  return
    if (fn:count($next) eq 1)
    then ($next, local:sequence($map, $next))
    else $next
};
(: Main expression: feeds three sample sequences into one shared map, then
   emits the map followed by each starting key and the chain that follows it. :)
let $map := map:map()
(: the $seqN bindings are not used afterwards; the calls populate $map via map:put :)
let $seq1 := local:map-sequence($map, ('fred', 'barney', 'pebbles'))
let $seq2 := local:map-sequence($map, ('fred', 'wilma', 'betty', 'pebbles'))
let $seq3 := local:map-sequence($map, ('barney', 'wilma', 'betty'))
(: keys that never appear as a follower of another key :)
let $first := local:first($map)
return ($map,
for $top in $first
return ($top, local:sequence($map, $top))
)
it returns
{"barney":["pebbles", "wilma"], "fred":["barney", "wilma"], "wilma":"betty", "betty":"pebbles"}
fred
barney
wilma
betty
pebbles
It still needs work. If you add:
let $seq4 := local:map-sequence($map, ('fred', 'bambam'))
bambam does not show up. I am still working on it, but if others have suggestions, then I would like to hear them.
Thanks,
Loren
As far as I understand your problem, each sequence represents a hierarchy of values, so from the sequence ("foo", "bar", "baz") we can follow that "foo" < "bar", "foo" < "baz" and "bar" < "baz" should preferably hold in the resulting ordering.
From your expected output it seems that you want the values to be sorted from the one with the smallest number of (transitive) predecessors ("fred" in your case) to that with the most ones ("pebbles" with four predecessors: ("barney", "fred", "betty", "wilma")).
I do not have access to MarkLogic and its proprietary maps, so I'll use standard XQuery 3.0 maps instead. The underlying algorithms should be easy to translate.
As a first step we build a map of all immediate predecessors of each unique value found in at least one of the input sequences. Because XQuery 3.0 maps cannot be modified in-place, we use fn:fold-left(...) to build one up incrementally. Note also that even the first element of each list is added to the map with an empty sequence of predecessors.
(:~
 : Returns a copy of $map0 in which every value of $list is a key whose entry
 : also contains the value immediately before it in $list. The first value of
 : $list has no predecessor and is therefore added with whatever it already had
 : (an empty sequence for a new key).
 :
 : @param $map0  predecessor map built so far
 : @param $list  ordered values; each one's immediate predecessor is recorded
 :)
declare function local:add-preds($map0, $list as xs:string*) {
  fn:fold-left(
    1 to fn:count($list),
    $map0,
    function($acc, $pos) {
      let $value := $list[$pos]
      let $known := map:get($acc, $value)
      (: $list[0] is empty, so position 1 contributes no predecessor :)
      let $previous := $list[$pos - 1]
      return map:put($acc, $value, fn:distinct-values(($known, $previous)))
    }
  )
};
Next we need the transitive closure of this map of predecessors, so we need to gather all values that can be reached from a given key by a chain of predecessors. We can do this using a simple depth-first search:
(:~
 : Builds the transitive closure of a predecessor map: every key of $preds is
 : mapped to all values reachable from it through chains of predecessors, with
 : the key itself removed from its own entry.
 :)
declare function local:transitive($preds) {
  map:merge(
    for $value in map:keys($preds)
    let $reachable := local:all-predecessors($preds, $value, $value)
    return map:entry($value, $reachable[. != $value])
  )
};
(:~
 : Depth-first walk over the predecessor map $succ starting at $key.
 :
 : @param $succ   map from a value to its immediate predecessors
 : @param $key    value whose predecessors are expanded
 : @param $seen0  values already visited; returned unchanged if nothing new is found
 : @return $seen0 followed by every newly reached value, in visiting order
 :)
declare function local:all-predecessors($succ, $key, $seen0) {
  fn:fold-left(
    map:get($succ, $key),
    $seen0,
    function($visited, $candidate) {
      if (fn:not($candidate = $visited))
      then local:all-predecessors($succ, $candidate, ($visited, $candidate))
      else $visited
    }
  )
};
This takes your example's initial predecessor map
map {
"bambam": "fred",
"pebbles": ("barney", "betty"),
"fred": (),
"wilma": ("fred", "barney"),
"barney": "fred",
"betty": "wilma"
}
and transforms it into
map {
"bambam": "fred",
"pebbles": ("barney", "fred", "betty", "wilma"),
"fred": (),
"wilma": ("fred", "barney"),
"barney": "fred",
"betty": ("wilma", "fred", "barney")
}
With that map your sorting now becomes very easy: Just take all keys in the map, order them by the number of their predecessors, and output them:
(: Main expression: thread an empty map through local:add-preds once per input
   sequence, close it transitively, then list the keys ordered by how many
   predecessors each one has. :)
let $trans :=
  map{}
  => local:add-preds(('fred', 'barney', 'pebbles'))
  => local:add-preds(('fred', 'wilma', 'betty', 'pebbles'))
  => local:add-preds(('barney', 'wilma', 'betty'))
  => local:add-preds(('fred', 'bambam'))
  => local:transitive()
for $key in map:keys($trans)
let $rank := fn:count(map:get($trans, $key))
order by $rank
return $key
This returns your desired result: "fred", "bambam", "barney", "wilma", "betty", "pebbles"

MarkLogic 7 spawn-function

I have a REST endpoint that needs to process a long list of codes. Because this may trigger time-outs, I try to use spawn-function to do the work in the background. But it looks like the spawn-function call is holding up the 200 OK response from my REST endpoint, so it's not really spawning.
I've added the log lines to check where it strands. All log lines pop up in the debug log.
With small amounts of data, this works fine. With a larger set (60k codes) it fails.
After changing the code to spawn the function for each item in $text, so 60k spawns, I get this error:
2015-07-28 10:20:02.326 Debug: Forest::insert: STRLF3-content-001-1 XDMP-INMMFULL: In-memory storage full; list: table=5%, wordsused=3%, wordsfree=95%, overhead=1%; tree: table=8%, wordsused=3%, wordsfree=97%, overhead=0%
Inserted data:
{
ProjectID: 102124,
Text: "2311\n2253\n2312\n6626\n2253\n1234"
}
Calling the spawn process:
(: ======================================================================= :)
(: ! Load transactions into separate XML files                              :)
(:                                                                          :)
(: POST handler: reads ProjectID and Text from the JSON input, deletes the  :)
(: project's existing documents under /app/transactie/{project}/, hands the :)
(: codes to strlf:spawner and answers 200 with a document containing 'done'.:)
(: ======================================================================= :)
declare
%roxy:params("")
function strlf:post(
$context as map:map,
$params as map:map,
$input as document-node()*
) as document-node()?
{
map:put($context, "output-types", "application/json"),
xdmp:set-response-code(200, "OK"),
document {
(: Get project ID :)
let $_ := xdmp:log('TransTest - stap1', 'debug')
let $project := json:transform-from-json($input)/ns:ProjectID
let $_ := xdmp:log('TransTest - stap2', 'debug')
(: Get the newline-separated list of codes :)
let $codes := json:transform-from-json($input)/ns:Text
(: Clean current project: wildcard pattern covering every document of it :)
let $_ := xdmp:log('TransTest - stap3', 'debug')
let $uridir := fn:concat('/app/transactie/', $project/text(), '/', '*')
let $_ := xdmp:log('TransTest - stap4', 'debug')
let $kill := xdmp:document-delete(cts:uri-match($uridir))
(: Spawn the transaction load :)
let $_ := xdmp:log('TransTest - stap5', 'debug')
(: return 'ja' :)
(: NOTE(review): per the answer in this thread, strlf:spawner(...) is
   evaluated right here and only its result is handed to xdmp:spawn-function,
   so the load runs inside this request instead of in the background. :)
let $_ := xdmp:spawn-function(strlf:spawner($project, $codes, $uridir),
<options xmlns="xdmp:eval">
<transaction-mode>update-auto-commit</transaction-mode>
</options>)
return 'done'
}
};
Function strlf:spawner:
(:~
 : Splits $codes into lines and inserts one <transaction> document per line
 : under /app/transactie/{project}/, skipping the header line whose code is
 : 'CODE'.
 :
 : A line containing a comma is read as "code,...": the code is the text
 : before the first comma and the value is whatever follows the euro sign.
 : A line without a comma is the code itself, with value 1.
 :
 : @param $project  ProjectID node; its text() is used as the project id
 : @param $codes    newline-separated codes
 : @param $uridir   not used by this function (kept for existing callers)
 : @return empty sequence (the results of xdmp:document-insert)
 :)
declare private function strlf:spawner(
  $project,
  $codes,
  $uridir
)
{
  let $project-id := $project/text()
  (: Tokenize on lines :)
  for $regel in fn:tokenize($codes, fn:codepoints-to-string(10))
  let $has-value := fn:contains($regel, ",")
  let $code := if ($has-value) then fn:tokenize($regel, ",")[1] else $regel
  (: The posted literal was the mis-encoded byte sequence of the euro sign;
     restored to the real character so the amount after it is extracted. :)
  let $value := if ($has-value) then fn:substring-after($regel, "€") else 1
  (: p4 should be postcode: first four characters of the code :)
  let $p4 := fn:substring($code, 1, 4)
  (: Create unverified random doc id (no collision check is performed) :)
  let $docid := fn:string(xdmp:random(1000000000000))
  (: Build URI :)
  let $uridoc := fn:concat('/app/transactie/', $project-id, '/', $docid, '.xml')
  (: Save transaction document and skip header :)
  where $code != 'CODE'
  return
    xdmp:document-insert(
      $uridoc,
      <transaction xmlns='http://www.dikw.nl/transactions' projectid='{$project-id}' code='{$code}' p4='{$p4}'>
        <value>{$value}</value>
      </transaction>
    )
};
Correct, you have strlf:spawner($project, $codes, $uridir) as first argument to xdmp:spawn-function, causing it to get executed, and the result being passed into xdmp:spawn-function. And since the spawner function returns an empty sequence, no error is being thrown by spawn-function.
The fix is pretty simple, wrap your spawner call in an anonymous function:
(: Pass a zero-argument function so that strlf:spawner is called by the
   spawned task rather than evaluated here before xdmp:spawn-function runs. :)
let $_ := xdmp:spawn-function(function () { strlf:spawner($project, $codes, $uridir) },
<options xmlns="xdmp:eval">
<transaction-mode>update-auto-commit</transaction-mode>
</options>)
HTH!

Resources