MarkLogic 7 spawn-function - xquery

I have a REST endpoint that needs to process a long list of codes. Because this may trigger time-outs, I try to use spawn-function to do the work in the background. But it looks like the spawn-function is holding up the 200 OK response from my REST endpoint, so it's not really spawning.
I've added the log lines to check where it strands. All log lines pop up in the debug log.
With small amounts of data, this works fine. With a larger set (60k codes) it fails.
After changing the code to spawn the function for each item in $text, so 60k spawns, I get this error:
2015-07-28 10:20:02.326 Debug: Forest::insert: STRLF3-content-001-1 XDMP-INMMFULL: In-memory storage full; list: table=5%, wordsused=3%, wordsfree=95%, overhead=1%; tree: table=8%, wordsused=3%, wordsfree=97%, overhead=0%
Inserted data:
{
ProjectID: 102124,
Text: "2311\n2253\n2312\n6626\n2253\n1234"
}
Calling the spawn process:
(: ======================================================================= :)
(: ! Load transactions into separate XML files :)
(: ======================================================================= :)
(: POST handler. Reads ProjectID and Text from the JSON in $input, deletes :)
(: the documents whose URI matches /app/transactie/<ProjectID>/*, then     :)
(: hands the codes to strlf:spawner and answers with a document holding    :)
(: the string 'done'. The xdmp:log calls trace how far execution got.      :)
declare
%roxy:params("")
function strlf:post(
$context as map:map,
$params as map:map,
$input as document-node()*
) as document-node()?
{
map:put($context, "output-types", "application/json"),
xdmp:set-response-code(200, "OK"),
document {
(: Get project ID :)
let $_ := xdmp:log('TransTest - stap1', 'debug')
let $project := json:transform-from-json($input)/ns:ProjectID
let $_ := xdmp:log('TransTest - stap2', 'debug')
(: Get the newline-separated list of codes (the JSON is transformed a second time here) :)
let $codes := json:transform-from-json($input)/ns:Text
(: Clean current project :)
let $_ := xdmp:log('TransTest - stap3', 'debug')
(: URI wildcard pattern covering every document of this project :)
let $uridir := fn:concat('/app/transactie/', $project/text(), '/', '*')
let $_ := xdmp:log('TransTest - stap4', 'debug')
let $kill := xdmp:document-delete(cts:uri-match($uridir))
(: Spawn the transaction loader :)
let $_ := xdmp:log('TransTest - stap5', 'debug')
(: return 'ja' :)
(: NOTE: the first argument below is a function *call*, not a function    :)
(: item, so strlf:spawner is evaluated right here in the request and only :)
(: its result is passed to xdmp:spawn-function.                           :)
let $_ := xdmp:spawn-function(strlf:spawner($project, $codes, $uridir),
<options xmlns="xdmp:eval">
<transaction-mode>update-auto-commit</transaction-mode>
</options>)
return 'done'
}
};
Function strlf:spawner:
(: Splits $codes on line feeds and inserts one <transaction> document per   :)
(: line under /app/transactie/<project>/, skipping the line whose code is   :)
(: 'CODE' (the header). A line containing a comma contributes its first     :)
(: comma-separated field as the code and the text after "€" as the value;   :)
(: any other line is used as the code itself with value 1.                  :)
(: $uridir is accepted but not used by this function.                       :)
declare private function strlf:spawner(
  $project,
  $codes,
  $uridir
)
{
  let $project-id := $project/text()
  for $line in fn:tokenize($codes, fn:codepoints-to-string(10))
  let $has-comma := fn:contains($line, ",")
  let $code := if ($has-comma) then fn:tokenize($line, ",")[1] else $line
  let $value := if ($has-comma) then fn:substring-after($line, "€") else 1
  (: p4 is the first four characters of the code (intended to be the postcode) :)
  let $p4 := fn:substring($code[1], 1, 4)
  (: Random, unverified document id used as the last URI step :)
  let $doc-uri :=
    fn:concat('/app/transactie/', $project-id, '/', fn:string(xdmp:random(1000000000000)), '.xml')
  where $code != 'CODE'
  return
    xdmp:document-insert(
      $doc-uri,
      <transaction xmlns='http://www.dikw.nl/transactions' projectid='{$project-id}' code='{$code}' p4='{$p4}'>
        <value>{$value}</value>
      </transaction>
    )
};

Correct, you have strlf:spawner($project, $codes, $uridir) as first argument to xdmp:spawn-function, causing it to get executed, and the result being passed into xdmp:spawn-function. And since the spawner function returns an empty sequence, no error is being thrown by spawn-function.
The fix is pretty simple, wrap your spawner call in an anonymous function:
let $_ := xdmp:spawn-function(function () { strlf:spawner($project, $codes, $uridir) },
<options xmlns="xdmp:eval">
<transaction-mode>update-auto-commit</transaction-mode>
</options>)
HTH!

Related

No updating expression error while updating database (BaseX)

I am using BaseX version 8.6.6 and I am getting the error "expression must all be updating or return empty sequence" while updating the database. Below is the code:
(: Reads every '.xml' entry of the zip archive at $contentpath. For each    :)
(: entry it derives a job id from the root element's @xml:id (only when the :)
(: root element is named TEI) plus a timestamp, adds a <job> document to    :)
(: the 'testdb' database under /jobs/<jobID>.xml and emits a <result>       :)
(: element; the collected results are passed to db:output.                  :)
declare %private %updating function local:ingest-job()
{
  let $contentpath := 'D:\2019\bloomsbury-ingest-content\TEI.zip'
  let $result :=
    let $archive := file:read-binary($contentpath)
    for $entry in archive:entries($archive)[fn:ends-with(., '.xml')]
    (: Parse each entry once and reuse the root element :)
    let $root := fn:parse-xml(archive:extract-text($archive, $entry))/*
    let $rootNode := fn:name($root)
    return
      (: Attribute step is '@'; the original '/#xml:id' is not valid path syntax :)
      let $docId := $root/@xml:id/string()[$rootNode='TEI']
      (: NOTE(review): the range 'A-z' also covers the ASCII punctuation :)
      (: between 'Z' and 'a'; confirm whether 'A-Z' was intended.         :)
      let $cid := fn:replace($docId,'[a-zA-z-]','')
      let $jobID := fn:concat($cid,'-',fn:string(fn:format-dateTime(fn:current-dateTime(), '[Y0001][M01][D01][H01][m01][s01][f01]')))
      let $jobChunk :=
        <job>
          <job-info>
            <id>{$jobID}</id>
            <cid>{$cid}</cid>
          </job-info>
        </job>
      return
      (
        (: NOTE(review): this sequence mixes an updating expression (db:add) :)
        (: with a non-updating element constructor, which is presumably what :)
        (: raises the reported "must all be updating or return empty         :)
        (: sequence" error; left as posted in the question.                  :)
        db:add('testdb',$jobChunk,fn:concat('/jobs/',$jobID,'.xml')),
        <result><status>Success</status><message>Job created</message><jobid>{$jobID}</jobid></result>
      )
  return db:output(<results>{$result}</results>)
};

Inserting a document and reading it in same transaction in MarkLogic

Below is the code snippet I am using for one of the functionality
(: Returns xdmp:estimate for an unfiltered search over /count that requires :)
(: both count/@Id = $Id and child/@MatchStatus = $status (exact matches).   :)
declare function local:matchCounts($Id as xs:string, $status as xs:string) as xs:int {
  let $id-query :=
    cts:element-attribute-value-query(xs:QName("count"), xs:QName("Id"), $Id, "exact")
  let $status-query :=
    cts:element-attribute-value-query(xs:QName("child"), xs:QName("MatchStatus"), $status, "exact")
  return
    xdmp:estimate(
      cts:search(/count, cts:and-query(($id-query, $status-query)), "unfiltered")
    )
};
(: Returns the pair (even count, odd count) for $Id, in that order. :)
declare function local:saveCountsMatchC($Id as xs:string) {
  (
    local:matchCounts($Id, "even"),
    local:matchCounts($Id, "odd")
  )
};
(: Spawns one task per batch (1000 records in batches of 50, i.e. 20 tasks); :)
(: each task inserts one <count> document per record of its batch, then the  :)
(: function returns the even/odd counts from local:saveCountsMatchC.         :)
(: NOTE: this calling code performs no updates itself, so (per the answer    :)
(: below) it runs in query mode and only sees updates made before it started.:)
declare function local:matchingProcess($Id as xs:string) {
let $total-records := 1000
let $batch-size := 50
(: Running offset of the last record handed out; mutated via xdmp:set below :)
let $pagination := 0
let $bs :=
for $records in 1 to fn:ceiling($total-records div $batch-size )
(: First and last record number of this batch :)
let $start := fn:sum($pagination + 1)
let $end := fn:sum($batch-size + $pagination)
(: Advance the offset so the next iteration starts after this batch :)
let $_ := xdmp:set($pagination, $end)
return
xdmp:spawn-function
(
function() {
(: $pos is the 1-based position inside the batch; even positions get :)
(: MatchStatus='even', odd positions get MatchStatus='odd'           :)
for $each at $pos in ($start to $end)
let $id := sem:uuid-string()
let $xml := if(($pos mod 2) eq 0) then <count Id='{$Id}'><child MatchStatus='even'></child></count>
else <count Id='{$Id}'><child MatchStatus='odd'></child></count>
return xdmp:document-insert(concat("/", $id, ".xml"), $xml)
},
<options xmlns="xdmp:eval"><result>{fn:true()}</result><commit>auto</commit><update>true</update></options>
)
(: Reference the spawn results before counting :)
let $_ := $bs
return local:saveCountsMatchC($Id)
};
local:matchingProcess("1")
The requirement over here is to iterate 1000 documents using batch size of 50, so basically I am using spawn function to create 20 batches of size 50 which inserts 1000 documents in my database.
Once those documents are inserted, I need to read those documents in same transaction. Here 500 documents have MatchStatus='odd' and 500 documents have MatchStatus='even'
The query should return (500,500) as output; Instead it returns (0,0)
I am using the <result>{fn:true()}</result> option so that my next statement waits for all spawned tasks to complete, but it's not happening.
Can anybody help me with the requirement?
Note: Need to insert 1000 documents and then read them in same function call only
Your code that executes the spawns does not perform updates itself, so will run in so-called query mode. In query mode only updates from before the start of the code are visible.
You could try running in update mode (declare option xdmp:transaction-mode "update";), but usually it is easier to just spawn or eval the counting/reading of your updates as well. E.g. wrap the xdmp:estimate in an xdmp:spawn-function with result true as well.
HTH!

How to execute any update statement from Collector.xqy in Data Hub Framework?

I have a complete piece of logic in which I FIRST need to modify or delete the document in both the STAGING and FINAL databases, and finally I need to insert the filtered data into my FINAL database in the Data Hub Framework.
I put my code inside collector.xqy, but it fails with "Cannot apply an update function from a query".
The code is as below-
(: For every document in collection "ABC": if envelope/a is "123" or       :)
(: envelope/b is "345", look for a matching id1/id2 document in the FINAL  :)
(: database and, when none exists, replace NODE1 and NODE2 in place.       :)
(: The collected URIs in $a are discarded by the final `return ()`.        :)
let $a :=
for $i in cts:search(doc(),cts:collection-query(("ABC")))
return
let $uri := fn:base-uri($i)
let $a := $i/*:envelope/*:a/text()
let $b := $i/*:envelope/*:b/text()
(: $c is the document URI when the condition holds, otherwise empty :)
let $c := if(($a eq "123") or ($b eq "345")) then base-uri($i) else ()
let $condition :=
(: NOTE(review): $j is bound to the URI returned by base-uri, not to the :)
(: document node, yet path steps are applied to it below; confirm.       :)
for $j in $c
let $id1 := $j/*:envelope/*:id1/text()
let $id2 := $j/*:envelope/*:id2/text()
let $node1 := $j/*:envelope/*:NODE1
let $node2 := $j/*:envelope/*:NODE2
(: Search the FINAL database for a document in "ABC" with the same id1 or id2 :)
let $result :=
xdmp:invoke-function(
function() {
cts:search(doc(),
cts:and-query((
cts:or-query((
cts:element-value-query(xs:QName("id1"),$id1),
cts:element-value-query(xs:QName("id2"),$id2)
)),
cts:collection-query(("ABC"))
))
)
},
<options xmlns="xdmp:eval">
<database>{xdmp:database("FINAL")}</database>
</options>)
return
(: Nothing to do when a match exists; otherwise update both nodes. :)
(: These node-replace calls are the update statements the question :)
(: reports as not allowed from collector.xqy.                      :)
if(fn:exists($result) eq fn:true()) then
()
else (
xdmp:node-replace($node1,<NODE1>Replacing Node 1</NODE1>),
xdmp:node-replace($node2,<NODE2>Replacing Node 2</NODE2>)
)
return $uri
return ()
This code does not work from collector.xqy because it contains update statements. I cannot write it in writer.xqy because the initial condition, i.e. let $c := if(($a eq "123") or ($b eq "345")) then base-uri($i) else (), needs to be checked against the STAGING database.
Any Suggestions ?
You could run the check against the STAGING database from the writer by invoking it against that database:
let $c :=
xdmp:invoke-function(
function() {
if(($a eq "123") or ($b eq "345")) then base-uri($i) else ()
},
map:entry("database", $config:STAGING-DATABASE)
)
Assumes that you have imported the config library module.

How to execute the conditions Sequentially in MarkLogic?

I have a requirement where I need to check certain conditions and, after all conditions have been checked, insert the document into Database-2 with the FLAG value set to "True".
The code is as below-
for $i in cts:search(doc(),cts:collection-query(("MyCollection")))
return
let $condition_1 := if{...} then <Flag>FALSE</Flag> else ()
let $condition_2 := if{...} then <Flag>FALSE</Flag> else ()
let $condition_3 := if{...} then <Flag>FALSE</Flag> else ()
let $condition_4 := if{...} then <Flag>FALSE</Flag> else ()
Once all the conditions have been executed, they will have altered my FLAG node from "True" to "False" where applicable, as shown in the code above.
Finally, I need to check which documents still have <Flag>True</Flag> and insert only those documents into Database-2.
I am running this code from Database-1.
Any Suggestions ?
You can also use XQuery's quantified expressions in cases like this:
for $i in cts:search(doc(), cts:collection-query(("MyCollection")))
where every $check in (
(...),
(...)
) satisfies $check
return xdmp:invoke-function(etc)
Where ... still represents an expression that returns a boolean. Instead of every, you might want not(some $check in (..., ...) satisfies $check).
I typically find it's easiest to do this type of stacked condition checking where all conditions must be true in XQuery with this technique using a map:
(: Shared mutable flag holder; "check" starts out as false. :)
(: Variable binding uses ':=' and a let clause takes no trailing ';'. :)
let $conditionMap := map:map()
let $_ := map:put($conditionMap, "check", fn:false())
(: {...} stands for your own condition; the else branch flips "check" to true :)
let $condition_1 := if{...} then () else map:put($conditionMap, "check", fn:true())
let $condition_2 := if{...} then () else map:put($conditionMap, "check", fn:true())
let $condition_3 := if{...} then () else map:put($conditionMap, "check", fn:true())
let $condition_4 := if{...} then () else map:put($conditionMap, "check", fn:true())
(: "FALSE" while "check" is still false, "TRUE" once any else branch ran :)
let $flag := <Flag>{if(map:get($conditionMap, "check") eq fn:false()) then "FALSE" else "TRUE"}</Flag>
let $documentInsert := ...
An alternative technique (I find more efficient but less readable) is:
(: <Flag>TRUE</Flag> only when all four conditions hold; any failing :)
(: condition falls through to its matching else branch.              :)
(: Closing tags must match the opening <Flag> exactly (XML is case-sensitive). :)
let $flag :=
  if($condition_1) then
    if($condition_2) then
      if($condition_3) then
        if($condition_4) then <Flag>TRUE</Flag>
        else <Flag>FALSE</Flag>
      else <Flag>FALSE</Flag>
    else <Flag>FALSE</Flag>
  else <Flag>FALSE</Flag>
let $documentInsert := ...
You can probably put the documentInsert piece together from your other question.
If you have conditions that evaluate to boolean, then just evaluate the boolean result of all of those conditions, convert that to a string, and then use fn:upper-case() to get its upper-case value:
(: For each document in "MyCollection", emit <Flag>TRUE</Flag> when all four :)
(: boolean conditions hold and <Flag>FALSE</Flag> otherwise.                 :)
(: {...} stands for your own boolean expression.                             :)
for $i in cts:search(doc(), cts:collection-query(("MyCollection")))
let $condition_1 := {...}
let $condition_2 := {...}
let $condition_3 := {...}
let $condition_4 := {...}
return
  <Flag>{
    (: fn:string turns the boolean into "true"/"false"; fn:upper-case closes here :)
    fn:upper-case(fn:string(($condition_1 and $condition_2 and $condition_3 and $condition_4)))
  }</Flag>

I need some help on an XQuery sequence merge that preserves order

I am working on a function to merge a set of sequences that will preserve the order of all of the sequences as best as possible. Doing a distinct-values($sequences) on all of the sequences does not preserve the order.
I have the following MarkLogic XQuery code:
xquery version "1.0-ml";
(: Records in $map, for every item of $list except the last, the item that :)
(: directly follows it. When the item is already a key, the follower is    :)
(: merged into the existing value with fn:distinct-values.                 :)
declare function local:map-sequence($map, $list as xs:string*) {
  for $pos in (1 to fn:count($list) - 1)
  let $item := $list[$pos]
  let $follower := $list[$pos + 1]
  return
    if (fn:not(map:contains($map, $item)))
    then map:put($map, $item, $follower)
    else map:put($map, $item, fn:distinct-values((map:get($map, $item), $follower)))
};
(: Returns the distinct keys of $map that never occur as a value of any key. :)
declare function local:first($map) {
  let $followers :=
    for $k in map:keys($map)
    return map:get($map, $k)
  return
    fn:distinct-values(
      for $candidate in map:keys($map)
      where fn:not($candidate = $followers)
      return $candidate
    )
};
(: Picks what follows $key. With no entry for $key the result is empty;     :)
(: with a single follower that follower is returned; with several, only    :)
(: those followers are returned whose descendants include another follower.:)
declare function local:next($map, $key as xs:string) {
  let $followers := map:get($map, $key)
  return
    if (fn:not(map:contains($map, $key))) then ()
    else if (fn:count($followers) eq 1) then $followers
    else
      for $candidate in $followers
      let $siblings := $followers[fn:not(. = $candidate)]
      where local:descendents($map, $candidate)[. = $siblings]
      return $candidate
};
(: Depth-first list of everything reachable from $key through $map: each :)
(: direct follower is immediately followed by its own descendants.       :)
declare function local:descendents($map, $key as xs:string) {
  let $followers := map:get($map, $key)
  for $follower in $followers
  return (
    $follower,
    local:descendents($map, $follower)
  )
};
(: Follows the chain starting after $key: while local:next yields exactly :)
(: one item the walk continues from it; when it yields several items they :)
(: are returned as-is and the walk stops; when it yields none, it ends.   :)
declare function local:sequence($map, $key as xs:string) {
  let $step := local:next($map, $key)
  return
    if (fn:count($step) eq 1)
    then ($step, local:sequence($map, $step))
    else $step
};
(: Driver: feed three example sequences into one shared map, then print the :)
(: map followed by each starting key and the chain that follows from it.    :)
let $map := map:map()
(: The bindings below are used for their side effect on $map :)
let $seq1 := local:map-sequence($map, ('fred', 'barney', 'pebbles'))
let $seq2 := local:map-sequence($map, ('fred', 'wilma', 'betty', 'pebbles'))
let $seq3 := local:map-sequence($map, ('barney', 'wilma', 'betty'))
(: Keys that are never a follower of another key :)
let $first := local:first($map)
return ($map,
for $top in $first
return ($top, local:sequence($map, $top))
)
it returns
{"barney":["pebbles", "wilma"], "fred":["barney", "wilma"], "wilma":"betty", "betty":"pebbles"}
fred
barney
wilma
betty
pebbles
It still needs work. If you add:
let $seq4 := local:map-sequence($map, ('fred', 'bambam'))
bambam does not show up. I am still working on it, but if others have suggestions, then I would like to hear them.
Thanks,
Loren
As far as I understand your problem, each sequence represents a hierarchy of values, so from the sequence ("foo", "bar", "baz") we can follow that "foo" < "bar", "foo" < "baz" and "bar" < "baz" should preferably hold in the resulting ordering.
From your expected output it seems that you want the values to be sorted from the one with the smallest number of (transitive) predecessors ("fred" in your case) to the one with the most ("pebbles" with four predecessors: ("barney", "fred", "betty", "wilma")).
I do not have access to MarkLogic and its proprietary maps, so I'll use standard XQuery 3.0 maps instead. The underlying algorithms should be easy to translate.
As a first step we build a map of all immediate predecessors of each unique value found in at least one of the input sequences. Because XQuery 3.0 maps cannot be modified in-place, we use fn:fold-left(...) to build one up incrementally. Note also that even the first element of each list is added to the map with an empty sequence of predecessors.
(: Folds over the positions of $list and returns $map0 extended so that    :)
(: every item of $list is a key whose value holds the distinct items that  :)
(: directly precede it (the first item gets no new predecessor).           :)
declare function local:add-preds($map0, $list as xs:string*) {
  let $record := function($acc, $pos) {
    let $item := $list[$pos]
    let $known := map:get($acc, $item)
    (: $list[$pos - 1] is empty for the first position :)
    return map:put($acc, $item, fn:distinct-values(($known, $list[$pos - 1])))
  }
  return fn:fold-left(1 to fn:count($list), $map0, $record)
};
Next we need the transitive closure of this map of predecessors, so we need to gather all values that can be reached from a given key by a chain of predecessors. We can do this using a simple depth-first search:
(: Builds a new map with the same keys as $preds where each value is every :)
(: item reachable from the key via local:all-predecessors, minus the key.  :)
declare function local:transitive($preds) {
  let $entries :=
    for $key in map:keys($preds)
    let $reached := local:all-predecessors($preds, $key, $key)
    return map:entry($key, $reached[not(. = $key)])
  return map:merge($entries)
};
(: Depth-first walk over $succ starting at $key. $seen0 is the list of      :)
(: items already visited; the result is that list extended, in visit order, :)
(: with every newly reached item.                                           :)
declare function local:all-predecessors($succ, $key, $seen0) {
  let $visit := function($seen, $next) {
    if (not($next = $seen))
    then local:all-predecessors($succ, $next, ($seen, $next))
    else $seen
  }
  return fold-left(map:get($succ, $key), $seen0, $visit)
};
This takes your example initial predecessor map
map {
"bambam": "fred",
"pebbles": ("barney", "betty"),
"fred": (),
"wilma": ("fred", "barney"),
"barney": "fred",
"betty": "wilma"
}
and transforms it into
map {
"bambam": "fred",
"pebbles": ("barney", "fred", "betty", "wilma"),
"fred": (),
"wilma": ("fred", "barney"),
"barney": "fred",
"betty": ("wilma", "fred", "barney")
}
With that map your sorting now becomes very easy: Just take all keys in the map, order them by the number of their predecessors, and output them:
(: Driver: build the predecessor map from the four example sequences, take :)
(: its transitive closure, and list the keys by ascending predecessor count. :)
let $map0 := map{}
let $map1 := local:add-preds($map0, ('fred', 'barney', 'pebbles'))
let $map2 := local:add-preds($map1, ('fred', 'wilma', 'betty', 'pebbles'))
let $map3 := local:add-preds($map2, ('barney', 'wilma', 'betty'))
let $map4 := local:add-preds($map3, ('fred', 'bambam'))
let $trans := local:transitive($map4)
for $key in map:keys($trans)
(: NOTE(review): keys with the same count (e.g. "bambam" and "barney", one :)
(: predecessor each) have no further sort key here, so their relative      :)
(: order presumably follows map:keys; confirm if it matters.               :)
order by count(map:get($trans, $key))
return $key
This returns your desired result: "fred", "bambam", "barney", "wilma", "betty", "pebbles"

Resources