How to get score of a registered-query - xquery

I'm trying to calculate a score for a complex match query.
For example:
if conditionA and conditionB and (conditionC or conditionD)
then score = 10
else score = 0
This is the solution I've come up with:
(: Registers a composite query: name AND country AND (city OR school).
 : Each condition is an "=" path range query using linear score-function and the //S1 collation. :)
let $idReq := cts:register(
cts:and-query((
cts:path-range-query("/person/name", "=", 'val1', ("score-function=linear", "collation=http://marklogic.com/collation//S1")),
cts:path-range-query("/person/country", "=", 'country', ("score-function=linear", "collation=http://marklogic.com/collation//S1")),
cts:or-query((
cts:path-range-query("/person/city", "=", 'city', ("score-function=linear", "collation=http://marklogic.com/collation//S1")),
(: NOTE(review): the school condition compares against the empty string. :)
cts:path-range-query("/person/school", "=", '', ("score-function=linear", "collation=http://marklogic.com/collation//S1"))
))
))
)
return
(: Runs the registered query unfiltered over all documents, passing 10 as the
 : third argument (the weight), and asks for the score of the results. :)
cts:score(cts:search(fn:doc(), cts:registered-query($idReq, ("unfiltered"), 10)))
All the indexes exist, and so does the collation.
When I execute this registered query, I always get 0 for the score.
EDITED
I've narrowed down the problem, and it can be reproduced by combining cts:register with cts:path-range-query.
(: Minimal reproduction: a single path range query, registered and then
 : searched through cts:registered-query. :)
let $query := cts:path-range-query("/person/name", "=", "val1", ("score-function=linear", "collation=http://marklogic.com/collation//S1"))
let $idReq := cts:register($query)
return
cts:score(
cts:search(fn:doc(),
cts:registered-query($idReq,("unfiltered"), 10)
(: Alternative for comparison: swap the registered query above for the raw $query. :)
(: $query :)
)
)
EDITED
Setup index config for testing:
(: Adds a string range path index on /person/name (collation //S1) to the
 : "Documents" database and saves the configuration.
 : Only the /person/name index is created by this snippet. :)
import module namespace admin = "http://marklogic.com/xdmp/admin" at "/MarkLogic/admin.xqy";
let $dbid := xdmp:database("Documents")
let $config :=
admin:database-add-range-path-index(
admin:get-configuration(), $dbid,
admin:database-range-path-index(
$dbid, "string", "/person/name",
"http://marklogic.com/collation//S1",
(: range-value-positions = false, invalid-values = "ignore" :)
fn:false(), "ignore"))
return admin:save-configuration($config)
Sample data:
(: Inserts one test document at /test/person1.xml.
 : It has name, city and country elements but no school element. :)
xdmp:document-insert(
'/test/person1.xml',
<person>
<name>val1</name>
<city>city</city>
<country>country</country>
</person>
)

Related

Count number of word occurrences working slow BaseX xquery

I want to count occurrences of words in an XML document. The query returns the correct counts, but it runs slowly.
There are only two XML files (236 KB and 155 KB), and it takes 17 seconds to produce the result.
Below is the query:
(: Counts how often each pair of adjacent words occurs in the document. :)
let $doc := db:open('test','/ieee/test.xml')
(: One flat sequence of whitespace-separated tokens drawn from every text node. :)
let $tokens := $doc//text()/fn:tokenize(fn:normalize-space(.),'\s')
(: For every position except the last, join the token with its successor.
 : The positional predicate below filters the whole of $tokens on each iteration. :)
let $stringtoken := for $x at $pos in $tokens[position() = 1 to fn:last()-1]
let $y := string-join($tokens[position() = $pos to $pos + 1],' ')
return $y
return
<results>
{
(: For each distinct pair, re-scan all pairs to count matches; this is the
 : step the question identifies as slow. :)
for $result in distinct-values($stringtoken)
let $count := count($stringtoken[. = $result])
return
<term word="{$result}" count="{$count}"></term>
}
</results>
In the above query let $count := count($stringtoken[. = $result]) is taking too much time.
Any suggestions to improve the performance of the code would be much appreciated.
The group by statement will speed up your query a lot:
(: Replacement for the final return clause of the query in the question;
 : $stringtoken is the sequence of word pairs built there. :)
return <results>{
for $grouped-token in $stringtoken
group by $token := $grouped-token
(: After grouping, $grouped-token holds every item of the current group,
 : so its count is the number of occurrences of $token. :)
let $count := count($grouped-token)
return <term word="{ $token }" count="{ $count }"/>
}</results>

MarkLogic Xquery: How to sort string values in a for loop

I've got a sequence that needs to sort a list based off earliest year vs. latest year. Due to some unique values in the year element, it is making the sort a little more complicated. Is there any way to achieve the following?
(: Plain string ordering of the raw values; the pre-/post- prefixes take part
 : in the comparison, so prefixed entries are not placed next to their year. :)
let $dates := ('1982', '2019', '2095', 'pre-1982', 'post-2095')
return
for $date in $dates
order by $date
return $date
The date element's text is usually just the year, but outlier cases have a pre- or post- prefix attached. Is there a minimal way to achieve this?
I am not sure if this is minimal, but it works:
(: Sorts years from earliest to latest, placing "pre-YYYY" just before YYYY
 : and "post-YYYY" just after it. :)
let $dates := ('1982', '2019', '2095', 'pre-1982', 'post-2095')
return
  for $date in $dates
  (: Primary key: the year as a number, so that years with different digit
   : counts order numerically rather than lexicographically. :)
  let $year :=
    xs:integer(
      if (fn:contains($date, "-"))
      then fn:substring-after($date, "-")
      else $date
    )
  (: Secondary key: -1 for "pre", 1 for "post", 0 for a plain year. :)
  let $prepost :=
    if (fn:starts-with($date, "pre"))
    then -1
    else if (fn:starts-with($date, "post"))
    then 1
    else 0
  order by $year, $prepost
  return $date
Just FYI: Definitely not minimal, but I wanted to know what fn:sort does when a sequence is returned. Turns out it does the right thing.
xquery version "3.1";
(: Direction multipliers applied to the sort keys: 1 keeps the natural order, -1 reverses it. :)
declare variable $local:ascending := 1;
declare variable $local:descending := -1;
(: Builds a two-item sort key for a year string that may carry a "pre-" or
 : "post-" prefix.
 :   $y     - the year string, e.g. "1982", "pre-1982" or "post-2095"
 :   $order - direction multiplier (1 or -1) applied to both key items
 : Returns (year * $order, prefix-rank), where prefix-rank is -1 * $order for
 : "pre", 1 * $order for "post", and 0 for any other prefix or no prefix. :)
declare function local:sort-prefixed-years ($y, $order) {
  if (fn:not(fn:contains($y, "-")))
  then (xs:integer($y) * $order, 0)
  else
    let $parts := fn:tokenize($y, "-")
    let $prefix := $parts[1]
    let $rank :=
      if ($prefix eq "pre") then -1 * $order
      else if ($prefix eq "post") then 1 * $order
      else 0
    return (xs:integer($parts[2]) * $order, $rank)
};
(: One-argument key function: the sort key for $prefixed-year with the ascending multiplier. :)
declare function local:sort-prefixed-years-ascending ($prefixed-year) {
local:sort-prefixed-years($prefixed-year, $local:ascending)
};
(: One-argument key function: the sort key for $prefixed-year with the descending multiplier. :)
declare function local:sort-prefixed-years-descending ($prefixed-year) {
local:sort-prefixed-years($prefixed-year, $local:descending)
};
let $dates := ('1982', '2019', '2095', 'pre-1982', 'post-2095')
(: Default collation (empty second argument); each item's key is the sequence
 : returned by the descending key function. :)
return sort($dates, (), local:sort-prefixed-years-descending#1)

Trouble with making xquery function recursive

I'm having some difficulty with making a function I've written recursive. I need to be able to turn this xml:
<entry ref="22">
<headword>abaishen</headword>
<part_of_speech> v. </part_of_speech>
<variant>abeishen</variant>
<variant>abaissen</variant>
<variant>abeisen</variant>
<variant>abashen</variant>
<variant>abasshen</variant>
<variant>abassen</variant>
<variant>abeeshen</variant>
<variant>abesen</variant>
<variant>abessen</variant>
<variant>abaished</variant>
<variant>-et</variant>
<variant>-it</variant>
<variant>abaisht</variant>
<variant>abaist</variant>
<variant>abasht</variant>
<variant>abast</variant>
</entry>
Into this XML -- essentially replacing the ending of any entry that begins with an "-" with the stem of the last complete entry:
<entry ref="22">
<headword>abaishen</headword>
<variant>abeishen</variant>
<variant>abaissen</variant>
<variant>abeisen</variant>
<variant>abashen</variant>
<variant>abasshen</variant>
<variant>abassen</variant>
<variant>abeeshen</variant>
<variant>abesen</variant>
<variant>abessen</variant>
<variant>abaished</variant>
<variant>abaishet</variant>
<variant>abaishit</variant>
<variant>abaisht</variant>
<variant>abaist</variant>
<variant>abasht</variant>
<variant>abast</variant>
<part_of_speech> v. </part_of_speech>
</entry>
The issue I'm running into is that second entry, the -it one, returns "abaishet" with the code I currently have:
(: Expands an abbreviated element such as <variant>-et</variant> by borrowing
 : the stem of its immediately preceding sibling; elements whose text does not
 : start with "-" are returned unchanged.
 :   $f - the element to expand :)
declare function local:hyphen-replace($f) {
let $j :=
if (substring($f/text(), 1, 1) = "-") then
let $ending := substring-after($f/text(),"-")
let $ending-length := string-length($ending)
let $previous := $f/preceding-sibling::*[1]
let $previous-length := string-length($previous)
return
if (substring($previous/text(), 1, 1) = "-") then
(: NOTE(review): when the previous sibling is itself abbreviated, this branch
 : returns the expansion of that sibling, and $ending of $f is never applied.
 : This is the behaviour the question describes ("-it" yields "abaishet"). :)
local:hyphen-replace($previous)
else
(: Drop as many trailing characters from the previous word as the ending has,
 : then append the ending. :)
element {name($f)} {concat(substring($previous,1,($previous-length - $ending-length)),$ending)}
else
$f
return $j
};
(: For each node whose part_of_speech text is exactly " v. ", returns the
 : siblings of that part_of_speech element with hyphen-abbreviated entries
 : expanded; other nodes contribute an <empty/> placeholder. The part_of_speech
 : element(s) of $nodes are appended after the results. :)
declare function local:verbCheck($nodes as node()*) as node()* {
let $d := $nodes/part_of_speech
(: NOTE(review): this outer $s is not referenced; the loop below binds its own $d and $s. :)
let $s := functx:siblings($d)
let $p := for $node in $nodes
return
let $d := $node/part_of_speech
let $s := functx:siblings($d)
return
if ($d/text() = " v. ") then
for $f in $s
let $j :=
local:hyphen-replace($f)
return ($j)
else
<empty/>
return
($p,$d)
};
<list>
{
  (: Builds one <entry> per node, copying its ref attribute and filling it with
   : the verb-checked content, then strips empty elements.
   : NOTE(review): $collection is read before this let rebinds it, so it is
   : presumably declared outside this snippet; confirm. :)
  let $collection := concat($collection, '?select=*.xml')
  let $q := collection($collection)
  let $v := local:buildNodes($q)
  let $entries :=
    for $n in $v
    (: Attribute step is @ref; "#ref" is not valid XQuery. :)
    return <entry ref="{$n/@ref}">{local:verbCheck($n)}</entry>
  return local:remove-empty-elements($entries)
}
</list>
It's obvious to me that my problem is with this piece of code in local:hyphen-replace:
if (substring($previous/text(), 1, 1) = "-") then
local:hyphen-replace($previous)
because it's calling to the immediately previous item and replacing the "-it" node with its information. But I don't know how to rewrite it to make it work recursively properly. Any suggestions would be appreciated. Thank you.

How can I format a decimal in xquery?

I'm trying to format decimals in XQuery. The decimals are currency, so the format should be ,###.##.
For example:
5573652.23 should be 5,573,652.23
and
352769 should be 352,769 (or 352,769.00 if it's easier/cleaner)
Right now I'm using this function from http://www.xqueryhacker.com/2009/09/format-number-in-xquery/, but I can't use decimals with it:
(: Formats an integer with a comma between each group of three digits,
 : e.g. 5573652 becomes "5,573,652".
 :   $i - the value to format
 : Returns the grouped digits, prefixed with "-" when $i is negative. :)
declare function local:format-int($i as xs:int) as xs:string
{
  let $is-negative := $i lt 0
  (: Digits only: a negative value loses its leading minus sign here. :)
  let $digits :=
    if ($is-negative) then fn:substring(fn:string($i), 2)
    else fn:string($i)
  (: Walk the digits right-to-left so groups are counted from the units digit. :)
  let $reversed := fn:reverse(fn:string-to-codepoints($digits))
  let $total := fn:count($reversed)
  let $separator := fn:string-to-codepoints(',')
  let $grouped :=
    for $cp at $pos in $reversed
    return
      (: A separator follows every third digit, except after the leading digit. :)
      if ($pos mod 3 eq 0 and $pos ne $total)
      then ($cp, $separator)
      else $cp
  return fn:concat(
    if ($is-negative) then '-' else (),
    fn:codepoints-to-string(fn:reverse($grouped))
  )
};
I'm using Saxon 9HE for my processor.
Any help would be greatly appreciated.
----- UPDATE -----
Based on Dimitre's answer, I modified the function to save the decimal portion and add it to the end of the return string.
New Function
(: Formats a decimal with comma-grouped integer digits and at most two
 : fraction digits, e.g. 5573652.23 becomes "5,573,652.23".
 :   $i - the value to format
 : The fraction is cut to its first two digits (not rounded) and is omitted
 : entirely when the value has no fractional part. :)
declare function local:format-dec($i as xs:decimal) as xs:string
{
  (: Integer digits come from the absolute value, so they carry no sign. :)
  let $whole := tokenize(string(abs($i)), '\.')[1]
  let $fraction := substring(tokenize(string($i), '\.')[2], 1, 2)
  (: Walk the digits right-to-left so groups are counted from the units digit. :)
  let $reversed := reverse(string-to-codepoints($whole))
  let $total := count($reversed)
  let $separator := string-to-codepoints(',')
  let $grouped :=
    for $cp at $pos in $reversed
    return
      if ($pos mod 3 eq 0 and $pos ne $total)
      then ($cp, $separator)
      else $cp
  return concat(
    if ($i lt 0) then '-' else (),
    codepoints-to-string(reverse($grouped)),
    if ($fraction != '') then concat('.', $fraction) else ()
  )
};
Use:
(: Formats the integer part with local:format-int and appends the fraction
 : digits taken from the string form of ($n - floor($n)), skipping its leading "0.".
 : NOTE(review): for a whole-number $n the fraction string is empty, so the
 : result would end with a bare "."; confirm whether that matters. :)
let $n := 5573652.23
return
concat(local:format-int(xs:int(floor($n))),
'.',
substring(string($n - floor($n)), 3)
)
This produces exactly the wanted, correct result:
5,573,652.23
This doesn't work for you?:
format-number(5573652.23,",###.##")
You can play with this here. I am pretty sure that saxon supports this function.
Edit: This function is not supported in saxon (see comments below).
With XQuery 3.0 and Saxon-HE 9.7 Parser you can do the following:
(: Two named decimal formats: local:de uses "," as the decimal separator and
 : "." for grouping; local:en uses the opposite. :)
declare decimal-format local:de decimal-separator = "," grouping-separator = ".";
declare decimal-format local:en decimal-separator = "." grouping-separator = ",";
let $numbers := (1234.567, 789, 1234567.765)
for $i in $numbers
return (
(: Each picture string is written with the separators of the format it names. :)
format-number($i,"#.###,##","local:de"),
format-number($i,"#,###.##","local:en")
)
The output is:
<?xml version="1.0" encoding="UTF-8"?>1.234,57 1,234.57 789,0 789.0 1.234.567,76
1,234,567.76

Correct way to access Multi-Dimensional Array with string indexes in Lua?

I'm trying to have a good access to multi-dimensional arrays with string indexes in Lua, here's basically what I'm trying to do:
-- Each rule is a triple {row key, column key, value}.
rules =
{
{"S_RIGHT", "A_STOP", "S_RESULT"},
}
-- Build a two-level table: matrix[row][column] = value.
matrix = {}
for _,v in pairs(rules) do
-- Create the row table the first time a row key is seen.
if( matrix[ v[1] ] == nil ) then
matrix[ v[1] ] = {}
end
matrix[ v[1] ][ v[2] ] = v[3]
end
-- Raises an error (attempt to index field 'S_NO', a nil value):
-- matrix["S_NO"] is nil, so the second index cannot be applied to it.
var = matrix["S_NO"]["S_RESULT"]
assert(var == nil, "Var should be nil")
A way to do it but quite verbose is:
-- Two-step lookup: index the second level only when the first level exists.
var = matrix["S_NO"]
if var ~= nil then
var = var["S_RESULT"]
end
assert(var == nil, "Var should be nil")
Is there a way to make the first case to work ? ( less verbose )
Ok,
Found the answer.
If matrix is going to be read-only a correct approach would be:
-- Read-only variant: any missing first-level key yields one shared empty
-- table, so a second-level lookup on it returns nil instead of raising.
local empty = {}
local fallback = {
  __index = function()
    return empty
  end,
}
setmetatable(matrix, fallback)
If I would like to allow writes and it's specifically two levels of tables, I could do:
-- Writable two-level variant: a missing first-level key gets a fresh table
-- that is stored with rawset, so later writes to it persist.
setmetatable(matrix, {
  __index = function(t, k)
    local new = {}
    rawset(t, k, new)
    return new
  end
})
Hope this helps!

Resources