Ordering by a sequence of values in XQuery - xquery

I've got some XML data that takes this form:
<products>
<product version="1.2.3"/>
<product version="1.10.0"/>
<product version="2.1.6"/>
</products>
...And so on. I want to order these in XQuery by version number. Trouble is, if I just do order by $thing/@version, it does a lexicographic comparison that puts 1.10.0 before 1.2.3, which is wrong.
What I really want to do is something like:
(: Desired (but rejected) ordering key: the version's dot-separated steps as a sequence of numbers. :)
order by tokenize($thing/@version, '\.') ! number(.)
Unfortunately this doesn't work because XQuery doesn't let you use an entire sequence as an ordering key. How can I get something like this?
A solution that doesn't rely on all the version numbers having the same number of dots would be preferable, but I'll take what I can get.

All you can do is normalize the version numbers so you can apply lexical ordering.
Determine maximum string length in a version step
Pad it with 0's (or space if you prefer, but you will have to change the code for this)
Tokenize each version, pad each version step, rejoin them
Compare based on padded version
I didn't clean up that code and pulled two functions from functx, but it works and should be fine for embedding as needed. The code is also able to deal with single letters; if necessary, you could replace all occurrences of "alpha", ... for example by "a", ...
declare namespace functx = "http://www.functx.com";
(:~
 : Concatenates $count copies of $stringToRepeat.
 :
 : @param $stringToRepeat the string to repeat; an empty sequence contributes nothing
 : @param $count          how many copies to produce; values below 1 yield ''
 : @return the repeated string
 :)
declare function functx:repeat-string
  ( $stringToRepeat as xs:string?,
    $count as xs:integer ) as xs:string {
  (: one copy per position in 1..$count, glued together without a separator :)
  string-join((1 to $count) ! $stringToRepeat, '')
};
(:~
 : Left-pads the string form of $integerToPad with '0' characters up to $length.
 :
 : @param $integerToPad the value to pad (converted with string())
 : @param $length       the total width of the result
 : @return the zero-padded string
 : @error functx:Integer_Longer_Than_Length if the value is already wider than $length
 :)
declare function functx:pad-integer-to-length
  ( $integerToPad as xs:anyAtomicType?,
    $length as xs:integer ) as xs:string {
  let $digits := string($integerToPad)
  (: number of '0' characters still needed; negative means the value is too wide :)
  let $missing := $length - string-length($digits)
  return
    if ($missing >= 0)
    then concat(functx:repeat-string('0', $missing), $digits)
    else error(xs:QName('functx:Integer_Longer_Than_Length'))
};
(:~
 : Normalizes a dotted version string so that plain string ordering matches
 : version ordering: every dot-separated step is zero-padded to $max-length
 : characters and the steps are re-joined with '.'.
 :
 : @param $a          the version string, e.g. "1.42.1"
 : @param $max-length the width every step is padded to; it must be at least
 :                    as long as the longest step, otherwise
 :                    functx:pad-integer-to-length raises an error
 : @return exactly one normalized string (string-join never yields a sequence)
 :)
declare function local:version-compare($a as xs:string, $max-length as xs:integer)
as xs:string
{
  string-join(tokenize($a, '\.') ! functx:pad-integer-to-length(., $max-length), '.')
};
let $bs := ("1.42", "1.5", "1", "1.42.1", "1.43", "2")
let $max-length := max(
for $b in $bs
return tokenize($b, '\.') ! string-length(.)
)
for $b in $bs
let $normalized := local:version-compare($b, $max-length)
order by $normalized
return $b
Returns:
1 1.5 1.42 1.42.1 1.43 2

Order by doesn't accept a sequence, but you can explicitly tokenize the versions and add them to the order by, separated by commas (note the exclusion of parens).
(: Sorts <product> elements by up to three numeric version steps.
   Each step is a separate order-by key, because a single key may not be a sequence. :)
let $products :=
  <products>
    <product version="1.2.3"/>
    <product version="1.10.0"/>
    <product version="2.1.6"/>
  </products>
for $p in $products/product
(: split the version attribute on literal dots :)
let $toks := tokenize($p/@version, '\.')
(: a missing step yields the empty sequence, since xs:integer(()) is () :)
let $main := xs:integer($toks[1])
let $point := xs:integer($toks[2])
let $sub := xs:integer($toks[3])
order by $main, $point, $sub
return $p
Update: for a variable number of tokens, you could make the order by more robust:
order by
if (count($toks) gt 0) then $main else (),
if (count($toks) gt 1) then $point else (),
if (count($toks) gt 2) then $sub else ()

I did something similar to Jens's answer:
(: Sorts products by version by left-padding every version step with spaces
   to a common width and comparing the resulting strings. :)
let $products := //product
(: length of the longest whole version string; an upper bound for any single step :)
let $max-length := max($products/@version ! string-length(.))
for $product in $products
order by string-join(
  for $part in tokenize($product/@version, '\.')
  return string-join((
    (: one space per missing character, then the step itself :)
    for $_ in 1 to $max-length - string-length($part) return ' ',
    $part)))
return $product

Here's a version that will handle an arbitrary number of segments, as long as they're numeric and all version strings have the same number of segments. It also assumes no one component ever exceeds 999.
This simply combines each numeric segment into a single big number and sorts by that.
(:~
 : Folds a dotted numeric version string into one number by treating each
 : segment as a base-1000 digit, most significant segment first.
 :
 : @param $version the version string, e.g. "1.10.0"
 : @return the sum of segment * 1000^(segments-to-the-right)
 :)
declare function local:version-order ($version as xs:string) as xs:double
{
  let $segments := fn:tokenize ($version, "\.")
  let $n := fn:count ($segments)
  return
    fn:sum (
      for $i in 1 to $n
      (: the last segment gets weight 1000^0, the one before it 1000^1, ... :)
      return xs:double ($segments[$i]) * math:pow (1000, ($n - $i))
    )
};
(: Sorts the sample products by the single numeric key computed by local:version-order. :)
let $products :=
  <products>
    <product version="1.10.0"/>
    <product version="2.1.6"/>
    <product version="1.2.3"/>
  </products>
for $p in $products/product
(: the attribute is atomized and converted to xs:string when passed to the function :)
order by local:version-order ($p/@version)
return $p

Related

XQuery - wrong indexes in substring after reverse-string function use

I'm trying to implement base64 encoding in a very simple way. In my approach (let's put aside for a second whether it's appropriate or not) I need to reverse strings and then concatenate them. After that, the concatenated string is used in the substring function. The strings are joined properly, but when I use substring, BaseX seems to lose it.
The funny thing is that substring works well for all lengths starting at 8. So substring($string, 1, 8) and higher gives correct output. But everything below that is messed up, starting with one disappeared digit: substring($string, 1, 7) (and below) results in a string of length 6.
Moreover substring can start only with 1st or 0 index. Anything greater results in empty return.
declare variable $array := [];
declare function bs:encode
( $input as xs:string ) {
bs:integer-to-binary(string-to-codepoints($input), "", $array)
} ;
declare function bs:integer-to-binary
( $input as xs:integer*, $string as xs:string, $array as array(xs:string) ) {
let $strings :=
for $i in $input
return
if ($i != 0)
then if ($i mod 2 = 0)
then bs:integer-to-binary(xs:integer($i div 2), concat($string, 0), $array)
else bs:integer-to-binary(xs:integer($i div 2), concat($string, 1), $array)
else if ($i <= 0)
then array:append($array, $string)
return bs:check-if-eight($strings)
} ;
(: Pads every string in $strings that is shorter than 8 characters by appending
   "0" characters, then hands the result to bs:concat-strings.
   NOTE(review): padding is done by recursing into this same function with the
   single extended string, and every recursion level ends with its own call to
   bs:concat-strings. So bs:concat-strings runs once per padding step, not just
   once on the final sequence. :)
declare function bs:check-if-eight
( $strings as item()+ ) {
let $fullBinary :=
for $string in $strings
return if (string-length($string) < 8)
(: too short: append one "0" and re-check via recursion :)
then bs:check-if-eight(concat($string, 0))
else $string (: add as private below :)
return bs:concat-strings($fullBinary)
} ;
declare function bs:concat-strings
( $strings as item()+ ) {
let $firstStringToConcat := functx:reverse-string($strings[position() = 1])
let $secondStringToConcat := functx:reverse-string($strings[position() = 2])
let $thirdStringToConcat := functx:reverse-string($strings[position() = 3])
let $concat :=
concat
($firstStringToConcat,
$secondStringToConcat,
$thirdStringToConcat)
(: this returns correct string of binary value for Cat word :)
return bs:divide-into-six($concat)
} ;
declare function bs:divide-into-six
( $binaryString as xs:string) {
let $sixBitString := substring($binaryString, 1, 6)
(: this should return 010000 instead i get 000100 which is not even in $binaryString at all :)
return $sixBitString
} ;
bs:encode("Cat")
I expect first six letters from string (010000) instead I get some random sequence I guess (00100). The whole module is meant to encode strings into base64 format but for now (the part i uploaded) should just throw first six bits for 'C'
Alright so I figured it out I guess.
First of all in function concat-strings I changed concat to fn:string-join. It allowed me to pass as an argument symbol that separates joined strings.
(: Debugging variant of bs:concat-strings: reverses the first three items of
   $strings and joins them with a visible 'X' separator (instead of concat),
   so that unexpected empty pieces show up in the joined string.
   The result is passed on to bs:divide-into-six. :)
declare function bs:concat-strings ( $strings as item()+ ) {
  let $firstStringToConcat := xs:string(functx:reverse-string($strings[position() = 1]))
  let $secondStringToConcat := xs:string(functx:reverse-string($strings[position() = 2]))
  let $thirdStringToConcat := xs:string(functx:reverse-string($strings[position() = 3]))
  let $concat :=
    fn:string-join(
      ($firstStringToConcat,
       $secondStringToConcat,
       $thirdStringToConcat), 'X')
  return bs:divide-into-six($concat)
};
I saw that my input looked like this:
XXXXXXXX01000011XXXXXXXXXXXXXXXXX01100001XXXXXXXXXXXXXXXXX01110100XXXXXXXX
Obviously it had to looping somewhere without clear for loop and as I novice to Xquery i must have been missed that. And indeed. I found it in check-if-eight function:
> declare function bs:check-if-eight ( $strings as item()+ ) {
> **let $fullBinary :=**
> for $string in $strings
> return if (string-length($string) < 8)
> then bs:check-if-eight(concat($string, 0))
> else $string (: add as private below :)
> **return bs:concat-strings($fullBinary)** } ;
Despite being above FOR keyword, $fullBinary variable was in a loop and produced empty spaces(?) and it was clearly shown when i used X as a separator.
DISCLAIMER: I thought about this before and used functx:trim but for some reason it doesnt work like I expected. So it might not for you too if having similar issue.
At this point it was clear that let $fullBinary cannot be bound in a FLWOR expression like this, or at least cannot trigger the concat-strings function from there. I changed it and now it produces only a string. Now I'm trying to figure out a new sequence for running the whole module, but I think the main problem here is solved.

I need some help on an XQuery sequence merge that preserves order

I am working on a function to merge a set of sequences that will preserve the order of all of the sequences as best as possible. Doing a distinct-values($sequences) on all of the sequences does not preserve the order.
I have the following MarkLogic XQuery code:
xquery version "1.0-ml";
(: Records the "is followed by" relation of $list in $map: for every element
   except the last, the element is the key and its immediate successor is
   added to the key's value. $map is updated through map:put.
   Returns the sequence of map:put results, one per index.
   NOTE(review): presumably empty in MarkLogic; confirm. :)
declare function local:map-sequence($map, $list as xs:string*) {
(: the last element has no successor, so it never becomes a key here :)
let $count := fn:count($list) - 1
return for $idx in (1 to $count)
return if (map:contains($map, $list[$idx]))
(: key already present: merge the new successor with the existing ones, without duplicates :)
then map:put($map, $list[$idx], fn:distinct-values((map:get($map, $list[$idx]), $list[$idx + 1])))
else map:put($map, $list[$idx], $list[$idx + 1])
};
(: Returns the distinct keys of $map that never occur as a value of any key,
   i.e. the entries that nothing else points to. :)
declare function local:first($map) {
  let $keys := map:keys($map)
  (: every value stored under any key :)
  let $targets := for $k in $keys return map:get($map, $k)
  return
    distinct-values(
      for $k in $keys
      where not($k = $targets)
      return $k
    )
};
(: Picks the successor(s) of $key to follow next.
   - $key not in $map: returns the empty sequence.
   - exactly one value stored under $key: returns that value.
   - several values: returns each value that has at least one of the *other*
     values among its descendants (as computed by local:descendents). :)
declare function local:next($map, $key as xs:string) {
if (map:contains($map, $key))
then if (fn:count(map:get($map, $key)) eq 1)
then map:get($map, $key)
else
let $children := map:get($map, $key)
return
for $next in $children
(: the sibling candidates other than the one being examined :)
let $others := $children[fn:not(.=$next)]
let $descedents := local:descendents($map, $next)
(: keep $next only if some sibling is reachable from it :)
return if ($descedents[.=$others])
then $next
else ()
else ()
};
(: Returns everything reachable from $key by repeatedly following the values
   stored in $map: each direct value is followed by its own descendants. :)
declare function local:descendents($map, $key as xs:string) {
  let $children := map:get($map, $key)
  for $c in $children
  return ($c, local:descendents($map, $c))
};
(: Follows the chain of successors starting after $key.
   - no successor from local:next: returns the empty sequence.
   - exactly one: returns it, followed by the chain continuing from it.
   - several: returns just those candidates, without following any further. :)
declare function local:sequence($map, $key as xs:string) {
  let $candidates := local:next($map, $key)
  let $n := fn:count($candidates)
  return
    if ($n eq 0) then ()
    else if ($n eq 1) then ($candidates, local:sequence($map, $candidates))
    else $candidates
};
let $map := map:map()
let $seq1 := local:map-sequence($map, ('fred', 'barney', 'pebbles'))
let $seq2 := local:map-sequence($map, ('fred', 'wilma', 'betty', 'pebbles'))
let $seq3 := local:map-sequence($map, ('barney', 'wilma', 'betty'))
let $first := local:first($map)
return ($map,
for $top in $first
return ($top, local:sequence($map, $top))
)
it returns
{"barney":["pebbles", "wilma"], "fred":["barney", "wilma"], "wilma":"betty", "betty":"pebbles"}
fred
barney
wilma
betty
pebbles
It still needs work. If you add:
let $seq4 := local:map-sequence($map, ('fred', 'bambam'))
bambam does not show up. I am still working on it, but if others have suggestions, then I would like to hear them.
Thanks,
Loren
As far as I understand your problem, each sequence represents a hierarchy of values, so from the sequence ("foo", "bar", "baz") we can follow that "foo" < "bar", "foo" < "baz" and "bar" < "baz" should preferably hold in the resulting ordering.
From your expected output it seems that you want the values to be sorted from the one with the smallest number of (transitive) predecessors ("fred" in your case) to that with the most ones ("pebbles" with four predecessors: ("barney", "fred", "betty", "wilma")).
I do not have access to MarkLogic and its proprietary maps, so I'll use standard XQuery 3.0 maps instead. The underlying algorithms should be easy to translate.
As a first step we build a map of all immediate predecessors of each unique value found in at least one of the input sequences. Because XQuery 3.0 maps cannot be modified in-place, we use fn:fold-left(...) to build one up incrementally. Note also that even the first element of each list is added to the map with an empty sequence of predecessors.
(: Returns a copy of $map0 extended with the immediate-predecessor information
   contained in $list: every element becomes a key, and the element directly
   before it in $list (if any) is added to that key's predecessors. :)
declare function local:add-preds($map0, $list as xs:string*) {
  let $step :=
    function($acc, $pos) {
      let $value := $list[$pos]
      let $known := map:get($acc, $value)
      (: for $pos = 1 this is $list[0], the empty sequence: no predecessor :)
      let $previous := $list[$pos - 1]
      return map:put($acc, $value, fn:distinct-values(($known, $previous)))
    }
  return fn:fold-left(1 to fn:count($list), $map0, $step)
};
Next we need the transitive closure of this map of predecessors, so we need to gather all values that can be reached from a given key by a chain of predecessors. We can do this using a simple depth-first search:
(: Builds the transitive closure of a predecessor map: every key is mapped to
   all values reachable from it through chains of predecessors, excluding the
   key itself. :)
declare function local:transitive($preds) {
  let $entries :=
    for $name in map:keys($preds)
    (: the search is seeded with the key itself, which is filtered out again below :)
    let $reachable := local:all-predecessors($preds, $name, $name)
    return map:entry($name, $reachable[not(. = $name)])
  return map:merge($entries)
};
(: Depth-first search over the map $succ starting at $key.
   $seen0 holds the values visited so far; the result is $seen0 extended with
   every value newly reached from $key, in visiting order. :)
declare function local:all-predecessors($succ, $key, $seen0) {
  let $visit :=
    function($visited, $candidate) {
      if (not($candidate = $visited))
      (: first visit: record the candidate and continue the search from it :)
      then local:all-predecessors($succ, $candidate, ($visited, $candidate))
      else $visited
    }
  return fold-left(map:get($succ, $key), $seen0, $visit)
};
This takes your example's initial predecessor map
map {
"bambam": "fred",
"pebbles": ("barney", "betty"),
"fred": (),
"wilma": ("fred", "barney"),
"barney": "fred",
"betty": "wilma"
}
and transforms it into
map {
"bambam": "fred",
"pebbles": ("barney", "fred", "betty", "wilma"),
"fred": (),
"wilma": ("fred", "barney"),
"barney": "fred",
"betty": ("wilma", "fred", "barney")
}
With that map your sorting now becomes very easy: Just take all keys in the map, order them by the number of their predecessors, and output them:
let $map0 := map{}
let $map1 := local:add-preds($map0, ('fred', 'barney', 'pebbles'))
let $map2 := local:add-preds($map1, ('fred', 'wilma', 'betty', 'pebbles'))
let $map3 := local:add-preds($map2, ('barney', 'wilma', 'betty'))
let $map4 := local:add-preds($map3, ('fred', 'bambam'))
let $trans := local:transitive($map4)
for $key in map:keys($trans)
order by count(map:get($trans, $key))
return $key
This returns your desired result: "fred", "bambam", "barney", "wilma", "betty", "pebbles"

Sorting multiple maps in marklogic 8

This is more of an XQuery than MarkLogic. I have three map:map and each map has key-value pair of "id" and score. I would like to sort all the distinct ids based on the score from each maps.
For eg:
map1 : 1:2048, 5:2000
map2 : 2:5000, 1:1000, 4:3000
map3 : 6:100, 7:5000, 2:2000
In the above example, each map is id:score for key value (did not know how to represent here :))..
I want the sorted list of id from three maps based on score..
Is there a good way or better way of doing the sorting, or do I have to union the keys of the map and iterate the sequence of keys and sort them ?
This seems like a great use case for folding. Its part of Xquery 3.0 spec.
Folding can go through a sequence of items and gets the result for each item as it goes through. In this example $combinedMaps is the result of the last call and $mapToMerge is the item in the sequence it is currently going through.
Here an example of what you would want to do.
(: For every key of $mapA, stores into $newMap the larger of $mapA's value and
   $mapB's value for that key, and returns $newMap.
   Only the keys of $mapA are visited; keys that exist solely in $mapB are not
   copied by this call. :)
declare function local:sortMaps(
$newMap as map:map,
$mapA as map:map,
$mapB as map:map
) as map:map {
(: $build is never read afterwards; the function returns $newMap,
   which the map:put calls below write into :)
let $build :=
for $key in map:keys($mapA)
let $otherMapValue :=
(: a key missing from $mapB counts as 0 :)
(map:get($mapB, $key), 0)[1]
let $value := map:get($mapA, $key)
return
if ($value gt $otherMapValue) then (
map:put($newMap, $key, $value)
) else (
map:put($newMap, $key, $otherMapValue)
)
return $newMap
};
let $map1 :=
map:new((
map:entry("1",2048),
map:entry("5",2000)
))
let $map2 :=
map:new((
map:entry("2",5000),
map:entry("1",1000),
map:entry("4",3000)
))
let $map3 :=
map:new((
map:entry("6",100),
map:entry("7",5000),
map:entry("2",2000)
))
let $maps := ($map1, $map2, $map3)
return
fn:fold-left(
function($combinedMaps, $mapToMerge) {
let $newMap := map:map()
let $newMap := local:sortMaps($newMap, $combinedMaps, $mapToMerge)
let $newMap := local:sortMaps($newMap, $mapToMerge, $combinedMaps)
return $newMap
},
$maps[1],
$maps
)

assigning an operator to a variable in xquery

is there a way to assign a numeric operator to a variable in Xquery?
I have to perform an arithmetic expression on a given pair of values depending upon a node tag.
I've managed to do this but its resulted in a lot of duplicate code. I'd like to simplify the query so that instead of:
Function for Add
Repeated if code - this calls out to other functions but is still repeated
$value1 + $value2
Function for Minus
Repeated if code
$value1 - $value2
etc for multiply, div etc
I'd like to set up a function and send a variable to it, something similar to this:
$value1 $operator $value2
Is there a simple way to do this in xquery?
thank you for your help.
If your query processor supports XQuery 3.0, you can use function items for that:
(: Applies the two-argument function item $operator to $x and $y
   and returns twice its result. :)
declare function local:foo($operator, $x, $y) {
  2 * $operator($x, $y)
};
local:foo(...) can then be called like this:
let $plus := function($a, $b) { $a + $b },
$mult := function($a, $b) { $a * $b }
return (
local:foo($plus, 1, 2),
local:foo($mult, 3, 4)
)
Why don't you use a simple if-else construct? E.g.
if (repeated code says you should add) then
$value1 + $value2
else
$value1 - $value2
You could also simple put the repeated code in another function instead of copying the code.

Updating counter in XQuery

I want to create a counter in xquery. My initial attempt looked like the following:
let $count := 0
for $prod in $collection
let $count := $count + 1
return
<counter>{$count }</counter>
Expected result:
<counter>1</counter>
<counter>2</counter>
<counter>3</counter>
Actual result:
<counter>1</counter>
<counter>1</counter>
<counter>1</counter>
The $count variable either failing to update or being reset. Why can't I reassign an existing variable? What would be a better way to get the desired result?
Try using 'at':
for $d at $p in $collection
return
element counter { $p }
This will give you the position of each '$d'. If you want to use this together with the order by clause, this won't work since the position is based on the initial order, not on the sort result. To overcome this, just save the sorted result of the FLWOR expression in a variable, and use the at clause in a second FLWOR that just iterates over the first, sorted result.
let $sortResult := for $item in $collection
order by $item/id
return $item
for $sortItem at $position in $sortResult
return <item position="{$position}"> ... </item>
As @Ranon said, all XQuery values are immutable, so you can't update a variable. But if you really need an updateable number (which shouldn't be too often), you can use recursion:
(: Emits one <count> element per item of $seq, numbered upwards from $count + 1.
   The running counter is threaded through the recursion as an argument,
   because variables cannot be reassigned. :)
declare function local:loop($seq, $count) {
  if (exists($seq)) then
    let $current := $count + 1
    return (
      <count>{ $current }</count>,
      (: continue with everything after the first item :)
      local:loop(subsequence($seq, 2), $current)
    )
  else ()
};
local:loop($collection, 0)
This behaves exactly as you intended with your example.
In XQuery 3.0 a more general version of this function is even defined in the standard library: fn:fold-right($f, $zero, $seq)
That said, in your example you should definitely use at $count as shown by @tohuwawohu.
Immutable variables
XQuery is a functional programming language, which involves amongst others immutable variables, so you cannot change the value of a variable. On the other hand, a powerful collection of functions is available to you, which solves lots of daily programming problems.
let $count := 0
for $prod in $collection
let $count := $count + 1 (: a new $count that shadows the outer one; always 0 + 1 :)
return
<counter>{$count }</counter>
let $count in line 1 defines this variable for the whole scope, which is all following lines in this case. let $count in line 3 defines a new $count which is 0+1, valid in all following lines within this code block and undefined outside it. So you indeed add one to $count three times, but discard the result immediately.
BaseX' query info shows the optimized version of this query which is
for $prod in $collection
return element { "counter" } { 1 }
The solution
To get the total number of elements in $collection, you can just use
return count($collection)
For a list of XQuery functions, you could have a look at the XQuery part of functx which contains both a list of XQuery functions and also some other helpful functions which can be included as a module.
Specific to MarkLogic you can also use xdmp:set. But this breaks functional language assumptions, so use it conservatively.
http://docs.marklogic.com/5.0doc/docapp.xqy#display.xqy?fname=http://pubs/5.0doc/apidoc/ExsltBuiltins.xml&category=Extension&function=xdmp:set
For an example of xdmp:set in real-world code, the search parser https://github.com/mblakele/xqysp/blob/master/src/xqysp.xqy might be helpful.
All the solutions above are valid, but I would like to mention that you can use the XQuery Scripting extension to set variable values:
variable $count := 0;
for $prod in (1 to 10)
return {
$count := $count + 1;
<counter>{$count}</counter>
}
You can try this example live at http://www.zorba-xquery.com/html/demo#twh+3sJfRpHhZR8pHhOdsmqOTvQ=
Use xdmp:set instead of the below query
(: MarkLogic-specific counter: xdmp:set assigns a new value to $count on each iteration. :)
let $count := 0
for $prod in (1 to 4)
return (
  xdmp:set($count, number($count + 1)),
  <counter>{$count }</counter>
)
I think you are looking for something like:
XQUERY:
for $x in (1 to 10)
return
<counter>{$x}</counter>
OUTPUT:
<counter>1</counter>
<counter>2</counter>
<counter>3</counter>
<counter>4</counter>
<counter>5</counter>
<counter>6</counter>
<counter>7</counter>
<counter>8</counter>
<counter>9</counter>
<counter>10</counter>

Resources