Trouble with making xquery function recursive - xquery

I'm having some difficulty with making a function I've written recursive. I need to be able to turn this xml:
<entry ref="22">
<headword>abaishen</headword>
<part_of_speech> v. </part_of_speech>
<variant>abeishen</variant>
<variant>abaissen</variant>
<variant>abeisen</variant>
<variant>abashen</variant>
<variant>abasshen</variant>
<variant>abassen</variant>
<variant>abeeshen</variant>
<variant>abesen</variant>
<variant>abessen</variant>
<variant>abaished</variant>
<variant>-et</variant>
<variant>-it</variant>
<variant>abaisht</variant>
<variant>abaist</variant>
<variant>abasht</variant>
<variant>abast</variant>
</entry>
Into this XML -- essentially completing any variant that begins with a "-" by attaching its ending to the stem of the last complete variant:
<entry ref="22">
<headword>abaishen</headword>
<variant>abeishen</variant>
<variant>abaissen</variant>
<variant>abeisen</variant>
<variant>abashen</variant>
<variant>abasshen</variant>
<variant>abassen</variant>
<variant>abeeshen</variant>
<variant>abesen</variant>
<variant>abessen</variant>
<variant>abaished</variant>
<variant>abaishet</variant>
<variant>abaishit</variant>
<variant>abaisht</variant>
<variant>abaist</variant>
<variant>abasht</variant>
<variant>abast</variant>
<part_of_speech> v. </part_of_speech>
</entry>
The issue I'm running into is that the second hyphenated entry, the -it one, returns "abaishet" instead of "abaishit" with the code I currently have:
(:~
 : Expands a hyphen-abbreviated element into a full form.
 :
 : If the text of $f starts with "-", the characters after the hyphen are
 : treated as a replacement ending and the nearest preceding sibling element
 : supplies the stem. Any other element is returned unchanged.
 :
 : @param $f the element to inspect (untyped here)
 : @return $f itself, a new element with the same name as $f, or the result
 :         of the recursive call on the preceding sibling
 :)
declare function local:hyphen-replace($f) {
let $j :=
(: Only text that begins with "-" is treated as an abbreviated ending. :)
if (substring($f/text(), 1, 1) = "-") then
let $ending := substring-after($f/text(),"-")
let $ending-length := string-length($ending)
(: Nearest preceding sibling element, whatever its name. :)
let $previous := $f/preceding-sibling::*[1]
let $previous-length := string-length($previous)
return
(: NOTE(review): when the preceding sibling is itself abbreviated, this branch
   returns the expansion of that sibling; $ending taken from $f is not used here. :)
if (substring($previous/text(), 1, 1) = "-") then
local:hyphen-replace($previous)
else
(: Drop as many trailing characters from the previous form as the new ending has, then append the ending. :)
element {name($f)} {concat(substring($previous,1,($previous-length - $ending-length)),$ending)}
else
$f
return $j
};
(:~
 : For every node in $nodes whose part_of_speech text is exactly " v. ",
 : passes each sibling of that part_of_speech element through
 : local:hyphen-replace; every other node contributes an <empty/> placeholder.
 :
 : @param $nodes the nodes to process
 : @return the per-node results followed by the part_of_speech children of $nodes
 :)
declare function local:verbCheck($nodes as node()*) as node()* {
let $d := $nodes/part_of_speech
(: NOTE(review): this outer $s is never read; the loop below binds its own $d and $s. :)
let $s := functx:siblings($d)
let $p := for $node in $nodes
return
let $d := $node/part_of_speech
(: functx:siblings is not defined in this snippet; presumably it returns the other children of the same parent. :)
let $s := functx:siblings($d)
return
(: The comparison includes the surrounding spaces of " v. ". :)
if ($d/text() = " v. ") then
for $f in $s
let $j :=
local:hyphen-replace($f)
return ($j)
else
(: Placeholder for nodes that are not verbs. :)
<empty/>
return
($p,$d)
};
<list>
{
(: Builds one <entry> per node returned by local:buildNodes, carrying over its
   ref attribute and filling it with the output of local:verbCheck.
   $collection, local:buildNodes and local:remove-empty-elements are not
   defined in this snippet and are expected to be declared elsewhere. :)
let $collection := concat($collection, '?select=*.xml')
let $q := collection($collection)
let $v := local:buildNodes($q)
let $entries :=
for $n in $v
(: The ref attribute is read with the attribute axis (@ref). :)
return <entry ref="{$n/@ref}">{local:verbCheck($n)}</entry>
return local:remove-empty-elements($entries)
}
</list>
It's obvious to me that my problem is with this piece of code in local:hyphen-replace:
if (substring($previous/text(), 1, 1) = "-") then
local:hyphen-replace($previous)
because it's calling the function on the immediately previous item and replacing the "-it" node with that item's information. But I don't know how to rewrite it so that the recursion works properly. Any suggestions would be appreciated. Thank you.

Related

Count number of word occurrences working slow BaseX xquery

I want to count occurrences of the words in an XML document. The query gives the correct count, but it runs slowly.
There are only two XML files (236 KB and 155 KB), and it takes 17 seconds to produce the result.
Below is the query:
(: Counts how often each pair of adjacent whitespace-separated tokens occurs in the document. :)
let $doc := db:open('test','/ieee/test.xml')
(: Every text node is whitespace-normalized and then split on single whitespace characters. :)
let $tokens := $doc//text()/fn:tokenize(fn:normalize-space(.),'\s')
(: For each token except the last, join it with its successor into a two-word string. :)
let $stringtoken := for $x at $pos in $tokens[position() = 1 to fn:last()-1]
let $y := string-join($tokens[position() = $pos to $pos + 1],' ')
return $y
return
<results>
{
(: The whole $stringtoken sequence is filtered again for every distinct value. :)
for $result in distinct-values($stringtoken)
let $count := count($stringtoken[. = $result])
return
<term word="{$result}" count="{$count}"></term>
}
</results>
In the above query let $count := count($stringtoken[. = $result]) is taking too much time.
Any suggestions to improve the performance of the code would be much appreciated.
The group by statement will speed up your query a lot:
return <results>{
for $grouped-token in $stringtoken
group by $token := $grouped-token
let $count := count($grouped-token)
return <term word="{ $token }" count="{ $count }"/>
}</results>

I don't know the syntax error in this xquery code

(: Attempt to list countries that have more than 25 airports.
   This is the non-working version from the question; the assignment line is kept as asked about. :)
let $d := doc('mondial.xml')
let $airports := $d/mondial/airport
let $countries := $d/mondial/country
for $data1 in $countries
let $count :=xs:integer("0")
let $name :=$data1/name
let $car_code :=$data1/@car_code
for $data2 in $airports
where $car_code = $data2/@country
(: Not valid XQuery: a FLWOR expression has no assignment statement, so a bound variable cannot be updated like this. :)
$count:= $count+ 1
where xs:integer($count)>25
return
<country>
<name>{data($name)}</name>
<count>{data($count)}</count>
</country>
This is my code, when I ran this code, there is syntax error:
error: syntax error, unexpected $[err:XPST0003]
$count:=$count+1
Perhaps
(: Lists every country with more than 25 airports, together with the airport count. :)
let $d := doc('mondial.xml')
let $airports := $d/mondial/airport
let $countries := $d/mondial/country
for $country in $countries
(: Airports are matched to a country by comparing their country attribute with the country's car_code attribute. :)
let $airport-count := count($airports[@country = $country/@car_code])
where $airport-count > 25
return
<country>
{
$country/name,
<count>{$airport-count}</count>
}
</country>
Actually I found the answer
(: Lists the name and airport count of every country with more than 25 airports. :)
let $d := doc('mondial.xml')
let $airports := $d/mondial/airport
let $countries := $d/mondial/country
for $data1 in $countries
let $name :=$data1/name
let $car_code :=$data1/@car_code
(: count() over a filtered sequence replaces the attempted counter variable. :)
let $count := count($airports[@country eq $car_code])
where $count>25
return
<country>
<name>{data($name)}</name>
<count>{data($count)}</count>
</country>

Control selection order from a sequence

How do I control the order of selection from a for $x in (...) ? In the last line of XQuery below, I want the result to be in the order $retmax, $retmin, but it comes out in reverse order.
<result>
{
(: Finds the countries with the highest and the lowest population density
   (population attribute divided by area attribute). :)
let $max := max(doc("countries.xml")//country/(@population div @area))
let $min := min(doc("countries.xml")//country/(@population div @area))
for $country in doc("countries.xml")//country
let $density := $country/(@population div @area)
let $ret_min := if ($density = $min)
then <lowest density="{$density}">{data($country/@name)}</lowest>
else ()
let $ret_max := if ($density = $max)
then <highest density="{$density}">{data($country/@name)}</highest>
else ()
(: This inner loop runs once per country, so the overall output follows the
   order of the countries in the document, not the order inside the parentheses. :)
for $r in ($ret_max, $ret_min) return $r
}
</result>
produces:
<result>
<lowest density="0.026752619966905682">Greenland</lowest>
<highest density="31052.3125">Macau</highest>
</result>
but I want:
<result>
<highest density="31052.3125">Macau</highest>
<lowest density="0.026752619966905682">Greenland</lowest>
</result>
Here's how I'd write it...
(: Sort the countries by population density, ascending, then pick both ends. :)
let $ordered_countries := for $country in doc("countries.xml")//country
let $density := $country/(@population div @area)
order by $density
return $country
let $low := $ordered_countries[1]
let $high := $ordered_countries[fn:last()]
(: Emit the highest density first and the lowest second, which is the order the question asks for. :)
return (
<highest density="{$high/(@population div @area)}">{data($high/@name)}</highest>,
<lowest density="{$low/(@population div @area)}">{data($low/@name)}</lowest>
)

How can I format a decimal in xquery?

I'm trying to format decimals in XQuery. The decimals are currency, so the format should be ,###.##.
For example:
5573652.23 should be 5,573,652.23
and
352769 should be 352,769 (or 352,769.00 if it's easier/cleaner)
Right now I'm using this function from http://www.xqueryhacker.com/2009/09/format-number-in-xquery/, but I can't use decimals with it:
(:~
 : Formats an integer with a comma between every group of three digits,
 : counted from the right, e.g. 5573652 becomes "5,573,652".
 :
 : @param $i the integer to format
 : @return the grouped digits, prefixed with "-" when $i is negative
 :)
declare function local:format-int($i as xs:int) as xs:string
{
(: The sign is stripped here and re-attached at the end so it never takes part in the grouping. :)
let $negative := $i lt 0
let $digits :=
if ($negative) then fn:substring(fn:string($i), 2)
else fn:string($i)
(: Work on the reversed code points so groups are counted from the least significant digit. :)
let $reversed := fn:reverse(fn:string-to-codepoints($digits))
let $total := fn:count($reversed)
let $separator := fn:string-to-codepoints(',')
let $grouped :=
for $cp at $pos in $reversed
return
(: A separator follows every third digit, except after the most significant one. :)
if ($pos mod 3 eq 0 and $pos ne $total)
then ($cp, $separator)
else $cp
return fn:concat(
if ($negative) then '-' else (),
fn:codepoints-to-string(fn:reverse($grouped))
)
};
I'm using Saxon 9HE for my processor.
Any help would be greatly appreciated.
----- UPDATE -----
Based on Dimitre's answer, I modified the function to save the decimal portion and add it to the end of the return string.
New Function
(:~
 : Formats a decimal with a comma between every group of three integer digits
 : and keeps at most the first two fractional digits (cut off, not rounded).
 :
 : @param $i the decimal to format
 : @return e.g. "5,573,652.23"; no decimal point is emitted when there is no fractional part
 :)
declare function local:format-dec($i as xs:decimal) as xs:string
{
(: Split the unsigned value at the decimal point: [1] integer digits, [2] fractional digits (if any). :)
let $parts := tokenize(string(abs($i)), '\.')
let $whole := $parts[1]
let $fraction := substring($parts[2], 1, 2)
(: Work on the reversed code points so groups are counted from the least significant digit. :)
let $reversed := reverse(string-to-codepoints($whole))
let $total := count($reversed)
let $separator := string-to-codepoints(',')
let $grouped :=
for $cp at $pos in $reversed
return
(: A separator follows every third digit, except after the most significant one. :)
if ($pos mod 3 eq 0 and $pos ne $total)
then ($cp, $separator)
else $cp
let $sign := if ($i lt 0) then '-' else ''
let $suffix := if ($fraction != '') then concat('.', $fraction) else ''
return concat($sign, codepoints-to-string(reverse($grouped)), $suffix)
};
Use:
(: Formats $n by grouping its integer part with local:format-int and appending its fractional digits. :)
let $n := 5573652.23
(: Work on the absolute value: floor() of a negative number would otherwise move the integer part one step down. :)
let $magnitude := abs($n)
let $whole := floor($magnitude)
(: string() of the fractional remainder looks like "0.23"; the digits start at position 3. A whole number leaves "". :)
let $fraction := substring(string($magnitude - $whole), 3)
return
concat(if ($n lt 0) then '-' else '',
local:format-int(xs:int($whole)),
(: The decimal point is only emitted when there are fractional digits. :)
if ($fraction ne '') then concat('.', $fraction) else ''
)
This produces exactly the wanted, correct result:
5,573,652.23
This doesn't work for you?:
format-number(5573652.23,",###.##")
You can play with this here. I am pretty sure that saxon supports this function.
Edit: This function is not supported in saxon (see comments below).
With XQuery 3.0 and Saxon-HE 9.7 Parser you can do the following:
(: Two named decimal formats: local:de uses "," as decimal separator and "." for grouping; local:en is the reverse. :)
declare decimal-format local:de decimal-separator = "," grouping-separator = ".";
declare decimal-format local:en decimal-separator = "." grouping-separator = ",";
let $numbers := (1234.567, 789, 1234567.765)
for $i in $numbers
return (
(: Each picture string is written with the separators of the named format passed as third argument. :)
format-number($i,"#.###,##","local:de"),
format-number($i,"#,###.##","local:en")
)
The output is:
<?xml version="1.0" encoding="UTF-8"?>1.234,57 1,234.57 789,0 789.0 1.234.567,76
1,234,567.76

XQuery wrap result in computed node

I am trying to do some simple pagination in XQuery. I would like my root element of the returned XML to have (as attributes) various properties about the pagination (current page etc).
However I can't seem to find a way to add these dynamic attributes to my root element.
I've tried playing with the
element name {expr} and attribute name {expr}
functions, but can't seem to get them to work.
<result>{
(: Emits one <document-summary> per item of the requested page; every one of
   them repeats the same pagination values as attributes. :)
let $results :=
for $item in doc('mydoc')/root/item
return $item
let $requested-page-nbr := 2
let $items-per-page := 10
let $count := count($results)
let $last-page-nbr := fn:ceiling($count div $items-per-page)
(: A request beyond the last page falls back to the last page. :)
let $actual-page-nbr := if ($requested-page-nbr gt $last-page-nbr) then $last-page-nbr else $requested-page-nbr
(: 1-based position of the first item on the page. :)
let $start-item := $items-per-page * $actual-page-nbr - ( $items-per-page - 1 )
let $natural-end-item := $actual-page-nbr * $items-per-page
(: The last page may be shorter than a full page. :)
let $actual-end-item := if ($count ge $natural-end-item) then $natural-end-item else $count
for $j in ($start-item to $actual-end-item )
(: item-at is not defined in this snippet; presumably it returns the item at position $j. :)
let $current := item-at($results, $j)
return
<document-summary
requested-page-nbr="{$requested-page-nbr}"
items-per-page="{$items-per-page}"
count="{$count}"
last-page-nbr="{$last-page-nbr}"
actual-page-nbr="{$actual-page-nbr}"
start-item="{$start-item}"
natural-end-item="{$natural-end-item}"
actual-end-item="{$actual-end-item}">
{($current)}
</document-summary>
}</result>
to add an attribute to the root:
<result>{attribute page {3}}</result>
in your case you probably want to do something like: (?)
...
return (
attribute page {$actual-page-nbr},
for $j in ($start-item to $actual-end-item )
let $current := item-at($results, $j)
return
<document-summary
requested-page-nbr="{$requested-page-nbr}"
items-per-page="{$items-per-page}"
count="{$count}"
last-page-nbr="{$last-page-nbr}"
actual-page-nbr="{$actual-page-nbr}"
start-item="{$start-item}"
natural-end-item="{$natural-end-item}"
actual-end-item="{$actual-end-item}">
{($current)}
</document-summary>)
...
does that answer your question?
I don't think that is the proper XQuery way...
This XQuery:
(: Returns one page of /root/item elements wrapped in a <document-summary>
   whose attributes describe the pagination. The page number (1-based) and the
   page size are supplied by the caller. :)
declare variable $requested-page-nbr external;
declare variable $items-per-page external;
declare variable $items := /root/item;
(: First item of every page. (position() - 1) mod n = 0 is used rather than
   position() mod n = 1 so that a page size of 1 still selects every item. :)
declare variable $firsties := $items[(position() - 1) mod $items-per-page = 0];
(: The positional variable gives the page number directly; index-of() would
   compare atomized values and return several positions when item values repeat. :)
for $first at $actual-page-nbr in $firsties
(: The page is its first item plus up to (page size - 1) following item siblings. :)
let $group := $first|
$first/following-sibling::item[position() < $items-per-page]
(: Number of items on all earlier pages. :)
let $previous := ($actual-page-nbr - 1) * $items-per-page
where $actual-page-nbr = $requested-page-nbr
return
<result>
<document-summary requested-page-nbr="{$requested-page-nbr}"
items-per-page="{$items-per-page}"
count="{count($items)}"
last-page-nbr="{count($firsties)}"
actual-page-nbr="{$actual-page-nbr}"
start-item="{$previous + 1}"
natural-end-item="{$previous + $items-per-page}"
actual-end-item="{$previous + count($group)}">{
$group
}</document-summary>
</result>
With this input:
<root>
<item>1</item>
<item>2</item>
<item>3</item>
<item>4</item>
<item>5</item>
<item>6</item>
<item>7</item>
<item>8</item>
<item>9</item>
<item>10</item>
<item>11</item>
<item>12</item>
<item>13</item>
</root>
With $requested-page-nbr set to 2 and $items-per-page set to 3, output:
<result>
<document-summary requested-page-nbr="2"
items-per-page="3"
count="13"
last-page-nbr="5"
actual-page-nbr="2"
start-item="4"
natural-end-item="6"
actual-end-item="6">
<item>4</item>
<item>5</item>
<item>6</item>
</document-summary>
</result>
With $requested-page-nbr set to 4 and $items-per-page set to 4, output:
<result>
<document-summary requested-page-nbr="4"
items-per-page="4"
count="13"
last-page-nbr="4"
actual-page-nbr="4"
start-item="13"
natural-end-item="16"
actual-end-item="13">
<item>13</item>
</document-summary>
</result>
As for returning a "page" of results, where you are using
{($current)}
we are using something like the following
{ subsequence($results, $start-item, $items-per-page) }

Resources