I am using XQuery and I need to convert the output to a comma-separated format, possibly by using group by in XQuery.
Final Output
grp|A|#A
grp|A4|#A
grp|A2|#A
grp|A1|#A
Expected Output
grp|A,A4,A2,A1|#A
My Xquery:
(: Build one "grp|id|title" line per document in the collection. :)
let $root := 'grp'
let $uri := for $i in fn:collection('collection')
            return fn:document-uri($i)
let $result := for $each-uri in $uri
               (: first title text node and first id attribute found under any grp element :)
               let $title := (doc($each-uri)//grp/title/text())[1]
               let $id := (doc($each-uri)//grp/@id)[1]
               let $root := 'grp'
               return fn:concat($root,'|',$id,'|',$title)
(: distinct-values only removes lines that are identical as whole strings :)
return fn:distinct-values($result)
I think only a group-by clause and a string-join call were missing.
(: Emit one "grp|id1,id2,...|title" line per title by grouping the documents on their title. :)
let $root := 'grp'
let $uri := for $i in fn:collection('collection')
            return fn:document-uri($i)
let $result := for $each-uri in $uri
               let $title := (doc($each-uri)//grp/title/text())[1]
               let $id := (doc($each-uri)//grp/@id)[1]
               (: $root is bound again inside this FLWOR so that it can serve as a grouping variable :)
               let $root := 'grp'
               group by $root, $title
               (: after grouping, $id holds every id of the group :)
               return fn:concat($root,'|',string-join($id, ","),'|',$title)
return fn:distinct-values($result)
Although distinct-values may need to be moved to the IDs, and concat can be replaced with string-join as well, like so:
(: Same grouping as before, but duplicates are removed among the ids of a group
   and the three fields are joined with a single string-join call. :)
let $root := 'grp'
let $uri := for $i in fn:collection('collection')
            return fn:document-uri($i)
let $result := for $each-uri in $uri
               let $title := (doc($each-uri)//grp/title/text())[1]
               let $id := (doc($each-uri)//grp/@id)[1]
               (: $root is bound again inside this FLWOR so that it can serve as a grouping variable :)
               let $root := 'grp'
               group by $root, $title
               return string-join(
                 ($root, string-join(distinct-values($id), ","), $title),
                 '|'
               )
return $result
Related
I want to count the occurrences of words in an XML document. The query gives the correct counts, but it runs slowly.
There are only two XML files (236 KB and 155 KB), and it takes 17 seconds to produce the result.
Below is the query:
(: Count how often each pair of adjacent whitespace-separated tokens occurs in the document. :)
let $doc := db:open('test','/ieee/test.xml')
(: every text node is whitespace-normalized and then split on single whitespace characters :)
let $tokens := $doc//text()/fn:tokenize(fn:normalize-space(.),'\s')
(: pair every token except the last with its successor, joined by one space :)
let $stringtoken := for $x at $pos in $tokens[position() = 1 to fn:last()-1]
let $y := string-join($tokens[position() = $pos to $pos + 1],' ')
return $y
return
<results>
{
(: one term element per distinct pair; the predicate below scans all of $stringtoken once per distinct value :)
for $result in distinct-values($stringtoken)
let $count := count($stringtoken[. = $result])
return
<term word="{$result}" count="{$count}"></term>
}
</results>
In the above query let $count := count($stringtoken[. = $result]) is taking too much time.
Any suggestions for improving the performance of the code would be much appreciated.
The group by statement will speed up your query a lot:
(: Replacement for the final return clause: equal tokens are grouped, and each group is counted once. :)
return <results>{
for $grouped-token in $stringtoken
group by $token := $grouped-token
(: after group by, $grouped-token is the sequence of all items that share the key $token :)
let $count := count($grouped-token)
return <term word="{ $token }" count="{ $count }"/>
}</results>
(: Attempt to list countries with more than 25 airports by incrementing a counter. :)
let $d := doc('mondial.xml')
let $airports := $d/mondial/airport
let $countries := $d/mondial/country
for $data1 in $countries
let $count :=xs:integer("0")
let $name :=$data1/name
let $car_code :=$data1/@car_code
for $data2 in $airports
where $car_code = $data2/@country
(: the next line is the one the processor rejects: it tries to reassign $count, which is not a valid FLWOR clause :)
$count:= $count+ 1
where xs:integer($count)>25
return
<country>
<name>{data($name)}</name>
<count>{data($count)}</count>
</country>
This is my code. When I run it, I get a syntax error:
error: syntax error, unexpected $[err:XPST0003]
$count:=$count+1
Perhaps
(: List every country that has more than 25 airports, together with the airport count. :)
let $d := doc('mondial.xml')
let $airports := $d/mondial/airport
let $countries := $d/mondial/country
for $country in $countries
(: airports whose country attribute matches this country's car_code attribute :)
let $airport-count := count($airports[@country = $country/@car_code])
where $airport-count > 25
return
  <country>
  {
    $country/name,
    <count>{$airport-count}</count>
  }
  </country>
Actually I found the answer
(: List every country that has more than 25 airports, together with the airport count. :)
let $d := doc('mondial.xml')
let $airports := $d/mondial/airport
let $countries := $d/mondial/country
for $data1 in $countries
let $name := $data1/name
let $car_code := $data1/@car_code
(: count() replaces the counter loop: it takes the airports whose country attribute equals this car_code :)
let $count := count($airports[@country eq $car_code])
where $count > 25
return
  <country>
    <name>{data($name)}</name>
    <count>{data($count)}</count>
  </country>
I've got a sequence that needs to sort a list based off earliest year vs. latest year. Due to some unique values in the year element, it is making the sort a little more complicated. Is there any way to achieve the following?
(: Sorts the year labels with a plain order by on the string values. :)
let $dates := ('1982', '2019', '2095', 'pre-1982', 'post-2095')
return
for $date in $dates
(: the values are strings, so 'pre-...' and 'post-...' are ordered by their leading letters, not by their year :)
order by $date
return $date
the dates element text is usually the year in the data, but outlier cases have a pre- or post- attached. Any way to achieve this minimally?
I am not sure if this is minimal, but it works:
(: Sort year labels chronologically: "pre-YYYY" sorts just before YYYY and "post-YYYY" just after it. :)
let $dates := ('1982', '2019', '2095', 'pre-1982', 'post-2095')
return
  for $date in $dates
  (: the year is whatever follows the hyphen, or the whole value when there is no hyphen :)
  let $year :=
    if (fn:contains($date, "-"))
    then fn:substring-after($date, "-")
    else $date
  (: tie-breaker within one year: pre (-1) < plain (0) < post (1) :)
  let $prepost :=
    if (fn:starts-with($date, "pre"))
    then -1
    else if (fn:starts-with($date, "post"))
    then 1
    else 0
  (: compare the years numerically so that years with differing digit counts still order correctly;
     fn:number yields NaN instead of raising an error for a non-numeric value :)
  order by fn:number($year), $prepost
  return $date
Just FYI: Definitely not minimal, but I wanted to know what fn:sort does when a sequence is returned. Turns out it does the right thing.
xquery version "3.1";

(: Multipliers that select the sort direction. :)
declare variable $local:ascending := 1;
declare variable $local:descending := -1;

(: Builds a two-item sort key (year, prefix rank) for a plain or "pre-"/"post-" prefixed year.
   Both items are multiplied by $order, so passing -1 reverses the resulting order. :)
declare function local:sort-prefixed-years ($y, $order) {
  if (fn:not(fn:contains($y, "-")))
  then (xs:integer($y) * $order, 0)
  else
    let $parts := fn:tokenize($y, "-")
    let $prefix := $parts[1]
    let $prefix-rank :=
      if ($prefix eq "pre") then -1 * $order
      else if ($prefix eq "post") then 1 * $order
      else 0
    return (xs:integer($parts[2]) * $order, $prefix-rank)
};

(: Key function for oldest-first sorting. :)
declare function local:sort-prefixed-years-ascending ($prefixed-year) {
  local:sort-prefixed-years($prefixed-year, $local:ascending)
};

(: Key function for newest-first sorting. :)
declare function local:sort-prefixed-years-descending ($prefixed-year) {
  local:sort-prefixed-years($prefixed-year, $local:descending)
};
(: Sorts the sample values with the descending key function; each key is a two-item sequence (year, prefix rank). :)
let $dates := ('1982', '2019', '2095', 'pre-1982', 'post-2095')
return sort($dates, (), local:sort-prefixed-years-descending#1)
I'm having some difficulty with making a function I've written recursive. I need to be able to turn this xml:
<entry ref="22">
<headword>abaishen</headword>
<part_of_speech> v. </part_of_speech>
<variant>abeishen</variant>
<variant>abaissen</variant>
<variant>abeisen</variant>
<variant>abashen</variant>
<variant>abasshen</variant>
<variant>abassen</variant>
<variant>abeeshen</variant>
<variant>abesen</variant>
<variant>abessen</variant>
<variant>abaished</variant>
<variant>-et</variant>
<variant>-it</variant>
<variant>abaisht</variant>
<variant>abaist</variant>
<variant>abasht</variant>
<variant>abast</variant>
</entry>
Into this XML -- essentially replacing the ending of any entry that begins with an "-" with the stem of the last complete entry:
<entry ref="22">
<headword>abaishen</headword>
<variant>abeishen</variant>
<variant>abaissen</variant>
<variant>abeisen</variant>
<variant>abashen</variant>
<variant>abasshen</variant>
<variant>abassen</variant>
<variant>abeeshen</variant>
<variant>abesen</variant>
<variant>abessen</variant>
<variant>abaished</variant>
<variant>abaishet</variant>
<variant>abaishit</variant>
<variant>abaisht</variant>
<variant>abaist</variant>
<variant>abasht</variant>
<variant>abast</variant>
<part_of_speech> v. </part_of_speech>
</entry>
The issue I'm running into is that second entry, the -it one, returns "abaishet" with the code I currently have:
(: Expands an element whose text starts with "-" by grafting its ending onto the text of the
   preceding sibling; any other element is returned unchanged. :)
declare function local:hyphen-replace($f) {
let $j :=
if (substring($f/text(), 1, 1) = "-") then
(: the text after the leading hyphen, and the number of characters it replaces :)
let $ending := substring-after($f/text(),"-")
let $ending-length := string-length($ending)
(: nearest preceding sibling element, which supplies the stem :)
let $previous := $f/preceding-sibling::*[1]
let $previous-length := string-length($previous)
return
if (substring($previous/text(), 1, 1) = "-") then
(: NOTE(review): this branch returns the expansion of $previous itself; the $ending of the current element is not applied here :)
local:hyphen-replace($previous)
else
(: drop as many trailing characters from the stem as the ending is long, then append the ending :)
element {name($f)} {concat(substring($previous,1,($previous-length - $ending-length)),$ending)}
else
$f
return $j
};
(: For each input node whose part_of_speech text is " v. ", applies local:hyphen-replace to every item
   returned by functx:siblings for that part_of_speech element; any other node yields <empty/>. :)
declare function local:verbCheck($nodes as node()*) as node()* {
let $d := $nodes/part_of_speech
(: NOTE(review): this outer $s is not referenced again in this function :)
let $s := functx:siblings($d)
let $p := for $node in $nodes
return
(: per-node bindings that shadow the outer $d and $s inside this loop :)
let $d := $node/part_of_speech
let $s := functx:siblings($d)
return
if ($d/text() = " v. ") then
for $f in $s
let $j :=
local:hyphen-replace($f)
return ($j)
else
<empty/>
return
(: the processed items first, followed by the part_of_speech element(s) of all input nodes :)
($p,$d)
};
(: Wraps the result of local:verbCheck for every built node in an <entry> that carries the node's ref attribute. :)
<list>
{
  (: $collection must already be bound outside this snippet; a file-selection suffix is appended to it :)
  let $collection := concat($collection, '?select=*.xml')
  let $q := collection($collection)
  let $v := local:buildNodes($q)
  let $entries :=
    for $n in $v
    return <entry ref="{$n/@ref}">{local:verbCheck($n)}</entry>
  (: presumably strips the <empty/> placeholders emitted by local:verbCheck — helper not shown here :)
  return local:remove-empty-elements($entries)
}
</list>
It's obvious to me that my problem is with this piece of code in local:hyphen-replace:
if (substring($previous/text(), 1, 1) = "-") then
local:hyphen-replace($previous)
because it's calling the immediately previous item and replacing the "-it" node with its information. But I don't know how to rewrite it so that the recursion works properly. Any suggestions would be appreciated. Thank you.
I'm trying to format decimals in XQuery. The decimals are currency, so the format should be ,###.##.
For example:
5573652.23 should be 5,573,652.23
and
352769 should be 352,769 (or 352,769.00 if it's easier/cleaner)
Right now I'm using this function from http://www.xqueryhacker.com/2009/09/format-number-in-xquery/, but I can't use decimals with it:
(: Formats an integer with "," between groups of three digits, e.g. 5573652 becomes "5,573,652".
   A negative value keeps its leading "-". :)
declare function local:format-int($i as xs:int) as xs:string
{
  (: digits of the absolute value; the sign is re-attached at the end :)
  let $digits := fn:string(fn:abs($i))
  let $len := fn:string-length($digits)
  (: the leading group takes the 1 to 3 digits left over once the rest is cut into triples :)
  let $head := ($len - 1) mod 3 + 1
  let $groups := (
    fn:substring($digits, 1, $head),
    for $k in 1 to (($len - $head) idiv 3)
    return fn:substring($digits, $head + 3 * $k - 2, 3)
  )
  return fn:concat(
    if ($i lt 0) then '-' else '',
    fn:string-join($groups, ',')
  )
};
I'm using Saxon 9HE for my processor.
Any help would be greatly appreciated.
----- UPDATE -----
Based on Dimitre's answer, I modified the function to save the decimal portion and add it to the end of the return string.
New Function
(: Formats a decimal with "," between groups of three integer digits and keeps at most the first
   two fraction digits (truncated, not rounded). A value without a fraction part gets no ".". :)
declare function local:format-dec($i as xs:decimal) as xs:string
{
  (: integer digits of the absolute value; the sign is re-attached at the end :)
  let $whole := substring-before(concat(string(abs($i)), '.'), '.')
  (: the first two digits after the decimal point, or '' when there is none :)
  let $fraction := substring(substring-after(string($i), '.'), 1, 2)
  let $len := string-length($whole)
  (: the leading group takes the 1 to 3 digits left over once the rest is cut into triples :)
  let $head := ($len - 1) mod 3 + 1
  let $groups := (
    substring($whole, 1, $head),
    for $k in 1 to (($len - $head) idiv 3)
    return substring($whole, $head + 3 * $k - 2, 3)
  )
  return concat(
    if ($i lt 0) then '-' else '',
    string-join($groups, ','),
    if ($fraction != '') then concat('.', $fraction) else ''
  )
};
Use:
(: Formats the integer part with local:format-int and appends the fraction digits after a ".". :)
let $n := 5573652.23
let $whole := floor($n)
(: the fraction is rendered as "0.dd..."; dropping the first two characters leaves just the digits :)
let $fraction-digits := substring(string($n - $whole), 3)
return concat(local:format-int(xs:int($whole)), '.', $fraction-digits)
This produces exactly the wanted, correct result:
5,573,652.23
This doesn't work for you?:
format-number(5573652.23,",###.##")
You can play with this here. I am pretty sure that saxon supports this function.
Edit: This function is not supported in saxon (see comments below).
With XQuery 3.0 and Saxon-HE 9.7 Parser you can do the following:
(: Two named decimal formats: local:de uses "," as decimal separator and "." as grouping separator,
   local:en uses them the other way round. :)
declare decimal-format local:de decimal-separator = "," grouping-separator = ".";
declare decimal-format local:en decimal-separator = "." grouping-separator = ",";
let $numbers := (1234.567, 789, 1234567.765)
for $i in $numbers
return (
(: each picture string is written with the separators of the named format it is paired with :)
format-number($i,"#.###,##","local:de"),
format-number($i,"#,###.##","local:en")
)
The output is:
<?xml version="1.0" encoding="UTF-8"?>1.234,57 1,234.57 789,0 789.0 1.234.567,76
1,234,567.76