I have an XML fragment for which I want to run different queries depending on whether the id attribute exists:
<author order="1"
id="99999999"
initials="A."
given-names="Able"
surname="Baker"
fullname="Able Baker"/>
I have tried:
(: Optional clause on the author's given-names or initials attributes, built
 : only when $first is supplied; a trailing "*" is appended to the term. :)
let $first-query := if ($first)
then cts:or-query((
cts:element-attribute-word-match(xs:QName("author"), xs:QName("given-names"), $first || "*", ("collation=http://marklogic.com/collation/codepoint")),
cts:element-attribute-word-match(xs:QName("author"), xs:QName("initials"), $first || "*", ("collation=http://marklogic.com/collation/codepoint"))
))
else ()
(: Optional clause on the author's surname attribute, built only when $last is supplied. :)
let $last-query := if ($last)
then cts:element-attribute-word-match(xs:QName("author"), xs:QName("surname"), $last || "*", ("collation=http://marklogic.com/collation/codepoint"))
else ()
(: Intended to match authors WITHOUT an id attribute: negates a value query
 : for id = "*" and combines it with the optional name clauses.
 : NOTE(review): the "*" value query is given no options here — presumably it
 : needs the "wildcarded" option to act as a wildcard; confirm. :)
let $author-no-id-query :=
cts:and-query((
cts:not-query(
cts:element-attribute-value-query(xs:QName("author"), xs:QName("id"), "*")
),
$first-query,
$last-query
))
(: Scope either branch to the author element.
 : $author-id-query is not defined in this excerpt. :)
let $query := cts:element-query(xs:QName("author"),
cts:or-query(($author-no-id-query, $author-id-query
)))
If the id exists, then a different query takes place and a match against the id occurs. How do I detect an absence of an attribute in MarkLogic?
I have inserted two test documents into the database:
(: Test fixtures: two author documents that differ only in the presence of
 : the id attribute. The two inserts are joined with a comma so that they
 : form one valid expression and can be run as a single query. :)
xdmp:document-insert('/example.xml', <author order="1"
id="99999999"
initials="A."
given-names="Able"
surname="Baker"
fullname="Able Baker"/>),
xdmp:document-insert('/example2.xml', <author order="1"
initials="A."
given-names="Able"
surname="Baker"
fullname="Able Baker"/>)
And run the following query against these documents:
(: Search all documents for an author element that has no id attribute.
 : The wildcarded value query matches any id value; negating it leaves the
 : authors for which no id value is present. :)
let $any-id :=
  cts:element-attribute-value-query(xs:QName('author'), xs:QName('id'), '*', ("wildcarded"))
let $no-id := cts:and-query((cts:not-query($any-id)))
return
  cts:search(fn:doc(), cts:element-query(xs:QName('author'), $no-id))
This search only matches the document where the ID attribute does not exist.
Related
<MasterData>
<Name>AA</Name>
<EmpId>123</EmpId>
<AccountNo>111</AccountNo>
<IFSC>ABC</IFSC>
<AccountData>
<AccountNo>111</AccountNo>
<IFSC>ABC</IFSC>
</AccountData>
<AccountData>
<AccountNo>222</AccountNo>
<IFSC>DEF</IFSC>
</AccountData>
</MasterData>
I have XML like this in my database. I need to take the combination of AccountNo + IFSC that appears directly under MasterData (not under an AccountData section), compare it with all documents in the collection, and check whether it matches the data in any AccountData section. If it matches, I need to identify the URI of the document.
First identify the unique combination of AccountNo+IFSC from Masterdata section and then check whether this combination present under any of the AccountData section, there are more elements in this xml other than AccountNo and IFSC
If you had range indexes on the AccountNo and IFSC elements, then you could:
retrieve the set of values from AccountNo, IFSC, and a cts:uri-reference() with cts:value-tuples().
create a map using a composite key with the AccountNo and IFSC values and the URIs as the values for those map entries
prune any entry that only has one URI associated
return the map that will have the set of URIs corresponding to each combo of AccountNo and IFSC value
Something like this:
(: Builds a map from each "AccountNo IFSC" combination to the URIs of the
 : documents in which that combination occurs, keeping only combinations that
 : occur in more than one document.
 : Requires range indexes on AccountNo and IFSC and the URI lexicon. :)
let $accountNumber-IFSC :=
  cts:value-tuples(
    (
      cts:element-reference(xs:QName("AccountNo")),
      cts:element-reference(xs:QName("IFSC")),
      cts:uri-reference()
    )
  )
let $map := map:new()
let $_create_map_value_to_uris :=
  for $co-occurrence in $accountNumber-IFSC
  (: each tuple is a json:array; unpack it into a plain sequence before
     addressing its members by position :)
  let $tuple := json:array-values($co-occurrence)
  let $key := concat($tuple[1], " ", $tuple[2])
  (: append this URI to whatever has already been collected for the key :)
  let $value := (map:get($map, $key), $tuple[3])
  return map:put($map, $key, $value)
let $_prune_single_uri :=
  for $key in map:keys($map)
  (: count() instead of not(tail(...)): the effective boolean value of a
     sequence of two or more strings is an error, which the original hit
     as soon as a key had three or more URIs :)
  where count(map:get($map, $key)) le 1
  return
    (: putting the empty sequence removes the key from the map :)
    map:put($map, $key, ())
return
  $map
If you just wanted the list of URIs, you can invert the map with -$map and return its keys: return map:keys(-$map)
If you had a range-index on the EmpId you could pivot on that instead of the document URIs.
Using the Optic API functions, you can do something similar with element-range indexes:
import module namespace op = "http://marklogic.com/optic" at "/MarkLogic/optic.xqy";
(: Optic version of the same idea: read AccountNo, IFSC and URI rows from the
 : lexicons, group by the IFSC + AccountNo combination, and keep only the
 : combinations whose URI count is greater than 1. :)
op:from-lexicons(
map:entry("AccountNo", cts:element-reference(xs:QName("AccountNo")))
=> map:with("IFSC", cts:element-reference(xs:QName("IFSC")))
=> map:with("URI", cts:uri-reference())
)
(: per group: "URIs" is the URI column joined with ", ", "count" is the number of URI values :)
=> op:group-by(
("IFSC", "AccountNo"),
(
op:group-concat("URIs", "URI", map:entry("separator", ", ")),
op:count("count", op:col("URI"))
)
)
=> op:where(op:gt(op:col("count"), 1))
=> op:result()
a.xml:
<execution xmlns="http://www.example.org">
<header>
<messageId>FX123</messageId>
</header>
<isCorrection>false</isCorrection>
<trade>
<tradeHeader>
<partyTradeIdentifier>
<partyReference href="ptyA"/>
<tradeId>12345</tradeId>
</partyTradeIdentifier>
<tradeDate>2019-12-21</tradeDate>
</tradeHeader>
<fxTargetKnockoutForward>
<target>
<accumulationRegion>
<lowerBound>
<condition>AtOrAbove</condition>
</lowerBound>
<upperBound>
<condition>Below</condition>
</upperBound>
</accumulationRegion>
<accumulationRegion>
<lowerBound>
<condition>AtOrAbove</condition>
<initialValue>1.1000</initialValue>
</lowerBound>
<multiplier>2</multiplier>
</accumulationRegion>
<knockoutLevel>
<amount>
<currency>CAD</currency>
<amount>100000.00</amount>
</amount>
<targetStyle>Exact</targetStyle>
</knockoutLevel>
</target>
<expirySchedule>
<adjustedDate>2019-12-23</adjustedDate>
<adjustedDate>2020-01-27</adjustedDate>
<adjustedDate>2020-02-25</adjustedDate>
<adjustedDate>2020-03-26</adjustedDate>
</expirySchedule>
<settlementSchedule>
<dateAdjustments>
<businessDayConvention>FOLLOWING</businessDayConvention>
<businessCenters>
<businessCenter>CATO</businessCenter>
<businessCenter>USNY</businessCenter>
</businessCenters>
</dateAdjustments>
<adjustedDate>2019-12-24</adjustedDate>
<adjustedDate>2020-01-28</adjustedDate>
<adjustedDate>2020-02-26</adjustedDate>
<adjustedDate>2020-03-27</adjustedDate>
</settlementSchedule>
<fixingInformationSource>
<rateSource>Reuters</rateSource>
<rateSourcePage>WMRSPOT09</rateSourcePage>
</fixingInformationSource>
</fxTargetKnockoutForward>
</trade>
</execution>
Logic: I pass in-memory XML (a.xml) and targeted element (“trade”) as parameters -> the function local:array-qname evaluates all of this element’s descendants -> Whenever the descendant’s node name is the same as its sibling’s node name, it is considered a candidate -> the function walks backwards to retrieve all of its ancestor node name (except the root node) up to the passed element (“trade”) level.
The desired result is a sequence of string arrays, each of which contains all of the candidate's ancestor node names in document order, followed by the candidate's own node name. The expected result is:
( ("trade","fxTargetKnockoutForward","target","accumulationRegion"),
("trade","fxTargetKnockoutForward","expirySchedule","adjustedDate"),
("trade","fxTargetKnockoutForward","settlementSchedule","dateAdjustments","businessCenters","businessCenter"),
("trade","fxTargetKnockoutForward","settlementSchedule","adjustedDate") )
The library module is:
xquery version "1.0-ml";
(: For every element under $doc whose name() equals $element, visits each
 : descendant whose name equals the name of its immediately following
 : sibling, and emits one <a> element per (ancestor name, descendant) pair.
 :
 : $doc      node(s) to search
 : $element  element name, compared against name()
 :
 : Ancestors whose name equals name($doc/*) are left out.
 : NOTE(review): because $a is bound to one ancestor name at a time, each <a>
 : holds a single ancestor name plus the descendant's local name rather than
 : the whole path — presumably why the output differs from the expected
 : result; confirm.
 :)
declare function local:array-qname(
$doc as node()*,
$element as xs:string
) as xs:string*
{
let $e := $doc//*[name() = $element]
for $d in $e/descendant::*[name() = name(following-sibling::*[1])],
$a in $d/ancestor::*[not(name() = name($doc/*))]/name(.)
return
for $_ in $a
return
<a>
( {xs:QName($a)},{xs:QName(local-name($d))} )
</a>
};
let $doc := doc("a.xml")
return
local:array-qname($doc, "trade")
But it goes awry:
(trade,fxTargetKnockoutForward,target,accumulationRegion),
(trade,fxTargetKnockoutForward,expirySchedule,adjustedDate),
(trade,fxTargetKnockoutForward,settlementSchedule,dateAdjustments,businessCenters,businessCenter),
(trade,fxTargetKnockoutForward,settlementSchedule,adjustedDate),
How can I get my module work?
The following solution is compliant with the requirement…
(: Returns one <a> element per candidate found below each element of $doc
 : whose name() equals $name. A candidate is a descendant whose name equals
 : the name of its immediately following sibling. Each <a> holds the node
 : names on the path from the matched element down to the candidate.
 :
 : $doc   document to search
 : $name  name of the element to start from
 :)
declare function local:array-qname(
  $doc as document-node(),
  $name as xs:string
) {
  for $start in $doc//*[name() = $name],
      $candidate in $start/descendant::*[name() = name(following-sibling::*[1])]
  (: keep only the steps that are not before $start in document order,
     i.e. $start itself and everything below it on the candidate's path :)
  let $path := $candidate/ancestor-or-self::*[not(. << $start)]
  return
    <a>{
      for $step in $path
      return node-name($step)
    }</a>
};
let $doc := doc('a.xml')
return local:array-qname($doc, 'trade')
…but it differs from the expected output as it yields duplicate paths. If duplicates are to be avoided, and if a string representation is sufficient, distinct-values can be used:
(: De-duplicated variant: the node names on the path from $e down to each
 : candidate are joined into one space-separated string, and
 : distinct-values() drops repeated paths.
 : $doc and $name are not bound in this excerpt. :)
distinct-values(
for $e in $doc//*[name() = $name]
for $d in $e/descendant::*[name() = name(following-sibling::*[1])]
return string-join($d/ancestor-or-self::*[not(. << $e)]/name(), ' ')
)
With
(: Name of the element to start from; can be overridden by the caller. :)
declare variable $element-name as xs:QName external := QName('http://www.example.org', 'trade');
(: For every element below the start element whose name equals the name of
 : its immediately following sibling, emits a string of the form
 : "(start: (step, step, ...))" listing the node names between the start
 : element (exclusive) and that element (inclusive). :)
let $base := //*[node-name() = $element-name]
for $d in $base//*[node-name() = following-sibling::*[1]/node-name()]
(: the start element and everything above it, which must be left out of the path :)
let $outside := $d/ancestor::*[node-name() = $element-name]/ancestor-or-self::*
let $steps := ($d/ancestor-or-self::* except $outside)/node-name()
return
  concat('(', $element-name, ': (', string-join($steps, ', '), '))')
I get
(trade: (fxTargetKnockoutForward, target, accumulationRegion))
(trade: (fxTargetKnockoutForward, expirySchedule, adjustedDate))
(trade: (fxTargetKnockoutForward, expirySchedule, adjustedDate))
(trade: (fxTargetKnockoutForward, expirySchedule, adjustedDate))
(trade: (fxTargetKnockoutForward, settlementSchedule, dateAdjustments, businessCenters, businessCenter))
(trade: (fxTargetKnockoutForward, settlementSchedule, adjustedDate))
(trade: (fxTargetKnockoutForward, settlementSchedule, adjustedDate))
(trade: (fxTargetKnockoutForward, settlementSchedule, adjustedDate))
I am not sure from your description "Whenever the descendant’s node name is the same as its sibling’s node name, it is considered a candidate -> the function walks backwards to retrieve all of its ancestor node name (except the root node) up to the passed element (“trade”) level." why the duplicate adjustedDate entries are not in your desired output, as the sample contains several elements of that name that meet the condition.
I have a CSV file. I've managed to import this data into MarkLogic using mlcp, which then created an XML file in MarkLogic.
Now, in the CSV, one of the columns contains dates in the format "6/29/2013 5:00:00 PM". How do I use XQuery, and probably node-replace in a transform function, to convert such a date into a different format such as "2013-06-29", the MarkLogic default date format?
Any help is appreciated...
I have created transform.xqy and installed it in the Modules database in
MarkLogic. I'm thinking about using "xdmp:node-replace" to replace the date
with the expected format. Or should I go through the CSV column by column
(how would I do that?) and use "castable as xs:dateTime" to determine whether
a value is a date or not? Yet even just printing out the content value/URI
always gives me an error.
xquery version "1.0-ml";
module namespace example = "http://test.com/example";
(: mlcp transform entry point (debugging attempt).
 : Reads the "uri" and "value" entries of $content and passes the "value"
 : entry to trace() with a label.
 :
 : $content  map with the document under "value" and its URI under "uri"
 : $context  map; not used here
 :
 : NOTE(review): the declared return type is map:map*, but the returned
 : expression is the trace() call on the document value rather than
 : $content — presumably the cause of the reported error; confirm.
 :)
declare function example:transform(
$content as map:map,
$context as map:map
) as map:map*
{
let $the-doc-uri := map:get($content, "uri")
let $the-doc := map:get($content, "value")
return
trace($the-doc, 'The value of doc is: ')
};
The MarkLogic documentation contains a full example of an MLCP transform:
https://docs.marklogic.com/guide/mlcp/import#id_65640
It shows this example, which adds an attribute to the XML content:
(: mlcp transform entry point: adds a NEWATTR attribute to the root element
 : of an incoming XML document.
 :
 : $content  map holding the document under "value"
 : $context  map; its "transform_param" entry supplies the attribute value,
 :           and "UNDEFINED" is used when that entry is absent
 :
 : Returns $content. When the document has no element child it is returned
 : untouched; otherwise "value" is replaced with a rebuilt document.
 :)
declare function example:transform(
  $content as map:map,
  $context as map:map
) as map:map*
{
  let $attr-value :=
    (map:get($context, "transform_param"), "UNDEFINED")[1]
  let $the-doc := map:get($content, "value")
  return
    if (fn:empty($the-doc/element()))
    then $content
    else
      let $root := $the-doc/*
      return (
        map:put($content, "value",
          document {
            (: keep comments and processing instructions around the root :)
            $root/preceding-sibling::node(),
            element {fn:name($root)} {
              attribute { fn:QName("", "NEWATTR") } {$attr-value},
              (: copy the existing attributes; "@*" is the attribute axis :)
              $root/@*,
              $root/node()
            },
            $root/following-sibling::node()
          }
        ), $content
      )
};
Keep in mind that you are supposed to update the "value" property of the $content map:map, and return $content, to get your transformation result added to the database. I suggest using a (potentially recursive) typeswitch to identify element nodes, and then adjusting their values accordingly.
HTH!
finally did it.
The thing is that I must use mem:node-replace because the transform happens on the fly, in memory, whereas xdmp:node-replace is for data that is already stored in MarkLogic.
The rest is as expected I must use format-date and xdmp:parse-dateTime to get date format as required.
Here is some snippets
xquery version "1.0-ml";
module namespace ns_transform = "this_is_my_namespace";
import module namespace mem = "http://xqdev.com/in-mem-update" at "/MarkLogic/appservices/utils/in-mem-update.xqy";
declare variable $ns := "this_is_my_namespace";
(: mlcp transform entry point: rewrites the date element under root_doc
 : from the "6/29/2013 5:00:00 PM" style into "2013-06-29".
 :
 : $content  map holding the incoming document under "value"
 : $context  map; not used here
 :
 : Returns $content. When the document has no date element, or the element
 : is empty, $content is returned unchanged instead of failing the document
 : on an unparsable empty value.
 :)
declare function ns_transform:transform(
  $content as map:map,
  $context as map:map
) as map:map*
{
  let $doc := map:get($content, "value")
  let $format_in := "[M]/[D]/[Y0001] [h01]:[m01]:[s01] [P]"
  let $format_out := "[Y0001]-[M01]-[D01]"
  (: the first date element, if any; fn:string() of a missing node is "" :)
  let $date_node := ($doc/*:root_doc/*:date)[1]
  let $old_date := fn:normalize-space(fn:string($date_node))
  return
    if ($old_date eq "")
    then $content
    else
      let $new_date := format-date(xs:date(xdmp:parse-dateTime($format_in, $old_date)), $format_out)
      (: in-memory replace: the document is not in the database yet :)
      let $new_doc := mem:node-replace($date_node, element {fn:QName($ns, "date")} {$new_date})
      let $_ := map:put($content, "value", $new_doc)
      return $content
};
I have written xquery to return results in normal way.
(: Emits one documentInformation row per data:data element, wrapped in a
 : single result element. status is read from the status attribute. :)
let $results := //data:data
return
<result>
{
for $i in $results
return
<documentInformation>
<id>{data($i/DATA:ID)}</id>
<status>{data($i/@status)}</status>
<title>{data($i/data:title)}</title>
<displayName>{data($i/DATA:DISPLAYNAME)}</displayName>
</documentInformation>
}
</result>
Now, I have to filter out the results in for loop with some condition like
(pseudo logic)
if id = 'abc' and status ="closed"
then skip the row
else add row.
I have tried several ways. but could not run the query..
Try this:
<result>
{
(: skip a row only when BOTH conditions hold: id is 'abc' and the status
   attribute is "closed"; every other row is emitted :)
for $i in //data:data
where fn:not($i/DATA:ID = 'abc' and $i/@status = "closed")
return
<documentInformation>
<id>{data($i/DATA:ID)}</id>
<status>{data($i/@status)}</status>
<title>{data($i/data:title)}</title>
<displayName>{data($i/DATA:DISPLAYNAME)}</displayName>
</documentInformation>
}
</result>
Note that the XPath //data:data may have a lot of work to do, but that's a separate matter.
You can also use an if condition instead of a where clause:
<result>
{
(: Same filter as a where clause, written with if/else: a row is skipped
   only when id is 'abc' AND status is "closed". The test is the negation of
   that pair; "!= ... and !=" would also drop rows where just one of the two
   matches, and rows that lack an id or status altogether. :)
for $i in //data:data
return
if (fn:not($i/DATA:ID = 'abc' and $i/@status = "closed"))
then
(
<documentInformation>
<id>{data($i/DATA:ID)}</id>
<status>{data($i/@status)}</status>
<title>{data($i/data:title)}</title>
<displayName>{data($i/DATA:DISPLAYNAME)}</displayName>
</documentInformation>
)
else ()
}
</result>
I am a newbie in XQuery, and my problem is about distinct values. I am using the following code to retrieve movie reviewers:
xquery version "1.0";
declare boundary-space preserve;
(: Joins reviews to movies on movie_title, keeps movies whose genre is
 : "Drama" and whose month is greater than 6, and emits one reviewer element
 : per matching (review, movie) pair, ordered by reviewer descending.
 : distinct-values() is applied inside the return clause, so it only sees the
 : movie_reviewer of one review at a time and cannot remove duplicates across
 : different reviews. :)
<result>
{for $reviews in doc("reviews.xml")/reviews/review,
$movie in doc("movies.xml")/movies/movie
where $reviews/movie_title = $movie/movie_title
and $movie//movie_genre = "Drama"
and $movie//month > 6
order by $reviews/movie_reviewer descending
return
(<reviewer>{distinct-values($reviews/movie_reviewer)}</reviewer>, '
')
}
</result>
and later I change the code to
xquery version "1.0";
declare boundary-space preserve;
(: Second attempt: same join and filter, with the distinct-values() loop
 : moved into the return clause. The inner loop still runs once per matching
 : (review, movie) pair, so it only de-duplicates the reviewers of a single
 : review.
 : NOTE(review): the braces around the inner "for" follow "return" directly,
 : i.e. they are not inside element content — presumably not valid there;
 : confirm. :)
<result>
{for $reviews in doc("reviews.xml")/reviews/review,
$movie in doc("movies.xml")/movies/movie
where $reviews/movie_title = $movie/movie_title
and $movie//movie_genre = "Drama"
and $movie//month > 6
return
{for $content in distinct-values($reviews/movie_reviewer)
order by $content descending
return (<reviewer>{$content}</reviewer>, '
')}
}
</result>
but I got the similar result as
<result>
<reviewer>Wesley Barry</reviewer>
<reviewer>Michael Gordon</reviewer>
<reviewer>Michael Gordon</reviewer>
<reviewer>Michael Gordon</reviewer>
<reviewer>John Frankenheimer</reviewer>
<reviewer>J. Lee Thompson</reviewer>
<reviewer>J. Lee Thompson</reviewer>
<reviewer>Charles Walters</reviewer>
<reviewer>Charles Walters</reviewer>
</result>
how can I make the result like
<result>
<reviewer>Wesley Barry</reviewer>
<reviewer>Michael Gordon</reviewer>
<reviewer>John Frankenheimer</reviewer>
<reviewer>J. Lee Thompson</reviewer>
<reviewer>Charles Walters</reviewer>
</result>
?
I know this is quite basic, but I just can't get the point
Please follow @Ranon's advice. However, guessing from your query: the easiest way would be to use XQuery 3.0, if you can, and its new group by clause.
xquery version "3.0";
declare boundary-space preserve;
(: Lists each reviewer once: reviews are joined to movies on movie_title,
 : restricted to movies whose genre is "Drama" and whose month is greater
 : than 6, then grouped by reviewer so that every reviewer yields a single
 : reviewer element, in descending order. :)
<result>
{
  for $review in doc("reviews.xml")/reviews/review
  for $film in doc("movies.xml")/movies/movie
  let $who := $review/movie_reviewer
  where $review/movie_title = $film/movie_title
    and $film//movie_genre = "Drama"
    and $film//month > 6
  group by $who
  order by $who descending
  return (<reviewer>{$who}</reviewer>, '
')
}
</result>