扩展树缓存已满错误需要调整查询 [英] Expanded tree cache full error need to tune the query
问题描述
说明:
-
$enumValues
将具有我必须查看的字符串序列 -
$assetSubGroup
将具有来自XML(用于循环)的元素值,即我必须在上述维护的序列中匹配的字符串 - 如果不匹配,我必须保留几个元素值并返回.
$enumValues
will have sequence of strings that I have to look into$assetSubGroup
will have a element value from XML (for loop) i.e string that I have to match in above maintained sequence- If match is not, I have to hold few element values and return.
以下所有三项尝试均使我扩展了树缓存完整错误.有大约470000资产,即我正在查询的XML.
All three of my attempts below are giving me expanded tree cache full errors. There are ~470000 assets i.e XML I'm querying.
如何调整这些查询以避免扩展的树缓存错误?
How can I tune these queries to avoid expanded tree cache errors?
方法1:
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report0.txt")
let $enumValues := (:all sequence of strings goes here :)
let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))
let $result1 := concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " ")
let $result2 :=
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
let $flag := map:keys($map2 - $map1)
return
if ($flag)
then(
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
)
else (
if($assetSubGroup) then()
else (
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
let $result3 := ($result1, $result2)
return xdmp:save($report-uri, text{$result3}), xdmp:elapsed-time()
方法2:
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report1.txt")
let $enumValues := (:all string value sequence goes here that has to match:)
let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))
let $result1 :=( concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " "),
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
let $flag := map:keys($map2 - $map1)
return
if ($flag)
then(
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
)
else (
if($assetSubGroup) then()
else (
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
)
return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()
方法3:
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report2.txt")
let $enumValues := (:sequence of strings are passed:)
let $result1 :=( concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " "),
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $flag := ($assetSubGroup eq $enumValues)
return
if ($flag)
then()
else (
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()
推荐答案
如果发现编写流式"查询时遇到问题,另一种选择是使用批处理工具,例如
If you find that you are having trouble writing a "streamable" query, another alternative would be to use a batch tool, such as CORB2 that uses multiple threads to execute a module for each document to produce it's output in a separate transaction and collects the results into a final output file. By breaking out the work into separate transactions, you don't have to worry about expanded tree cache errors or timeouts, and you can adjust the thread count to perform more work concurrently and get it done faster than a single query execution.
用于生成用|
分隔的文本文件的示例CORB2选项文件如下所示(您需要根据环境调整 XCC-CONNECTION-URI 值):
An example CORB2 options file to produce your |
delimited text file would look something like this (you would need to adjust the XCC-CONNECTION-URI value for your environment):
XCC-CONNECTION-URI=xcc://user:password@host:port
THREAD-COUNT=8
URIS-MODULE=selector.xqy|ADHOC
PROCESS-MODULE=process.xqy|ADHOC
PROCESS-TASK=com.marklogic.developer.corb.ExportBatchToFileTask
EXPORT-FILE-DIR=D:\output\
EXPORT-FILE-NAME=get-asset-sub-group-values_report.txt
PRE-BATCH-TASK=com.marklogic.developer.corb.PreBatchUpdateFileTask
EXPORT-FILE-TOP-CONTENT=asset-id|upi|assetSubGroup|asset-type|asset-sub-type|originator|originator-identifier|mm-project-id|sap-project-id
BATCH-URI-DELIM=|
创建URI选择器模块selector.xqy
,该模块将找到所有要处理的URI:
Create the URIs selector module selector.xqy
that will find all of the URIs to process:
xquery version "1.0-ml";
let $uris := cts:uris("", (), cts:and-query((
cts:collection-query("metadata-search"),
cts:element-value-query(xs:QName("AssetID"), "*")
)) )
return (fn:count($uris), $uris)
创建将为每个URI调用的流程模块process.xqy
:
Create the process module process.xqy
that will be called for each URI:
xquery version "1.0-ml";
declare variable $URI external;
let $each-search-copy := fn:doc($URI)/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $flag := ($assetSubGroup eq $enumValues)
return
if ($flag)
then()
else (
(: NOTE you could use string-join() instead of concat()
string-join(($asset-id, $upi, $assetSubGroup, $asset-type, $asset-sub-type, $originator, $originator-identifier, $mm-project-id, $sap-project-id), "|")
:)
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id)
))
像这样调用CORB作业(调整XCC和CORB jar以及您的属性文件的路径和文件名):
Invoking the CORB job like this (adjust paths and filenames for the XCC and CORB jars and your properties file):
java -server -cp .:marklogic-xcc-8.0.8.jar:marklogic-corb-2.4.1.jar
-DOPTIONS-FILE=myjob.properties com.marklogic.developer.corb.Manager
或者,如果您使用的是 ml-gradle ,
Or, if you are using ml-gradle, use the corb task
这篇关于扩展树缓存已满错误需要调整查询的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!