扩展树缓存已满错误需要调整查询 [英] Expanded tree cache full error need to tune the query

查看:85
本文介绍了扩展树缓存已满错误需要调整查询的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

说明:

  • $enumValues将具有我必须查看的字符串序列
  • $assetSubGroup将具有来自XML(用于循环)的元素值,即我必须在上述维护的序列中匹配的字符串
  • 如果不匹配,我必须保留几个元素值并返回.
  • $enumValues will have sequence of strings that I have to look into
  • $assetSubGroup will have a element value from XML (for loop) i.e string that I have to match in above maintained sequence
  • If match is not, I have to hold few element values and return.

以下所有三项尝试均使我扩展了树缓存完整错误.有大约470000资产,即我正在查询的XML.

All three of my attempts below are giving me expanded tree cache full errors. There are ~470000 assets i.e XML I'm querying.

如何调整这些查询以避免扩展的树缓存错误?

How can I tune these queries to avoid expanded tree cache errors?

方法1:

let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"

let $report-uri := concat($output-dir, $query-name, "_report0.txt")

let $enumValues := (:all sequence of strings goes here :)

let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))

let $result1 := concat('asset-id' , "|",  'upi',  "|", 'assetSubGroup',  "|",  'asset-type',  "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier',  "|",  'mm-project-id',  "|" , 'sap-project-id' , "
")
let $result2 :=
  for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
    let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
    let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
    let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
    let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
    let $originator := $each-search-copy/biblioCore/originator[1]/text()
    let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
    let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
    let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
    let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
    let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
    let $flag := map:keys($map2 - $map1)
    return
        if ($flag)
            then(

                     concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")
                 )
            else (
                  if($assetSubGroup) then() 
                  else (     

                        concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")

            ))
let $result3 := ($result1, $result2)    
return xdmp:save($report-uri, text{$result3}), xdmp:elapsed-time()

方法2:

let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report1.txt")

let $enumValues := (:all string value sequence goes here that has to match:)

let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))

let $result1 :=( concat('asset-id' , "|",  'upi',  "|", 'assetSubGroup',  "|",  'asset-type',  "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier',  "|",  'mm-project-id',  "|" , 'sap-project-id' , "
"),

  for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
    let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
    let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
    let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
    let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
    let $originator := $each-search-copy/biblioCore/originator[1]/text()
    let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
    let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
    let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
    let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
    let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
    let $flag := map:keys($map2 - $map1)
    return
        if ($flag)
            then(

                     concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")
                 )
            else (
                  if($assetSubGroup) then() 
                  else (     

                        concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")

            ))
            )

return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()

方法3:

let $query-name := "get-asset-sub-group-values"
let $output-dir :=  "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report2.txt")


let $enumValues := (:sequence of strings are passed:)

let $result1 :=( concat('asset-id' , "|",  'upi',  "|", 'assetSubGroup',  "|",  'asset-type',  "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier',  "|",  'mm-project-id',  "|" , 'sap-project-id' , "
"),

  for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
    let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
    let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
    let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
    let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
    let $originator := $each-search-copy/biblioCore/originator[1]/text()
    let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
    let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
    let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
    let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
    let $flag := ($assetSubGroup eq $enumValues)
    return
        if ($flag)
            then()
            else (
                  concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id , "
")
                 ))

return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()

推荐答案

如果发现编写流式"查询时遇到问题,另一种选择是使用批处理工具,例如

If you find that you are having trouble writing a "streamable" query, another alternative would be to use a batch tool, such as CORB2 that uses multiple threads to execute a module for each document to produce it's output in a separate transaction and collects the results into a final output file. By breaking out the work into separate transactions, you don't have to worry about expanded tree cache errors or timeouts, and you can adjust the thread count to perform more work concurrently and get it done faster than a single query execution.

用于生成用|分隔的文本文件的示例CORB2选项文件如下所示(您需要根据环境调整 XCC-CONNECTION-URI 值):

An example CORB2 options file to produce your | delimited text file would look something like this (you would need to adjust the XCC-CONNECTION-URI value for your environment):

XCC-CONNECTION-URI=xcc://user:password@host:port
THREAD-COUNT=8
URIS-MODULE=selector.xqy|ADHOC
PROCESS-MODULE=process.xqy|ADHOC
PROCESS-TASK=com.marklogic.developer.corb.ExportBatchToFileTask
EXPORT-FILE-DIR=D:\output\
EXPORT-FILE-NAME=get-asset-sub-group-values_report.txt
PRE-BATCH-TASK=com.marklogic.developer.corb.PreBatchUpdateFileTask
EXPORT-FILE-TOP-CONTENT=asset-id|upi|assetSubGroup|asset-type|asset-sub-type|originator|originator-identifier|mm-project-id|sap-project-id
BATCH-URI-DELIM=|

创建URI选择器模块selector.xqy,该模块将找到所有要处理的URI:

Create the URIs selector module selector.xqy that will find all of the URIs to process:

xquery version "1.0-ml";
let $uris := cts:uris("", (), cts:and-query((
  cts:collection-query("metadata-search"), 
  cts:element-value-query(xs:QName("AssetID"), "*")
)) )
return (fn:count($uris), $uris)

创建将为每个URI调用的流程模块process.xqy:

Create the process module process.xqy that will be called for each URI:

xquery version "1.0-ml";

declare variable $URI external;

let $each-search-copy := fn:doc($URI)/metadata

let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $flag := ($assetSubGroup eq $enumValues)
return
    if ($flag)
        then()
        else (
              (: NOTE you could use string-join() instead of concat()

                 string-join(($asset-id, $upi, $assetSubGroup, $asset-type, $asset-sub-type, $originator, $originator-identifier, $mm-project-id, $sap-project-id), "|")  

              :)
              concat($asset-id , "|",  $upi,  "|", $assetSubGroup,  "|",  $asset-type,  "|", $asset-sub-type, "|", $originator, "|", $originator-identifier,  "|",  $mm-project-id,  "|" , $sap-project-id)
             ))

像这样调用CORB作业(调整XCC和CORB jar以及您的属性文件的路径和文件名):

Invoking the CORB job like this (adjust paths and filenames for the XCC and CORB jars and your properties file):

java -server -cp .:marklogic-xcc-8.0.8.jar:marklogic-corb-2.4.1.jar
    -DOPTIONS-FILE=myjob.properties com.marklogic.developer.corb.Manager

或者,如果您使用的是 ml-gradle

Or, if you are using ml-gradle, use the corb task

这篇关于扩展树缓存已满错误需要调整查询的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆