连接时的 BaseX 查询优化 [英] BaseX query optimization on join

查看:48
本文介绍了连接时的 BaseX 查询优化的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

在以下 Stackoverflow 中的问题得到修复后,我当我尝试进行如下联接时,还有另一个问题.最后一个查询需要大约 250 毫秒,而前两个查询只需要 16 毫秒.有没有更好的方法来执行两个项目之间的连接?

After the issue in the following Stackoverflow is fixed, I have another problem when I try to make a join as below. The last query takes about 250ms while the first two take only 16ms. Is there a better way to perform join between two items?

注意:您可以从这个链接中找到测试数据.

Note: You can find the test data from this link.

  let $PlGeTys :=
  /root/PlGeTys/PlGeTy[
    isOfPlCt/@href=/root/PlCts/PlCt[
      environment='AIR'
    ]/@id
  ]

let $PlSpTys :=
  /root/PlSpTys/PlSpTy[
    isOfPlGeTy/@href=$PlGeTys/@id
  ]

for  $PlGeTy in  $PlGeTys,
 $PlSpTy in  $PlSpTys
 where $PlSpTy/isOfPlGeTy/@href=$PlGeTy/@id
 return <done>{$PlGeTy, $PlSpTy }</done>

这里是查询信息:

Compiling:
- applying attribute index for $PlGeTys_0/@*:id
- rewriting where clause(s)
Query:
let $PlGeTys := /root/PlGeTys/PlGeTy[ isOfPlCt/@href=/root/PlCts/PlCt[ environment='AIR' ]/@id ] let $PlSpTys := /root/PlSpTys/PlSpTy[ isOfPlGeTy/@href=$PlGeTys/@id ] for $PlGeTy in $PlGeTys, $PlSpTy in $PlSpTys where $PlSpTy/isOfPlGeTy/@href=$PlGeTy/@id return "done"
Optimized Query:
let $PlGeTys_0 := db:open-pre("Output6",0)/*:root/*:PlGeTys/*:PlGeTy[(*:isOfPlCt/@*:href = root()/*:root/*:PlCts/*:PlCt[(*:environment = "AIR")]/@*:id)] let $PlSpTys_1 := db:attribute("Output6", $PlGeTys_0/@*:id)/self::*:href/parent::*:isOfPlGeTy/parent::*:PlSpTy for $PlGeTy_2 in $PlGeTys_0 for $PlSpTy_3 in ($PlSpTys_1)[(isOfPlGeTy/@href = $PlGeTy_2/@*:id)] return "done"
Result:
- Hit(s): 3642 Items
- Updated: 0 Items
- Printed: 18209 Bytes
- Read Locking: local [Output6]
- Write Locking: none
Timing:
- Parsing: 0.77 ms
- Compiling: 0.47 ms
- Evaluating: 215.71 ms
- Printing: 0.17 ms
- Total Time: 217.11 ms
Query plan:
<QueryPlan compiled="true">
  <GFLWOR>
    <Let>
      <Var name="$PlGeTys" id="0"/>
      <IterPath>
        <DBNode name="Output6" pre="0"/>
        <IterStep axis="child" test="*:root"/>
        <IterStep axis="child" test="*:PlGeTys"/>
        <IterStep axis="child" test="*:PlGeTy">
          <CmpG op="=">
            <CachedPath>
              <IterStep axis="child" test="*:isOfPlCt"/>
              <IterStep axis="attribute" test="*:href"/>
            </CachedPath>
            <IterPath>
              <Root/>
              <IterStep axis="child" test="*:root"/>
              <IterStep axis="child" test="*:PlCts"/>
              <IterStep axis="child" test="*:PlCt">
                <CmpG op="=">
                  <CachedPath>
                    <IterStep axis="child" test="*:environment"/>
                  </CachedPath>
                  <Str value="AIR" type="xs:string"/>
                </CmpG>
              </IterStep>
              <IterStep axis="attribute" test="*:id"/>
            </IterPath>
          </CmpG>
        </IterStep>
      </IterPath>
    </Let>
    <Let>
      <Var name="$PlSpTys" id="1"/>
      <CachedPath>
        <ValueAccess data="Output6" type="ATTRIBUTE">
          <CachedPath>
            <VarRef>
              <Var name="$PlGeTys" id="0"/>
            </VarRef>
            <IterStep axis="attribute" test="*:id"/>
          </CachedPath>
        </ValueAccess>
        <IterStep axis="self" test="*:href"/>
        <IterStep axis="parent" test="*:isOfPlGeTy"/>
        <IterStep axis="parent" test="*:PlSpTy"/>
      </CachedPath>
    </Let>
    <For>
      <Var name="$PlGeTy" id="2"/>
      <VarRef>
        <Var name="$PlGeTys" id="0"/>
      </VarRef>
    </For>
    <For>
      <Var name="$PlSpTy" id="3"/>
      <IterFilter>
        <VarRef>
          <Var name="$PlSpTys" id="1"/>
        </VarRef>
        <CmpG op="=">
          <CachedPath>
            <IterStep axis="child" test="isOfPlGeTy"/>
            <IterStep axis="attribute" test="href"/>
          </CachedPath>
          <IterPath>
            <VarRef>
              <Var name="$PlGeTy" id="2"/>
            </VarRef>
            <IterStep axis="attribute" test="*:id"/>
          </IterPath>
        </CmpG>
      </IterFilter>
    </For>
    <Str value="done" type="xs:string"/>
  </GFLWOR>
</QueryPlan>

<小时>

更新二:

以上问题已修复.但是当我想在如下本地函数中使用它时,大约需要 700 毫秒.我做错了什么吗?

The issue above is fixed. But when I want to use it in a local function as below, it takes about 700 ms. Am I doing something wrong?

declare function local:result($root as element(root)) as element()* {
  let $PlCts := $root/PlCts/PlCt[environment = 'AIR']/@id
  for $PlGeTy in $root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts],
      $PlSpTy in $root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id]
  return <done>{ $PlGeTy, $PlSpTy }</done>
};

let $result := local:result(/root)
return $result

查询信息;

Compiling:
- removing redundant element()* cast.
- inlining local:result#1
- inlining $root_5 as element(root)
- simplifying flwor expression
Query:
declare function local:result($root as element(root)) as element()* { let $PlCts := $root/PlCts/PlCt[environment = 'AIR']/@id for $PlGeTy in $root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts], $PlSpTy in $root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id] return <done>{ $PlGeTy, $PlSpTy }</done> }; let $result := local:result(/root) return $result
Optimized Query:
let $PlCts_6 := ((: element(root), true :) db:open-pre("Output6",0)/*:root)/PlCts/PlCt[(environment = "AIR")]/@id let $result_4 := for $PlGeTy_7 in ((: element(root), true :) db:open-pre("Output6",0)/*:root)/PlGeTys/PlGeTy[(isOfPlCt/@href = $PlCts_6)] for $PlSpTy_8 in ((: element(root), true :) db:open-pre("Output6",0)/*:root)/PlSpTys/PlSpTy[(isOfPlGeTy/@href = $PlGeTy_7/@id)] return element done { (($PlGeTy_7, $PlSpTy_8)) } return $result_4
Result:
- Hit(s): 3642 Items
- Updated: 0 Items
- Printed: 553 KB
- Read Locking: local [Output6]
- Write Locking: none
Timing:
- Parsing: 1.41 ms
- Compiling: 2.9 ms
- Evaluating: 581.5 ms
- Printing: 8.34 ms
- Total Time: 594.15 ms
Query plan:
<QueryPlan compiled="true">
  <GFLWOR>
    <Let>
      <Var name="$PlCts" id="6"/>
      <IterPath>
        <TypeCheck type="element(root)" function="true">
          <IterPath>
            <DBNode name="Output6" pre="0"/>
            <IterStep axis="child" test="*:root"/>
          </IterPath>
        </TypeCheck>
        <IterStep axis="child" test="PlCts"/>
        <IterStep axis="child" test="PlCt">
          <CmpG op="=">
            <CachedPath>
              <IterStep axis="child" test="environment"/>
            </CachedPath>
            <Str value="AIR" type="xs:string"/>
          </CmpG>
        </IterStep>
        <IterStep axis="attribute" test="id"/>
      </IterPath>
    </Let>
    <Let>
      <Var name="$result" id="4"/>
      <GFLWOR>
        <For>
          <Var name="$PlGeTy" id="7"/>
          <IterPath>
            <TypeCheck type="element(root)" function="true">
              <IterPath>
                <DBNode name="Output6" pre="0"/>
                <IterStep axis="child" test="*:root"/>
              </IterPath>
            </TypeCheck>
            <IterStep axis="child" test="PlGeTys"/>
            <IterStep axis="child" test="PlGeTy">
              <CmpG op="=">
                <CachedPath>
                  <IterStep axis="child" test="isOfPlCt"/>
                  <IterStep axis="attribute" test="href"/>
                </CachedPath>
                <VarRef>
                  <Var name="$PlCts" id="6"/>
                </VarRef>
              </CmpG>
            </IterStep>
          </IterPath>
        </For>
        <For>
          <Var name="$PlSpTy" id="8"/>
          <IterPath>
            <TypeCheck type="element(root)" function="true">
              <IterPath>
                <DBNode name="Output6" pre="0"/>
                <IterStep axis="child" test="*:root"/>
              </IterPath>
            </TypeCheck>
            <IterStep axis="child" test="PlSpTys"/>
            <IterStep axis="child" test="PlSpTy">
              <CmpG op="=">
                <CachedPath>
                  <IterStep axis="child" test="isOfPlGeTy"/>
                  <IterStep axis="attribute" test="href"/>
                </CachedPath>
                <IterPath>
                  <VarRef>
                    <Var name="$PlGeTy" id="7"/>
                  </VarRef>
                  <IterStep axis="attribute" test="id"/>
                </IterPath>
              </CmpG>
            </IterStep>
          </IterPath>
        </For>
        <CElem>
          <QNm value="done" type="xs:QName"/>
          <List>
            <VarRef>
              <Var name="$PlGeTy" id="7"/>
            </VarRef>
            <VarRef>
              <Var name="$PlSpTy" id="8"/>
            </VarRef>
          </List>
        </CElem>
      </GFLWOR>
    </Let>
    <VarRef>
      <Var name="$result" id="4"/>
    </VarRef>
  </GFLWOR>
</QueryPlan>

<小时>

更新三:现在,当我使用额外的Cont"项目扩展查询时,我遇到了另一个问题,查询执行持续大约 600 毫秒.但是如果没有Cont"项,它只需要 35 毫秒.您对优化此查询有什么建议吗?


Update three: Now, I have another issue when I extend the query with the additional "Cont" items, The query execution lasts about 600 ms. But without "Cont" items, it takes only 35 ms. Do you any suggestion on optimizing this query?

let $PlCts := /root/PlCts/PlCt[environment = 'AIR']/@id

for $PlGeTy in /root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts],
    $PlSpTy in /root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id],
    $Cont in /root/Conts/Cont[@id=$PlSpTy/isOfCont/@href]
return <done>{ $PlGeTy, $PlSpTy, $Cont }</done>

这是用于测试的最新 XML 数据的链接.

This is the link of the latest XML data for testing.

查询信息:

Compiling:
- applying text index for "AIR"
- applying attribute index for $PlCts_0
- applying attribute index for $PlGeTy_1/@id
- applying attribute index for $PlSpTy_2/isOfCont/@href
- inlining $PlCts_0
Query:
let $PlCts := /root/PlCts/PlCt[environment = 'AIR']/@id for $PlGeTy in /root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts], $PlSpTy in /root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id], $Cont in /root/Conts/Cont[@id=$PlSpTy/isOfCont/@href] return <done>{ $PlGeTy, $PlSpTy, $Cont }</done>
Optimized Query:
for $PlGeTy_1 in db:attribute("Output7", db:text("Output7", "AIR")/parent::*:environment/parent::*:PlCt/@*:id)/self::*:href/parent::*:isOfPlCt/parent::*:PlGeTy for $PlSpTy_2 in db:attribute("Output7", $PlGeTy_1/@id)/self::*:href/parent::*:isOfPlGeTy/parent::*:PlSpTy for $Cont_3 in db:attribute("Output7", $PlSpTy_2/isOfCont/@href)/self::*:id/parent::*:Cont return element done { (($PlGeTy_1, $PlSpTy_2, $Cont_3)) }
Result:
- Hit(s): 3642 Items
- Updated: 0 Items
- Printed: 1159 KB
- Read Locking: local [Output7]
- Write Locking: none
Timing:
- Parsing: 0.39 ms
- Compiling: 0.68 ms
- Evaluating: 585.29 ms
- Printing: 14.36 ms
- Total Time: 600.72 ms
Query plan:
<QueryPlan compiled="true">
  <GFLWOR>
    <For>
      <Var name="$PlGeTy" id="1"/>
      <CachedPath>
        <ValueAccess data="Output7" type="ATTRIBUTE">
          <CachedPath>
            <ValueAccess data="Output7" type="TEXT" name="*:environment">
              <Str value="AIR" type="xs:string"/>
            </ValueAccess>
            <IterStep axis="parent" test="*:PlCt"/>
            <IterStep axis="attribute" test="*:id"/>
          </CachedPath>
        </ValueAccess>
        <IterStep axis="self" test="*:href"/>
        <IterStep axis="parent" test="*:isOfPlCt"/>
        <IterStep axis="parent" test="*:PlGeTy"/>
      </CachedPath>
    </For>
    <For>
      <Var name="$PlSpTy" id="2"/>
      <CachedPath>
        <ValueAccess data="Output7" type="ATTRIBUTE">
          <IterPath>
            <VarRef>
              <Var name="$PlGeTy" id="1"/>
            </VarRef>
            <IterStep axis="attribute" test="id"/>
          </IterPath>
        </ValueAccess>
        <IterStep axis="self" test="*:href"/>
        <IterStep axis="parent" test="*:isOfPlGeTy"/>
        <IterStep axis="parent" test="*:PlSpTy"/>
      </CachedPath>
    </For>
    <For>
      <Var name="$Cont" id="3"/>
      <CachedPath>
        <ValueAccess data="Output7" type="ATTRIBUTE">
          <IterPath>
            <VarRef>
              <Var name="$PlSpTy" id="2"/>
            </VarRef>
            <IterStep axis="child" test="isOfCont"/>
            <IterStep axis="attribute" test="href"/>
          </IterPath>
        </ValueAccess>
        <IterStep axis="self" test="*:id"/>
        <IterStep axis="parent" test="*:Cont"/>
      </CachedPath>
    </For>
    <CElem>
      <QNm value="done" type="xs:QName"/>
      <List>
        <VarRef>
          <Var name="$PlGeTy" id="1"/>
        </VarRef>
        <VarRef>
          <Var name="$PlSpTy" id="2"/>
        </VarRef>
        <VarRef>
          <Var name="$Cont" id="3"/>
        </VarRef>
      </List>
    </CElem>
  </GFLWOR>
</QueryPlan>

推荐答案

在那些 for 循环中,优化器已经缓存了两个大"XPath 表达式的结果.此外,您实际上是在比较 href/id 属性两次.

In those for loops, the results of the two "large" XPath expressions are already cached by the optimizer. Furthermore, you're actually comparing the href/id attributes twice.

清理查询以消除这种双重工作可将执行时间减少约 90%.

Cleaning up the query removing this double effort reduces the execution time by about 90%.

let $PlCts := /root/PlCts/PlCt[environment = 'AIR']/@id
for $PlGeTy in /root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts],
    $PlSpTy in /root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id]
return <done>{ $PlGeTy, $PlSpTy }</done>

这篇关于连接时的 BaseX 查询优化的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆