如何通过 $lookup 对“加入的"集合执行 $text 搜索? [英] How to perform a $text search on a 'joined' collection via $lookup?

查看:20
本文介绍了如何通过 $lookup 对“加入的"集合执行 $text 搜索?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我是 Mongo 的新手,使用 v3.2.我有 2 个集合 Parent &孩子.我想使用 Parent.aggregate 并使用 $lookup 来加入" Child 然后对 Child 中的字段执行 $text $search 并在父级上执行日期范围搜索.这可能吗……?

解决方案

根据已经给出的评论,确实不能执行 $text 搜索 $lookup 因为在第一个管道阶段之外的任何阶段都不会有可用的索引.确实,特别是考虑到您确实希望根据子"集合的结果进行连接",那么搜索子"确实会更好.

这带来了一个明显的结论,为了做到这一点,您使用初始 $text 查询,然后 $lookup父"而不是相反.

作为一个工作示例,仅将核心驱动程序用于演示目的:

MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {如果(错误)抛出错误;var Parent = db.collection('父母');var Child = db.collection('children');异步系列([//清理功能(回调){async.each([Parent,Child],function(coll,callback) {coll.deleteMany({},callback);},打回来);},//创建索引功能(回调){Child.createIndex({ "text": "text" },callback);},//创建文档功能(回调){异步并行([功能(回调){Parent.insertMany([{ "_id": 1, "name": "Parent 1" },{ "_id": 2, "name": "Parent 2" },{ "_id": 3, "name": "Parent 3" }],打回来);},功能(回调){Child.insertMany([{_id":1,父母":1,"text": "小狗看到这么好玩笑了"},{_id":2,父母":1,"text": "敏捷的棕色狐狸跳过了懒狗"},{"_id": 3,父母":1,"text": "菜用勺子跑了"},{"_id": 4,父母":2,"text": "这里的玛菲特小姐"},{"_id": 5,父母":3,"text": "女士是一只狐狸"},{"_id": 6,父母":3,"text": "每只狗都有它的一天"}],打回来)}],打回来);},//用 $text 和 $lookup 聚合功能(回调){Child.aggregate([{$匹配":{"$text": { "$search": "fox dog" }}},{$项目":{父母":1,文本1,分数":{$meta":textScore"}}},{ "$sort": { "score": { "$meta": "textScore" } } },{$查找":{"from": "父母","localField": "父","foreignField": "_id",作为":父母"}},{ "$unwind": "$parent" },{$组":{"_id": "$parent._id","name": { "$first": "$parent.name" },孩子们": {$push":{"_id": "$_id","text": "$text",分数":$分数"}},"score": { "$sum": "$score" }}},{ "$sort": { "score": -1 } }],功能(错误,结果){console.log(JSON.stringify(result,undefined,2));回调(错误);})}],功能(错误){如果(错误)抛出错误;db.close();});});

这导致 $text 在每个 Parent 中填充的 Child 上的查询匹配,以及按 "score" 排序:

[{_id":1,"name": "父母 1",孩子们": [{_id":2,"text": "敏捷的棕色狐狸跳过了懒狗",分数":1.16666666666666667},{_id":1,"text": "小狗看到这么好玩笑了",分数":0.6}],分数":1.76666666666666666},{"_id": 3,"name": "父母 3",孩子们": [{"_id": 5,"text": "女士是一只狐狸",分数":0.75},{"_id": 6,"text": "每只狗都有自己的一天",分数":0.66666666666666666}],分数":1.41666666666666665}]

这最终是有道理的,并且比从父"查询以在 $lookup 然后使用 $match 进行后过滤"以删除任何不符合的子项"标准,然后丢弃没有任何匹配的父母".

同样的情况适用于猫鼬风格的引用",您在父"中包含子"的数组",而不是在子上进行记录.因此,只要子节点上的 "localField"(在这种情况下为 _id)与父节点上的数组中定义的类型相同,作为 "foriegnField"(如果它与 .populate() 一起工作,那么你仍然会在 $lookup 结果.

这一切都归结为反转您的思维并意识到 $text 结果是最重要的,因此那个"是需要启动操作的集合.

这是可能的,但请反过来做.

<小时>

在父级中使用 mongoose 样式和引用的子级列表

仅显示父引用和日期过滤的相反情况:

var async = require('async'),猫鼬 = 要求('猫鼬'),架构 = 猫鼬.架构;mongoose.connect('mongodb://localhost/rlookup');var parentSchema = 新架构({_身份证号码,名称":字符串,日期":日期,孩子":[{类型":数字,参考":孩子"}]});var childSchema = 新架构({_身份证号码,文本":{类型":字符串,索引":文本"}},{ "autoIndex": false });var Parent = mongoose.model("Parent",parentSchema),Child = mongoose.model("Child",childSchema);异步系列([功能(回调){async.each([Parent,Child],function(model,callback) {model.remove({},callback);},打回来);},功能(回调){Child.ensureIndexes({ "background": false },callback);},功能(回调){异步并行([功能(回调){Parent.create([{_id":1,"name": "父母 1",日期":新日期(2016-02-01"),儿童":[1,2]},{_id":2,"name": "父母 2",日期":新日期(2016-02-02"),儿童":[3,4]},{"_id": 3,"name": "父母 3",日期":新日期(2016-02-03"),儿童":[5,6]},{"_id": 4,"name": "家长 4",日期":新日期(2016-01-15"),儿童":[1,2,6]}],打回来)},功能(回调){Child.create([{_id":1,"text": "小狗看到这么好玩笑了"},{_id":2,"text": "敏捷的棕色狐狸跳过了懒狗"},{"_id": 3,"text": "这道菜被勺子弄坏了"},{"_id": 4,"text": "muffet 小姐在她的 tuffet"},{"_id": 5,"text": "女士是一只狐狸"},{"_id": 6,"text": "每只狗都有它的一天"}],打回来);}],打回来);},功能(回调){Child.aggregate([{$匹配":{"$text": { "$search": "fox dog" }}},{$项目":{文本1,分数":{$meta":textScore"}}},{ "$sort": { "score": { "$meta": "textScore" } } },{$查找":{"from": "父母","localField": "_id","foreignField": "儿童",作为":父母"}},{$项目":{文本1,分数":1,父母":{$过滤器":{"输入": "$parent","as": "父母",条件": {"$and": [{ "$gte": [ "$$parent.date", new Date("2016-02-01") ] },{ "$lt": [ "$$parent.date", new Date("2016-03-01") ] }]}}}}},{ "$unwind": "$parent" },{$组":{"_id": "$parent._id","name": { "$first": "$parent.name" },"日期": { "$first": "$parent.date" },孩子们": {$push":{"_id": "$_id","text": "$text",分数":$分数"}},"score": { "$sum": "$score" }}},{ "$sort": { "score": -1 } }],功能(错误,结果){console.log(JSON.stringify(result,undefined,2));回调(错误);})}],功能(错误){如果(错误)抛出错误;猫鼬.disconnect();});

输出:

[{_id":1,"name": "父母 1","日期": "2016-02-01T00:00:00.000Z",孩子们": [{_id":2,"text": "敏捷的棕色狐狸跳过了懒狗",分数":1.16666666666666667},{_id":1,"text": "小狗看到这么好玩笑了",分数":0.6}],分数":1.76666666666666666},{"_id": 3,"name": "父母 3","日期": "2016-02-03T00:00:00.000Z",孩子们": [{"_id": 5,"text": "女士是一只狐狸",分数":0.75},{"_id": 6,"text": "每只狗都有自己的一天",分数":0.66666666666666666}],分数":1.41666666666666665}]

请注意,由于日期不在 $filter.

i’m new at Mongo, using v3.2. I have 2 collections Parent & Child. I’d like to use Parent.aggregate and use $lookup to "join" Child then perform $text $search on a field in Child and a date-range seach on the parent. Is this possible...?

解决方案

In line with the comments already given, it is true that you cannot perform a $text search on the results of a $lookupsince there would not be an available index at any stage other than the very first pipeline stage. And it is true that especially considering that you really want the "join" to occur based on the results from the "child" collection, then it would indeed be better to search on the "child" instead.

Which brings the obvious conclusion that in order to do this you perform the aggregation on the "child" collection with the initial $text query and then $lookup the "parent" instead of the other way around.

As a working example, and just using the core driver for demonstration purposes:

MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
  if (err) throw err;

  var Parent = db.collection('parents');
  var Child = db.collection('children');

  async.series(
    [
      // Cleanup
      function(callback) {
        async.each([Parent,Child],function(coll,callback) {
          coll.deleteMany({},callback);
        },callback);
      },
      // Create Index
      function(callback) {
        Child.createIndex({ "text": "text" },callback);
      },
      // Create Documents
      function(callback) {
        async.parallel(
          [
            function(callback) {
              Parent.insertMany(
                [
                  { "_id": 1, "name": "Parent 1" },
                  { "_id": 2, "name": "Parent 2" },
                  { "_id": 3, "name": "Parent 3" }
                ],
                callback
              );
            },
            function(callback) {
              Child.insertMany(
                [
                  {
                    "_id": 1,
                    "parent": 1,
                    "text": "The little dog laughed to see such fun"
                  },
                  {
                    "_id": 2,
                    "parent": 1,
                    "text": "The quick brown fox jumped over the lazy dog"
                  },
                  {
                    "_id": 3,
                    "parent": 1,
                    "text": "The dish ran away with the spoon"
                  },
                  {
                    "_id": 4,
                    "parent": 2,
                    "text": "Miss muffet on here tuffet"
                  },
                  {
                    "_id": 5,
                    "parent": 3,
                    "text": "Lady is a fox"
                  },
                  {
                    "_id": 6,
                    "parent": 3,
                    "text": "Every dog has it's day"
                  }
                ],
                callback
              )
            }
          ],
          callback
        );
      },
      // Aggregate with $text and $lookup
      function(callback) {
        Child.aggregate(
          [
            { "$match": {
              "$text": { "$search": "fox dog" }
            }},
            { "$project": {
              "parent": 1,
              "text": 1,
              "score": { "$meta": "textScore" }
            }},
            { "$sort": { "score": { "$meta": "textScore" } } },
            { "$lookup": {
              "from": "parents",
              "localField": "parent",
              "foreignField": "_id",
              "as": "parent"
            }},
            { "$unwind": "$parent" },
            { "$group": {
              "_id": "$parent._id",
              "name": { "$first": "$parent.name" },
              "children": {
                "$push": {
                  "_id": "$_id",
                  "text": "$text",
                  "score": "$score"
                }
              },
              "score": { "$sum": "$score" }
            }},
            { "$sort": { "score": -1 } }
          ],
          function(err,result) {
            console.log(JSON.stringify(result,undefined,2));
            callback(err);
          }
        )
      }
    ],
    function(err) {
      if (err) throw err;
      db.close();
    }
  );

});

This results in the $text matches from the query on the Child populated within each Parent, as well as being ordered by "score":

[
  {
    "_id": 1,
    "name": "Parent 1",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

This ultimately makes sense and will be a lot more efficient than querying from the "parent" to find all "children" in a $lookup and then "post filtering" with $match to remove any "children" that did not meet criteria, and then subsequently discarding the "parents" without any match.

The same case is true for mongoose style "referencing" where you included an "array" of "children" within the "parent" instead of recording on the child. So as long as the "localField" on the child ( _id in that case ) is the same type as defined within the array on the parent as "foriegnField" ( which is will be if it was working with .populate() anyway ) then you are still getting the matched "parent(s)" for each "child" in the $lookup result.

This all comes down to reversing your thinking and realizing that the $text results are the most important thing, and therefore "that" is the collection on which the operation needs to be initiated.

It's possible, but just do it the other way around.


Using mongoose style with list of referenced children in the parent

Just showing the reverse case for references on the parent as well as date filtering:

var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

mongoose.connect('mongodb://localhost/rlookup');

var parentSchema = new Schema({
  "_id": Number,
  "name": String,
  "date": Date,
  "children": [{ "type": Number, "ref": "Child" }]
});

var childSchema = new Schema({
  "_id": Number,
  "text": { "type": String, "index": "text" }
},{ "autoIndex": false });

var Parent = mongoose.model("Parent",parentSchema),
    Child = mongoose.model("Child",childSchema);

async.series(
  [
    function(callback) {
      async.each([Parent,Child],function(model,callback) {
        model.remove({},callback);
      },callback);
    },
    function(callback) {
      Child.ensureIndexes({ "background": false },callback);
    },
    function(callback) {
      async.parallel(
        [
          function(callback) {
            Parent.create([
              {
                "_id": 1,
                "name": "Parent 1",
                "date": new Date("2016-02-01"),
                "children": [1,2]
              },
              {
                "_id": 2,
                "name": "Parent 2",
                "date": new Date("2016-02-02"),
                "children": [3,4]
              },
              {
                "_id": 3,
                "name": "Parent 3",
                "date": new Date("2016-02-03"),
                "children": [5,6]
              },
              {
                "_id": 4,
                "name": "Parent 4",
                "date": new Date("2016-01-15"),
                "children": [1,2,6]
              }
            ],callback)
          },
          function(callback) {
            Child.create([
              {
                "_id": 1,
                "text": "The little dog laughed to see such fun"
              },
              {
                "_id": 2,
                "text": "The quick brown fox jumped over the lazy dog"
              },
              {
                "_id": 3,
                "text": "The dish ran awy with the spoon"
              },
              {
                "_id": 4,
                "text": "Miss muffet on her tuffet"
              },
              {
                "_id": 5,
                "text": "Lady is a fox"
              },
              {
                "_id": 6,
                "text": "Every dog has it's day"
              }
            ],callback);
          }
        ],
        callback
      );
    },
    function(callback) {
      Child.aggregate(
        [
          { "$match": {
            "$text": { "$search": "fox dog" }
          }},
          { "$project": {
            "text": 1,
            "score": { "$meta": "textScore" }
          }},
          { "$sort": { "score": { "$meta": "textScore" } } },
          { "$lookup": {
            "from": "parents",
            "localField": "_id",
            "foreignField": "children",
            "as": "parent"
          }},
          { "$project": {
            "text": 1,
            "score": 1,
            "parent": {
              "$filter": {
                "input": "$parent",
                "as": "parent",
                "cond": {
                  "$and": [
                    { "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
                    { "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
                  ]
                }
              }
            }
          }},
          { "$unwind": "$parent" },
          { "$group": {
            "_id": "$parent._id",
            "name": { "$first": "$parent.name" },
            "date": { "$first": "$parent.date" },
            "children": {
              "$push": {
                "_id": "$_id",
                "text": "$text",
                "score": "$score"
              }
            },
            "score": { "$sum": "$score" }
          }},
          { "$sort": { "score": -1 } }
        ],
        function(err,result) {
          console.log(JSON.stringify(result,undefined,2));
          callback(err);
        }
      )
    }
  ],
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);

With the output:

[
  {
    "_id": 1,
    "name": "Parent 1",
    "date": "2016-02-01T00:00:00.000Z",
    "children": [
      {
        "_id": 2,
        "text": "The quick brown fox jumped over the lazy dog",
        "score": 1.1666666666666667
      },
      {
        "_id": 1,
        "text": "The little dog laughed to see such fun",
        "score": 0.6
      }
    ],
    "score": 1.7666666666666666
  },
  {
    "_id": 3,
    "name": "Parent 3",
    "date": "2016-02-03T00:00:00.000Z",
    "children": [
      {
        "_id": 5,
        "text": "Lady is a fox",
        "score": 0.75
      },
      {
        "_id": 6,
        "text": "Every dog has it's day",
        "score": 0.6666666666666666
      }
    ],
    "score": 1.4166666666666665
  }
]

Noting that the "Parent 4" which would otherwise of had the largest ranking is removed since the date does not fall in the query range applied with $filter.

这篇关于如何通过 $lookup 对“加入的"集合执行 $text 搜索?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆