如何通过 $lookup 对“加入"集合执行 $text 搜索? [英] How to perform a $text search on a 'joined' collection via $lookup?
问题描述
我是 Mongo 的新手,使用 v3.2.我有 2 个收藏品 Parent &孩子.我想使用 Parent.aggregate 并使用 $lookup 加入" Child 然后对 Child 中的字段执行 $text $search 并在父节点上执行日期范围搜索.这可能吗……?
i’m new at Mongo, using v3.2. I have 2 collections Parent & Child. I’d like to use Parent.aggregate and use $lookup to "join" Child then perform $text $search on a field in Child and a date-range seach on the parent. Is this possible...?
推荐答案
根据已经给出的评论,您确实无法执行 $text
搜索 $lookup
因为在第一个管道阶段之外的任何阶段都没有可用的索引.确实,尤其是考虑到您确实希望根据child"集合的结果进行join",那么在child"上进行搜索确实会更好.
In line with the comments already given, it is true that you cannot perform a $text
search on the results of a $lookup
since there would not be an available index at any stage other than the very first pipeline stage. And it is true that especially considering that you really want the "join" to occur based on the results from the "child" collection, then it would indeed be better to search on the "child" instead.
这带来了一个明显的结论,即为了做到这一点,您使用初始 $text
查询然后 $lookup
是父级",而不是相反.
Which brings the obvious conclusion that in order to do this you perform the aggregation on the "child" collection with the initial $text
query and then $lookup
the "parent" instead of the other way around.
作为一个工作示例,仅将核心驱动程序用于演示目的:
As a working example, and just using the core driver for demonstration purposes:
MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
if (err) throw err;
var Parent = db.collection('parents');
var Child = db.collection('children');
async.series(
[
// Cleanup
function(callback) {
async.each([Parent,Child],function(coll,callback) {
coll.deleteMany({},callback);
},callback);
},
// Create Index
function(callback) {
Child.createIndex({ "text": "text" },callback);
},
// Create Documents
function(callback) {
async.parallel(
[
function(callback) {
Parent.insertMany(
[
{ "_id": 1, "name": "Parent 1" },
{ "_id": 2, "name": "Parent 2" },
{ "_id": 3, "name": "Parent 3" }
],
callback
);
},
function(callback) {
Child.insertMany(
[
{
"_id": 1,
"parent": 1,
"text": "The little dog laughed to see such fun"
},
{
"_id": 2,
"parent": 1,
"text": "The quick brown fox jumped over the lazy dog"
},
{
"_id": 3,
"parent": 1,
"text": "The dish ran away with the spoon"
},
{
"_id": 4,
"parent": 2,
"text": "Miss muffet on here tuffet"
},
{
"_id": 5,
"parent": 3,
"text": "Lady is a fox"
},
{
"_id": 6,
"parent": 3,
"text": "Every dog has it's day"
}
],
callback
)
}
],
callback
);
},
// Aggregate with $text and $lookup
function(callback) {
Child.aggregate(
[
{ "$match": {
"$text": { "$search": "fox dog" }
}},
{ "$project": {
"parent": 1,
"text": 1,
"score": { "$meta": "textScore" }
}},
{ "$sort": { "score": { "$meta": "textScore" } } },
{ "$lookup": {
"from": "parents",
"localField": "parent",
"foreignField": "_id",
"as": "parent"
}},
{ "$unwind": "$parent" },
{ "$group": {
"_id": "$parent._id",
"name": { "$first": "$parent.name" },
"children": {
"$push": {
"_id": "$_id",
"text": "$text",
"score": "$score"
}
},
"score": { "$sum": "$score" }
}},
{ "$sort": { "score": -1 } }
],
function(err,result) {
console.log(JSON.stringify(result,undefined,2));
callback(err);
}
)
}
],
function(err) {
if (err) throw err;
db.close();
}
);
});
这会导致 $text
匹配每个 Parent
中填充的 Child
的查询,并按 "score"
排序:
This results in the $text
matches from the query on the Child
populated within each Parent
, as well as being ordered by "score"
:
[
{
"_id": 1,
"name": "Parent 1",
"children": [
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog",
"score": 1.1666666666666667
},
{
"_id": 1,
"text": "The little dog laughed to see such fun",
"score": 0.6
}
],
"score": 1.7666666666666666
},
{
"_id": 3,
"name": "Parent 3",
"children": [
{
"_id": 5,
"text": "Lady is a fox",
"score": 0.75
},
{
"_id": 6,
"text": "Every dog has it's day",
"score": 0.6666666666666666
}
],
"score": 1.4166666666666665
}
]
这最终是有道理的,并且比从父"查询以在 $lookup
然后用 $match
进行后过滤"以删除任何不满足的子项"条件,然后丢弃没有任何匹配的父母".
This ultimately makes sense and will be a lot more efficient than querying from the "parent" to find all "children" in a $lookup
and then "post filtering" with $match
to remove any "children" that did not meet criteria, and then subsequently discarding the "parents" without any match.
同样的情况也适用于猫鼬风格的引用",您在父"中包含子"的数组",而不是在子上记录.因此,只要子级上的 "localField"
(在这种情况下为 _id
)与父级数组中定义的类型相同,即 "foriegnField"
(如果它正在使用 .populate()
无论如何),那么您仍然会为 $lookup
结果.
The same case is true for mongoose style "referencing" where you included an "array" of "children" within the "parent" instead of recording on the child. So as long as the "localField"
on the child ( _id
in that case ) is the same type as defined within the array on the parent as "foriegnField"
( which is will be if it was working with .populate()
anyway ) then you are still getting the matched "parent(s)" for each "child" in the $lookup
result.
这一切都归结为扭转您的想法并意识到 $text
结果是最重要的,因此那个"是需要启动操作的集合.
This all comes down to reversing your thinking and realizing that the $text
results are the most important thing, and therefore "that" is the collection on which the operation needs to be initiated.
这是可能的,但要反过来做.
It's possible, but just do it the other way around.
仅显示父级引用的反向案例以及日期过滤:
Just showing the reverse case for references on the parent as well as date filtering:
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/rlookup');
var parentSchema = new Schema({
"_id": Number,
"name": String,
"date": Date,
"children": [{ "type": Number, "ref": "Child" }]
});
var childSchema = new Schema({
"_id": Number,
"text": { "type": String, "index": "text" }
},{ "autoIndex": false });
var Parent = mongoose.model("Parent",parentSchema),
Child = mongoose.model("Child",childSchema);
async.series(
[
function(callback) {
async.each([Parent,Child],function(model,callback) {
model.remove({},callback);
},callback);
},
function(callback) {
Child.ensureIndexes({ "background": false },callback);
},
function(callback) {
async.parallel(
[
function(callback) {
Parent.create([
{
"_id": 1,
"name": "Parent 1",
"date": new Date("2016-02-01"),
"children": [1,2]
},
{
"_id": 2,
"name": "Parent 2",
"date": new Date("2016-02-02"),
"children": [3,4]
},
{
"_id": 3,
"name": "Parent 3",
"date": new Date("2016-02-03"),
"children": [5,6]
},
{
"_id": 4,
"name": "Parent 4",
"date": new Date("2016-01-15"),
"children": [1,2,6]
}
],callback)
},
function(callback) {
Child.create([
{
"_id": 1,
"text": "The little dog laughed to see such fun"
},
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog"
},
{
"_id": 3,
"text": "The dish ran awy with the spoon"
},
{
"_id": 4,
"text": "Miss muffet on her tuffet"
},
{
"_id": 5,
"text": "Lady is a fox"
},
{
"_id": 6,
"text": "Every dog has it's day"
}
],callback);
}
],
callback
);
},
function(callback) {
Child.aggregate(
[
{ "$match": {
"$text": { "$search": "fox dog" }
}},
{ "$project": {
"text": 1,
"score": { "$meta": "textScore" }
}},
{ "$sort": { "score": { "$meta": "textScore" } } },
{ "$lookup": {
"from": "parents",
"localField": "_id",
"foreignField": "children",
"as": "parent"
}},
{ "$project": {
"text": 1,
"score": 1,
"parent": {
"$filter": {
"input": "$parent",
"as": "parent",
"cond": {
"$and": [
{ "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
{ "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
]
}
}
}
}},
{ "$unwind": "$parent" },
{ "$group": {
"_id": "$parent._id",
"name": { "$first": "$parent.name" },
"date": { "$first": "$parent.date" },
"children": {
"$push": {
"_id": "$_id",
"text": "$text",
"score": "$score"
}
},
"score": { "$sum": "$score" }
}},
{ "$sort": { "score": -1 } }
],
function(err,result) {
console.log(JSON.stringify(result,undefined,2));
callback(err);
}
)
}
],
function(err) {
if (err) throw err;
mongoose.disconnect();
}
);
输出:
[
{
"_id": 1,
"name": "Parent 1",
"date": "2016-02-01T00:00:00.000Z",
"children": [
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog",
"score": 1.1666666666666667
},
{
"_id": 1,
"text": "The little dog laughed to see such fun",
"score": 0.6
}
],
"score": 1.7666666666666666
},
{
"_id": 3,
"name": "Parent 3",
"date": "2016-02-03T00:00:00.000Z",
"children": [
{
"_id": 5,
"text": "Lady is a fox",
"score": 0.75
},
{
"_id": 6,
"text": "Every dog has it's day",
"score": 0.6666666666666666
}
],
"score": 1.4166666666666665
}
]
注意 "Parent 4"
将被删除,因为日期不属于 $filter
.
Noting that the "Parent 4"
which would otherwise of had the largest ranking is removed since the date does not fall in the query range applied with $filter
.
这篇关于如何通过 $lookup 对“加入"集合执行 $text 搜索?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!