如何通过 $lookup 在“联接”的集合上执行 $text 搜索? [英] How to perform a $text search on a 'joined' collection via $lookup?
问题描述
我是 Mongo 新手,使用的是 v3.2。我有两个集合:Parent 和 Child。我想使用 Parent.aggregate 并通过 $lookup 来“联接” Child,然后对 Child 中的某个字段执行 $text $search,并对 Parent 执行日期范围搜索。这可能吗...?
I’m new to Mongo, using v3.2. I have 2 collections, Parent & Child. I’d like to use Parent.aggregate and use $lookup to "join" Child, then perform a $text $search on a field in Child and a date-range search on the parent. Is this possible...?
推荐答案
正如已有的评论所说,您确实无法对 $lookup 的结果执行 $text 搜索,因为除了第一个管道阶段之外,其他任何阶段都没有可用的索引。而且,尤其考虑到您实际上是希望根据“子”集合的结果来进行“联接”,那么改为在“子”集合上搜索确实会更好。
In line with the comments already given, it is true that you cannot perform a $text
search on the results of a $lookup
since there would not be an available index at any stage other than the very first pipeline stage. And it is true that especially considering that you really want the "join" to occur based on the results from the "child" collection, then it would indeed be better to search on the "child" instead.
这就得出了一个显而易见的结论:要做到这一点,您应当在“子”集合上执行聚合,以初始的 $text 查询开始,然后再 $lookup “父”集合,而不是反过来。
Which brings the obvious conclusion that in order to do this you perform the aggregation on the "child" collection with the initial $text
query and then $lookup
the "parent" instead of the other way around.
作为一个工作示例,仅使用核心驱动程序进行演示:
As a working example, and just using the core driver for demonstration purposes:
// Working example with the core driver: run the $text search on the
// "children" collection first (a $text match is only possible in the first
// pipeline stage), then $lookup each matching child's parent and regroup the
// hits per parent.
MongoClient.connect('mongodb://localhost/rlookup',function(err,db) {
  if (err) throw err;

  var Parent = db.collection('parents');
  var Child = db.collection('children');

  async.series(
    [
      // Cleanup: empty both collections so every run starts from the same data
      function(callback) {
        async.each([Parent,Child],function(coll,callback) {
          coll.deleteMany({},callback);
        },callback);
      },

      // Create Index: the text index on "text" is what the $text query uses
      function(callback) {
        Child.createIndex({ "text": "text" },callback);
      },

      // Create Documents: parents and children are independent inserts,
      // so they are issued in parallel
      function(callback) {
        async.parallel(
          [
            function(callback) {
              Parent.insertMany(
                [
                  { "_id": 1, "name": "Parent 1" },
                  { "_id": 2, "name": "Parent 2" },
                  { "_id": 3, "name": "Parent 3" }
                ],
                callback
              );
            },
            function(callback) {
              // Each child records its parent's _id in "parent"
              Child.insertMany(
                [
                  {
                    "_id": 1,
                    "parent": 1,
                    "text": "The little dog laughed to see such fun"
                  },
                  {
                    "_id": 2,
                    "parent": 1,
                    "text": "The quick brown fox jumped over the lazy dog"
                  },
                  {
                    "_id": 3,
                    "parent": 1,
                    "text": "The dish ran away with the spoon"
                  },
                  {
                    "_id": 4,
                    "parent": 2,
                    "text": "Miss muffet on here tuffet"
                  },
                  {
                    "_id": 5,
                    "parent": 3,
                    "text": "Lady is a fox"
                  },
                  {
                    "_id": 6,
                    "parent": 3,
                    "text": "Every dog has it's day"
                  }
                ],
                callback
              );
            }
          ],
          callback
        );
      },

      // Aggregate with $text and $lookup
      function(callback) {
        Child.aggregate(
          [
            // Must be the first stage so the text index can be used
            { "$match": {
              "$text": { "$search": "fox dog" }
            }},
            // Keep the relevance score as a regular field for later stages
            { "$project": {
              "parent": 1,
              "text": 1,
              "score": { "$meta": "textScore" }
            }},
            // Order children by relevance so $push below keeps that order
            { "$sort": { "score": { "$meta": "textScore" } } },
            // Replace the parent id with the matching parent document(s)
            { "$lookup": {
              "from": "parents",
              "localField": "parent",
              "foreignField": "_id",
              "as": "parent"
            }},
            // $lookup yields an array; flatten it to one parent per child
            { "$unwind": "$parent" },
            // Regroup per parent: collect its matching children and add up
            // their scores into a total for the parent
            { "$group": {
              "_id": "$parent._id",
              "name": { "$first": "$parent.name" },
              "children": {
                "$push": {
                  "_id": "$_id",
                  "text": "$text",
                  "score": "$score"
                }
              },
              "score": { "$sum": "$score" }
            }},
            // Highest total score first
            { "$sort": { "score": -1 } }
          ],
          function(err,result) {
            // Report a failure instead of printing an undefined result
            if (err) return callback(err);
            console.log(JSON.stringify(result,undefined,2));
            callback();
          }
        );
      }
    ],
    function(err) {
      // Release the connection on both the success and the failure path,
      // then surface any error from the steps above
      db.close();
      if (err) throw err;
    }
  );
});
其结果是:查询中在 Child 上匹配 $text 的文档被填充到各自的 Parent 之内,并且按 "score" 排序:
This results in the $text
matches from the query on the Child
populated within each Parent
, as well as being ordered by "score"
:
[
{
"_id": 1,
"name": "Parent 1",
"children": [
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog",
"score": 1.1666666666666667
},
{
"_id": 1,
"text": "The little dog laughed to see such fun",
"score": 0.6
}
],
"score": 1.7666666666666666
},
{
"_id": 3,
"name": "Parent 3",
"children": [
{
"_id": 5,
"text": "Lady is a fox",
"score": 0.75
},
{
"_id": 6,
"text": "Every dog has it's day",
"score": 0.6666666666666666
}
],
"score": 1.4166666666666665
}
]
This ultimately makes sense and will be a lot more efficient than querying from the "parent" to find all "children" in a $lookup
and then "post filtering" with $match
to remove any "children" that did not meet criteria, and then subsequently discarding the "parents" without any match.
对于 mongoose 风格的“引用”也是同样的情况,即在“父”文档中包含一个“子”的“数组”,而不是在子文档上记录父引用。因此,只要子文档上的 "localField"(在这种情况下为 _id)与父文档数组中作为 "foreignField" 所定义的类型相同(如果它原本就能与 .populate() 一起工作,那么必然如此),那么您仍然会在 $lookup 结果中为每个“子”得到匹配的“父”。
The same case is true for mongoose style "referencing" where you included an "array" of "children" within the "parent" instead of recording on the child. So as long as the "localField"
on the child ( _id
in that case ) is the same type as defined within the array on the parent as "foreignField"
( which it will be if it was working with .populate()
anyway ) then you are still getting the matched "parent(s)" for each "child" in the $lookup
result.
所有这些都归结于您的思维转变和意识到$text
结果是最重要的事情,因此"that"是需要在其上启动操作的集合.
This all comes down to reversing your thinking and realizing that the $text
results are the most important thing, and therefore "that" is the collection on which the operation needs to be initiated.
这是可以做到的,只是要反过来做。
It's possible, but just do it the other way around.
仅显示父级引用的反向情况以及日期过滤:
Just showing the reverse case for references on the parent as well as date filtering:
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
// Open the default mongoose connection used by the models defined below.
mongoose.connect('mongodb://localhost/rlookup');

// Parent holds the relation: "children" is an array of numeric ids that
// reference Child documents (the reverse of storing the parent on the child).
var parentFields = {
  "_id": Number,
  "name": String,
  "date": Date,
  "children": [{ "type": Number, "ref": "Child" }]
};
var parentSchema = new Schema(parentFields);

// Child carries the text-indexed field. autoIndex is switched off here; the
// script later calls Child.ensureIndexes() explicitly before inserting data.
var childFields = {
  "_id": Number,
  "text": { "type": String, "index": "text" }
};
var childSchema = new Schema(childFields,{ "autoIndex": false });

var Parent = mongoose.model("Parent",parentSchema);
var Child = mongoose.model("Child",childSchema);
// Reverse case: the references live in an array on the parent, and parents
// are additionally restricted to a date range. The $text search still starts
// on Child; parents are found by matching the child's _id against the
// parent's "children" array.
async.series(
  [
    // Cleanup: empty both collections so every run starts from the same data
    function(callback) {
      async.each([Parent,Child],function(model,callback) {
        model.remove({},callback);
      },callback);
    },

    // Build the indexes declared on the Child schema (the text index) before
    // any data is inserted; the schema was created with autoIndex disabled
    function(callback) {
      Child.ensureIndexes({ "background": false },callback);
    },

    // Create documents: parents and children are independent, so in parallel
    function(callback) {
      async.parallel(
        [
          function(callback) {
            // "Parent 4" references matching children but has a January
            // date, so the date filter in the pipeline below excludes it
            Parent.create([
              {
                "_id": 1,
                "name": "Parent 1",
                "date": new Date("2016-02-01"),
                "children": [1,2]
              },
              {
                "_id": 2,
                "name": "Parent 2",
                "date": new Date("2016-02-02"),
                "children": [3,4]
              },
              {
                "_id": 3,
                "name": "Parent 3",
                "date": new Date("2016-02-03"),
                "children": [5,6]
              },
              {
                "_id": 4,
                "name": "Parent 4",
                "date": new Date("2016-01-15"),
                "children": [1,2,6]
              }
            ],callback);
          },
          function(callback) {
            Child.create([
              {
                "_id": 1,
                "text": "The little dog laughed to see such fun"
              },
              {
                "_id": 2,
                "text": "The quick brown fox jumped over the lazy dog"
              },
              {
                "_id": 3,
                "text": "The dish ran awy with the spoon"
              },
              {
                "_id": 4,
                "text": "Miss muffet on her tuffet"
              },
              {
                "_id": 5,
                "text": "Lady is a fox"
              },
              {
                "_id": 6,
                "text": "Every dog has it's day"
              }
            ],callback);
          }
        ],
        callback
      );
    },

    // Aggregate with $text, $lookup and a date filter on the joined parents
    function(callback) {
      Child.aggregate(
        [
          // Must be the first stage so the text index can be used
          { "$match": {
            "$text": { "$search": "fox dog" }
          }},
          // Keep the relevance score as a regular field for later stages
          { "$project": {
            "text": 1,
            "score": { "$meta": "textScore" }
          }},
          // Order children by relevance so $push below keeps that order
          { "$sort": { "score": { "$meta": "textScore" } } },
          // Find every parent whose "children" array contains this child's _id
          { "$lookup": {
            "from": "parents",
            "localField": "_id",
            "foreignField": "children",
            "as": "parent"
          }},
          // Keep only the joined parents dated within February 2016
          // (lower bound inclusive, upper bound exclusive)
          { "$project": {
            "text": 1,
            "score": 1,
            "parent": {
              "$filter": {
                "input": "$parent",
                "as": "parent",
                "cond": {
                  "$and": [
                    { "$gte": [ "$$parent.date", new Date("2016-02-01") ] },
                    { "$lt": [ "$$parent.date", new Date("2016-03-01") ] }
                  ]
                }
              }
            }
          }},
          // One document per (child, parent) pair; children whose filtered
          // parent array is empty produce no output here
          { "$unwind": "$parent" },
          // Regroup per parent: collect its matching children and add up
          // their scores into a total for the parent
          { "$group": {
            "_id": "$parent._id",
            "name": { "$first": "$parent.name" },
            "date": { "$first": "$parent.date" },
            "children": {
              "$push": {
                "_id": "$_id",
                "text": "$text",
                "score": "$score"
              }
            },
            "score": { "$sum": "$score" }
          }},
          // Highest total score first
          { "$sort": { "score": -1 } }
        ],
        function(err,result) {
          // Report a failure instead of printing an undefined result
          if (err) return callback(err);
          console.log(JSON.stringify(result,undefined,2));
          callback();
        }
      );
    }
  ],
  function(err) {
    // Disconnect on both the success and the failure path, then surface any
    // error from the steps above
    mongoose.disconnect();
    if (err) throw err;
  }
);
输出:
[
{
"_id": 1,
"name": "Parent 1",
"date": "2016-02-01T00:00:00.000Z",
"children": [
{
"_id": 2,
"text": "The quick brown fox jumped over the lazy dog",
"score": 1.1666666666666667
},
{
"_id": 1,
"text": "The little dog laughed to see such fun",
"score": 0.6
}
],
"score": 1.7666666666666666
},
{
"_id": 3,
"name": "Parent 3",
"date": "2016-02-03T00:00:00.000Z",
"children": [
{
"_id": 5,
"text": "Lady is a fox",
"score": 0.75
},
{
"_id": 6,
"text": "Every dog has it's day",
"score": 0.6666666666666666
}
],
"score": 1.4166666666666665
}
]
Noting that the "Parent 4"
which would otherwise have had the largest ranking is removed since the date does not fall in the query range applied with $filter
.
这篇关于如何通过$ lookup在“联接"集合上执行$ text搜索?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!