在MongoDB中使用MapReduce联接两个集合 [英] Join two collections with MapReduce in MongoDB

查看:111
本文介绍了在MongoDB中使用MapReduce加入两个集合的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我已经知道MongoDB不支持联接操作,但是我必须使用mapReduce范例模拟$lookup(来自聚合框架).

I already know that MongoDB doesn't support join operations, but I have to simulate a $lookup (from the aggregation framework) with the mapReduce paradigm.

我的两个集合是:

// Employees sample 
{
  "_id" : "1234",
  "first_name" : "John",
  "last_name" : "Bush",
  "departments" : 
  [ 
    { "dep_id" : "d001", "hire_date" : "date001" },
    { "dep_id" : "d004", "hire_date" : "date004" }
  ]
}
{ 
  "_id" : "5678", 
  "first_name" : "Johnny", 
  "last_name" : "Cash", 
  "departments" : [ { "dep_id" : "d001", "hire_date" : "date03" } ] 
}
{ 
  "_id" : "9012", 
  "first_name" : "Susan", 
  "last_name" : "Bowdy", 
  "departments" : [ { "dep_id" : "d004", "hire_date" : "date04" } ] 
}

// Departments sample 
{
  "_id" : "d001",
  "dep_name" : "Sales",
  "employees" : [ "1234", "5678" ]
},
{
  "_id" : "d004",
  "dep_name" : "Quality M",
  "employees" : [ "1234", "9012" ]
}

实际上我想得到这样的结果:

And actually I'd like to have a result like this:

{
  "_id" : "1234",
  "value" : 
  {
    "first_name" : "John",
    "departments" :
    [
      { "dep_id" : "d001", "dep_name" : "Sales" },
      { "dep_id" : "d004", "dep_name" : "Quality M" }
    ]
  }
}
{ 
  "_id" : "5678", 
  "value" : 
  { 
    "first_name" : "Johnny", 
    "departments" : [ { "dep_id" : "d001", "dep_name" : "Sales" } ]
  } 
}
{ 
  "_id" : "9012", 
  "value" : 
  { 
    "first_name" : "Susan", 
    "departments" : [ { "dep_id" : "d004", "dep_name" : "Quality M" } ] 
  } 
}

公共字段是dep_id(来自员工)和_id(来自部门).

The common fields are dep_id (from Employees) and _id (from Departments).

我的代码如下,但是它不能按我的需要工作.

My code is the following, but it doesn't work as I need.

// Map function for the Departments collection (the asker's original attempt).
// Emits one pair per entry of this.employees, keyed by that entry, carrying
// the department's dep_name together with the placeholder dep_id 0.
var mapD = function() {
  for (var i=0; i<this.employees.length; i++) {
    emit(this.employees[i], { dep_id: 0, dep_name: this.dep_name });
  }
}

// Map function for the Employees collection (the asker's original attempt).
// Emits one pair per entry of this.departments, keyed by the employee's _id,
// carrying that entry's dep_id together with the placeholder dep_name 0.
var mapE = function() {
  for (var i=0; i<this.departments.length; i++) {
    emit(this._id, { dep_id: this.departments[i].dep_id, dep_name: 0 });
  }
}

// Reduce function passed to both map-reduce runs (the asker's original attempt).
// Folds every value for a key into one flat { dep_id, dep_name } object, so at
// most one department can be represented per key.
var reduceLookUp = function(key, values) {
  var result = {dep_id: 0, dep_name: 0};
  values.forEach(function(value) {
    if (value.dep_name !== null && value.dep_name !== undefined) {
      // NOTE(review): this reads `values.dep_name` (the collection being
      // iterated) rather than `value.dep_name` — looks like a typo.
      result.dep_name = values.dep_name;
    }
    // NOTE(review): the placeholder 0 emitted by the map functions is neither
    // null nor undefined, so it passes these checks and can overwrite a real
    // value taken from an earlier element.
    if (value.dep_id !== null && value.dep_id !== undefined) {
      result.dep_id = value.dep_id;
    }
  });
  return result;
};

// Run one map-reduce per collection, both writing to the same "joined"
// collection with the `reduce` output mode.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });

我会很感激您的帮助!预先感谢.

I'll really appreciate your help! Thanks in advance.

推荐答案

在您的问题中,只能从Employees集合中获取first_name,并且只能从Departments集合中获取dep_name.

In your problem first_name can be fetched only from Employees collection and dep_name can be fetched only from Departments collection.

您可以使用MapReduce和聚合框架来实现它.

You can achieve it both with MapReduce and aggregation framework.

1. MapReduce解决方案

如果您按照以下方式修改map和reduce函数

If you modify your map and reduce functions as follows

// Map function for the Departments collection.
// For every employee id listed in the department document, emits a pair keyed
// by that employee id whose value identifies the department (_id and dep_name).
var mapD = function() {
  var depId = this._id;
  var depName = this.dep_name;
  this.employees.forEach(function(employeeId) {
    emit(employeeId, { dep_id: depId, dep_name: depName });
  });
};

// Map function for the Employees collection.
// Emits a single pair keyed by the employee's _id carrying only first_name.
var mapE = function() {
  var employeeInfo = { first_name: this.first_name };
  emit(this._id, employeeInfo);
};

/**
 * Reduce function shared by both map-reduce passes.
 *
 * Each element of `values` may be one of:
 *   - a department pair emitted by mapD:   { dep_id, dep_name }
 *   - an employee record emitted by mapE:  { first_name }
 *   - the output of an earlier reduce:     { first_name?, departments? }
 *
 * Departments found in any of these shapes are accumulated into one list, so
 * the function gives the same answer whether it sees raw emitted values,
 * previously reduced values, or a mixture (which the `reduce` output mode
 * produces when merging into an existing collection).
 *
 * @param {*} key - The employee id the values were emitted under (unused).
 * @param {Array<Object>} values - Emitted and/or previously reduced values.
 * @returns {Object} An object with `first_name` (when any value supplied one)
 *   and `departments` (when any value supplied department data).
 */
var reduceLookUp = function(key, values) {
  var results = {};
  var departments = [];
  // Remembers that a reduced value carried a `departments` field, so that an
  // empty list is still written back rather than dropped.
  var sawDepartmentsField = false;

  values.forEach(function(value) {
    // Previously reduced value: append its departments rather than replacing
    // what has been gathered so far.
    if (value.departments !== undefined) {
      sawDepartmentsField = true;
      departments = departments.concat(value.departments);
    }

    // Raw department pair emitted by mapD (either field may be missing).
    var department = {};
    if (value.dep_id !== undefined) department["dep_id"] = value.dep_id;
    if (value.dep_name !== undefined) department["dep_name"] = value.dep_name;
    if (Object.keys(department).length > 0) departments.push(department);

    if (value.first_name !== undefined) results["first_name"] = value.first_name;
  });

  if (sawDepartmentsField || departments.length > 0) results["departments"] = departments;
  return results;
};

然后第一个MapReduce调用

then first MapReduce call

// First pass: map Departments with mapD and write the reduced results to the
// "joined" collection using the `reduce` output mode.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });

将插入joined集合

{ 
  "_id" : "1234", 
  "value" : 
  {
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
  }
}

第二次调用

// Second pass: map Employees with mapE and write to the same "joined"
// collection, again using the `reduce` output mode.
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });

应插入

{ "_id" : "1234", "value" : { "first_name" : "John" } }

但是,根据文档reduce输出选项将

but, according to documentation, reduce output option will

如果输出集合已经存在,则将新结果与现有结果合并. 如果现有文档的键与新结果的键相同,则对新文档和现有文档同时应用reduce函数,并用结果覆盖现有文档

Merge the new result with the existing result if the output collection already exists. If an existing document has the same key as the new result, apply the reduce function to both the new and the existing documents and overwrite the existing document with the result

因此,在您的情况下,带有参数的reduce函数将再次被调用

Thus, reduce function will be called again in your case with parameters

key = "1234",
values =
[
  {
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
  },
  { "first_name" : "John" }
]

最终结果是

{ 
  "_id" : "1234", 
  "value" : 
  { 
    "first_name" : "John", 
    "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" }
    ] 
  } 
}

2.聚合框架解决方案

针对您的问题的更好解决方案是使用聚合框架代替Map-Reduce.在这里,您将使用 $lookup 阶段从Employees集合中获取部分数据

A better solution for your problem is to use aggregation framework instead of Map-Reduce. Here you would use $lookup stage to fetch some data from Employees

// Aggregation pipeline over Departments that regroups the data per employee id,
// pairing each employee's first name with the departments that list them.
db.Departments.aggregate([
  // Split each department into one document per entry of its employees array.
  { $unwind: "$employees" },
  { 
    // Look up Employees documents whose _id equals the unwound employee id and
    // attach the matches as the array field "employee".
    $lookup: 
      { 
        from: "Employees", 
        localField: "employees", 
        foreignField: "_id", 
        as: "employee"
      }
  },
  // Flatten the looked-up array so "employee" is a single embedded document.
  { $unwind: "$employee" },
  { 
    // Group by employee id: take first_name from the first document in the
    // group and collect each department's id and name into "departments".
    $group: 
      { 
        "_id": "$employees",
        "first_name": { $first: "$employee.first_name" }, 
        "departments": { $push: { dep_id: "$_id", dep_name: "$dep_name" } } 
      } 
  } 
]);

将导致

{ 
  "_id" : "1234",
  "first_name" : "John",
  "departments" : 
    [ 
      { "dep_id" : "d001", "dep_name" : "Sales" }, 
      { "dep_id" : "d004", "dep_name" : "Quality M" } 
    ] 
}

这篇关于在MongoDB中使用MapReduce加入两个集合的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆