聚合查询中的计数不正确 [英] Incorrect count from aggregation query
问题描述
在下面的文档集合中,我想计算不重复句子的总单词数。总单词数应为 5(hello\nworld, how are you?)+ 5(hello world, I am fine)+ 3(Is it raining?)+ 5(Look at the beautiful tiger!)= 18
In the following document collection, I am trying to find the total words of unique sentences. The total words must come out as 5 (hello\nworld, how are you?) + 5 (hello world, I am fine) + 3(Is it raining?) + 5(Look at the beautiful tiger!) = 18
// Sample collection: two documents, each holding a "sourceList" array of sentences.
// "Is it raining?" appears in both documents, so it should be counted only once.
// The first sentence contains a newline ("\n") between "hello" and "world,".
// NOTE(review): every "_id" below is the same ObjectId - presumably a copy/paste
// artifact of the sample data; confirm before reusing it.
[
{
"sourceList": [
{
"source": "hello\nworld, how are you?",
"_id": ObjectId("5f0eb9946db57c0007841153")
},
{
"source": "hello world, I am fine",
"_id": ObjectId("5f0eb9946db57c0007841153")
},
{
"source": "Is it raining?",
"_id": ObjectId("5f0eb9946db57c0007841153")
}
]
},
{
"sourceList": [
{
"source": "Look at the beautiful tiger!",
"_id": ObjectId("5f0eb9946db57c0007841153")
},
{
"source": "Is it raining?",
"_id": ObjectId("5f0eb9946db57c0007841153")
}
]
}
]
但使用以下查询
// Query from the question: it returns 17 instead of the expected 18.
db.collection.aggregate([
// Emit one document per entry of sourceList.
{
"$unwind": "$sourceList"
},
{
$project: {
// NOTE: the object below declares the key `$split` twice. In a JavaScript
// object literal a later duplicate key replaces the earlier one, so only the
// split on " " reaches the server and the split on "\n" is discarded.
// "hello\nworld," therefore stays a single token, and that sentence is
// counted as 4 words instead of 5 (hence 17 rather than 18).
"sp": {
$split: [
"$sourceList.source",
"\n"
],
$split: [
"$sourceList.source",
" "
]
}
}
},
// Collect each distinct token array once.
{
"$group": {
"_id": null,
"elements": {
$addToSet: "$sp"
}
}
},
// Back to one document per distinct token array.
{
"$unwind": "$elements"
},
// Number of tokens in each array.
{
"$project": {
"sizes": {
"$size": "$elements"
}
}
},
// Sum the token counts into a single total.
{
"$group": {
"_id": null,
"count": {
"$sum": "$sizes"
}
}
}
])
结果却是 17。这可能是什么原因?我先按 \n 拆分,然后再按空格拆分。
it returns 17. What could be the reason for this? I first split by \n and then by space.
编辑
我想得到不重复句子的单词总数,以及不重复句子的总数。
I am trying to find word count for unique sentences and total unique sentences.
推荐答案
根据评论以及@micki的答案和我以前的答案,
As per the comments and addition to @micki's answer and my previous answer,
// Returns one document with:
//   unique_count - total number of words across the distinct sentences
//   unique_sen   - number of distinct sentences
db.collection.aggregate([
  // One document per entry of sourceList.
  { $unwind: { path: "$sourceList" } },

  // Tokenise the sentence: split on "\n" first, then split every piece on " "
  // and concatenate the pieces into a single array of words.
  {
    $project: {
      sp: {
        $reduce: {
          input: { $split: ["$sourceList.source", "\n"] },
          initialValue: [],
          in: { $concatArrays: ["$$value", { $split: ["$$this", " "] }] }
        }
      }
    }
  },

  // $addToSet keeps each distinct word array only once.
  { $group: { _id: null, elements: { $addToSet: "$sp" } } },

  // Record how many distinct word arrays there are before unwinding them.
  { $project: { unique_sen: { $size: "$elements" }, elements: 1 } },

  // One document per distinct word array, each still carrying unique_sen.
  { $unwind: { path: "$elements" } },

  // Word count of each distinct sentence.
  { $project: { sizes: { $size: "$elements" }, unique_sen: 1 } },

  // Sum the word counts; keep the incoming documents so unique_sen survives the group.
  {
    $group: {
      _id: null,
      unique_count: { $sum: "$sizes" },
      data: { $push: "$$ROOT" }
    }
  },

  // "$data.unique_sen" is an array of identical values; take its first element.
  { $project: { unique_count: 1, unique_sen: { $first: "$data.unique_sen" } } }
])
更新:
您无需在查询中转义。
// Same computation restricted to one site's document and reading the
// "translations" array. Returns one document with:
//   unique_count - total number of words across the distinct sentences
//   unique_sen   - number of distinct sentences
db.collection.aggregate([
  // Only the document(s) for this URL; the URL is written as-is, without escaping.
  { $match: { url: "https://www.rootsresource.in" } },

  // One document per entry of translations.
  { $unwind: { path: "$translations" } },

  // Tokenise the sentence: split on "\n" first, then split every piece on " "
  // and concatenate the pieces into a single array of words.
  {
    $project: {
      sp: {
        $reduce: {
          input: { $split: ["$translations.source", "\n"] },
          initialValue: [],
          in: { $concatArrays: ["$$value", { $split: ["$$this", " "] }] }
        }
      }
    }
  },

  // $addToSet keeps each distinct word array only once.
  { $group: { _id: null, elements: { $addToSet: "$sp" } } },

  // Record how many distinct word arrays there are before unwinding them.
  { $project: { unique_sen: { $size: "$elements" }, elements: 1 } },

  // One document per distinct word array, each still carrying unique_sen.
  { $unwind: { path: "$elements" } },

  // Word count of each distinct sentence.
  { $project: { sizes: { $size: "$elements" }, unique_sen: 1 } },

  // Sum the word counts; keep the incoming documents so unique_sen survives the group.
  {
    $group: {
      _id: null,
      unique_count: { $sum: "$sizes" },
      data: { $push: "$$ROOT" }
    }
  },

  // "$data.unique_sen" is an array of identical values; take its first element.
  { $project: { unique_count: 1, unique_sen: { $first: "$data.unique_sen" } } }
])
更新:
以上查询适用于 MongoDB 4.4 及以上版本——从 4.4 开始,$first 才可以在 $project 阶段中使用
The above query works from MongoDB 4.4 onward - $first is available in $project starting with 4.4
对于较早的版本,可使用以下查询:
// Variant that does not rely on the $first array operator in $project, so it
// also runs on MongoDB versions earlier than 4.4. Returns one document with:
//   unique_count - total number of words across the distinct sentences
//   unique_sen   - number of distinct sentences
db.test.aggregate([
  // Only the document(s) for this URL.
  { $match: { url: "https://www.rootsresource.in" } },

  // One document per entry of translations.
  { $unwind: "$translations" },

  // Tokenise the sentence: split on "\n" first, then split every piece on " "
  // and concatenate the pieces into a single array of words.
  {
    $project: {
      sp: {
        $reduce: {
          input: { $split: ["$translations.source", "\n"] },
          initialValue: [],
          in: { $concatArrays: ["$$value", { $split: ["$$this", " "] }] }
        }
      }
    }
  },

  // $addToSet keeps each distinct word array only once.
  { $group: { _id: null, elements: { $addToSet: "$sp" } } },

  // Record how many distinct word arrays there are before unwinding them.
  { $project: { unique_sen: { $size: "$elements" }, elements: 1 } },

  // One document per distinct word array, each still carrying unique_sen.
  { $unwind: "$elements" },

  // Word count of each distinct sentence.
  { $project: { sizes: { $size: "$elements" }, unique_sen: 1 } },

  // Sum the word counts. Every incoming document carries the same unique_sen,
  // so the $first *group accumulator* (not the 4.4 array operator) picks it up
  // directly. This avoids pushing every $$ROOT into an array just to read its
  // first element, which grows with the number of distinct sentences.
  {
    $group: {
      _id: null,
      unique_count: { $sum: "$sizes" },
      unique_sen: { $first: "$unique_sen" }
    }
  }
])
这篇关于汇总查询中的计数不正确的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!