Incorrect count from aggregation query - database

In the following document collection, I am trying to find the total words of unique sentences. The total words must come out as 5 (hello\nworld, how are you?) + 5 (hello world, I am fine) + 3(Is it raining?) + 5(Look at the beautiful tiger!) = 18
[
{
"sourceList": [
{
"source": "hello\nworld, how are you?",
"_id": ObjectId("5f0eb9946db57c0007841153")
},
{
"source": "hello world, I am fine",
"_id": ObjectId("5f0eb9946db57c0007841153")
},
{
"source": "Is it raining?",
"_id": ObjectId("5f0eb9946db57c0007841153")
}
]
},
{
"sourceList": [
{
"source": "Look at the beautiful tiger!",
"_id": ObjectId("5f0eb9946db57c0007841153")
},
{
"source": "Is it raining?",
"_id": ObjectId("5f0eb9946db57c0007841153")
}
]
}
]
But with the below query
db.collection.aggregate([
{
"$unwind": "$sourceList"
},
{
$project: {
"sp": {
$split: [
"$sourceList.source",
"\n"
],
$split: [
"$sourceList.source",
" "
]
}
}
},
{
"$group": {
"_id": null,
"elements": {
$addToSet: "$sp"
}
}
},
{
"$unwind": "$elements"
},
{
"$project": {
"sizes": {
"$size": "$elements"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": "$sizes"
}
}
}
])
it gives as 17. What could be the reason for this? I am first trying to split by \n and then by space
EDIT
I am trying to find word count for unique sentences and total unique sentences.

The problem is that here:
"sp": {
$split: [
"$sourceList.source",
"\n"
],
$split: [
"$sourceList.source",
" "
]
}
only the second $split gets executed by MongoDB and it returns hello\nworld as one string. There's no such "cascade" syntax, since it's simply the same JSON key $split so last wins.
In order to fix that you can use $reduce to apply $split by whitespace on an array of split by \n values:
{
$project: {
"sp": {
$reduce: {
input: { $split: [ "$sourceList.source", "\n" ] },
initialValue: [],
in: { $concatArrays: [ "$$value", { $split: [ "$$this", " " ] } ] }
}
}
}
}
Mongo Playground

As per the comments and addition to #micki's answer and my previous answer,
play
db.collection.aggregate([
{
"$unwind": "$sourceList"
},
{
$project: {
"sp": {
$reduce: {
input: {
$split: [
"$sourceList.source",
"\n"
]
},
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
$split: [
"$$this",
" "
]
}
]
}
}
}
}
},
{
"$group": {
"_id": null,
"elements": {
$addToSet: "$sp"
}
}
},
{
"$project": {
"unique_sen": {
"$size": "$elements"
},
"elements": 1
}
},
{
"$unwind": "$elements"
},
{
"$project": {
"sizes": {
"$size": "$elements"
},
"unique_sen": 1
}
},
{
"$group": {
"_id": null,
"unique_count": {
"$sum": "$sizes"
},
"data": {
$push: "$$ROOT"
}
}
},
{
"$project": {
"unique_count": 1,
"unique_sen": {
$first: "$data.unique_sen"
}
}
}
])
Update:
You don't need to escape in the query.
play
db.collection.aggregate([
{
"$match": {
"url": "https://www.rootsresource.in"
}
},
{
"$unwind": "$translations"
},
{
$project: {
"sp": {
$reduce: {
input: {
$split: [
"$translations.source",
"\n"
]
},
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
$split: [
"$$this",
" "
]
}
]
}
}
}
}
},
{
"$group": {
"_id": null,
"elements": {
$addToSet: "$sp"
}
}
},
{
"$project": {
"unique_sen": {
"$size": "$elements"
},
"elements": 1
}
},
{
"$unwind": "$elements"
},
{
"$project": {
"sizes": {
"$size": "$elements"
},
"unique_sen": 1
}
},
{
"$group": {
"_id": null,
"unique_count": {
"$sum": "$sizes"
},
"data": {
$push: "$$ROOT"
}
}
},
{
"$project": {
"unique_count": 1,
"unique_sen": {
$first: "$data.unique_sen"
}
}
}
])
UPDATE:
Above query works from mongo 4.4 - $first is available in project from 4.4
For older versions.
db.test.aggregate([
{
"$match": {
url: "https://www.rootsresource.in"
}
},
{
"$unwind": "$translations"
},
{
$project: {
"sp": {
$reduce: {
input: {
$split: [
"$translations.source",
"\n"
]
},
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
$split: [
"$$this",
" "
]
}
]
}
}
}
}
},
{
"$group": {
"_id": null,
"elements": {
$addToSet: "$sp"
}
}
},
{
"$project": {
"unique_sen": {
"$size": "$elements"
},
"elements": 1
}
},
{
"$unwind": "$elements"
},
{
"$project": {
"sizes": {
"$size": "$elements"
},
"unique_sen": 1
}
},
{
"$group": {
"_id": null,
"unique_count": {
"$sum": "$sizes"
},
"data": {
$push: "$$ROOT"
}
}
},
{
"$project": {
"unique_count": 1,
unique_sen: { $arrayElemAt: [ "$data.unique_sen", 0 ] }
}
}
])

Related

(MongoDB) $facet: using $group works, but changing to $project gives the error "$first's argument must be an array, but is object"

I'm using a $facet to get an intersection of IDs from two pipelines. Using $group in query_a and query_b in the following pipeline gives the list of IDs.
Pipeline 1:
[
{
"$facet": {
"query_a": [
{
"$match": {
...
}
},
{
"$group": {
"ID": ...
}
}
],
"query_b": [
{
"$match": {
...
}
},
{
"$group": {
"ID": ...
}
}
]
}
},
{
"$project": {
"intersection": {
"$setIntersection": [
"$query_a.ID",
"$query_b.ID"
]
},
"query_a": 1,
"query_b": 1
}
},
{
"$project": {
"_id": 0,
"data": {
"$map": {
"input": "$intersection",
"in": {
"intersection": "$$this",
"query_a": {
"$first": {
"$filter": {
"input": "$query_a",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
},
"query_b": {
"$first": {
"$filter": {
"input": "$query_b",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
}
}
}
}
}
},
{
"$unwind": "$data"
},
{
"$replaceRoot": {
"newRoot": "$data"
}
},
{
"$project": {
"intersection": 1
}
}
]
Example result printed using pymongo:
{"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf"}
{"ID": "cdbcc129-548a-9d51-895a-1538200664e6"}
{"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9"}
...
Changing $group to $project in query_a and query_b, so the list of IDs also includes not distinct values gives an error.
Pipeline 2:
[
{
"$facet": {
"query_a": [
{
"$match": {
...
}
},
{
"$project": {
"ID": ...
}
}
],
"query_b": [
{
"$match": {
...
}
},
{
"$project": {
"ID": ...
}
}
]
}
},
{
"$project": {
"intersection": {
"$setIntersection": [
"$query_a.ID",
"$query_b.ID"
]
},
"query_a": 1,
"query_b": 1
}
},
{
"$project": {
"_id": 0,
"data": {
"$map": {
"input": "$intersection",
"in": {
"intersection": "$$this",
"query_a": {
"$first": {
"$filter": {
"input": "$query_a",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
},
"query_b": {
"$first": {
"$filter": {
"input": "$query_b",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
}
}
}
}
}
},
{
"$unwind": "$data"
},
{
"$replaceRoot": {
"newRoot": "$data"
}
},
{
"$project": {
"intersection": 1
}
}
]
Error:
pymongo.errors.OperationFailure: PlanExecutor error during aggregation :: caused by :: $first's argument must be an array, but is object, full error: {'ok': 0.0, 'errmsg': "PlanExecutor error during aggregation :: caused by :: $first's argument must be an array, but is object"
Running the queries in separate pipelines works using either $group or $project.
Query using $group:
[
{
"$match": {
...
}
},
{
"$group": {
"ID": ...
}
}
]
Example result printed using pymongo:
{"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf"}
{"ID": "cdbcc129-548a-9d51-895a-1538200664e6"}
{"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9"}
...
Query using $project:
[
{
"$match": {
...
}
},
{
"$project": {
"ID": ...
}
}
]
Example result printed using pymongo:
{"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf"}
{"ID": "cdbcc129-548a-9d51-895a-1538200664e6"}
{"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9"}
...
I would appreciate any suggestions!
The problem was that I had to change {"$first": "$data"} to just "$data" when changing from $group to $project.

How to Get the size of the filtered array in Mongodb

db.FlaggedData.aggregate([
{
"$match": {
"_id": "d86d6b48-e949-4daa-8364-9ef008416ae8"
}
},
{
"$project": {
"users": {
"$slice": [
{
"$filter": {
"input": "$users",
"as": "users",
"cond": {
"$and": [
{ SOME FILTERS
}
]
}
}
},
0,
100
]
},
"count": {"$size": "$users"}
}
}
])
How do I get the size of the filtered results?
as what I'm doing now I'm getting the size of the Array, not the size of the filtered result.
thanks.
You need to put entire $slice into count.
db.collection.aggregate([
{
"$match": {
"_id": "d86d6b48-e949-4daa-8364-9ef008416ae8"
}
},
{
"$project": {
"users": {
"$slice": [
{
"$filter": {
"input": "$users",
"as": "user",
"cond": {
"$and": [
{
$eq: [
"$$user.name",
"123"
]
}
]
}
}
},
0,
100
]
},
"count": {
"$size": {
"$slice": [
{
"$filter": {
"input": "$users",
"as": "user",
"cond": {
"$and": [
{
$eq: [
"$$user.name",
"123"
]
}
]
}
}
},
0,
100
]
}
}
}
}
])
mongoplayground
OR
Use two project
db.collection.aggregate([
{
"$match": {
"_id": "d86d6b48-e949-4daa-8364-9ef008416ae8"
}
},
{
"$project": {
"users": {
"$slice": [
{
"$filter": {
"input": "$users",
"as": "user",
"cond": {
"$and": [
{
$eq: [
"$$user.name",
"123"
]
}
]
}
}
},
0,
100
]
}
}
},
{
"$project": {
users: {
"$size": "$users"
}
}
}
])
mongoplayground

Mongodb transform array using aggregation

I have an array in Mongodb collection:
some_array:
0: title: "Community"
model: "A"
1: title: "News"
model: "B"
How can I get only titles of array and list them as bellow:
titles: "Community, News"
You can group by null means group all titles and then project titles array only
Working Playground
db.collection.aggregate([
{
"$group": {
"_id": null,
"titles": {
"$push": "$title"
}
}
},
{
"$project": {
_id: 0,
titles: {
"$reduce": {
"input": "$titles",
"initialValue": "",
"in": {
"$cond": {
"if": { "$eq": [ { "$indexOfArray": [ "$titles", "$$this" ] }, 0 ] },
"then": { "$concat": [ "$$value", "$$this" ] },
"else": { "$concat": [ "$$value", ", ", "$$this" ] }
}
}
}
}
}
}
])
db.collection.aggregate([
{
$project: {
titles: "$somearray.title"
}
}
])
Working Mongo playground

Mongodb use $elemMatch into $filter

I have a document like this:
I need to return the documents and filter the nested array (lessons) where any item of subLessons into input array
[{
"_id": {
"$oid": "6081fedbee5d133dbffb42eb"
},
"name": "my quiz",
"lessons": [
{
"_id": "460c42e1-b0b7-437e-ab63-c59cce8ced0d",
"name": "section",
"subLesson": [
{
"$oid": "6081fed9ee5d133dbffb3cba"
},
{
"$oid": "6081fed9ee5d133dbffb3cc0"
}
]
},
{
"_id": "f7b5c95f-1a68-42ca-880c-22ef3831ff03",
"name": "ffff",
"subLesson": [
{
"$oid": "6081fed9ee5d133dbffb3cbb"
}
]
}
]
}
}]
I wrote the following query but it does not work. I do not know how to use $elemMatch in $filter
db.collection.aggregate([
{
"$project": {
_id: 1,
lessons: {
$filter: {
"input": "$lessons",
"as": "lesson",
"cond": {
"$$lesson.subLesson": {
"$elemMatch": {
"$in": [
ObjectId("6081fed9ee5d133dbffb3cba")
]
}
}
}
}
}
}
}
])
I am trying to find the record such that the result looks like the following.
[{
"_id": {
"$oid": "6081fedbee5d133dbffb42eb"
},
"lessons": [
{
"_id": "460c42e1-b0b7-437e-ab63-c59cce8ced0d",
"name": "zzzz",
"subLesson": [
{
"$oid": "6081fed9ee5d133dbffb3cba"
},
{
"$oid": "6081fed9ee5d133dbffb3cc0"
}
]
}
]
},
}]
Can anyone please help out to understand how can I make this work
thanks
You can use $in directly
db.collection.aggregate([
{
$project: {
lessons: {
$filter: {
input: "$lessons",
cond: {
$in: [ ObjectId("6081fed9ee5d133dbffb3cba"), "$$this.subLesson" ]
}
}
}
}
}
])
Working Mongo playground
Update 1
db.collection.aggregate([
{ "$unwind": "$lessons" },
{
"$match": {
"lessons.subLesson": {
$in: [ ObjectId("6081fed9ee5d133dbffb3cba"), ObjectId("6081fed9ee5d133dbffb3cbb") ]
}
}
},
{
$group: {
_id: "$_id",
name: { $first: "$name" },
lessons: { $push: "$lessons" }
}
}
])
Mongo Playground

Mongo DB $look up Method for Fields in Arrays instead of Collections

I have a user document with the following structure:
{
"_id": {
"$oid": "5e636c552b872f00178033bf"
},
"finance": {
"expenditure": [
{
"status": true,
"_id": {
"$oid": "5e636d442b872f00178033d4"
},
"amount": {
"$numberInt": "900"
},
"category": "Coffee"
},
{
"status": true,
"_id": {
"$oid": "5e636d492b872f00178033d5"
},
"amount": {
"$numberInt": "1000"
},
"category": "Coffee"
},
{
"status": true,
"_id": {
"$oid": "5e636d532b872f00178033d6"
},
"amount": {
"$numberInt": "3000"
},
"category": "Sport"
},
{
"status": true,
"_id": {
"$oid": "5e636d572b872f00178033d7"
},
"amount": {
"$numberInt": "1000"
},
"category": "Sport"
},
],
"customcategories": [
{
"budget": {
"$numberInt": "200"
},
"_id": {
"$oid": "5e636c552b872f00178033c7"
},
"title": "Sport"
},
{
"budget": {
"$numberInt": "100"
},
"_id": {
"$oid": "5e636c552b872f00178033c8"
},
"title": "Coffee"
}
]
}
}
My previos command is this one (you don't have to mind the status and the currentdate) :
User.aggregate([
{
$match: {
_id: req.user._id
}
},
{
$unwind: "$finance.expenditure"
},
{
$match: {
"finance.expenditure.status": true
}
},
{
$sort: {
"finance.expenditure.currentdate": -1
}
},
{
$group: {
_id: "$finance.expenditure.category",
amount: {
$sum: "$finance.expenditure.amount",
}
}
},
{
$project: {
_id: 0,
category: "$_id",
amount: 1
}
}
])
The Result looks like this :
{
"expenditure": [
{
"amount": 1900,
"category": "Coffee"
},
{
"amount": 4000,
"category": "Sport"
}
]
}
I would like to add the my grouped elements the budget from the associated "customcategory".
So that it looks like this :
{
"expenditure": [
{
"amount": 1900,
"category": "Coffee",
"budget" : 100
},
{
"amount": 4000,
"category": "Sport",
"budget" : 200
}
]
}
I tried several things but nothing works of the $lookup method worked for me.
I hope some can help me :)
give this pipeline a try:
db.collection.aggregate([
{
$match: { _id: ObjectId("5e636c552b872f00178033bf") }
},
{
$unwind: "$finance.expenditure"
},
{
$match: { "finance.expenditure.status": true }
},
{
$sort: { "finance.expenditure.currentdate": -1 }
},
{
$group: {
_id: "$finance.expenditure.category",
amount: { $sum: "$finance.expenditure.amount"},
categories: { $first: '$finance.customcategories' }
}
},
{
$project: {
_id: 0,
category: "$_id",
amount: 1,
budget: {
$arrayElemAt: [
{
$map: {
input: {
$filter: {
input: '$categories',
cond: { $eq: ['$$this.title', '$_id'] }
}
},
in: '$$this.budget'
}
},
0
]
}
}
}
])
https://mongoplayground.net/p/adsWInz3wgY
Try this one:
User.aggregate([
{
$match: {
_id: mongoose.Types.ObjectId(req.user._id)
}
},
{
$sort: {
"finance.expenditure.currentdate": -1
}
},
{
$unwind: "$finance.expenditure"
},
{
$unwind: "$finance.customcategories"
},
{
$match: {
"finance.expenditure.status": true
}
},
{
$group: {
_id: "$finance.expenditure.category",
amount: {
$addToSet: "$finance.expenditure"
},
customcategories: {
$addToSet: "$finance.customcategories"
}
}
},
{
$project: {
_id: 0,
"amount": {
$sum: "$amount.amount"
},
"category": "$_id",
"budget": {
$sum: {
$let: {
vars: {
budget: {
$filter: {
input: "$customcategories",
cond: {
$eq: [
"$_id",
"$$this.title"
]
}
}
}
},
in: "$$budget.budget"
}
}
}
}
}
])
//.exec(function(err, result){})
MongoPlayground

Resources