Querying and aggregating nested data in mongoDB - database

I got this type of document:
{
setting: {
team1: [{ level: 'high' }, { level: 'low' }],
team2: [{ level: 'high' }, { level: 'medium' }, { level: 'low' }],
}
}
I need to query all occurrences where level is 'high' and count them, over multiple documents.
Got mixed up.
How should I do it?

Use aggregation pipeline:
db.collection.aggregate([
{
// reduce setting.team1 and setting.team2 to a number 1 or 0
// based on the presence of {level: high}
"$project": {
"t1": {
"$reduce": {
"input": "$setting.team1",
"initialValue": 0,
"in": {
"$cond": [
{
"$eq": [
"$$this.level",
"high"
]
},
{
"$sum": [
"$$value",
1
]
},
"$$value"
]
}
}
},
"t2": {
"$reduce": {
"input": "$setting.team2",
"initialValue": 0,
"in": {
"$cond": [
{
"$eq": [
"$$this.level",
"high"
]
},
{
"$sum": [
"$$value",
1
]
},
"$$value"
]
}
}
}
}
},
// sum t1 and t2
{
"$project": {
"totalCount": {
"$sum": [
"$t1",
"$t2"
]
}
}
},
// group for count
{
"$group": {
"_id": null,
"highCount": {
"$sum": "$totalCount"
}
}
}
])
Link to Mongo Playground

Related

(MongoDB) $facet: using $group works, but changing to $project gives the error "$first's argument must be an array, but is object"

I'm using a $facet to get an intersection of IDs from two pipelines. Using $group in query_a and query_b in the following pipeline gives the list of IDs.
Pipeline 1:
[
{
"$facet": {
"query_a": [
{
"$match": {
...
}
},
{
"$group": {
"ID": ...
}
}
],
"query_b": [
{
"$match": {
...
}
},
{
"$group": {
"ID": ...
}
}
]
}
},
{
"$project": {
"intersection": {
"$setIntersection": [
"$query_a.ID",
"$query_b.ID"
]
},
"query_a": 1,
"query_b": 1
}
},
{
"$project": {
"_id": 0,
"data": {
"$map": {
"input": "$intersection",
"in": {
"intersection": "$$this",
"query_a": {
"$first": {
"$filter": {
"input": "$query_a",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
},
"query_b": {
"$first": {
"$filter": {
"input": "$query_b",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
}
}
}
}
}
},
{
"$unwind": "$data"
},
{
"$replaceRoot": {
"newRoot": "$data"
}
},
{
"$project": {
"intersection": 1
}
}
]
Example result printed using pymongo:
{"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf"}
{"ID": "cdbcc129-548a-9d51-895a-1538200664e6"}
{"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9"}
...
Changing $group to $project in query_a and query_b, so the list of IDs also includes not distinct values gives an error.
Pipeline 2:
[
{
"$facet": {
"query_a": [
{
"$match": {
...
}
},
{
"$project": {
"ID": ...
}
}
],
"query_b": [
{
"$match": {
...
}
},
{
"$project": {
"ID": ...
}
}
]
}
},
{
"$project": {
"intersection": {
"$setIntersection": [
"$query_a.ID",
"$query_b.ID"
]
},
"query_a": 1,
"query_b": 1
}
},
{
"$project": {
"_id": 0,
"data": {
"$map": {
"input": "$intersection",
"in": {
"intersection": "$$this",
"query_a": {
"$first": {
"$filter": {
"input": "$query_a",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
},
"query_b": {
"$first": {
"$filter": {
"input": "$query_b",
"as": "item",
"cond": {
"$eq": [
"$$item.ID",
"$$this"
]
}
}
}
}
}
}
}
}
},
{
"$unwind": "$data"
},
{
"$replaceRoot": {
"newRoot": "$data"
}
},
{
"$project": {
"intersection": 1
}
}
]
Error:
pymongo.errors.OperationFailure: PlanExecutor error during aggregation :: caused by :: $first's argument must be an array, but is object, full error: {'ok': 0.0, 'errmsg': "PlanExecutor error during aggregation :: caused by :: $first's argument must be an array, but is object"
Running the queries in separate pipelines works using either $group or $project.
Query using $group:
[
{
"$match": {
...
}
},
{
"$group": {
"ID": ...
}
}
]
Example result printed using pymongo:
{"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf"}
{"ID": "cdbcc129-548a-9d51-895a-1538200664e6"}
{"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9"}
...
Query using $project:
[
{
"$match": {
...
}
},
{
"$project": {
"ID": ...
}
}
]
Example result printed using pymongo:
{"ID": "c80ea2cb-3272-77ae-8f46-d95de600c5bf"}
{"ID": "cdbcc129-548a-9d51-895a-1538200664e6"}
{"ID": "a4ece1ba-42ae-e735-17b0-f619daa506f9"}
...
I would appreciate any suggestions!
The problem was that I had to change {"$first": "$data"} to just "$data" when changing from $group to $project.

Mongodb transform array using aggregation

I have an array in Mongodb collection:
some_array:
0: title: "Community"
model: "A"
1: title: "News"
model: "B"
How can I get only titles of array and list them as bellow:
titles: "Community, News"
You can group by null means group all titles and then project titles array only
Working Playground
db.collection.aggregate([
{
"$group": {
"_id": null,
"titles": {
"$push": "$title"
}
}
},
{
"$project": {
_id: 0,
titles: {
"$reduce": {
"input": "$titles",
"initialValue": "",
"in": {
"$cond": {
"if": { "$eq": [ { "$indexOfArray": [ "$titles", "$$this" ] }, 0 ] },
"then": { "$concat": [ "$$value", "$$this" ] },
"else": { "$concat": [ "$$value", ", ", "$$this" ] }
}
}
}
}
}
}
])
db.collection.aggregate([
{
$project: {
titles: "$somearray.title"
}
}
])
Working Mongo playground

Multiply Varying number of Values Inside array: Mongodb

I have arranged my data so that the documents belonging to the same customer id are aggregated into a single collection. The data format is as follows.
{
"items": [
{
"stock_code": [
"22617",
"22768",
"20749"
],
"description": [
"DESIGN",
"FAMILY PHOTO FRAME",
"ASSORTED CASES"
],
"quantity": [
18,
12,
84
],
"unit_price": [
4.95,
9.95,
6.35
]
}
],
"_id": 581485,
"customer_id": 17389,
"country": "United Kingdom"
}
I need to multiply the values of array quantity with corresponding unit_price and get a total for multiple documents in a new field. I have tried using the $reduce function and $map function to get the output but both of them result in "error"
Multiply only supports numeric types, and not arrays
Could you please suggest how should i go about accomplishing this.
Codes tried:
"$addFields": {"order_total" :
{
"$sum": {
"$map": {
"input": "$items",
"as": "items",
"in": { "$multiply": [
{ "$ifNull": [ "$$items.quantity", 0 ] },
{ "$ifNull": [ "$$items.unit_price", 0 ] }
]}
}
}
}
}
Second:
"order_total" : {
"$reduce" : {
"input" : "$items",
"initialValue" : Decimal128("0.00"),
"in": {
"$sum" : [
"$$value",
{"$multiply" : [ "$$this.quantity", "$$this.unit_price" ] }
]}
}
}
The expected result needs to add a new field of "total" by multiplying the corresponding entries of unit_price with quantity. The error message is that of multiply only supports numeric types and not arrays.
I would decompose the problem into solvable chunks beginning with the smallest unit and start with the two arrays quantity and unit_price.
So given just a document with the structure
{
"quantity": [ 18, 12, 84 ],
"unit_price": [ 4.95, 9.95, 6.35 ]
}
We can add another field with the totals for each element in both arrays i.e.
{
"quantity": [ 18, 12, 84 ],
"unit_price": [ 4.95, 9.95, 6.35 ],
"total": [ 89.1, 119.4, 533.4 ]
}
This field can be computed using $range within $map as
{
"total": {
"$map": {
"input": { "$range": [ 0, { "$size": "$quantity" } ] },
"as": "idx",
"in": {
"$let": {
"vars": {
"qty": { "$arrayElemAt": [ "$quantity", "$$idx" ] },
"price": {
"$ifNull": [
{ "$arrayElemAt": [ "$unit_price", "$$idx" ] },
0
]
}
},
"in": { "$multiply": [ "$$qty", "$$price" ] }
}
}
}
}
}
This then becomes basis for calculating the order_total field with two pipeline stages for clarity (although can be composed into a single pipeline with $reduce for brevity)
var totalMapExpression = {
"$map": {
"input": { "$range": [ 0, { "$size": "$$item.quantity" } ] },
"as": "idx",
"in": {
"$let": {
"vars": {
"qty": { "$arrayElemAt": [ "$$item.quantity", "$$idx" ] },
"price": {
"$ifNull": [
{ "$arrayElemAt": [ "$$item.unit_price", "$$idx" ] },
0
]
}
},
"in": { "$multiply": [ "$$qty", "$$price" ] }
}
}
}
};
db.collection.aggregate([
{ "$addFields": {
"items": {
"$map": {
"input": "$items",
"as": "item",
"in": {
"quantity": "$$item.quantity",
"unit_price": "$$item.unit_price",
"stock_code": "$$item.stock_code",
"description": "$$item.description",
"total": { "$sum": totalMapExpression }
}
}
}
} },
{ "$addFields": {
"order_total": { "$sum": "$items.total" }
} }
])
Try the below query:
db.collection.aggregate(
[{ $unwind: { path: "$items",} },
{ $unwind: { path: "$items.quantity",} },
{ $unwind: { path: "$items.unit_price",} },
{ $addFields: { 'total': {$multiply: ["$items.quantity", "$items.unit_price"] }} }])

Count all objects in array and infinite sub-arrays

{
"ArticleName": "Example Article",
"Comments": [
{
"Text": "Great Article",
"Responses": [
{
"Text": "No it isnt",
"Responses": [
{
"Text": "Yes it is"
}
]
},
{
"Text": "Spot on"
}
]
}
]
}
Every occurrence of the key 'Text' would be considered as a comment (so 4 comments). What's the best way to get a count on this in Mongo?
You can try below aggregation
Basically you have to loop over the each array using $map and count for the fields where Text is not equal to $ne undefined
db.collection.aggregate([
{ "$project": {
"commentsCount": {
"$sum": {
"$map": {
"input": "$Comments",
"as": "cc",
"in": {
"$add": [
{ "$cond": [{ "$ne": [ "$$cc.Text", undefined ] }, 1, 0 ] },
{ "$sum": {
"$map": {
"input": "$$cc.Responses",
"as": "dd",
"in": {
"$add": [
{ "$cond": [{ "$ne": [ "$$dd.Text", undefined ] }, 1, 0 ] },
{ "$sum": {
"$map": {
"input": "$$dd.Responses",
"as": "ee",
"in": { "$cond": [{ "$ne": [ "$$ee.Text", undefined ] }, 1, 0 ] }
}
}}
]
}
}
}}
]
}
}
}
}
}}
])
Output
[
{
"commentsCount": 4
}
]

Conditionally Project Matching Array Item

I have a collection named questions with documents like this:
{
"formats": [
{
"language_id": 1,
"text": "question text1"
},
{
"language_id": 2,
"text": "question text 2"
}
],
"qid": "HQSRFA3T"
}
I want to write a query such that if a specific language_id is not present, then language_id with 1 should be returned by default.
I've tried two queries so far:
db.questions.aggregate([
{
$match: {
'qid': 'HQSRFA3T'
}
},
{
$project: {
formats: {
$ifNull: [
{ $filter: { input: '$formats', as: 'format', cond: {$eq: ['$$format.language_id', 3]}} },
{ $filter: { input: '$formats', as: 'format', cond: {$eq: ['$$format.language_id', 1]}} }
]
},
_id: 0
}
}
])
This query results in something like this:
{ "formats" : [ ] }
Then there's another query which is something like this:
db.questions.aggregate([ { $match: {'qid': 'HQSRFA3T'}}, { $project: {
formats: {
$filter: {
input: '$formats',
as: 'format',
cond: {
$or: [
{ $eq: ['$$format.language_id', 1] },
{ $eq: ['$$format.language_id', 3] }
]
}
}
},
_id: 0
}}])
This query returns two elements in case both language_id's are present in the array.
There are a "few" ways:
Ideally you have $indexOfArray from MongoDB 3.4 then you can use that in combination with $in:
db.questions.aggregate([
{ "$match": { "qid": "HQSRFA3T" } },
{ "$project": {
"formats": {
"$cond": {
"if": { "$in": [ 3, "$formats.language_id"] },
"then": {
"$arrayElemAt": [
"$formats",
{ "$indexOfArray": [ "$formats.language_id", 3 ] }
]
},
"else": {
"$arrayElemAt": [
"$formats",
{ "$indexOfArray": [ "$formats.language_id", 1 ] }
]
}
}
}}
}
])
And if all you really want is the matching "text", then a slight alteration:
db.questions.aggregate([
{ "$match": { "qid": "HQSRFA3T" } },
{ "$project": {
"text": {
"$cond": {
"if": { "$in": [ 3, "$formats.language_id"] },
"then": {
"$arrayElemAt": [
"$formats.text",
{ "$indexOfArray": [ "$formats.language_id", 3 ] }
]
},
"else": {
"$arrayElemAt": [
"$formats.text",
{ "$indexOfArray": [ "$formats.language_id", 1 ] }
]
}
}
}}
}
])
That works because if the $indexOfArray returns -1 indicating "not found" then the the $cond will branch accordingly:
Alternately, use $filter with $size:
db.questions.aggregate([
{ "$match": { "qid": "HQSRFA3T" } },
{ "$project": {
"formats": {
"$cond": {
"if": { "$gt": [
{ "$size": {
"$filter": {
"input": "$formats",
"cond": { "$eq": [ "$$this.language_id", 3 ] }
}
}},
0
]},
"then": {
"$filter": {
"input": "$formats",
"cond": { "$eq": [ "$$this.language_id", 3 ] }
}
},
"else": {
"$filter": {
"input": "$formats",
"cond": { "$eq": [ "$$this.language_id", 1 ] }
}
}
}
}
}}
])
You can even vary on the last form with $arrayElemAt to just return the "single" matching array element at position 0 if you at least have MongoDB 3.2.
db.questions.aggregate([
{ "$match": { "qid": "HQSRFA3T" } },
{ "$project": {
"formats": {
"$cond": {
"if": { "$gt": [
{ "$size": {
"$filter": {
"input": "$formats",
"cond": { "$eq": [ "$$this.language_id", 3 ] }
}
}},
0
]},
"then": {
"$arrayElemAt": [
{ "$filter": {
"input": "$formats",
"cond": { "$eq": [ "$$this.language_id", 3 ] }
}},
0
]
},
"else": {
"$arrayElemAt": [
{ "$filter": {
"input": "$formats",
"cond": { "$eq": [ "$$this.language_id", 1 ] }
}},
0
]
}
}
}
}}
])
Other alternatives on the $cond for the if condition are using $in, to match the comparison on the array elements:
"if": { "$in": [ 3, "$formats.language_id" ] }
But since that requires MongoDB 3.4, then you may as well use the $indexOfArray operator instead.
There is very little point in trying to "force" multiple matches into $filter and then ultimately looking to discard one of them, but you "can" do it with $let:
db.questions.aggregate([
{ "$match": { "qid": "HQSRFA3T" } },
{ "$project": {
"formats": {
"$let": {
"vars": {
"formats": {
"$filter": {
"input": "$formats",
"cond": {
"$or": [
{ "$eq": [ "$$this.language_id", 1 ] },
{ "$eq": [ "$$this.language_id", 3 ] }
]
}
}
}
},
"in": {
"$cond": {
"if": {
"$gt": [
{ "$size": {
"$filter": {
"input": "$$formats",
"cond": { "$eq": [ "$$this.language_id", 3 ] }
}
}},
0
]
},
"then": {
"$filter": {
"input": "$$formats",
"cond": { "$eq": [ "$$this.language_id", 3 ] }
}
},
"else": {
"$filter": {
"input": "$$formats",
"cond": { "$eq": [ "$$this.language_id", 1 ] }
}
}
}
}
}
}
}}
])
So it's there, but it's just extra work with little gain since at best the $or condition matches the "default" case and you still needed to "filter away" for only the "preferred" match anyway.

Resources