MongoDB lookup (join) with field in double nested array - arrays

With a MongoDB collection named department with the following structure:
{
"_id":99,
"name":"Erick Kalewe",
"faculty":"Zazio",
"lecturers":[
{
"lecturerID":31,
"name":"Granny Kinton",
"email":"gkintonu#answers.com",
"imparts":[
{
"groupID":70,
"codCourse":99
}
]
},
{
"lecturerID":36,
"name":"Michale Dahmel",
"email":"mdahmelz#artisteer.com",
"imparts":[
{
"groupID":100,
"codCourse":60
}
]
}
]
}
and another collection named group with this structure:
{
"_id":100,
"codCourse":11,
"language":"Romanian",
"max_students":196,
"students":[
{
"studentID":1
}
],
"classes":[
{
"date":datetime.datetime(2022, 5, 10, 4, 24, 19),
"cod_classroom":100
}
]
}
join them to get the following:
{
"_id":99,
"name":"Erick Kalewe",
"faculty":"Zazio",
"lecturers":[
{
"lecturerID":31,
"name":"Granny Kinton",
"email":"gkintonu#answers.com",
"imparts":[
{
"groupID":70,
"codCourse":99
}
]
},
{
"lecturerID":36,
"name":"Michale Dahmel",
"email":"mdahmelz#artisteer.com",
"imparts":[
{
"_id":100,
"codCourse":11,
"language":"Romanian",
"max_students":196,
"students":[
{
"studentID":1
}
],
"classes":[
{
"date":datetime.datetime(2022, 5, 10, 4, 24, 19),
"cod_classroom":100
}
]
}
]
}
]
}
The objective is to get a report with the number of students taught by a professor from a department.

Query
Unwind, do the join, and re-group back.
It's a fairly big query, because you want to join on a nested field, and this means two $unwind stages and two $group stages to restore the structure
(in general I think join fields shouldn't be nested this deep).
Unwind both arrays,
do the lookup on groupID,
and then reconstruct the document with its two levels of nesting.
First the imparts need to be grouped and pushed back
(for the rest of the fields I keep the $first value);
we also sum the students, based on the comment.
Then the lecturers need to be grouped and pushed back
(again keeping the $first value for the remaining fields).
We take the lecturer with the max students in the department
(MongoDB can compare documents as well).
Playmongo (you can hover your mouse at the end of each stage to see that stage's input/output)
department.aggregate(
[{"$unwind": "$lecturers"}, {"$unwind": "$lecturers.imparts"},
{"$lookup":
{"from": "coll",
"localField": "lecturers.imparts.groupID",
"foreignField": "_id",
"as": "lecturers.imparts"}},
{"$set": {"lecturers.imparts": {"$first": "$lecturers.imparts"}}},
{"$group":
{"_id": {"_id": "$_id", "lecturersID": "$lecturers.lecturerID"},
"name": {"$first": "$name"},
"faculty": {"$first": "$faculty"},
"lecturers":
{"$first":
{"lecturerID": "$lecturers.lecturerID",
"name": "$lecturers.name",
"email": "$lecturers.email"}},
"imparts": {"$push": "$lecturers.imparts"},
"lecture_max_students":
{"$sum": "$lecturers.imparts.max_students"}}},
{"$set":
{"lecturers":
{"$mergeObjects":
["$lecturers", {"imparts": "$imparts"},
{"lecture_max_students": "$lecture_max_students"}]},
"imparts": "$$REMOVE","lecture_max_students": "$$REMOVE"}},
{"$group":
{"_id": "$_id._id",
"name": {"$first": "$name"},
"faculty": {"$first": "$faculty"},
"lectures": {"$push": "$lecturers"},
"dept-max-lecturer":
{"$max": {"max-students": "$lecturers.lecture_max_students",
"lecturerID": "$lecturers.lecturerID"}}}}])

You can try the aggregation framework:
$lookup with the group collection, passing lecturers.imparts.groupID as localField and _id as foreignField
$addFields to merge the group data into imparts and remove the group field because it is no longer needed
$map to iterate over the lecturers array
$mergeObjects to merge the current lecturers object with the updated imparts
$map to iterate over the imparts array
$mergeObjects to merge the current imparts object with the matching result from group
$filter to search the group array for the group with a matching groupID
$arrayElemAt to get the first element from the filtered result above
db.department.aggregate([
{
$lookup: {
from: "group",
localField: "lecturers.imparts.groupID",
foreignField: "_id",
as: "group"
}
},
{
$addFields: {
lecturers: {
$map: {
input: "$lecturers",
in: {
$mergeObjects: [
"$$this",
{
imparts: {
$map: {
input: "$$this.imparts",
as: "i",
in: {
$mergeObjects: [
"$$i",
{
$arrayElemAt: [
{
$filter: {
input: "$group",
cond: { $eq: ["$$this._id", "$$i.groupID"] }
}
},
0
]
}
]
}
}
}
}
]
}
}
},
group: "$$REMOVE"
}
}
])
Playground

Now that we understand the question (based on your other question), an answer can be:
Add to each department document a set of all its relevant group IDs.
$lookup only the student ids for each group to create a groups array.
Insert the relevant group data into each lecturer.
Calculate maxImpartsStudents, which is the number of unique students per lecturer across all of its groups.
$reduce the lecturers array to include only the lecturer with the highest maxImpartsStudents.
Format the answer.
db.department.aggregate([
{
$addFields: {
groups: {
$setIntersection: [
{
$reduce: {
input: "$lecturers.imparts.groupID",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}
}
]
}
}
},
{
$lookup: {
from: "group",
let: {groupIDs: "$groups"},
pipeline: [
{$match: {$expr: {$in: ["$_id", "$$groupIDs"]}}},
{
$project: {
students: {
$reduce: {
input: "$students",
initialValue: [],
in: {$concatArrays: ["$$value", ["$$this.studentID"]]}
}
}
}
}
],
as: "groups"
}
},
{
$project: {
name: 1,
lecturers: {
$map: {
input: "$lecturers",
in: {
$mergeObjects: [
{lecturerID: "$$this.lecturerID"},
{groups: {
$map: {
input: "$$this.imparts",
in: {
$arrayElemAt: [
"$groups",
{$indexOfArray: ["$groups._id", "$$this.groupID"]}
]
}
}
}
}
]
}
}
}
}
},
{
$project: {
name: 1,
lecturers: {
$map: {
input: "$lecturers",
as: "item",
in: {
$mergeObjects: [
{
maxImpartsStudents: {
$size: {
$reduce: {
input: "$$item.groups",
initialValue: [],
in: {$setUnion: ["$$value", "$$this.students"]}
}
}
}
},
{lecturerID: "$$item.lecturerID"}
]
}
}
}
}
},
{
$set: {
lecturers: {
$reduce: {
input: "$lecturers",
initialValue: {
"maxImpartsStudents": 0
},
in: {
$cond: [
{$gte: ["$$this.maxImpartsStudents", "$$value.maxImpartsStudents"]},
"$$this", "$$value"
]
}
}
}
}
},
{
$project: {
lecturerID: "$lecturers.lecturerID",
maxImpartsStudents: "$lecturers.maxImpartsStudents",
departmentName: "$name"
}
}
])
Which is much better than combining the solutions from both questions.
See how it works on the playground example
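With the two sample documents above (where only group 100 exists in the group collection), the final $project should yield a report shaped roughly like the following; the exact values depend on which group documents actually exist:
{
"_id": 99,
"lecturerID": 36,
"maxImpartsStudents": 1,
"departmentName": "Erick Kalewe"
}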

Related

MongoDB: how to shuffle an array and have the new order permanently saved?

So suppose I have a document like:
{
_id: 1,
items: ["aaa", "bbb", "ccc", "ddd", "eee"...]
}
I would like to shuffle the items list once, with this order saved in the collection - i.e. I don't want to call random or something for every query, since there are about 200,000 items in this array (not huge, but still, calling $rand every time I want to retrieve an item would be inefficient).
So I'm really looking for some kind of manual script that I can run once - it would then update this document, so it became something like:
{
_id: 1,
items: ["ddd", "bbb", "aaa", "eee", "ccc"...]
}
If anyone knows if this is possible, I'd appreciate it. Thanks
Otherwise, I'd probably fetch the data, shuffle it using another language, then save it back into Mongo
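For that fallback, a minimal PyMongo sketch could look like the following (the connection string, database, and collection names are assumptions for illustration):
import random
from pymongo import MongoClient

# Connection string, database, and collection names are illustrative assumptions.
client = MongoClient("mongodb://localhost:27017")
coll = client["test"]["collection"]

# Fetch the document, shuffle the array client-side, then persist the new order.
doc = coll.find_one({"_id": 1})
items = doc["items"]
random.shuffle(items)  # in-place Fisher-Yates shuffle
coll.update_one({"_id": 1}, {"$set": {"items": items}})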
I'm not sure this is the best way to do this
https://mongoplayground.net/p/4AH8buOXudQ
db.collection.aggregate([
{
$unwind: {
path: "$items"
}
},
{
$sample: {
size: 100 // to shuffle values up to a particular count
}
},
{
$group: {
_id: "$_id",
item: {
$push: "$items"
}
}
}
]);
If you're on Mongo version 5.2+, I would do this using an aggregation pipeline update with the new $sortArray operator and $rand.
Essentially we add a random value to each item, sort the array by it, and then transform it back. You can run this update on demand whenever you want to reshuffle the array.
db.collection.updateMany(
{},
[
{
$addFields: {
items: {
$map: {
input: {
$sortArray: {
input: {
$map: {
input: "$items",
in: {
value: "$$this",
sortVal: {
$rand: {}
}
}
}
},
sortBy: {
"sortVal": 1
}
}
},
in: "$$this.value"
}
}
}
}
])
Mongo Playground
If you're on an older version, you can generate some kind of pseudo-random sort using $reduce (you could even do a bubble sort, but that O(n^2) performance on such a large array is not recommended). Here is an example of how to generate some sort of randomness:
The approach is to iterate over the items array with the $reduce operator: if the randomly generated value is less than about 0.33 we append the item to the end of the new array, if it is less than about 0.67 we push it to the start, and otherwise we insert it into the middle of the array.
Obviously you can choose whatever random logic you want and add more switch cases; as mentioned, even an actual sort is possible, but at the cost of performance.
db.collection.update({},
[
{
$addFields: {
items: {
$map: {
input: {
$reduce: {
input: {
$map: {
input: "$items",
in: {
value: "$$this",
sortVal: {
$rand: {}
}
}
}
},
initialValue: [],
in: {
$switch: {
branches: [
{
case: {
$lt: [
"$$this.sortVal",
0.333
]
},
then: {
$concatArrays: [
"$$value",
[
"$$this"
]
]
},
},
{
case: {
$lt: [
"$$this.sortVal",
0.6666
]
},
then: {
$concatArrays: [
[
"$$this"
],
"$$value",
]
}
}
],
default: {
$concatArrays: [
{
$slice: [
"$$value",
{
$round: {
$divide: [
{
$size: "$$value"
},
2
]
}
}
]
},
[
"$$this"
],
{
$slice: [
"$$value",
{
$round: {
$divide: [
{
$size: "$$value"
},
2
]
}
},
{
$add: [
{
$size: "$$value"
},
1
]
}
]
}
]
}
}
}
}
},
in: "$$this.value"
}
}
}
}
])
Mongo Playground

MongoDB query to return docs based on an array size after filtering array of JSON objects?

I have MongoDB documents structured in this way:
[
{
"id": "car_1",
"arrayProperty": [
{
"model": "sedan",
"turbo": "nil"
},
{
"model": "sedan",
"turbo": "60cc"
}
]
},
{
"id": "car_2",
"arrayProperty": [
{
"model": "coupe",
"turbo": "50cc"
},
{
"model": "coupe",
"turbo": "60cc"
}
]
}
]
I want to be able to make a find query that translates into basic English as "Ignoring all models that have 'nil' value for 'turbo', return all documents with arrayProperty of length X." That is to say, the "arrayProperty" of car 1 would be interpreted as having a size of 1, while the array of car 2 would have a size of 2. The goal is to be able to make a query for all cars with arrayProperty size of 2 and only see car 2 returned in the results.
Without ignoring the nil values, the query is very simple as:
{ arrayProperty: { $size: 2} }
And this would return both cars 1 and 2. Moreover, if our array was just a simple array such as:
[1, 2, 3, 'nil']
Then our query is simply:
{
arrayProperty: {
$size: X,
$ne: "nil"
}
}
However, when we introduce an array of JSON objects, things get tricky. I have tried numerous things to no avail including:
"arrayProperty": {
$size: 2,
$ne: {"turbo": "nil"}
}
"arrayProperty": {
$size: 2,
$ne: ["arrayProperty.turbo": "nil"]
}
Even without the $size operator in there, I can't seem to filter by the nil value. Does anyone know how I would properly do this in those last two queries?
use $and in $match
db.collection.aggregate([
{
$match: {
"$and": [
{
arrayProperty: {
$size: 2
}
},
{
"arrayProperty.turbo": {
$ne: "nil"
}
}
]
}
}
])
mongoplayground
use $set first
db.collection.aggregate([
{
"$set": {
"arrayProperty": {
"$filter": {
"input": "$arrayProperty",
"as": "a",
"cond": {
$ne: [
"$$a.turbo",
"nil"
]
}
}
}
}
},
{
$match: {
arrayProperty: {
$size: 1
}
}
}
])
mongoplayground
set a new field of size
db.collection.aggregate([
{
"$set": {
"size": {
$size: {
"$filter": {
"input": "$arrayProperty",
"as": "a",
"cond": {
$ne: [
"$$a.turbo",
"nil"
]
}
}
}
}
}
},
{
$match: {
size: 1
}
}
])
mongoplayground

Sum Quantity in Mongo Subdocument Based on Filter

I have a "shipment" document in MongoDB that has the following basic structure:
shipment {
"id": "asdfasdfasdf",
"shipDate": "2021-04-02",
"packages": [
{
"id": "adfasdfasdfasdf",
"contents": [
{
"product": {
"id": "asdfasdfasdfasd"
},
"quantity": 10
}
]
}
]
}
Please note that "product" is stored as a DBRef.
I want to find the total quantity of a specific product (based on the product ID) that has been shipped since a given date. I believe this is the appropriate logic that should be followed:
Match shipments with "shipDate" greater than the given date.
Find entries where "contents" contains a product with an "id" matching the given product ID
Sum the "quantity" value for each matching entry
Return the sum
This is what I've come up with for the Mongo query so far:
db.shipment.aggregate([
{$match: {"shipDate": {$gt: ISODate("2019-01-01")}}},
{$unwind: "$packages"},
{$unwind: "$packages.contents"},
{$unwind: "$packages.contents.product"},
{
$project: {
matchedProduct: {
$filter: {
input: "$packages.contents.products",
as: "products",
cond: {
"$eq": ["$products.id", ObjectId("5fb55eae3fb1bf783a4fa97f")]
}
}
}
}
}
])
The query works, but appears to just return all entries that meet the $match criteria with a "products" value of null.
I'm pretty new with Mongo queries, so it may be a simple solution. However, I've been unable to figure out just how to return the $sum of the "contents" quantity fields for a matching product ID.
Any help would be much appreciated, thank you.
Query Which Solved The Problem
db.shipment.aggregate([
{
$match: {
"shipDate": {$gte: ISODate("2019-01-01")},
"packages.contents.product.$id": ObjectId("5fb55eae3fb1bf783a4fa98e")
}
},
{ $unwind: "$packages" },
{ $unwind: "$packages.contents" },
{ $unwind: "$packages.contents.product" },
{
$match: {
"packages.contents.product.$id": ObjectId("5fb55eae3fb1bf783a4fa98e")
}
},
{
$group: {
"_id": null,
"total": {
"$sum": "$packages.contents.quantity"
}
}
}
])
Demo - https://mongoplayground.net/p/c3Ia9L47cJS
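Note that because product is stored as a DBRef, the reference is saved as a subdocument with $ref and $id (and optionally $db) fields, which is why the working query matches on packages.contents.product.$id rather than packages.contents.product.id.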
Use { $match: {"packages.contents.product.id": 1 } }, to filter records by product id.
After that group them back and find the total { $group: {"_id": null,"total": { "$sum": "$packages.contents.quantity" } } }
db.collection.aggregate([
{ $match: {"shipDate": "2021-04-02","packages.contents.product.id": 1 } },
{ $unwind: "$packages" },
{ $unwind: "$packages.contents" },
{ $match: { "packages.contents.product.id": 1 } },
{ $group: { "_id": null,"total": { "$sum": "$packages.contents.quantity" } } }
])
Adding the extra check at the top, { $match: {"shipDate": "2021-04-02", "packages.contents.product.id": 1 } }, filters out documents that don't contain the product id we need, so the query will be faster.
Option-2
Demo - https://mongoplayground.net/p/eo521luylsG
db.collection.aggregate([
{ $match: { "shipDate": "2021-04-02", "packages.contents.product.id": 1 }},
{ $unwind: "$packages" },
{ $project: { contents: { $filter: { input: "$packages.contents", as: "contents", cond: {"$eq": [ "$$contents.product.id", 1] }}}}},
{ $unwind: "$contents" },
{ $group: { "_id": null, "total": { "$sum": "$contents.quantity" }}}
])

How to query two collections with related data?

I have 2 collections. Collection A has some documents like {'id':1,'field':'name'}, {'id':1,'field':'age'}, and collection B has some documents like
{'_id':1,'name':'alice','age':18,'phone':123},{'_id':2,'name':'bob','age':30,'phone':321}
and I want to find all the documents whose '_id' is in collection A, and project just the corresponding fields.
For example:
collection A
{'id':1,'field':'name'},
{'id':1,'field':'age'}
collection B
{'_id':1,'name':'alice','age':18,'phone':123},
{'_id':2,'name':'bob','age':30,'phone':321}
the result is:
{'name':'alice','age':18},
Is there an easy way to do that?
You can use $lookup to join the two collections:
db.col1.aggregate([
{
$match: {
id: 1
}
},
{
"$lookup": {
"from": "col2",
"localField": "id",
"foreignField": "_id",
"as": "listNames"
}
},
{
$project: {
listNames: {
$first: "$listNames"
}
}
},
{
$project: {
_id: 0,
name: "$listNames.name",
age: "$listNames.age"
}
}
])
Mongo Playground: https://mongoplayground.net/p/E-0WvK_SUS_
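Note: $first used as an array expression in the $project above requires MongoDB 4.4 or newer; on older servers the equivalent expression is $arrayElemAt: ["$listNames", 0].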
So the idea is:
Convert the documents in to key, value pair for both the collections using $objectToArray.
Then perform a join operation based on key k and (id <-> _id) using $lookup.
Replace the result as root element using $replaceRoot.
Convert array to object using $arrayToObject and again $replaceRoot.
Query:
db.colB.aggregate([
{
$project: {
temp: { $objectToArray: "$$ROOT" }
}
},
{
$lookup: {
from: "colA",
let: { temp: "$temp", colB_id: "$_id" },
pipeline: [
{
$addFields: {
temp: { k: "$field", v: "$id" }
}
},
{
$match: {
$expr: {
$and: [
{ $in: ["$temp.k", "$$temp.k"] },
{ $eq: ["$temp.v", "$$colB_id"] }
]
}
}
},
{
$replaceRoot: {
newRoot: {
$first: {
$filter: {
input: "$$temp",
as: "item",
cond: { $eq: ["$field", "$$item.k"] }
}
}
}
}
}
],
as: "array"
}
},
{
$replaceRoot: {
newRoot: { $arrayToObject: "$array" }
}
}
]);
Output:
{
"name" : "alice",
"age" : 18
}

Can't reduce a deeply nested array on MongoDB

I have a Mongo database with documents like this one inside a collection:
{
date:"2019-06-12T00:09:03.000Z",
actions:{
actionDate:"2019-06-12T00:15:25.000Z",
data:{
users:[
[{gender:"Male",age:24},
{gender:"Female",age:25}
],
[{gender:"Male",age:34},
{gender:"Male",age:26}
],
[{gender:"Female",age:19},
{gender:"Male",age:21}
]
]
}
}
}
I would like to summarize the users appearing inside the users array into a single document, like:
{
"date":"2019-06-12T00:09:03.000Z",
"actionDate":"2019-06-12T00:15:25.000Z",
"summary":{
"countFemale":2,
"meanFemaleAge":22,
"countMale":4,
"meanMaleAge":26.25
}
}
Some considerations to take into account: there could be no entries for one gender, and the users array might contain only one or two inner arrays.
I've tried to solve it with my (now I know) scarce knowledge of the Mongo query language, but it seems unsolvable to me. I thought MongoDB: Reduce array of objects into a single object by computing the average of each field might be useful to check, but I can't quite grasp the idea.
Any ideas, please?
Try the below query:
db.collection.aggregate([
/** Merge all arrays inside 'users' & push to 'summary' field */
{
$project: {
date: 1,
actionDate: "$actions.actionDate",
summary: {
$reduce: {
input: "$actions.data.users",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] },
},
},
},
},
{
$unwind: "$summary",
},
/** Group on 'date' to push data related to same date */
{
$group: {
_id: "$date",
actionDate: {$first: "$actionDate",},
countFemale: {$sum: {$cond: [{$eq: ["$summary.gender", "Female"]},1,0]}},
countMale: {$sum: {$cond: [{$eq: ["$summary.gender", "Male"]},1,0]}},
meanFemaleAge: {$sum: {$cond: [{$eq: ["$summary.gender", "Female"]},"$summary.age",0]}},
meanMaleAge: {$sum: {$cond: [{$eq: ["$summary.gender", "Male"]},"$summary.age",0]}}
}
},
/** Re-create 'meanFemaleAge' & 'meanMaleAge' fields to add mean */
{
$addFields: {
meanFemaleAge: {$cond: [{$ne: ["$meanFemaleAge", 0]},{$divide: ["$meanFemaleAge","$countFemale"]},0]},
meanMaleAge: {$cond: [{$ne: ["$meanMaleAge", 0]},{$divide: ["$meanMaleAge","$countMale"]},0]},
}
}
]);
Test : MongoDB-Playground
Note: however you do this, I would suggest not running this kind of operation on an entire collection with huge datasets.
We need to use the $reduce operator.
In the first stage, we create separate arrays (Male | Female) and push users into them according to their gender.
In the second stage, we transform the data and calculate the result.
Try this one:
db.collection.aggregate([
{
$addFields: {
"users": {
$reduce: {
input: "$actions.data.users",
initialValue: {
"Male": [],
"Female": []
},
in: {
Male: {
$concatArrays: [
"$$value.Male",
{
$filter: {
input: "$$this",
cond: {
$eq: [
"$$this.gender",
"Male"
]
}
}
}
]
},
Female: {
$concatArrays: [
"$$value.Female",
{
$filter: {
input: "$$this",
cond: {
$eq: [
"$$this.gender",
"Female"
]
}
}
}
]
}
}
}
}
}
},
{
$project: {
_id: 0,
date: 1,
actionDate: "$actions.actionDate",
summary: {
"countFemale": {
$size: "$users.Female"
},
"meanFemaleAge": {
$avg: "$users.Female.age"
},
"countMale": {
$size: "$users.Male"
},
"meanMaleAge": {
$avg: "$users.Male.age"
}
}
}
}
])
MongoPlayground
