Can't reduce a deeply nested array on MongoDB - arrays

I've a Mongo database with documents like these one inside a collection:
{
date:"2019-06-12T00:09:03.000Z",
actions:{
actionDate:"2019-06-12T00:15:25.000Z",
data:{
users:[
[{gender:"Male",age:24},
{gender:"Female",age:25}
],
[{gender:"Male",age:34},
{gender:"Male",age:26}
],
[{gender:"Female",age:19},
{gender:"Male",age:21}
]
]
}
}
}
I would like to summarize the users appearing inside the array users in a single document, like
{
"date":"2019-06-12T00:09:03.000Z",
"actionDate":"2019-06-12T00:15:25.000Z",
"summary":{
"countFemale":2,
"meanFemaleAge":22,
"countMale":4,
"meanMaleAge":26.25
}
}
Some considerations to be taken into account: there could be no cases for one gender and also, the users array might be limited to one or two arrays inside it.
I've tried to solve it using my, now I know, scarce knowledge of Mongo query language but it seems unsolvable to me. Thought this might be useful checking MongoDB: Reduce array of objects into a single object by computing the average of each field but can't catch up with the idea.
Any ideas, please?

Try below query :
/**
 * Summarise the users per 'date': number of users and mean age for each gender.
 * Result shape:
 *   { date, actionDate, summary: { countFemale, meanFemaleAge, countMale, meanMaleAge } }
 * A gender with no users gets a count of 0 and a mean of 0.
 */
db.collection.aggregate([
  /** Merge all arrays inside 'users' into one flat array & store it in 'summary' */
  {
    $project: {
      date: 1,
      actionDate: "$actions.actionDate",
      summary: {
        $reduce: {
          input: "$actions.data.users",
          initialValue: [],
          in: { $concatArrays: ["$$value", "$$this"] },
        },
      },
    },
  },
  /** One document per user, so the users can be counted/summed by $group */
  {
    $unwind: "$summary",
  },
  /** Group on 'date' to push data related to same date */
  {
    $group: {
      _id: "$date",
      actionDate: { $first: "$actionDate" },
      countFemale: { $sum: { $cond: [{ $eq: ["$summary.gender", "Female"] }, 1, 0] } },
      countMale: { $sum: { $cond: [{ $eq: ["$summary.gender", "Male"] }, 1, 0] } },
      sumFemaleAge: { $sum: { $cond: [{ $eq: ["$summary.gender", "Female"] }, "$summary.age", 0] } },
      sumMaleAge: { $sum: { $cond: [{ $eq: ["$summary.gender", "Male"] }, "$summary.age", 0] } },
    },
  },
  /** Build the requested output: 'date' back as a field and the figures under 'summary' */
  {
    $project: {
      _id: 0,
      date: "$_id",
      actionDate: 1,
      summary: {
        countFemale: "$countFemale",
        /** Guard on the count (not on the sum) to avoid dividing by zero */
        meanFemaleAge: {
          $cond: [{ $gt: ["$countFemale", 0] }, { $divide: ["$sumFemaleAge", "$countFemale"] }, 0],
        },
        countMale: "$countMale",
        meanMaleAge: {
          $cond: [{ $gt: ["$countMale", 0] }, { $divide: ["$sumMaleAge", "$countMale"] }, 0],
        },
      },
    },
  },
]);
Test : MongoDB-Playground
Note : No matter how you do this, I would suggest that you do not run this kind of operation on an entire collection with a huge dataset.

We need to perform $reduce operator.
In the first stage, we create separate arrays (Male|Female) and push users according to their gender.
In the second stage, we transform / calculate result.
Try this one:
// Two-stage pipeline: (1) split the nested 'users' arrays into one array per gender,
// (2) derive the counts and mean ages from those two arrays.
db.collection.aggregate([
{
// Stage 1: add a top-level 'users' field shaped as { Male: [...], Female: [...] }.
$addFields: {
"users": {
$reduce: {
// Each element of the input is one inner array of user objects.
input: "$actions.data.users",
initialValue: {
"Male": [],
"Female": []
},
in: {
// Append the male users of the current inner array to the accumulated list.
Male: {
$concatArrays: [
"$$value.Male",
{
$filter: {
// Here "$$this" is the current inner array of the $reduce ...
input: "$$this",
cond: {
$eq: [
// ... while inside 'cond' "$$this" is the element being filtered.
"$$this.gender",
"Male"
]
}
}
}
]
},
// Same as above for the female users.
Female: {
$concatArrays: [
"$$value.Female",
{
$filter: {
input: "$$this",
cond: {
$eq: [
"$$this.gender",
"Female"
]
}
}
}
]
}
}
}
}
}
},
{
// Stage 2: keep 'date', lift 'actionDate' to the top level and build 'summary'.
$project: {
_id: 0,
date: 1,
actionDate: "$actions.actionDate",
summary: {
"countFemale": {
$size: "$users.Female"
},
// "$users.Female.age" resolves to the array of ages of the female users.
"meanFemaleAge": {
$avg: "$users.Female.age"
},
"countMale": {
$size: "$users.Male"
},
"meanMaleAge": {
$avg: "$users.Male.age"
}
}
}
}
])
MongoPlayground

Related

Finding documents in mongodb collection by order of elements index of array field

Array field in collection:
"fruits": [ "fruits": [ "fruits": [
{"fruit1": "banana"}, {"fruit2": "apple"}, {"fruit3": "pear"},
{"fruit2": "apple"}, {"fruit4": "orange"}, {"fruit2": "apple"},
{"fruit3": "pear"}, {"fruit1": "banana"}, {"fruit4": "orange"},
{"fruit4": "orange"} {"fruit3": "pear"} {"fruit1": "banana"}
]
I need to find those documents in the collection where "banana" appears before "apple". Does MongoDB allow comparing the positions of elements in an array, like this:
if (fruits.indexOf('banana') < fruits.indexOf('apple')) return true;
Or maybe there is any other method to get result i need?
MongoDB's array query operations do not support any positional search as you want.
You can, however, write a $where query to do what you want:
// Match documents whose 'fruits' array contains "banana" at an earlier position than "apple".
// Runs as server-side JavaScript once per document, so it cannot use indexes.
db.yourCollection.find({
  $where: function() {
    const fruits = Array.isArray(this.fruits) ? this.fruits : [];
    // Elements look like {"fruit1": "banana"}, so the name is searched among the values
    // of each element; a plain string element is accepted as well.
    const positionOf = (name) =>
      fruits.findIndex((entry) =>
        entry === name ||
        (entry !== null && typeof entry === 'object' &&
          Object.keys(entry).some((key) => entry[key] === name)));
    const bananaAt = positionOf('banana');
    const appleAt = positionOf('apple');
    // Both must be present: otherwise a missing banana (-1) would compare as "before" apple.
    return bananaAt !== -1 && appleAt !== -1 && bananaAt < appleAt;
  }
})
Be advised though, you won't be able to use indexes here and the performance will be a problem.
Another approach you can take is to rethink the database design; if you can specify what it is you're trying to build, someone can give you specific advice.
One more approach: pre-calculate the boolean value before persisting to DB as a field and query on true / false.
Consider refactoring your schema if possible. The dynamic field names(i.e. fruit1, fruit2...) make it unnecessarily complicated to construct a query. Also, if you require frequent queries by array index, you should probably store your array entries in individual documents with some sort keys to facilitate sorting with index.
Nevertheless, it is achievable through $unwind and $group the documents again. With includeArrayIndex clause, you can get the index inside array.
// Return the documents in which "banana" occurs at a lower array index than "apple".
db.collection.aggregate([
  // One document per array element; 'idx' receives the element's position in 'fruits'.
  {
    "$unwind": {
      path: "$fruits",
      includeArrayIndex: "idx"
    }
  },
  // Turn {"fruit1": "banana"} into [{k: "fruit1", v: "banana"}] so the value can be read
  // without knowing the dynamic key name.
  {
    "$addFields": {
      fruits: {
        "$objectToArray": "$fruits"
      }
    }
  },
  // Record the index only on the elements holding "banana" / "apple";
  // $$REMOVE leaves the field absent on every other element.
  {
    "$addFields": {
      "bananaIdx": {
        "$cond": {
          "if": {
            $eq: [
              "banana",
              {
                $first: "$fruits.v"
              }
            ]
          },
          "then": "$idx",
          "else": "$$REMOVE"
        }
      },
      "appleIdx": {
        "$cond": {
          "if": {
            $eq: [
              "apple",
              {
                $first: "$fruits.v"
              }
            ]
          },
          "then": "$idx",
          "else": "$$REMOVE"
        }
      }
    }
  },
  // Rebuild the original document; the index fields are null when the fruit is absent.
  {
    $group: {
      _id: "$_id",
      fruits: {
        $push: {
          "$arrayToObject": "$fruits"
        }
      },
      bananaIdx: {
        $max: "$bananaIdx"
      },
      appleIdx: {
        $max: "$appleIdx"
      }
    }
  },
  {
    $match: {
      $expr: {
        $and: [
          // null sorts before every number, so without this check a document that has
          // "apple" but no "banana" would satisfy the $lt below.
          {
            $ne: [
              "$bananaIdx",
              null
            ]
          },
          {
            $lt: [
              "$bananaIdx",
              "$appleIdx"
            ]
          }
        ]
      }
    }
  },
  // Drop the helper fields from the result.
  {
    $unset: [
      "bananaIdx",
      "appleIdx"
    ]
  }
])
Mongo Playground

MongoDB: how to shuffle array and this new order be permanantly saved?

So suppose I have a document like:
{
_id: 1,
items: ["aaa", "bbb", "ccc", "ddd", "eee"...]
}
I would like to shuffle the items list once, with this order saved in the table - i.e. I don't want to call random or something for every query, since there are about 200,000 items in this array (not huge, but still, calling $rand every time I want to retrieve an item would be inefficient)
So I'm really looking for some kind of manual script that I can run once - it would then update this document, so it became something like:
{
_id: 1,
items: ["ddd", "bbb", "aaa", "eee", "ccc"...]
}
If anyone knows if this is possible, I'd appreciate it. Thanks
Otherwise, I'd probably fetch the data, shuffle it using another language, then save it back into Mongo
I'm not sure this is the better way to do this
https://mongoplayground.net/p/4AH8buOXudQ
// Read-only pipeline: returns randomly picked items per document; it does not update the collection.
db.collection.aggregate([
{
// One document per element of 'items'.
$unwind: {
path: "$items"
}
},
{
// Randomly select unwound documents; at most 'size' of them reach the next stage.
$sample: {
size: 100 // number of unwound items to pick (taken across all documents in the pipeline)
}
},
{
// Collect the picked items back per original document; note the output field is 'item'.
$group: {
_id: "$_id",
item: {
$push: "$items"
}
}
}
]);
If you're Mongo version 5.2+ I would do this using an aggregation pipeline update with the new $sortArray operator and $rand.
Essentially we add a random value for each item, sort the array and then transform it back, You can run this update on demand whenever you want to reshuffle the array.
// Pipeline update that rewrites 'items' in a random order for every document matched by {}.
db.collection.updateMany(
{},
[
{
$addFields: {
items: {
// Outer $map: strip the helper wrapper again, keeping only the original value.
$map: {
input: {
$sortArray: {
input: {
// Inner $map: wrap each item as { value, sortVal } with a random sort key.
$map: {
input: "$items",
in: {
value: "$$this",
sortVal: {
$rand: {}
}
}
}
},
// Sorting by the random key produces the shuffled order.
sortBy: {
"sortVal": 1
}
}
},
in: "$$this.value"
}
}
}
}
])
Mongo Playground
If you're on an earlier version, you can generate some kind of pseudo-random order using $reduce (you can actually do a bubble sort as well, but its n^2 performance on such a large array is not recommended). Here is an example of how to generate some sort of randomness:
The approach is to iterate over the items array with the $reduce operator: if the randomly generated value is less than 0.333 we append the item to the end of the new array, if it is less than 0.6666 we push it to the start of the new array, and if it is between 0.6666 and 1 we insert it in the middle of the new array.
Obviously you can choose whatever random logic you want and add more switch cases, as mentioned even an actual sort is possible but at the cost of performance.
// Pipeline update that rebuilds 'items' in a pseudo-random order without $sortArray:
// every item gets a random number and is placed at the end, the start or the middle
// of the array built so far, depending on that number.
db.collection.update({},
[
{
$addFields: {
items: {
// Outer $map: unwrap { value, sortVal } back to the plain value.
$map: {
input: {
$reduce: {
input: {
// Wrap each item together with its random number.
$map: {
input: "$items",
in: {
value: "$$this",
sortVal: {
$rand: {}
}
}
}
},
initialValue: [],
in: {
$switch: {
branches: [
{
// sortVal < 0.333: append the item to the end of the accumulated array.
case: {
$lt: [
"$$this.sortVal",
0.333
]
},
then: {
$concatArrays: [
"$$value",
[
"$$this"
]
]
},
},
{
// 0.333 <= sortVal < 0.6666: put the item at the start of the accumulated array.
case: {
$lt: [
"$$this.sortVal",
0.6666
]
},
then: {
$concatArrays: [
[
"$$this"
],
"$$value",
]
}
}
],
// Otherwise: insert the item at position round(size / 2) of the accumulated array.
default: {
$concatArrays: [
{
// Leading part: the first round(size / 2) elements.
$slice: [
"$$value",
{
$round: {
$divide: [
{
$size: "$$value"
},
2
]
}
}
]
},
[
"$$this"
],
{
// Trailing part: starts at round(size / 2); size + 1 is an upper bound on the
// number of elements to take, so everything up to the end is included.
$slice: [
"$$value",
{
$round: {
$divide: [
{
$size: "$$value"
},
2
]
}
},
{
$add: [
{
$size: "$$value"
},
1
]
}
]
}
]
}
}
}
}
},
in: "$$this.value"
}
}
}
}
])
Mongo Playground

MongoDB Aggregation: How to return only the values that don't exist in all documents

Lets say I have an array ['123', '456', '789']
I want to Aggregate and look through every document with the field books and only return the values that are NOT in any documents. For example if '123' is in a document, and '456' is, but '789' is not, it would return an array with ['789'] as it's not included in any books fields in any document.
.aggregate( [
{
$match: {
books: {
$in: ['123', '456', '789']
}
}
},
I don't want the documents returned, but just the actual values that are not in any documents.
Here's one way to scan the entire collection to look for missing book values.
// Report which of the given book values do not appear in the 'books' array of any document.
db.collection.aggregate([
{ // "explode" books array to docs with individual book values
"$unwind": "$books"
},
{ // scan entire collection creating set of book values
"$group": {
"_id": null, // a constant _id puts every unwound document into a single group
"allBooksSet": {
"$addToSet": "$books" // <-- generate set of book values
}
}
},
{
"$project": {
"_id": 0, // don't need this anymore
"missing": { // use $setDifference to find missing values
"$setDifference": [
[ "123", "456", "789" ], // <-- your values go here
"$allBooksSet" // <-- the entire collection's set of book values
]
}
}
}
])
Example output:
[
{
"missing": [ "789" ]
}
]
Try it on mongoplayground.net.
Based on @rickhg12hs's answer, there is another variation that replaces $unwind with $reduce, which is considered less costly. Two out of the three steps are the same:
// Variant without $unwind: gather every 'books' array, union them into one set,
// then subtract that set from the values being checked.
db.collection.aggregate([
{
// Single group (constant _id) holding an array of all the 'books' arrays.
$group: {
_id: null,
allBooks: {$push: "$books"}
}
},
{
// Fold the array of arrays into one set of distinct book values.
$project: {
_id: 0,
allBooksSet: {
$reduce: {
input: "$allBooks",
initialValue: [],
in: {$setUnion: ["$$value", "$$this"]}
}
}
}
},
{
// Values from the literal list that are not present in the collection-wide set.
$project: {
missing: {
$setDifference: [["123","456", "789"], "$allBooksSet"]
}
}
}
])
Try it on mongoplayground.net.

MongoDB lookup (join) with field in double nested array

With a MongoDB collection name department with the following structure:
{
"_id":99,
"name":"Erick Kalewe",
"faculty":"Zazio",
"lecturers":[
{
"lecturerID":31,
"name":"Granny Kinton",
"email":"gkintonu#answers.com",
"imparts":[
{
"groupID":70,
"codCourse":99
}
]
},
{
"lecturerID":36,
"name":"Michale Dahmel",
"email":"mdahmelz#artisteer.com",
"imparts":[
{
"groupID":100,
"codCourse":60
}
]
}
]
}
and another collection group with this structure:
{
"_id":100,
"codCourse":11,
"language":"Romanian",
"max_students":196,
"students":[
{
"studentID":1
}
],
"classes":[
{
"date":datetime.datetime(2022, 5, 10, 4, 24, 19),
"cod_classroom":100
}
]
}
join them to get the following:
{
"_id":99,
"name":"Erick Kalewe",
"faculty":"Zazio",
"lecturers":[
{
"lecturerID":31,
"name":"Granny Kinton",
"email":"gkintonu#answers.com",
"imparts":[
{
"groupID":70,
"codCourse":99
}
]
},
{
"lecturerID":36,
"name":"Michale Dahmel",
"email":"mdahmelz#artisteer.com",
"imparts":[
{
"_id":100,
"codCourse":11,
"language":"Romanian",
"max_students":196,
"students":[
{
"studentID":1
}
],
"classes":[
{
"date":datetime.datetime(2022, 5, 10, 4, 24, 19),
"cod_classroom":100
}
]
}
]
}
]
}
The objective is to get a report with the number of students taught by a professor from a department.
Query
unwind, do the join, and re-group back
its kinda big query because you want to join in nested field, and this means 2 unwind and 2 groupings to restore the structure
(i think in general joining fields shouldn't go deep inside)
unwind both arrays
do the lookup on groupID
and now construct back the document as 2 level nested
first it's imparts that need to be grouped and pushed
(for rest argument i keep the $first)
we sum also the students based on the comment
then its lecturers that i need to be grouped and pushed
(for rest arguments i keep the $first)
we take the lecture with the max students in the department
(mongodb can compare documents also)
Playmongo (you can put your mouse at the end of each stage to see in/out of that stage)
// Join each lecturers.imparts entry with its group document, then rebuild the
// department document and compute per-lecturer / per-department maxima.
department.aggregate(
  [
    // Flatten both nesting levels: one document per (lecturer, impart) pair.
    {"$unwind": "$lecturers"},
    {"$unwind": "$lecturers.imparts"},
    // Join with the 'group' collection on the impart's groupID.
    {"$lookup":
      {"from": "group",
       "localField": "lecturers.imparts.groupID",
       "foreignField": "_id",
       "as": "lecturers.imparts"}},
    // $lookup yields an array; keep its first (only) matching group document.
    {"$set": {"lecturers.imparts": {"$first": "$lecturers.imparts"}}},
    // Regroup per lecturer: collect the joined imparts and sum their max_students.
    {"$group":
      {"_id": {"_id": "$_id", "lecturersID": "$lecturers.lecturerID"},
       "name": {"$first": "$name"},
       "faculty": {"$first": "$faculty"},
       "lecturers":
         {"$first":
           {"lecturerID": "$lecturers.lecturerID",
            "name": "$lecturers.name",
            "email": "$lecturers.email"}},
       "imparts": {"$push": "$lecturers.imparts"},
       "lecture_max_students":
         {"$sum": "$lecturers.imparts.max_students"}}},
    // Fold the collected imparts and the sum back into the lecturer sub-document.
    {"$set":
      {"lecturers":
        {"$mergeObjects":
          ["$lecturers", {"imparts": "$imparts"},
           {"lecture_max_students": "$lecture_max_students"}]},
       "imparts": "$$REMOVE", "lecture_max_students": "$$REMOVE"}},
    // Regroup per department, restoring the 'lecturers' array; $max compares the
    // embedded documents, so the lecturer with the highest max-students wins.
    {"$group":
      {"_id": "$_id._id",
       "name": {"$first": "$name"},
       "faculty": {"$first": "$faculty"},
       "lecturers": {"$push": "$lecturers"},
       "dept-max-lecturer":
         {"$max": {"max-students": "$lecturers.lecture_max_students",
                   "lecturerID": "$lecturers.lecturerID"}}}}
  ])
You can try aggregation framework,
$lookup with group collection pass lecturers.imparts.groupID as localField and pass _id as foreignField
$addFields to merge group data with imparts and remove the group field because it is no longer needed
$map to iterate loop of lecturers array
$mergeObjects to merge current object of lecturers and updated object of imparts
$map to iterate loop of imparts array
$mergeObjects to merge current object of imparts and found result from group
$filter to iterate loop of group array and find the group by groupID
$arrayElemAt to get first element from above filtered result
// Replace every lecturers.imparts entry with itself merged with its matching group document.
db.department.aggregate([
{
// Fetch all groups referenced by any impart of any lecturer into a temporary 'group' array.
$lookup: {
from: "group",
localField: "lecturers.imparts.groupID",
foreignField: "_id",
as: "group"
}
},
{
$addFields: {
lecturers: {
// Outer $map: "$$this" is the current lecturer.
$map: {
input: "$lecturers",
in: {
$mergeObjects: [
"$$this",
{
imparts: {
// Inner $map: "$$i" is the current impart of that lecturer.
$map: {
input: "$$this.imparts",
as: "i",
in: {
$mergeObjects: [
"$$i",
{
// First group whose _id equals the impart's groupID
// (inside the $filter, "$$this" is the group being tested).
$arrayElemAt: [
{
$filter: {
input: "$group",
cond: { $eq: ["$$this._id", "$$i.groupID"] }
}
},
0
]
}
]
}
}
}
}
]
}
}
},
// The temporary lookup result is dropped from the output.
group: "$$REMOVE"
}
}
])
Playground
Now that we understand the question (according to your other question), an answer can be:
Add each department document a set of all its relevant groups.
$lookup only the student ids for each group to create a groups array.
Insert the relevant groups data to each lecturer.
Calculate maxImpartsStudents which is the number of unique students per lecturer from all of its groups
$reduce the lecturers array to include only the lecturer with highest maxImpartsStudents.
Format the answer
// For each department, report the lecturer who teaches the highest number of
// distinct students across all of his/her groups.
db.department.aggregate([
  // 1. Distinct ids of all groups taught by any lecturer of the department.
  {
    $addFields: {
      groups: {
        // $setIntersection over a single array only removes duplicates.
        $setIntersection: [
          {
            $reduce: {
              input: "$lecturers.imparts.groupID",
              initialValue: [],
              in: {$concatArrays: ["$$value", "$$this"]}
            }
          }
        ]
      }
    }
  },
  // 2. Load those groups, keeping only a flat array of student ids per group.
  {
    $lookup: {
      from: "group",
      let: {groupIDs: "$groups"},
      pipeline: [
        {$match: {$expr: {$in: ["$_id", "$$groupIDs"]}}},
        {
          $project: {
            students: {
              $reduce: {
                input: "$students",
                initialValue: [],
                in: {$concatArrays: ["$$value", ["$$this.studentID"]]}
              }
            }
          }
        }
      ],
      as: "groups"
    }
  },
  // 3. Attach to each lecturer the loaded groups he/she imparts.
  //    Groups are selected with $filter rather than $arrayElemAt + $indexOfArray:
  //    for a groupID without a matching group $indexOfArray returns -1, and index -1
  //    makes $arrayElemAt return the LAST group, crediting the lecturer with
  //    students of a group he/she does not teach.
  {
    $project: {
      name: 1,
      lecturers: {
        $map: {
          input: "$lecturers",
          as: "lecturer",
          in: {
            lecturerID: "$$lecturer.lecturerID",
            groups: {
              $filter: {
                input: "$groups",
                as: "group",
                cond: {
                  $in: [
                    "$$group._id",
                    // A lecturer without imparts simply gets no groups.
                    {$ifNull: ["$$lecturer.imparts.groupID", []]}
                  ]
                }
              }
            }
          }
        }
      }
    }
  },
  // 4. Number of distinct students per lecturer over all of his/her groups.
  {
    $project: {
      name: 1,
      lecturers: {
        $map: {
          input: "$lecturers",
          as: "item",
          in: {
            maxImpartsStudents: {
              $size: {
                $reduce: {
                  input: "$$item.groups",
                  initialValue: [],
                  in: {$setUnion: ["$$value", "$$this.students"]}
                }
              }
            },
            lecturerID: "$$item.lecturerID"
          }
        }
      }
    }
  },
  // 5. Keep only the lecturer with the highest count ($gte: on a tie the later one wins).
  {
    $set: {
      lecturers: {
        $reduce: {
          input: "$lecturers",
          initialValue: {
            "maxImpartsStudents": 0
          },
          in: {
            $cond: [
              {$gte: ["$$this.maxImpartsStudents", "$$value.maxImpartsStudents"]},
              "$$this", "$$value"
            ]
          }
        }
      }
    }
  },
  // 6. Format the answer.
  {
    $project: {
      lecturerID: "$lecturers.lecturerID",
      maxImpartsStudents: "$lecturers.maxImpartsStudents",
      departmentName: "$name"
    }
  }
])
Which is much better than combining the solutions from both questions.
See how it works on the playground example

How could we merge nested arrays of subdocuments(Array within Array) in MongoDB

I would like to combine arrays within arrays in MongodB
for example,
test collection has documents in following formats
**{
cmp_id :1
depts : [{"dept_id":1, emps:[1,2,3]}, {"dept_id":2, emps:[4,5,6]}, {"dept_id":2, emps:[7,8,9]} ]
}**
I need following output, How can I?
***{
cmp_id :1,
empids : [1,2,3,4,5,6,7,8,9]
}***
You can use below aggregation
// Flatten the 'emps' arrays of all departments of a company into one 'empids' array.
// Result shape: { cmp_id, empids: [...] }
db.collection.aggregate([
  {
    $project: {
      _id: 0,
      cmp_id: 1,
      empids: {
        $reduce: {
          input: "$depts",
          initialValue: [],
          in: {
            $concatArrays: [
              "$$value",
              // A department without 'emps' contributes nothing instead of
              // turning the whole result into null.
              { $ifNull: ["$$this.emps", []] }
            ]
          }
        }
      }
    }
  }
])
MongoPlayground
// Flatten the 'emps' arrays of all departments into one 'empids' array per company,
// using $unwind/$group. Result shape: { cmp_id, empids: [...] }
db.collection.aggregate(
  // Pipeline
  [
    // Stage 1: one document per department
    {
      $unwind: {
        path: "$depts"
      }
    },
    // Stage 2: one document per employee id
    {
      $unwind: {
        path: "$depts.emps"
      }
    },
    // Stage 3: collect the employee ids per company
    {
      $group: {
        _id: '$cmp_id',
        empids: {
          $push: '$depts.emps'
        }
      }
    },
    // Stage 4: expose the grouping key under its original name 'cmp_id'
    {
      $project: {
        _id: 0,
        cmp_id: '$_id',
        empids: 1
      }
    }
  ]
);

Resources