How to reduce aggregation time in MongoDB - database

I have written this query to aggregate data
# Aggregate attendance records per worker for one location, worker type and
# date range, then attach the matching document(s) from the users collection.
db.attendancesCollection.aggregate([
{
# Stage 1: keep only records inside the date window for the given location
# name and worker type, and only those that have a locationId field.
'$match': {
'Date': { "$gte": start_date, "$lte": end_date },
'Location': found_location["locationName"],
'locationId': { '$exists': True },
'workerType': workerType
}
},
{
# Stage 2: one output document per (employeeId, workerId, workerFullName).
"$group": {
'_id': {
'employeeId': '$employeeId',
'workerId': '$workerId',
'workerFullName': '$workerFullName'
},
# Per-record details collected into an array for each worker.
'dailyPointsArray': {
'$push': {
'Date': '$Date',
'createdAs': '$createdAs',
'Points': '$shiftPoints',
'inTime': '$inTime',
'outTime': '$outTime',
}
},
# Totals across all matched records of the worker.
'total_shift_points': { '$sum': '$shiftPoints' },
'total_duration': { '$sum': '$duration' },
}
},
{
# Stage 3: $group output only carries _id and the accumulators, so the
# location name is added back onto every grouped document here.
'$addFields': {
'Location': found_location["locationName"],
}
},
{
# Stage 4: join each group with users documents whose _id equals the
# grouped employeeId; matches are stored in the userInfo array.
'$lookup':
{
'from': "users",
'localField': "_id.employeeId",
'foreignField': "_id",
'as': "userInfo"
}
}
])
The query runs without errors and returns the data I want, but it takes about 20-30 seconds to execute. There are about 500,000 documents in the attendance collection that I am aggregating and about 1,000 documents in the users collection that I am looking up. What can I do to reduce the execution time?
This is a sample document structure:
{
"_id": {
"$oid": "60dd7d723fcb2eb7df248af3"
},
"workerId": "xxxx",
"workerFullName": "ARUN",
"workerType": "Employee",
"workerAadharCardNumber": "xxxxxxxxxx",
"Date": {
"$date": "2021-07-01T00:00:00.000Z"
},
"employeeId": {
"$oid": "60dd77c14524e6c116e1692c"
},
"Location": "HEAD OFFICE",
"sourceUnitType": null,
"duration": null,
"shiftPoints": null,
"createdAs": "ABSENT",
"ruleApplied": null,
"detections": [],
"locationId": {
"$oid": "60dd6d303da6c17209d5ef68"
},
"workerFaceRegistered": true
}

Related

Retrieving related elements from MongoDB

I have the following data in MongoDB. Given only an id, how can I retrieve all other entries whose player matches the player of the entry with that id?
For example : find who the player for id 12 is, search all other entries that match that player name and return a list of all of them.
[
{_id: '62ecdf342f1193134043964c', id: '12', player: 'David Beckham', team: 'Manchester United'},
{_id: '62ecdf342f1193134043965c', id: '17', player: 'Cristiano Rolando', team: 'Manchester United'},
{_id: '62ecdf342f1193134043966c', id: '22', player: 'Cristiano Rolando', team: 'Juventus'},
{_id: '62ecdf342f1193134043967c', id: '42', player: 'David Beckham', team: 'Real Madrid'},
]
This is the code that I'm using to retrieve the one single entry that matches a specific id and then I'd also like to get the related entries.
/**
 * Loads the Jerseys document whose `id` equals the `jersey` route parameter
 * and exposes it to the page as the `jersey` prop.
 *
 * @param {{ params: { jersey: string } }} context - route parameters.
 * @returns {Promise<{ props: { jersey: object } }>} the first matching document.
 */
export async function getStaticProps({ params }) {
  const connection = await connectToDatabase();
  const cursor = connection.db
    .collection("Jerseys")
    .find({ id: params.jersey })
    .sort();
  const matches = await cursor.toArray();
  // Round-trip through JSON before picking the first match, so the prop is
  // plain JSON-compatible data.
  const [jersey] = JSON.parse(JSON.stringify(matches));
  return { props: { jersey } };
}
Now that you know the name, do another fetch like .find({player: jersey.player})
I'm not sure of the output format you want, but here's one way to return all documents that match the "player" name of the given "id".
// Return every Jerseys document that has the same "player" value as the
// document matching the given id (the matched document itself included).
db.Jerseys.aggregate([
{
// Stage 1: select the document for the id of interest.
"$match": {
// your id goes here
"id": "17"
}
},
{
// Stage 2: self-join on "player"; all documents with the same player are
// collected in the "docs" array.
"$lookup": {
"from": "Jerseys",
"localField": "player",
"foreignField": "player",
"as": "docs"
}
},
// Stage 3: emit one document per joined entry.
{"$unwind": "$docs"},
// Stage 4: promote each joined entry to be the output document.
{"$replaceWith": "$docs"}
])
Example output:
[
{
"_id": "62ecdf342f1193134043965c",
"id": "17",
"player": "Cristiano Rolando",
"team": "Manchester United"
},
{
"_id": "62ecdf342f1193134043966c",
"id": "22",
"player": "Cristiano Rolando",
"team": "Juventus"
}
]
Try it on mongoplayground.net.
Just an addition to @rickhg12hs's solution: you can use the following query to exclude the record whose id was matched and return only the other records for that player.
// List the other Jerseys documents that share a player with id "12",
// leaving out the id "12" document itself.
db.Jerseys.aggregate([
  // Start from the document for the id of interest.
  { $match: { id: "12" } },
  // Self-join on "player" to gather every document for the same player.
  {
    $lookup: {
      from: "Jerseys",
      localField: "player",
      foreignField: "player",
      as: "docs"
    }
  },
  // Turn each joined entry into its own top-level document.
  { $unwind: "$docs" },
  { $replaceWith: "$docs" },
  // Drop the document we started from.
  { $match: { id: { $not: { $eq: "12" } } } }
])
A possible JavaScript translation of it would be:
/**
 * Loads the jersey identified by the `jersey` route parameter together with
 * every other Jerseys document that has the same `player`.
 *
 * The `jersey` prop is the document whose `id` matches the route parameter,
 * and `relatedJerseys` is the full list of other documents for that player
 * (possibly empty).
 *
 * @param {{ params: { jersey: string } }} context - route parameters.
 * @returns {Promise<{ props: { jersey: object, relatedJerseys: object[] } } | { notFound: true }>}
 *   page props, or `{ notFound: true }` when no document has the given id.
 */
export async function getStaticProps({ params }) {
  const { db } = await connectToDatabase();
  const { jersey: id } = params;

  const [matched] = await db
    .collection("Jerseys")
    .aggregate([
      { $match: { id } },
      {
        // Self-join on "player": gathers every document for the same player
        // (including the matched one) into "docs".
        $lookup: {
          from: "Jerseys",
          localField: "player",
          foreignField: "player",
          as: "docs",
        },
      },
    ])
    .toArray();

  // An undefined prop cannot be serialized, so report a missing id explicitly.
  if (!matched) {
    return { notFound: true };
  }

  const { docs, ...current } = matched;
  const others = docs.filter((doc) => doc.id !== id);

  // JSON round-trip, as in the original code — presumably so database values
  // such as ObjectId become plain serializable data; confirm against the driver.
  const jersey = JSON.parse(JSON.stringify(current));
  const relatedJerseys = JSON.parse(JSON.stringify(others));
  return { props: { jersey, relatedJerseys } };
}

Remove object from array with mongoose 5.12 ($pull not working)

I have a problem with mongoose 5.12.
{
"_id": {
"$oid": "60db70c9956a6c4d0645d447"
},
"articles": [
{
"_id": {
"$oid": "60db8764da322a23e787ca3d"
},
"type": "Déssert",
"name": "dfd",
"description": "",
"price": 0,
"tag": "",
"picture": "noarticle.jpg"
},
],
"editor": 27,
}
My restaurant document contains articles (object array)
And I want to remove an article from it.
I'm using this:
const deletedArticle = await this.restaurantModel.findOneAndUpdate(
{ editor: userId },
{ $pull: { articles: articleId } },
{ multi: true, new: true, useFindAndModify: true },
);
// userId -> 27 and articleId --> "60db8764da322a23e787ca3d"
But nothing changes.
Is this an _id type problem? Or anything else?
(The $push operator works.)
The mistake is in this line:
{ $pull: { articles: articleId } }
It should be:
{ $pull: { articles: { _id : articleId } } }
Or:
{ $pull: {"articles._id" : articleId } }

total register users of current month with each date

Here, date is the registration date. With a simple group-by I get a result like this:
[
{ date: '2019-09-01', count: 1 },
{ date: '2019-09-02', count: 3 },
{ date: '2019-09-04', count: 2 },
{ date: '2019-09-05', count: 5 },
// ...
]
But I want every date of the month to appear; if no user registered on a date, its count should be displayed as 0:
[
{ date: '2019-09-01', count: 1 },
{ date: '2019-09-02', count: 3 },
{ date: '2019-09-03', count: 0 },
{ date: '2019-09-04', count: 0 },
// ...
]
If no user registered on the 3rd and 4th, those dates should show a count of 0.
monthalldate = [ '2019-09-1', '2019-09-2', '2019-09-3', '2019-09-4', '2019-09-5', '2019-09-6', '2019-09-7', '2019-09-8', '2019-09-9',
'2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',.......,
'2019-09-30' ]
User.aggregate([
{ "$group": {
"_id": { "$substr": ["$createdOn", 0, 10] },
"count": { "$sum": 1 },
"time": { "$avg": "$createdOn" },
}},
{ "$sort": { "_id": 1 } },
{ "$project": { "date": "$_id", "createdOn": "$count" }},
{ "$group": { "_id": null, "data": { "$push": "$$ROOT" }}},
{ "$project": {
"data": {
"$map": {
"input": monthalldate,
"in": {
"k": "$$this",
"v": { "$cond": [{ "$in": ["$$this", "$data.date" ] }, 1, 0 ] }
}
}
}
}},
{ "$unwind": "$data" },
{ "$group": { "_id": "$data.k", "count": { "$sum": "$data.v" }}}
]).exec(function (err, montlysub) {
// console.log(montlysub);
});
But I got the wrong result
My user collection
{ "_id" : ObjectId("5a0d3123f954955f15fe88e5"), "createdOn" : ISODate("2019-11-16T06:33:07.838Z"), "name":"test" },
{ "_id" : ObjectId("5a0d3123f954955f15fe88e6"), "createdOn" : ISODate("2019-11-17T06:33:07.838Z"), "name":"test2" }
$project transforms input documents. If there is no user record for a particular month, there are no input documents to transform and you won't have any output for that month.
Ways around this:
Create a collection containing the months (only), then start by retrieving the desired date range from this collection and joining user documents to each month.
Add the missing zero data points in the application, either as the results are iterated or as a post-processing step prior to result rendering.

Query only for numbers in nested array

I am trying to get the average of a key in a nested array inside a document, but I am not sure how to accomplish this.
Here is what my document looks like:
{
"_id": {
"$oid": "XXXXXXXXXXXXXXXXX"
},
"data": {
"type": "PlayerRoundData",
"playerId": "XXXXXXXXXXXXX",
"groupId": "XXXXXXXXXXXXXX",
"holeScores": [
{
"type": "RoundHoleData",
"points": 2
},
{
"type": "RoundHoleData",
"points": 13
},
{
"type": "RoundHoleData",
"points": 3
},
{
"type": "RoundHoleData",
"points": 1
},
{
"type": "RoundHoleData",
"points": 21
}
]
}
}
Now, the tricky part is that I only want the average of points for holeScores[0] across all documents with this playerId and this groupId.
Actually, the best solution would be to collect all documents with this playerId and groupId and create a new array with the averages of holeScores[0], holeScores[1], holeScores[2]... But if I can only get one array index at a time, that would be OK too :-)
Here is what I am thinking, but I am not quite sure how to put it together:
var allScores = dbCollection('scores').aggregate(
{$match: {"data.groupId": groupId, "playerId": playerId}},
{$group: {
_id: playerId,
rounds: { $sum: 1 }
result: { $sum: "$data.scoreTotals.points" }
}}
);
Really hoping for help with this issue and thanks in advance :-)
You can use $unwind with includeArrayIndex to get index and then use $group to group by that index
// Average points per hole position for one player within one group.
// The stages are passed as a single pipeline array, the form accepted by
// both the shell and the drivers.
dbCollection('scores').aggregate([
  // Restrict to the rounds of the requested player and group.
  {
    $match: { "data.playerId": "XXXXXXXXXXXXX", "data.groupId": "XXXXXXXXXXXXXX" }
  },
  // One document per hole score; "index" records its position in holeScores.
  {
    $unwind: {
      path: "$data.holeScores",
      includeArrayIndex: "index"
    }
  },
  // Group by hole position, so each result is the average for that hole.
  {
    $group: {
      _id: "$index",
      // The "$" prefix makes this a field reference; without it the literal
      // string "data.playerId" would be returned.
      playerId: { $first: "$data.playerId" },
      avg: { $avg: "$data.holeScores.points" }
    }
  }
]);
You can try below aggregation
// Average of the first hole's points (holeScores[0]) across all rounds of
// one player within one group.
db.collection.aggregate([
  // Restrict to the rounds of the requested player and group.
  { $match: { "data.groupId": groupId, "data.playerId": playerId } },
  {
    // A single group over all matched rounds.
    $group: {
      _id: null,
      result: {
        // $avg, not $sum: the goal is the average of the first hole.
        // "$data.holeScores.points" is the array of points per round, and
        // $arrayElemAt picks index 0 from it.
        $avg: { $arrayElemAt: ["$data.holeScores.points", 0] }
      }
    }
  }
]);

MongoDB : Single update on multiple fields V/S Multiple updates on single field?

I have a collection which has 7 arrays, and each array contains more than 1000 subdocuments.
i.e.
{
"_id": 1,
"arr1": [
{ "date": 20100101, "time": 120000, "key": "value1" },
{ "date": 20100401, "time": 121500, "key": "value2" },
...
{ "date": 20161001, "time": 120000, "key": "valueN" },
],
.
.
"arr7": [
{ "date": 20100101, "time": 120000, "key": "value1" },
{ "date": 20100401, "time": 121500, "key": "value2" },
...
{ "date": 20161001, "time": 120000, "key": "valueN" },
]
}
I want to pull activities that are older than a particular date from all the arrays.
Should I execute a single update that pulls the matching sub-documents from every array at once, i.e.
db.collection.updateMany({}, { $pull: { arr1: { date: { $lt: 151031 } }, arr2: { date: { $lt: 151031 } }, ... arr7: { date: { $lt: 151031 } } } })
Or should I execute a separate update for each array, i.e.
db.collection.updateMany({}, { $pull: { arr1: { date: { $lt: 20160101 } } } })
db.collection.updateMany({}, { $pull: { arr2: { date: { $lt: 20160101 } } } })
...
db.collection.updateMany({}, { $pull: { arr7: { date: { $lt: 20160101 } } } })
Which would be more efficient?
I would say you should use a single query to pull from all the arrays
db.collection.updateMany({}, { $pull: { arr1: { date: { $lt: 151031 } }, arr2: { date: { $lt: 151031 } }, ... arr7: { date: { $lt: 151031 } } } })
This gives the mongo query optimizer full control of the execution. For example it may have to access each object only once and not seven times.
You should look at the exact execution of the query using explain() though (see https://docs.mongodb.com/v3.2/reference/method/cursor.explain/).
You may also want to consider changing your data model, as you might run into MongoDB's 16 MB per-document size limit. Also, array indexes in MongoDB are very inefficient with such huge arrays.

Resources