Mongo query Update year depends on the inner document field - arrays

Here is my data. I want to change the year, but it should affect only the first item of the document's reports array
{
"_id": {
"$oid": "62053aa8aa1cfbe8c4e72662"
},
"school": "Test",
"reports": [
{
"year": "2020", // This has to be changed to 2019
"createdAt": {
"$date": "2022-02-10T17:05:25.682Z"
},
"pid": {
"$oid": "620545d5097761628f32365a"
},
"details": {
"end_date": {
"$date": "2020-03-31T00:00:00.000Z" // when end date is prior to July 01 of the $year mentioned.
}
}
}, {
"year": "2020",
"createdAt": {
"$date": "2022-03-14T19:08:38.125Z"
},
"pid": {
"$oid": "622f92b68a408531d4b784de"
},
"details": {
"end_date": {
"$date": "2021-03-31T00:00:00.000Z"
}
}
}
]
}
In the above data, I want to reduce the year to the previous year, if details.end_date is prior to July 01 of the mentioned year. But it should change only the first item of the embedded array.
For example,
If Year is 2020 and details.end_date is prior to 01-July-2020, then change the year to 2019
If Year is 2020 and details.end_date is after 01-July-2020, then do not change the year
If Year is 2021 and details.end_date is prior to 01-July-2021, then change the year to 2020
If Year is 2021 and details.end_date is after 01-July-2021, then do not change the year

You can do the following in an aggregation pipeline:
isolate the first elem of the reports array for easier processing using $arrayElemAt
use $cond to derive the year value by comparing details.end_date against July 01 of the mentioned year. Use $toInt and $toString for type conversion
use $concatArrays to append back the processed first Elem back to the reports array. Keep only the "tail" (i.e. without the first elem) using $slice
$merge to update the result back to the collection
db.collection.aggregate([
  // Step 1: isolate the first element of the reports array for easier
  // processing.
  {
    "$addFields": {
      "firstElem": { "$arrayElemAt": ["$reports", 0] }
    }
  },
  // Step 2: decrement the year iff details.end_date falls before July 01
  // of the mentioned year (e.g. "2020" -> "2019" when end_date < 2020-07-01).
  {
    "$addFields": {
      "firstElem.year": {
        "$cond": {
          // Fixes two bugs in the original:
          //  - the path was "$firstElem.end_date", but end_date is nested
          //    under "details" in the documents;
          //  - testing only $month < 7 misclassifies end_dates in a later
          //    year (year "2020" with end_date 2021-03-31 must NOT change),
          //    so compare the full date against July 01 of the mentioned
          //    year instead.
          "if": {
            "$lt": [
              "$firstElem.details.end_date",
              {
                "$dateFromString": {
                  "dateString": {
                    "$concat": ["$firstElem.year", "-07-01T00:00:00.000Z"]
                  }
                }
              }
            ]
          },
          "then": {
            // year is stored as a string, so round-trip through $toInt.
            "$toString": {
              "$subtract": [{ "$toInt": "$firstElem.year" }, 1]
            }
          },
          "else": "$firstElem.year"
        }
      }
    }
  },
  // Step 3: prepend the processed first element back onto the tail
  // (reports without its first element).
  // NOTE(review): like the original, this assumes reports has at least two
  // elements — $slice with a count of 0 raises an error; verify if
  // single-element arrays can occur.
  {
    "$addFields": {
      "reports": {
        "$concatArrays": [
          ["$firstElem"],
          {
            "$slice": [
              "$reports",
              1,
              { "$subtract": [{ "$size": "$reports" }, 1] }
            ]
          }
        ]
      }
    }
  },
  // Step 4: drop the scratch field.
  {
    "$project": { "firstElem": false }
  },
  // Step 5: write the transformed documents back to the collection.
  {
    "$merge": {
      "into": "collection",
      "on": "_id",
      "whenMatched": "replace"
    }
  }
])
Here is the Mongo playground for your reference.

Related

find overlapping dates within mongoDB array objects

I have a MongoDB document collection with multiple arrays that looks like this :
{
"_id": "1235847",
"LineItems": [
{
"StartDate": ISODate("2017-07-31T00:00:00.000+00:00"),
"EndDate": ISODate("2017-09-19T00:00:00.000+00:00"),
"Amount": {"$numberDecimal": "0.00"}
},
{
"StartDate": ISODate("2022-03-20T00:00:00.000+00:00"),
"EndDate": ISODate("2022-10-21T00:00:00.000+00:00"),
"Amount": {"$numberDecimal": "6.38"}
},
{
"StartDate": ISODate("2022-09-20T00:00:00.000+00:00"),
"EndDate": ISODate("9999-12-31T00:00:00.000+00:00"),
"Amount": {"$numberDecimal": "6.17"}
}
]
}
Is there a simple way to find documents where a StartDate overlaps with a previous item's start/end dates?
The startdate can not be before previous end dates within the array
The start/end can not be between previous start/end dates within the array
The below works but I don't want to hardcode the array index to find all the documents
{
$match: {
$expr: {
$gt: [
'$LineItems.3.EndDate',
'$LineItems.2.StartDate'
]
}
}
}
Here's one way you could find docs where "StartDate" is earlier than the immediately previous "EndDate".
// Flag documents where any line item's StartDate precedes the previous
// item's EndDate. $reduce walks LineItems pairwise, carrying the previous
// EndDate and a sticky "overlapped" flag.
// NOTE(review): $getField requires MongoDB 5.0+.
db.collection.find({
"$expr": {
// Pull the boolean out of the accumulator object built by $reduce.
"$getField": {
"field": "overlapped",
"input": {
"$reduce": {
// Iterate from the SECOND element onward ...
"input": {"$slice": ["$LineItems", 1, {"$size": "$LineItems"}]},
"initialValue": {
"overlapped": false,
// ... seeded with the first element's EndDate.
"prevEnd": {"$first": "$LineItems.EndDate"}
},
"in": {
"overlapped": {
"$or": [
// Once true, stays true for the rest of the fold.
"$$value.overlapped",
// Overlap: current item starts before the previous one ended.
{"$lt": ["$$this.StartDate", "$$value.prevEnd"]}
]
},
// Current EndDate becomes the next iteration's "previous end".
"prevEnd": "$$this.EndDate"
}
}
}
}
}
})
Try it on mongoplayground.net.

Get frequency for multiple elements in all documents inside a collection mongodb

So here's my problem.
I am new to mongodb and have a collection which documents are saved like this:
{
"_id": {
"$oid": "60626db173b4ca321c02ee3e"
},
"year": "2021",
"name": "Book 1",
"authors": ["Joe, B", "Jessica, K"],
"createdAt": {
"$date": "2021-03-30T00:15:45.859Z"
}
},
{
"_id": {
"$oid": "60626db173b4ca321c02ee4e"
},
"year": "2021",
"authors": ["Carl, B", "Jessica, K"],
"name": "Book 2",
"createdAt": {
"$date": "2021-03-30T00:15:45.859Z"
}
},
I need to get both the frequency of all authors and the years of the books.
The expected result would be something like this (as long as i can get each element frequency it doesn't really matter how the results are returned):
{
"authors": {
"Joe, B": 1,
"Carl, B": 1,
"Jessica, K": 2
},
"year": {
"2021": 2
}
}
I've seen this thread How to count occurence of each value in array? which does the job in one array but i have no idea if its possible to adapt to get the frequency of multiple elements (year, authors) at the same time or how to do it.
I appreciate any help. Thank you.
Demo - https://mongoplayground.net/p/95JtQEThxvV
$group by year $push authors into the array get $sum count of the year occurrence, $unwind into individuals documents.
$group by authors and get $sum count of the author occurrence
$group by null to combine all documents, use $addToSet to push unique values and convert $arrayToObject to get final output in $project
Use $first to carry the year and its count through the author grouping stage.
// Compute two frequency maps in one pipeline: books per year and
// occurrences per author.
db.collection.aggregate([
{
// Group by year: collect every document's authors array and count the
// number of books (yearCount) per year.
$group: {
_id: { year: "$year" },
authors: { $push: "$authors" },
yearCount: { $sum: 1 }
}
},
// $push above produced an array of arrays, so unwind twice to reach the
// individual author strings.
{ $unwind: "$authors" },
{ $unwind: "$authors"},
{
// Group by author to count occurrences; $first carries year/yearCount
// through. NOTE(review): with more than one distinct year, $first keeps
// only one year (and its count) per author, so year counts can be lost —
// verify against multi-year data.
$group: {
_id: { author: "$authors" },
year: { $first: "$_id.year" },
yearCount: { $first: "$yearCount" },
authors: { $push: "$authors" },
authorCount: { $sum: 1 }
}
},
{
// Collapse everything into a single document, building {k, v} pair sets
// that $arrayToObject can consume.
"$group": {
_id: null,
years: {
$addToSet: { k: "$year", v: "$yearCount" }
},
authors: {
$addToSet: { k: "$_id.author", v: "$authorCount" }
}
}
},
{
// Convert the {k, v} pair arrays into the final { value: count } objects.
$project: {
_id: 0,
years: { $arrayToObject: "$years" },
authors: { $arrayToObject: "$authors" }
}
}
])
Demo 2 - For author count grouped by year- https://mongoplayground.net/p/_elnjmknroF

total register users of current month with each date

Here, date is register date and with simple group by I got result like this
[
{ date: '2019-09-01', count: 1 },
{ date: '2019-09-02', count: 3 },
{ date: '2019-09-04', count: 2 },
{ date: '2019-09-05', count: 5 },
// ...
]
But I want every date included; if no user registered on a given date, it should display a count of 0
[
{ date: '2019-09-01', count: 1 },
{ date: '2019-09-02', count: 3 },
{ date: '2019-09-03', count: 0 },
{ date: '2019-09-04', count: 2 },
// ...
]
If no user registered on a given date (here, the 3rd), that date should display a count of 0.
monthalldate = [ '2019-09-1', '2019-09-2', '2019-09-3', '2019-09-4', '2019-09-5', '2019-09-6', '2019-09-7', '2019-09-8', '2019-09-9',
'2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',.......,
'2019-09-30' ]
// Group registrations by day, then map a precomputed list of the month's
// dates to presence flags and re-aggregate to get one count per date.
User.aggregate([
{ "$group": {
// Day bucket: first 10 chars of the ISO date string ("YYYY-MM-DD").
"_id": { "$substr": ["$createdOn", 0, 10] },
"count": { "$sum": 1 },
// NOTE(review): $avg of a date value — presumably unintended; verify.
"time": { "$avg": "$createdOn" },
}},
{ "$sort": { "_id": 1 } },
// NOTE(review): the per-day count is projected under the name "createdOn"
// and never used below — the $cond later emits a literal 1, not the count.
{ "$project": { "date": "$_id", "createdOn": "$count" }},
{ "$group": { "_id": null, "data": { "$push": "$$ROOT" }}},
{ "$project": {
"data": {
"$map": {
"input": monthalldate,
"in": {
"k": "$$this",
// NOTE(review): monthalldate entries are not zero-padded
// ('2019-09-1') while $substr yields '2019-09-01', so this $in
// never matches and every v is 0 — the likely cause of the
// "wrong result". The 1 should presumably also be the real count.
"v": { "$cond": [{ "$in": ["$$this", "$data.date" ] }, 1, 0 ] }
}
}
}
}},
{ "$unwind": "$data" },
{ "$group": { "_id": "$data.k", "count": { "$sum": "$data.v" }}}
]).exec(function (err, montlysub) {
// console.log(montlysub);
});
But I got the wrong result
My user collection
{ "_id" : ObjectId("5a0d3123f954955f15fe88e5"), "createdOn" : ISODate("2019-11-16T06:33:07.838Z"), "name":"test" },
{ "_id" : ObjectId("5a0d3123f954955f15fe88e6"), "createdOn" : ISODate("2019-11-17T06:33:07.838Z"), "name":"test2" }
$project transforms input documents. If there is no user record for a particular month, there are no input documents to transform and you won't have any output for that month.
Ways around this:
Create a collection containing the months (only), then start by retrieving the desired date range from this collection and joining user documents to each month.
Add the missing zero data points in the application, either as the results are iterated or as a post-processing step prior to result rendering.

How to aggregate on a non-standard week?

I have a collection with data like below:
{
"_id": ObjectId("95159a08a27971c35a2683f"),
"Date": ISODate("2018-04-03T07:00:00Z"),
"Employee": "Bill",
"Hours": 7.5
}
{
"_id": ObjectId("372c6be4912fdd32398382f"),
"Date": ISODate("2018-04-05T07:00:00Z"),
"Employee": "Bill",
"Hours": 2
}
And I would like to get the total hours per week, but the week needs to start on Saturday and end on Friday. If I were working with a standard week I would just do:
db.myCollection.aggregate(
{$match: {
Employee: "Bill",
Date: {
$gte: ISODate("2018-03-15T07:00:00Z"),
$lte: ISODate("2018-04-06T07:00:00Z")
}
}},
{$group: {
_id: {$week: "$Date"},
hours: {$sum: "$Hours"}
}
}
)
which works fine for standard weeks that start on Sunday and end Saturday.
How would I modify this to work with the non-standard week that I described? Would I be better off querying the data and aggregating it manually at the code level? Could I aggregate by week for Date - 1 day or would that be too weird?
UPDATE:
If anyone needs to make this work across years they can use the updated version of Neil's answer below:
{ "$group": {
"_id": {
"week":{
"$let": {
"vars": {
"satWeek": {
"$cond": {
"if": { "$eq": [ { "$dayOfWeek": "$Date" }, 7 ] },
"then": { "$add": [ { "$week": "$Date" }, 1 ] },
"else": { "$week": "$Date" }
}
}
},
"in": {
"$cond": {
"if": { "$gt": ["$$satWeek", 52] },
"then": 0,
"else": "$$satWeek"
}
}
}
},
"year": { "$year": "$Date" }
},
"hours": { "$sum": "$hours" }
}}
Well $week returns based on the "input" date, so Date - 1 day would still be in the "previous week" based on what it would return from that input even if you adjusted it by one day.
It's certainly always best to make the "server" do this type of thing, otherwise you're just pulling a lot of data "over the wire" which kind of obviates the point of using a database in the first place.
In brief, "starting on a Saturday" simply means that what was Week 1 becomes Week 2, etc when the day is Saturday. So week + 1. And the only real caveat is that anything above Week 52 becomes Week 0.
Therefore:
{ "$group": {
"_id": {
"$let": {
"vars": {
"satWeek": {
"$cond": {
"if": { "$eq": [ { "$dayOfWeek": "$Date" }, 7 ] },
"then": { "$add": [ { "$week": "$Date" }, 1 ] },
"else": { "$week": "$Date" }
}
}
},
"in": {
"$cond": {
"if": { "$gt": ["$$satWeek", 52] },
"then": 0,
"else": "$$satWeek"
}
}
}
},
"hours": { "$sum": "$hours" }
}}
The main adjusting point there being based around the test for $dayOfWeek, which returns 7 for Saturday.
Of course whether it's 52 or 53 as the boundary depends on whether it's a leap year or not, but since you want to aggregate per "week" then I presume you only want a "year at most", and then can adjust that as an input parameter based on whether your date selection is within a leap year or not.
Or of course, adjust the coding to be even more considerate of that. But the basic principle is adjust the "output week" instead of the "input Date"
As an alternate case, then I guess Date plus 1 day would actually get you the same result by skewing all dates forward:
{ "$group": {
"_id": { "$week": { "$add": [ "$Date", 1000 * 60 * 60 * 24 ] } },
"hours": { "$sum": : "$hours" }
}}
And if you needed local timezone adjustment, then from MongoDB 3.6 you can simply include the timezone information to adjust by:
{ "$group": {
"_id": {
"$week": {
"date": { "$add": [ "$Date", 1000 * 60 * 60 * 24 ] },
"timezone": "America/Chicago"
}
},
"hours": { "$sum": : "$hours" }
}}
Also note as of MongoDB 3.4 there is $isoWeek and like functions, where you get some different handling:
Returns the week number in ISO 8601 format, ranging from 1 to 53. Week numbers start at 1 with the week (Monday through Sunday) that contains the year’s first Thursday.
So all math would be based from Monday instead of Sunday and in consideration of which day of the year is counted as the "first week", as well as starting from 1 instead of 0.

MongoDB : Single update on multiple fields V/S Multiple updates on single field?

I have a collection which has 7 arrays, and each array contains more than 1000 subdocuments.
i.e.
{
"_id": 1,
"arr1": [
{ "date": 20100101, "time": 120000, "key": "value1" },
{ "date": 20100401, "time": 121500, "key": "value2" },
...
{ "date": 20161001, "time": 120000, "key": "valueN" },
],
.
.
"arr7": [
{ "date": 20100101, "time": 120000, "key": "value1" },
{ "date": 20100401, "time": 121500, "key": "value2" },
...
{ "date": 20161001, "time": 120000, "key": "valueN" },
]
}
I want to pull activities that are older than a particular date from all the arrays.
Should i execute a single update which will pull matching sub-documents from each array at once, i.e.
db.collection.updateMany({}, { $pull: { arr1: { date: { $lt: 151031 } }, arr2: { date: { $lt: 151031 } }, ... arr7: { date: { $lt: 151031 } } } })
Or should i execute update separately for each array. i.e.
db.collection.updateMany({}, { $pull: { arr1: { date: { $lt: 20160101 } } } })
db.collection.updateMany({}, { $pull: { arr2: { date: { $lt: 20160101 } } } })
...
db.collection.updateMany({}, { $pull: { arr7: { date: { $lt: 20160101 } } } })
Which would be more effective ?
I would say you should use a single query to pull from all the arrays
db.collection.updateMany({}, { $pull: { arr1: { date: { $lt: 20160101 } }, arr2: { date: { $lt: 20160101 } }, ... arr7: { date: { $lt: 20160101 } } } })
This gives the mongo query optimizer full control of the execution. For example it may have to access each object only once and not seven times.
You should look at the exact execution of the query using explain() though (see https://docs.mongodb.com/v3.2/reference/method/cursor.explain/).
You also may want to consider changing your data model as you might get into problems with mongo 16MB per document restriction. Also array indexes in mongo are very inefficient with such huge arrays.

Resources