Find overlapping dates within MongoDB array objects

I have a MongoDB collection in which each document holds an array of line items, like this:
{
"_id": "1235847",
"LineItems": [
{
"StartDate": ISODate("2017-07-31T00:00:00.000+00:00"),
"EndDate": ISODate("2017-09-19T00:00:00.000+00:00"),
"Amount": {"$numberDecimal": "0.00"}
},
{
"StartDate": ISODate("2022-03-20T00:00:00.000+00:00"),
"EndDate": ISODate("2022-10-21T00:00:00.000+00:00"),
"Amount": {"$numberDecimal": "6.38"}
},
{
"StartDate": ISODate("2022-09-20T00:00:00.000+00:00"),
"EndDate": ISODate("9999-12-31T00:00:00.000+00:00"),
"Amount": {"$numberDecimal": "6.17"}
}
]
}
Is there a simple way to find documents where an item's StartDate overlaps a previous item's StartDate/EndDate range?
A StartDate cannot be before a previous EndDate within the array.
A start/end cannot fall between a previous start/end pair within the array.
The query below works, but I don't want to hardcode the array indexes in order to find all such documents:
{
  $match: {
    $expr: {
      $gt: [
        {$arrayElemAt: ["$LineItems.EndDate", 3]},
        {$arrayElemAt: ["$LineItems.StartDate", 2]}
      ]
    }
  }
}

Here's one way you could find docs where "StartDate" is earlier than the immediately previous "EndDate".
db.collection.find({
  "$expr": {
    "$getField": {
      "field": "overlapped",
      "input": {
        "$reduce": {
          "input": {"$slice": ["$LineItems", 1, {"$size": "$LineItems"}]},
          "initialValue": {
            "overlapped": false,
            "prevEnd": {"$first": "$LineItems.EndDate"}
          },
          "in": {
            "overlapped": {
              "$or": [
                "$$value.overlapped",
                {"$lt": ["$$this.StartDate", "$$value.prevEnd"]}
              ]
            },
            "prevEnd": "$$this.EndDate"
          }
        }
      }
    }
  }
})
Try it on mongoplayground.net.
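For the sample document above this matches, because the third item's StartDate (2022-09-20) falls before the second item's EndDate (2022-10-21). As an alternative, here's a sketch of the same adjacent-pair check built with $zip instead of $reduce (assuming MongoDB 5.0+ for $getField, a non-empty LineItems array, and items stored in chronological order):

db.collection.find({
  "$expr": {
    // true if any adjacent pair of line items overlaps
    "$anyElementTrue": [
      {
        "$map": {
          // pair each item with its successor: [[item0,item1],[item1,item2],...]
          "input": {
            "$zip": {
              "inputs": [
                "$LineItems",
                {"$slice": ["$LineItems", 1, {"$size": "$LineItems"}]}
              ]
            }
          },
          "as": "pair",
          // overlap: the next item's StartDate is before the previous item's EndDate
          "in": {
            "$lt": [
              {"$getField": {"field": "StartDate", "input": {"$arrayElemAt": ["$$pair", 1]}}},
              {"$getField": {"field": "EndDate", "input": {"$arrayElemAt": ["$$pair", 0]}}}
            ]
          }
        }
      }
    ]
  }
})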

Related

Mongo query: update year depending on an inner document field

Here is my data. I want to change the year, but it should affect only the first item of the document's array:
{
"_id": {
"$oid": "62053aa8aa1cfbe8c4e72662"
},
"school": "Test",
"reports": [
{
"year": "2020", // This has to be changed to 2019
"createdAt": {
"$date": "2022-02-10T17:05:25.682Z"
},
"pid": {
"$oid": "620545d5097761628f32365a"
},
"details": {
"end_date": {
"$date": "2020-03-31T00:00:00.000Z" // when end date is prior to July 01 of the $year mentioned.
}
}
}, {
"year": "2020",
"createdAt": {
"$date": "2022-03-14T19:08:38.125Z"
},
"pid": {
"$oid": "622f92b68a408531d4b784de"
},
"details": {
"end_date": {
"$date": "2021-03-31T00:00:00.000Z"
}
}
}
]
}
In the above data, I want to reduce the year to the previous year if details.end_date is prior to July 01 of the mentioned year, but it should change only the first item of the embedded array.
For example,
If Year is 2020 and details.end_date is prior to 01-July-2020, then change the year to 2019
If Year is 2020 and details.end_date is after 01-July-2020, then do not change the year
If Year is 2021 and details.end_date is prior to 01-July-2021, then change the year to 2020
If Year is 2021 and details.end_date is after 01-July-2021, then do not change the year
You can do the following in an aggregation pipeline:
isolate the first element of the reports array for easier processing, using $arrayElemAt
use $cond to derive the year value with $month; use $toInt and $toString for type conversion
use $concatArrays to prepend the processed first element back onto the reports array, keeping only the "tail" (i.e. everything except the first element) with $slice
$merge to update the result back into the collection
db.collection.aggregate([
  {
    "$addFields": {
      "firstElem": {"$arrayElemAt": ["$reports", 0]}
    }
  },
  {
    "$addFields": {
      "firstElem.year": {
        "$cond": {
          "if": {"$lt": [{"$month": "$firstElem.details.end_date"}, 7]},
          "then": {"$toString": {"$subtract": [{"$toInt": "$firstElem.year"}, 1]}},
          "else": "$firstElem.year"
        }
      }
    }
  },
  {
    "$addFields": {
      "reports": {
        "$concatArrays": [
          ["$firstElem"],
          {"$slice": ["$reports", 1, {"$subtract": [{"$size": "$reports"}, 1]}]}
        ]
      }
    }
  },
  {
    "$project": {"firstElem": false}
  },
  {
    "$merge": {
      "into": "collection",
      "on": "_id",
      "whenMatched": "replace"
    }
  }
])
Here is the Mongo playground for your reference.
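Note that $month compares only the month and ignores the year: with year "2020" and an end_date of 2021-03-31 the condition would still fire, even though that date is after 01-July-2020. If you need to compare against July 01 of the mentioned year itself, here is a sketch of an alternative "if" condition using $dateFromParts:

{
  "$lt": [
    "$firstElem.details.end_date",
    {
      "$dateFromParts": {
        "year": {"$toInt": "$firstElem.year"},
        "month": 7,
        "day": 1
      }
    }
  ]
}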

Aggregating a total from subdocuments in MongoDB

I have a document like the one below; I'd essentially like to produce an aggregate for the items in a subdocument.
Essentially each document is a sales record, which has details of the sale and a subdocument/array with the qtys of each item sold.
I'd like to produce a summary of all the items sold.
So an example collection is:
{
non_relevant_1: "ABC",
non_relevant_2: "DEF",
items_array: {
"item_1": 1,
"item_2": 2,
"item_3": 1,
"item_4": 1
}
},
{
non_relevant_1: "HIJ",
non_relevant_2: "KLM",
items_array: {
"item_1": 3,
"item_2": 2,
"item_3": 4
}
}
I'd then like to be able to produce something like:
{
items_array: {
"item_1": 4,
"item_2": 4,
"item_3": 5,
"item_4": 1
}
}
Many thanks in advance.
I think you need to change your schema; you are saving data in keys.
MongoDB operators are not made for unknown keys: for example, we can't group by an unknown key. To handle those we do complicated and slow things like $objectToArray.
The result you want has the same problem.
If you look at the query below, with a changed schema (and with results that don't hold data in keys) only the middle $unwind and $group stages would be needed.
I mean, instead of
items_array: {
"item_1": 1,
"item_2": 2,
"item_3": 1,
"item_4": 1
}
Your collection should have looked like the following (the first part of the query below performs this schema change):
items_array: [
  {"name": "item_1", "qty": 1},
  {"name": "item_2", "qty": 2},
  {"name": "item_3", "qty": 1},
  {"name": "item_4", "qty": 1}
]
Also, the results should have known keys only.
Maybe that is the reason you were stuck. Changing the schema will make things much easier for you.
Test code here
Query (this query works with your current schema, but see the advice above):
db.collection.aggregate([
  {
    "$addFields": {
      "items_array": {
        "$map": {
          "input": {
            "$map": {
              "input": {"$objectToArray": "$items_array"},
              "as": "m",
              "in": ["$$m.k", "$$m.v"]
            }
          },
          "as": "item",
          "in": {
            "name": {"$arrayElemAt": ["$$item", 0]},
            "qty": {"$arrayElemAt": ["$$item", 1]}
          }
        }
      }
    }
  },
  {"$unwind": {"path": "$items_array"}},
  {
    "$group": {
      "_id": "$items_array.name",
      "total-qty": {"$sum": "$items_array.qty"}
    }
  },
  {
    "$group": {
      "_id": null,
      "items_array": {
        "$push": {
          "$map": {
            "input": {
              "$map": {
                "input": {"$objectToArray": "$$ROOT"},
                "as": "m",
                "in": ["$$m.k", "$$m.v"]
              }
            },
            "as": "i",
            "in": {"$arrayElemAt": ["$$i", 1]}
          }
        }
      }
    }
  },
  {"$project": {"_id": 0}},
  {
    "$addFields": {
      "items_array": {"$arrayToObject": "$items_array"}
    }
  }
])
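For comparison, here is a minimal sketch of the whole query under the suggested schema, i.e. assuming items_array is already stored as [{name, qty}, ...]; only the middle $unwind and $group stages remain:

db.collection.aggregate([
  // one document per sold item
  {"$unwind": "$items_array"},
  // sum the quantities per item name
  {
    "$group": {
      "_id": "$items_array.name",
      "total_qty": {"$sum": "$items_array.qty"}
    }
  }
])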

Multikey partial index not used with elemMatch

Consider the following document format, which has an array field tasks holding embedded documents:
{
"foo": "bar",
"tasks": [
{
"status": "sleep",
"id": "1"
},
{
"status": "active",
"id": "2"
}
]
}
There exists a partial index on key tasks.id
{
"v": 2,
"unique": true,
"key": {
"tasks.id": 1
},
"name": "tasks.id_1",
"partialFilterExpression": {
"tasks.id": {
"$exists": true
}
},
"ns": "zardb.quxcollection"
}
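For reference, an index like this could be created with a call along these lines (a sketch; the collection name is taken from the ns field above):

db.quxcollection.createIndex(
  {"tasks.id": 1},
  {
    "unique": true,
    "partialFilterExpression": {"tasks.id": {"$exists": true}}
  }
)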
The following $elemMatch query with multiple conditions on the same array element
db.quxcollection.find(
{
"tasks": {
"$elemMatch": {
"id": {
"$eq": "1"
},
"status": {
"$nin": ["active"]
}
}
}
}).explain()
does not seem to use the index
"winningPlan": {
"stage": "COLLSCAN",
"filter": {
"tasks": {
"$elemMatch": {
"$and": [{
"id": {
"$eq": "1"
}
},
{
"status": {
"$not": {
"$eq": "active"
}
}
}
]
}
}
},
"direction": "forward"
}
How can I make the above query use the index? The index does seem to be used via dot notation
db.quxcollection.find({"tasks.id": "1"})
however, I need the same array element to match multiple conditions including the status field, and the following does not seem to be equivalent to the above $elemMatch-based query:
db.quxcollection.find({
"tasks.id": "1",
"tasks.status": { "$nin": ["active"] }
})
Partial indexes use the field path as a key. With $elemMatch you don't have the path explicitly in the query, so if you check with .explain("allPlansExecution") the index is not even considered by the query planner.
To benefit from the index you can specify the path explicitly in the query:
db.quxcollection.find(
{
"tasks.id": "1",
"tasks": {
"$elemMatch": {
"id": {
"$eq": "1"
},
"status": {
"$nin": ["active"]
}
}
}
}).explain()
This duplicates part of the $elemMatch condition, so the index will be used to get all documents containing a task with the specific id, and documents whose matching task is "active" are then filtered out at the fetch stage. I must admit the query doesn't look nice, so maybe add some comments to the code explaining why the condition is duplicated.

Query only for numbers in nested array

I am trying to get the average of a key in a nested array inside a document, but I'm not sure how to accomplish this.
Here is what my document looks like:
{
"_id": {
"$oid": "XXXXXXXXXXXXXXXXX"
},
"data": {
"type": "PlayerRoundData",
"playerId": "XXXXXXXXXXXXX",
"groupId": "XXXXXXXXXXXXXX",
"holeScores": [
{
"type": "RoundHoleData",
"points": 2
},
{
"type": "RoundHoleData",
"points": 13
},
{
"type": "RoundHoleData",
"points": 3
},
{
"type": "RoundHoleData",
"points": 1
},
{
"type": "RoundHoleData",
"points": 21
}
]
}
}
Now, the tricky part of this is that I only want the average of points for holeScores[0] across all documents with this playerId and this groupId.
Actually, the best solution would be to collect all documents with the playerId and groupId and create a new array with the averages of holeScores[0], holeScores[1], holeScores[2]... But if I can only get one array key at a time, that would be OK too :-)
Here is what I am thinking, but I'm not quite sure how to put it together:
var allScores = dbCollection('scores').aggregate(
  {$match: {"data.groupId": groupId, "data.playerId": playerId}},
  {$group: {
    _id: playerId,
    rounds: {$sum: 1},
    result: {$sum: "$data.scoreTotals.points"}
  }}
);
Really hoping for help with this issue and thanks in advance :-)
You can use $unwind with includeArrayIndex to get the index, and then use $group to group by that index:
dbCollection('scores').aggregate(
  {
    $match: {"data.playerId": "XXXXXXXXXXXXX", "data.groupId": "XXXXXXXXXXXXXX"}
  },
  {
    $unwind: {
      path: "$data.holeScores",
      includeArrayIndex: "index"
    }
  },
  {
    $group: {
      _id: "$index",
      playerId: {$first: "$data.playerId"},
      avg: {$avg: "$data.holeScores.points"}
    }
  }
)
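With the sample document above as the only match, this produces one result per hole index, along these lines (a sketch of the expected shape, in no particular order):

// { "_id": 0, "playerId": "XXXXXXXXXXXXX", "avg": 2 }
// { "_id": 1, "playerId": "XXXXXXXXXXXXX", "avg": 13 }
// { "_id": 2, "playerId": "XXXXXXXXXXXXX", "avg": 3 }
// { "_id": 3, "playerId": "XXXXXXXXXXXXX", "avg": 1 }
// { "_id": 4, "playerId": "XXXXXXXXXXXXX", "avg": 21 }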
You can try the below aggregation, which sums holeScores[0].points across the matching documents:
db.collection.aggregate(
{ "$match": { "data.groupId": groupId, "data.playerId": playerId }},
{ "$group": {
"_id": null,
"result": {
"$sum": {
"$arrayElemAt": [
"$data.holeScores.points",
0
]
}
}
}}
)
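Since the question asks for an average rather than a sum, here is a sketch of the same $group with $avg (assuming you want the mean of holeScores[0].points across the matched documents):

{ "$group": {
  "_id": null,
  "result": {
    "$avg": {"$arrayElemAt": ["$data.holeScores.points", 0]}
  }
}}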

Elasticsearch: sort by max value in array

Let's say I have 2 documents:
{
"id": "1234",
"things": [
{
"datetime": "2016-01-01T12:00:00+03:00"
},
{
"datetime": "2016-01-06T12:00:00+03:00"
},
{
"datetime": "2100-01-01T12:00:00+03:00"
}
]
}
and
{
"id": "5678",
"things": [
{
"datetime": "2016-01-03T12:00:00+03:00"
},
{
"datetime": "2100-01-06T12:00:00+03:00"
}
]
}
things.datetime is mapped as { "type": "date", "format": "date_time_no_millis" }.
I want to sort these documents based on the latest things.datetime value that is not in the future.
I.e. sorting simply by the max things.datetime would use the dates 2100-01-01T12:00:00+03:00 and 2100-01-06T12:00:00+03:00; I want the sorting to be based on the values 2016-01-06T12:00:00+03:00 and 2016-01-03T12:00:00+03:00.
How can I achieve this, using ElasticSearch 2.x?
I've tried:
"sort": {
"things.datetime": {
"order": "desc",
"mode": "max"
}
}
But that doesn't seem to sort even by the 2100 dates.
I also tried to use nested_filter like so:
"sort": {
"things.datetime": {
"order": "desc",
"mode": "max",
"nested_filter": {
"range": {
"things.datetime": { "lte": "now" }
}
}
}
}
But it doesn't work as I'd expect.
Also the "sort" value in the response is a negative number. So for a document with dates:
"2015-10-24T05:50:00+03:00",
"2015-10-26T22:05:48+02:00",
"2015-10-24T08:05:43+03:00"
gets a negative sort value:
"sort": [
-9223372036854775808
]
The correct way to achieve this seems to be:
"sort": {
"things.datetime": {
"order": "desc",
"mode": "max",
"nested_path": "things",
"nested_filter": {
"range": {
"things.datetime": { "lte": "now" }
}
}
}
}
When the nested_filter leaves no dates for a document, its sort value becomes a large negative number (Long.MIN_VALUE, the -9223372036854775808 seen above), which pushes that document to the end of the descending order as intended.
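Note that nested_path and nested_filter only apply when things is mapped as a nested type. A mapping sketch (the type name mytype is hypothetical):

{
  "mappings": {
    "mytype": {
      "properties": {
        "things": {
          "type": "nested",
          "properties": {
            "datetime": {"type": "date", "format": "date_time_no_millis"}
          }
        }
      }
    }
  }
}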
