Performance issue running mongodb aggregation - database

I need to run a query that joins documents from two collections, I wrote an aggregation query but it takes too much time when running in the production database with many documents. Is there any way to write this query in a more efficient way?
Query in Mongo playground: https://mongoplayground.net/p/dLb3hsJHNYt
There are two collections users and activities. I need to run a query to get some users (from users collection), and also their last activity (from activities collection).
Database:
db={
"users": [
{
"_id": 1,
"email": "user1#gmail.com",
"username": "user1",
"country": "BR",
"creation_date": 1646873628
},
{
"_id": 2,
"email": "user2#gmail.com",
"username": "user2",
"country": "US",
"creation_date": 1646006402
}
],
"activities": [
{
"_id": 1,
"email": "user1#gmail.com",
"activity": "like",
"timestamp": 1647564787
},
{
"_id": 2,
"email": "user1#gmail.com",
"activity": "comment",
"timestamp": 1647564834
},
{
"_id": 3,
"email": "user2#gmail.com",
"activity": "like",
"timestamp": 1647564831
}
]
}
Inefficient Query:
db.users.aggregate([
{
// Get users using some filters
"$match": {
"$expr": {
"$and": [
{ "$not": { "$in": [ "$country", [ "AR", "CA" ] ] } },
{ "$gte": [ "$creation_date", 1646006400 ] },
{ "$lte": [ "$creation_date", 1648684800 ] }
]
}
}
},
{
// Get the last activity within the time range
"$lookup": {
"from": "activities",
"as": "last_activity",
"let": { "cur_email": "$email" },
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{ "$eq": [ "$email", "$$cur_email" ] },
{ "$gte": [ "$timestamp", 1647564787 ] },
{ "$lte": [ "$timestamp", 1647564834 ] }
]
}
}
},
{ "$sort": { "timestamp": -1 } },
{ "$limit": 1 }
]
}
},
{
// Remove users with no activity
"$match": {
"$expr": {
"$gt": [ { "$size": "$last_activity" }, 0 ] }
}
}
])
Result:
[
{
"_id": 1,
"country": "BR",
"creation_date": 1.646873628e+09,
"email": "user1#gmail.com",
"last_activity": [
{
"_id": 2,
"activity": "comment",
"email": "user1#gmail.com",
"timestamp": 1.647564788e+09
}
],
"username": "user1"
},
{
"_id": 2,
"country": "US",
"creation_date": 1.646006402e+09,
"email": "user2#gmail.com",
"last_activity": [
{
"_id": 3,
"activity": "like",
"email": "user2#gmail.com",
"timestamp": 1.647564831e+09
}
],
"username": "user2"
}
]
I'm more familiar with relational databases, so I'm struggling a little to run this query efficiently.
Thanks!

Related

How can Update a Nested array in mongoDB

let data = [
{
"_id": '101',
"name": 'category_1',
"subcategory": [
{
"_id": '201',
"name": 'subCategory_1'
},
{
"_id": '202',
"name": 'subCategory_2',
"subsubcategory": [
{
"_id": '301',
"name": 'subsubcategory_1'
},
{
"_id": '302',
"name": 'subsubcategory_2'
}
]
}
]
}
]
How can I change subsubcategory name ?

MongoDB: find sum of nested array elements

Let's say I have a Customer document like the following
db.collection.insertOne( {
"customerName": "John Doe",
"orders": [
{
"type": "regular",
"items": [
{
"name": "itemA",
"price": 11.1
},
{
"name": "itemB",
"price": 22.2
}
]
},
{
"type": "express",
"items": [
{
"name": "itemC",
"price": 33.3
},
{
"name": "itemD",
"price": 44.4
}
]
}
]
})
How can I calculate the total price of all orders (111 in this example)?
You can $unwind twice (because nested array) and group using $sum like this:
db.collection.aggregate([
{
"$unwind": "$orders"
},
{
"$unwind": "$orders.items"
},
{
"$group": {
"_id": "$customerName",
"total": {
"$sum": "$orders.items.price"
}
}
}
])
Example here

How to replace a value of an array in MongoDB aggregation?

I'm trying to replace the value only in the aggregation result, not in the stored document, thats why I use an aggregation and not a update.
Suppose these are the documents stored in the collection:
{ "_id": 1, "first_name": "foo1", items: [
{ "item": "gloves", "body": "hands" },
{ "item": "gloves", "body": "hands" },
{ "item": "shirts" , "body": "torso" }
]
},
{ "_id": 2, "first_name": "foo2", items: [
{ "item": "skirts", "where": "legs" },
{ "item": "jeans", "body": "legs" },
{ "item": "socks" , "body": "feet" },
{ "item": "gloves" , "body": "hands" }
]
},
{ "_id": 3, "first_name": "foo3", items: [
{ "item": "jacket", "where": "torso" },
{ "item": "socks", "body": "feet" },
{ "item": "shirts" , "body": "torso" }
]
}
The aggregation must replace "gloves" by "sandals", and it correspondent attribute "body" from "hands" to "feet".
The expected result is:
{ "_id": 1, "first_name": "foo1", items: [
{ "item": "sandals", "body": "feet" },
{ "item": "sandals", "body": "feet" },
{ "item": "shirts" , "body": "torso" }
]
},
{ "_id": 2, "first_name": "foo2", items: [
{ "item": "skirts", "where": "legs" },
{ "item": "jeans", "body": "legs" },
{ "item": "socks" , "body": "feet" },
{ "item": "sandals" , "body": "feet" }
]
},
{ "_id": 3, "first_name": "foo3", items: [
{ "item": "jacket", "where": "torso" },
{ "item": "socks", "body": "feet" },
{ "item": "shirts" , "body": "torso" }
]
}
So far, I've tried using "$unwind" and $project with "$switch", among other approaches, but none of them work.

Mongodb group and sum two arrays of similar data from aggregate facet

I am running queries that target different levels of social data and have got this working with some fairly large aggregate pipelines which then run inside a $facet.
The results of the $facet section come out with two arrays I then want to combine. eg.
{
"first": [
{
"_id": {
"name": "one",
"_id": "1"
},
"date": [
"2017-09-07T00:00:00.000Z"
],
"data": [
1000
]
},
{
"_id": {
"name": "two",
"_id": "2"
},
"date": [
"2017-09-07T00:00:00.000Z"
],
"data": [
2000
]
}
],
"second": [
{
"_id": {
"name": "1",
"_id": "one"
},
"date": [
"2017-09-07T00:00:00.000Z"
],
"data": [
1000
]
},
{
"_id": {
"name": "two",
"_id": "2"
},
"date": [
"2017-09-07T00:00:00.000Z"
],
"data": [
2000
]
}
]
}
I would like to group these results together and add up the totals to end up with
[ {
"_id": {
"name": "1",
"_id": "one"
},
"date": [
"2017-09-07T00:00:00.000Z"
],
"data": [
2000
]
},{
"_id": {
"name": "2",
"_id": "two"
},
"date": [
"2017-09-07T00:00:00.000Z"
],
"data": [
4000
]
}]
I have a query that groups the individual responses to get them to this level but is missing something that allows me to merge across the two arrays. I have experimented with unwinding the first and second array but haven't been able to find a solution that combines the data across both arrays.
Any help, tips or knowledge greatly appreciated.
Use $concatArrays to merge arrays from $facets output followed by $group with $arrayElemAt to pick the first element for $first and $sum accumulator.
[
{
"$project": {
"combine": {
"$concatArrays": [
"$first",
"$second"
]
}
}
},
{
"$unwind": "$combine"
},
{
"$group": {
"_id": "$combine._id",
"date": {
"$first": {
"$arrayElemAt": [
"$combine.date",
0
]
}
},
"data": {
"$sum": {
"$arrayElemAt": [
"$combine.data",
0
]
}
}
}
}
]

Aggregating array of values in elasticsearch

I need to aggregate an array as follows
Two document examples:
{
"_index": "log",
"_type": "travels",
"_id": "tnQsGy4lS0K6uT3Hwzzo-g",
"_score": 1,
"_source": {
"state": "saopaulo",
"date": "2014-10-30T17",
"traveler": "patrick",
"registry": "123123",
"cities": {
"saopaulo": 1,
"riodejaneiro": 2,
"total": 2
},
"reasons": [
"Entrega de encomenda"
],
"from": [
"CompraRapida"
]
}
},
{
"_index": "log",
"_type": "travels",
"_id": "tnQsGy4lS0K6uT3Hwzzo-g",
"_score": 1,
"_source": {
"state": "saopaulo",
"date": "2014-10-31T17",
"traveler": "patrick",
"registry": "123123",
"cities": {
"saopaulo": 1,
"curitiba": 1,
"total": 2
},
"reasons": [
"Entrega de encomenda"
],
"from": [
"CompraRapida"
]
}
},
I want to aggregate the cities array, to find out all the cities the traveler has gone to. I want something like this:
{
"traveler":{
"name":"patrick"
},
"cities":{
"saopaulo":2,
"riodejaneiro":2,
"curitiba":1,
"total":3
}
}
Where the total is the length of the cities array minus 1. I tried the terms aggregation and the sum, but couldn't output the desired output.
Changes in the document structure can be made, so if anything like that would help me, I'd be pleased to know.
in the document posted above "cities" is not a json array , it is a json object.
If changing the document structure is a possibility I would change cities in the document to be an array of object
example document:
cities : [
{
"name" :"saopaulo"
"visit_count" :"2",
},
{
"name" :"riodejaneiro"
"visit_count" :"1",
}
]
You would then need to set cities to be of type nested in the index mapping
"mappings": {
"<type_name>": {
"properties": {
"cities": {
"type": "nested",
"properties": {
"city": {
"type": "string"
},
"count": {
"type": "integer"
},
"value": {
"type": "long"
}
}
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"registry": {
"type": "string"
},
"state": {
"type": "string"
},
"traveler": {
"type": "string"
}
}
}
}
After which you could use nested aggregation to get the city count per user.
The query would look something on these lines :
{
"query": {
"match": {
"traveler": "patrick"
}
},
"aggregations": {
"city_travelled": {
"nested": {
"path": "cities"
},
"aggs": {
"citycount": {
"cardinality": {
"field": "cities.city"
}
}
}
}
}
}

Resources