Mongo DB query for combining sorting and grouping - database

Let's say I have a collection with the following (dummy) data:
Country
State
Population Density (people per km^2)
Cases (in millions)
USA
New York
161
1.03
USA
California
95
4.47
Germany
Berlin
4,227
0.19
India
Kerala
859
4.09
India
Karnataka
319
2.95
India
Maharashtra
370
6.47
What is an optimized mongodb query to get results grouped by country and sorted by the count of states, also each group should contain states sorted by 'Cases'?
The results should look like this in JSON
{
'results': [
{
'country' : 'India',
'num_states': 3,
'states': [
{
'State': 'Maharashtra',
'Cases': 6.47,
'PPD' : 370,
},
{
'State': 'Kerala',
'Cases': 4.09,
'PPD' : 859,
},
{
'State': 'Karnataka',
'Cases': 2.95,
'PPD' : 319,
}
]
},
{
'country' : 'USA',
'num_states': 2,
'states': [
{
'State': 'California',
'Cases': 4.47,
'PPD' : 95,
},
{
'State': 'New York',
'Cases': 1.03,
'PPD' : 161,
}
]
},
{
'country' : 'Germany',
'num_states': 1,
'states': [
{
'State': 'Berlin',
'Cases': 0.19,
'PPD' : 4227,
}
]
},
]
}
Note: My actual data is different, but the use case is the same.

first sort by cases -1
then group by country
add new field for sorting
and project
db.collection.aggregate(
[
{
'$sort': {
'Cases': -1
}
}, {
'$group': {
'_id': {
'country': '$country'
},
'num_states': {
'$sum': 1
},
'states': {
'$push': {
'states': '$$ROOT.state',
'cases': '$$ROOT.Cases',
'ppd': '$$ROOT.Density'
}
}
}
}, {
'$addFields': {
'sorf': {
'$max': '$states.cases'
}
}
}, {
'$sort': {
'sorf': -1
}
}, {
'$project': {
'country': '$_id.country',
'num_states': '$num_states',
'states': 1,
'_id': 0
}
}
])

$sort - sort by cases in descending order
$group - group by country and construct an array of states, and get total count of states
$sort - sort by number of states in descending order
$project - rename the required fields
db.collection.aggregate([
{
"$sort": {
"cases": -1
}
},
{
$group: {
_id: "$country",
num_states: {
$sum: 1
},
states: {
$push: {
state: "$state",
cases: "$cases",
PPD: "$population"
}
}
}
},
{
"$sort": {
"num_states": -1
}
},
{
$project: {
country: "$_id",
num_states: 1,
states: 1,
_id: 0
}
}
])
Mongo Playgroud

Related

Mongoose | Find objects inside of an array, that each object has another array of objects to satisfy condition

I have a collection Shops. Each object in Shops collection has an array of Item objects called items.
{
_id: ObjectId(...),
shopName: 'Ice cream Shop',
items: [
<Item>{
itemName: 'Chocolate IC',
availabilities: [
{
city: 'NY',
arrivals: [
{
price: 3.99,
quantityLeft: 0,
date: 'yesterday'
},
{
price: 3.99,
quantityLeft: 40,
date: 'today'
}
]
},
{
city: 'LA',
arrivals: []
}
]
},
<Item>{
itemName: 'Strawberry IC',
availabilities: [
{
city: 'NY',
arrivals: [
{
price: 3.99,
quantityLeft: 0,
date: 'yesterday'
},
]
}
]
},
],
},
... anotherShops
I want to get list of Item objects which has overall quantityLeft more than 0 from a specific shop.
I tried this code to get all items with the name start with "Straw" from a Shop with shopName equal to 'Ice cream Shop':
const items = await Shop.aggregate()
.match({
shopName: 'Ice cream Shop',
})
.project({
items: {
$filter: {
input: "$items",
as: "item",
cond: {
$regexMatch: {
input: "$$item.itemName",
regex: `.*Straw.*`,
},
},
},
},
});
And it works. But I don't know how to sum up all quantityLeft values inside availabilities array of each item, and return only that items that has sum more than 0.
availabilities array can be an empty array [].
The city parameter also needs to be in condition. For example, only Items that are in stock in NY
I need this to get the list of items from a certain shop, and only the items that are still in stock.
Pretty hard.
I came up with this solution. If you have a better solution, please post it.
const shop = await GCShop.aggregate([
{
$match: {
shopName: 'Ice Cream Shop',
},
},
{
$unwind: "$items",
},
{
$unwind: "$items.availabilities",
},
{
$unwind: "$items.availabilities.arrivals",
},
{
$group: {
_id: "$items.id",
items_name: { $first: "$items.name" },
arrivals: {
$push: {
arrival_id: "$items.availabilities.arrivals.arrival_id",
price: "$items.availabilities.arrivals.price",
qtty: "$items.availabilities.arrivals.qtty",
},
},
totalQtty: { $sum: "$items.availabilities.arrivals.qtty" },
},
},
{
$project: {
offer_id: "$_id",
_id: 0,
offer_name: 1,
totalQtty: 1,
arrivals: 1,
},
},
{
$match: {
totalQtty: {
$gt: 0,
},
},
},
]).limit(20);

MongoDB: nested array count + original document

I have the following document structure which contains an array of votes:
{ _id: ObjectId("6350e2c1a15e0e656f4a7472"),
category: 'business',
votes:
[ { voteType: 'like',
userId: ObjectId("62314007da34df3f32f7cfc0") },
{ voteType: 'like',
userId: ObjectId("6356b5cbe2272ebf628451b") } ] }
What I would like to achieve is to add for each document the sum of votes for which voteType = like, while keeping the original document, such as:
[ [{ _id: ObjectId("6350e2c1a15e0e656f4a7472"),
category: 'business',
votes:
[ { voteType: 'like',
userId: ObjectId("62314007da34df3f32f7cfc0") },
{ voteType: 'like',
userId: ObjectId("6356b5cbe2272ebf628451b") } ] }, {sum: 2, voteType: "like"} ], ...]
At the moment, the only workaround that I found is through an aggregation although I cannot manage to keep the original documents in the results:
db.getCollection('MyDocument') .aggregate([ {
$unwind: "$votes" }, {
$match: {
"votes.voteType": "like",
} }, {
$group: {
_id: {
name: "$_id",
type: "$votes.voteType"
},
count: {
$sum: 1
}
} },
{ $sort : { "count" : -1 } }, {$limit : 5}
])
which gives me:
{ _id: { name: ObjectId("635004f1b96e494947caaa5e"), type: 'like' },
count: 3 }
{ _id: { name: ObjectId("63500456b96e494947cbd448"), type: 'like' },
count: 3 }
{ _id: { name: ObjectId("63500353b6c7eb0a01df268e"), type: 'like' },
count: 2 }
{ _id: { name: ObjectId("634e315bb7d17339f8077c39"), type: 'like' },
count: 1 }
You can do it like this:
$cond with $isArray - to check if the votes property is of the type array.
$filter - to filter votes based on voteType property.
$size - to get the sized of the filtered array.
db.collection.aggregate([
{
"$set": {
"count": {
"$cond": {
"if": {
"$isArray": "$votes"
},
"then": {
"$size": {
"$filter": {
"input": "$votes",
"cond": {
"$eq": [
"$$this.voteType",
"like"
]
}
}
}
},
"else": 0
}
}
}
}
])
Working example

MongoDB aggregation of daily records hourly basis

I have a collection or orders with the following format:
"createTime" : ISODate("2021-04-16T08:01:39.000Z"),
"statusDetails" : [
{
"createTime" : ISODate("2021-04-16T08:01:39.000Z"),
"createUser" : "FOP-SYSTEM",
"stateOccurTimeStr" : "2021-04-16 15:01:39",
"status" : 27,
"statusDesc" : "Shipped"
}
]
where createTime is showing that when the order has been created
statusDetails.status = 27 showing that order has been shipped
statusDetails.createTime showing that when the order has been shipped
The result which I need is something like this:
Order Date
0-4 Hours
4-8 Hours
8-12 Hours
12-24 Hours
> 24 Hours
Total Orders
01-Jan-21
15
10
4
1
1
31
This shows that on "1-Jan-2021" after order creation,
15 orders shipped between 0-4 hours,
10 orders shipped between 4-8 hours,
4 orders shipped between 8-12 hours
and so on.
What I have done so far is:
db.orders.aggregate([
{ $unwind : "$statusDetails"},
{$match: {"statusDetails.status": { "$exists": true, "$eq": 24 }}},
{$project : { createTime: 1,statusDetails:1,
dateDiff: {"$divide" : [{ $subtract: ["$statusDetails.createTime","$createTime" ] },3600000]}}},
{$sort:{"createTime":-1}}
])
But this is showing time difference of each individual record, but I need group by
Edit
I have updated my query and now it is showing records using $group but still I need to add an another pipeline to group the current data.
db.orders.aggregate([
{ $unwind : "$statusDetails"},
{$match: {"statusDetails.status": { "$exists": true, "$eq": 27 }}},
{$project : { createTime: 1,statusDetails:1, dateDiff:{"$floor": {"$divide" : [{ $subtract: ["$statusDetails.createTime","$createTime" ] },3600000]}}}},
{
$group:
{ _id: { year : { $year : "$createTime" }, month : { $month : "$createTime" }, day : { $dayOfMonth : "$createTime" }},
shippedTime: { $push: "$dateDiff" },
count: { $sum: 1 }
}
},
{$sort:{"createTime":-1}}
])
$unwind to deconstruct statusDetails array
$match your conditions
$addFields to add slot field on the base of your date calculation hour slot from equation and get total
$group by date as per your format using $dateToString and slot
$sort by slot in ascending order
$group by only date and construct array of slots and get total orders
db.collection.aggregate([
{ $unwind: "$statusDetails" },
{ $match: { "statusDetails.status": 27 } },
{
$addFields: {
slot: {
$multiply: [
{
$floor: {
$divide: [
{
$abs: {
"$divide": [
{ $subtract: ["$statusDetails.createTime", "$createTime"] },
3600000
]
}
},
4
]
}
},
4
]
}
}
},
{
$group: {
_id: {
date: {
$dateToString: {
date: "$createTime",
format: "%d-%m-%Y"
}
},
slot: "$slot"
},
total: {
$sum: 1
}
}
},
{ $sort: { "_id.slot": 1 } },
{
$group: {
_id: "$_id.date",
hours: {
$push: {
slot: "$_id.slot",
total: "$total"
}
},
totalOrders: { $sum: "$total" }
}
}
])
Playground
Result would be:
[
{
"_id": "16-04-2021",
"hours": [
{ "slot": 0, "total": 1 }, // from 0 to 4
{ "slot": 4, "total": 1 }, // from 4 to 8
{ "slot": 8, "total": 2 } // from 8 to 12
],
"totalOrders": 4
}
]

Update the existing collection from Aggregate Pipeline

I am very new to MongoDB and have been trying to create a new field within my collection that is calculated using existing data.
Is there a way to add the field myRating to the movies collection?
Here is what I came up with.
db.movies.aggregate([
{$unwind: "$genres"},
{$project:{_id:0, title:1, genres:1,
durationScore: {$cond: {if: {$gte: ["$runtime", 90]}, then: 10, else: 5}},
yearScore: {$cond: {if: {$gte: ["$year", 1990]}, then: 10, else: 5}},
genreScore: {$switch:{branches:[
{
case: {$eq :["$genres", "Action"]}, "then": 30 ,
},
{
case: {$eq :["$genres", "Western"]}, "then": 20 ,
},
{
case: {$eq :["$genres", "Comedy"]}, "then": 5 ,
},
{
case: {$eq :["$genres", "Drama"]}, "then": 15 ,
},
],
default: 10
}},
directorScore: {$switch:{branches:[
{
case: {$eq :["$director", "Quentin Tarantino"]}, "then": 20 ,
},
{
case: {$eq :["$director", "Martin Scorsese"]}, "then": 20 ,
},
],
default: 10
}}
}},
{$addFields: { myRating: { $sum: [ "$yearScore", "$durationScore", "$genreScore", "$directorScore" ]}}},
])
Sample of Data.
{
"_id": {
"$oid": "60502686eb0d3e3e849677ef"
},
"title": "Once Upon a Time in the West",
"year": 1968,
"rated": "PG-13",
"runtime": 175,
"countries": [
"Italy",
"USA",
"Spain"
],
"genres": [
"Western"
],
"director": "Sergio Leone",
"writers": [
"Sergio Donati",
"Sergio Leone",
"Dario Argento",
"Bernardo Bertolucci",
"Sergio Leone"
],
"actors": [
"Claudia Cardinale",
"Henry Fonda",
"Jason Robards",
"Charles Bronson"
],
"plot": "Epic story of a mysterious stranger with a harmonica who joins forces with a notorious desperado to protect a beautiful widow from a ruthless assassin working for the railroad.",
"poster": "http://ia.media-imdb.com/images/M/MV5BMTEyODQzNDkzNjVeQTJeQWpwZ15BbWU4MDgyODk1NDEx._V1_SX300.jpg",
"imdb": {
"id": "tt0064116",
"rating": 8.6,
"votes": 201283
},
"tomato": {
"meter": 98,
"image": "certified",
"rating": 9,
"reviews": 54,
"fresh": 53,
"consensus": "A landmark Sergio Leone spaghetti western masterpiece featuring a classic Morricone score.",
"userMeter": 95,
"userRating": 4.3,
"userReviews": 64006
},
"metacritic": 80,
"awards": {
"wins": 4,
"nominations": 5,
"text": "4 wins & 5 nominations."
},
"type": "movie"
}
I would suggest you keep _id field in $project stage.
Without considering performance, simply iterating through the aggregate result and $set myRating field through updateOne using the _id field.
db.movies.aggregate([
...
{$project:{_id:1, title:1, genres:1,
...
]).forEach(result = > {
db.movies.updateOne(
{_id : result._id},
{$set : {myRating : {result.myRating}}
})
})
Starting in MongoDB 4.2, you can use the aggregation pipeline for update operations. Try this query:
db.movies.updateOne(
{ "_id": ObjectId("60502686eb0d3e3e849677ef") },
[
{
$set: {
myRating: {
$let: {
vars: {
durationScore: { $cond: { if: { $gte: ["$runtime", 90] }, then: 10, else: 5 } },
yearScore: { $cond: { if: { $gte: ["$year", 1990] }, then: 10, else: 5 } },
genreScore: {
$switch: {
branches: [
{ case: { $in: ["Action", "$genres"] }, "then": 30 },
{ case: { $in: ["Western", "$genres"] }, "then": 20 },
{ case: { $in: ["Comedy", "$genres"] }, "then": 5 },
{ case: { $in: ["Drama", "$genres"] }, "then": 15 }
],
default: 10
}
},
directorScore: {
$switch: {
branches: [
{ case: { $eq: ["$director", "Quentin Tarantino"] }, "then": 20 },
{ case: { $eq: ["$director", "Martin Scorsese"] }, "then": 20 }
],
default: 10
}
}
},
in: { $sum: ["$$yearScore", "$$durationScore", "$$genreScore", "$$directorScore"] }
}
}
}
}
]
);

How to do pivoting on MongoDB?

I have this 'Sales' collection and a sample of it looks like this:
[
{cusID: 'a412q39x',
cusCountry: 'MEX',
itemPurchased: 'Toy_A'
},
{cusID: 'r760e11s',
cusCountry: 'USA',
itemPurchased: 'Toy_B'
},
{cusID: 'g723f01z',
cusCountry: 'USA',
itemPurchased: 'Toy_C'
},
{cusID: 'h277p01c',
cusCountry: 'CAN',
itemPurchased: 'Toy_B'
}
]
This is the result I am hoping to achieve.
[
{item: 'Toy_A',
USA: 4,
MEX: 2,
CAN: 1,
BRA: 0
},
{item: 'Toy_B',
USA: 3,
MEX: 0,
CAN: 2,
BRA: 1
}
]
I tried:
{
$group:{_id:{toy:'$itemPurchased', country: $cusCountry'},'cnt':{'$sum': 1}}
}
The result was not what I wanted.
[
{
_id.toy: 'Toy_A',
_id.country: 'BRA',
cnt: 43
},
{
_id.toy: 'Toy_A',
_id.country: 'USA',
cnt: 102
},
{
_id.toy: 'Toy_A',
_id.country: 'JPN',
cnt: 72
},
{
_id.toy: 'Toy_B',
_id.country: 'CAN',
cnt: 32
}
]
I have also experimented with $facet but to no avail. Mongo gurus, please enlighten. Thanks in advance.
Try as below:
db.collection.aggregate([
{
$group: { _id: '$itemPurchased' , items: { $push: { "country" : "$cusCountry", "count": { $sum :1} } } }
},
{
"$project": {
"countryCounts": {
"$arrayToObject": {
"$map": {
"input": "$items",
"as": "item",
"in": {
"k": "$$item.country",
"v": "$$item.count",
}
}
}
}
}
},
{ $replaceRoot: { newRoot: { "$mergeObjects":[ "$countryCounts" , { "item":"$_id"} ] } } }
])
You will get the result like below:
{
"USA" : 1,
"item" : "Toy_C"
},
/* 2 */
{
"USA" : 1,
"CAN" : 1,
"item" : "Toy_B"
},
/* 3 */
{
"MEX" : 1,
"item" : "Toy_A"
}

Resources