Related
I'm working on simple program that counts total number of special units through n number of players.
I have documents similar to this (simplified), where array rosterUnits could be of length 0 to 7. There is a total of 7 special units. I need to know how many of each unit players have in roster.
{
{
_id: ObjectId(...),
member: {
rosterUnits: [ "Unit1", "Unit2", "Unit3", "Unit4"]
}
},
{
_id: ObjectId(...),
member: {
rosterUnits: [ "Unit1", "Unit3"]
}
},
...
}
Expected result would be something like this:
{
_id: ...
result: [
{
name: "Unit1"
count: 2
},
{
name: "Unit2"
count: 1
},
{
name: "Unit3"
count: 2
},
...
{
name: "Unit7"
count: 0
}
]
}
How do I achieve this using aggregate pipeline?
EDIT (2/7/2023)
Excuse me everyone, I thought I provided enough details here but...
Documents are very big and pipeline until this stage is very long.
I wanted to spare you the trouble with the documents
I have guild with up to 50 players. I search for guild then $unwind members of guild and $lookup into members to get member.rosterUnit(s).
This is a full query I came up with:
db.getCollection('guilds').aggregate([
{ $match: { 'profile.id': 'jrl9Q-_CRDGdMyNjTQH1rQ' } },
//{ $match: { 'profile.id': { $in : ['jrl9Q-_CRDGdMyNjTQH1rQ', 'Tv_j9nhRTgufvH7C7oUYAA']} } },
{ $project: { member: 1, profile: 1 } },
{ $unwind: "$member" },
{
$lookup: {
from: "players",
localField: "member.playerId",
foreignField: "playerId",
pipeline: [
{
$project: {
profileStat: 1,
rosterUnit: {
$let: {
vars: { gls: ["JABBATHEHUTT:SEVEN_STAR", "JEDIMASTERKENOBI:SEVEN_STAR", "GRANDMASTERLUKE:SEVEN_STAR", "LORDVADER:SEVEN_STAR", "GLREY:SEVEN_STAR", "SITHPALPATINE:SEVEN_STAR", "SUPREMELEADERKYLOREN:SEVEN_STAR"], },
in: {
$reduce: {
input: "$rosterUnit",
initialValue: [],
in: {
$cond: {
if: { $gt: [{ $indexOfArray: ["$$gls", "$$this.definitionId"] }, -1] },
then: { $concatArrays: ["$$value", [{ definitionId: "$$this.definitionId", count: 1 }]] },
else: { $concatArrays: ["$$value", []] }
}
},
}
}
}
}
}
}
],
as: "member"
}
},
{
$addFields: {
member: { $arrayElemAt: ["$member", 0] },
gpStats: {
$let: {
vars: { member: { $arrayElemAt: ["$member", 0] } },
in: {
$reduce: {
input: "$$member.profileStat",
initialValue: {},
in: {
characterGp: {
$arrayElemAt: [
"$$member.profileStat.value",
{
$indexOfArray: [
"$$member.profileStat.nameKey",
"STAT_CHARACTER_GALACTIC_POWER_ACQUIRED_NAME"
]
}
]
},
shipGp: {
$arrayElemAt: [
"$$member.profileStat.value",
{
$indexOfArray: [
"$$member.profileStat.nameKey",
"STAT_SHIP_GALACTIC_POWER_ACQUIRED_NAME"
]
}
]
}
}
}
}
}
}
}
},
{
$group: {
_id: "$profile.id",
guildName: { $first: "$profile.name" },
memberCount: { $first: "$profile.memberCount" },
guildGp: { $first: "$profile.guildGalacticPower" },
totalGp: { $sum: { $sum: [{ $toInt: "$gpStats.characterGp" }, { $toInt: "$gpStats.shipGp" }] } },
avgTotalGp: { $avg: { $sum: [{ $toInt: "$gpStats.characterGp" }, { $toInt: "$gpStats.shipGp" }] } },
characterGp: { $sum: { $toInt: "$gpStats.characterGp" } },
shipGp: { $sum: { $toInt: "$gpStats.shipGp" } },
}
}
])
I want to add new field in group with desired result from above.
If I do $unwind on member.rosterUnit how do I go back to member grouping?
(Excuse me once again, this is my first question)
Use $unwind to deconstruct the rosterUnits array into separate documents.
Then use $group to group the documents by the rosterUnits values and calculate the count for each unit.
Then use $project to format the output to include only the name and count fields.
db.collection.aggregate([
{
$unwind: "$member.rosterUnits"
},
{
$group: {
_id: "$member.rosterUnits",
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
name: "$_id",
count: "$count"
}
}
])
Yes I think that the best way of do that is using aggregations.
I'm sure there is a better way to do it.
But here is the solution, I hope it works for you friend.
Basically we are going to use a "$group" aggregation and within it using an operator "$cond" and "$in" we are going to validate case by case if the searched element is found. In the case that it is so, we will add one and if the element is not found, zero.
I recommend you download mongodb compass to try it
Aggregation:
[{
$group: {
_id: null,
Unit1: {
$sum: {
$cond: [
{
$in: [
'Unit1',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit2: {
$sum: {
$cond: [
{
$in: [
'Unit2',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit3: {
$sum: {
$cond: [
{
$in: [
'Unit3',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit4: {
$sum: {
$cond: [
{
$in: [
'Unit4',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit5: {
$sum: {
$cond: [
{
$in: [
'Unit5',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit6: {
$sum: {
$cond: [
{
$in: [
'Unit6',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit7: {
$sum: {
$cond: [
{
$in: [
'Unit7',
'$member.rosterUnits'
]
},
1,
0
]
}
}
}
}, {
$project: {
_id: 0
}
}]
Query
because you want to count values that might not exists, you can make the groups manualy, and do conditional count
after the group you can do extra tranformation(if you really need the expected outpute exactly like that). Object to array, and map to give the field names(name,count)
Playmongo
aggregate(
[{"$unwind": "$member.rosterUnits"},
{"$group":
{"_id": null,
"Unit1":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit1"]}, 1, 0]}},
"Unit2":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit2"]}, 1, 0]}},
"Unit3":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit3"]}, 1, 0]}},
"Unit4":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit4"]}, 1, 0]}},
"Unit5":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit5"]}, 1, 0]}},
"Unit6":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit6"]}, 1, 0]}},
"Unit7":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit7"]}, 1, 0]}}}},
{"$unset": ["_id"]},
{"$project":
{"result":
{"$map":
{"input": {"$objectToArray": "$$ROOT"},
"in": {"name": "$$this.k", "count": "$$this.v"}}}}}])
I have a collection "product_reviews" with this document structure
{
_id: 'B000000OE4',
'product/title': 'Working Class Hero',
'product/price': '16.99',
reviews: [
{
'review/userId': 'unknown',
'review/profileName': 'unknown',
'review/helpfulness': '2/3',
'review/score': '4.0',
'review/time': '27/05/1999/00:00:00',
'review/summary': 'Worth it for one song',
'review/text': "I really like Joan Baez'..."
},
{
'review/userId': 'A1W0RKM6J6J73L',
'review/profileName': 'Aaron Woodin (purchagent#aol.com)',
'review/helpfulness': '1/1',
'review/score': '3.0',
'review/time': '09/02/1999/00:00:00',
'review/summary': 'The critical lambasting on the Amazon Page Missed one thing.',
'review/text': "They forgot to mention Mary Chapin..."
},
...
]
}
My goal is to add object for each product (each product has unique _id) that will have following structure:
{
avgReviewScore: 4.5
reviewsCount: 105
reviewScoreDistrib: {
1: 15
2: 0
3: 30
4: 40
5: 20
}
}
I tried numerous aggregation pipelines but couldn't find a solution.
You can try this code:
db.product_reviews.aggregate([{
$unwind: "$reviews"
},
{
$group: {
_id: "$_id",
avgReviewScore: {
$avg: "$reviews.review/score"
},
reviewsCount: {
$sum: 1
},
scores: {
$push: "$reviews.review/score"
}
}
},
{
$project: {
avgReviewScore: 1,
reviewsCount: 1,
reviewScoreDistrib: {
$arrayToObject: {
$map: {
input: [1, 2, 3, 4, 5],
as: "num",
in: {
k: {$toString: "$$num"},
v: {
$size: {
$filter: {
input: "$scores",
as: "s",
cond: {
$eq: ["$$s", "$$num"]
}
}
}
}
}
}
}
}
}
},
{
$merge: {
into: "product_reviews",
on: "_id"
}
}
])
If you have any issue, you can ask
No need to $unwind and $group again (which can be very inefficient). You can use a simple updateMany:
db.collection.updateMany({},
[
{$set: {
reviewsData: {$map: {
input: "$reviews.review/score",
in: {$toDouble: "$$this"}
}}
}},
{$set: {
reviewScoreDistrib: {
$arrayToObject: {$map: {
input: {$range: [1, 6]},
as: "num",
in: {
k: {$toString: "$$num"},
v: {$size: {$filter: {
input: "$reviewsData",
cond: {$eq: ["$$this", "$$num"]}
}}}
}
}}
},
avgReviewScore: {$avg: "$reviewsData"},
reviewsCount: {$size: "$reviewsData"}
}}
])
See how it works on the playground example
db.P2447653_reviews_c.aggregate([{
$group: {_id: {"reviewerID" : "reviewerID", count: {$sum: 1 }}},
$match:{"reviewTime":{$gt:1}},
$project : { "reviewerID":1, "reviewerName":1, "reviewTime":1}}
])
I don't understand the problem, I'm very new to MongoDB
Error: MongoServerError: A pipeline stage specification object must contain exactly one field.
I have no idea what else to try. I'm completely stuck.
Doing some formatting, your query is this:
db.P2447653_reviews_c.aggregate([
{
$group: { _id: { "reviewerID": "reviewerID", count: { $sum: 1 } } },
$match: { "reviewTime": { $gt: 1 } },
$project: { "reviewerID": 1, "reviewerName": 1, "reviewTime": 1 }
}
])
You missed some brackets, must be this:
db.P2447653_reviews_c.aggregate([
{
$group: {
_id: { "reviewerID": "$reviewerID" },
count: { $sum: 1 }
}
},
{ $match: { "reviewTime": { $gt: 1 } } },
{ $project: { "reviewerID": 1, "reviewerName": 1, "reviewTime": 1 } }
])
I am trying to write a performance report based on shapefile data I have stored within docs stored in collections.
Here is a sample of data:
The following function works quite well as it returns the amount of bytes for each document - great, however I would also like to know how many points/pairs are stored within each polygon's linear string for each document.
db.getCollection("_collectionName").aggregate([{"$project": {"object_size": { $bsonSize: "$$ROOT" }}}])
This returns the following set of data (sample):
{ _id: ObjectId("5ef7da26ae8659149c97657e"), rootSize: 42215 },
{ _id: ObjectId("5ef7da45ae8659149c97657f"), rootSize: 118574 },
{ _id: ObjectId("5ef7daf1ae8659149c976585"), rootSize: 11886 },
{ _id: ObjectId("5f216685dbef0f7c3339ec03"), rootSize: 43136 },
{ _id: ObjectId("5ef7daa6ae8659149c976582"), rootSize: 40823 },
{ _id: ObjectId("5f3495129861ce45eb4e9728"), rootSize: 394884 },
{ _id: ObjectId("5ef7d7f6ae8659149c97657c"), rootSize: 125309 },
{ _id: ObjectId("5ef7dad6ae8659149c976584"), rootSize: 127447 },
{ _id: ObjectId("5fa56ef26538cd3bddd8389e"), rootSize: 17670 },
{ _id: ObjectId("5fa56ef26538cd3bddd8389f"), rootSize: 11398 },
{ _id: ObjectId("5fa56ef16538cd3bddd8389c"), rootSize: 2415 },
{ _id: ObjectId("5fa56ef36538cd3bddd838ae"), rootSize: 1757 },
{ _id: ObjectId("5fa56ef36538cd3bddd838b0"), rootSize: 4866 },
{ _id: ObjectId("5fa56ef36538cd3bddd838a8"), rootSize: 1510 },
{ _id: ObjectId("5fa56ef26538cd3bddd838a7"), rootSize: 39631 },
{ _id: ObjectId("5fa56ef36538cd3bddd838ab"), rootSize: 3662 },
{ _id: ObjectId("5fa56ef36538cd3bddd838aa"), rootSize: 15844 },
{ _id: ObjectId("5fa56ef16538cd3bddd8389d"), rootSize: 17196 },
{ _id: ObjectId("5fa56ef26538cd3bddd838a3"), rootSize: 34940 },
{ _id: ObjectId("5fa56ef36538cd3bddd838af"), rootSize: 468367 }
Which is great but it does not tell me how many elements are in the array/linear string within geometry.coordinates.
I have tried the following, but no cigar:
db.getCollection("_collectionName").aggregate([{$project: { count: { $size: { "$ifNull": [ "$geometry", [] ] } } } }])
MongoServerError: The argument to $size must be an array, but was of type: object
It comes back with an error, which i understand - so i referenced the coordinates array:
db.getCollection("_collectionName").aggregate([{$project: { count: { $size: { "$ifNull": [ "$geometry.coordinates", [] ] } } } }])
Which, returned the following data, again correct, if you understand GeoJSON files this is normal as this is the top level of the linear ring, sample data:
{ _id: ObjectId("5ef7da26ae8659149c97657e"), count: 1 }
{ _id: ObjectId("5ef7da45ae8659149c97657f"), count: 1 }
{ _id: ObjectId("5ef7daf1ae8659149c976585"), count: 1 }
{ _id: ObjectId("5f216685dbef0f7c3339ec03"), count: 1 }
So I then added the top level array of 0 to my aggregate function:
db.getCollection("_collectionName").aggregate([{$project: { count: { $size: { "$ifNull": [ "$geometry.coordinates.0", [] ] } } } }])
And this is what was returned:
{ _id: ObjectId("5ef7da26ae8659149c97657e"), count: 0 }
{ _id: ObjectId("5ef7da45ae8659149c97657f"), count: 0 }
{ _id: ObjectId("5ef7daf1ae8659149c976585"), count: 0 }
{ _id: ObjectId("5f216685dbef0f7c3339ec03"), count: 0 }
And that is not possible, here is a screenshot from Studio3T software:
Anybody who might be able to help or point me in the right direction please do so....
(I would be very grateful!)
The dot notation won't work on array elements within an aggregation. You'll want to use the $arrayElemAt operator, as follows:
db.getCollection("_aGStbl").aggregate([{
$project: {
count: { $size: { $arrayElemAt: [ "$geometry.coordinates", 0 ]}}
}
}])
To cater for Null values, you can use a $cond, depending on your objective for the output:
INSERT SOME DATA INTO A TESTDB:
db.arrayTest.insertMany([
{ _id: 1, arrayOfArrays: [ [ 1, 2, 3 ], [ 1, 2, 3, 4 ], [ 1, 2, 3, 4, 5, 6, 7 ] ] },
{ _id: 2, arrayOfArrays: [ [ 4, 5 ], [ 5, 6, 7 ] ] },
{ _id: 3, arrayOfArrays: [ [], [] ] },
{ _id: 4, arrayOfArrays: [ [], [], [] ] },
{ _id: 5 }
] )
{ acknowledged: true,
insertedIds: { '0': 1, '1': 2, '2': 3, '3': 4, '4': 5 } }
TRY THESE AGGREGATE CALLS:
db.arrayTest.aggregate([{$project: { count: { $size: { "$ifNull": [ { $arrayElemAt: [ "$arrayOfArrays", 0 ] }, [ ] ] } } } } ] )
{ _id: 1, count: 3 }
{ _id: 2, count: 2 }
{ _id: 3, count: 0 }
{ _id: 4, count: 0 }
{ _id: 5, count: 0 }
db.arrayTest.aggregate([{$project: { count: { $cond: { if: {$arrayElemAt: [ "$arrayOfArrays", 0 ]}, then: { $size: { $arrayElemAt: [ "$arrayOfArrays", 0 ] } }, else: null} } } } ] )
{ _id: 1, count: 3 }
{ _id: 2, count: 2 }
{ _id: 3, count: 0 }
{ _id: 4, count: 0 }
{ _id: 5, count: null }
I am trying to get first date from inner array in mongodb object and add it to it's parent with aggregation. Example:
car: {
"model": "Astra",
"productions": [
"modelOne": {
"dateOfCreation": "2019-09-30T10:15:25.026+00:00",
"dateOfEstimation": "2017-09-30T10:15:25.026+00:00",
"someOnterInfo": "whatever"
},
"modelTwo": {
"dateOfCreation": "2017-09-30T10:15:25.026+00:00",
"dateOfEstimation": "2019-09-30T10:15:25.026+00:00",
"someOnterInfo": "whatever"
}
]
}
to be turned in
car: {
"model": "Astra",
"earliestDateOfEstimation": "2017-09-30T10:15:25.026+00:00",
"earliestDateOfCreation": "2017-09-30T10:15:25.026+00:00"
}
How can I achieve that?
I'm assuming that modelOne and modelTwo are unknown when you start your aggregation. The key step is to run $map along with $objectToArray in order to get rid of those two values. Then you can just use $min to get "earliest" values:
db.collection.aggregate([
{
$addFields: {
dates: {
$map: {
input: "$car.productions",
in: {
$let: {
vars: { model: { $arrayElemAt: [ { $objectToArray: "$$this" }, 0 ] } },
in: "$$model.v"
}
}
}
}
}
},
{
$project: {
_id: 1,
"car.model": 1,
"car.earliestDateOfEstimation": { $min: "$dates.dateOfEstimation" },
"car.earliestDateOfCreation": { $min: "$dates.dateOfCreation" },
}
}
])
Mongo Playground
EDIT:
First step can be simplified if there's always modelOne, 'modelTwo'... (fixed number)
db.collection.aggregate([
{
$addFields: {
dates: { $concatArrays: [ "$car.productions.modelOne", "$car.productions.modelTwo" ] }
}
},
{
$project: {
_id: 1,
"car.model": 1,
"car.earliestDateOfEstimation": { $min: "$dates.dateOfEstimation" },
"car.earliestDateOfCreation": { $min: "$dates.dateOfCreation" },
}
}
])
Mongo Playground (2)