Help to "flatten" (to pull nested fields at same level as document's fields) a mongodb document in a query
//this is "anagrafiche" collection
[{
"name": "tizio"
,"surname": "semproni"
,"birthday": "01/02/1923"
,"home": {
"road": "via"
,"roadname": "bianca"
,"roadN": 12
,"city": "rome"
,"country": "italy"
}
},
{
"name": "caio"
,"surname": "giulio"
,"birthday": "02/03/1932"
,"home": {
"road": "via"
,"roadname": "rossa"
,"roadN": 21
,"city": "milan"
,"country": "italy"
}
},
{
"name": "mario"
,"surname": "rossi"
// birthday is not present for this document
,"home": {
"road": "via"
,"roadname": "della pace"
,"roadN": 120
,"city": "rome"
,"country": "italy"
}
}
]
my query:
db.anagrafiche.aggregate([ {$match {"home.city": "rome"}}
{$project:{"name": 1, "surname":1, <an expression to flatten the address>, "birthday": 1, "_id":0}}
]
);
expected result:
{
,"name": "tizio"
,"surname": "semproni"
,"address": "via bianca 12 rome"
,"birthday": 01/02/1923
},{
,"name": "mario"
,"surname": "rossi"
,"address": "via della pace 120 rome"
,"birthday": NULL
}
You can use $objectToArray to get nested document keys and values and then use $reduce along with $concat to concatenate values dynamically:
db.collection.aggregate([
{
$project: {
_id: 0,
name: 1,
surname: 1,
birthday: 1,
address: {
$reduce: {
input: { $objectToArray: "$home" },
initialValue: "",
in: {
$concat: [
"$$value",
{ $cond: [ { $eq: [ "$$value", "" ] }, "", " " ] },
{ $toString: "$$this.v" }
]
}
}
}
}
}
])
Mongo Playground
Related
I'm working on simple program that counts total number of special units through n number of players.
I have documents similar to this (simplified), where array rosterUnits could be of length 0 to 7. There is a total of 7 special units. I need to know how many of each unit players have in roster.
{
{
_id: ObjectId(...),
member: {
rosterUnits: [ "Unit1", "Unit2", "Unit3", "Unit4"]
}
},
{
_id: ObjectId(...),
member: {
rosterUnits: [ "Unit1", "Unit3"]
}
},
...
}
Expected result would be something like this:
{
_id: ...
result: [
{
name: "Unit1"
count: 2
},
{
name: "Unit2"
count: 1
},
{
name: "Unit3"
count: 2
},
...
{
name: "Unit7"
count: 0
}
]
}
How do I achieve this using aggregate pipeline?
EDIT (2/7/2023)
Excuse me everyone, I thought I provided enough details here but...
Documents are very big and pipeline until this stage is very long.
I wanted to spare you the trouble with the documents
I have guild with up to 50 players. I search for guild then $unwind members of guild and $lookup into members to get member.rosterUnit(s).
This is a full query I came up with:
db.getCollection('guilds').aggregate([
{ $match: { 'profile.id': 'jrl9Q-_CRDGdMyNjTQH1rQ' } },
//{ $match: { 'profile.id': { $in : ['jrl9Q-_CRDGdMyNjTQH1rQ', 'Tv_j9nhRTgufvH7C7oUYAA']} } },
{ $project: { member: 1, profile: 1 } },
{ $unwind: "$member" },
{
$lookup: {
from: "players",
localField: "member.playerId",
foreignField: "playerId",
pipeline: [
{
$project: {
profileStat: 1,
rosterUnit: {
$let: {
vars: { gls: ["JABBATHEHUTT:SEVEN_STAR", "JEDIMASTERKENOBI:SEVEN_STAR", "GRANDMASTERLUKE:SEVEN_STAR", "LORDVADER:SEVEN_STAR", "GLREY:SEVEN_STAR", "SITHPALPATINE:SEVEN_STAR", "SUPREMELEADERKYLOREN:SEVEN_STAR"], },
in: {
$reduce: {
input: "$rosterUnit",
initialValue: [],
in: {
$cond: {
if: { $gt: [{ $indexOfArray: ["$$gls", "$$this.definitionId"] }, -1] },
then: { $concatArrays: ["$$value", [{ definitionId: "$$this.definitionId", count: 1 }]] },
else: { $concatArrays: ["$$value", []] }
}
},
}
}
}
}
}
}
],
as: "member"
}
},
{
$addFields: {
member: { $arrayElemAt: ["$member", 0] },
gpStats: {
$let: {
vars: { member: { $arrayElemAt: ["$member", 0] } },
in: {
$reduce: {
input: "$$member.profileStat",
initialValue: {},
in: {
characterGp: {
$arrayElemAt: [
"$$member.profileStat.value",
{
$indexOfArray: [
"$$member.profileStat.nameKey",
"STAT_CHARACTER_GALACTIC_POWER_ACQUIRED_NAME"
]
}
]
},
shipGp: {
$arrayElemAt: [
"$$member.profileStat.value",
{
$indexOfArray: [
"$$member.profileStat.nameKey",
"STAT_SHIP_GALACTIC_POWER_ACQUIRED_NAME"
]
}
]
}
}
}
}
}
}
}
},
{
$group: {
_id: "$profile.id",
guildName: { $first: "$profile.name" },
memberCount: { $first: "$profile.memberCount" },
guildGp: { $first: "$profile.guildGalacticPower" },
totalGp: { $sum: { $sum: [{ $toInt: "$gpStats.characterGp" }, { $toInt: "$gpStats.shipGp" }] } },
avgTotalGp: { $avg: { $sum: [{ $toInt: "$gpStats.characterGp" }, { $toInt: "$gpStats.shipGp" }] } },
characterGp: { $sum: { $toInt: "$gpStats.characterGp" } },
shipGp: { $sum: { $toInt: "$gpStats.shipGp" } },
}
}
])
I want to add new field in group with desired result from above.
If I do $unwind on member.rosterUnit how do I go back to member grouping?
(Excuse me once again, this is my first question)
Use $unwind to deconstruct the rosterUnits array into separate documents.
Then use $group to group the documents by the rosterUnits values and calculate the count for each unit.
Then use $project to format the output to include only the name and count fields.
db.collection.aggregate([
{
$unwind: "$member.rosterUnits"
},
{
$group: {
_id: "$member.rosterUnits",
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
name: "$_id",
count: "$count"
}
}
])
Yes I think that the best way of do that is using aggregations.
I'm sure there is a better way to do it.
But here is the solution, I hope it works for you friend.
Basically we are going to use a "$group" aggregation and within it using an operator "$cond" and "$in" we are going to validate case by case if the searched element is found. In the case that it is so, we will add one and if the element is not found, zero.
I recommend you download mongodb compass to try it
Aggregation:
[{
$group: {
_id: null,
Unit1: {
$sum: {
$cond: [
{
$in: [
'Unit1',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit2: {
$sum: {
$cond: [
{
$in: [
'Unit2',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit3: {
$sum: {
$cond: [
{
$in: [
'Unit3',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit4: {
$sum: {
$cond: [
{
$in: [
'Unit4',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit5: {
$sum: {
$cond: [
{
$in: [
'Unit5',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit6: {
$sum: {
$cond: [
{
$in: [
'Unit6',
'$member.rosterUnits'
]
},
1,
0
]
}
},
Unit7: {
$sum: {
$cond: [
{
$in: [
'Unit7',
'$member.rosterUnits'
]
},
1,
0
]
}
}
}
}, {
$project: {
_id: 0
}
}]
Query
because you want to count values that might not exists, you can make the groups manualy, and do conditional count
after the group you can do extra tranformation(if you really need the expected outpute exactly like that). Object to array, and map to give the field names(name,count)
Playmongo
aggregate(
[{"$unwind": "$member.rosterUnits"},
{"$group":
{"_id": null,
"Unit1":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit1"]}, 1, 0]}},
"Unit2":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit2"]}, 1, 0]}},
"Unit3":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit3"]}, 1, 0]}},
"Unit4":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit4"]}, 1, 0]}},
"Unit5":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit5"]}, 1, 0]}},
"Unit6":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit6"]}, 1, 0]}},
"Unit7":
{"$sum":
{"$cond": [{"$eq": ["$member.rosterUnits", "Unit7"]}, 1, 0]}}}},
{"$unset": ["_id"]},
{"$project":
{"result":
{"$map":
{"input": {"$objectToArray": "$$ROOT"},
"in": {"name": "$$this.k", "count": "$$this.v"}}}}}])
temporaryshifts is equal to
[{_id:123,
arr:[{_id:123321,name:"Bluh Bluh",date:"bluh bluh"}]
}]
so i want to access temporaryshifts[0].arr[0]
but i dont know how to access
$project:{
shiftArr:{$arrayElemAt:['$temporaryshifts',0]}
}
You have to make use of the let operators to make use of two $arrayElemAt Operators.
db.collection.aggregate([
{
"$project": {
"temporaryshifts": {
"$let": {
"vars": {
"masterKey": {
"$arrayElemAt": [
"$temporaryshifts",
0
]
}
},
"in": {
"$arrayElemAt": [
"$$masterKey.arr",
0
]
}
},
}
},
},
])
Mongo Playground Sample Execution
To Understand Properly i Am taking this example and suppose that this our data
`
[
{
_id: 123,
arr: [
{
_id: 123321,
name: "Bluh Bluh",
date: "bluh bluh"
},
{
_id: 1233219,
name: "Bluh Bluh2",
date: "bluh bluh2"
}
]
},
{
_id: 1234,
arr: [
{
_id: 1233214,
name: "Bluh Bluh4",
date: "bluh bluh4"
}
]
},
]
`
Can get temporaryshifts[0].arr[0] with this query
db.collection.aggregate([
{
"$match": {
"_id": 123
}
},
{
"$project": {
shiftArr: {
$arrayElemAt: [
"$arr",
0
]
}
}
}
])
You need to match at the first time to get the document and rest of thing arrayElementAt can manage.
I have a collection called shows, with documents as:
{
"url": "http://www.tvmaze.com/shows/167/24",
"name": "24",
"genres": [
"Drama",
"Action"
],
"runtime": 60
},
{
"url": "http://www.tvmaze.com/shows/4/arrow",
"name": "Arrow",
"genres": [
"Drama",
"Action",
"Science-Fiction"
],
"runtime": 60
}
I wanted to search shows with genre 'Action' and project the result array as
{
"url": "http://www.tvmaze.com/shows/167/24",
"name": "24",
"genres": [
"Action" // I want only the matched item in
//my result array
],
"runtime": 60
} , //same for the second doc as well
If I use
db.shows.find({genres:'Action'}, {'genres.$': 1});
It works but the same does not work in aggregate method with $project
Shows.aggregate([
{
$match: { 'genres': 'Action'}
},
{
$project: {
_id: 0,
url: 1,
name: 1,
runtime: 1,
'genres.$': 1
}
}
]);
this is the error I get on this aggregate query
Invalid $project :: caused by :: FieldPath field names may not start with '$'."
db.collection.aggregate([
{
$match: {
"genres": {
$regex: "/^action/",
$options: "im"
}
}
},
{
$project: {
_id: 0,
url: 1,
name: 1,
runtime: 1,
genres: {
$filter: {
input: "$genres",
as: "genre",
cond: {
$regexMatch: {
input: "$$genre",
regex: "/^action/",
options: "im"
}
}
}
}
}
}
])
Here is how I solved it, Thanks #turivishal for the help
I am very new to MongoDB and have been trying to create a new field within my collection that is calculated using existing data.
Is there a way to add the field myRating to the movies collection?
Here is what I came up with.
db.movies.aggregate([
{$unwind: "$genres"},
{$project:{_id:0, title:1, genres:1,
durationScore: {$cond: {if: {$gte: ["$runtime", 90]}, then: 10, else: 5}},
yearScore: {$cond: {if: {$gte: ["$year", 1990]}, then: 10, else: 5}},
genreScore: {$switch:{branches:[
{
case: {$eq :["$genres", "Action"]}, "then": 30 ,
},
{
case: {$eq :["$genres", "Western"]}, "then": 20 ,
},
{
case: {$eq :["$genres", "Comedy"]}, "then": 5 ,
},
{
case: {$eq :["$genres", "Drama"]}, "then": 15 ,
},
],
default: 10
}},
directorScore: {$switch:{branches:[
{
case: {$eq :["$director", "Quentin Tarantino"]}, "then": 20 ,
},
{
case: {$eq :["$director", "Martin Scorsese"]}, "then": 20 ,
},
],
default: 10
}}
}},
{$addFields: { myRating: { $sum: [ "$yearScore", "$durationScore", "$genreScore", "$directorScore" ]}}},
])
Sample of Data.
{
"_id": {
"$oid": "60502686eb0d3e3e849677ef"
},
"title": "Once Upon a Time in the West",
"year": 1968,
"rated": "PG-13",
"runtime": 175,
"countries": [
"Italy",
"USA",
"Spain"
],
"genres": [
"Western"
],
"director": "Sergio Leone",
"writers": [
"Sergio Donati",
"Sergio Leone",
"Dario Argento",
"Bernardo Bertolucci",
"Sergio Leone"
],
"actors": [
"Claudia Cardinale",
"Henry Fonda",
"Jason Robards",
"Charles Bronson"
],
"plot": "Epic story of a mysterious stranger with a harmonica who joins forces with a notorious desperado to protect a beautiful widow from a ruthless assassin working for the railroad.",
"poster": "http://ia.media-imdb.com/images/M/MV5BMTEyODQzNDkzNjVeQTJeQWpwZ15BbWU4MDgyODk1NDEx._V1_SX300.jpg",
"imdb": {
"id": "tt0064116",
"rating": 8.6,
"votes": 201283
},
"tomato": {
"meter": 98,
"image": "certified",
"rating": 9,
"reviews": 54,
"fresh": 53,
"consensus": "A landmark Sergio Leone spaghetti western masterpiece featuring a classic Morricone score.",
"userMeter": 95,
"userRating": 4.3,
"userReviews": 64006
},
"metacritic": 80,
"awards": {
"wins": 4,
"nominations": 5,
"text": "4 wins & 5 nominations."
},
"type": "movie"
}
I would suggest you keep _id field in $project stage.
Without considering performance, simply iterating through the aggregate result and $set myRating field through updateOne using the _id field.
db.movies.aggregate([
...
{$project:{_id:1, title:1, genres:1,
...
]).forEach(result = > {
db.movies.updateOne(
{_id : result._id},
{$set : {myRating : {result.myRating}}
})
})
Starting in MongoDB 4.2, you can use the aggregation pipeline for update operations. Try this query:
db.movies.updateOne(
{ "_id": ObjectId("60502686eb0d3e3e849677ef") },
[
{
$set: {
myRating: {
$let: {
vars: {
durationScore: { $cond: { if: { $gte: ["$runtime", 90] }, then: 10, else: 5 } },
yearScore: { $cond: { if: { $gte: ["$year", 1990] }, then: 10, else: 5 } },
genreScore: {
$switch: {
branches: [
{ case: { $in: ["Action", "$genres"] }, "then": 30 },
{ case: { $in: ["Western", "$genres"] }, "then": 20 },
{ case: { $in: ["Comedy", "$genres"] }, "then": 5 },
{ case: { $in: ["Drama", "$genres"] }, "then": 15 }
],
default: 10
}
},
directorScore: {
$switch: {
branches: [
{ case: { $eq: ["$director", "Quentin Tarantino"] }, "then": 20 },
{ case: { $eq: ["$director", "Martin Scorsese"] }, "then": 20 }
],
default: 10
}
}
},
in: { $sum: ["$$yearScore", "$$durationScore", "$$genreScore", "$$directorScore"] }
}
}
}
}
]
);
How to avoid empty array while filtering results while querying a collection in MongoDb
[
{
"_id": ObjectId("5d429786bd7b5f4ae4a64790"),
"extensions": {
"outcome": "success",
"docType": "ABC",
"Roll No": "1"
},
"data": [
{
"Page1": [
{
"heading": "LIST",
"content": [
{
"text": "<b>12345</b>"
},
],
}
],
"highlights": [
{
"name": "ABCD",
"text": "EFGH",
}
],
"marks": [
{
"revision": "revision 1",
"Score": [
{
"maths": "100",
"science": "40",
"history": "90"
},
{
"lab1": "25",
"lab2": "25"
}
],
"Result": "Pass"
},
{
"revision": "revision 1",
"Score": [
{
"maths": "100",
"science": "40"
},
{
"lab1": "25",
"lab2": "25"
}
],
"Result": "Pass"
}
]
}
]
}
]
I am looking for results that has only "history" marks in the score array.
I tried the following query (in mongo 3.6.10) but it returns empty score array as well the array that has history as well
db.getCollection('student_scores').find({
"data.marks.score.history": {
$not: {
$type: 10
},
$exists: true
}
},
{
"extensions.rollNo": 1,
"data.marks.score.history": 1
})
Desired output is
{
"extensions": {
"rollNo": "1"
},
"data": [
{
"marks": [
{
"Score": [
{
"history": "90"
}
]
}
]
}
]
}
I used something like the following;
db.getCollection('student_scores').aggregate([
{
$unwind: "$data"
},
{
$unwind: "$data.marks"
},
{
$unwind: "$data.marks.Score"
},
{
$match: {
"data.marks.Score.history": {
$exists: true,
$not: {
$type: 10
}
}
}
},
{
$project: {
"extensions.Roll No": 1,
"data.marks.Score.history": 1
}
},
{
$group: {
_id: "$extensions.Roll No",
history_grades: {
$push: "$data.marks.Score.history"
}
}
}
])
where I got the following result with your input (I think more readable than your expected output);
[
{
"_id": "1",
"history_grades": [
"90"
]
}
]
where _id represents "extensions.Roll No" value for any given data set.
What do you think?
check with a bigger input on mongoplayground
OK, so I still think the data design here with the Score array is a little off but here is solution that will ensure that a Score array contains only 1 entry and that entry is for a key of history. We use dotpath array diving as a trick to get to the value of history.
c = db.foo.aggregate([
{$unwind: "$data"}
,{$unwind: "$data.marks"}
,{$project: {
result: {$cond: [
{$and: [ // if
{$eq: [1, {$size: "$data.marks.Score"}]}, // Only 1 item...
// A little trick! $data.marks.Score.history will resolve to an *array*
// of the values associated with each object in $data.marks.Score (the parent
// array) having a key of history. BUT: As it resolves, if there is no
// field for that key, nothing is added to resolution vector -- not even a null.
// This means the resolved array could
// be **shorter** than the input. FOr example:
// > db.foo.insert({"x":[ {b:2}, {a:3,b:4}, {b:7}, {a:99} ]});
// WriteResult({ "nInserted" : 1 })
// > db.foo.aggregate([ {$project: {z: "$x.b", n: {$size: "$x.b"}} } ]);
// { "z" : [ 2, 4, 7 ], "n" : 3 }
// > db.foo.aggregate([ {$project: {z: "$x.a", n: {$size: "$x.a"}} } ]);
// { "z" : [ 3, 99 ], "n" : 2 }
//
// You must be careful about this.
// But we also know this resolved vector is of size 1 (see above) so we can go ahead and grab
// the 0th item and that becomes our output.
// Note that if we did not have the requirement of ONLY history, then we would not
// need the fancy $cond thing.
{$arrayElemAt: ["$data.marks.Score.history",0]}
]},
{$arrayElemAt: ["$data.marks.Score.history",0]}, // then (use value of history)
null ] } // else set null
,extensions: "$extensions" // just carry over extensions
}}
,{$match: {"result": {$ne: null} }} // only take good ones.