How to un-nest and group collections in mongoDB - database

I don't understand how to unwind and then regroup nested collections in MongoDB. Basically, I have two collections that are structured like this:
questions doc:
{
"_id" : 1,
"questions" : [
{
"_id" : 1,
"body" : "What fabric is the top made of?",
"date_written" : "2018-01-04",
"asker_name" : "yankeelover",
"asker_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 2
},
{
"_id" : 2,
"body" : "HEY THIS IS A WEIRD QUESTION!!!!?",
"date_written" : "2019-04-28",
"asker_name" : "jbilas",
"asker_email" : "first.last#gmail.com",
"reported" : 1,
"helpful" : 4
},
{
"_id" : 4,
"body" : "How long does it last?",
"date_written" : "2019-07-06",
"asker_name" : "funnygirl",
"asker_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 6
},
answers doc:
{
"_id" : 1,
"answers" : [
{
"_id" : 8,
"body" : "DONT BUY IT! It's bad for the environment",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 8
},
{
"_id" : 7,
"body" : "Its the best! Seriously magic fabric",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 7
},
{
"_id" : 5,
"body" : "Something pretty soft but I can't be sure",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 5,
"photos" : [
{
"_id" : 1,
"url" : "https://images.unsplash.com/photo-1530519729491-aea5b51d1ee1?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1651&q=80"
},
The _id field in the answers doc matches the _id field of the question that those answers belong to.
the end goal is to have data that looks something like this:
{
"_id": "17762",
"questions": [
{
"question_id": 152829,
"question_body": "Why Does it look like this?",
"question_date": "2021-03-06T00:00:00.000Z",
"asker_name": "garethTheGreato",
"question_helpfulness": 60,
"reported": false,
"answers": {
"1443770": {
"id": 1443770,
"body": "This question was really helpful! Thank you.",
"date": "2021-03-09T00:00:00.000Z",
"answerer_name": "SatisfiedCustomer",
"helpfulness": 3,
"photos": []
},
"1443807": {
"id": 1443807,
"body": "mimk",
"date": "2021-03-09T00:00:00.000Z",
"answerer_name": "jij",
"helpfulness": 3,
"photos": [
"blob:http://localhost:3000/8f6375b3-0795-4210-bef7-f112feed8244"
]
},
"1443834": {
"id": 1443834,
"body": "10/10 would recomend.",
"date": "2021-03-09T00:00:00.000Z",
"answerer_name": "Krista",
"helpfulness": 2,
"photos": []
},
"1443845": {
"id": 1443845,
"body": "Thank you so much for playing my game!",
"date": "2021-03-10T00:00:00.000Z",
"answerer_name": "itsameemario",
"helpfulness": 1,
"photos": []
},
"1443880": {
"id": 1443880,
"body": "Tree",
"date": "2021-03-10T00:00:00.000Z",
"answerer_name": "Tree",
"helpfulness": 0,
"photos": [
"blob:http://localhost:3000/123051b6-4dfb-410a-a96f-d4a5128e3056"
]
}
}
},
{
"question_id": 152702,
"question_body": "Please write your question here",
"question_date": "2021-03-05T00:00:00.000Z",
"asker_name": "Your nickname",
"question_helpfulness": 32,
"reported": false,
"answers": {}
},
The problem I'm having is that when I run the lookup I get an answers array that correlates to the questions collection but am not sure how to get each set of answers to their specific question given that the answers come back deeply nested.
Here is what I have so far (ignore the slice and sort stages for now; these are parameters I will need later for another part of the project):
db.prodquests.aggregate([
{ $match: { _id: 5 } },
{ $unwind: '$questions' },
{ $match: { 'questions.reported': { $lt: 1 } } },
{ $sort: { 'questions.helpful': -1 } },
{ $group: { _id: '$_id', questions: { $push: '$questions' } } },
{ $project: { _id: 1, questions: { $slice: ['$questions', 0, 1] } } },
{ $unwind: '$questions' },
{
$lookup: {
from: 'groupansphotos',
localField: 'questions._id',
foreignField: '_id',
as: 'answers',
},
},
])
The return from this statement is as follows:
{
"_id" : 5,
"questions" : {
"_id" : 37,
"body" : "Why is this product cheaper here than other sites?",
"date_written" : "2018-10-18",
"asker_name" : "willsmith",
"asker_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 4
},
"answers" : [
{
"_id" : 37,
"answers" : [
{
"_id" : 68,
"body" : "We are selling it here without any markup from the middleman!",
"date_written" : "2018-08-18",
"answerer_name" : "Seller",
"answerer_email" : "null",
"reported" : 0,
"helpful" : 4
}
]
}
]
}
Essentially, I want to group just that answers array under its corresponding question, i.e. the one whose _id field matches.
Thank you in advance!

UPDATE based on comments:
Updated query:
db.questions.aggregate([
{ $match: { _id: 5 } },
{ $unwind: '$questions' },
{ $match: { 'questions.reported': { $lt: 1 } } },
{ $sort: { 'questions.helpful': -1 } },
{
$lookup: {
from: "answers",
let: { question_id: "$questions._id" },
pipeline: [
{
$match: {
$expr: { $eq: ["$_id", "$$question_id"] }
}
},
{ $unwind: "$answers" },
{
$project: {
_id: 0,
k: { $toString: "$answers._id" },
v: "$$ROOT.answers"
}
}
],
as: "answers"
}
},
{
$group: {
_id: "$_id",
questions: {
$push: {
question_id: "$questions._id",
question_body: "$questions.body",
question_date: "$questions.date_written",
asker_name: "$questions.asker_name",
question_helpfulness: "$questions.helpful",
reported: "$questions.reported",
answers: { $arrayToObject: "$answers" }
}
}
}
}
]);
Old query:
Note: Please adjust the collection names and/or field names to match your own. Try this query:
db.questions.aggregate([
{ $match: { _id: 5 } },
{ $unwind: '$questions' },
{ $match: { 'questions.reported': { $lt: 1 } } },
{ $sort: { 'questions.helpful': -1 } },
{
$lookup: {
from: "answers",
let: { question_id: "$questions._id" },
pipeline: [
{
$match: {
$expr: { $eq: ["$_id", "$$question_id"] }
}
},
{ $unwind: "$answers" },
{
$project: {
_id: 0,
k: { $toString: "$answers._id" },
v: "$$ROOT.answers"
}
}
],
as: "answers"
}
},
{
$match: {
$expr: {
$gt: [{ $size: "$answers" }, 0]
}
}
},
{
$group: {
_id: "$_id",
questions: {
$push: {
question_id: "$questions._id",
question_body: "$questions.body",
question_date: "$questions.date_written",
asker_name: "$questions.asker_name",
question_helpfulness: "$questions.helpful",
reported: "$questions.reported",
answers: { $arrayToObject: "$answers" }
}
}
}
}
]);
Output:
{
"_id" : 5,
"questions" : [
{
"question_id" : 2,
"question_body" : "HEY THIS IS A WEIRD QUESTION!!!!?",
"question_date" : "2019-04-28",
"asker_name" : "jbilas",
"question_helpfulness" : 4,
"reported" : 0,
"answers" : {
"14" : {
"_id" : 14,
"body" : "DONT BUY IT! It's bad for the environment",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 8
},
"15" : {
"_id" : 15,
"body" : "Its the best! Seriously magic fabric",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 7
},
"16" : {
"_id" : 16,
"body" : "Something pretty soft but I can't be sure",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 5
}
}
},
{
"question_id" : 1,
"question_body" : "What fabric is the top made of?",
"question_date" : "2018-01-04",
"asker_name" : "yankeelover",
"question_helpfulness" : 2,
"reported" : 0,
"answers" : {
"11" : {
"_id" : 11,
"body" : "DONT BUY IT! It's bad for the environment",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 8
},
"12" : {
"_id" : 12,
"body" : "Its the best! Seriously magic fabric",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 7
},
"13" : {
"_id" : 13,
"body" : "Something pretty soft but I can't be sure",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 5
}
}
}
]
}
Test data:
questions collection
{
"_id" : 5,
"questions" : [
{
"_id" : 1,
"body" : "What fabric is the top made of?",
"date_written" : "2018-01-04",
"asker_name" : "yankeelover",
"asker_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 2
},
{
"_id" : 2,
"body" : "HEY THIS IS A WEIRD QUESTION!!!!?",
"date_written" : "2019-04-28",
"asker_name" : "jbilas",
"asker_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 4
},
{
"_id" : 4,
"body" : "How long does it last?",
"date_written" : "2019-07-06",
"asker_name" : "funnygirl",
"asker_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 6
}
]
}
answers collection:
/* 1 */
{
"_id" : 1,
"answers" : [
{
"_id" : 11,
"body" : "DONT BUY IT! It's bad for the environment",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 8
},
{
"_id" : 12,
"body" : "Its the best! Seriously magic fabric",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 7
},
{
"_id" : 13,
"body" : "Something pretty soft but I can't be sure",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 5
}
]
},
/* 2 */
{
"_id" : 2,
"answers" : [
{
"_id" : 14,
"body" : "DONT BUY IT! It's bad for the environment",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 8
},
{
"_id" : 15,
"body" : "Its the best! Seriously magic fabric",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 7
},
{
"_id" : 16,
"body" : "Something pretty soft but I can't be sure",
"date_written" : "2018-01-04",
"answerer_name" : "metslover",
"answerer_email" : "first.last#gmail.com",
"reported" : 0,
"helpful" : 5
}
]
}

Related

Mongodb Index not used in case of $or query

I first created a collection and created an index:
db.first.createIndex({a:1, b:1, c:1, d:1, e:1, f:1});
then inserted data
db.first.insert({a:1, b:2, c:3, d:4, e:5, f:6});
db.first.insert({a:1, b:6});
When making queries like
db.first.find({f: 6, a:1, c:3}).sort({b: -1}).explain();
indexes are used (IXSCAN)
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myproject.first",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"a" : {
"$eq" : 1
}
},
{
"c" : {
"$eq" : 3
}
},
{
"f" : {
"$eq" : 6
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"a" : 1,
"b" : 1,
"c" : 1,
"d" : 1,
"e" : 1,
"f" : 1
},
"indexName" : "a_1_b_1_c_1_d_1_e_1_f_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"a" : [ ],
"b" : [ ],
"c" : [ ],
"d" : [ ],
"e" : [ ],
"f" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "backward",
"indexBounds" : {
"a" : [
"[1.0, 1.0]"
],
"b" : [
"[MaxKey, MinKey]"
],
"c" : [
"[3.0, 3.0]"
],
"d" : [
"[MaxKey, MinKey]"
],
"e" : [
"[MaxKey, MinKey]"
],
"f" : [
"[6.0, 6.0]"
]
}
}
},
"rejectedPlans" : [
{
"stage" : "SORT",
"sortPattern" : {
"b" : -1
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"c" : {
"$eq" : 3
}
},
{
"f" : {
"$eq" : 6
}
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"a" : 1
},
"indexName" : "a_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"a" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"a" : [
"[1.0, 1.0]"
]
}
}
}
}
}
]
},
"serverInfo" : {
"host" : "Manishs-MacBook-Pro.local",
"port" : 27017,
"version" : "3.6.4",
"gitVersion" : "d0181a711f7e7f39e60b5aeb1dc7097bf6ae5856"
},
"ok" : 1
}
But when I use an $or query
db.first.find({ $or: [{f: 6}, {a:1}]}).explain();
the index is not used; instead the whole collection is scanned (COLLSCAN):
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "myproject.first",
"indexFilterSet" : false,
"parsedQuery" : {
"$or" : [
{
"a" : {
"$eq" : 1
}
},
{
"f" : {
"$eq" : 6
}
}
]
},
"winningPlan" : {
"stage" : "SUBPLAN",
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"$or" : [
{
"a" : {
"$eq" : 1
}
},
{
"f" : {
"$eq" : 6
}
}
]
},
"direction" : "forward"
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "Manishs-MacBook-Pro.local",
"port" : 27017,
"version" : "3.6.4",
"gitVersion" : "d0181a711f7e7f39e60b5aeb1dc7097bf6ae5856"
},
"ok" : 1
}
Please let me know if I am doing something wrong.
The fact that you have a compound index is the cause for indexes not being used with $or.
When evaluating the clauses in the $or expression, MongoDB either
performs a collection scan or, if all the clauses are supported by
indexes, MongoDB performs index scans. That is, for MongoDB to use
indexes to evaluate an $or expression, all the clauses in the $or
expression must be supported by indexes. Otherwise, MongoDB will
perform a collection scan.
When using indexes with $or queries, each clause of an $or can use its
own index. Consider the following query:
db.inventory.find( { $or: [ { quantity: { $lt: 20 } }, { price: 10 } ] } )
To support this query, rather than a compound index, you would create
one index on quantity and another index on price:
db.inventory.createIndex( { quantity: 1 } )
db.inventory.createIndex( { price: 1 } )
$or Clauses and Indexes
So just by adding individual indexing for fields f and a like;
db.first.createIndex({a:1});
db.first.createIndex({f:1});
will make your
db.first.find({ $or: [{f: 6}, {a:1}]})
query to use indexing.
The issue here is that you've created a compound index on the {a:1, b:1, c:1, d:1, e:1, f:1} fields, but not every clause of your $or matches a prefix of that index. A compound index can only support a predicate that includes its leading field(s); the order in which the fields are written in the query document does not matter. Since the field 'f' is at the tail end of the index, a clause on 'f' alone (as in your $or) cannot use it.
Your queries:
db.first.find({f: 6, a:1, c:3}).sort({b: -1})
db.first.find({ $or: [{f: 6}, {a:1}]})
To make both your above queries use the index, you should build the compound index as below:
db.first.createIndex({ f:1, a:1, b:1, c:1 })
OR: you can build individual indexes on all fields and use it in any order in your query.
Remember: if you're building a compound index, make sure to follow the Equality, Sort, Range (ESR) order.

Mongodb not using index for array of objects

I am trying to fetch a few documents from a collection by running a find query on an array of nested objects. The nested objects are indexed, but the find query is not using the index to fetch the documents.
Here is the structure of a document.
"_id" : ObjectId("5bc6498c1ec4062983c4f4ef"),
"appId" : ObjectId("5bbc775036021bea06d9bbc2"),
"status" : "active",
"segmentations" : [
{
"name" : "ch-1",
"values" : [
'true'
],
"type" : "string"
},
{
"name" : "browerInfo",
"values" : [
"Firefox"
],
"version" : [
"62.0"
],
"majorVersion" : [
"62"
],
"type" : "string"
},
{
"name" : "OS",
"values" : [
"Ubuntu"
],
"type" : "string"
},
{
"name" : "lastVisitTime",
"values" : [
1539721615231.0
],
"type" : "number"
}
]
}
Here are the index fields.
{
"v" : 2,
"key" : {
"appId" : 1,
"status" : 1,
"segmentations.name" : 1,
"segmentations.values" : 1
},
"name" : "SEGMENT_INDEX",
"ns" : "test.Collname"
}
Below is the find query I was executing:
db.Collname.find({
appId: ObjectId("5c6a8ef544ff62c73bdb98fc"),
"segmentations.name": 'ch-1',
'segmentations.values': 'true',
status: 'active'
}, {})
I tried to get the query execution information using
<above query>.explain("executionStats")
The result is
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.Collname",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"appId" : {
"$eq" : ObjectId("5c6a8ef544ff62c73bdb98fc")
}
},
{
"segmentations.name" : {
"$eq" : "ch-1"
}
},
{
"segmentations.values" : {
"$eq" : "true"
}
},
{
"status" : {
"$eq" : "active"
}
}
]
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"segmentations.values" : {
"$eq" : "true"
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"appId" : 1.0,
"status" : 1.0,
"segmentations.name" : 1.0,
"segmentations.values" : 1.0
},
"indexName" : "SEGMENT_INDEX",
"isMultiKey" : true,
"multiKeyPaths" : {
"appId" : [],
"status" : [],
"segmentations.name" : [
"segmentations"
],
"segmentations.values" : [
"segmentations",
"segmentations.values"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"appId" : [
"[ObjectId('5c6a8ef544ff62c73bdb98fc'), ObjectId('5c6a8ef544ff62c73bdb98fc')]"
],
"status" : [
"[\"active\", \"active\"]"
],
"segmentations.name" : [
"[\"ch-1\", \"ch-1\"]"
],
"segmentations.values" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 28176,
"executionTimeMillis" : 72,
"totalKeysExamined" : 28176,
"totalDocsExamined" : 28176,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"segmentations.values" : {
"$eq" : "true"
}
},
"nReturned" : 28176,
"executionTimeMillisEstimate" : 70,
"works" : 28177,
"advanced" : 28176,
"needTime" : 0,
"needYield" : 0,
"saveState" : 220,
"restoreState" : 220,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 28176,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 28176,
"executionTimeMillisEstimate" : 10,
"works" : 28177,
"advanced" : 28176,
"needTime" : 0,
"needYield" : 0,
"saveState" : 220,
"restoreState" : 220,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"appId" : 1.0,
"status" : 1.0,
"segmentations.name" : 1.0,
"segmentations.values" : 1.0
},
"indexName" : "SEGMENT_INDEX",
"isMultiKey" : true,
"multiKeyPaths" : {
"appId" : [],
"status" : [],
"segmentations.name" : [
"segmentations"
],
"segmentations.values" : [
"segmentations",
"segmentations.values"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"appId" : [
"[ObjectId('5c6a8ef544ff62c73bdb98fc'), ObjectId('5c6a8ef544ff62c73bdb98fc')]"
],
"status" : [
"[\"active\", \"active\"]"
],
"segmentations.name" : [
"[\"ch-1\", \"ch-1\"]"
],
"segmentations.values" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 28176,
"seeks" : 1,
"dupsTested" : 28176,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "sys3029",
"port" : 27017,
"version" : "4.0.9",
"gitVersion" : "fc525e2d9b0e4bceff5c2201457e564362909765"
},
"ok" : 1.0
}
I can see from executionStats that the "segmentations.values" field is not used in the IXSCAN stage (its index bounds are [MinKey, MaxKey]), and that there is an extra filter on "segmentations.values" in the FETCH stage. The IXSCAN stage took just 10ms, whereas the filtering stage took 50ms.
I can't understand why the field is not included in the IXSCAN stage. My collection has around 3.2 million documents, and because of this issue the query execution time is much higher than expected.
Please help me fix the issue.
Thank you in advance.
Please let me know if I need to change my database structure.
If this is not possible in MongoDB, please suggest another database that supports the above operations.
The following query will use your index for both of your array fields:
.find({
appId: ObjectId("5c6a8ef544ff62c73bdb98fc"),
segmentations:{$elemMatch:{name: 'ch-1',values: 'true'}},
status: 'active'
}, {})
If you are not using $elemMatch, MongoDB can compound the bounds for the array item keys with either the bounds for "segmentations.name" or the bounds for "segmentations.values", but not both.
In order to compound the bounds for "segmentations.name" with the bounds for "segmentations.values", the query must use $elemMatch.
To compound together the bounds for index keys from the same array:
the index keys must share the same field path up to but excluding the
field names,
and the query must specify predicates on the fields
using $elemMatch on that path.
I suggest you read the MongoDB docs on multikey index bounds and on $elemMatch.

$limit number of documents in $lookup

I encounter a problem with this query that returns this result:
{
"_id" : ObjectId("5bd22f28f77cfb1f6ce503ca"),
"search" : "flarize",
"name" : "flarize",
"color" : 0,
"profil" : "",
"banner" : "",
"desc" : "",
"date" : 1540501286109,
"friend" : [
[
{
"_id" : ObjectId("5bd22f28f77cfb1f6ce503ca"),
"search" : "flarize",
"name" : "flarize",
"email" : "flarize.b73#gmail.com",
"password" : "$2a$10$eYeOtEkEUyD7TFkjKvhZOuSSpvBolkL17TrPHuoHhOT8JrsQR0UKW",
"color" : 0,
"profil" : "",
"banner" : "",
"desc" : "",
"date" : 1540501286109,
"friend" : [
{
"id" : ObjectId("5bd22f28f77cfb1f6ce503ca"),
"date" : 1540572026419
},
{
"id" : ObjectId("5bd19a92da24674fdabd26b6"),
"date" : 1540572026419
}
],
"groupes" : [ ]
}
]
]
}
But it's not really what I want: I'd like the fields friend.password and friend.email to be hidden, and the results of friend.groupes and friend.friend to be limited to 10. I don't know how I can solve this problem. My request:
db.users.aggregate(
{$match:
{search:"flarize"}
},
{$lookup:
{from:"users",
localField:"friend.id",
foreignField:"_id",
as:"friend"
}},
{$project:
{ search: 1,
name: 1,
profil: 1,
banner: 1,
color: 1,
date: 1,
desc: 1,
friend: [{$slice:["$friend", 0, 10]}]
}
}).pretty();
One document of users:
{
"_id" : ObjectId("5bd22f28f77cfb1f6ce503ca"),
"search" : "flarize",
"name" : "flarize",
"email" : "theo.ba73#gmail.com",
"password" : "$2a$10$eYeOtEkEUyD7TFkjKvhZOuSSpvBolkL17TrPHuoHhOT8JrsQR0UKW",
"color" : 0,
"profil" : "",
"banner" : "",
"desc" : "",
"date" : 1540501286109,
"friend" : [
{
"id" : ObjectId("5bd22f28f77cfb1f6ce503ca"),
"date" : 1540572026419
},
{
"id" : ObjectId("5bd19a92da24674fdabd26b6"),
"date" : 1540572026419
}
],
"groupes" : [ ]
}
Thank you for helping me.
You can try below aggregation
db.users.aggregate([
{ $match: { search: "flarize" } },
{ $lookup: {
from: "users",
let: { friendId: "$friend.id" },
pipeline: [
{ $match: { $expr: { $in: ["$_id", "$$friendId"] }}},
{ $limit: 10 },
{ $project: { email: 0, password: 0 }}
],
as: "friend"
}}
])

MongoDB aggregation pipeline filtering two arrays

Could you give me some advice? I have a document like this:
{
"_id" : ObjectId("569620270d3ac01895316edb"),
"customerId" : NumberLong("2000900000000000022"),
"gender" : "MALE",
"birthDate" : ISODate("1976-01-06T23:00:00Z"),
"someArray" : [
{
"id" : 5411,
"firstDate" : ISODate("2014-08-05T16:17:50Z"),
"lastDate" : ISODate("2015-10-31T11:55:51Z"),
"sumOfAll" : 5677.35,
"minAmount" : 9.75,
"maxAmount" : 231.72,
"innerArray" : [
{
"count" : 4,
"amount" : 449.33
},
{
"count" : 3,
"amount" : 401.31
},
{
"count" : 7,
"amount" : 617.8000000000001
},
{
"count" : 4,
"amount" : 465.28999999999996
},
{
"count" : 2,
"amount" : 212.95999999999998
},
{
"count" : 4,
"amount" : 497.53999999999996
},
{
"count" : 3,
"amount" : 278.23
},
{
"count" : 3,
"amount" : 383.15999999999997
},
{
"count" : 6,
"amount" : 459.63
},
{
"count" : 9,
"amount" : 677.19
},
{
"count" : 4,
"amount" : 393.85
}
]
},
{
"id" : 5812,
"firstDate" : ISODate("2014-09-03T17:16:32Z"),
"lastDate" : ISODate("2015-11-04T22:59:59Z"),
"sumOfAll" : 275.6,
"minAmount" : 15,
"maxAmount" : 69,
"innerArray" : [
{
"count" : 1,
"amount" : 17
},
{
"count" : 1,
"amount" : 15.4
},
{
"count" : 1,
"amount" : 69
},
{
"count" : 1,
"amount" : 53.7
},
{
"count" : 2,
"amount" : 84
}
]
},
{
"id" : 7399,
"firstDate" : ISODate("2015-01-12T22:59:59Z"),
"lastDate" : ISODate("2015-03-16T22:59:59Z"),
"sumOfAll" : 144.73,
"minAmount" : 0.84,
"maxAmount" : 24.98,
"innerArray" : [
{
"count" : 5,
"amount" : 50.379999999999995
},
{
"count" : 5,
"amount" : 55.45
},
{
"count" : 10,
"amount" : 38.900000000000006
}
]
},
]
}
And I'd like to filter both inner arrays and also project them. I'm trying this query:
db.sandbox.aggregate([
{ $match: {
'gender': {$eq : 'MALE'},
$or: [
{ $and: [{'someArray.id': {$eq: 5411}}, {'someArray.innerArray.count': 4}, {'someArray.innerArray.amount': {$gte: 2}}]},
{ $and: [{'someArray.id': {$eq: 5812}}, {'someArray.innerArray.count': 5}, {'someArray.innerArray.amount': {$gte: 50}}]},
]
}
},
{ $project: {
gender: 1,
customerId: 1,
someArray: { $filter: {
input: '$someArray',
as: 'item',
cond: {
$and: [
{ $or: [
{$and: [{$eq: ['$$item.id', 5411]}, {$eq: ['$$item.innerArray.count', 4]}, {$gte: ['$$item.innerArray.amount', 2]}]},
{$and: [{$eq: ['$$item.id', 5812]}, {$eq: ['$$item.innerArray.count', 5]}, {$gte: ['$$item.innerArray.amount', 50]}]},
]},
]
}
}},
}}
]).pretty()
And I received result without data in someArray:
{
"_id" : ObjectId("569620270d3ac01895316edb"),
"customerId" : NumberLong("2000900000000000022"),
"gender" : "MALE",
"someArray" : [ ]
}
I want to receive:
{
"_id" : ObjectId("569620270d3ac01895316edb"),
"customerId" : NumberLong("2000900000000000022"),
"gender" : "MALE",
"birthDate" : ISODate("1976-01-06T23:00:00Z"),
"someArray" : [
{
"id" : 5411,
"firstDate" : ISODate("2014-08-05T16:17:50Z"),
"lastDate" : ISODate("2015-10-31T11:55:51Z"),
"sumOfAll" : 5677.35,
"minAmount" : 9.75,
"maxAmount" : 231.72,
"innerArray" : [
{
"count" : 4,
"amount" : 449.33
},
{
"count" : 4,
"amount" : 465.28999999999996
},
{
"count" : 4,
"amount" : 497.53999999999996
},
{
"count" : 4,
"amount" : 393.85
}
]
}
]
}
If I change $eq to $gte, I do receive a result, but I want to project innerArray too. How can I implement this? Should I use my own MapReduce job, or will I be able to do this with the aggregation pipeline?
MongoDB version 3.2. I also observe unexpected behavior when I try to use several predicates on the array and project only one element, for example:
db.sandbox.find( {$and: [{'someArray.id': 7399}, {'someArray.sumOfAll': {$gte: 5000}}]}, {'customerId': 1, 'someArray.$': 1}).pretty()
But it returns me:
{
"_id" : ObjectId("569620270d3ac01895316edb"),
"customerId" : NumberLong("2000900000000000022"),
"someArray" : [
{
"id" : 5411,
"firstDate" : ISODate("2014-08-05T16:17:50Z"),
"lastDate" : ISODate("2015-10-31T11:55:51Z"),
"sumOfAll" : 5677.35,
"minAmount" : 9.75,
"maxAmount" : 231.72,
"innerArray" : [
{
"count" : 4,
"amount" : 449.33
},
{
"count" : 3,
"amount" : 401.31
},
{
"count" : 7,
"amount" : 617.8000000000001
},
{
"count" : 4,
"amount" : 465.28999999999996
},
{
"count" : 2,
"amount" : 212.95999999999998
},
{
"count" : 4,
"amount" : 497.53999999999996
},
{
"count" : 3,
"amount" : 278.23
},
{
"count" : 3,
"amount" : 383.15999999999997
},
{
"count" : 6,
"amount" : 459.63
},
{
"count" : 9,
"amount" : 677.19
},
{
"count" : 4,
"amount" : 393.85
}
]
}
]
}
Which is incorrect from my perspective. I expect nothing.
First, the way you're using conditions in $match will not result in what you want.
{ $and: [{'someArray.id': {$eq: 5411}}, {'someArray.innerArray.count': 4}, {'someArray.innerArray.amount': {$gte: 2}}]}
The line above will verify each condition separately, instead of checking the count and amount conditions together for each innerArray element. If that's what you want, you should look into the $elemMatch operator.
Second, I don't believe you can use $filter like that on a second-level array. You should unwind someArray first:
db.sandbox.aggregate(
{
$match:
{
gender: { $eq: 'MALE' },
"someArray.id":
{
$in: [5411, 5812]
}
}
},
{
$unwind: "$someArray",
},
{
$project:
{
gender: 1,
customerId: 1,
someArray:
{
id: 1,
firstDate: 1,
lastDate: 1,
sumOfAll: 1,
minAmount: 1,
maxAmount: 1,
innerArray:
{
$filter:
{
input: '$someArray.innerArray',
as: 'item',
cond:
{
$or:
[
{
$and:
[
{ $eq: ['$$item.count', 4] },
{ $gte: ['$$item.amount', 2] }
]
},
{
$and:
[
{ $eq: ['$$item.count', 5] },
{ $gte: ['$$item.amount', 50] }
]
}
]
}
}
}
},
}
})
You can also $group someArray elements back if you want.

Poor performance with mongo array index

Hi I know there has been much said regarding this but I'm unable to find an answer to my specific problem. I have the following JSON document and trying to create an efficient index for the questions.questionEntry.metaTags array:
{
"questions": [
{
"questionEntry": {
"id": 1,
"info": {
"seasonNumber": 1,
"episodeNumber": 1,
"episodeName": "Days Gone Bye"
},
"questionItem": {
"theQuestion": "",
"attachedElement": {
"type": 1,
"value": ""
}
},
"options": [
{
"type": 1,
"value": ""
},
{
"type": 1,
"value": ""
}
],
"answer": {
"questionId": 1,
"answer": 1
},
"metaTags": [
"Season 1",
"Episode 1"
]
}
}
]
}
I then added 500,000 duplicate documents to my DB, plus one additional document with different data fields, to run some tests.
I ran the following query on the unindexed collection with an execution time of 640ms:
db.questions1.find({"questions.questionEntry.metaTags" : "Season 1"},{'questions.$':1})._addSpecial( "$explain", 1 ).pretty()
Then I created the following index:
db.questions1.createIndex( { "questions.questionEntry.metaTags" : 1 })
Now I ran the same query but now the execution time is 9070ms...!
Here is the explain() showing 500001 documents examined!:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.questions1",
"indexFilterSet" : false,
"parsedQuery" : {
"questions.questionEntry.metaTags" : {
"$eq" : "Season 1"
}
},
"winningPlan" : {
"stage" : "PROJECTION",
"transformBy" : {
"questions.$" : 1
},
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"questions.questionEntry.metaTags" : 1
},
"indexName" : "questions.questionEntry.metaTags_1",
"isMultiKey" : true,
"direction" : "forward",
"indexBounds" : {
"questions.questionEntry.metaTags" : [
"[\"Season 1\", \"Season 1\"]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 500001,
"executionTimeMillis" : 11255,
"totalKeysExamined" : 500001,
"totalDocsExamined" : 500001,
"executionStages" : {
"stage" : "PROJECTION",
"nReturned" : 500001,
"executionTimeMillisEstimate" : 10750,
"works" : 500002,
"advanced" : 500001,
"needTime" : 0,
"needFetch" : 0,
"saveState" : 3907,
"restoreState" : 3907,
"isEOF" : 1,
"invalidates" : 0,
"transformBy" : {
"questions.$" : 1
},
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 500001,
"executionTimeMillisEstimate" : 9310,
"works" : 500002,
"advanced" : 500001,
"needTime" : 0,
"needFetch" : 0,
"saveState" : 3907,
"restoreState" : 3907,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 500001,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 500001,
"executionTimeMillisEstimate" : 8970,
"works" : 500001,
"advanced" : 500001,
"needTime" : 0,
"needFetch" : 0,
"saveState" : 3907,
"restoreState" : 3907,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"questions.questionEntry.metaTags" : 1
},
"indexName" : "questions.questionEntry.metaTags_1",
"isMultiKey" : true,
"direction" : "forward",
"indexBounds" : {
"questions.questionEntry.metaTags" : [
"[\"Season 1\", \"Season 1\"]"
]
},
"keysExamined" : 500001,
"dupsTested" : 500001,
"dupsDropped" : 0,
"seenInvalidated" : 0,
"matchTested" : 0
}
}
},
"allPlansExecution" : [ ]
},
"serverInfo" : {
"host" : "Voltage",
"port" : 27017,
"version" : "3.0.3",
"gitVersion" : "b40106b36eecd1b4407eb1ad1af6bc60593c6105"
}
}
MongoDB is not my thing, and I'm struggling to understand why execution is taking longer.
What would be the best method to index the string metaTags array?
Many thanks

Resources