Sort by deep document field in MongoDb - arrays

I have a collection called Visitor which has an array of chats and each array has a document called user.
I need to find some documents on this collection and sort them by if they have some specific user in their chats first.
The path for the user id is:
chats.user._id
where:
chats // array
user // document
_id // ObjectId
The below script does sort the documents correctly, however, it expands the chats array and multiplies the document for each chat in the array.
I only need the sorting, so can I sort and not use the unwind pipeline or make it somehow not multiply the documents?
db.getCollection('Visitor').aggregate([
{$unwind: "$chats"},
{ $match: {'event._id':ObjectId('5c942a3591deb389bfd92579'), 'chats.enabled': {$exists: true}}},
{
"$project": {
"_id": 1,
"chats.user._id": 1,
"weight": {
"$cond": [
{ "$eq": [ "$chats.user._id", ObjectId("5c942a3591deb389bfd92579") ] },
10,
0
]
}
}
},
{ "$sort": { "weight": -1 } },
])
EDIT: I don't need to sort the inner array, but sort the find command by checking if a specific user is in the chats array.
Some sample of Visitor collection:
[
{
"_id" : ObjectId("5c9a3a1bd86e0ba64106e90e"),
"event" : {
"_id" : ObjectId("5c942a3591deb389bfd92579")
},
"chats" : [
{
"enabled" : false,
"user" : {
"_id" : ObjectId("5c81232f09a923b559763418")
},
"_id" : ObjectId("5c9a3a1bd86e0ba64106e915")
}
]
},
{
"_id" : ObjectId("5c9a3a35d86e0ba64106e950"),
"event" : {
"_id" : ObjectId("5c942a3591deb389bfd92579")
},
"chats" : [
{
"enabled" : true,
"user" : {
"_id" : ObjectId("5c81232f09a923b559763418")
},
"_id" : ObjectId("5c9a3a35d86e0ba64106e957")
},
{
"enabled" : true,
"user" : {
"_id" : ObjectId("5c942a3591deb389bfd92579")
},
"_id" : ObjectId("5c9a3a34d86e0ba64106e91d")
}
]
}
]
In the above sample, I need to make the second document to be sorted first because it has the user with the _id ObjectId("5c942a3591deb389bfd92579").

The problem here is that using $unwind you modify initial structure of your documents (you will get one document per chats. I would suggest using $map to get an array of weights based on specified userId and then you can use $max to get final weight
db.col.aggregate([
{ $match: {'event._id':ObjectId('5c942a3591deb389bfd92579'), 'chats.enabled': {$exists: true}}},
{
"$project": {
"_id": 1,
"chats.user._id": 1,
"weight": {
$max: { $map: { input: "$chats", in: { $cond: [ { $eq: [ "$$this.user._id", ObjectId("5c942a3591deb389bfd92579") ] }, 10, 0 ] } } }
}
}
},
{ "$sort": { "weight": -1 } },
])

Related

How to update an array and pull a nested element from same array

With the following document:
{
"_id" : "123",
"firstArray" : [
{
"_id" : "456",
"status" : "open",
"nestedArray" : [
{
"_id" : "100",
"quantity" : 10
},
{
"_id" : "101",
"quantity" : 10
},
{
"_id" : "102",
"quantity" : 10
}
},
{
"_id" : "789",
"status" : "open",
"nestedArray" : [
{
"_id" : "200",
"quantity" : 10
},
{
"_id" : "201",
"quantity" : 10
},
{
"_id" : "202",
"quantity" : 10
}
}
]
}
How can I update the quantity by 20 of the nested ID 101 element and pull the one with the ID 201 from the same MongoDB query ?
I am trying to do that in Java with $set and $pull operator and I'm stuck with the following error:
[BulkWriteError{index=0, code=40, message='Update created a conflict
at 'firstArray.0.nestedArray'', details={}}]
MongoDB doesn’t allow multiple operations on the same property in the same update call. This means that the two operations must happen in two individual queries.
The first solution is you can write 2 seperate queries for both the operations.
The second solution is you can try update with aggregation pipeline, starting from MongoDB 4.2,
$map to iterate loop of firstArray
$filter to iterate loop of nestedArray and remove _id: "201" record
$map to iterate loop of above filtered nestedArray
$cond check condition if _id: "101" then return new quantity otherwise return current
$mergeObjects to merge current object with updated properties
db.collection.update(
{ "firstArray.nestedArray._id": "101" },
[{
$set: {
firstArray: {
$map: {
input: "$firstArray",
in: {
$mergeObjects: [
"$$this",
{
nestedArray: {
$map: {
input: {
$filter: {
input: "$$this.nestedArray",
cond: { $ne: ["$$this._id", "201"] }
}
},
in: {
_id: "$$this._id",
quantity: {
$cond: [
{ $eq: ["$$this._id", "101"] },
20,
"$$this.quantity"
]
}
}
}
}
}
]
}
}
}
}
}
])
Playground

How to remove a document inside an array in mongodb using $pull

I have the following document structure
{
"_id" : ObjectId("5ffef283f1f06ff8524aa2c2"),
"applicationName" : "TestApp",
"pName" : "",
"environments" : [],
"stages" : [],
"createdAt" : ISODate("2021-01-15T09:51:35.546Z"),
"workflows" : [
[
{
"pName" : "Test1",
"wName" : "TestApp_Test1",
"agent" : ""
},
{
"pName" : "Test2",
"wName" : "TestApp_Test2",
"agent" : ""
}
],
[
{
"pName" : "Test1",
"wName" : "TestApp_Test1",
"agent" : ""
}
]
],
"updatedAt" : Date(-62135596800000)
}
I wish to remove the occurrences of
{
"pName" : "Test1",
"wName" : "TestApp_Test1",
"agent" : ""
}
The resultant document should look like
{
"_id" : ObjectId("5ffef283f1f06ff8524aa2c2"),
"applicationName" : "TestApp",
"pName" : "",
"environments" : [],
"stages" : [],
"createdAt" : ISODate("2021-01-15T09:51:35.546Z"),
"workflows" : [
[
{
"pName" : "Test2",
"wName" : "TestApp_Test2",
"agent" : ""
}
]
],
"updatedAt" : Date(-62135596800000)
}
I've tried the below mongo query
db.getCollection('workflows').update({_id:ObjectId('5ffef283f1f06ff8524aa2c2')},
{$pull:{workflows: { $elemMatch: {pipelineName: 'Test1'}}}} )
This is removing all the documents from workflows field including Test2 since Test1 is matched.
How can we remove only the entries for Test1 and keep the others?
You can do it using the positional operator "$[]" :
db.getCollection('workflows').update({_id: ObjectId("5ffef283f1f06ff8524aa2c2") }, {$pull: {"workflows.$[]":{pName:"Test1" } } } )
but the schema looks abit strange and after the update you will have empty arrays inside workflows if all elements got deleted in the sub-array.
To fix the empty sub-arrays you will need to perform second operation to remove them:
db.getCollection('workflows').update({_id: ObjectId("5ffef283f1f06ff8524aa2c2") }, {$pull: {"workflows":[] } } )
You cannot use $elemMatch as it returns the first matching element in the array.
I am not sure there is another best way to do this with the provided schema design.
play
db.collection.aggregate({
"$unwind": "$workflows"
},
{
"$unwind": "$workflows"
},
{
"$match": {
"workflows.pName": {
"$ne": "Test1"
}
}
},
{
"$group": {
"_id": "$_id",
workflows: {
$push: "$workflows"
},
applicationName: {
"$first": "$applicationName"
}
}
},
{
"$group": {
"_id": "$_id",
workflows: {
$push: "$workflows"
},
applicationName: {
"$first": "$applicationName"
}
}
})
unwind twice required to de-normalize the data
match to filter out the unnecessary doc
group twice required to bring the required output
You can save this to a collection using $out as last stage.

In mongo DB, how do I grab multiple counts in a single query?

I'm currently trying to massage out counts from the mLab API for reasons I don't have control over. So I want to grab the data I need from there in one query so I can limit the amount of API calls.
Assuming that my data looks like this:
{
"_id": {
"$oid": "12345"
},
"dancer": "Beginner",
"pirate": "Advanced",
"chef": "Mid",
"beartamer": "Mid",
"swordsman": "Mid",
"total": "Mid"
}
I know I can do 6 queries with something similar to:
db.score.aggregate({"$group": { _id: {"total":"$total"}, count: {$sum:1} }} )
but how do I query to get the count for each key? I'd like to see something akin to:
{ "_id" : { "total" : "Advanced" }, "count" : 1 }
{ "_id" : { "total" : "Mid" }, "count" : 1 }
{ "_id" : { "total" : "Beginner" }, "count" : 4 }
{ "_id" : { "pirate" : "Advanced" }, "count" : 1 }
//...etc
The following should give you precisely what you want:
db.scores.aggregate({
$project: {
"_id": 0 // get rid of the "_id" field since we do not want to count it
}
}, {
$project: {
"doc": {
$objectToArray: "$$ROOT" // transform all documents into key-value pairs
}
}
}, {
$unwind: "$doc" // flatten the resulting array into separate documents
}, {
$group: {
"_id": "$doc", // group by distinct key-value combination
"count": { $sum: 1 } // count documents per bucket
}
}, {
$project: {
"_id": { // some more transformation magic to recreate the desired output structure
$mergeObjects: [
{ $arrayToObject: [ [ "$_id" ] ] },
{ "count": "$count" }
]
},
}
}, {
$replaceRoot: {
"newRoot": "$_id" // this moves the contents of the "_id" field to the root of the documents
}
})

$in requires an array as a second argument, found: missing

can anybody please tell me what am i doing wrong?
db document structure:
{
"_id" : "module_settings",
"moduleChildren" : [
{
"_id" : "module_settings_general",
"name" : "General",
},
{
"_id" : "module_settings_users",
"name" : "Users",
},
{
"_id" : "module_settings_emails",
"name" : "Emails",
}
],
“permissions” : [
"module_settings_general",
"module_settings_emails"
]
}
pipeline stage:
{ $project: {
filteredChildren: {
$filter: {
input: "$moduleChildren",
as: "moduleChild",
cond: { $in : ["$$moduleChild._id", "$permissions"] }
}
},
}}
I need to filter "moduleChildren" array to show only modules which ids are in "permissions" array. Ive tried "$$ROOT.permissions" and "$$CURRENT.permissions" but none of them is working. I always get an error that $in is missing array as argument. It works when i hardcode the array like this: cond: { $in : ["$$moduleChild._id", [“module_settings_general", "module_settings_emails”]] } so it seems the problem is in passing of the array.
Thanks for any advices!
First option --> Use aggregation
Because your some of the documents in your collection may or may not contain permissions field or is type not equal to array that's why you are getting this error.
You can find the $type of the field and if it is not an array or not exists in your document than you can add it as an array with $addFields and $cond aggregation
db.collection.aggregate([
{ "$addFields": {
"permissions": {
"$cond": {
"if": {
"$ne": [ { "$type": "$permissions" }, "array" ]
},
"then": [],
"else": "$permissions"
}
}
}},
{ "$project": {
"filteredChildren": {
"$filter": {
"input": "$moduleChildren",
"as": "moduleChild",
"cond": {
"$in": [ "$$moduleChild._id", "$permissions" ]
}
}
}
}}
])
Second option -->
Go to your mongo shell or robomongo on any GUI you are using and run
this command
db.collection.update(
{ "permissions": { "$ne": { "$type": "array" } } },
{ "$set": { "permissions": [] } },
{ "multi": true }
)

MongoDB query to find document with duplicate value in array

tldr; I'm struggling to construct a query to
Make an aggregation to get a count of values on a certain key ("original_text_source"), which
Is in a sub-document that is in an array
Full description
I have embedded documents with arrays that are structured like this:
{
"_id" : ObjectId("0123456789"),
"type" : "some_object",
"relationships" : {
"x" : [ ObjectId("0123456789") ],
"y" : [ ObjectId("0123456789") ],
},
"properties" : [
{
"a" : "1"
},
{
"b" : "1"
},
{
"original_text_source" : "foo.txt"
},
]
}
The docs were created from exactly 10k text files, sorted in various folders. During inserting documents into the MongoDB (in batches) I messed up and moved a few files around, causing one file to be imported twice (my database has a count of exactly 10001 docs), but obviously I don't know which one it is. Since one of the "original_text_source" values has to have a count of 2, I was planning on just deleting one.
I read up on solutions with $elemMatch, but since my array element is a document, I'm not sure how to proceed. Maybe with mapReduce? But I can't transfer the logic to my doc structure.
I also could just create a new collection and reupload all, but in case I mess up again, I'd rather like to learn how to query for duplicates. It seems more elegant :-)
You can find duplicates with a simple aggregation like this:
db.collection.aggregate(
{ $group: { _id: "$properties.original_text_source", docIds: { $push: "$_id" }, docCount: { $sum: 1 } } },
{ $match: { "docCount": { $gt: 1 } } }
)
which gives you something like this:
{
"_id" : [
"foo.txt"
],
"docIds" : [
ObjectId("59d6323613940a78ba1d5ffa"),
ObjectId("59d6324213940a78ba1d5ffc")
],
"docCount" : 2.0
}
Run the following:
db.collection.aggregate([
{ $group: {
_id: { name: "$properties.original_text_source" },
idsForDuplicatedDocs: { $addToSet: "$_id" },
count: { $sum: 1 }
} },
{ $match: {
count: { $gte: 2 }
} },
{ $sort : { count : -1} }
]);
Given a collection which contains two copies of the document you showed in your question, the above command will return:
{
"_id" : {
"name" : [
"foo.txt"
]
},
"idsForDuplicatedDocs" : [
ObjectId("59d631d2c26584cd8b7b3337"),
ObjectId("59d631cbc26584cd8b7b3333")
],
"count" : 2
}
Where ...
The attribute _id.name is the value of the duplicated properties.original_text_source
The attribute idsForDuplicatedDocs contains the _id values for each of the documents which have a duplicated properties.original_text_source
"reviewAndRating": [
{
"review": "aksjdhfkashdfkashfdkjashjdkfhasdkjfhsafkjhasdkjfhasdjkfhsdakfj",
"productId": "5bd956f29fcaca161f6b7517",
"_id": "5bd9745e2d66162a6dd1f0ef",
"rating": "5"
},
{
"review": "aksjdhfkashdfkashfdkjashjdkfhasdkjfhsafkjhasdkjfhasdjkfhsdakfj",
"productId": "5bd956f29fcaca161f6b7518",
"_id": "5bd974612d66162a6dd1f0f0",
"rating": "5"
},
{
"review": "aksjdhfkashdfkashfdkjashjdkfhasdkjfhsafkjhasdkjfhasdjkfhsdakfj",
"productId": "5bd956f29fcaca161f6b7517",
"_id": "5bd974622d66162a6dd1f0f1",
"rating": "5"
}
]

Resources