MongoDB get documents from a collection not in other collection - arrays

I'm trying to get the documents from one collection that are not in another collection (the equivalent of the common NOT IN clause in SQL).
If I run the following query:
db.Companies_Movies.aggregate([
{
$project:
{
"CompanyList.Movies.Code" : 1
}
},
{
$match:
{
"CompanyList.CodeCompany": "23"
}
},
{
$lookup:
{
from: "Movies",
localField: "CompanyList.Movies.Code",
foreignField: "Movie.Code",
as: "matched_docs"
}
}
]);
This query shows the movies that are included both in CompanyList.Movies.Code and in Movie.Code. Good.
But I want just the rest of the movies: those included in CompanyList.Movies whose codes aren't included in Movie.Code.
As Nikos Tsagkas said in Get "data from collection b not in collection a" in a MongoDB shell query, it should be sufficient to add the following stage:
{
$match: { "matched_docs": { $eq: [] } }
}
But when I run my final code, it doesn't return anything:
db.Companies_Movies.aggregate([
{
$project:
{
"CompanyList.Movies.Code" : 1
}
},
{
$match:
{
"CompanyList.CodeCompany": "23"
}
},
{
$lookup:
{
from: "Movies",
localField: "CompanyList.Movies.Code",
foreignField: "Movie.Code",
as: "matched_docs"
}
},
{
$match: { "matched_docs": { $eq: [] } }
}
]);
There are 59 documents that are not returned by this code.
This is the pipeline I created in MongoDB Compass after Tom's changes, and it still doesn't work:
[{
$match:
{
'CompanyList.CodeCompany': '23'
}
},
{
$lookup:
{
from: 'Movies',
localField: 'CompanyList.Movies.Code',
foreignField: 'Movie.Code',
as: 'docs'
}
},
{
$project:
{
'CompanyList.Movies.Code': 1,
'CompanyList.CodeCompany': 1
}
},
{
$match:
{
docs:{ $eq: [] }
}
}]
If I delete the $project stage, it doesn't work either.
Sample Data (reduced)
Companies_Movies collection:
{
_id:ObjectId("61bf47b974641866e1244e65"),
"CompanyList": {
"CodeCompany": "23",
"NameCompany": "Company Name Entertainment",
"Movies": [{
"Code": "123",
"Name": "Title 1",
"Order": 1,
"UserDescription": null
}, {
"Code": "124",
"Name": "Title 2",
"Order": 2,
"UserDescription": null
}, {
"Code": "125",
"Name": "Title 3",
"Order": 3,
"UserDescription": null
}],
"DateInserted": {
"$date": "2021-12-13T17:30:06.824Z"
}
}
}
Movies collection:
[{
_id:ObjectId("61bf57bc9d1f93b7ae5fa785"),
"Movie": {
"Code": "123",
"OriginalTitle": "Title 1",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa786"),
"Movie": {
"Code": "124",
"OriginalTitle": "Title 2",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa787"),
"Movie": {
"Code": "125",
"OriginalTitle": "Title 3",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa788"),
"Movie": {
"Code": "126",
"OriginalTitle": "Title 4",
"Year": 2021
}
}]
Does anyone know what might be happening?
Thanks to everyone.

This is simply caused by your $project stage, after you run:
{
$project: {
"CompanyList.Movies.Code" : 1
}
},
Your data will look like this:
{
CompanyList: [
{
Movies: { code: "123", ... other fields }
}
]
}
Now you're trying to match "CompanyList.CodeCompany": "23", but the field CodeCompany simply does not exist anymore as you did not provide it in the project stage.
So just change your projection stage to include the fields you will use in later stages:
{
$project: {
"CompanyList.Movies.Code" : 1,
"CompanyList.CodeCompany": 1
}
},

Related

Mongo DB query to match a field1 and loop thru another field2 and get output as a single array with all fields of field2

Need help with a MongoDB query.
MongoDB query: search for parents with state good and children with state bad or missing. The output should be a single array of all the children with state bad or missing that belong to parents with state good.
Below is the JSON list
[
{
"name": "parent-a",
"status": {
"state": "good"
},
"children": [
"child-1",
"child-2"
]
},
{
"name": "child-1",
"state": "good",
"parent": "parent-a"
},
{
"name": "child-2",
"state": {},
"parent": "parent-a"
},
{
"name": "parent-b",
"status": {
"state": "good"
},
"children": [
"child-3",
"child-4"
]
},
{
"name": "child-3",
"state": "good",
"parent": "parent-b"
},
{
"name": "child-4",
"state": "bad",
"parent": "parent-b"
},
{
"name": "parent-c",
"status": {
"state": "bad"
},
"children": [
"child-5",
"child-6"
]
},
{
"name": "child-5",
"state": "good",
"parent": "parent-c"
},
{
"name": "child-6",
"state": "bad",
"parent": "parent-c"
}
]
Expected output
"children": [
{
"name": "child-2",
"state": {}
},
{
"name": "child-4",
"state": "bad"
}
]
Any inputs would be appreciated. Thanks in advance :)
One option is to use $lookup* for this:
// Returns every document whose `state` is "bad" or an empty object and whose
// parent document has `status.state` equal to "good".
db.collection.aggregate([
// Keep only documents whose `state` is "bad" or an empty object ({}).
{$match: {state: {$in: ["bad", {}]}}},
// Self-join on the same collection: fetch the document whose `name` equals this
// document's `parent`, keeping it only if its `status.state` is "good".
// Combining localField/foreignField with `pipeline` requires MongoDB 5.0 or later.
{$lookup: {
from: "collection",
localField: "parent",
foreignField: "name",
pipeline: [
{$match: {"status.state": "good"}}
],
as: "hasGoodParent"
}},
// An element at index 0 means the lookup found a "good" parent.
{$match: {"hasGoodParent.0": {$exists: true}}},
// Emit only `name` and `state`; each match is returned as its own document.
{$project: {name: 1, state: 1, _id: 0}}
])
See how it works on the playground example
*If your MongoDB version is lower than 5.0, you need to change the syntax a bit: drop the localField and foreignField of the $lookup and replace them with let and an equality match inside the pipeline.
Here is an approach that does all of this without a "$lookup" stage, as performance usually suffers when one is involved. Basically, we match all relevant children and parents and group by the child id. If a group has a parent (which means the parent has a "good" state) and a child (which means the child has a "bad"/{} state), then it's matched.
You should make sure you have the appropriate indexes to support the initial query.
Additionally, I would personally recommend adding a boolean field on each document to mark whether it's a parent or a child. Right now we have to rely on the field structure of your input to determine this type, which I would consider bad practice.
Another thing we did not discuss, and which doesn't seem possible with the current structure, is recursion: can a child have children of its own? Just some things to consider.
// Lookup-free variant: produces a single document whose `children` array lists
// every child in a bad/null/empty state that is referenced by a "good" parent.
db.collection.aggregate([
{
// Stage 1: keep "good" parents that list at least one child, plus children
// whose state is "bad", null/missing, or an empty object.
$match: {
$or: [
{
// Parent branch: good status, no `parent` field, non-empty `children` array.
$and: [
{
"status.state": "good"
},
{
parent: {
$exists: false
}
},
{
"children.0": {
$exists: true
}
}
]
},
{
// Child branch: problematic state and a `parent` field present.
$and: [
{
"state": {
$in: [
"bad",
null,
{}
]
}
},
{
parent: {
$exists: true
}
}
]
}
]
}
},
{
// Stage 2: one row per (parent, child name); child documents have no
// `children` array and are kept thanks to preserveNullAndEmptyArrays.
$unwind: {
path: "$children",
preserveNullAndEmptyArrays: true
}
},
{
// Stage 3: flag the row as parent (1) when `parent` is missing or null, else 0.
$addFields: {
isParent: {
$cond: [
{
$eq: [
null,
{
$ifNull: [
"$parent",
null
]
}
]
},
1,
0
]
}
}
},
{
// Stage 4: group by child name. Parent rows contribute the unwound child name,
// child rows contribute their own `name`.
$group: {
_id: {
$cond: [
"$isParent",
"$children",
"$name"
]
},
// Number of parent rows in the group ("hasParnet" is spelled this way
// consistently; the final $match below relies on the same key).
hasParnet: {
$sum: "$isParent"
},
// Number of child rows in the group.
hasChild: {
$sum: {
$subtract: [
1,
"$isParent"
]
}
},
// Carry the child's `state` through the group; parent rows add nothing.
state: {
"$mergeObjects": {
$cond: [
"$isParent",
{},
{
state: "$state"
}
]
}
}
}
},
{
// Stage 5: keep only groups that contain both a matching child row and a
// matching ("good") parent row.
$match: {
hasChild: {
$gt: 0
},
hasParnet: {
$gt: 0
}
}
},
{
// Stage 6: fold all remaining children into one `children` array.
$group: {
_id: null,
children: {
$push: {
name: "$_id",
state: "$state.state"
}
}
}
}
])
Mongo Playground

Aggregation between a MongoDB collection and an external array

I'm looking for a way to join a collection with an external array of strings (codes) that returns another string array containing all the codes of the first array that aren't included in the collection.
The collection sample:
[{
_id:ObjectId("61bf57bc9d1f93b7ae5fa785"),
"Movie": {
"Code": "123",
"OriginalTitle": "Title 1",
"Year": 2021
}},{
_id:ObjectId("61bf57bc9d1f93b7ae5fa786"),
"Movie": {
"Code": "124",
"OriginalTitle": "Title 2",
"Year": 2021
}},{
_id:ObjectId("61bf57bc9d1f93b7ae5fa787"),
"Movie": {
"Code": "125",
"OriginalTitle": "Title 3",
"Year": 2021
}},{
_id:ObjectId("61bf57bc9d1f93b7ae5fa788"),
"Movie": {
"Code": "126",
"OriginalTitle": "Title 4",
"Year": 2021
}
}]
the external array:
const codes = ["125", "127", "128", "129"];
The aggregation must compare "Movie.Code" with the array and return another array with the following values:
returnCodes = ["127", "128", "129"];
How can I make it?
Maybe this:
// Evaluates to the entries of `codes` that do not occur as "Movie.Code" in the collection.
(
  db.collection.aggregate([
    // Collapse the whole collection into one document holding every Movie.Code.
    {
      $group: {
        _id: null,
        Code: { $push: "$Movie.Code" }
      }
    },
    // Keep only the external codes that are absent from the collected list.
    {
      $project: {
        returnCodes: {
          $filter: {
            input: codes,
            cond: { $not: { $in: ["$$this", "$Code"] } }
          }
        }
      }
    }
  ]).toArray().shift()
  // On an empty collection $group emits no document, so shift() yields undefined
  // and reading .returnCodes would throw. No code exists in the collection in
  // that case, so every entry of `codes` is the correct answer.
  || { returnCodes: codes }
).returnCodes
Of course, you could do it also in Javascript:
// Client-side alternative: fetch the movie codes and compute the difference in JavaScript.
const codes = ["125", "127", "128", "129"];
// Project each document down to its movie code (aliased as `Code`) and collect the values.
const coll = db.collection.find({}, { Code: "$Movie.Code" }).toArray().map(x => x.Code);
// A Set gives constant-time membership tests instead of rescanning `coll` for every code.
const existingCodes = new Set(coll);
// Declared with `const` so the result is not leaked as an implicit global.
const returnCodes = codes.filter(x => !existingCodes.has(x));

Update all objects in nested array with values from other collection

I have a collection of vehicles with the following car structure:
{
"_id": {}
brand : ""
model : ""
year : ""
suppliers : [
"name": "",
"contact": ""
"supplierId":"",
"orders":[], <-- Specific to the vehicles collection
"info":"" <-- Specific to the vehicles collection
]
}
And a Suppliers collection with a structure like:
{
"name":"",
"contact":"",
"_id":{}
"internalId":"",
"address":"",
...
}
I need to add a new field to each item of the suppliers array in every document of the vehicles collection, holding the internalId of the supplier in the suppliers collection that has the same _id.
For example, if the suppliers array has an item with the id 123, I should go to the suppliers collection, look for the supplier with the id 123, and retrieve its internalId. Afterwards, I should create the field in that suppliers array item with that value.
So that I end up with the vehicles collection as:
{
"_id": {}
brand : ""
model : ""
year : ""
suppliers : [
"name": "",
"contact": ""
"supplierId":""
"internalId":"" <-- the new field
]
}
Tried:
db.vehicles.aggregate([
{
"$unwind": { "path": "$suppliers", "preserveNullAndEmptyArrays": false }
},
{
"$project": { "supplierObjId": { "$toObjectId": "$suppliers.supplierId" } }
},
{
"$lookup":
{
"from": "suppliers",
"localField": "supplierObjId",
"foreignField": "_id",
"as": "supplierInfo"
}
},{
"$set": {
"suppliers.internalId": "$supplierInfo.internalid"
}}
])
But it adds the new field to the returned results instead of to the array items in the collection.
How can i achieve this?
> But it adds the new field to the returned results instead of to the array items in the collection.
The .aggregate method does not update documents; it only formats the result documents.
You have to use two queries: first an aggregate, then an update.
I assume you want to execute this only once, so I am suggesting a query you can run in the mongo shell.
Aggregation query:
$lookup with pipeline, pass suppliers.supplierId in let
$toString to convert object id to string type
$match the $in condition
$project to show required fields
$map to iterate loop of suppliers array
$reduce to iterate loop of suppliers_data array and find the matching record by supplierId
$mergeObjects to merge current object properties with new property internalId
Loop the result from aggregate query using forEach
Update Query to update suppliers array
// Step 1 (aggregate): build, for every vehicle, a `suppliers` array in which each
// item is enriched with the matching supplier's `internalId`.
// Step 2 (forEach/updateOne): write that array back, since aggregate() on its own
// only shapes the returned documents.
db.vehicles.aggregate([
{
// Fetch the supplier documents whose stringified _id appears among this
// vehicle's suppliers.supplierId values.
$lookup: {
from: "suppliers",
// `$suppliers.supplierId` resolves to the array of supplierId values.
let: { supplierId: "$suppliers.supplierId" },
pipeline: [
{
$match: {
$expr: {
// _id is converted to a string so it can be compared with the stored supplierId.
$in: [{ $toString: "$_id" }, "$$supplierId"]
}
}
},
{
// Keep only what is needed for the merge: the id (as string) and internalId.
$project: {
_id: 0,
supplierId: { $toString: "$_id" },
internalId: 1
}
}
],
as: "suppliers_data"
}
},
{
$project: {
suppliers: {
// Rebuild the suppliers array item by item.
$map: {
input: "$suppliers",
as: "s",
in: {
// Original item plus the new `internalId` property.
$mergeObjects: [
"$$s",
{
internalId: {
// Scan the looked-up suppliers for the one with the same supplierId;
// the result stays "" when none matches.
$reduce: {
input: "$suppliers_data",
initialValue: "",
in: {
$cond: [
{ $eq: ["$$this.supplierId", "$$s.supplierId"] },
"$$this.internalId",
"$$value"
]
}
}
}
}
]
}
}
}
}
}
])
// Persist the enriched array: one update per vehicle, replacing `suppliers` wholesale.
.forEach(function(doc) {
db.vehicles.updateOne({ _id: doc._id }, { $set: { suppliers: doc.suppliers } });
});
Playground for aggregation query, and Playground for update query.
It looks like one way to solve this is by using $addFields and $lookup. We first flatten any matching suppliers, then add the property, then regroup.
You can find a live demo here via Mongo Playground.
Database
Consider the following database structure:
[{
// Collection
"vehicles": [
{
"_id": "1",
brand: "ford",
model: "explorer",
year: "1999",
suppliers: [
{
name: "supplier1",
contact: "john doe",
supplierId: "001"
},
{
name: "supplier2",
contact: "jane doez",
supplierId: "002"
}
]
},
{
"_id": "2",
brand: "honda",
model: "accord",
year: "2002",
suppliers: [
{
name: "supplier1",
contact: "john doe",
supplierId: "001"
},
]
}
],
// Collection
"suppliers": [
{
"name": "supplier1",
"contact": "john doe",
"_id": "001",
"internalId": "999-001",
"address": "111 main street"
},
{
"name": "supplier2",
"contact": "jane doez",
"_id": "002",
"internalId": "999-002",
"address": "222 north street"
},
{
"name": "ignored_supplier",
"contact": "doesnt matter",
"_id": "xxxxxxx",
"internalId": "xxxxxxx",
"address": "0987 midtown"
}
]
}]
Query
This is the query that I was able to get working. I'm not sure how efficient it is, or if it can be improved, but this seemed to do the trick:
// Flattens each vehicle's suppliers, attaches the matching supplier's internalId,
// then regroups the suppliers per vehicle. The pipeline has no write stage, so it
// only returns the enriched documents.
db.vehicles.aggregate([
{
// One document per (vehicle, supplier) pair.
$unwind: "$suppliers"
},
{
// Find the supplier document referenced by this supplier entry.
$lookup: {
from: "suppliers",
localField: "suppliers.supplierId",
foreignField: "_id", // <---- OR MATCH WHATEVER FIELD YOU WANT
as: "vehicle_suppliers"
}
},
{
// Turn the lookup result array into a single object; pairs with no matching
// supplier are dropped here because empty arrays are not preserved.
$unwind: "$vehicle_suppliers"
},
{
// Copy the looked-up internalId onto the supplier entry.
$addFields: {
"suppliers.internalId": "$vehicle_suppliers.internalId"
}
},
{
// Reassemble one document per vehicle; only the fields listed here are kept.
$group: {
_id: "$_id",
brand: {
$first: "$brand"
},
model: {
$first: "$model"
},
year: {
$first: "$year"
},
suppliers: {
$push: "$suppliers"
}
}
}
])
Results
Which returns:
[
{
"_id": "2",
"brand": "honda",
"model": "accord",
"suppliers": [
{
"contact": "john doe",
"internalId": "999-001",
"name": "supplier1",
"supplierId": "001"
}
],
"year": "2002"
},
{
"_id": "1",
"brand": "ford",
"model": "explorer",
"suppliers": [
{
"contact": "john doe",
"internalId": "999-001",
"name": "supplier1",
"supplierId": "001"
},
{
"contact": "jane doez",
"internalId": "999-002",
"name": "supplier2",
"supplierId": "002"
}
],
"year": "1999"
}
]

Finding ID of mongo documents with duplicated elements in nested array

I would like to extract from the collection the IDs of documents that have duplicate IDs of "drives" objects that are nested in the array that is in "streetModel".
This is my typical document :
{
"_id": {
"$oid": "61375bec4fa522001b608568"
},
"name": "Streetz",
"statusDetail": {},
"streetModel": {
"_id": "3.7389-51.0566",
"name": "Kosheen - Darude - Swedish - Trynidad - Maui",
"countryCode": "DEN",
"drives": [{
"_id": -903500698,
"direction": "WEST"
}, {
"_id": 1915399546,
"direction": "EAST"
}, {
"_id": 1294835467,
"direction": "NORTH"
}, {
"_id": 1248969937,
"direction": "EAST"
}, {
"_id": 1248969937,
"direction": "EAST"
}, {
"_id": 1492411786,
"direction": "SOUTH"
}]
},
"createdAt": {
"$date": "2021-09-07T12:32:44.238Z"
}
}
In this particular document with the ID 61375bec4fa522001b608568, in "streetModel", in "drives" array I have got duplicated drives objects with id 1248969937.
I would like to create a query to the database that will return the ID of all documents with such a problem (duplicate "drives").
Right now I have got this:
db.streets.aggregate([
{
$unwind: "$streetModel"
},
{
$unwind: "$drives"
},
{
$group: {
_id: {
id: "$_id"
},
sum: {
$sum: 1
},
}
},
{
$match: {
sum: {
$gt: 1
}
}
},
{
$project: {
_id: "$_id._id",
duplicates: {
drives: "$_id"
}
}
}
])
But that's not it.
I have tried many ways to rewrite this query, but unfortunately none of them works.
Query
unwind
group by document id + drive id
keep only the groups in which the same drive id appears more than once
$replaceRoot is there to make the output document better looking; you could also use $project instead
if you need any more stages you can add them, for example to project only the doc ids of the documents that have this problem
Test code here
// Lists every (document id, drive id) pair where the same drive `_id` occurs more
// than once inside streetModel.drives.
db.collection.aggregate([
{
// One document per element of streetModel.drives.
"$unwind": {
"path": "$streetModel.drives"
}
},
{
// Group by owning document and drive id, collecting the direction of each occurrence.
"$group": {
"_id": {
"docid": "$_id",
"driveid": "$streetModel.drives._id"
},
"duplicates": {
"$push": "$streetModel.drives.direction"
}
}
},
{
// More than one collected entry means the drive id is duplicated in that document.
"$match": {
"$expr": {
"$gt": [
{
"$size": "$duplicates"
},
1
]
}
}
},
{
// Lift docid/driveid out of the compound _id to the top level of the result.
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$_id",
"$$ROOT"
]
}
}
},
{
// Drop the now-redundant compound _id.
"$project": {
"_id": 0
}
}
])

How to remove duplicate values inside a list array in MongoDB?

I have many records in one collection in MongoDB, and these are 3 examples. I want to remove duplicates based only on the QUESTION match criterion.
{
"_id": {
"$oid": "5f0f561256efe82f5082252e"
},
"Item1": false,
"Item2": "",
"Item3": 1,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
and another one to compare
{
"_id": {
"$oid": "5f0f561256efe82f5082252c"
},
"Item1": false,
"Item2": "",
"Item3": 2,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
the third one :
{
"_id": {
"$oid": "5f0f561256efe82f5082252d"
},
"Item1": false,
"Item2": "",
"Item3": 3,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your last name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
What I'm trying to do here is write a query with the aggregation approach, focusing only on the Item4 entry with exactly ("Name": "QUESTION") and its value (the question) to identify the duplication.
The idea is to look for duplication in the question itself only ("What is your name?" in our example here). I don't want to specify which question, because there is a long list of them.
I'm looking just for the duplicated questions, no matter what the question looks like.
I used the following approach, but I still cannot narrow down the output to only the question and its value in order to delete the duplicates in another step.
db.collections.aggregate([{ $unwind: "$Item4" }, {$group: { _id: { QUESTION: "$Item4.Name.4", Value: "$Item4.Value.4" }}}]).pretty()
I'm executing from mongo shell directly.
The following aggregation will list all the documents (the _ids) which have the duplicates of "Item4.Value" for the condition "Item4.Name": "QUESTION".
// Groups documents by the text of their QUESTION entry and returns only the
// question values shared by more than one document, with the _ids that share them.
db.test.aggregate( [
{
// One document per Item4 element.
$unwind: "$Item4"
},
{
// Keep only the element that carries the question text.
$match: { "Item4.Name": "QUESTION" }
},
{
// Collect the _ids of all documents having the same question text.
$group: {
_id: { "Item4_Value": "$Item4.Value" },
ids: { $push: "$_id" }
}
},
{
// More than one _id means the question is duplicated.
$match: { $expr: { $gt: [ { $size: "$ids" }, 1 ] } }
}
] )
It works! Thanks a lot. I added it to the rest of the code as below:
// Deletes all but the first document of every group sharing the same QUESTION text.
// The document count is printed before and after to show how many were removed.
db.test.find().count()
// Accumulates the _ids of the documents to delete.
const duplicatesIds = [];
db.test.aggregate( [
{
// One document per Item4 element.
$unwind: "$Item4"
},
{
$match: { "Item4.Name": "QUESTION" } // Keep only the Item4 element whose Name is "QUESTION".
},
{
// Collect the _ids of all documents having the same question text.
$group: {
_id: { "Item4_Value": "$Item4.Value" },
ids: { $push: "$_id" }
}
}
],
{
allowDiskUse: true
}
).forEach(function (doc) {
// Drop the first _id so that one document per question is kept; groups with a
// single _id therefore contribute nothing to the delete list.
doc.ids.shift();
doc.ids.forEach(function (dupId) {
duplicatesIds.push(dupId);
})
});
printjson(duplicatesIds);
// Destructive step: removes every collected duplicate in one call.
db.test.remove({_id:{$in:duplicatesIds}})
db.test.find().count()

Resources