Update all objects in nested array with values from other collection - arrays

I have a collection of vehicles with the following car structure:
{
"_id": {}
brand : ""
model : ""
year : ""
suppliers : [
"name": "",
"contact": ""
"supplierId":"",
"orders":[], <-- Specific to the vehicles collection
"info":"" <-- Specific to the vehicles collection
]
}
And a Suppliers collection with a structure like:
{
"name":"",
"contact":"",
"_id":{}
"internalId":"",
"address":"",
...
}
I need to add a new field in the suppliers array within each document in the vehicles collection with the internalId field from the supplier in the suppliers collection that has the same _id.
if the supplier array has a document with the id 123, i should go to the suppliers collection and look for the supplier with the id 123 and retrieve the internalId. afterwards should create the field in the supplier array with that value.
So that i end up with the vehicles collection as:
{
"_id": {}
brand : ""
model : ""
year : ""
suppliers : [
"name": "",
"contact": ""
"supplierId":""
"internalId":"" <-- the new field
]
}
Tried:
db.vehicles.aggregate([
{
"$unwind": { "path": "$suppliers", "preserveNullAndEmptyArrays": false }
},
{
"$project": { "supplierObjId": { "$toObjectId": "$suppliers.supplierId" } }
},
{
"$lookup":
{
"from": "suppliers",
"localField": "supplierObjId",
"foreignField": "_id",
"as": "supplierInfo"
}
},{
"$set": {
"suppliers.internalId": "$supplierInfo.internalid"
}}
])
But it is adding the new field, to the returned values instead to the array item at the collection.
How can i achieve this?

But it is adding the new field, to the returned values instead to the array item at the collection.
The .aggregate() method does not update documents; it only shapes the documents it returns.
You have to use two queries: first an aggregation, then an update.
I assume you want to run this only once, so I am suggesting a query you can execute in the mongo shell.
Aggregation query:
$lookup with pipeline, pass suppliers.supplierId in let
$toString to convert object id to string type
$match the $in condition
$project to show required fields
$map to iterate loop of suppliers array
$reduce to iterate loop of suppliers_data array and find the matching record by supplierId
$mergeObjects to merge current object properties with new property internalId
Loop the result from aggregate query using forEach
Update Query to update suppliers array
db.vehicles.aggregate([
{
$lookup: {
from: "suppliers",
let: { supplierId: "$suppliers.supplierId" },
pipeline: [
{
$match: {
$expr: {
$in: [{ $toString: "$_id" }, "$$supplierId"]
}
}
},
{
$project: {
_id: 0,
supplierId: { $toString: "$_id" },
internalId: 1
}
}
],
as: "suppliers_data"
}
},
{
$project: {
suppliers: {
$map: {
input: "$suppliers",
as: "s",
in: {
$mergeObjects: [
"$$s",
{
internalId: {
$reduce: {
input: "$suppliers_data",
initialValue: "",
in: {
$cond: [
{ $eq: ["$$this.supplierId", "$$s.supplierId"] },
"$$this.internalId",
"$$value"
]
}
}
}
}
]
}
}
}
}
}
])
.forEach(function(doc) {
db.vehicles.updateOne({ _id: doc._id }, { $set: { suppliers: doc.suppliers } });
});
Playground for aggregation query, and Playground for update query.

It looks like one way to solve this is by using $addFields and $lookup. We first flatten any matching suppliers, then add the property, then regroup.
You can find a live demo here via Mongo Playground.
Database
Consider the following database structure:
[{
// Collection
"vehicles": [
{
"_id": "1",
brand: "ford",
model: "explorer",
year: "1999",
suppliers: [
{
name: "supplier1",
contact: "john doe",
supplierId: "001"
},
{
name: "supplier2",
contact: "jane doez",
supplierId: "002"
}
]
},
{
"_id": "2",
brand: "honda",
model: "accord",
year: "2002",
suppliers: [
{
name: "supplier1",
contact: "john doe",
supplierId: "001"
},
]
}
],
// Collection
"suppliers": [
{
"name": "supplier1",
"contact": "john doe",
"_id": "001",
"internalId": "999-001",
"address": "111 main street"
},
{
"name": "supplier2",
"contact": "jane doez",
"_id": "002",
"internalId": "999-002",
"address": "222 north street"
},
{
"name": "ignored_supplier",
"contact": "doesnt matter",
"_id": "xxxxxxx",
"internalId": "xxxxxxx",
"address": "0987 midtown"
}
]
}]
Query
This is the query that I was able to get working. I'm not sure how efficient it is, or if it can be improved, but this seemed to do the trick:
db.vehicles.aggregate([
{
$unwind: "$suppliers"
},
{
$lookup: {
from: "suppliers",
localField: "suppliers.supplierId",
foreignField: "_id", // <---- OR MATCH WHATEVER FIELD YOU WANT
as: "vehicle_suppliers"
}
},
{
$unwind: "$vehicle_suppliers"
},
{
$addFields: {
"suppliers.internalId": "$vehicle_suppliers.internalId"
}
},
{
$group: {
_id: "$_id",
brand: {
$first: "$brand"
},
model: {
$first: "$model"
},
year: {
$first: "$year"
},
suppliers: {
$push: "$suppliers"
}
}
}
])
Results
Which returns:
[
{
"_id": "2",
"brand": "honda",
"model": "accord",
"suppliers": [
{
"contact": "john doe",
"internalId": "999-001",
"name": "supplier1",
"supplierId": "001"
}
],
"year": "2002"
},
{
"_id": "1",
"brand": "ford",
"model": "explorer",
"suppliers": [
{
"contact": "john doe",
"internalId": "999-001",
"name": "supplier1",
"supplierId": "001"
},
{
"contact": "jane doez",
"internalId": "999-002",
"name": "supplier2",
"supplierId": "002"
}
],
"year": "1999"
}
]

Related

Mongo DB query to match a field1 and loop thru another field2 and get output as a single array with all fields of field2

Need help with mongo db query
MongoDB query - search for parents with state good and children with state bad or missing. The output should be an array of all the children with state bad or missing from parents with good state.
Below is the JSON list
[
{
"name": "parent-a",
"status": {
"state": "good"
},
"children": [
"child-1",
"child-2"
]
},
{
"name": "child-1",
"state": "good",
"parent": "parent-a"
},
{
"name": "child-2",
"state": {},
"parent": "parent-a"
},
{
"name": "parent-b",
"status": {
"state": "good"
},
"children": [
"child-3",
"child-4"
]
},
{
"name": "child-3",
"state": "good",
"parent": "parent-b"
},
{
"name": "child-4",
"state": "bad",
"parent": "parent-b"
},
{
"name": "parent-c",
"status": {
"state": "bad"
},
"children": [
"child-5",
"child-6"
]
},
{
"name": "child-5",
"state": "good",
"parent": "parent-c"
},
{
"name": "child-6",
"state": "bad",
"parent": "parent-c"
}
]
Expected output
"children": [
{
"name": "child-2",
"state": {}
},
{
"name": "child-4",
"state": "bad"
}
]
Any inputs would be appreciated. Thanks in advance :)
One option is to use $lookup* for this:
db.collection.aggregate([
{$match: {state: {$in: ["bad", {}]}}},
{$lookup: {
from: "collection",
localField: "parent",
foreignField: "name",
pipeline: [
{$match: {"status.state": "good"}}
],
as: "hasGoodParent"
}},
{$match: {"hasGoodParent.0": {$exists: true}}},
{$project: {name: 1, state: 1, _id: 0}}
])
See how it works on the playground example
*If your mongoDB version is lower than 5.0 you need to change the syntax a bit. Drop the localField and foreignField of the $lookup and replace with let and equality match on the pipeline
Here is an approach that does all of this without a "$lookup" stage, as performance usually suffers when one is involved. Basically we match all relevant children and parents and group by the child's name. If a group has a parent (which means the parent has a "good" state) and a child (which means the child has a "bad"/{} state), then it is matched.
You should make sure you have the appropriate indexes to support the initial query.
Additionally, I would personally recommend adding a boolean field on each document to mark whether it's a parent or a child. Right now we have to rely on the field structure of your input to determine this type, but I would consider this a bad practice.
Another thing we did not discuss, and which doesn't seem possible with the current structure, is recursion: can a child have children of its own? Just some things to consider.
// Lists children whose state is "bad", null/missing or {} and whose parent is
// in a "good" state, without using $lookup.
db.collection.aggregate([
  // Keep only "good" parents that list at least one child, plus children
  // whose state is "bad", null/missing or an empty object.
  {
    $match: {
      $or: [
        {
          $and: [
            { "status.state": "good" },
            { parent: { $exists: false } },
            { "children.0": { $exists: true } }
          ]
        },
        {
          $and: [
            { state: { $in: ["bad", null, {}] } },
            { parent: { $exists: true } }
          ]
        }
      ]
    }
  },
  // One document per (parent, child name). Child documents have no `children`
  // array and are kept thanks to preserveNullAndEmptyArrays.
  { $unwind: { path: "$children", preserveNullAndEmptyArrays: true } },
  // isParent is 1 when the document has no `parent` field, otherwise 0.
  {
    $addFields: {
      isParent: {
        $cond: [{ $eq: [null, { $ifNull: ["$parent", null] }] }, 1, 0]
      }
    }
  },
  // Group by child name: parents contribute the name taken from their
  // `children` entry, children contribute their own name and state.
  {
    $group: {
      _id: { $cond: ["$isParent", "$children", "$name"] },
      hasParent: { $sum: "$isParent" },
      hasChild: { $sum: { $subtract: [1, "$isParent"] } },
      state: {
        $mergeObjects: { $cond: ["$isParent", {}, { state: "$state" }] }
      }
    }
  },
  // A group qualifies only if both the matching child and its good parent
  // made it through the first stage.
  { $match: { hasChild: { $gt: 0 }, hasParent: { $gt: 0 } } },
  // Collapse everything into a single { children: [...] } document.
  {
    $group: {
      _id: null,
      children: { $push: { name: "$_id", state: "$state.state" } }
    }
  }
])
Mongo Playground

Join collection with array object field with another collection in MongoDB

I'm working in MongoDB and getting stuck at one aggregation case. Let me show you my collection.
First collection (data):
[
{
"_id": "8e7b3fa0-4230-448c-8f70-1d7300632834",
"data": [
{
"animal" : "7d44251a-b308-4deb-875a-33ef0a69fe2b",
"place": "Chennai"
},
{
"animal" : "fcfdd527-5885-48b0-a91f-03f72f78528f",
"place": "Kolkata"
}
]
}
]
Second collection (Animal):
[
{
"_id": "7d44251a-b308-4deb-875a-33ef0a69fe2b",
"name": "Dog"
},
{
"_id": "fcfdd527-5885-48b0-a91f-03f72f78528f",
"name": "Cat"
}
]
I'm using this query:
db.data.aggregate([
{
"$lookup": {
"from": "animal",
"localField": "data.animal",
"foreignField": "_id",
"as": "doc"
}
},
{
"$unwind": "$doc"
},
{
"$project": {
"_id": 1,
"data.animal": "$doc.name",
"data.place": 1
}
}
])
and it result me this
[
{
"_id": "8e7b3fa0-4230-448c-8f70-1d7300632834",
"data": [
{
"animal": "Dog",
"place": "Chennai"
},
{
"animal": "Dog",
"place": "Kolkata"
}
]
},
{
"_id": "8e7b3fa0-4230-448c-8f70-1d7300632834",
"data": [
{
"animal": "Cat",
"place": "Chennai"
},
{
"animal": "Cat",
"place": "Kolkata"
}
]
}
]
Where I'm expecting like this
[
{
"_id": "8e7b3fa0-4230-448c-8f70-1d7300632834",
"data": [
{
"animal": "Dog",
"place": "Chennai"
},
{
"animal": "Cat",
"place": "Kolkata"
}
]
}
]
Mongo Playground
Also sharing this question in Mongo playgroud. Thanks in advance!!
Solution 1
$unwind - Deconstruct the data array into multiple documents.
$lookup - Perform join with animal collection.
$project - Decorate the output document. For data.animal field, get the first value via $first.
$group - Group by _id. Push the data document into the data array.
db.data.aggregate([
{
"$unwind": "$data"
},
{
"$lookup": {
"from": "animal",
"localField": "data.animal",
"foreignField": "_id",
"as": "doc"
}
},
{
"$project": {
"_id": 1,
"data.animal": {
$first: "$doc.name"
},
"data.place": 1
}
},
{
$group: {
_id: "$_id",
data: {
$push: "$data"
}
}
}
])
Demo Solution 1 # Mongo Playground
Solution 2
$lookup
$set - Set data field.
2.1. $map - Iterate the data array and returns a new array.
2.1.1. $mergeObjects - Merge current iterated document with place field and the document from 2.1.1.1.
2.1.1.1. $first - Get the first document from the filtered doc arrays by matching the ids via $filter.
$unset - Remove the doc and data._id fields.
db.data.aggregate([
{
"$lookup": {
"from": "animal",
"localField": "data.animal",
"foreignField": "_id",
"as": "doc"
}
},
{
$set: {
data: {
$map: {
input: "$data",
as: "data",
in: {
$mergeObjects: [
{
place: "$$data.place"
},
{
$first: {
$filter: {
input: "$doc",
cond: {
$eq: [
"$$this._id",
"$$data.animal"
]
}
}
}
}
]
}
}
}
}
},
{
$unset: [
"doc",
"data._id"
]
}
])
Demo Solution 2 # Mongo Playground

MongoDB get only selected elements from objects inside an array

What I have is a collection of documents in MongoDB that have the structure something like this
[
{
"userid": "user1",
"addresses": [
{
"type": "abc",
"street": "xyz"
},
{
"type": "def",
"street": "www"
},
{
"type": "hhh",
"street": "mmm"
},
]
},
{
"userid": "user2",
"addresses": [
{
"type": "abc",
"street": "ccc"
},
{
"type": "def",
"street": "zzz"
},
{
"type": "hhh",
"street": "yyy"
},
]
}
]
If I can give the "type" and "userid", how can I get the result as
[
{
"userid": "user2",
"type": "abc",
"street": "ccc",
}
]
It would also be great even if I can get the "street" only as the result. The only constraint is I need to get it in the root element itself and not inside an array
Something like this:
// Returns the street of one address type for one user as a flat document.
db.collection.aggregate([
  // Select the user. The array field is named "addresses", so the type
  // filter must use "addresses.type" to match any document.
  { $match: { userid: "user1", "addresses.type": "abc" } },
  // Keep only the array elements of the requested type.
  {
    $project: {
      userid: 1,
      address: {
        $filter: {
          input: "$addresses",
          as: "a",
          cond: { $eq: ["$$a.type", "abc"] }
        }
      }
    }
  },
  // One output document per remaining address element.
  { $unwind: "$address" },
  // Lift the street to the root level and drop _id.
  { $project: { userid: 1, street: "$address.street", _id: 0 } }
])
explained:
Filter only documents with the userid & addresses.type you need
Project/Filter only the addresses elements with the needed type
unwind the address array
project only the needed elements as requested
For best results create an index on the { userid: 1 } field or a compound index on the { userid: 1, "addresses.type": 1 } fields
playground
You should be able to use unwind, match and project as shown below:
db.collection.aggregate([
{
"$unwind": "$addresses"
},
{
"$match": {
"addresses.type": "abc",
"userid": "user1"
}
},
{
"$project": {
"_id": 0,
"street": "$addresses.street"
}
}
])
You can also duplicate the match step as the first step to reduce the number of documents to unwind.
Here is the playground link.
There is a similar question/answer here.

MongoDB get documents from a collection not in other collection

I'm trying to get the documents from one collection that aren't in another collection (the common NOT IN clause in SQL).
If I run the next query:
db.Companies_Movies.aggregate([
{
$project:
{
"CompanyList.Movies.Code" : 1
}
},
{
$match:
{
"CompanyList.CodeCompany": "23"
}
},
{
$lookup:
{
from: "Movies",
localField: "CompanyList.Movies.Code",
foreignField: "Movie.Code",
as: "matched_docs"
}
}
]);
This query shows the movies included in both CompanyList.Movies.Code and Movie.Code. Good.
But I want only the rest of the movies: those included in CompanyList.Movies whose codes aren't included in Movie.Code.
As Nikos Tsagkas said in Get "data from collection b not in collection a" in a MongoDB shell query it should be sufficient to include the following sentence:
{
$match: { "matched_docs": { $eq: [] } }
}
But when I run my final code, it doesn't return anything:
db.Companies_Movies.aggregate([
{
$project:
{
"CompanyList.Movies.Code" : 1
}
},
{
$match:
{
"CompanyList.CodeCompany": "23"
}
},
{
$lookup:
{
from: "Movies",
localField: "CompanyList.Movies.Code",
foreignField: "Movie.Code",
as: "matched_docs"
}
},
{
$match: { "matched_docs": { $eq: [] } }
}
]);
There are 59 documents that are not returned by this code.
This is my pipeline I've created in MongoDB Compass after Tom's changes and it still doesn't work:
[{
$match:
{
'CompanyList.CodeCompany': '23'
}
},
{
$lookup:
{
from: 'Movies',
localField: 'CompanyList.Movies.Code',
foreignField: 'Movie.Code',
as: 'docs'
}
},
{
$project:
{
'CompanyList.Movies.Code': 1,
'CompanyList.CodeCompany': 1
}
},
{
$match:
{
docs:{ $eq: [] }
}
}]
If I delete the $project, it doesn't work either.
Sample Data (reduced)
Companies_Movies collection:
{
_id:ObjectId("61bf47b974641866e1244e65"),
"CompanyList": {
"CodeCompany": "23",
"NameCompany": "Company Name Entertainment",
"Movies": [{
"Code": "123",
"Name": "Title 1",
"Order": 1,
"UserDescription": null
}, {
"Code": "124",
"Name": "Title 2",
"Order": 2,
"UserDescription": null
}, {
"Code": "125",
"Name": "Title 3",
"Order": 3,
"UserDescription": null
}],
"DateInserted": {
"$date": "2021-12-13T17:30:06.824Z"
}
}
}
Movies collection:
[{
_id:ObjectId("61bf57bc9d1f93b7ae5fa785"),
"Movie": {
"Code": "123",
"OriginalTitle": "Title 1",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa786"),
"Movie": {
"Code": "124",
"OriginalTitle": "Title 2",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa787"),
"Movie": {
"Code": "125",
"OriginalTitle": "Title 3",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa788"),
"Movie": {
"Code": "126",
"OriginalTitle": "Title 4",
"Year": 2021
}
}]
Anyone know what might be happening?
Thanks to everyone.
This is simply caused by your $project stage, after you run:
{
$project: {
"CompanyList.Movies.Code" : 1
}
},
Your data will look like this:
{
CompanyList: [
{
Movies: { code: "123", ... other fields }
}
]
}
Now you're trying to match "CompanyList.CodeCompany": "23", but the field CodeCompany simply does not exist anymore as you did not include it in the $project stage.
So just change your projection stage to include the fields you will use in later stages:
{
$project: {
"CompanyList.Movies.Code" : 1,
"CompanyList.CodeCompany": 1
}
},

How to remove duplicate values inside a list array in MongoDB?

I have many records in one collection in MongoDB, and here are 3 examples; duplicates should be removed based only on the QUESTION match criterion.
{
"_id": {
"$oid": "5f0f561256efe82f5082252e"
},
"Item1": false,
"Item2": "",
"Item3": 1,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
and another one to compare
{
"_id": {
"$oid": "5f0f561256efe82f5082252c"
},
"Item1": false,
"Item2": "",
"Item3": 2,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
the third one :
{
"_id": {
"$oid": "5f0f561256efe82f5082252d"
},
"Item1": false,
"Item2": "",
"Item3": 3,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your last name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
What I'm trying to do here is write a query using the aggregation approach. I only want to focus on the Item4 element with exactly ("Name": "QUESTION") and its value (the question) to identify the duplicates.
The idea is to look for duplicates of the question text itself only ("What is your name?" in our example here), and I don't want to specify which question because there is a long list of them.
I'm just looking for the duplicated questions, no matter what the question looks like.
I used the following approach, but I still cannot narrow down the output to only the question and its value in order to delete the duplicates in another step.
db.collections.aggregate([{ $unwind: "$Item4" }, {$group: { _id: { QUESTION: "$Item4.Name.4", Value: "$Item4.Value.4" }}}]).pretty()
I'm executing from mongo shell directly.
The following aggregation will list all the documents (the _ids) which have the duplicates of "Item4.Value" for the condition "Item4.Name": "QUESTION".
db.test.aggregate( [
{
$unwind: "$Item4"
},
{
$match: { "Item4.Name": "QUESTION" }
},
{
$group: {
_id: { "Item4_Value": "$Item4.Value" },
ids: { $push: "$_id" }
}
},
{
$match: { $expr: { $gt: [ { $size: "$ids" }, 1 ] } }
}
] )
It works! thanks a lot. I add it to the rest of code as below :
// Removes documents whose QUESTION text (Item4 element with Name "QUESTION")
// duplicates that of another document, keeping one document per question.

// Document count before the cleanup.
db.test.countDocuments({});

const duplicatesIds = [];
db.test.aggregate(
  [
    { $unwind: "$Item4" },
    // Only the array element that holds the question text matters.
    { $match: { "Item4.Name": "QUESTION" } },
    // Collect the _ids of all documents sharing the same question text.
    {
      $group: {
        _id: { Item4_Value: "$Item4.Value" },
        ids: { $push: "$_id" }
      }
    },
    // Skip questions that occur once so they are never sent to the client.
    { $match: { $expr: { $gt: [{ $size: "$ids" }, 1] } } }
  ],
  { allowDiskUse: true }
).forEach((doc) => {
  // Keep the first collected _id of each group; the rest are duplicates.
  duplicatesIds.push(...doc.ids.slice(1));
});

printjson(duplicatesIds);

// deleteMany() and countDocuments() replace the deprecated remove() and
// cursor.count() helpers.
db.test.deleteMany({ _id: { $in: duplicatesIds } });

// Document count after the cleanup.
db.test.countDocuments({});

Resources