Aggregation between a MongoDB collection and an external array - arrays

I'm looking for a way to join a collection with an external array of strings (codes) that returns another string array containing all the codes from the external array that aren't present in the collection.
The collection sample:
[{
_id:ObjectId("61bf57bc9d1f93b7ae5fa785"),
"Movie": {
"Code": "123",
"OriginalTitle": "Title 1",
"Year": 2021
}},{
_id:ObjectId("61bf57bc9d1f93b7ae5fa786"),
"Movie": {
"Code": "124",
"OriginalTitle": "Title 2",
"Year": 2021
}},{
_id:ObjectId("61bf57bc9d1f93b7ae5fa787"),
"Movie": {
"Code": "125",
"OriginalTitle": "Title 3",
"Year": 2021
}},{
_id:ObjectId("61bf57bc9d1f93b7ae5fa788"),
"Movie": {
"Code": "126",
"OriginalTitle": "Title 4",
"Year": 2021
}
}]
the external array:
const codes = ["125", "127", "128", "129"];
The aggregation must compare "Movie.Code" with the array and return another array with the following values:
returnCodes = ["127", "128", "129"];
How can I make it?

Maybe this:
// Collect every Movie.Code of the collection into a single array, then keep
// only the entries of the external `codes` array that are absent from it.
// `codes` must already be defined, e.g. ["125", "127", "128", "129"].
const grouped = db.collection.aggregate([
  {
    // _id: null puts all documents into one group.
    $group: {
      _id: null,
      Code: { $push: "$Movie.Code" }
    }
  },
  {
    $project: {
      returnCodes: {
        $filter: {
          input: codes,
          // Keep a code only when it is NOT among the collected Movie.Code values.
          cond: { $not: [{ $in: ["$$this", "$Code"] }] }
        }
      }
    }
  }
]).toArray()[0];
// $group emits no document for an empty collection. In that case none of the
// codes exist, so all of them are returned instead of failing on `undefined`.
grouped ? grouped.returnCodes : codes;
Of course, you could do it also in Javascript:
const codes = ["125", "127", "128", "129"];
// Fetch only the documents whose code is one of the candidates, and only the
// Movie.Code field, so the whole collection is not pulled to the client.
// The plain inclusion projection also works on servers older than 4.4.
const existingCodes = new Set(
  db.collection
    .find({ "Movie.Code": { $in: codes } }, { _id: 0, "Movie.Code": 1 })
    .toArray()
    .map(doc => doc.Movie.Code)
);
// Set membership keeps the filter linear in the number of codes.
const returnCodes = codes.filter(code => !existingCodes.has(code));

Related

MongoDB get only selected elements from objects inside an array

What I have is a collection of documents in MongoDB that have the structure something like this
[
{
"userid": "user1",
"addresses": [
{
"type": "abc",
"street": "xyz"
},
{
"type": "def",
"street": "www"
},
{
"type": "hhh",
"street": "mmm"
},
]
},
{
"userid": "user2",
"addresses": [
{
"type": "abc",
"street": "ccc"
},
{
"type": "def",
"street": "zzz"
},
{
"type": "hhh",
"street": "yyy"
},
]
}
]
If I can give the "type" and "userid", how can I get the result as
[
{
"userid": "user2",
"type": "abc",
"street": "ccc",
}
]
It would also be great even if I can get the "street" only as the result. The only constraint is I need to get it in the root element itself and not inside an array
Something like this:
// Return the street of one user's address of a given type as a root-level
// field (not nested inside an array).
db.collection.aggregate([
  {
    // Select the documents of the requested user that contain at least one
    // address of the requested type. The array field is named "addresses".
    $match: {
      userid: "user1",
      "addresses.type": "abc"
    }
  },
  {
    // Reduce the addresses array to the elements of the requested type.
    $project: {
      userid: 1,
      address: {
        $filter: {
          input: "$addresses",
          as: "a",
          cond: { $eq: ["$$a.type", "abc"] }
        }
      }
    }
  },
  {
    // One output document per remaining address element.
    $unwind: "$address"
  },
  {
    // Lift the street to the root and drop everything else.
    $project: {
      userid: 1,
      street: "$address.street",
      _id: 0
    }
  }
])
explained:
Filter only the documents with the userid & addresses.type you need
Project/filter only the addresses elements with the needed type
Unwind the address array
Project only the needed elements as requested
For best results, create an index on the { userid: 1 } field or a compound index on the { userid: 1, "addresses.type": 1 } fields
playground
You should be able to use unwind, match and project as shown below:
// Flatten the addresses, keep the one wanted, and expose only its street.
db.collection.aggregate([
  // One document per element of the addresses array.
  { $unwind: "$addresses" },
  // Keep the requested user's address of the requested type.
  { $match: { userid: "user1", "addresses.type": "abc" } },
  // Return the street at the root level, without _id.
  { $project: { _id: 0, street: "$addresses.street" } }
])
You can also duplicate the match step as the first step to reduce the number of documents to unwind.
Here is the playground link.
There is a similar question/answer here.

MongoDB get documents from a collection not in other collection

I'm trying to get the documents from one collection that aren't in another collection (the common NOT IN clause in SQL).
If I run the next query:
db.Companies_Movies.aggregate([
{
$project:
{
"CompanyList.Movies.Code" : 1
}
},
{
$match:
{
"CompanyList.CodeCompany": "23"
}
},
{
$lookup:
{
from: "Movies",
localField: "CompanyList.Movies.Code",
foreignField: "Movie.Code",
as: "matched_docs"
}
}
]);
This query shows the movies that are included both in CompanyList.Movies.Code and in Movie.Code. Good.
But I just want the rest of the movies in CompanyList.Movies, i.e. those whose codes aren't included in Movie.Code.
As Nikos Tsagkas said in Get "data from collection b not in collection a" in a MongoDB shell query it should be sufficient to include the following sentence:
{
$match: { "matched_docs": { $eq: [] } }
}
But when I run my final code, it doesn't return anything:
db.Companies_Movies.aggregate([
{
$project:
{
"CompanyList.Movies.Code" : 1
}
},
{
$match:
{
"CompanyList.CodeCompany": "23"
}
},
{
$lookup:
{
from: "Movies",
localField: "CompanyList.Movies.Code",
foreignField: "Movie.Code",
as: "matched_docs"
}
},
{
$match: { "matched_docs": { $eq: [] } }
}
]);
There are 59 documents that are not returned by this code.
This is my pipeline I've created in MongoDB Compass after Tom's changes and it still doesn't work:
[{
$match:
{
'CompanyList.CodeCompany': '23'
}
},
{
$lookup:
{
from: 'Movies',
localField: 'CompanyList.Movies.Code',
foreignField: 'Movie.Code',
as: 'docs'
}
},
{
$project:
{
'CompanyList.Movies.Code': 1,
'CompanyList.CodeCompany': 1
}
},
{
$match:
{
docs:{ $eq: [] }
}
}]
If I delete the $project, it doesn't work either.
Sample Data (reduced)
Companies_Movies collection:
{
_id:ObjectId("61bf47b974641866e1244e65"),
"CompanyList": {
"CodeCompany": "23",
"NameCompany": "Company Name Entertainment",
"Movies": [{
"Code": "123",
"Name": "Title 1",
"Order": 1,
"UserDescription": null
}, {
"Code": "124",
"Name": "Title 2",
"Order": 2,
"UserDescription": null
}, {
"Code": "125",
"Name": "Title 3",
"Order": 3,
"UserDescription": null
}],
"DateInserted": {
"$date": "2021-12-13T17:30:06.824Z"
}
}
}
Movies collection:
[{
_id:ObjectId("61bf57bc9d1f93b7ae5fa785"),
"Movie": {
"Code": "123",
"OriginalTitle": "Title 1",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa786"),
"Movie": {
"Code": "124",
"OriginalTitle": "Title 2",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa787"),
"Movie": {
"Code": "125",
"OriginalTitle": "Title 3",
"Year": 2021
},
_id:ObjectId("61bf57bc9d1f93b7ae5fa788"),
"Movie": {
"Code": "126",
"OriginalTitle": "Title 4",
"Year": 2021
}
}]
Anyone know what might be happening?
Thanks to everyone.
This is simply caused by your $project stage, after you run:
{
$project: {
"CompanyList.Movies.Code" : 1
}
},
Your data will look like this:
{
CompanyList: [
{
Movies: { code: "123", ... other fields }
}
]
}
Now you're trying to match "CompanyList.CodeCompany": "23", but the field CodeCompany simply does not exist anymore because you did not include it in the $project stage.
So just change your projection stage to include the fields you will use in later stages:
{
$project: {
"CompanyList.Movies.Code" : 1,
"CompanyList.CodeCompany": 1
}
},

Update all objects in nested array with values from other collection

I have a collection of vehicles with the following car structure:
{
"_id": {}
brand : ""
model : ""
year : ""
suppliers : [
"name": "",
"contact": ""
"supplierId":"",
"orders":[], <-- Specific to the vehicles collection
"info":"" <-- Specific to the vehicles collection
]
}
And a Suppliers collection with a structure like:
{
"name":"",
"contact":"",
"_id":{}
"internalId":"",
"address":"",
...
}
I need to add a new field to each element of the suppliers array in every document of the vehicles collection, holding the internalId field of the supplier in the suppliers collection that has the same _id.
If the suppliers array has an element with the id 123, I should go to the suppliers collection, look for the supplier with the id 123, and retrieve its internalId. Afterwards I should create the field in that suppliers array element with that value.
So that i end up with the vehicles collection as:
{
"_id": {}
brand : ""
model : ""
year : ""
suppliers : [
"name": "",
"contact": ""
"supplierId":""
"internalId":"" <-- the new field
]
}
Tried:
db.vehicles.aggregate([
{
"$unwind": { "path": "$suppliers", "preserveNullAndEmptyArrays": false }
},
{
"$project": { "supplierObjId": { "$toObjectId": "$suppliers.supplierId" } }
},
{
"$lookup":
{
"from": "suppliers",
"localField": "supplierObjId",
"foreignField": "_id",
"as": "supplierInfo"
}
},{
"$set": {
"suppliers.internalId": "$supplierInfo.internalid"
}}
])
But it is adding the new field, to the returned values instead to the array item at the collection.
How can i achieve this?
But it is adding the new field, to the returned values instead to the array item at the collection.
The .aggregate method does not update documents; it only formats the result documents.
You have to use two queries: first an aggregate, then an update.
I'm guessing you want to execute this only once, so I am suggesting a query you can execute in the mongo shell.
Aggregation query:
$lookup with pipeline, pass suppliers.supplierId in let
$toString to convert object id to string type
$match the $in condition
$project to show required fields
$map to iterate loop of suppliers array
$reduce to iterate loop of suppliers_data array and find the matching record by supplierId
$mergeObjects to merge current object properties with new property internalId
Loop the result from aggregate query using forEach
Update Query to update suppliers array
// Build the enriched suppliers array for every vehicle, then write it back.
// aggregate() only shapes its output, so the updateOne call at the end is what
// actually stores the new internalId values.
db.vehicles.aggregate([
  {
    // Fetch the supplier documents whose stringified _id appears among this
    // vehicle's suppliers.supplierId values.
    $lookup: {
      from: "suppliers",
      let: { supplierId: "$suppliers.supplierId" },
      pipeline: [
        { $match: { $expr: { $in: [{ $toString: "$_id" }, "$$supplierId"] } } },
        { $project: { _id: 0, supplierId: { $toString: "$_id" }, internalId: 1 } }
      ],
      as: "suppliers_data"
    }
  },
  {
    $project: {
      suppliers: {
        // Rebuild each supplier element with an extra internalId property.
        $map: {
          input: "$suppliers",
          as: "s",
          in: {
            $mergeObjects: [
              "$$s",
              {
                internalId: {
                  // Scan the looked-up suppliers for the one with the same
                  // supplierId; the value stays "" when none matches.
                  $reduce: {
                    input: "$suppliers_data",
                    initialValue: "",
                    in: {
                      $cond: {
                        if: { $eq: ["$$this.supplierId", "$$s.supplierId"] },
                        then: "$$this.internalId",
                        else: "$$value"
                      }
                    }
                  }
                }
              }
            ]
          }
        }
      }
    }
  }
]).forEach(({ _id, suppliers }) => {
  // Replace the stored suppliers array with the enriched one.
  db.vehicles.updateOne({ _id }, { $set: { suppliers } });
});
Playground for aggregation query, and Playground for update query.
It looks like one way to solve this is by using $addFields and $lookup. We first flatten any matching suppliers, then add the property, then regroup.
You can find a live demo here via Mongo Playground.
Database
Consider the following database structure:
[{
// Collection
"vehicles": [
{
"_id": "1",
brand: "ford",
model: "explorer",
year: "1999",
suppliers: [
{
name: "supplier1",
contact: "john doe",
supplierId: "001"
},
{
name: "supplier2",
contact: "jane doez",
supplierId: "002"
}
]
},
{
"_id": "2",
brand: "honda",
model: "accord",
year: "2002",
suppliers: [
{
name: "supplier1",
contact: "john doe",
supplierId: "001"
},
]
}
],
// Collection
"suppliers": [
{
"name": "supplier1",
"contact": "john doe",
"_id": "001",
"internalId": "999-001",
"address": "111 main street"
},
{
"name": "supplier2",
"contact": "jane doez",
"_id": "002",
"internalId": "999-002",
"address": "222 north street"
},
{
"name": "ignored_supplier",
"contact": "doesnt matter",
"_id": "xxxxxxx",
"internalId": "xxxxxxx",
"address": "0987 midtown"
}
]
}]
Query
This is the query that I was able to get working. I'm not sure how efficient it is, or if it can be improved, but this seemed to do the trick:
// Flatten each vehicle's suppliers, attach the matching supplier's internalId
// to every element, then regroup the elements back into one array per vehicle.
db.vehicles.aggregate([
  {
    // One document per element of the suppliers array.
    $unwind: "$suppliers"
  },
  {
    $lookup: {
      from: "suppliers",
      localField: "suppliers.supplierId",
      foreignField: "_id", // <---- OR MATCH WHATEVER FIELD YOU WANT
      as: "vehicle_suppliers"
    }
  },
  {
    // Keep suppliers that have no match in the suppliers collection; without
    // this option they would be dropped from the regrouped array.
    $unwind: {
      path: "$vehicle_suppliers",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    // For unmatched suppliers the source path is missing, so no internalId
    // field is added to that element.
    $addFields: {
      "suppliers.internalId": "$vehicle_suppliers.internalId"
    }
  },
  {
    // Reassemble one document per vehicle. Only the fields listed here
    // survive the regrouping.
    $group: {
      _id: "$_id",
      brand: { $first: "$brand" },
      model: { $first: "$model" },
      year: { $first: "$year" },
      suppliers: { $push: "$suppliers" }
    }
  }
])
Results
Which returns:
[
{
"_id": "2",
"brand": "honda",
"model": "accord",
"suppliers": [
{
"contact": "john doe",
"internalId": "999-001",
"name": "supplier1",
"supplierId": "001"
}
],
"year": "2002"
},
{
"_id": "1",
"brand": "ford",
"model": "explorer",
"suppliers": [
{
"contact": "john doe",
"internalId": "999-001",
"name": "supplier1",
"supplierId": "001"
},
{
"contact": "jane doez",
"internalId": "999-002",
"name": "supplier2",
"supplierId": "002"
}
],
"year": "1999"
}
]

How to remove duplicate values inside a list array in MongoDB?

I have many records in one collection in MongoDB; here are 3 examples. I want to remove duplicates based only on the QUESTION match criterion.
{
"_id": {
"$oid": "5f0f561256efe82f5082252e"
},
"Item1": false,
"Item2": "",
"Item3": 1,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
and another one to compare
{
"_id": {
"$oid": "5f0f561256efe82f5082252c"
},
"Item1": false,
"Item2": "",
"Item3": 2,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
the third one :
{
"_id": {
"$oid": "5f0f561256efe82f5082252d"
},
"Item1": false,
"Item2": "",
"Item3": 3,
"Item4": [
{
"Name": "TYPE",
"Value": "QUESTION"
},
{
"Name": "QUESTION",
"Value": "What is your last name?"
},
{
"Name": "CORRECT_ANSWER",
"Value": "1"
},
{
"Name": "ANSWER_1",
"Value": "name one"
},
{
"Name": "ANSWER_2",
"Value": "name two"
}
],
"Item5": [
10
],
"Item6": false
}
What I'm trying to do here is write a query with the aggregation approach, focusing only on the Item4 element with exactly ("Name": "QUESTION") and using its value (the question) to identify the duplication.
The idea is to look for duplicates of the question text itself only ("What is your name?" in our example here), and I don't want to specify which question because there is a long list of them.
I'm looking just for the duplicated questions, no matter what the question looks like.
I used the following approach, but I still cannot narrow down the output to only the question and its value in order to delete the duplicates in another step.
db.collections.aggregate([{ $unwind: "$Item4" }, {$group: { _id: { QUESTION: "$Item4.Name.4", Value: "$Item4.Value.4" }}}]).pretty()
I'm executing from mongo shell directly.
The following aggregation will list all the documents (the _ids) which have the duplicates of "Item4.Value" for the condition "Item4.Name": "QUESTION".
// List, per question text, the _ids of the documents that share it, keeping
// only the question texts that occur more than once.
db.test.aggregate([
  // One document per element of the Item4 array.
  { $unwind: "$Item4" },
  // Keep only the element that carries the question text.
  { $match: { "Item4.Name": "QUESTION" } },
  // Group by question text and collect the owning documents' _ids.
  { $group: { _id: { "Item4_Value": "$Item4.Value" }, ids: { $push: "$_id" } } },
  // A group with more than one _id is a duplicated question.
  { $match: { $expr: { $gt: [{ $size: "$ids" }, 1] } } }
])
It works! thanks a lot. I add it to the rest of code as below :
// Document count before the clean-up, for comparison with the count after it.
db.test.countDocuments({});
const duplicatesIds = [];
db.test.aggregate(
  [
    // One document per element of the Item4 array.
    { $unwind: "$Item4" },
    // Keep only the element that carries the question text.
    { $match: { "Item4.Name": "QUESTION" } },
    // Group by question text. $addToSet records each document's _id once, so
    // a document holding the same question twice is never listed as a
    // duplicate of itself (which would delete its only copy).
    { $group: { _id: { "Item4_Value": "$Item4.Value" }, ids: { $addToSet: "$_id" } } }
  ],
  { allowDiskUse: true }
).forEach(doc => {
  // Keep the first _id of each group and mark the rest for deletion. Which
  // document comes first within a group is not specified.
  for (const dupId of doc.ids.slice(1)) {
    duplicatesIds.push(dupId);
  }
});
printjson(duplicatesIds);
// deleteMany replaces the deprecated remove(); an empty $in list deletes nothing.
db.test.deleteMany({ _id: { $in: duplicatesIds } });
db.test.countDocuments({});

How to group data with date string inside subdocuments under an array mongodb

My document have this structure
_id: "adklkj389723jk23KLJjl2LU92kJO387"
"impressions": [{
"_id": ObjectId("5b74799535f2722494075981"),
"country": "GB",
"impression_count": 22,
"_campaignid": ObjectId("5b72d78847db422040ee60cf"),
"date": ISODate("2018-08-15T19:00:00Z")
},
{
"_id": ObjectId("5b74799d35f2722494075982"),
"country": "GB",
"impression_count": 22,
"_campaignid": ObjectId("5b72d7bf47db422040ee60d1"),
"date": ISODate("2018-08-15T19:00:00Z")
},
{
"_id": ObjectId("5b7479a735f2722494075983"),
"country": "GB",
"impression_count": 20,
"_campaignid": ObjectId("5b72d79e47db422040ee60d0"),
"date": ISODate("2018-08-15T19:00:00Z")
}
]
What i want to do
I want to group impressions with dates on which they are created where i am saving date as string in db to do this I am using this query
db.advertisers.aggregate([{
$group: {
_id: "$impressions.date",
count: {
$sum: "$impressions.impression_count"
}
}
}]).pretty()
Result
This query is giving me the result below. What am I doing wrong?
{
"_id": [
ISODate("2018-08-15T19:00:00Z"),
ISODate("2018-08-15T19:00:00Z"),
ISODate("2018-08-15T19:00:00Z")
],
"count": 1
}
Since you are dealing with an array you might want to unwind the impressions:
// Total impression_count per impression date.
db.collection.aggregate([
  // One document per element of the impressions array, so the fields below
  // refer to a single impression rather than to arrays of values.
  { $unwind: "$impressions" },
  // Group by date and add up the impression counts.
  { $group: { _id: "$impressions.date", count: { $sum: "$impressions.impression_count" } } }
])
This would give you:
[
{
"_id": ISODate("2018-08-15T19:00:00Z"),
"count": 64
}
]
You can see this here.

Resources