MongoDB aggregate, how to addToSet each element of array in group pipeline - arrays

I have documents that contain a tags field. It's a simple array of tag names, with no objects or _id inside.
Just plain tags like this ["Protocol", "Access", "Leverage", "Capability"].
And in my group pipeline I tried something like 'selectedTags': { $addToSet: '$tags' } but then I end up with an array containing arrays of tags. And I get the same with $push.
I tried to use $each or $pushAll, but my shell tells me they are not supported as grouping operators.
Can someone help me on this one please ?
Thank you
Edit:
Sample docs:
{
"_id" : "HWEdDGsq86x4ikDSQ",
"teamId" : "AdLizGnPuqbWNsFHe",
"ownerId" : "Qb5EigWjqn2t3bfxD",
"type" : "meeting",
"topic" : "Grass-roots hybrid knowledge user",
"fullname" : "Guidouil",
"startDate" : ISODate("2017-07-30T09:00:05.513Z"),
"shareResults" : true,
"open" : true,
"language" : "fr",
"tags" : [
"Protocol",
"Challenge",
"Artificial Intelligence",
"Capability"
],
"isDemo" : true,
"createdAt" : ISODate("2017-11-15T19:24:05.513Z"),
"participantsCount" : 10,
"ratersCount" : 10,
"averageRating" : 3.4,
"hasAnswers" : true,
"updatedAt" : ISODate("2017-11-15T19:24:05.562Z")
}
{
"_id" : "rXvkFndpXwJ6KAvNo",
"teamId" : "AdLizGnPuqbWNsFHe",
"ownerId" : "Qb5EigWjqn2t3bfxD",
"type" : "meeting",
"topic" : "Profit-focused modular system engine",
"fullname" : "Guidouil",
"startDate" : ISODate("2017-07-24T12:00:05.564Z"),
"shareResults" : true,
"open" : true,
"language" : "fr",
"tags" : [
"Initiative",
"Artificial Intelligence",
"Protocol",
"Utilisation"
],
"isDemo" : true,
"createdAt" : ISODate("2017-11-15T19:24:05.564Z"),
"participantsCount" : 33,
"ratersCount" : 33,
"averageRating" : 2.9393939393939394,
"hasAnswers" : true,
"updatedAt" : ISODate("2017-11-15T19:24:05.753Z")
}
Aggregation:
// Groups the matched surveys by calendar day (year + day-of-year of startDate).
// NOTE(review): `query` is defined outside this snippet.
db.surveys.aggregate(
{ $match: query },
{
$group: {
'_id': {
'year': { $year: '$startDate' },
'day': { $dayOfYear: '$startDate' },
},
'participants': { $sum: '$ratersCount' },
'rating': { $avg: '$averageRating' },
'surveys': { $push: '$_id' },
// $addToSet adds each document's whole tags array as ONE element,
// so selectedTags ends up as an array of arrays rather than a flat list of tags.
'selectedTags': { $addToSet: '$tags' },
'peoples': { $addToSet: '$fullname' },
}
},
// Order the groups by their compound _id, ascending.
{ $sort: { _id: 1 } }
);
Then I tried changing selectedTags to { $push: { $each: '$tags' } } or { $pushAll: '$tags' }, but this does not execute :(
Edit 2:
In javascript I do it like that:
// Groups the matched surveys by dateGroup, then flattens the collected tags arrays.
// NOTE(review): `query` and `dateGroup` are defined outside this snippet.
return Surveys.aggregate(
{ $match: query },
{ $group: {
_id: dateGroup,
participants: { $sum: '$ratersCount' },
rating: { $avg: '$averageRating' },
surveys: { $push: '$_id' },
// Push each document's tags array as-is; the arrays are merged in the $project stage below.
selectedTags: { $push: '$tags' },
peoples: { $addToSet: '$fullname' },
} },
// Only _id and selectedTags are listed in this projection.
{ $project: {
_id: null,
selectedTags: {
// Fold the array of arrays into one array; $setUnion drops duplicate tags at each step.
$reduce: {
input: "$selectedTags",
initialValue: [],
in: { $setUnion: ["$$value", "$$this"] }
}
},
} }
);

To mimic functionality of $addToSet update operator with $each modifier in aggregation pipeline you can use a combination of $push on grouping stage and $reduce + $setUnion on projection stage. E.g.:
// Collects every document's tags array, then flattens them into one distinct list.
db.collection.aggregate([
// Stage 1: a single group (_id: null) whose selectedTags is an array of tags arrays.
{$group:{
_id: null,
selectedTags: { $push: '$tags' }
}},
// Stage 2: fold the array of arrays into one array; $setUnion removes duplicates.
{$project: {
selectedTags: { $reduce: {
input: "$selectedTags",
initialValue: [],
in: {$setUnion : ["$$value", "$$this"]}
}}
}}
])
This results in a single document whose selectedTags array contains the distinct list of tags from all documents.

You can also use $unwind to get result:
// Alternative: emit one document per tag, then collect the distinct tags in a single group.
db.collection.aggregate([
// $unwind outputs one copy of the document for each element of tags.
{$unwind: "$tags"},
{$group:{
_id: null,
// After $unwind, '$tags' is a single tag, so $addToSet builds a flat set.
selectedTags: { $addToSet: '$tags' }
}}
])

Dannyxu and Alex Beck's answers both worked, but only partially when used with a group stage. I needed to combine both to get the desired result of a single flat array of tags:
// Builds one flat, de-duplicated array of tags across all teams, together with
// the total number of matched records.
Model.aggregate()
  .match({ /** some query */ })
  // One group per team: collect every document's tags array and count the documents.
  .group({
    _id: '$teamId',
    tagsSet: { $push: '$tags' },
    numRecords: { $sum: 1 },
  })
  // Flatten each team's array of arrays into a single set of tags.
  .project({
    _id: 0,
    numRecords: 1,
    tagsSet: {
      $reduce: {
        input: '$tagsSet',
        initialValue: [],
        in: { $setUnion: ['$$value', '$$this'] },
      },
    },
  })
  // Merge the per-team results into one document. The per-team sets are pushed
  // (not unwound) so that each team's numRecords is summed exactly once:
  // unwinding tagsSet first would repeat numRecords once per tag and inflate
  // the total, and would drop teams whose tag set is empty.
  .group({
    _id: null,
    tagsSets: { $push: '$tagsSet' },
    numRecords: { $sum: '$numRecords' },
  })
  // Flatten the array of per-team sets into the final distinct tag list.
  .project({
    numRecords: 1,
    selectedTags: {
      $reduce: {
        input: '$tagsSets',
        initialValue: [],
        in: { $setUnion: ['$$value', '$$this'] },
      },
    },
  });

Related

mongodb group each fields along with total count

I have a collection of documents in Cosmos DB. How can I group the distinct values of each field using MongoDB?
Here is my sample data:
{
"_id" : ObjectId("61ba65af74cf385ee93ad2c8"),
"Car_brand":"A",
"Plate_number":"5",
"Model_year":"2015",
"Company":"Tesla Motors"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2c9"),
"Car_brand":"B",
"Plate_number":"2",
"Model_year":"2021",
"Company":"Tesla Motors",
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2ca"),
"Car_brand":"B",
"Plate_number":"2",
"Model_year":"2011",
"Company":"Lamborghini",
}
expected:
{
"Car_brand":["A","B"]
"Plate_number":["5","2"]
"Model_year":["2015","2021","2011"]
"Company":["Lamborghini","Tesla Motors"]
}
Option 1: Here is how to do it in MongoDB; I guess it is similar in Cosmos DB:
// Collects the values of each field from all documents into one array per field.
// $push keeps every value, so repeated values appear more than once.
db.collection.aggregate([
{
$group: {
// _id: null puts all documents into a single group.
_id: null,
Car_brand: {
$push: "$Car_brand"
},
Plate_number: {
$push: "$Plate_number"
},
Model_year: {
$push: "$Model_year"
},
Company: {
$push: "$Company"
}
}
}
])
playground
Option 2: Later I realized that you need the distinct values; here is an example:
// Same single-group aggregation, but $addToSet keeps each distinct value only once per field.
db.collection.aggregate([
{
$group: {
// _id: null puts all documents into a single group.
_id: null,
Car_brand: {
$addToSet: "$Car_brand"
},
Plate_number: {
$addToSet: "$Plate_number"
},
Model_year: {
$addToSet: "$Model_year"
},
Company: {
$addToSet: "$Company"
}
}
}
])
playground2

How can I find subdocument using Mongoose?

I have defined a model like this.
// Schema for a user's shot counter: a reference to the owning member, the
// user's id, and an embedded array of individual shots.
// NOTE(review): `Schema` and `ObjectId` are defined outside this snippet.
const ShotcountSchema = new Schema({
  // Reference to the owning document in the 'member-info' model.
  shotCountId: {
    type: ObjectId,
    required: true,
    ref: 'member-info',
  },
  userId: {
    type: String,
    required: true,
    unique: true,
  },
  // Embedded shots; queries in this thread filter on clubType and createdAt.
  shot: [{
    shotId: {
      type: String,
      required: true,
      unique: true,
    },
    clubType: {
      type: String,
      required: true,
    }, // this comma was missing in the original, which made the schema a syntax error
    createdAt: {
      type: Date,
      default: Date.now,
    },
  }],
});
If you perform a query to find subdocuments based on clubType as follows, only the results of the entire document are output.
For example, if I write the following code and check the result, I get the full result.
// Both $match stages select whole documents: a document that has at least one
// shot with clubType 'driver' is returned with its complete shot array.
// NOTE(review): `user` is defined outside this snippet.
const shotCount = await ShotcountSchema.aggregate([
{
$match : { shotCountId : user[0]._id }
},
{
$match : { 'shot.clubType' : 'driver' }
}
]);
console.log(shotCount[0]); // Full result output
I would like to filter the subdocuments via clubType or createdAt to explore. So, I want these results to be printed.
{
_id: new ObjectId("61d67f0a74ec8620f34c57ed"),
shot: [
{
shotId: 'undefinedMKSf*Tf#!qHxWpz1hPzUBTz%',
clubType: 'driver',
shotCount: 20,
_id: new ObjectId("61d67f0a74ec8620f34c57ef"),
createdAt: 2022-01-06T05:32:58.391Z
}
]
}
How should I write the code?
// Selects the document, then trims its shot array down to the matching shots.
db.collection.aggregate([
{
// Stage 1: pick documents that contain at least one 'driver' shot and at
// least one shot created inside the date range. These two conditions are
// checked independently and may be satisfied by different array elements.
"$match": {
_id: ObjectId("61d67f0a74ec8620f34c57ed"),
"shot.clubType": "driver",
shot: {
// $elemMatch requires one single element to satisfy both date bounds.
$elemMatch: {
$and: [
{
"createdAt": {
$gte: ISODate("2022-01-07T05:32:58.391Z")
}
},
{
"createdAt": {
$lte: ISODate("2022-01-09T05:32:58.391Z")
}
}
]
}
}
}
},
{
// Stage 2: replace shot with only the elements that satisfy all three
// conditions (clubType and both date bounds) at the same time.
"$set": {
shot: {
"$filter": {
"input": "$shot",
"as": "s",
"cond": {
$and: [
{
"$eq": [
"$$s.clubType",
"driver"
]
},
{
"$gte": [
"$$s.createdAt",
ISODate("2022-01-07T05:32:58.391Z")
]
},
{
"$lte": [
"$$s.createdAt",
ISODate("2022-01-09T05:32:58.391Z")
]
}
]
}
}
}
}
}
])
mongoplayground

How to find and remove duplicates of paired data that in an array?

A document in my collection looks like the one below. How can I count the duplicates when a pair of strings (for example user and a32cc286-256b-40e5-fc5d-5ecbdc341ab1) appears more than once, as in the example? And how can I remove the extra copies of such a pair?
"_id" : ObjectId("5ea3138daee55c0001eac29f"),
"linkRole" : [
{
"role" : "admin",
"Organization" : "a32cc286-256b-40e5-fc5d-5ecbdc341ab1"
},
{
"role" : "superadmin",
"Organization" : "a32cc286-256b-40e5-fc5d-5ecbdc341ab1"
},
{
"role" : "user",
"Organization" : "a32cc286-256b-40e5-fc5d-5ecbdc341ab1"
},
{
"role" : "user",
"Organization" : "a32cc286-256b-40e5-fc5d-5ecbdc341ab1"
},
{
"role" : "admin",
"Organization" : "dd79f23d-2382-4eb7-a2f3-634890eba0bb"
},
{
"role" : "superadmin",
"Organization" : "dd79f23d-2382-4eb7-a2f3-634890eba0bb"
}]
Same document:
linkRole[0].role:admin
linkRoles[0].Organization:a32cc286-256b-40e5-fc5d-5ecbdc341ab1
linkRole[1].role:superadmin
linkRoles[1].Organization:a32cc286-256b-40e5-fc5d-5ecbdc341ab1
linkRole[2].role:user
linkRoles[2].Organization:a32cc286-256b-40e5-fc5d-5ecbdc341ab1
linkRole[3].role:user
linkRoles[3].Organization:a32cc286-256b-40e5-fc5d-5ecbdc341ab1
linkRole[4].role:admin
linkRoles[4].Organization:dd79f23d-2382-4eb7-a2f3-634890eba0bb
linkRole[5].role:superadmin
linkRoles[5].Organization:dd79f23d-2382-4eb7-a2f3-634890eba0bb
I use this code to find the duplicates. Some documents have 2, 3, or even 10 of them. Now I need to remove all of them but keep one unique entry.
// Counts how often each (Organization, role) pair occurs and keeps pairs seen at least twice.
// The group key does not include the document _id, so occurrences are counted
// across all documents in the collection, not per document.
db.users.aggregate(
{$unwind: "$linkRole"},
{$group: {"_id": {org: "$linkRole.Organization", role: "$linkRole.role"},
count: {"$sum": 1}}},
{$match: { "count": { "$gte": 2 }}})
I think this aggregation can help you
// Produces one output document per distinct (Organization, role) pair.
// The group key has no document _id, so pairs are de-duplicated across all documents.
db.collection.aggregate([
{
"$unwind": "$linkRole"
},
{
// Grouping on the pair collapses repeated pairs into a single group.
"$group": {
"_id": {
"Organization": "$linkRole.Organization",
"role": "$linkRole.role"
}
}
},
{
// Lift the pair out of the compound _id into top-level fields.
"$project": {
"_id": 0,
"role": "$_id.role",
"Organization": "$_id.Organization"
}
}
])
Playground
// Finds, per user document, every (role, Organization) pair that occurs more
// than once in linkRole. d_id carries the owning document's _id.
const pipeline = [
  { $unwind: "$linkRole" },
  {
    $group: {
      _id: {
        d_id: "$_id",
        role: "$linkRole.role",
        Organization: "$linkRole.Organization",
      },
      count: { $sum: 1 },
    },
  },
  { $match: { count: { $gt: 1 } } },
];

// For each duplicated pair, remove (count - 1) occurrences so exactly one remains.
db.users.aggregate(pipeline).forEach((dup) => {
  const user = db.users.findOne({ _id: dup._id.d_id });
  // The document may have been deleted or changed since the aggregation ran.
  if (!user || !Array.isArray(user.linkRole)) {
    return;
  }

  const isDuplicate = (lr) =>
    lr.role === dup._id.role && lr.Organization === dup._id.Organization;

  for (let i = 1; i < dup.count; i++) {
    const index = user.linkRole.findIndex(isDuplicate);
    // Guard: splice(-1, 1) would silently delete the LAST element of the array.
    if (index === -1) {
      break;
    }
    user.linkRole.splice(index, 1);
  }

  // Write back only the de-duplicated array; db.collection.save() is deprecated
  // and would overwrite the entire document.
  db.users.updateOne({ _id: user._id }, { $set: { linkRole: user.linkRole } });
});
Thank you, @AlexoLive from habr.

How to query on embedded documents

{
"_id" : ObjectId("5fa919a49bbe481d117506c9"),
"isDeleted" : 0,
"productId" : 31,
"references" : [
{
"_id" : ObjectId("5fa919a49bbe481d117506ca"),
"languageCode" : "en",
"languageId" : 1,
"productId" : ObjectId("5fa919a49bbe481d117506ba")
},
{
"_id" : ObjectId("5fa91cc7d7d52f1e389dee1f"),
"languageCode" : "ar",
"languageId" : 2,
"productId" : ObjectId("5fa91cc7d7d52f1e389dee1e")
}
],
"createdAt" : ISODate("2020-11-09T10:27:48.859Z"),
"updatedAt" : ISODate("2020-11-09T10:27:48.859Z"),
"__v" : 0
},
{
"_id" : ObjectId("5f9aab1d8e475489270ebe3a"),
"isDeleted" : 0,
"productId" : 21,
"references" : [
{
"_id" : ObjectId("5f9aab1d8e475489270ebe3b"),
"languageCode" : "en",
"languageId" : 1,
"productId" : ObjectId("5f9aab1c8e475489270ebe2d")
}
],
"createdAt" : ISODate("2020-10-29T11:44:29.852Z"),
"updatedAt" : ISODate("2020-10-29T11:44:29.852Z"),
"__v" : 0
}
This is my MongoDB collection, in which I store the multilingual references to the product collection. The productId values inside references point to the product collection. If ar is passed in the request, we should get only the productId of the ar languageCode. If that languageCode does not exist for a document, we should get the productId of the en languageCode instead.
For Example if the user pass ar then the query should return
"productId" : ObjectId("5fa91cc7d7d52f1e389dee1e")
"productId" : ObjectId("5f9aab1c8e475489270ebe2d")
I have tried using $or with $elemMatch, but I am not able to get the desired result. I am also thinking of using $cond. Can anyone help me construct the query?
We can achieve this as follows:
$facet categorizes the incoming documents.
In arAarray, we get all documents that have "references.languageCode": "ar" (such a document may or may not also have en), then de-structure the references array, then keep only "references.languageCode": "ar" using $match. $group collects all productIds that belong to "references.languageCode": "ar".
In enArray, we get only the documents that have "references.languageCode": "en" and no ar entry. The rest is similar to arAarray.
$concatArrays concatenates the arAarray and enArray arrays.
$unwind de-structures the array.
$replaceRoot promotes each object to the root.
Here is the mongo script.
// Returns, per document, the "ar" productId(s) when an "ar" reference exists,
// otherwise the productId(s) of documents that have "en" but no "ar".
db.collection.aggregate([
{
// $facet runs both sub-pipelines over the same input documents.
$facet: {
// Documents that have an "ar" reference ("arAarray" is spelled the same way where it is read below).
arAarray: [
{
$match: {
"references.languageCode": "ar"
}
},
{
$unwind: "$references"
},
{
// After $unwind, keep only the "ar" element of each document.
$match: {
"references.languageCode": "ar"
}
},
{
$group: {
_id: "$_id",
productId: {
$addToSet: "$references.productId"
}
}
}
],
// Documents that have an "en" reference and no "ar" reference at all.
enArray: [
{
$match: {
$and: [
{
"references.languageCode": "en"
},
{
"references.languageCode": {
$ne: "ar"
}
}
]
}
},
{
$unwind: "$references"
},
{
// NOTE(review): there is no $match after this $unwind, so productIds of any
// other non-"ar" languages in the document are collected too.
$group: {
_id: "$_id",
productId: {
$addToSet: "$references.productId"
}
}
}
]
}
},
{
// Join both facet results into one array.
$project: {
combined: {
"$concatArrays": [
"$arAarray",
"$enArray"
]
}
}
},
{
// One output document per combined entry.
$unwind: "$combined"
},
{
// Promote each combined entry to the top level.
"$replaceRoot": {
"newRoot": "$combined"
}
}
])
Working Mongo playground
You can test this solution to see if it is useful for your question:
// Per document: keep the "ar" references if any exist, otherwise the "en" ones,
// then output only their productId.
db.collection.aggregate([
{
$addFields: {
foundResults:
{
$cond: {
// "$references.languageCode" resolves to the array of all language codes in the document.
if: { $in: ["ar", "$references.languageCode"] }, then:
{
$filter: {
input: "$references",
as: "item",
cond: {
$and: [{ $eq: ["$$item.languageCode", 'ar'] },
]
}
}
}
, else:
{
// Fallback when the document has no "ar" reference.
$filter: {
input: "$references",
as: "item",
cond: {
$and: [{ $eq: ["$$item.languageCode", 'en'] },
]
}
}
}
}
}
}
},
// One output document per selected reference.
{ $unwind: "$foundResults" },
// Make the selected reference the root document.
{ $replaceRoot: { newRoot: { $mergeObjects: ["$foundResults"] } } },
// Keep only productId (this is the reference's productId, since the reference is now the root).
{ $project: { _id: 0, "productId": 1 } }
])

MongoDB joining across array of ids

Before the question: I'm extremely new to MongoDB and NoSQL.
I'm having two collections in my database:
users:
{
"_id" : ObjectId("5f1efeece50f2b25d4be2de2"),
"name" : {
"familyName" : "Doe",
"givenName" : "John"
},
"email" : "johndoe#example.com",
"threads" : [ObjectId("5f1f00f31abb0e3f107fbf93"), ObjectId("5f1f0725850eca800c70ef9e") ] }
}
threads:
{
"_id" : ObjectId("5f1f0725850eca800c70ef9e"),
"thread_participants" : [ ObjectId("5f1efeece50f2b25d4be2de2"), ObjectId("5f1eff1ae50f2b25d4be2de4") ],
"date_created" : ISODate("2020-07-27T16:25:19.702Z") }
}
I want to get all the threads a user is involved in, with the other users' info nested inside.
Something like:
{
"_id" : ObjectId("5f1f0725850eca800c70ef9e"),
"thread_participants" :
[
{
"name" : {
"familyName" : "Doe",
"givenName" : "John"
},
"email" : "johndoe#example.com",
},
{
"name" : {
"familyName" : "Doe",
"givenName" : "Monica"
},
"email" : "monicadoe#example.com",
}
],
"date_created" : ISODate("2020-07-27T16:25:19.702Z") }
},
...,
...,
...
How do I go about this?
You can use $lookup to "join" the data from both collections:
// Lists every thread with its thread_participants ids replaced by the
// matching user documents (restricted to _id, email and name).
db.threads.aggregate([
  {
    $lookup: {
      // `from` takes a literal collection name. A "$"-prefixed string is not
      // resolved as a field path here, so "$users" would match no collection
      // and the join would always come back empty.
      from: "users",
      // Expose this thread's participant ids to the sub-pipeline as $$participants.
      let: { participants: "$thread_participants" },
      pipeline: [
        {
          // Keep only the users whose _id is one of the thread's participants.
          $match: {
            $expr: {
              $in: ["$_id", "$$participants"],
            },
          },
        },
        {
          $project: {
            _id: 1,
            email: 1,
            name: 1,
          },
        },
      ],
      // Overwrites the original array of ids with the joined user documents.
      as: "thread_participants",
    },
  },
]);
Mongo Playground

Resources