Get all distinct keys of a nested object - database

I have following data in my collection:
[
{
_id: "2313123123",
metadata: {
path: "...",
value: "...",
name: "..."
}
},
{
_id: "2313123123",
metadata: {
path: "...",
name: "...",
origin: "...",
}
},
{
_id: "2313123123",
metadata: {
path: "...",
source: "..."
}
},
]
I want to retrieve all distinct key names of the field metadata from my documents.
I want to retrieve ["path", "value", "name", "origin", "source"].
How can I query for this? Is this possible with the distinct method or do I need to use aggregate?

You'll have to use an aggregation for this. Sadly, due to the nature of your needs, this is going to be a very "expensive" pipeline to execute. There is no way to avoid iterating over the entire collection and adding the unique keys to the array.
We're going to use $objectToArray to turn metadata into an array, then $unwind it, and finally use $group to collect all the unique values.
db.collection.aggregate([
{
$project: {
keys: {
$map: {
input: {
"$objectToArray": "$metadata"
},
in: "$$this.k"
}
}
}
},
{
$unwind: "$keys"
},
{
$group: {
_id: null,
keys: {
"$addToSet": "$keys"
}
}
}
])
Mongo Playground

db.collection.aggregate([
{
$addFields: {
metadata: {
$objectToArray: "$metadata"
}
}
},
{
$unwind: "$metadata"
},
{
$group: {
_id: "distinct",
dist: {
$addToSet: "$metadata.k"
}
}
}
])
Explained:
Convert the metadata object into an array of {k, v} pairs, where each key name is stored in the k field.
Unwind the metadata array of k keys & v values.
Group with $addToSet to extract only the distinct k values in the final result.
playground
A helpful JavaScript one-liner option from the mongo shell:
var s = []; db.collection.find({},{metadata:1,_id:0}).forEach( function(doc) { for (var key in doc.metadata) s.push(key); } ); var uni = Array.from(new Set(s)); printjson(uni);
["path","name","origin","source","value"]

Related

Finding documents in mongodb collection by order of elements index of array field

Array field in collection:
"fruits": [
{"fruit1": "banana"},
{"fruit2": "apple"},
{"fruit3": "pear"},
{"fruit4": "orange"}
]
"fruits": [
{"fruit2": "apple"},
{"fruit4": "orange"},
{"fruit1": "banana"},
{"fruit3": "pear"}
]
"fruits": [
{"fruit3": "pear"},
{"fruit2": "apple"},
{"fruit4": "orange"},
{"fruit1": "banana"}
]
I need to find the documents in the collection where "banana" appears before "apple". Does MongoDB allow comparing the positions of elements in an array, like this:
if (fruits.indexOf('banana') < fruits.indexOf('apple')) return true;
Or is there another way to get the result I need?
MongoDB's array query operations do not support any positional search as you want.
You can, however, write a $where query to do what you want:
db.yourCollection.find({
$where: function() {
return (this.fruits.indexOf('banana') < this.fruits.indexOf('apple'))
}
})
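Be advised though, you won't be able to use indexes here and the performance will be a problem. Also note that indexOf compares elements by strict equality, so this only works when fruits holds plain strings (not objects such as {"fruit1": "banana"}), and a missing 'banana' yields -1, which also passes the check.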
Another approach you can take is to rethink the database design; if you can specify what it is you're trying to build, someone can give you specific advice.
One more approach: pre-calculate the boolean value before persisting to DB as a field and query on true / false.
Consider refactoring your schema if possible. The dynamic field names(i.e. fruit1, fruit2...) make it unnecessarily complicated to construct a query. Also, if you require frequent queries by array index, you should probably store your array entries in individual documents with some sort keys to facilitate sorting with index.
Nevertheless, it is achievable by using $unwind and then $group to reassemble the documents. With the includeArrayIndex option, you can get each element's index inside the array.
db.collection.aggregate([
{
"$unwind": {
path: "$fruits",
includeArrayIndex: "idx"
}
},
{
"$addFields": {
fruits: {
"$objectToArray": "$fruits"
}
}
},
{
"$addFields": {
"bananaIdx": {
"$cond": {
"if": {
$eq: [
"banana",
{
$first: "$fruits.v"
}
]
},
"then": "$idx",
"else": "$$REMOVE"
}
},
"appleIdx": {
"$cond": {
"if": {
$eq: [
"apple",
{
$first: "$fruits.v"
}
]
},
"then": "$idx",
"else": "$$REMOVE"
}
}
}
},
{
$group: {
_id: "$_id",
fruits: {
$push: {
"$arrayToObject": "$fruits"
}
},
bananaIdx: {
$max: "$bananaIdx"
},
appleIdx: {
$max: "$appleIdx"
}
}
},
{
$match: {
$expr: {
$lt: [
"$bananaIdx",
"$appleIdx"
]
}
}
},
{
$unset: [
"bananaIdx",
"appleIdx"
]
}
])
Mongo Playground

MongoDB Aggregation: How to return only the values that don't exist in all documents

Lets say I have an array ['123', '456', '789']
I want to Aggregate and look through every document with the field books and only return the values that are NOT in any documents. For example if '123' is in a document, and '456' is, but '789' is not, it would return an array with ['789'] as it's not included in any books fields in any document.
.aggregate( [
{
$match: {
books: {
$in: ['123', '456', '789']
}
}
},
I don't want the documents returned, but just the actual values that are not in any documents.
Here's one way to scan the entire collection to look for missing book values.
db.collection.aggregate([
{ // "explode" books array to docs with individual book values
"$unwind": "$books"
},
{ // scan entire collection creating set of book values
"$group": {
"_id": null,
"allBooksSet": {
"$addToSet": "$books" // <-- generate set of book values
}
}
},
{
"$project": {
"_id": 0, // don't need this anymore
"missing": { // use $setDifference to find missing values
"$setDifference": [
[ "123", "456", "789" ], // <-- your values go here
"$allBooksSet" // <-- the entire collection's set of book values
]
}
}
}
])
Example output:
[
{
"missing": [ "789" ]
}
]
Try it on mongoplayground.net.
Based on @rickhg12hs's answer, here is another variation that replaces $unwind with $reduce, which is considered less costly. Two out of the three steps are the same:
db.collection.aggregate([
{
$group: {
_id: null,
allBooks: {$push: "$books"}
}
},
{
$project: {
_id: 0,
allBooksSet: {
$reduce: {
input: "$allBooks",
initialValue: [],
in: {$setUnion: ["$$value", "$$this"]}
}
}
}
},
{
$project: {
missing: {
$setDifference: [["123","456", "789"], "$allBooksSet"]
}
}
}
])
Try it on mongoplayground.net.

Looping through array to count in mongodb/mongoose

I have a user schema that contains a value called amputationInfo:
amputationInfo: [
{
type: String,
},
],
Here is an example of what that might look like in the database:
amputationInfo: [
"Double Symes/Boyd",
"Single Above-Elbow"
]
I have a review Schema that allows a user to leave a review, it contains a reference to the user who left it:
user: {
type: mongoose.Schema.ObjectId,
ref: 'User',
required: [true, 'Each review must have an associated user!'],
},
When a user leaves a review, I want to create an aggregate function that looks up the user on the review, finds their amputationInfo, loops through the array and adds up the total amount of users that contain "Double Symes/Boyd", "Single Above-Elbow"
So if we have 3 users and their amputationInfo is as follows:
amputationInfo: [
"Double Symes/Boyd",
"Single Above-Elbow"
]
amputationInfo: [
"Single Above-Elbow"
]
amputationInfo: []
The return from the aggregate function will count each term and add one to the corresponding value and look something like this:
[
{
doubleSymesBoyd: 1,
singleAboveElbow: 2
}
]
Here is what I have tried, but I just don't know enough about mongoDB to solve the issue:
[
{
'$match': {
'prosthetistID': new ObjectId('6126ca6148f34c00189f86f5')
}
}, {
'$lookup': {
'from': 'users',
'localField': 'user',
'foreignField': '_id',
'as': 'userInfo'
}
}, {
'$unwind': {
'path': '$userInfo'
}
}
]
After the $unwind, the resulting object has a userInfo key, that contains an amputationInfo array nested:
You can use the following stages:
$unwind to deconstruct the array
first $group to get the sum of each category
second $group to push into one document and make it as key value pair
$arrayToObject to get the desired output
$replaceRoot to make the data output into root
Here is the code
db.collection.aggregate([
{ "$unwind": "$userInfo.amputationInfo" },
{
"$group": {
"_id": "$userInfo.amputationInfo",
"count": { "$sum": 1 }
}
},
{
$group: {
_id: null,
data: { $push: {
k: "$_id",
v: "$count"
}
}
}
},
{ $project: { data: { "$arrayToObject": "$data" } } },
{ "$replaceRoot": { "newRoot": "$data" } }
])
Working Mongo playground

How to fix MongoDB array concatenation error?

I have a collection in MongoDB with a few million documents. There is an attribute (categories) that is an array containing all the categories that a document belongs to. I am using the following query to convert the array into a comma-separated string so I can add it to SQL Server through a Spoon transformation.
for example
The document has ["a","b","c",...] and I need a,b,c,... so I can put it in a column.
categories: {
$cond: [
{ $eq: [{ $type: "$categories" }, "array"] },
{
$trim: {
input: {
$reduce: {
input: "$categories",
initialValue: "",
in: { $concat: ["$$value", ",", "$$this"] }
}
}
}
},
"$categories"
]
}
when i run the query i get the following error and i cannot figure out what the problem is.
com.mongodb.MongoQueryException: Query failed with error code 16702 and error message '$concat only supports strings, not array' on server
A few documents had this attribute as a string and not an array, so I added a type check, but the issue is still there. Any help on how to narrow down the issue would be much appreciated.
A few other attributes were the same in the same collection and this query is working fine for the rest of them.
I don't see any problem in your aggregation. It shouldn't give this error. Can you try to update your mongodb version?
However, your aggregation is not working properly: the $reduce expression was not producing the intended result, so I converted it to this:
db.collection.aggregate([
{
"$project": {
categories: {
$cond: [
{
$eq: [{ $type: "$categories" }, "array"]
},
{
'$reduce': {
'input': '$categories',
'initialValue': '',
'in': {
'$concat': [
'$$value',
{ '$cond': [{ '$eq': ['$$value', ''] }, '', ', '] },
'$$this'
]
}
}
},
"$categories"
]
}
}
}
])
Edit:
If you have nested arrays in the categories field, we can flatten them with $unwind stages. If you add these 3 stages above the $project stage, the aggregation will work.
{
"$unwind": "$categories"
},
{
"$unwind": "$categories"
},
{
"$group": {
_id: null,
categories: {
$push: "$categories"
}
}
},
Playground

mongo aggregation $lookup with arrays

I have a structure like this
unions { // collection
members { // array
instanceId // some id
...
}
...
}
In documents, I have ids prop (array)
I need to lookup all unions that have at least one id from ids (basically $in)
The problem is that it doesn't work
First I wanted to try this variant
{
from: 'unions',
let: { instanceIds: '$ids' },
as: 'unions',
pipeline: [
{
$match: { 'members.instanceId': { $in: '$$instanceIds' } },
},
],
}
But we can't use aggregation variables here. For that, we need to use $expr
{
from: 'unions',
let: { instanceIds: '$ids' },
as: 'unions',
pipeline: [
{
$match: {
$expr: {
$in: ['$members.instanceId', '$$instanceIds']
}
},
},
],
}
But then it returns 0 documents. The instanceIds array is not empty, I've checked it.
Also, if I paste an array with values in the example without $expr then it returns the right values. So most likely the problem is how I build this $lookup.
Use { $ne: [{ $setIntersection: ['$members.instanceId', '$$instanceIds'] }, []] } as the $expr condition:
{
from: 'unions',
let: { instanceIds: '$ids' },
as: 'unions',
pipeline: [
{
$match: {
$expr: {
$ne: [{ $setIntersection: ['$members.instanceId', '$$instanceIds'] }, []],
},
},
},
],
}

Resources