Calculate weight and skip a specific element Mongodb - Aggregation Framework - arrays

I have a collection with array elements containing A,B or C values. I have to calculate a weight of each element value.
The logic of this weight is simple:
We give a weight of 1.0 to the last element that is different from C, and 0 to all the others.
If the last element of the array is C, the weight of 1 goes to the closest preceding element that is different from C.
If there is only 1 element in the array (A, B or C), we give it a weight of 1.
This is what my collection looks like:
{
"_id" : ObjectId("5a535c48a4d86ed94a7e8618"),
"myArray" : [
{
"value" : "C"
},
{
"value" : "A"
},
{
"value" : "C"
},
{
"value" : "B"
},
{
"value" : "A"
},
{
"value" : "A"
}
]
}
{
"_id" : ObjectId("5a535c48a4d86ed94a7e8619"),
"myArray" : [
{
"value" : "A"
},
{
"value" : "C"
},
{
"value" : "B"
},
{
"value" : "C"
},
{
"value" : "C"
}
]
}
I wrote an aggregation to do this, but so far it only handles two cases: it assigns 1 to the last element when that element is different from C,
and when the last element is C it assigns 1 to the element just before it (last-1). So I still have to fix the case where C also appears at last-1, last-2, ..., last-n,
so that the weight of 1 always goes to the last element that is different from C.
db.col.aggregate([{'$addFields':{
'myArray_temp':{
'$switch':{
'branches':[{
'case':{'$and':[{'$gt':[{'$size':'$myArray'},1]},{'$eq':[{'$arrayElemAt':['$myArray.value',-1]},'C']}]},
'then':{'$concatArrays':[
{'$map':{
'input':{'$slice':['$myArray',{'$subtract':[{'$size':'$myArray'},2]}]},
'as':'val',
'in':{'value':'$$val.value','weight':0 }
}},
[{'value':{'$arrayElemAt':['$myArray.value',-2]},'weight':1}],
[{'value':{'$arrayElemAt':['$myArray.value',-1]},'weight':0}]
]}
},
{
'case':{'$eq':[{'$size':'$myArray'},1]},
'then':{'$concatArrays':[
[{'value':{'$arrayElemAt':['$myArray.value',0]},'weight':1}]
]}
},
{
'case':{'$and':[{'$gt':[{'$size':'$myArray'},1]},{'$ne':[{'$arrayElemAt':['$myArray.value',-1]},'C']}]},
'then':{'$concatArrays':[
{'$map':{
'input':{'$slice':['$myArray',{'$subtract':[{'$size':'$myArray'},1]}]},
'as':'val',
'in':{'value':'$$val.value','weight':0 }
}},
[{'value':{'$arrayElemAt':['$myArray.value',-1]},'weight':1}]
]}
}
],
'default':{'$concatArrays':[
[{'value':{'$arrayElemAt':['$myArray.value',0]},'weight':1}]
]}
}
}
}}])
The results should be :
{
"_id" : ObjectId("5a535c48a4d86ed94a7e8618"),
"myArray" : [
{
"value" : "C",
"weight": 0
},
{
"value" : "A" ,
"weight": 0
},
{
"value" : "C",
"weight": 0
},
{
"value" : "B" ,
"weight": 0
},
{
"value" : "A",
"weight": 0
},
{
"value" : "A",
"weight": 1
}
]
}
{
"_id" : ObjectId("5a535c48a4d86ed94a7e8619"),
"total" : 4.5,
"myArray" : [
{
"value" : "A",
"weight": 0
},
{
"value" : "C",
"weight": 0
},
{
"value" : "B" ,
"weight": 1 // here we give 1 to the last differente to C
},
{
"value" : "C" ,
"weight": 0 // my code affect 1 here cause it find C in the last and affect to the last-1 element.
},
{
"value" : "C" ,
"weight": 0
}
]
}
We have to skip all the last (C) elements and give the 1 weight to the last not-C element.
Thank you in advance!

Longest aggregation in my career but seems to be working:
// Gives weight 1 to the last element of myArray whose value is not "C"
// (or to the last element when every value is "C", or to the only element
// of a one-element array) and weight 0 to every other element.
db.collection.aggregate([
{
// Stage 1: remember the array length and build a reversed copy, so the
// "last" element of myArray becomes index 0 of `reversed`.
$addFields : {
size: { $size: "$myArray" },
reversed: {
$reverseArray: "$myArray"
}
}
},
{
// Stage 2: keep only the reversed elements whose value is not "C".
$addFields: {
otherThanC: {
$filter: {
input: "$reversed",
as: "item",
cond: { $ne: [ "$$item.value", "C" ] }
}
}
}
},
{
// Stage 3: position, inside `reversed`, of the first non-"C" element.
// $indexOfArray yields -1 when `otherThanC` is empty (nothing to find).
$addFields: {
firstOtherThanCIndex : {
$indexOfArray: [ "$reversed", { $arrayElemAt: [ "$otherThanC", 0 ] } ]
}
}
},
{
// Stage 4: one document per reversed element, carrying its position in `arrayIndex`.
$unwind: {
path: "$reversed",
includeArrayIndex: "arrayIndex"
}
},
{
// Stage 5: compute the weight of the current element.
$addFields: {
weight: {
$switch: {
branches: [
// a single-element array always gets weight 1
{ case: { $eq: [ "$size", 1 ] }, then: 1 },
// every value is "C": the original last element (reversed index 0) gets weight 1
{ case: { $and: [ { $eq: [ "$arrayIndex", 0 ] }, { $eq: [ { $size: "$otherThanC" }, 0 ] } ] } , then: 1},
// this element is the last non-"C" element of the original array
{ case: { $eq: [ "$arrayIndex", "$firstOtherThanCIndex" ] }, then: 1 }
],
default: 0
}
}
}
},
{
// Stage 6: reassemble the (still reversed) array per document.
// Only _id and the rebuilt array are emitted by this stage.
$group: {
_id: "$_id",
myArrayReversed: {
$push: {
value: "$reversed.value",
weight: "$weight"
}
}
}
},
{
// Stage 7: restore the original element order.
$project: {
_id: 1,
myArray: { $reverseArray: "$myArrayReversed" }
}
}
])
Brief description of each pipeline stage:
We need to add two extra fields: the $size of the array and a second array with the items reversed
Second and third steps are to find the first (originally last) item not equal to C, using $filter and $arrayElemAt to pick that item and $indexOfArray, which returns the first matching index
Then we can $unwind our reversed array using special syntax which adds index of array when unwinding
That is the moment when we can calculate weight using $switch - simply setting 1 if array has one element or indexes are matching and zero otherwise
Then we just need to reshape the data: grouping back by _id and reversing the array

Related

How to projection element in multi array field of MongoDb collection?

Is there a way to get only specific field values from a multi array?
db.test.insert({'id':'stack', 'nums':[{'name':'Dave', 'num': 1, 'phones' :[{'id':1, 'number':'000-000'}, {'id':2, 'number':'000-001'}]}]})
{
"_id" : "63b3896a5b5d9de96b6277fa",
"id" : "stack",
"nums" : [
{
"name" : "Dave",
"num" : 1,
"phones" : [
{
"id" : 1,
"number" : "000-000"
},
{
"id" : 2,
"number" : "000-001"
}
]
}
]
}
the answer i want
{'id':1, 'number':'000-000'}
I want to print only the value with 'id' 1 in 'phones'.
// Returns, for each element of `nums`, only the `phones` entries whose id is 1.
db.collection.aggregate([
{
// emit one document per element of the `nums` array
"$unwind": "$nums"
},
{
$project: {
// `phones` stays an array, holding just the entries that pass the condition
"phones": {
$filter: {
input: "$nums.phones",
// no `as` is given, so the current element is referenced as $$this
cond: {$eq: ["$$this.id",1]
}
}
}
}
}
])
Playground

MongoDB: extra object created when updating with $addToSet

I'm encountering a strange behavior in MongoDB.
This is the schema of the documents in my collection:
{
"_id" : ObjectId("62bf10951fecaed4dba275b1"),
"name" : "Rack 1",
"type" : "rack",
"positions" : [
{
"number" : 1
},
{
"number" : 2
},
{
"number" : 3,
"nodes" : [
{
"number" : 1
}
]
},
{
"number" : 4,
"nodes" : [
{
"number" : 1
},
{
"number" : 2
},
{
"number" : 3
}
]
},
]
}
I would like to run two queries:
create a new position, if it doesn't exist yet
create a new object in the nodes array
I have created two queries so far:
db.getCollection('locations').updateOne(
{ _id: ObjectId("62bf10951fecaed4dba275b1") },
{ $addToSet: { 'positions': { number: 5 } } }
)
db.getCollection('locations').updateOne(
{ _id: ObjectId("62bf10951fecaed4dba275b1"), 'positions.number': 5 },
{ $addToSet: { 'positions.$.nodes': { number: 1 } } }
)
The second query creates a new element in nodes. BUT it also creates a new element in the positions array.
This is the erroneous object:
{
"_id" : ObjectId("62bf10951fecaed4dba275b1"),
"name" : "Rack 1",
"type" : "rack",
"positions" : [
{
"number" : 1
},
{
"number" : 2
},
{
"number" : 3,
"nodes" : [
{
"number" : 1
}
]
},
{
"number" : 4,
"nodes" : [
{
"number" : 1
},
{
"number" : 2
},
{
"number" : 3
}
]
},
{
"number" : 5,
"nodes" : [
{
"number" : 1
}
]
},
{
"number" : 5 <<<<<< THIS IS WRONG
},
]
}
Is there something I need to know or is simply the wrong query?
Many thanks
The two queries may take different amounts of time to complete, so they should not be fired off without waiting for each other.
Please try using some async/await style ordering to ensure that the second query does indeed execute when the first query is finished.
e.g. in javascript:
// Ensures position 5 exists on the rack and then adds node 1 to it.
// The two updates are awaited in order so the second one only runs once
// the first has completed. Safe to run repeatedly.
const myFunction = async () => {
  // $addToSet only treats a document as a duplicate when it matches an
  // existing array element exactly. Once position 5 carries a `nodes` array,
  // { number: 5 } no longer equals it and would be appended a second time.
  // The extra `$ne` condition makes this update a no-op when a position
  // numbered 5 is already present.
  await db.getCollection('locations').updateOne(
    { _id: ObjectId("62bf10951fecaed4dba275b1"), 'positions.number': { $ne: 5 } },
    { $addToSet: { 'positions': { number: 5 } } }
  )
  // The positional `$` targets the first position matched by
  // 'positions.number': 5; $addToSet creates `nodes` if it does not exist.
  await db.getCollection('locations').updateOne(
    { _id: ObjectId("62bf10951fecaed4dba275b1"), 'positions.number': 5 },
    { $addToSet: { 'positions.$.nodes': { number: 1 } } }
  )
}
myFunction()

In mongo DB, how do I grab multiple counts in a single query?

I'm currently trying to massage out counts from the mLab API for reasons I don't have control over. So I want to grab the data I need from there in one query so I can limit the amount of API calls.
Assuming that my data looks like this:
{
"_id": {
"$oid": "12345"
},
"dancer": "Beginner",
"pirate": "Advanced",
"chef": "Mid",
"beartamer": "Mid",
"swordsman": "Mid",
"total": "Mid"
}
I know I can do 6 queries with something similar to:
db.score.aggregate({"$group": { _id: {"total":"$total"}, count: {$sum:1} }} )
but how do I query to get the count for each key? I'd like to see something akin to:
{ "_id" : { "total" : "Advanced" }, "count" : 1 }
{ "_id" : { "total" : "Mid" }, "count" : 1 }
{ "_id" : { "total" : "Beginner" }, "count" : 4 }
{ "_id" : { "pirate" : "Advanced" }, "count" : 1 }
//...etc
The following should give you precisely what you want:
// Counts, in a single pass, how many documents share each distinct
// field/value combination. Every result document has the shape
// { <field>: <value>, count: <n> }.
db.scores.aggregate({
$project: {
"_id": 0 // get rid of the "_id" field since we do not want to count it
}
}, {
$project: {
"doc": {
$objectToArray: "$$ROOT" // transform all documents into key-value pairs
}
}
}, {
$unwind: "$doc" // flatten the resulting array into separate documents
}, {
$group: {
"_id": "$doc", // group by distinct key-value combination
"count": { $sum: 1 } // count documents per bucket
}
}, {
$project: {
"_id": { // some more transformation magic to recreate the desired output structure
$mergeObjects: [
// "$_id" is a single key-value pair; wrapping it in an array lets $arrayToObject turn it back into { key: value }
{ $arrayToObject: [ [ "$_id" ] ] },
{ "count": "$count" }
]
},
}
}, {
$replaceRoot: {
"newRoot": "$_id" // this moves the contents of the "_id" field to the root of the documents
}
})

Upserting a value at an array position using $min

I would like to either insert a new document with a default value as part of an array, or update that part of the array if the document already exists.
What I thought of was:
db.test.update(
{ "a": 5 },
{ $setOnInsert: { "b": [0, 0] }, $min: { "b.0": 5 } },
{ upsert: true }
);
If I do that, then I get:
Cannot update 'b' and 'b.0' at the same time
Another idea was to remove $setOnInsert and just keep $min, since the minimum between nothing and 5 should be 5.
db.test.update(
{ "a": 5 },
{ $min: { "b.0": 5 } },
{ upsert: true }
);
This doesn't raise an error, but now the document I get is:
{ "a" : 5, "b" : { "0" : 5 } }
I need an array with 5 at position 0 however, not an object with a 0 property.
How can I achieve this?
You can use .bulkWrite() for this, and it's actually a prime use case of why this exists. It only sends "one" actual request to the server and has only one response. It's still two operations, but they are more or less tied together and generally atomic anyway:
db.junk.bulkWrite([
{ "updateOne": {
"filter": { "a": 1 },
"update": { "$setOnInsert": { "b": [ 5,0 ] } },
"upsert": true
}},
{ "updateOne": {
"filter": { "a": 1 },
"update": { "$min": { "b.0": 5 } }
}}
])
Run for the first time will give you an "upsert", note that it's "inserted" and not "modified" in the response:
{
"acknowledged" : true,
"deletedCount" : 0,
"insertedCount" : 0,
"matchedCount" : 1,
"upsertedCount" : 1,
"insertedIds" : {
},
"upsertedIds" : {
"0" : ObjectId("5947c412d6eb0b7d6ac37f09")
}
}
And the document of course looks like:
{
"_id" : ObjectId("5947c412d6eb0b7d6ac37f09"),
"a" : 1,
"b" : [
5,
0
]
}
Then run with a different value to $min as you likely would in real cases:
db.junk.bulkWrite([
{ "updateOne": {
"filter": { "a": 1 },
"update": { "$setOnInsert": { "b": [ 5,0 ] } },
"upsert": true
}},
{ "updateOne": {
"filter": { "a": 1 },
"update": { "$min": { "b.0": 3 } }
}}
])
And the response:
{
"acknowledged" : true,
"deletedCount" : 0,
"insertedCount" : 0,
"matchedCount" : 2,
"upsertedCount" : 0,
"insertedIds" : {
},
"upsertedIds" : {
}
}
Which "matched" 2 but of course $setOnInsert does not apply, so the result is:
{
"_id" : ObjectId("5947c412d6eb0b7d6ac37f09"),
"a" : 1,
"b" : [
3,
0
]
}
Just like it should be

Mongo push objects

I want to push values into an object keyed by field names rather than into an array. I tried $push, but I lose the information about the field names of the values inserted into the array.
My collection is :
/* 1 */
{
"_id" : ObjectId("57614a7bd75df17df3013903"),
"O":"aa",
"D":"bb",
"month":1,
"year":2015,
"freq":5
}
/* 2 */
{
"_id" : ObjectId("57614a7bd75df17df3013904"),
"O":"aa",
"D":"bb",
"month":2,
"year":2015,
"freq":5
}
/* 3 */
{
"_id" : ObjectId("57614a7bd75df17df3013905"),
"O":"aa",
"D":"bb",
"month":1,
"year":2016,
"freq":5
}
I want to store all freq corresponding to fields : O and D.
Here is my expected output :
{
"_id" : ...,
"O" : "aa",
"D" : "bb",
"freq" : {
"2015" : {
"1" : 5,
"2":5
},
"2016" : {
"1" : 5
}
}
}
I tried this :
db.collection.aggregate([
{
'$group':
{
_id:{"O":"$O","D":"$D","Y":"$year"},
"freq" :{$push: "$freq"}
}
},
{
'$group':
{
_id:{"O":"$O","D":"$D"},
"freq" :{$push: "$freq"}
}
})]
but I got an array without any information about the year or month.
Thank you
You have used two $group in your query
Your First group query is enough to build the data which you are expecting.
If we are executing the first query
db.stackoverflow.aggregate([
{
'$group':
{
_id:{"O":"$O","D":"$D","Y":"$year"},
"freq" :{$push: "$freq"}
}
}]);
then the result is
{ "_id" : { "O" : "aa", "D" : "bb", "Y" : 2016 }, "freq" : [ 5 ] }
{ "_id" : { "O" : "aa", "D" : "bb", "Y" : 2015 }, "freq" : [ 5, 5 ] }
Now if you execute your second $group query
db.stackoverflow.aggregate([
{
'$group':
{
_id:{"O":"$O","D":"$D"},
"freq" :{$push: "$freq"}
}
}])
then the result is
{ "_id" : { "O" : "aa", "D" : "bb" }, "freq" : [ 5, 5, 5 ] }
Reason:
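The values produced by the first $group stage are not passed to the second $group stage under their original names: after the first stage, O, D and the year exist only inside _id (as "$_id.O", "$_id.D" and "$_id.Y"), so "$O" and "$D" no longer resolve to anything.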
Solution:
Use $project available in the aggregation pipeline which passes along the documents with only the specified fields to the next stage in the aggregation pipeline. The specified fields can be existing fields from the input documents or newly computed fields.
https://docs.mongodb.com/manual/reference/operator/aggregation/project/
Here is the query to get your expected result
// Builds one document per (O, D) pair whose `freq` field is an object keyed
// by year and then by month, e.g.
//   { "O": "aa", "D": "bb", "freq": { "2015": { "1": 5, "2": 5 }, "2016": { "1": 5 } } }
// Requires a server that supports $toString (MongoDB 4.0+).
db.collection.aggregate([
  {
    // Stage 1: one bucket per (O, D, year). Field names are case-sensitive,
    // so the source fields must be referenced as "$O" / "$D".
    // Each month becomes a { k, v } pair, the input shape $arrayToObject expects;
    // keys must be strings, hence $toString.
    '$group': {
      _id: {
        "O": "$O",
        "D": "$D",
        "year": "$year"
      },
      months: {
        $push: {
          k: { $toString: "$month" },
          v: "$freq"
        }
      }
    }
  },
  {
    // Stage 2: one bucket per (O, D). After stage 1 the grouping fields exist
    // only inside _id, so they are read as "$_id.O", "$_id.D" and "$_id.year".
    '$group': {
      _id: {
        "O": "$_id.O",
        "D": "$_id.D"
      },
      years: {
        $push: {
          k: { $toString: "$_id.year" },
          // month -> freq object for this year
          v: { $arrayToObject: "$months" }
        }
      }
    }
  },
  {
    // Stage 3: lift O and D back to the top level and turn the per-year
    // pairs into the final year -> { month -> freq } object.
    '$project': {
      "O": "$_id.O",
      "D": "$_id.D",
      freq: { $arrayToObject: "$years" }
    }
  }
],
{
  allowDiskUse: true
})

Resources