MongoDb count percent of document with a certain field present - database

I have some MongoDB documents (representing orders), and their schema looks roughly like this:
{
id: ObjectID
exchange_order_products: Array
}
The exchange_order_products array is empty if the customer didn't exchange any items he ordered, or if they did, the array will contain an Object for each item exchanged.
I want to get the percent of orders in which the customer didn't exchange anything, i.e. the exchange_order_products array is empty.
So basically the formula is the following: (Number Of Orders With At Least One Exchange * 100) / Number of Orders With No Exchanges
I know that I can count the number of orders where the exchange_order_products array is empty like that:
// Keep only the orders whose exchange_order_products field exists and is an
// empty array, then emit one document with the number of matches in "count".
[{$match: {
exchange_order_products: {$exists: true, $size: 0}
}}, {$count: 'count'}]
But how do I simultaneously get the number of all the documents in my collection?

You can use $group and $sum along with $cond to count empty and non-empty ones separately. Then you need $multiply and $divide to calculate the percentage:
// Counts orders with and without exchanges in a single pass and reports
// percent = 100 * nonEmpty / empty (the formula given in the question).
db.collection.aggregate([
  {
    $group: {
      _id: null, // one group spanning every document in the collection
      // orders whose exchange_order_products array has no elements
      empty: { $sum: { $cond: [ { $eq: [ { $size: "$exchange_order_products" }, 0 ] }, 1, 0 ] } },
      // orders with at least one exchanged item
      nonEmpty: { $sum: { $cond: [ { $eq: [ { $size: "$exchange_order_products" }, 0 ] }, 0, 1 ] } }
    }
  },
  {
    $project: {
      percent: {
        // $divide raises an error on a zero divisor, so yield null when no
        // order has an empty array instead of failing the whole pipeline.
        $cond: [
          { $eq: [ "$empty", 0 ] },
          null,
          { $multiply: [ 100, { $divide: [ "$nonEmpty", "$empty" ] } ] }
        ]
      }
    }
  }
])
Mongo Playground

Related

MongoDB: how to shuffle array and this new order be permanently saved?

So suppose I have a document like:
{
_id: 1,
items: ["aaa", "bbb", "ccc", "ddd", "eee"...]
}
I would like to shuffle the items list once, with this order saved in the table - i.e. I don't want to call random or something for every query, since there are about 200,000 items in this array (not huge, but still, calling $rand every time I want to retrieve an item would be inefficient)
So I'm really looking for some kind of manual script that I can run once - it would then update this document, so it became something like:
{
_id: 1,
items: ["ddd", "bbb", "aaa", "eee", "ccc"...]
}
If anyone knows if this is possible, I'd appreciate it. Thanks
Otherwise, I'd probably fetch the data, shuffle it using another language, then save it back into Mongo
I'm not sure this is the better way to do this
https://mongoplayground.net/p/4AH8buOXudQ
// Shuffle by exploding the array, sampling the elements at random and
// regrouping them per document. This only returns a result; nothing in this
// pipeline writes back to the collection.

// One document per element of items.
const explodeItems = { $unwind: { path: "$items" } };

// Randomly selects at most `size` of the unwound elements.
const pickRandomly = { $sample: { size: 100 } };

// Rebuild one array per original document; the output field is named "item".
const regroup = { $group: { _id: "$_id", item: { $push: "$items" } } };

db.collection.aggregate([explodeItems, pickRandomly, regroup]);
If you're on MongoDB version 5.2+, I would do this using an aggregation pipeline update with the new $sortArray operator and $rand.
Essentially we add a random value for each item, sort the array and then transform it back. You can run this update on demand whenever you want to reshuffle the array.
// Pair every element of items with a random sort key.
const itemsWithKeys = {
  $map: {
    input: "$items",
    in: { value: "$$this", sortVal: { $rand: {} } }
  }
};

// Order the pairs by that key, ascending.
const shuffledPairs = {
  $sortArray: { input: itemsWithKeys, sortBy: { "sortVal": 1 } }
};

// Strip the keys again and overwrite items; the {} filter matches every document.
db.collection.updateMany({}, [
  { $addFields: { items: { $map: { input: shuffledPairs, in: "$$this.value" } } } }
])
Mongo Playground
If you're on an earlier version, you can generate some kind of pseudo-random sort using $reduce (you can actually do a bubble sort as well, but n^2 performance on such a large array is not recommended). Here is an example of how to generate some sort of randomness:
The approach is to iterate over the items array with the $reduce operator: if the randomly generated value is less than 0.333 we append the item to the end of the new array, if it is less than 0.6666 we push it to the start of the array, and otherwise (the value is between 0.6666 and 1) we insert it in the middle of the array.
Obviously you can choose whatever random logic you want and add more switch cases, as mentioned even an actual sort is possible but at the cost of performance.
// Pair every element of items with a random value used to pick a branch below.
const taggedItems = {
  $map: {
    input: "$items",
    in: { value: "$$this", sortVal: { $rand: {} } }
  }
};

// Index at which the accumulated array ($$value) is split in two.
const halfway = { $round: { $divide: [ { $size: "$$value" }, 2 ] } };

// sortVal < 0.333: append the element to the end of the accumulated array.
const appendCase = {
  case: { $lt: [ "$$this.sortVal", 0.333 ] },
  then: { $concatArrays: [ "$$value", [ "$$this" ] ] }
};

// sortVal < 0.6666 (and not caught above): put the element at the front.
const prependCase = {
  case: { $lt: [ "$$this.sortVal", 0.6666 ] },
  then: { $concatArrays: [ [ "$$this" ], "$$value" ] }
};

// Everything else: first half, the element, then the rest from the split index.
// size + 1 is just an upper bound on how many elements can remain.
const insertInMiddle = {
  $concatArrays: [
    { $slice: [ "$$value", halfway ] },
    [ "$$this" ],
    { $slice: [ "$$value", halfway, { $add: [ { $size: "$$value" }, 1 ] } ] }
  ]
};

// Fold the tagged elements into a new array, one placement decision per element.
const pseudoShuffled = {
  $reduce: {
    input: taggedItems,
    initialValue: [],
    in: {
      $switch: {
        branches: [ appendCase, prependCase ],
        default: insertInMiddle
      }
    }
  }
};

// Drop the helper sortVal again and store the reordered values back in items.
db.collection.update({}, [
  { $addFields: { items: { $map: { input: pseudoShuffled, in: "$$this.value" } } } }
])
Mongo Playground

Access same field of every object in array of objects in mongodb aggregation

I have the following documents in collection of mongodb:
banks:[{name:"ABC", amt:0},{name:"PQR", amt:-1},{name:"XYZ", amt:3400}]
banks:[{name:"ABC", amt:-2},{name:"PQR", amt:2344},{name:"XYZ", amt:7600}]
Like this say I have 10 documents and each document contains one banks array. Each banks array has 30 objects in it as shown above.
I am trying to write aggregation query in mongodb to get the count of objects that have "amt" less than equal to zero and greater than zero but so far unable to get it. Please help. Thanks in advance!
The output for above sample documents should be
{"greaterThanZero": 1, "lessThanEqualToZero": 2 }
{"greaterThanZero": 2, "lessThanEqualToZero": 1 }
First you have to separate your documents with $unwind
Then with a $project and a $cond you tell for each document if it's greaterThanZero or lessThanEqualToZero
Finally you sum up greaterThanZero and lessThanEqualToZero with a $group
You can test it here : Mongo Playground
[
  // One output document per element of the banks array.
  { "$unwind": "$banks" },
  // Flag each element: 1 in the bucket its amt falls into, 0 in the other.
  {
    "$project": {
      "greaterThanZero": { "$cond": [ { "$gt": [ "$banks.amt", 0 ] }, 1, 0 ] },
      "lessThanEqualToZero": { "$cond": [ { "$lte": [ "$banks.amt", 0 ] }, 1, 0 ] }
    }
  },
  // Add the flags back up per original document.
  {
    "$group": {
      "_id": "$_id",
      "greaterThanZero": { "$sum": "$greaterThanZero" },
      "lessThanEqualToZero": { "$sum": "$lessThanEqualToZero" }
    }
  }
]
You can do it with $reduce:
it checks the condition using $cond and, if it matches, adds one to the running value.
// Builds a $reduce that walks banks and adds 1 to the running total for every
// element whose amt satisfies `comparison` (e.g. "$gt") against 0.
const tally = (comparison) => ({
  $reduce: {
    input: "$banks",
    initialValue: 0,
    in: {
      $cond: [
        { [comparison]: ["$$this.amt", 0] },
        { $add: ["$$value", 1] },
        "$$value"
      ]
    }
  }
});

// One output document per input document, carrying both counters.
db.collection.aggregate([
  {
    $project: {
      lessThanEqualToZero: tally("$lte"),
      greaterThanZero: tally("$gt")
    }
  }
])
Playground

How to $match an item from the list

I am trying to display the items, that have specific name brand.
This is how that field looks like:
"brand" : [
[
"Samsung",
"Iphone",
"Huawei"
]
]
Tried that query, but I get 0 results:
// Selects documents whose brand field equals "Samsung" or holds it as a direct array element.
db.collection.aggregate([{$match: { brand: "Samsung" }}])
Any ideas, what's wrong?
Your data model is an array of arrays so you have to deal with two dimensions. You can use $map along with $in first and then use $anyElementTrue to see if there's any sub-array matching your condition:
// True for a document when at least one inner array of brand contains "Samsung":
// $map turns each inner array into a boolean, $anyElementTrue ORs them together.
const hasSamsung = {
  $anyElementTrue: {
    $map: {
      input: "$brand",
      in: { $in: [ "Samsung", "$$this" ] }
    }
  }
};

db.collection.aggregate([
  { $match: { $expr: hasSamsung } },
  // Return only the _id and color fields.
  { $project: { _id: 1, color: 1 } }
]);
Mongo Playground
EDIT: use $project to display only certain fields

Find an average day count in the array in mongo

Suppose we have an array in the aggregation pipeline:
{
dates: [
"2019-01-29",
"2019-01-29",
"2019-01-29",
"2019-01-29",
"2019-02-06",
"2019-02-06",
"2019-02-06",
"2019-02-08",
"2019-06-04",
"2019-06-25",
"2019-07-26",
"2019-08-15",
"2019-08-15",
]
}
How to find an average count of the days in such an array?
The next stage of the pipeline is supposed to look like this:
dates : {
"2019-01-29": 4,
"2019-02-06": 3,
"2019-02-08": 1,
"2019-06-04": 1,
"2019-06-25": 1,
"2019-07-26": 1,
"2019-08-15": 2
}
But the final result is supposed to look like this:
avg_day_count: 1.85714285714
I.e. the average count of the days.
The sum of all days divided by the count of unique days.
You can achieve this without any $group logic with a single $project;
// Total number of entries in dates, duplicates included.
const totalEntries = { $size: "$dates" };

// $setUnion drops duplicates, so this is the number of distinct days.
const distinctDays = { $size: { $setUnion: [ "$dates" ] } };

// Average entries per distinct day, computed per document.
db.collection.aggregate([
  { "$project": { "result": { $divide: [ totalEntries, distinctDays ] } } }
])
will give out;
[
{
"_id": ...,
"result": 1.8571428571428572
}
]
check the code interactively on MongoPlayground
You need to run $group twice using $avg in the second one:
// Average number of occurrences per distinct date.
db.collection.aggregate([
  // One document per element of dates. (Fixed: the stage name was misspelled
  // "$uwnind", which the server rejects as an unrecognized pipeline stage.)
  {
    $unwind: "$dates"
  },
  // Count how many times each date occurs.
  {
    $group: {
      _id: "$dates",
      count: { $sum: 1 }
    }
  },
  // Collapse all per-date counts into a single average.
  {
    $group: {
      _id: null,
      avg_day_count: { $avg: "$count" }
    }
  }
])
Mongo Playground

MongoDB remove duplicate subdocuments inside array based on a specific field

My documents have the following structure:
{
_id: ObjectId("59303aa1bad1081d4b98d636"),
clear_number: "83490",
items: [
{
name: "83490_1",
file_id: "e7209bbb",
hash: "2f568bb196f74263c64b7cf273f8ceaa",
},
{
name: "83490_2",
file_id: "9a56a935",
hash: "9c6230f7bf19d3f3186c6c3231ac2055",
},
{
name: "83490_2",
file_id: "ce5f6773",
hash: "9c6230f7bf19d3f3186c6c3231ac2055",
}
],
group_id: null
}
How to remove one of two subdocuments with the same items hash?
The following should do the trick if I understand your question correctly:
collection.aggregate([
  // Flatten: one document per element of items.
  { $unwind: "$items" },

  // Within each original document, bucket the elements by hash and keep the
  // first element of every bucket.
  {
    $group: {
      "_id": {
        "_id": "$_id",
        "clear_number": "$clear_number",
        "group_id": "$group_id",
        "hash": "$items.hash"
      },
      "items": { $first: "$items" }
    }
  },

  // Regroup without the hash so the surviving elements of one document are
  // collected into a single items array again.
  {
    $group: {
      "_id": {
        "_id": "$_id._id",
        "clear_number": "$_id.clear_number",
        "group_id": "$_id.group_id"
      },
      "items": { $push: "$items" }
    }
  },

  // Lift the grouped key fields back to the top level to restore the original layout.
  {
    $project: {
      "_id": "$_id._id",
      "clear_number": "$_id.clear_number",
      "group_id": "$_id.group_id",
      "items": "$items"
    }
  }
])
In response to your comment I would suggest the following query to get the list of all document ids that contain duplicate hashes:
// Number of hashes in the document, duplicates included.
const totalHashes = { $size: "$items.hash" };

// Number of distinct hashes in the document.
const distinctHashes = { $size: { $setUnion: "$items.hash" } };

collection.aggregate([
  // The union of [total] and [distinct] has one element when the two numbers
  // are equal and two elements when they differ.
  { $addFields: { "hashes": { $setUnion: [ [ totalHashes ], [ distinctHashes ] ] } } },

  // A second element (index 1) exists only when the counts differ, i.e. when
  // the document contains duplicate hashes.
  { $match: { "hashes.1": { $exists: true } } },

  // Return nothing but the _id of each such document.
  { $project: { _id: 1 } }
])
There might be different approaches but this one seems pretty straight forward:
Basically, in the $addFields part, for each document, we first create an array consisting of two numbers:
the total number of hashes
the number of distinct hashes
Then we drive this array of two numbers through a $setUnion. After this step there can
either be two different numbers left in the array in which case the hash field does contain duplicates
or there is only one element left, in which case the number of distinct hashes equals the total number of hashes (so there are no duplicates).
We can check if there are two items in the array by testing if the element at position 1 (arrays are zero-based!) exists. That's what the $match stage does.
And the final $project stage is just to limit the output to the _id field only.

Resources