I'm currently using aggregation to display registration teams and nets for a stats page. I can do the count on each level, but the calculations for nets is inaccurate. My aggregation is as follows:
module.exports.registrationStats = function(req, res) {
Registration.aggregate([
{
"$group": {
"_id": {
"day": "$day",
"group": "$group",
"division": "$division",
"level": "$level"
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group",
"division": "$_id.division"
},
"count": { "$sum": "$count" },
"levels": {
"$push": {
"level": "$_id.level",
"teams": "$count",
"nets" : {$ceil : { $divide: [ "$count" , 5 ] } }
}
}
}
},
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group"
},
"count": { "$sum": "$count" },
"divisions": {
"$push": {
"division": "$_id.division",
"count": "$count",
"levels": "$levels"
}
}
}
}
]).exec(function(err, regStats){
if(err) {
console.log("Error grouping registrations");
res.status(500).json(err);
} else {
console.log("Found and grouped " + regStats.length + " regStats");
res.json(regStats);
}
});
};
This gives me the following as an output:
[
{
"_id": {
"day": "Saturday",
"group": "nonpro"
},
"count": 144,
"divisions": [
{
"division": "Men's",
"count": 69,
"levels": [
{
"level": "BB",
"teams": 30,
"nets": 6
},
{
"level": "A",
"teams": 8,
"nets": 2
},
{
"level": "B",
"teams": 19,
"nets": 4
},
{
"level": "AA",
"teams": 12,
"nets": 3
}
]
},
{
"division": "Women's",
"count": 75,
"levels": [
{
"level": "AA",
"teams": 9,
"nets": 2
},
{
"level": "BB",
"teams": 16,
"nets": 4
},
{
"level": "B",
"teams": 18,
"nets": 4
},
{
"level": "A",
"teams": 32,
"nets": 7
}
]
}
]
}
]
The problem is that I cannot just run a ceil filter on the Math.ceil(divisions.count/5) to get a value for divisions.nets or Math.ceil(_id.count/5) to get _id.nets because they are wrong in some cases.
I need to be able to total divisions.levels.nets and push that into divisions.nets and add divisions.nets and put that value into _id.nets so the calculations work properly.
Any ideas on how to do this?
Something like this. Add $project stage to calculate the nets for level followed by $sum & $push nets for rest of $groups
Registration.aggregate([
{
"$group": {
"_id": {
"day": "$day",
"group": "$group",
"division": "$division",
"level": "$level"
},
"count": { "$sum": 1 }
}
},
{ $project: { count:1, nets: { $ceil : { $divide: [ "$count" , 5 ] } } } },
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group",
"division": "$_id.division"
},
"count": { "$sum": "$count" },
"nets": { "$sum": "$nets" },
"levels": {
"$push": {
"level": "$_id.level",
"teams": "$count",
"nets" : "$nets"
}
}
}
},
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group"
},
"count": { "$sum": "$count" },
"nets": { "$sum": "$nets" },
"divisions": {
"$push": {
"division": "$_id.division",
"count": "$count",
"nets:": "$nets",
"levels": "$levels"
}
}
}
}
])
Related
How to calculate the sum of confident_score for every individual vendor?
Data stored in the DB:
[
{
"_id": "61cab38891152daf9387c0c7",
"name": "dummy",
"company_email": "abc#mailinator.com",
"brief_msg": "Cillum sed est prae",
"similar_case_ids": [],
"answer_id": [
"61cab38891152daf9387c0c9"
],
"pros_cons": [
{
"vendor_name": "xyzlab",
"score": [
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor F",
"confident_score": 80,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf0f"
},
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor FFF",
"confident_score": 40,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf10"
}
]
},
{
"vendor_name": "abclab",
"score": [
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor B",
"confident_score": 50,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf16"
},
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor BB",
"confident_score": 60,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf17"
}
]
}
]
the query for getting the matching id and grouping objects according to the vendor_name:
aggregate([
{
$match: { _id: id }
},
{
$unwind: {
path: '$pros_cons'
}
},
{
$group: {
_id: '$pros_cons'
}
},
])
};
After query I'm getting this:
[
{
"_id": {
"vendor_name": "abclab",
"score": [
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor B",
"confident_score": 50,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf16"
},
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor BB",
"confident_score": 60,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf17"
}
],
}
},
{
"_id": {
"vendor_name": "xyzlab",
"score": [
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor F",
"confident_score": 80,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf0f"
},
{
"question_id": "61c5b47198b2c5bbf9f6471c",
"title": "Vendor FFF",
"confident_score": 40,
"text": "text1",
"_id": "61cac505caeeeb3cec78bf10"
}
],
}
}
]
Need to calculate sum for (vendor_name:abclab)TOTAL=110 and for (vendor_name:xyzlab)TOTAL=120 INDIVIDUALLY
required output:
[
{
"vendor_name": "abclab",
"totalScore": 110,
"count" : 2
},
{
"vendor_name": "xyzlab",
"totalScore": 120,
"count" : 2
}
]
$match - Filter documents by id.
$unwind - Deconstruct pros_cons array to multiple documents.
$project - Decorate output documents. With $reduce, to create totalScore field by summing confident_score from each element in pros_cons.score array.
db.collection.aggregate([
{
$match: {
_id: "61cab38891152daf9387c0c7"
}
},
{
$unwind: {
path: "$pros_cons"
}
},
{
$project: {
_id: 0,
vendor_name: "$pros_cons.vendor_name",
totalScore: {
$reduce: {
input: "$pros_cons.score",
initialValue: 0,
in: {
$sum: [
"$$value",
"$$this.confident_score"
]
}
}
}
}
}
])
Sample Demo on Mongo Playground
Let's say I have a Customer document like the following
db.collection.insertOne( {
"customerName": "John Doe",
"orders": [
{
"type": "regular",
"items": [
{
"name": "itemA",
"price": 11.1
},
{
"name": "itemB",
"price": 22.2
}
]
},
{
"type": "express",
"items": [
{
"name": "itemC",
"price": 33.3
},
{
"name": "itemD",
"price": 44.4
}
]
}
]
})
How can I calculate the total price of all orders (111 in this example)?
You can $unwind twice (because nested array) and group using $sum like this:
db.collection.aggregate([
{
"$unwind": "$orders"
},
{
"$unwind": "$orders.items"
},
{
"$group": {
"_id": "$customerName",
"total": {
"$sum": "$orders.items.price"
}
}
}
])
Example here
I´ve been trying to reverse $unwind in nested array. Please, if you could help me it would be great. Thanks in advance.
Here are the details:
checklists collection, this collection has steps and each step has many areas, and I'd like to lookup to fill the area by id. I did it but I cannot reverse $unwind.
{
"steps": [{
"name": "paso1",
"description": "paso1",
"estimated_time": 50,
"active": true,
"areas": [{
"area_id": "60b6e728c44f0365c0d547d6"
}, {
"area_id": "60b6e7a2c44f0365c0d547d8"
}]
}, {
"name": "paso2",
"description": "o",
"estimated_time": 7,
"active": true,
"areas": [{
"area_id": "60b6e76ac44f0365c0d547d7"
}]
}, {
"name": "paso2",
"description": "l",
"estimated_time": 7,
"active": true,
"areas": [{
"area_id": "60b6e728c44f0365c0d547d6"
}]
}],
"name": "prueba",
"description": "prueba",
"type": "prueba",
"active": true,
"updated_at": {
"$date": "2021-06-02T23:56:02.232Z"
},
"created_at": {
"$date": "2021-06-01T22:44:57.114Z"
},
"__v": 0
}
area collection
{
"_id":"60b6e706c44f0365c0d547d5"
"name": "Development",
"short_name": "DEV",
"description": "Development area",
"updated_at": {
"$date": "2021-06-02T02:03:50.383Z"
},
"created_at": {
"$date": "2021-06-02T02:03:50.383Z"
},
"__v": 0,
"active": true
}
My aggregation
db.checklists.aggregate([
{
"$unwind": "$steps"
},
{
"$unwind": "$steps.areas"
},
{
"$lookup": {
"from": "areas",
"let": {
"area_id": {
"$toObjectId": "$steps.areas.area_id"
}
},
"pipeline": [
{
"$match": {
"$expr": {
"$eq": [
"$_id",
"$$area_id"
]
}
}
}
],
"as": "convertedItems"
}
},
{
"$group": {
"_id": "$steps.name",
"root": {
"$first": "$$ROOT"
},
"items": {
"$push": {
"$mergeObjects": [
"$steps.areas",
{
"$arrayElemAt": [
"$convertedItems",
0
]
}
]
}
},
}
},
{
"$addFields": {
"values": {
"$reduce": {
"input": "$items",
"initialValue": [],
"in": {
"$concatArrays": [
"$$value",
{
"$cond": [
{
"$in": [
"$$this.area_id",
"$$value.area_id"
]
},
[],
[
"$$this"
]
]
}
]
}
}
}
}
},
{
"$addFields": {
"root.steps.areas": "$values"
}
},
{
"$replaceRoot": {
"newRoot": "$root"
}
},
{
"$group": {
"_id": "$_id",
"root": {
"$first": "$$ROOT"
},
"steps": {
"$push": "$steps"
}
}
},
{
"$addFields": {
"root.steps": "$steps"
}
},
{
"$replaceRoot": {
"newRoot": "$root"
}
},
{
"$project": {
"convertedItems": 0
}
}
])
I don´t get to form this output:
{
"steps": [{
"name": "paso1",
"description": "paso1",
"estimated_time": 50,
"active": true,
"areas": [{
"_id": "60b6e728c44f0365c0d547d6",
"name":"Development",
..... //join or lookup
}, {
"_id": "60b6e7a2c44f0365c0d547d8",
"name":"Development",
..... //join or lookup
}]
}],
"name": "prueba",
"description": "prueba",
"type": "prueba",
"active": true,
"updated_at": {
"$date": "2021-06-02T23:56:02.232Z"
},
"created_at": {
"$date": "2021-06-01T22:44:57.114Z"
},
"__v": 0
}
Thank you very much!
$unwind deconstruct steps array
$lookup with areas collection pass area_id in let
$match to check is _id in area_ids after converting to string
$project to show required fields
$group by _id and reconstruct the steps array and pass your required fields
db.checklists.aggregate([
{ $unwind: "$steps" },
{
$lookup: {
from: "areas",
let: { area_id: "$steps.areas.area_id" },
pipeline: [
{
$match: {
$expr: { $in: [{ $toString: "$_id" }, "$$area_id"] }
}
},
{ $project: { name: 1 } }
],
as: "steps.areas"
}
},
{
$group: {
_id: "$_id",
steps: { $push: "$steps" },
name: { $first: "$name" },
description: { $first: "$description" },
type: { $first: "$type" },
active: { $first: "$active" },
updated_at: { $first: "$updated_at" },
created_at: { $first: "$created_at" },
__v: { $first: "$__v" }
}
}
])
Playground
I'm new to Elasticsearch and can't figure out how to solve the following problem.
The easiest way to explain my problem is to show you an example.
The following array "listing" is part of all my files in Elasticsearch, but the entries vary, so the "person" with the "id" 42, might be in 50% of my files. What I'm trying to do is to get the average "ranking.position.standard" of all the persons with id 42 in all my files in Elasticsearch.
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 2
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 7
}
}
}
]
}
Thanks for your help!
First of all, do you store listings as an object or nested data type? I don't think it's going to work if it's an object, so try the following example:
PUT /test
{
"mappings": {
"_default_": {
"properties": {
"listing": {
"type": "nested"
}
}
}
}
}
PUT /test/aa/1
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 2
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 7
}
}
}
]
}
PUT /test/aa/2
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 5
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 6
}
}
}
]
}
GET test/_search
{
"size": 0,
"aggs": {
"nest": {
"nested": {
"path": "listing"
},
"aggs": {
"persons": {
"terms": {
"field": "listing.person.id",
"size": 10
},
"aggs": {
"avg_standard": {
"avg": {
"field": "listing.ranking.position.standard"
}
}
}
}
}
}
}
}
This has brought me the following result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"nest": {
"doc_count": 4,
"persons": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 42,
"doc_count": 2,
"avg_standard": {
"value": 3.5
}
},
{
"key": 55,
"doc_count": 2,
"avg_standard": {
"value": 6.5
}
}
]
}
}
}
}
It does seem correct.
I have users indexed with categories as follows
{
id: 1
name: John
categories: [
{
id: 1
name: Category 1
},
{
id: 2
name: Category 2
}
]
},
{
id: 2
name: Mark
categories: [
{
id: 1
name: Category 1
}
]
}
And I'm trying to get all the documents with Category 1 or Category 2 with
{
filter:
{
bool: {
must: [
{
terms: {user.categories.id: [1, 2]}
}
]
}
}
}
But It only returns the first document that has the two categories, what I am doing wrong?
As I understood, terms search that one of the values is contained in the field, so for user 1
user.categories.id: [1, 2]
user 2
user.categories.id: [1]
Categoy id 1 is contained in both documents
The best way to handle this is probably with a nested filter. You'll have to specify the "nested" type in your mapping, though.
I can set up an index like this:
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"doc": {
"properties": {
"categories": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "string"
}
}
},
"id": {
"type": "long"
},
"name": {
"type": "string"
}
}
}
}
}
then add some docs:
PUT /test_index/doc/1
{
"id": 1,
"name": "John",
"categories": [
{ "id": 1, "name": "Category 1" },
{ "id": 2, "name": "Category 2" }
]
}
PUT /test_index/doc/2
{
"id": 2,
"name": "Mark",
"categories": [
{ "id": 1, "name": "Category 1" }
]
}
PUT /test_index/doc/3
{
"id": 3,
"name": "Bill",
"categories": [
{ "id": 3, "name": "Category 3" },
{ "id": 4, "name": "Category 4" }
]
}
Now I can use a nested terms filter like this:
POST /test_index/doc/_search
{
"query": {
"constant_score": {
"filter": {
"nested": {
"path": "categories",
"filter": {
"terms": {
"categories.id": [1, 2]
}
}
}
},
"boost": 1.2
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 1,
"_source": {
"id": 1,
"name": "John",
"categories": [
{
"id": 1,
"name": "Category 1"
},
{
"id": 2,
"name": "Category 2"
}
]
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "2",
"_score": 1,
"_source": {
"id": 2,
"name": "Mark",
"categories": [
{
"id": 1,
"name": "Category 1"
}
]
}
}
]
}
}
Here is the code I used:
http://sense.qbox.io/gist/668aefe910643b52a3a10d40aca67104491668fc