Elasticsearch aggregation only on specific entries in an array

Elasticsearch aggregation only on specific entries in an array - arrays

I'm new to Elasticsearch and can't figure out how to solve the following problem.
The easiest way to explain my problem is to show you an example.
The following array "listing" is part of all my files in Elasticsearch, but the entries vary, so the "person" with the "id" 42, might be in 50% of my files. What I'm trying to do is to get the average "ranking.position.standard" of all the persons with id 42 in all my files in Elasticsearch.
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 2
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 7
}
}
}
]
}
Thanks for your help!

First of all, do you store listings as an object or nested data type? I don't think it's going to work if it's an object, so try the following example:
PUT /test
{
"mappings": {
"_default_": {
"properties": {
"listing": {
"type": "nested"
}
}
}
}
}
PUT /test/aa/1
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 2
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 7
}
}
}
]
}
PUT /test/aa/2
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 5
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 6
}
}
}
]
}
GET test/_search
{
"size": 0,
"aggs": {
"nest": {
"nested": {
"path": "listing"
},
"aggs": {
"persons": {
"terms": {
"field": "listing.person.id",
"size": 10
},
"aggs": {
"avg_standard": {
"avg": {
"field": "listing.ranking.position.standard"
}
}
}
}
}
}
}
}
This has brought me the following result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"nest": {
"doc_count": 4,
"persons": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 42,
"doc_count": 2,
"avg_standard": {
"value": 3.5
}
},
{
"key": 55,
"doc_count": 2,
"avg_standard": {
"value": 6.5
}
}
]
}
}
}
}
It does seem correct.

Related

What kind of JSON JOLT Spec to get key-value output where key is a data value and value is an array

I'm trying to find a spec array that yields the desired output
Input:
{
"aggregations": {
"masterId": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1Q52",
"doc_count": 3,
"serialNumbers": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3R24Z3",
"count": 1
},
{
"key": "526GA2",
"count": 1
},
{
"key": "873XHE",
"count": 1
}
]
}
}
]
}
}
}
Spec:
Trying to figure this out
Desired Output:
{
"1Q52": ["3R24Z3", "526GA2", "873XHE"]
}
My current Spec array is
[
{
"operation": "shift",
"spec": {
"aggregations": {
"masterId": {
"buckets": {
"*": {
"key": "key",
"serialNumbers": {
"buckets": {
"*": {
"key": "key"
}
}
}
}
}
}
}
}
}
]
and my current output is
{
"key" : [ "1Q52", "3R24Z3", "526GA2", "873XHE" ]
}
What kind of spec array could give me the desired output?

You can go 4 level up and grab the value to be the key for the array by using "key": "#(4,key)" such as
[
{
"operation": "shift",
"spec": {
"aggregations": {
"masterId": {
"buckets": {
"*": {
"serialNumbers": {
"buckets": {
"*": {
"key": "#(4,key)"
}
}
}
}
}
}
}
}
}
]
where : and then triple { character traverse is counted in order to reach the desired key value.

Issue with MongoDB array aggregation and grouping by key in array of objects

Considering an array like the one below which i'm trying to group by date and sum the durations:
[
{
"allDates": [
{
"duration": 153,
"date": "2021-10"
},
{
"duration": 20,
"date": "2021-11"
},
{
"duration": 181,
"date": "2021-11"
},
{
"duration": 180,
"date": "2021-11"
}
]
}
]
I'm trying to achieve a sum of the duration but grouped by the dates.
This is what i've tried so far:
db.collection.aggregate([
{
$addFields: {
durations: {
$arrayToObject: {
$map: {
input: "$allDates",
as: "allDate",
in: {
k: {
$toString: "$$allDate.date"
},
v: {
$sum: {
$map: {
input: "$allDates",
as: "kv",
in: {
$cond: {
if: {
$eq: [
{
$toString: "$allDate.date"
},
{
$toString: "$$kv.k"
}
]
},
then: "$$kv.duration",
else: 0
}
}
}
}
}
}
}
}
},
}
}
])
Unfortunately the result i get is:
[
{
"allDates": [
{
"date": "2021-10",
"duration": 153
},
{
"date": "2021-11",
"duration": 20
},
{
"date": "2021-11",
"duration": 181
},
{
"date": "2021-11",
"duration": 180
}
],
"durations": {
"2021-10": 534,
"2021-11": 534
}
}
]
So it's adding them all up for every key instead of for each one separately, what am i missing here?
Basically I'm expecting to get:
...
"durations": {
"2021-10": 153,
"2021-11": 381
}

Try this one:
db.collection.aggregate([
{
$unwind: "$allDates"
},
{
$group: {
_id: "$allDates.date",
duration: {
$sum: "$allDates.duration"
}
}
},
{
$group: {
_id: null,
durations: {
$push: {
k: "$_id",
v: "$duration"
}
}
}
},
{
$project: {
_id: 0,
durations: {
$arrayToObject: "$durations"
}
}
}
])
Mongo Playground

Make the sum of booleans as integers with MongoDB

I'm trying to sum booleans (where true means 1 and false -1) in an array for each document in my collection and then sort it.
I'm using MongoDB aggregation pipeline with $addFields, $sum and $cond.
Here's the playground : https://play.db-ai.co/m/XQLKqbkkgAABTFVm
The pipeline :
[
{
"$addFields": {
"score": {
"$sum": {
"$cond": [
"$votes.value",
1,
-1
]
}
}
}
},
{
"$sort": {
"score": -1
}
}
]
The collection :
[
{
"votes": [
{
"value": true
},
{
"value": true
},
{
"value": true
},
{
"value": false
}
]
},
{
"votes": [
{
"value": true
},
{
"value": true
},
{
"value": false
},
{
"value": false
}
]
}
]
Actual results :
[
{
"_id": ObjectId("000000000000000000000000"),
"votes": [
{
"value": true
},
{
"value": true
},
{
"value": true
},
{
"value": false
}
],
"score": 1
},
{
"_id": ObjectId("000000000000000000000001"),
"votes": [
{
"value": true
},
{
"value": true
},
{
"value": false
},
{
"value": false
}
],
"score": 1
}
]
What I want :
[{
"_id": ObjectId("000000000000000000000000"),
"votes": [
{
"value": true
},
{
"value": true
},
{
"value": true
},
{
"value": false
}
],
"score": 2
}, {
"_id": ObjectId("000000000000000000000001"),
"votes": [
{
"value": true
},
{
"value": true
},
{
"value": false
},
{
"value": false
}
],
"score": 0
}]

I got it to work by unwinding the array and then grouping by _id again.
See Playground: https://play.db-ai.co/m/XQMFlZAtYAABLHtL
[
{
"$unwind": {
"path": "$votes"
}
},
{
"$group": {
"_id": "$_id",
"votes": {
"$push": "$votes"
},
"score": {
"$sum": {
"$cond": [
"$votes.value",
1,
-1
]
}
}
}
},
{
"$sort": {
"score": -1
}
}
]

To solve my problem I used $map multiple times. The solution of #Plancke is working but I had issues using a $match afterwards (it was always giving no results).
[
{
$addFields: {
scoresInBoolean: {
$map: {
input: '$votes',
as: 'vote',
in: '$$vote.value',
},
},
},
}, {
$addFields: {
scoresInInteger: {
$map: {
input: '$scoresInBoolean',
as: 'scoreInBoolean',
in: {
$cond: [
'$$scoreInBoolean',
1,
-1,
],
},
},
},
},
}, {
$addFields: {
score: {
$sum: '$scoresInInteger',
},
},
}
]

Custom math with aggregation using mongo and angular

I'm currently using aggregation to display registration teams and nets for a stats page. I can do the count on each level, but the calculations for nets is inaccurate. My aggregation is as follows:
module.exports.registrationStats = function(req, res) {
Registration.aggregate([
{
"$group": {
"_id": {
"day": "$day",
"group": "$group",
"division": "$division",
"level": "$level"
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group",
"division": "$_id.division"
},
"count": { "$sum": "$count" },
"levels": {
"$push": {
"level": "$_id.level",
"teams": "$count",
"nets" : {$ceil : { $divide: [ "$count" , 5 ] } }
}
}
}
},
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group"
},
"count": { "$sum": "$count" },
"divisions": {
"$push": {
"division": "$_id.division",
"count": "$count",
"levels": "$levels"
}
}
}
}
]).exec(function(err, regStats){
if(err) {
console.log("Error grouping registrations");
res.status(500).json(err);
} else {
console.log("Found and grouped " + regStats.length + " regStats");
res.json(regStats);
}
});
};
This gives me the following as an output:
[
{
"_id": {
"day": "Saturday",
"group": "nonpro"
},
"count": 144,
"divisions": [
{
"division": "Men's",
"count": 69,
"levels": [
{
"level": "BB",
"teams": 30,
"nets": 6
},
{
"level": "A",
"teams": 8,
"nets": 2
},
{
"level": "B",
"teams": 19,
"nets": 4
},
{
"level": "AA",
"teams": 12,
"nets": 3
}
]
},
{
"division": "Women's",
"count": 75,
"levels": [
{
"level": "AA",
"teams": 9,
"nets": 2
},
{
"level": "BB",
"teams": 16,
"nets": 4
},
{
"level": "B",
"teams": 18,
"nets": 4
},
{
"level": "A",
"teams": 32,
"nets": 7
}
]
}
]
}
]
The problem is that I cannot just run a ceil filter on the Math.ceil(divisions.count/5) to get a value for divisions.nets or Math.ceil(_id.count/5) to get _id.nets because they are wrong in some cases.
I need to be able to total divisions.levels.nets and push that into divisions.nets and add divisions.nets and put that value into _id.nets so the calculations work properly.
Any ideas on how to do this?

Something like this. Add $project stage to calculate the nets for level followed by $sum & $push nets for rest of $groups
Registration.aggregate([
{
"$group": {
"_id": {
"day": "$day",
"group": "$group",
"division": "$division",
"level": "$level"
},
"count": { "$sum": 1 }
}
},
{ $project: { count:1, nets: { $ceil : { $divide: [ "$count" , 5 ] } } } },
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group",
"division": "$_id.division"
},
"count": { "$sum": "$count" },
"nets": { "$sum": "$nets" },
"levels": {
"$push": {
"level": "$_id.level",
"teams": "$count",
"nets" : "$nets"
}
}
}
},
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group"
},
"count": { "$sum": "$count" },
"nets": { "$sum": "$nets" },
"divisions": {
"$push": {
"division": "$_id.division",
"count": "$count",
"nets:": "$nets",
"levels": "$levels"
}
}
}
}
])

Finding union or intersection of buckets using elasticsearch aggregations

i have nested aggregations and i want to find union or intersections of 2nd aggregations buckets based on conditions on my 1st aggregation bucket results.For eg this my aggregation.
"aggs": {
"events": {
"terms": {
"field": "event_name"
},
"aggs":{
"devices":{
"terms":{
"field": "device-id"
}
}
}
}
}
And this the result of my aggregation
"aggregations": {
"events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "conversion_checkout",
"doc_count": 214,
"devices": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 6,
"buckets": [
{
"key": "9a11f243d44",
"doc_count": 94
},
{
"key": "ddcb21fd6cb",
"doc_count": 35
}
]
}
},
{
"key": "action_view_product",
"doc_count": 5,
"devices": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "54E4C593",
"doc_count": 4
},
{
"key": "9a11f243d44",
"doc_count": 1
}
]
}
}
]
}
}
Now if i want to find all the devices which have done action_view_product and conversion_checkout how do i do it in aggregations?

I think you want to get all the device-ids having event_names action_view_product and conversion_checkout as follows-
{
"aggregations":{
"devices_agg":{
"doc_count":516,
"devices":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":623232334,
"doc_count":275
},
{
"key":245454512,
"doc_count":169
},
{
"key":345454567,
"doc_count":32
},
{
"key":578787565,
"doc_count":17
},
{
"key":146272715,
"doc_count":23
}
]
}
}
}
}
The doc_count = 516 is the total number of documents having event_names either action_view_product or conversion_checkout and "key" in the devices aggregation is device-id.
If I get you correct, then below query will do the thing for you-
{
"size": 0,
"aggs": {
"devices_agg": {
"filter": {
"bool": {
"must": [
{
"terms": {
"event_name": [
"action_view_product",
"conversion_checkout"
]
}
}
]
}
},
"aggs": {
"devices": {
"terms": {
"field": "device-id",
"size": 100
}
}
}
}
}
}
Let me know if I got you wrong.