Elasticsearch aggregation using top_hits field with script ordering

Elasticsearch aggregation using top_hits field with script ordering - database

I have a set of documents with src, txt and flt fields. I want to query by txt field in the following way:
Group (bucketize) by src;
In each bucket calculate top 1 most relevant document;
Order each bucket by the _score * doc.flt value.
So far I have implemented 1 and 2, but not 3. Even if 3 may be not very efficient, I still want to have such an option. My query looks like:
{
"query" : {
'match' : {
'text' : {
'query' : <some text>,
'fuzziness' : 'AUTO',
'operator' : 'and'
}
}
},
"aggs": {
"by_src": {
"terms": {
"field": "src",
"size" : 10,
"order" : {"top_score" : "desc"}
},
"aggs": {
"top_hits" : {
"top_hits" : {
"sort": { "_score": { "order": "desc" } },
"size" : 1
}
},
"top_score": {
"max" : {
"script" : "_score",
}
}
}
}
}
}

I believe it's failing because you don't need to use _source field to apply the sort to each bucket, just apply the sort by the field name:
{
"query" : {
'match' : {
'text' : {
'query' : <some text>,
'fuzziness' : 'AUTO',
'operator' : 'and'
}
}
},
"aggs": {
"by_src": {
"terms": {
"field": "src",
"size" : 10,
"order" : {"top_score" : "desc"}
},
"aggs": {
"top_hits" : {
"top_hits" : {
"sort":[{
"flt": {"order": "desc"}
}],
"size" : 1
}
},
"top_score": {
"max" : {
"script" : "_score",
}
}
}
}
}
}
I am assuming your document has a field called flt that you want to use to sort. Naturally you can also change the sorting to asc if it's what you need.

Related

Write a mongo query to count the data where similar data in array?

Sample data: there are multiple similar collection:
{
"_id" : NumberLong(301),
"telecom" : [
{
"countryCode" : {
"value" : "+1"
},
"extension" : [
{
"url" : "primary",
"value" : [
"true"
]
}
],
"modifiedValue" : {
"value" : "8887778888"
},
"system" : {
"value" : "phone"
},
"useCode" : {
"value" : "Home Phone"
},
"value" : {
"value" : "8887778888"
}
},
{
"extension" : [
{
"url" : "primary",
"value" : [
"true"
]
}
],
"modifiedValue" : {
"value" : "abc#test.com"
},
"system" : {
"value" : "email"
},
"useCode" : {
"value" : "work"
},
"value" : {
"value" : "abc#test.com"
}
}
]
}
Issue: I want to cont the collection where telecom.system.value = email and countryCode doesn't exist in the email part object. here I am attaching a script but I need one line query
var count = 0,i;
db.getCollection('practitioner').find({"telecom.system.value":"email"}).forEach(function(practitioner){
//print("updating : " +practitioner._id.valueOf())
telecom = practitioner.telecom.valueOf()
for(i= 0;i<telecom.length;i++){
if(telecom[i].system.value === 'email' && telecom[i].countryCode){
count+=1;
}
}
});
print(" Total count of the practitioner with country code in email object: "+count)
Above mention, the script is working fine and the output is as I expected. but the script is not optimised and I want to write in a single line query. Thanks in advance.

You can try aggregation method aggregate(),
Approach 1:
$match condition for countryCode should exists and system.value should be email
$filter to iterate loop of telecom array and check both condition, this will return expected elements
$size to get total element from above filter result
$group by null and count total
var result = await db.getCollection('practitioner').aggregate([
{
$match: {
telecom: {
$elemMatch: {
countryCode: { $exists: true },
"system.value": "email"
}
}
}
},
{
$project: {
count: {
$size: {
$filter: {
input: "$telecom",
cond: {
$and: [
{ $ne: [{ $type: "$$this.countryCode" }, "missing"] },
{ $eq: ["$$this.system.value", "email"] }
]
}
}
}
}
}
},
{
$group: {
_id: null,
count: { $sum: "$count" }
}
}
]);
print("Total count of the practitioner with country code in email object: "+result[0].count);
Playground
Approach 2:
$match condition for countryCode should exists and system.value should be email
$unwind deconstruct telecom array
$match to filter document using above conditions
$count to get total elements count
var result = await db.getCollection('practitioner').aggregate([
{
$match: {
telecom: {
$elemMatch: {
countryCode: { $exists: true },
"system.value": "email"
}
}
}
},
{ $unwind: "$telecom" },
{
$match: {
"telecom.countryCode": { $exists: true },
"telecom.system.value": "email"
}
},
{ $count: "count" }
]);
print("Total count of the practitioner with country code in email object: "+result[0].count);
Playground
I have not tested the performance but you can check and use as per your requirement.

Update documents nested multiple levels in an array

I have this JSON structure in MongoDb and want to update changing a specific value of a particular item in a nested array. I would like to change the key targetAreaId from USER_MESSAGE to VISUAL_MESSAGE.
{
"_id" : "5cde9f482f2d5b924f492da2",
"scenario" : "SCENARIOX",
"mediaType" : "VISUAL",
"opCon" : "NORMAL",
"stepConfigs" : [
{
"stepType" : "STEPX",
"enabled" : true,
"configs" : [
{
"contentTemplateId" : "5cde9f472f2d5b924f492973",
"scope" : "STANDARD",
"key" : "CONTENT"
},
{
"priorityId" : "5cde9f472f2d5b924f49224f",
"scope" : "STANDARD",
"key" : "PRIORITY"
},
{
"targetAreaId" : "USER_MESSAGE",
"scope" : "STANDARD",
"key" : "TARGET_AREA"
}
],
"description" : "XPTO"
}
],
"scope" : "STANDARD" }
How can I do it in the same time?
EDIT
I am trying this way:
var cursor = db.getCollection('CollectionX').find({
"scenario": "SCENARIOX",
"stepConfigs.stepType": "STEPX",
"stepConfigs.configs.key": "TARGET_AREA"
});
if (cursor.hasNext()) {
var doc = cursor.next();
doc.stepConfigs.find(function(v,i) {
if (v.stepType == "STEPX") {
doc.stepConfigs[i].configs.find(function(w,j) {
if (w.key == "TARGET_AREA") {
var result = db.getCollection('CollectionX').update(
{ "_id" : doc._id },
{ "$set" : { doc.stepConfigs[i].configs[j].targetAreaId: "VISUAL_MESSAGE" }}
);
}
});
};
});
} else {
print("Step does not exist");
}
But the error below is occurring:
Error: Line 15: Unexpected token .

I don't think that's possible.
You can update the specific element you want using this query:
db.test_array.updateOne({}, {
$set: {
"stepConfigs.$[firstArray].configs.$[secondArray].targetAreaId": "VISUAL_MESSAGE"
}
}, {
arrayFilters: [{
"firstArray.stepType": "STEPX"
}, {
"secondArray.targetAreaId": "USER_MESSAGE"
}]
})
But pushing in the same array at the same time does render this (this was for the original model, but it's still the same problem, you can't $set and $push in the same array this way):
> db.test_array.updateOne({"step.type":"B"},{$push:{"step.$.configs":{"className":"something"}}, $set:{"step.$.configs.$[secondArray].targetAreaId":"TA-1"}},{arrayFilters:[{"secondArray.targetAreaId":"TA-2"}]})
2019-09-06T13:53:44.783+0200 E QUERY [js] WriteError: Updating the path 'step.$.configs.$[secondArray].targetAreaId' would create a conflict at 'step.$.configs' :

elasticsearch how to use exact search and ignore the keyword special characters in keywords?

i had some id value (numeric and text combination) in my elasticsearch index, and in my program user might will input some special characters in search keyword.
and i want to know is there anyway that can let elasticsearch to use exact search and also can remove some special characters in search keywork
i already use custom analyzer to split search keyword by some special characters. and use query->match to search data, and i still got no results
data
{
"_index": "testdata",
"_type": "_doc",
"_id": "11112222",
"_source": {
"testid": "1MK444750"
}
}
custom analyzer
"analysis" : {
"analyzer" : {
"testidanalyzer" : {
"pattern" : """([^\w\d]+|_)""",
"type" : "pattern"
}
}
}
mapping
{
"article" : {
"mappings" : {
"_doc" : {
"properties" : {
"testid" : {
"type" : "text",
"analyzer" : "testidanalyzer"
}
}
}
}
}
}
here's my elasticsearch query
GET /testdata/_search
{
"query": {
"match": {
// "testid": "1MK_444-750" // no result
"testid": "1MK444750"
}
}
}
and analyzer successfully seprated separated my keyword, but i just can't match anything in result
POST /testdata/_analyze
{
"analyzer": "testidanalyzer",
"text": "1MK_444-750"
}
{
"tokens" : [
{
"token" : "1mk",
"start_offset" : 0,
"end_offset" : 3,
"type" : "word",
"position" : 0
},
{
"token" : "444",
"start_offset" : 4,
"end_offset" : 7,
"type" : "word",
"position" : 1
},
{
"token" : "750",
"start_offset" : 8,
"end_offset" : 11,
"type" : "word",
"position" : 2
}
]
}
please help, thanks in advance!

First off, you should probably model the testid field as keyword rather than text, it's a more appropriate data type.
You want to put in a feature whereby some characters (_, -) are effectively ignored at search time. You can achieve this by giving your field a normalizer, which tells Elasticsearch how to preprocess data for this field prior to indexing or searching. Specifically, you can declare a mapping char filter in your normalizer that replaces these characters with an empty string.
This is how all these changes would fit into your mapping:
PUT /testdata
{
"settings": {
"analysis": {
"char_filter": {
"mycharfilter": {
"type": "mapping",
"mappings": [
"_ => ",
"- => "
]
}
},
"normalizer": {
"mynormalizer": {
"type": "custom",
"char_filter": [
"mycharfilter"
]
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"testid" : {
"type" : "keyword",
"normalizer" : "mynormalizer"
}
}
}
}
}
The following searches would then produce the same results:
GET /testdata/_search
{
"query": {
"match": {
"testid": "1MK444750"
}
}
}
GET /testdata/_search
{
"query": {
"match": {
"testid": "1MK_444-750"
}
}
}

How to match and retrieve specific Sub-document value from Mongo

How can I search and retrieve only "Stats.item.id" from this collection who have greater than zero "Stats.item.p" value. I am also facing problem in unwinding this collection.
{
"_id" : "8643",
"Stats" : [
{
"date" : ISODate("2014-02-01"),
"Stats" : {
"item" : [
{
"id" : "4356"
},
{
"id" : "9963",
"p" : NumberInt(1)
}
]
}
}
]
}
{
"_id" : "8643",
{
"date" : ISODate("2014-02-01"),
"Stats" : {
"item" : [
{
"id" : "9963",
"p" : NumberInt(1)
}
}
This is the output I expect. Can anyone help me write this aggregation? oooooooooooooooooooooooooooooooooooooooooooooo ooooooooooooooooooooooooo oooooooooooooooooooo oooooooooooo oooooooo

Hope this aggregation query will work
db.collection.aggregate([
{$unwind:'$Stats'},
{$unwind:'$Stats.Stats'},
{$unwind:'$Stats.Stats.item'},
{$match:{
'Stats.Stats.item.p':{$gt:0}
}},
{$group:{
_id:{
date:'$Stats.date'
},
item:{$push:'$Stats.Stats.item'},
_ids:'$_id'
}},
{$group:{
_id:{_ids:'$_ids',
date:'$_id.date'},
Stats:{$push:{
item:'$item'
}},
}},
{$project:{
_id:'$_id._ids',
Stats:{
date:'$_id.date'
Stats:'$Stats'
}
}}
])

Use $elemMAtch for searching in the array.
db.getCollection('CollectionName').find({
"Stats": {
"$elemMatch": {
"Stats.item": {
"$elemMatch": {
"p": 1
}
}
}
}
})

Mongo DB array aggregation to count all array entries in a collection [duplicate]

so i have a bunch of simple documents like
{
"foos": [
ObjectId("5105862f2b5e30877c685c58"),
ObjectId("5105862f2b5e30877c685c57"),
ObjectId("5105862f2b5e30877c685c56"),
],
"typ": "Organisation",
}
and i want to find out the overall size of associated foos to documents of type "Organisation"
so i have this aggregate query
db.profil.aggregate(
[
{
$match:{
"typ":"Organisation"
}
},
{
$project: {
fooos: { $size: "$foos" }
}
}
]
)
this returns the count of all foos for each document
like :
{ "_id" : ObjectId("50e577602b5e05e74b38a6c8"), "foooos" : 1 }
{ "_id" : ObjectId("51922170975a09f363e3eef5"), "foooos" : 3 }
{ "_id" : ObjectId("51922170975a09f363e3eef8"), "foooos" : 2 }
{ "_id" : ObjectId("5175441d975ae346a3a8dff2"), "foooos" : 0 }
{ "_id" : ObjectId("5192216f975a09f363e3eee9"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eeeb"), "foooos" : 3 }
{ "_id" : ObjectId("5192216f975a09f363e3eee4"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eee6"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eedb"), "foooos" : 2 }
{ "_id" : ObjectId("51922174975a09f363e3ef4a"), "foooos" : 1 }
{ "_id" : ObjectId("5192216f975a09f363e3eee1"), "foooos" : 1 }
{ "_id" : ObjectId("5192216e975a09f363e3eed7"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eeee"), "foooos" : 3 }
is there some query that would return the summed up count for foos of all documents ?
i played arround with $sum but dont know how to combine with my query, i only do get syntax errors, it would be cool to know if this is possible

Include the $group operator pipeline stage after the $project step as follows:
db.profil.aggregate([
{ "$match":{ "typ": "Organisation" } },
{ "$project": {
"fooos": { "$size": "$foos" }
} },
{ "$group": {
"_id": null,
"count": {
"$sum": "$fooos"
}
} }
])
This will group all the input documents from the previous $project stage and applies the accumulator expression $sum on the fooos field within the group to get the total (using your last example):
This can also be done by-passing the $project pipeline as:
db.profil.aggregate([
{ "$match": { "typ": "Organisation" } },
{ "$group": {
"_id": null,
"count": {
"$sum": { "$size": "$foos" }
}
} }
])
Output
/* 0 */
{
"result" : [
{
"_id" : null,
"count" : 24
}
],
"ok" : 1
}

I know this is an old question but you can bypass project altogether if you want, so how about this?
db.profil.aggregate([
{
"$match":{ "typ": "Organisation" }
},
{
"$group":
{
"_id": null,
"count":
{
"$sum": { "$size": "$foos" }
}
}
}])
The output remains the same and it seems it's (slightly) faster.

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Elasticsearch aggregation using top_hits field with script ordering - database

Related

Write a mongo query to count the data where similar data in array?

Update documents nested multiple levels in an array

elasticsearch how to use exact search and ignore the keyword special characters in keywords?

How to match and retrieve specific Sub-document value from Mongo

Mongo DB array aggregation to count all array entries in a collection [duplicate]

Categories

Resources