Elasticsearch aggregation using top_hits field with script ordering - database

I have a set of documents with src, txt and flt fields. I want to query by txt field in the following way:
Group (bucketize) by src;
In each bucket calculate top 1 most relevant document;
Order each bucket by the _score * doc.flt value.
So far I have implemented 1 and 2, but not 3. Even if 3 may be not very efficient, I still want to have such an option. My query looks like:
{
"query" : {
'match' : {
'text' : {
'query' : <some text>,
'fuzziness' : 'AUTO',
'operator' : 'and'
}
}
},
"aggs": {
"by_src": {
"terms": {
"field": "src",
"size" : 10,
"order" : {"top_score" : "desc"}
},
"aggs": {
"top_hits" : {
"top_hits" : {
"sort": { "_score": { "order": "desc" } },
"size" : 1
}
},
"top_score": {
"max" : {
"script" : "_score",
}
}
}
}
}
}

I believe it's failing because you don't need to use _source field to apply the sort to each bucket, just apply the sort by the field name:
{
"query" : {
'match' : {
'text' : {
'query' : <some text>,
'fuzziness' : 'AUTO',
'operator' : 'and'
}
}
},
"aggs": {
"by_src": {
"terms": {
"field": "src",
"size" : 10,
"order" : {"top_score" : "desc"}
},
"aggs": {
"top_hits" : {
"top_hits" : {
"sort":[{
"flt": {"order": "desc"}
}],
"size" : 1
}
},
"top_score": {
"max" : {
"script" : "_score",
}
}
}
}
}
}
I am assuming your document has a field called flt that you want to use to sort. Naturally you can also change the sorting to asc if it's what you need.

Related

Write a mongo query to count the data where similar data in array?

Sample data: there are multiple similar collection:
{
"_id" : NumberLong(301),
"telecom" : [
{
"countryCode" : {
"value" : "+1"
},
"extension" : [
{
"url" : "primary",
"value" : [
"true"
]
}
],
"modifiedValue" : {
"value" : "8887778888"
},
"system" : {
"value" : "phone"
},
"useCode" : {
"value" : "Home Phone"
},
"value" : {
"value" : "8887778888"
}
},
{
"extension" : [
{
"url" : "primary",
"value" : [
"true"
]
}
],
"modifiedValue" : {
"value" : "abc#test.com"
},
"system" : {
"value" : "email"
},
"useCode" : {
"value" : "work"
},
"value" : {
"value" : "abc#test.com"
}
}
]
}
Issue: I want to cont the collection where telecom.system.value = email and countryCode doesn't exist in the email part object. here I am attaching a script but I need one line query
var count = 0,i;
db.getCollection('practitioner').find({"telecom.system.value":"email"}).forEach(function(practitioner){
//print("updating : " +practitioner._id.valueOf())
telecom = practitioner.telecom.valueOf()
for(i= 0;i<telecom.length;i++){
if(telecom[i].system.value === 'email' && telecom[i].countryCode){
count+=1;
}
}
});
print(" Total count of the practitioner with country code in email object: "+count)
Above mention, the script is working fine and the output is as I expected. but the script is not optimised and I want to write in a single line query. Thanks in advance.
You can try aggregation method aggregate(),
Approach 1:
$match condition for countryCode should exists and system.value should be email
$filter to iterate loop of telecom array and check both condition, this will return expected elements
$size to get total element from above filter result
$group by null and count total
var result = await db.getCollection('practitioner').aggregate([
{
$match: {
telecom: {
$elemMatch: {
countryCode: { $exists: true },
"system.value": "email"
}
}
}
},
{
$project: {
count: {
$size: {
$filter: {
input: "$telecom",
cond: {
$and: [
{ $ne: [{ $type: "$$this.countryCode" }, "missing"] },
{ $eq: ["$$this.system.value", "email"] }
]
}
}
}
}
}
},
{
$group: {
_id: null,
count: { $sum: "$count" }
}
}
]);
print("Total count of the practitioner with country code in email object: "+result[0].count);
Playground
Approach 2:
$match condition for countryCode should exists and system.value should be email
$unwind deconstruct telecom array
$match to filter document using above conditions
$count to get total elements count
var result = await db.getCollection('practitioner').aggregate([
{
$match: {
telecom: {
$elemMatch: {
countryCode: { $exists: true },
"system.value": "email"
}
}
}
},
{ $unwind: "$telecom" },
{
$match: {
"telecom.countryCode": { $exists: true },
"telecom.system.value": "email"
}
},
{ $count: "count" }
]);
print("Total count of the practitioner with country code in email object: "+result[0].count);
Playground
I have not tested the performance but you can check and use as per your requirement.

Update documents nested multiple levels in an array

I have this JSON structure in MongoDb and want to update changing a specific value of a particular item in a nested array. I would like to change the key targetAreaId from USER_MESSAGE to VISUAL_MESSAGE.
{
"_id" : "5cde9f482f2d5b924f492da2",
"scenario" : "SCENARIOX",
"mediaType" : "VISUAL",
"opCon" : "NORMAL",
"stepConfigs" : [
{
"stepType" : "STEPX",
"enabled" : true,
"configs" : [
{
"contentTemplateId" : "5cde9f472f2d5b924f492973",
"scope" : "STANDARD",
"key" : "CONTENT"
},
{
"priorityId" : "5cde9f472f2d5b924f49224f",
"scope" : "STANDARD",
"key" : "PRIORITY"
},
{
"targetAreaId" : "USER_MESSAGE",
"scope" : "STANDARD",
"key" : "TARGET_AREA"
}
],
"description" : "XPTO"
}
],
"scope" : "STANDARD" }
How can I do it in the same time?
EDIT
I am trying this way:
var cursor = db.getCollection('CollectionX').find({
"scenario": "SCENARIOX",
"stepConfigs.stepType": "STEPX",
"stepConfigs.configs.key": "TARGET_AREA"
});
if (cursor.hasNext()) {
var doc = cursor.next();
doc.stepConfigs.find(function(v,i) {
if (v.stepType == "STEPX") {
doc.stepConfigs[i].configs.find(function(w,j) {
if (w.key == "TARGET_AREA") {
var result = db.getCollection('CollectionX').update(
{ "_id" : doc._id },
{ "$set" : { doc.stepConfigs[i].configs[j].targetAreaId: "VISUAL_MESSAGE" }}
);
}
});
};
});
} else {
print("Step does not exist");
}
But the error below is occurring:
Error: Line 15: Unexpected token .
I don't think that's possible.
You can update the specific element you want using this query:
db.test_array.updateOne({}, {
$set: {
"stepConfigs.$[firstArray].configs.$[secondArray].targetAreaId": "VISUAL_MESSAGE"
}
}, {
arrayFilters: [{
"firstArray.stepType": "STEPX"
}, {
"secondArray.targetAreaId": "USER_MESSAGE"
}]
})
But pushing in the same array at the same time does render this (this was for the original model, but it's still the same problem, you can't $set and $push in the same array this way):
> db.test_array.updateOne({"step.type":"B"},{$push:{"step.$.configs":{"className":"something"}}, $set:{"step.$.configs.$[secondArray].targetAreaId":"TA-1"}},{arrayFilters:[{"secondArray.targetAreaId":"TA-2"}]})
2019-09-06T13:53:44.783+0200 E QUERY [js] WriteError: Updating the path 'step.$.configs.$[secondArray].targetAreaId' would create a conflict at 'step.$.configs' :

elasticsearch how to use exact search and ignore the keyword special characters in keywords?

i had some id value (numeric and text combination) in my elasticsearch index, and in my program user might will input some special characters in search keyword.
and i want to know is there anyway that can let elasticsearch to use exact search and also can remove some special characters in search keywork
i already use custom analyzer to split search keyword by some special characters. and use query->match to search data, and i still got no results
data
{
"_index": "testdata",
"_type": "_doc",
"_id": "11112222",
"_source": {
"testid": "1MK444750"
}
}
custom analyzer
"analysis" : {
"analyzer" : {
"testidanalyzer" : {
"pattern" : """([^\w\d]+|_)""",
"type" : "pattern"
}
}
}
mapping
{
"article" : {
"mappings" : {
"_doc" : {
"properties" : {
"testid" : {
"type" : "text",
"analyzer" : "testidanalyzer"
}
}
}
}
}
}
here's my elasticsearch query
GET /testdata/_search
{
"query": {
"match": {
// "testid": "1MK_444-750" // no result
"testid": "1MK444750"
}
}
}
and analyzer successfully seprated separated my keyword, but i just can't match anything in result
POST /testdata/_analyze
{
"analyzer": "testidanalyzer",
"text": "1MK_444-750"
}
{
"tokens" : [
{
"token" : "1mk",
"start_offset" : 0,
"end_offset" : 3,
"type" : "word",
"position" : 0
},
{
"token" : "444",
"start_offset" : 4,
"end_offset" : 7,
"type" : "word",
"position" : 1
},
{
"token" : "750",
"start_offset" : 8,
"end_offset" : 11,
"type" : "word",
"position" : 2
}
]
}
please help, thanks in advance!
First off, you should probably model the testid field as keyword rather than text, it's a more appropriate data type.
You want to put in a feature whereby some characters (_, -) are effectively ignored at search time. You can achieve this by giving your field a normalizer, which tells Elasticsearch how to preprocess data for this field prior to indexing or searching. Specifically, you can declare a mapping char filter in your normalizer that replaces these characters with an empty string.
This is how all these changes would fit into your mapping:
PUT /testdata
{
"settings": {
"analysis": {
"char_filter": {
"mycharfilter": {
"type": "mapping",
"mappings": [
"_ => ",
"- => "
]
}
},
"normalizer": {
"mynormalizer": {
"type": "custom",
"char_filter": [
"mycharfilter"
]
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"testid" : {
"type" : "keyword",
"normalizer" : "mynormalizer"
}
}
}
}
}
The following searches would then produce the same results:
GET /testdata/_search
{
"query": {
"match": {
"testid": "1MK444750"
}
}
}
GET /testdata/_search
{
"query": {
"match": {
"testid": "1MK_444-750"
}
}
}

How to match and retrieve specific Sub-document value from Mongo

How can I search and retrieve only "Stats.item.id" from this collection who have greater than zero "Stats.item.p" value. I am also facing problem in unwinding this collection.
{
"_id" : "8643",
"Stats" : [
{
"date" : ISODate("2014-02-01"),
"Stats" : {
"item" : [
{
"id" : "4356"
},
{
"id" : "9963",
"p" : NumberInt(1)
}
]
}
}
]
}
{
"_id" : "8643",
{
"date" : ISODate("2014-02-01"),
"Stats" : {
"item" : [
{
"id" : "9963",
"p" : NumberInt(1)
}
}
This is the output I expect. Can anyone help me write this aggregation? oooooooooooooooooooooooooooooooooooooooooooooo ooooooooooooooooooooooooo oooooooooooooooooooo oooooooooooo oooooooo
Hope this aggregation query will work
db.collection.aggregate([
{$unwind:'$Stats'},
{$unwind:'$Stats.Stats'},
{$unwind:'$Stats.Stats.item'},
{$match:{
'Stats.Stats.item.p':{$gt:0}
}},
{$group:{
_id:{
date:'$Stats.date'
},
item:{$push:'$Stats.Stats.item'},
_ids:'$_id'
}},
{$group:{
_id:{_ids:'$_ids',
date:'$_id.date'},
Stats:{$push:{
item:'$item'
}},
}},
{$project:{
_id:'$_id._ids',
Stats:{
date:'$_id.date'
Stats:'$Stats'
}
}}
])
Use $elemMAtch for searching in the array.
db.getCollection('CollectionName').find({
"Stats": {
"$elemMatch": {
"Stats.item": {
"$elemMatch": {
"p": 1
}
}
}
}
})

Mongo DB array aggregation to count all array entries in a collection [duplicate]

so i have a bunch of simple documents like
{
"foos": [
ObjectId("5105862f2b5e30877c685c58"),
ObjectId("5105862f2b5e30877c685c57"),
ObjectId("5105862f2b5e30877c685c56"),
],
"typ": "Organisation",
}
and i want to find out the overall size of associated foos to documents of type "Organisation"
so i have this aggregate query
db.profil.aggregate(
[
{
$match:{
"typ":"Organisation"
}
},
{
$project: {
fooos: { $size: "$foos" }
}
}
]
)
this returns the count of all foos for each document
like :
{ "_id" : ObjectId("50e577602b5e05e74b38a6c8"), "foooos" : 1 }
{ "_id" : ObjectId("51922170975a09f363e3eef5"), "foooos" : 3 }
{ "_id" : ObjectId("51922170975a09f363e3eef8"), "foooos" : 2 }
{ "_id" : ObjectId("5175441d975ae346a3a8dff2"), "foooos" : 0 }
{ "_id" : ObjectId("5192216f975a09f363e3eee9"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eeeb"), "foooos" : 3 }
{ "_id" : ObjectId("5192216f975a09f363e3eee4"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eee6"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eedb"), "foooos" : 2 }
{ "_id" : ObjectId("51922174975a09f363e3ef4a"), "foooos" : 1 }
{ "_id" : ObjectId("5192216f975a09f363e3eee1"), "foooos" : 1 }
{ "_id" : ObjectId("5192216e975a09f363e3eed7"), "foooos" : 2 }
{ "_id" : ObjectId("5192216f975a09f363e3eeee"), "foooos" : 3 }
is there some query that would return the summed up count for foos of all documents ?
i played arround with $sum but dont know how to combine with my query, i only do get syntax errors, it would be cool to know if this is possible
Include the $group operator pipeline stage after the $project step as follows:
db.profil.aggregate([
{ "$match":{ "typ": "Organisation" } },
{ "$project": {
"fooos": { "$size": "$foos" }
} },
{ "$group": {
"_id": null,
"count": {
"$sum": "$fooos"
}
} }
])
This will group all the input documents from the previous $project stage and applies the accumulator expression $sum on the fooos field within the group to get the total (using your last example):
This can also be done by-passing the $project pipeline as:
db.profil.aggregate([
{ "$match": { "typ": "Organisation" } },
{ "$group": {
"_id": null,
"count": {
"$sum": { "$size": "$foos" }
}
} }
])
Output
/* 0 */
{
"result" : [
{
"_id" : null,
"count" : 24
}
],
"ok" : 1
}
I know this is an old question but you can bypass project altogether if you want, so how about this?
db.profil.aggregate([
{
"$match":{ "typ": "Organisation" }
},
{
"$group":
{
"_id": null,
"count":
{
"$sum": { "$size": "$foos" }
}
}
}])
The output remains the same and it seems it's (slightly) faster.

Resources