Elasticsearch query in array conditions on doc fields - database

I have a document like this:
{
"_index": "listings",
"_type": "listing",
"_id": "234",
"_source": {
"category_id": "43608",
"categories": [
43608,
43596
]
}
}
I want to query for documents whose category_id appears in their own categories array, something like this:
{
"query": {
"bool": {
"must": [
{
"terms": {
"category_id": "doc.categories"
}
}
]
}
}
}
What am I supposed to do?

As category_id is a string type, it is better to use a should query instead of must: simply iterate through the categories array and make a separate term-level query for each element in the array.
{
"query": {
"bool": {
"should": [
{
"term": {
"category_id": "doc.categories[0]"
}
},
{
"term": {
"category_id": "doc.categories[1]"
}
},
...
]
}
}
}
It will return all documents that match any element of the categories array.

You have to use a script to find one field's value inside another field's value.
{
"script": {
"script": {
"source": "doc.containsKey('categories') && doc['categories'].values.contains(doc['category_id'].value)",
"lang": "painless"
}
}
}

Related

Elasticsearch query identical arrays

I have documents like these:
Doc1
{
"id": ...,
...
"articles": [
{
"id": "5cdd17c7e24f6e05d487b2c2#142936",
...
},
{
"id": "5cdd17c7e24f6e05d487b2c2#226536",
...
}
...
}
Doc2
{
"id": ...,
...
"articles": [
{
"id": "5cdd17c7e24f6e05d487b2c2#142936",
...
},
{
"id": "5cdd17c7e24f6e05d487b2c2#226536",
...
},
{
"id": "5cdd17c7e24f6e05d487b2c2#142965",
...
}
...
}
Doc3
{
"id": ...,
...
"articles": [
{
"id": "5cdd17c7e24f6e05d487b2c2#142936",
...
}
...
}
And I want the document whose articles array is exactly the one I need. For example, if my array of article IDs is ['5cdd17c7e24f6e05d487b2c2#142936', '5cdd17c7e24f6e05d487b2c2#226536'], I only want to get Doc1.
Now I have this query:
GET my_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "articles",
"query": {
"query_string": {
"default_field": "articles.id",
"query": "5cdd17c7e24f6e05d487b2c2#142936 AND 5cdd17c7e24f6e05d487b2c2#226536"
}
}
}
}
]
}
}
}
But with this, I get Doc1 & Doc2...
Assuming articles.id is of type keyword, I think this should work for you (not sure it's the most efficient way to write the query):
GET my_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "articles",
"query": {
"term": {
"articles.id": "5cdd17c7e24f6e05d487b2c2#142936"
}
}
}
},
{
"nested": {
"path": "articles",
"query": {
"term": {
"articles.id": "5cdd17c7e24f6e05d487b2c2#226536"
}
}
}
}
],
"must_not": {
"nested": {
"path": "articles",
"query": {
"query_string": {
"default_field": "articles.id",
"query": "NOT 5cdd17c7e24f6e05d487b2c2#142936 AND NOT 5cdd17c7e24f6e05d487b2c2#226536"
}
}
}
}
}
}
}

ElasticSearch-Kibana : filter array by key

I have data with one parameter which is an array. I know that objects in array are not well supported in Kibana, however I would like to know if there is a way to filter that array with only one value for the key. I mean :
This is a JSON document, for example:
{
"_index": "index",
"_type": "data",
"_id": "8",
"_version": 2,
"_score": 1,
"_source": {
"envelope": {
"version": "0.0.1",
"submitter": "VF12RBU1D53087510",
"MetaData": {
"SpecificMetaData": [
{
"key": "key1",
"value": "94"
},
{
"key": "key2",
"value": "0"
}
]
}
}
}
}
And I would like to only have the data which contains key1 in my SpecificMetaData array in order to plot them. For now, when I plot SpecificMetaData.value it takes all the values of the array (value of key1 and key2) and doesn't propose SpecificMetaData.value1 and SpecificMetaData.value2.
If you need more information, tell me. Thank you.
You may need to define a mapping for your data so that SpecificMetaData is treated as a nested type; the inner_hits of a nested query can then supply you with the objects that have key1.
PUT envelope_index
{
"mappings": {
"document_type": {
"properties": {
"envelope": {
"type": "object",
"properties": {
"version": {
"type": "text"
},
"submitter": {
"type": "text"
},
"MetaData": {
"type": "object",
"properties": {
"SpecificMetaData": {
"type": "nested"
}
}
}
}
}
}
}
}
}
POST envelope_index/document_type
{
"envelope": {
"version": "0.0.1",
"submitter": "VF12RBU1D53087510",
"MetaData": {
"SpecificMetaData": [{
"key": "key1",
"value": "94"
},
{
"key": "key2",
"value": "0"
}
]
}
}
}
POST envelope_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"inner_hits": {},
"path": "envelope.MetaData.SpecificMetaData",
"query": {
"bool": {
"must": [
{
"term": {
"envelope.MetaData.SpecificMetaData.key": {
"value": "key1"
}
}
}
]
}
}
}
}
]
}
}
}

Combine 2 elastic search queries

I've 2 function_score queries.
The first one, that I run on attractions type
{
"query": {
"function_score": {
"query": {
"nested": {
"path": "translations",
"query": {
"multi_match": {
"query": "Text to search",
"type": "best_fields",
"fields": [
"title^3",
"description"
]
}
}
}
},
"functions": [
{
"filter": {
"term": {
"cityId": 3
}
},
"weight": 100
}
],
"score_mode": "multiply"
}
}
}
and the second one, that I run on pizzeria type
{
"query": {
"function_score": {
"query": {
"multi_match": {
"query": "Text to search",
"type": "best_fields",
"fields": [
"name^3",
"description"
]
}
},
"functions": [
{
"filter": {
"term": {
"cityId": 1
}
},
"weight": 100
}
],
"score_mode": "multiply"
}
}
}
They both work well. I now want to search on both types with a single query, but I don't know how to "merge" these 2 queries.
I want to do this because I want to have the elements from the 2 type sorted by the same score.
Is this possible ? How Can I do this ?
You can use the Multi Search API to combine queries into one single query:
Multi Search API

ElasticSearch: Retrieve string concatenation, or partial array

I have many indexed documents such as this one:
{
"_index":"myindex",
"_type":"somedata",
"_id":"31d3255d-67b4-40e6-b9d4-637383eb72ad",
"_version":1,
"_score":1,
"_source":{
"otherID":"b4c95332-daed-49ae-99fe-c32482696d1c",
"data":[
{
"data":"d2454d41-a74e-43af-b3b0-0febeaf67a99",
"iD":"9362f2eb-9bd7-4924-8b0e-77c27bb0aa56"
},
{
"data":"some text",
"iD":"c554b8ce-c873-4fef-b306-ec65d2f40394"
},
{
"data":"5256983c-ef69-4363-9787-97074297c646",
"iD":"8c90e2be-6042-4450-b0fd-0732900f8f65"
},
{
"data":"other text",
"iD":"8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
},
{
"data":"3",
"iD":"c880bfdf-eb4b-4c80-9871-fd44e06b2ed2"
}
],
"iD":"31d3255d-67b4-40e6-b9d4-637383eb72ad"
}
}
Its type mapping is configured this way:
{
"somedata":{
"dynamic_templates":[
{
"defaultIDs":{
"match_pattern":"regex",
"mapping":{
"index":"not_analyzed",
"type":"string"
},
"match":".*(id|ID|iD)"
}
}
],
"properties":{
"otherID":{
"index":"not_analyzed",
"type":"string"
},
"data":{
"properties":{
"data":{
"type":"string"
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
}
}
I wish to be able to retrieve a string concatenation of data based on its ID.
For example, given the id c554b8ce-c873-4fef-b306-ec65d2f40394, and the id 8d8f8a61-02d6-4d3e-9912-9ebb5d213c15, I would like to retrieve some text other text.
These IDs repeat in other documents of the same type with different data.
If this is not possible (which I suspect is the case), I would like to at least retrieve a partial array containing my requested data.
Those arrays can become large (and so is the number of documents) and I would only need one or two elements from each hit.
If both my requests are not possible, how would you suggest changing my mappings in order to facilitate my needs?
Thanks in advance, Jonathan.
I have found a way to do exactly what I needed without changing my data structure.
(I actually did end up changing my data structure, but for reasons of space and efficiency).
All you have to do is enjoy the groovy goodness ElasticSearch has to offer:
{
"query" : { "term" : { "otherID" : "b4c95332-daed-49ae-99fe-c32482696d1c" } },
"script_fields" : { "requestedFields" : { "script" : "_source.data.findAll({ it.iD == 'c554b8ce-c873-4fef-b306-ec65d2f40394' || it.iD == '8d8f8a61-02d6-4d3e-9912-9ebb5d213c15'}) data.join(' ') " } }
}
Just goes to show how strong ElasticSearch really is.
I cannot help you with the field concatenation (maybe it's possible with scripting, but I'm not experienced enough with it; I would assume a new field would have to be generated, etc.), but I can show how to retrieve only the partial data.
It requires at least ES 1.5 because it uses inner_hits and you need to change the mapping.
I added type and include_in_parent to your data type:
DELETE somedata
PUT somedata
PUT somedata/sometype/_mapping
{
"sometype":{
"dynamic_templates":[
{
"defaultIDs":{
"match_pattern":"regex",
"mapping":{
"index":"not_analyzed",
"type":"string"
},
"match":".*(id|ID|iD)"
}
}
],
"properties":{
"otherID":{
"index":"not_analyzed",
"type":"string"
},
"data":{
"type": "nested",
"include_in_parent": true,
"properties":{
"data":{
"type":"string"
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
}
}
Now indexing your document:
PUT somedata/sometype/1
{
"otherID":"b4c95332-daed-49ae-99fe-c32482696d1c",
"data":[
{
"data":"d2454d41-a74e-43af-b3b0-0febeaf67a99",
"iD":"9362f2eb-9bd7-4924-8b0e-77c27bb0aa56"
},
{
"data":"some text",
"iD":"c554b8ce-c873-4fef-b306-ec65d2f40394"
},
{
"data":"5256983c-ef69-4363-9787-97074297c646",
"iD":"8c90e2be-6042-4450-b0fd-0732900f8f65"
},
{
"data":"other text",
"iD":"8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
},
{
"data":"3",
"iD":"c880bfdf-eb4b-4c80-9871-fd44e06b2ed2"
}
],
"iD":"31d3255d-67b4-40e6-b9d4-637383eb72ad"
}
And here's how you can match and retrieve with inner_hits:
POST somedata/sometype/_search
{
"query": {
"nested": {
"path": "data",
"query": {
"bool": {
"should": [
{
"term": {
"data.iD": "c554b8ce-c873-4fef-b306-ec65d2f40394"
}
},
{
"term": {
"data.iD": "8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
}
}
]
}
},
"inner_hits": {}
}
}
}
In the result now look at this path: hits.hits[0].inner_hits.data.hits.hits[0]._source.data; it only contains your two requested matches:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.5986179,
"hits": [
{
"_index": "somedata",
"_type": "sometype",
"_id": "1",
"_score": 0.5986179,
"_source": {
"otherID": "b4c95332-daed-49ae-99fe-c32482696d1c",
"data": [
{
"data": "d2454d41-a74e-43af-b3b0-0febeaf67a99",
"iD": "9362f2eb-9bd7-4924-8b0e-77c27bb0aa56"
},
{
"data": "some text",
"iD": "c554b8ce-c873-4fef-b306-ec65d2f40394"
},
{
"data": "5256983c-ef69-4363-9787-97074297c646",
"iD": "8c90e2be-6042-4450-b0fd-0732900f8f65"
},
{
"data": "other text",
"iD": "8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
},
{
"data": "3",
"iD": "c880bfdf-eb4b-4c80-9871-fd44e06b2ed2"
}
],
"iD": "31d3255d-67b4-40e6-b9d4-637383eb72ad"
},
"inner_hits": {
"data": {
"hits": {
"total": 2,
"max_score": 0.5986179,
"hits": [
{
"_index": "somedata",
"_type": "sometype",
"_id": "1",
"_nested": {
"field": "data",
"offset": 3
},
"_score": 0.5986179,
"_source": {
"data": "other text",
"iD": "8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
}
},
{
"_index": "somedata",
"_type": "sometype",
"_id": "1",
"_nested": {
"field": "data",
"offset": 1
},
"_score": 0.5986179,
"_source": {
"data": "some text",
"iD": "c554b8ce-c873-4fef-b306-ec65d2f40394"
}
}
]
}
}
}
}
]
}
}
Now, inner_hits is fairly new and the documentation also states:
Warning: This functionality is experimental and may be changed or removed completely in a future release.
YMMV.
Another thing to watch out for: the inner_hits are sorted by score. In your original document they're in an array which is ordered, but that information is lost in the actual result. If you need them in the same order in the inner_hits, I think you need to add a separate field for sorting (could just be the array index...) and sort the inner_hits by it.

Filtering results in mongodb

Maybe a simple question for developers experienced with MongoDB, but I can't find a solution:
My json with "Stations" collection:
{
"code": "XX",
"variables": [
{
"code": 1,
"items": [
{
"value": 81
},
{
"value": 77
}
]
},
{
"code":2,
"items": [
{
"value": 33
}
]
}
]
}
....
I want to filter "Station" collection to only get variable with code 1 and item with value 81, obtaining something similar to this:
{
"code": "XX",
"variables": [
{
"code": 1,
"items": [
{
"value": 81
}
]
}
]
}
Because the JSON contains arrays at different levels, my approach (mongo shell) was:
db.stations.find(
{"code": "XX"},
{
"variables":
{ $elemMatch:
{
"code": 1,
"items":
{ $elemMatch:
{
"value": 81
}
}
}
}
}
)
But that returns all the items at the same level as 'value: 81', not only that one.
Any idea? I also tried to do something with "aggregate" operator and $redact, but no result...thanks!
As per mongo $elemMatch documentation
The $elemMatch operator matches documents that contain an array field with at least one element that matches all the specified query criteria.
Hence $elemMatch matches on items.value: 81 and returns the whole matching items array, as in the query below:
db.stations.find({
"code": "XX"
}, {
"variables": {
"$elemMatch": {
"code": 1
}
},
"variables": {
"$elemMatch": {
"items": {
"$elemMatch": {
"value": 81
}
}
}
}
}).pretty()
This returns both items.value: 81 and items.value: 77, because $elemMatch only requires one element of the array to match. The same holds when a projection is used as below; it shows the same result as the query above:
db.stations.find({
"code": "XX",
"variables": {
"$elemMatch": {
"code": 1
}
},
"variables": {
"$elemMatch": {
"items": {
"$elemMatch": {
"value": 81
}
}
}
}
}, {
"code": 1,
"variables.code.$": 1
}).pretty()
So, to get your expected output, you should use the mongo aggregation below:
db.stations.aggregate({
"$match": {
"code": "XX",
"variables.code": 1
}
}, {
"$unwind": "$variables"
}, {
"$unwind": "$variables.items"
}, {
"$match": {
"variables.items.value": 81
}
}, {
"$group": {
"_id": "$code",
"data": {
"$push": "$variables"
}
}
}, {
"$project": {
"code": "$_id",
"variables": "$data",
"_id": 0
}
}).pretty()

Resources