I have a type in Elasticsearch whose documents have this structure:
{
"name": "Foo Bar",
"myTags": [
{
"id": 3,
"name": "My Tag 1"
},
{
"id": 5,
"name": "My Tag 5"
},
{
"id": 7,
"name": "My Tag 7"
}
]
}
Now, given 3 tags, I would like to get ALL documents sorted by the number of matching tags: first the documents that match all 3 tags, then those that match 2, then those that match one, and finally those that match none.
How can I do this?
You can do it with function_score:
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"functions": [
{
"filter": {
"nested": {
"path": "myTags",
"query": {
"term": {
"myTags.name": "My Tag 1"
}
}
}
},
"weight": 1
},
{
"filter": {
"nested": {
"path": "myTags",
"query": {
"term": {
"myTags.name": "My Tag 5"
}
}
}
},
"weight": 1
},
{
"filter": {
"nested": {
"path": "myTags",
"query": {
"term": {
"myTags.name": "My Tag 7"
}
}
}
},
"weight": 1
}
],
"boost_mode": "sum",
"score_mode": "sum"
}
}
}
Related
Is there a way to group by a child document field and include the parent fields to the result?
Imagine you have
[
{
"id": "p1",
"name": "parent 1",
"_childDocuments_": [
{
"id": "p1c1",
"name": "child1_of_parent1",
"color": "red"
},
{
"id": "p1c2",
"name": "child2_of_parent1",
"color": "yellow"
}
]
},
{
"id": "p2",
"name": "parent 2",
"_childDocuments_": [
{
"id": "p2c1",
"name": "child1_of_parent2",
"color": "yellow"
}
]
}
]
in a collection.
Now a query
/select?group=true&group.field=color&group.limit=10
returns
{
"responseHeader":{
"params":{
"group.limit":"10",
"group.field":"color",
"group":"true"
}
},
"grouped":{
"color":{
"matches":3,
"groups":[
{
"groupValue":"red",
"doclist":{"numFound":1,"docs":[
{
"id":"p1c1",
"name":"child1_of_parent1"
}
]
}
},
{
"groupValue":"yellow",
"doclist":{"numFound":2,"docs":[
{
"id":"p1c2",
"name":"child2_of_parent1"
},
{
"id":"p2c1",
"name":"child1_of_parent2"
}
]
}
}
]
}
}
}
But I need a result that contains their parent fields as well, something like
{
"responseHeader":{
"params":{
"group.limit":"10",
"group.field":"color",
"group":"true"
}
},
"grouped":{
"color":{
"matches":3,
"groups":[
{
"groupValue":"red",
"doclist":{"numFound":1,"docs":[
{
"id":"p1c1",
"name":"child1_of_parent1",
"parent":{
"id": "p1",
"name": "parent 1"
}
}
]
}
},
{
"groupValue":"yellow",
"doclist":{"numFound":2,"docs":[
{
"id":"p1c2",
"name":"child2_of_parent1",
"parent":{
"id": "p1",
"name": "parent 1"
}
},
{
"id":"p2c1",
"name":"child1_of_parent2",
"parent":{
"id": "p2",
"name": "parent 2"
}
}
]
}
}
]
}
}
}
I'm coming from relational databases, where this can easily be done. Hopefully there's a way in solr as well. I'm using solr 8.7.0
One solution I found is a subquery. It satisfies the requirements, but the performance is what you would expect when you transfer a relational "join" to a document database.
It will definitely be a better idea to rework the datamodel to a flat structure.
First, I had to add a field to each child document holding the parent id (the default "_root_" field doesn't work):
[
{
"id": "p1",
"name": "parent 1",
"_childDocuments_": [
{
"id": "p1c1",
"name": "child1_of_parent1",
"color": "red",
"parent_id": "p1"
},
{
"id": "p1c2",
"name": "child2_of_parent1",
"color": "yellow",
"parent_id": "p1"
}
]
},
{
"id": "p2",
"name": "parent 2",
"_childDocuments_": [
{
"id": "p2c1",
"name": "child1_of_parent2",
"color": "yellow",
"parent_id": "p2"
}
]
}
]
Now I can query
/select?group=true&group.field=color&group.limit=10&fl=*%2Cparent%3A%5Bsubquery%5D&parent.q=%7B%21terms+f%3Did+v%3D%24row.parent_id%7D
and it returns
{
"responseHeader":{
"params":{
"group.limit":"10",
"group.field":"color",
"group":"true",
"fl":"*,parent:[subquery]",
"parent.q":"{!terms f=id v=$row.parent_id}"
}
},
"grouped":{
"color":{
"matches":3,
"groups":[
{
"groupValue":"red",
"doclist":{"numFound":1,"docs":[
{
"id":"p1c1",
"name":"child1_of_parent1",
"parent":{"numFound":1,"start":0,"numFoundExact":true,"docs":[
{
"id": "p1",
"name": "parent 1"
}
]}
}
]
}
},
{
"groupValue":"yellow",
"doclist":{"numFound":2,"docs":[
{
"id":"p1c2",
"name":"child2_of_parent1",
"parent":{"numFound":1,"start":0,"numFoundExact":true,"docs":[
{
"id": "p1",
"name": "parent 1"
}
]}
},
{
"id":"p2c1",
"name":"child1_of_parent2",
"parent":{"numFound":1,"start":0,"numFoundExact":true,"docs":[
{
"id": "p2",
"name": "parent 2"
}
]}
}
]
}
}
]
}
}
}
Feel free to comment if this reminds you of a better idea.
{
"_id": "null",
"data": [
{
"name": "abc",
"id": "123"
},
{
"name": "xyz",
"id": "123"
}
]
}
Explanation: the value of each name becomes the key of an object. I also want to combine everything into one single document that contains all the objects. The names (abc and xyz here) come in dynamically as parameters.
Expected Output.
{
"data": {
"abc": {
"name": "abc",
"id": "100"
},
"xyz": {
"name": "xyz",
"id": "123"
}
}
}
Try this:
// Merge the elements of every document's `data` array into one result
// document whose `data` field is an object keyed by each element's `name`.
db.testCollection.aggregate([
  {
    // Turn each array element into a { k, v } pair, the shape that
    // $arrayToObject expects.
    $project: {
      array: {
        $map: {
          input: "$data",
          as: "item",
          in: {
            k: "$$item.name",
            v: {
              // Each field is copied under its own key.
              name: "$$item.name",
              id: "$$item.id"
            }
          }
        }
      }
    }
  },
  // Emit one pipeline document per { k, v } pair so that pairs coming from
  // all input documents can be regrouped together.
  { $unwind: "$array" },
  {
    // A constant _id of null places every pair in a single group.
    $group: {
      _id: null,
      data: { $push: "$array" }
    }
  },
  {
    // Build the final keyed object; _id is excluded because the expected
    // output contains only `data`.
    $project: {
      _id: 0,
      data: { $arrayToObject: "$data" }
    }
  }
]);
I have documents like these:
Doc1
{
"id": ...,
...
"articles": [
{
"id": "5cdd17c7e24f6e05d487b2c2#142936",
...
},
{
"id": "5cdd17c7e24f6e05d487b2c2#226536",
...
}
...
}
Doc2
{
"id": ...,
...
"articles": [
{
"id": "5cdd17c7e24f6e05d487b2c2#142936",
...
},
{
"id": "5cdd17c7e24f6e05d487b2c2#226536",
...
},
{
"id": "5cdd17c7e24f6e05d487b2c2#142965",
...
}
...
}
Doc3
{
"id": ...,
...
"articles": [
{
"id": "5cdd17c7e24f6e05d487b2c2#142936",
...
}
...
}
I want only the document whose articles array contains exactly the articles I need. For example, if my array of article ids is ['5cdd17c7e24f6e05d487b2c2#142936', '5cdd17c7e24f6e05d487b2c2#226536'], I only want to get Doc1.
Now I have this query:
GET my_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "articles",
"query": {
"query_string": {
"default_field": "articles.id",
"query": "5cdd17c7e24f6e05d487b2c2#142936 AND 5cdd17c7e24f6e05d487b2c2#226536"
}
}
}
}
]
}
}
}
But with this, I get Doc1 & Doc2...
Assuming articles.id is of type keyword, I think this should work for you (not sure it's the most efficient way to write the query):
GET my_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "articles",
"query": {
"term": {
"articles.id": "5cdd17c7e24f6e05d487b2c2#142936"
}
}
}
},
{
"nested": {
"path": "articles",
"query": {
"term": {
"articles.id": "5cdd17c7e24f6e05d487b2c2#226536"
}
}
}
}
],
"must_not": {
"nested": {
"path": "articles",
"query": {
"query_string": {
"default_field": "articles.id",
"query": "NOT 5cdd17c7e24f6e05d487b2c2#142936 AND NOT 5cdd17c7e24f6e05d487b2c2#226536"
}
}
}
}
}
}
}
I have data with one field that is an array of objects. I know that objects in arrays are not well supported in Kibana; however, I would like to know if there is a way to filter that array down to the entries with a single given key.
Here is an example JSON document:
{
"_index": "index",
"_type": "data",
"_id": "8",
"_version": 2,
"_score": 1,
"_source": {
"envelope": {
"version": "0.0.1",
"submitter": "VF12RBU1D53087510",
"MetaData": {
"SpecificMetaData": [
{
"key": "key1",
"value": "94"
},
{
"key": "key2",
"value": "0"
}
]
}
}
}
}
I would like to keep only the entries of my SpecificMetaData array whose key is key1 in order to plot them. For now, when I plot SpecificMetaData.value, it takes all the values of the array (the values of both key1 and key2) and doesn't offer separate fields such as SpecificMetaData.value1 and SpecificMetaData.value2.
If you need more information, tell me. Thank you.
You may need to define an explicit mapping so that SpecificMetaData is treated as a nested type; the inner_hits of a nested query can then return only the objects whose key is key1.
PUT envelope_index
{
"mappings": {
"document_type": {
"properties": {
"envelope": {
"type": "object",
"properties": {
"version": {
"type": "text"
},
"submitter": {
"type": "text"
},
"MetaData": {
"type": "object",
"properties": {
"SpecificMetaData": {
"type": "nested"
}
}
}
}
}
}
}
}
}
POST envelope_index/document_type
{
"envelope": {
"version": "0.0.1",
"submitter": "VF12RBU1D53087510",
"MetaData": {
"SpecificMetaData": [{
"key": "key1",
"value": "94"
},
{
"key": "key2",
"value": "0"
}
]
}
}
}
POST envelope_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"inner_hits": {},
"path": "envelope.MetaData.SpecificMetaData",
"query": {
"bool": {
"must": [
{
"term": {
"envelope.MetaData.SpecificMetaData.key": {
"value": "key1"
}
}
}
]
}
}
}
}
]
}
}
}
I've 2 function_score queries.
The first one, that I run on attractions type
{
"query": {
"function_score": {
"query": {
"nested": {
"path": "translations",
"query": {
"multi_match": {
"query": "Text to search",
"type": "best_fields",
"fields": [
"title^3",
"description"
]
}
}
}
},
"functions": [
{
"filter": {
"term": {
"cityId": 3
}
},
"weight": 100
}
],
"score_mode": "multiply"
}
}
}
and the second one, that I run on pizzeria type
{
"query": {
"function_score": {
"query": {
"multi_match": {
"query": "Text to search",
"type": "best_fields",
"fields": [
"name^3",
"description"
]
}
},
"functions": [
{
"filter": {
"term": {
"cityId": 1
}
},
"weight": 100
}
],
"score_mode": "multiply"
}
}
}
They both work well. I now want to search on both types with a single query, but I don't know how to "merge" these 2 queries.
I want to do this because I want the elements from the 2 types to be sorted by the same score.
Is this possible? How can I do this?
You can use the Multi Search API to combine queries into one single query:
Multi Search API