MongoDB How to search in nested objects? - database

How can I search for the value "20044" in every "Barcode" field (and only in "Barcode" fields) of a nested object in MongoDB, without specifying an absolute path such as "Item.Item.Item.Barcode"?
My current solutions:
Search in all text fields, not only in "Barcode" fields
find({$text: {$search: '20044'}})
Search in one specifying absolute path and not in all "Barcode" fields
find({'Item.Item.Item.Barcode': '20044'})
This is my database object:
{
"_id": {
"$oid": "633d7cc238d7f8dafeace6f5"
},
"Number": "2",
"Item": [
{
"Type": "FrameElement",
"Item": [
{
"Type": "Frame",
"Barcode": "20011"
},
{
"Type": "Frame",
"Barcode": "20012"
},
{
"Type": "SashElement",
"Item": [
{
"Type": "Sash",
"Barcode": "20021"
},
{
"Type": "Sash",
"Barcode": "20022"
},
{
"Type": "GlassBarElement",
"Item": [
{
"Type": "GlassBar",
"Barcode": "20031"
},
{
"Type": "GlassBar",
"Barcode": "20032"
}
]
}
]
},
{
"Type": "Glass",
"Barcode": "20016"
},
{
"Type": "GlassBarElement",
"Item": [
{
"Type": "GlassBar",
"Barcode": "20043"
},
{
"Type": "GlassBar",
"Barcode": "20044"
}
]
}
]
}
]
}

Related

JSON-schema object array validation

I have a mission to validate such a JSON message :
{
"header": {
"action": "change_time",
"taskGuid": "someTaskGuid",
"publishDate": "2012-04-23T18:25:43.511Z"
},
"data": {
"code": "f2103839",
"conditions": [
{
"conditionsType": "A",
"dateBegin": "2021-11-22T17:30:43.511Z",
"dateEnd": "2021-11-22T17:35:43.511Z"
},
{
"conditionsType": "B",
"dateBegin": "2021-11-22T17:30:43.511Z",
"dateEnd": "2021-11-22T17:35:43.511Z"
},
{
"conditionsType": "C",
"dateBegin": "2021-11-22T17:30:43.511Z",
"dateEnd": "2021-11-22T17:35:43.511Z"
}
]
}
}
I've made such a JSON-schema to achieve that :
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "Some schema",
"description": "Some schema",
"type": "object",
"required": [
"header",
"data"
],
"properties": {
"header": {
"type": "object",
"required": [
"action",
"taskGuid",
"publishDate"
],
"properties": {
"action": {
"enum": [
"create_work_order",
"change_time",
"cancel_work"
]
},
"taskGuid": {
"type": "string"
},
"publishDate": {
"type": "string",
"format": "date-time"
}
}
},
"data": {
"type": "object",
"required": [
"code",
"conditions"
],
"properties": {
"code": {
"type": "string"
},
"conditions": {
"type": "array",
"items": [
{
"conditionsType": "object",
"properties": {
"type": {
"enum": [
"A",
"B",
"C"
]
},
"dateBegin": {
"type": "string",
"format": "date-time"
},
"dateEnd": {
"type": "string",
"format": "date-time"
}
},
"required": [
"conditionsType",
"dateBegin",
"dateEnd"
]
}
]
}
}
}
}
}
The conditions array will consist of 1-3 objects described by items. Each object should have a unique conditionsType.
I'm checking validation with this instrument - https://www.jsonschemavalidator.net/
The problem is that this schema does validate the message, but only the first object of the array is actually checked against the schema. For instance, the following JSON also passes validation (see "conditions" object #2):
{
"header": {
"action": "change_time",
"taskGuid": "someTaskGuid",
"publishDate": "2012-04-23T18:25:43.511Z"
},
"data": {
"code": "f2103839",
"conditions": [
{
"conditionsType": "A",
"dateBegin": "2021-11-22T17:30:43.511Z",
"dateEnd": "2021-11-22T17:35:43.511Z"
},
{
"conditionsType": 123,
"dateBegin": [1,2,3],
"dateEnd": 1
},
{
"conditionsType": "C",
"dateBegin": "2021-11-22T17:30:43.511Z",
"dateEnd": "2021-11-22T17:35:43.511Z"
}
]
}
}
Is that actually the right direction I've chosen for this task ?
Two things. First, your items schema has a typo: the keywords "type" and "conditionsType" are swapped. The item schema should declare "type": "object", and the property holding the enum should be named "conditionsType". Second, when the items keyword is an array, each element of the validated array is checked only against the schema at the same position, so your schema constrains just the first element. You want items to be a single schema, which is then applied to every element. Your corrected schema for copy-paste:
{"$schema":"http://json-schema.org/draft-07/schema","title":"Some schema","description":"Some schema","type":"object","required":["header","data"],"properties":{"header":{"type":"object","required":["action","taskGuid","publishDate"],"properties":{"action":{"enum":["create_work_order","change_time","cancel_work"]},"taskGuid":{"type":"string"},"publishDate":{"type":"string","format":"date-time"}}},"data":{"type":"object","required":["code","conditions"],"properties":{"code":{"type":"string"},"conditions":{"type":"array","items":{"type":"object","properties":{"conditionsType":{"enum":["A","B","C"]},"dateBegin":{"type":"string","format":"date-time"},"dateEnd":{"type":"string","format":"date-time"}},"required":["conditionsType","dateBegin","dateEnd"]}}}}}}

Elastic - JSON Array nested in Array

I have to index a JSON document into Elasticsearch that looks like the format below. My problem is that the key "variables" is an array that contains JSON objects (I thought about Elasticsearch's "nested" datatype), but some of those objects may themselves contain nested JSON arrays (see the variable CUSTOMERS).
POST /example_data/data {
"process_name": "TEST_PROCESS",
"process_version ": 0,
"process_id": "1111",
"activity_id": "111",
"name": "update_data",
"username": "testUser",
"datetime": "2018-01-01 10:00:00",
"variables": [{
"name": "ΒΑΝΚ",
"data_type": "STRING",
"value": "EUROBANK"
},{
"name": "CITY",
"data_type": "STRING",
"value": "LONDON"
}, {
"name": "CUSTOMERS",
"data_type": "ENTITY",
"value": [{
"variables": [{
"name": "CUSTOMER_NAME",
"data_type": "STRING",
"value": "JOHN"
}, {
"name": " CUSTOMER_CITY",
"data_type": "STRING",
"value": "LONDON"
}
]
}
]
}, {
"name": "CUSTOMERS",
"data_type": "ENTITY",
"value": [{
"variables": [{
"name": "CUSTOMER_NAME",
"data_type": "STRING",
"value": "ΑΘΗΝΑ"
}, {
"name": " CUSTOMER_CITY ",
"data_type": "STRING",
"value": "LIVERPOOL"
}, {
"name": " CUSTOMER_NUMBER",
"data_type": "STRING",
"value": "1234567890"
}
]
}
]
}
] }
When I'm trying to index it I get the following error
{ "error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Can't merge a non object mapping [variables.value] with an object mapping [variables.value]"
}
],
"type": "illegal_argument_exception",
"reason": "Can't merge a non object mapping [variables.value] with an object mapping [variables.value]" }, "status": 400 }
Mapping
{ "example_data": {
"mappings": {
"data": {
"properties": {
"activity_id": {
"type": "text"
},
"name": {
"type": "text"
},
"process_name": {
"type": "text"
},
"process_version": {
"type": "integer"
},
"process_id": {
"type": "text"
},
"datetime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"username": {
"type": "text",
"analyzer": "greek"
},
"variables": {
"type": "nested",
"properties": {
"data_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}}}
When I remove the variable CUSTOMERS that contains the array, then It works properly because there are only json objects.
Is there a way to handle that? Thanks in advance

How to construct a JSON based query on nested object having array items with ES search?

[
{
"name": "Document 1",
"tags": {
"typeATags": ["a1"],
"typeBTags": ["b1"],
"typeCTags": ["c1"],
"typeDTags": ["d1"]
}
},
{
"name": "Document 2",
"tags": {
"typeATags": ["a2"],
"typeBTags": ["b1", "b2"],
"typeCTags": ["c2"],
"typeDTags": ["d1", "d2"]
}
},
{
"name": "Document 3",
"tags": {
"typeATags": ["a1", "a2", "a3"],
"typeBTags": ["b1", "b2", "b3"],
"typeCTags": ["c3"],
"typeDTags": ["d1", "d2", "d3"]
}
}
]
How do I build queries on ES 6.0
that return all the records that have the 'a1' and 'b1' tags? // should return 1,3
that return all the records that have the 'a1' and 'a2' tags combined? // should return 3
that return all the records that have the 'a1' or 'a2' tags? // should return 1,2,3
that return all the records that have the 'a1' AND ('c1' OR 'c3') tags? // should return 1,3
Thanks @mickl for the answer.
Edit 1:
Here is my actual Schema,
{
"cmslocal": {
"mappings": {
"video": {
"properties": {
"assetProps": {
"properties": {
"assetType": {
"type": "string"
},
"configPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"contentSha1": {
"type": "string"
},
"originalPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"path": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"thumbnailPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
}
}
},
"channel": {
"type": "string"
},
"configProps": {
"properties": {
"events": {
"type": "nested",
"include_in_root": true,
"properties": {
"Desc": {
"type": "string"
},
"Tags": {
"type": "string"
},
"UUID": {
"type": "string"
}
}
},
"roiUUID": {
"type": "string"
}
}
},
"contentSha1": {
"type": "string"
},
"eventDesc": {
"type": "string"
},
"ext": {
"type": "string"
},
"format": {
"type": "string"
},
"fovProps": {
"properties": {
"description": {
"type": "string"
},
"width": {
"type": "float"
}
}
},
"locationProps": {
"type": "nested",
"properties": {
"address": {
"type": "string"
},
"city": {
"type": "string"
},
"country": {
"type": "string"
},
"county": {
"type": "string"
},
"location": {
"type": "geo_point"
},
"postcode": {
"type": "string"
},
"state": {
"type": "string"
}
}
},
"nodeid": {
"type": "string"
},
"poleHeight": {
"type": "float"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"term": {
"properties": {
"nodeid": {
"type": "string"
}
}
}
}
}
}
}
}
},
"retentionPolicy": {
"type": "string"
},
"siteScopeID": {
"type": "string"
},
"tagProps": {
"type": "nested",
"properties": {
"conditions": {
"type": "string"
},
"environment": {
"type": "string"
},
"events": {
"type": "string"
},
"lighting": {
"type": "string"
},
"objects": {
"type": "string"
},
"other": {
"type": "string"
},
"scenes": {
"type": "string"
},
"useCases": {
"type": "string"
},
"weather": {
"type": "string"
}
}
},
"test": {
"type": "string"
},
"title": {
"type": "string"
},
"uploadTime": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"videoProps": {
"properties": {
"bitrate": {
"type": "float"
},
"datetime": {
"type": "date",
"format": "date_hour_minute_second_millis"
},
"daySegments": {
"type": "string"
},
"duration": {
"type": "long"
},
"framerate": {
"type": "float"
},
"height": {
"type": "integer"
},
"overlaysOn": {
"type": "boolean"
},
"width": {
"type": "integer"
}
}
}
}
}
}
}
}
Please help to build the query so I can search for
Only nodeId, Only Channel, Date Range
Any of the Tags
I'm able to search for nodeId, Channel Id using
{
"query": {
"bool": {
"filter": [
{ "match": { "nodeid": "N02cff15a" } },
{ "match": { "channel": "1" } }
]
}
}
}
and I am able to search tagProps using
{
"nested": {
"path": "tagProps",
"query": {
"bool": {
"must": [
{ "match": { "tagProps.objects": "car" } },
{ "match": { "tagProps.objects": "truck" } }
]
}
}
}
}
Please help me combine the two queries so that I can search for a nodeId together with a combination of tags.
Since tags is a nested field you should define nested mapping before indexing your documents.
{
"mappings": {
"your_type": {
"properties": {
"tags": {
"type": "nested"
}
}
}
}
}
Now you can index your data and then you can utilize nested query syntax, for your first use case it's like:
{
"query": {
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{ "match": { "tags.typeATags": "a1" }},
{ "match": { "tags.typeBTags": "b1" }}
]
}
}
}
}
}
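The remaining queries can be composed of must and should clauses, as in this one for the last use case (note that when a bool query also contains a must clause, the should clauses are optional unless "minimum_should_match": 1 is set):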
{
"query": {
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{ "match": { "tags.typeATags": "a1" }}
],
"should": [
{"match": {"tags.typeCTags": "c1"}},
{"match": {"tags.typeCTags": "c3"}}
]
}
}
}
}
}

How to score by max relevance match in array elements in ElasticSearch?

I have an autocomplete analyser for a field("keywords"). This field is an array of strings. When I query with a search string I want to show first the documents where a single element of the array keywords matches best. The problem is that if a part of the string matches with more elements of the array "keywords", then this document appears before another that has less but better matches. For example, if I have a query with the word "gas station" the returning documents' keywords are these:
"hits": [
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761acd",
"_score": 3.1974547,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076d8dc536a03b4761cc3",
"_score": 3.0407648,
"_source": {
"keywords": [
"Stationery Store",
"Stationery Store"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761ace",
"_score": 2.903595,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076cddc536a03b4761c87",
"_score": 2.517158,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
}
The "Gas Station" is fourth, although it has the best single element matching. Is there a way to tell ElasticSearch that I do not care about how many times "gas" or "station" appears in keywords? I want the max element of the array keywords match as the score factor.
My settings are:
{
"locali": {
"settings": {
"index": {
"creation_date": "1480937810266",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"keywords": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"char_filter": [
"my_char_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"my_char_filter": {
"type": "mapping",
"mappings": [
"ί => ι",
"Ί => Ι",
"ή => η",
"Ή => Η",
"ύ => υ",
"Ύ => Υ",
"ά => α",
"Ά => Α",
"έ => ε",
"Έ => Ε",
"ό => ο",
"Ό => Ο",
"ώ => ω",
"Ώ => Ω",
"ϊ => ι",
"ϋ => υ",
"ΐ => ι",
"ΰ => υ"
]
}
}
},
"number_of_shards": "1",
"number_of_replicas": "1",
"uuid": "TJjOt9L9QE2HrsUFHM6zJg",
"version": {
"created": "2040099"
}
}
}
}
}
And the mappings:
{
"locali": {
"mappings": {
"places": {
"properties": {
"formattedCategories": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"location": {
"properties": {
"formattedAddress": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"locality": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"neighbourhood": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"rating": {
"properties": {
"rating": {
"type": "long"
}
}
},
"seenDetails": {
"type": "long"
},
"verified": {
"type": "long"
}
}
},
"regions": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"type": {
"type": "long"
},
"weight": {
"type": "long"
}
}
},
"categories": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"weight": {
"type": "long"
}
}
}
}
}
}
Can you also post the query you are trying here?
I tried your example with the following query
{
"query": {"match": {
"keywords": "gas station"
}
}
}
And i got your desired result.
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.081366636,
"hits": [
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6QnpdNp-z_ybGd-L",
"_score": 0.081366636,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP5-u5dNp-z_ybGd-I",
"_score": 0.03182549,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6KiKdNp-z_ybGd-K",
"_score": 0.03182549,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
}
]
}
}
Try this query to see if you get the desired result. You can also reply with your mappings, query and ES version if this doesn't work for you.
Hope this solves your problem. Thanks

Aggregating array of values in elasticsearch

I need to aggregate an array as follows
Two document examples:
{
"_index": "log",
"_type": "travels",
"_id": "tnQsGy4lS0K6uT3Hwzzo-g",
"_score": 1,
"_source": {
"state": "saopaulo",
"date": "2014-10-30T17",
"traveler": "patrick",
"registry": "123123",
"cities": {
"saopaulo": 1,
"riodejaneiro": 2,
"total": 2
},
"reasons": [
"Entrega de encomenda"
],
"from": [
"CompraRapida"
]
}
},
{
"_index": "log",
"_type": "travels",
"_id": "tnQsGy4lS0K6uT3Hwzzo-g",
"_score": 1,
"_source": {
"state": "saopaulo",
"date": "2014-10-31T17",
"traveler": "patrick",
"registry": "123123",
"cities": {
"saopaulo": 1,
"curitiba": 1,
"total": 2
},
"reasons": [
"Entrega de encomenda"
],
"from": [
"CompraRapida"
]
}
},
I want to aggregate the cities array, to find out all the cities the traveler has gone to. I want something like this:
{
"traveler":{
"name":"patrick"
},
"cities":{
"saopaulo":2,
"riodejaneiro":2,
"curitiba":1,
"total":3
}
}
Where the total is the length of the cities array minus 1. I tried the terms aggregation and the sum, but couldn't output the desired output.
Changes in the document structure can be made, so if anything like that would help me, I'd be pleased to know.
In the document posted above, "cities" is not a JSON array; it is a JSON object.
If changing the document structure is a possibility, I would change cities in the document to be an array of objects.
Example document:
"cities": [
{
"name": "saopaulo",
"visit_count": "2"
},
{
"name": "riodejaneiro",
"visit_count": "1"
}
]
You would then need to set cities to be of type nested in the index mapping
"mappings": {
"<type_name>": {
"properties": {
"cities": {
"type": "nested",
"properties": {
"city": {
"type": "string"
},
"count": {
"type": "integer"
},
"value": {
"type": "long"
}
}
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"registry": {
"type": "string"
},
"state": {
"type": "string"
},
"traveler": {
"type": "string"
}
}
}
}
After that, you could use a nested aggregation to get the city count per user.
The query would look something along these lines:
{
"query": {
"match": {
"traveler": "patrick"
}
},
"aggregations": {
"city_travelled": {
"nested": {
"path": "cities"
},
"aggs": {
"citycount": {
"cardinality": {
"field": "cities.city"
}
}
}
}
}
}

Resources