Jolt for input array to name value pair of array - arrays

Input is :
{
"Size": "2",
"done": "true",
"records": [
{
"Id": "a7g6s0000004GZuAAM",
"NN": "00096411.0",
"Name": "ISOLIN TRADE & INVEST"
},
{
"Id": "a7g6s0000004GZzAAM",
"Number": "00096412.0",
"Name": "ISOLIN"
}
]
}
Spec used:
[
{
"operation": "remove",
"spec": {
"records": {
"*": {
"attributes": " "
}
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"records": {
"*": { //iterate on each object of records
"*": { //iterate on each element of object
"$": "Items[#1].Fields[].Name",
"#": "Items[#1].Fields[].Value"
}
}
}
}
}
]
Current Output:
{
"Size": "2",
"done": "true",
"Items": [
{
"Fields": [
{
"Name": "Id"
},
{
"Value": "a7g6s0000004GZuAAM"
},
{
"Name": "NN"
},
{
"Value": "00096411.0"
},
{
"Name": "Name"
},
{
"Value": "ISOLIN TRADE & INVEST"
},
{
"Name": "Id"
},
{
"Value": "a7g6s0000004GZzAAM"
},
{
"Name": "Number"
},
{
"Value": "00096412.0"
},
{
"Name": "Name"
},
{
"Value": "ISOLIN"
}
]
}
]
}
Expected output:
{
"Size": "2",
"done": "true",
"Items": [
{
"Fields": [
{
"Name": "Id",
"Value": "a7g6s0000004GZuAAM"
},
{
"Name": "NN",
"Value": "00096411.0"
},
{
"Name": "Name",
"Value": "ISOLIN TRADE & INVEST"
},
{
"Name": "Id",
"Value": "a7g6s0000004GZzAAM"
},
{
"Name": "Number",
"Value": "00096412.0"
},
{
"Name": "Name",
"Value": "ISOLIN"
}
]
}
]
}

You can seperate those attributes into individual objects through use of Items.&2.[#2]. pattern as prefix such as
[
{
"operation": "shift",
"spec": {
"*": "&",
"records": {
"*": {
"*": {
"$": "Items.&2.[#2].Name",
"#": "Items.&2.[#2].Value"
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"Items": {
"*": {
"*": "Fields[]"
}
}
}
}
]

Related

Removing and printing name/value pair from json using jolt

I want to remove a name/value pair from inside a json array and print it outside. I started by trying this and then expanding the whole request to be a json array. The solution mentioned above does not seem to be working.
Input :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Desired Output :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Current Jolt spec:
[
{
"operation": "shift",
"spec": {
"*": "[&]",
"attrs": {
"*": {
"name": {
"*": { "#2": "&4" },
"Details": {
"#(2,value)": "&1"
}
}
}
}
}
}
]
I can't seem to figure out how the jolt spec would change in case of the array
So far so good, just need to combine the attributes at a common node. To do this, I've used the identifiers [&1] and [&5] in order to reach the level of the outermost index within the tree such as
[
{
"operation": "shift",
"spec": {
"*": {
"*": "[&1].&",
"attrs": {
"*": {
"name": {
"*": {
"#2": "[&5].&4"
},
"Details": {
"#(2,value)": "[&5].&1"
}
}
}
}
}
}
}
]

Unable to properly Transform nested array with JOLT

I'm trying to convert json nested array's element and unable to get expected result, either I properly get name element or schemaExtensions element but can't get both together.
Here is my input:
{
"rows": [
{
"content": {
"name": {
"content": "User"
},
"schemaExtensions": {
"content": [
{
"content": {
"schema": {
"content": "User"
},
"required": {
"content": true
}
}
}
]
}
}
}
]
}
And the JOlt spec definition:
[
{
"operation": "shift",
"spec": {
"rows": {
"*": {
"content": {
"name": {
"content": "[&3].name"
},
"schemaExtensions": {
"content": {
"*": {
"content": {
"schema": {
"content": "schemaExtensions.[&7].schema"
},
"required": {
"content": "schemaExtensions.[&7].required"
}
}
}
}
}
}
}
}
}
}
]
The expected result:
[ {
"name" : "User",
"schemaExtensions": [ {
"schema":"User",
"required":true
}]
}]
Here is the result I got when having both in my spec
[ {
"name" : "User"
} ]
And if I remove name from my spec, then I get my schemaExtensions properly:
{
"schemaExtensions" : [ {
"schema" : "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User",
"required" : true
} ]
}
You can start with three level nesting to roam before writing the branches(name&schemaExtensions) explicitly. No need to write other key names such as
[
{
"operation": "shift",
"spec": {
"*": {
"*": {
"*": {
"name": { "*": "&1" },
"schemaExtensions": {
"*": {
"*": { "*": { "*": { "*": "&5.&1" } } }
}
}
}
}
}
}
}
]
[
{
"operation": "shift",
"spec": {
"rows": {
"*": {
"content": {
"name": {
"content": "[].name"
},
"schemaExtensions": {
"content": {
"*": {
"content": {
"schema": {
"content": "[&7].schemaExtensions[&7].schema"
},
"required": {
"content": "[&7].schemaExtensions[&7].required"
}
}
}
}
}
}
}
}
}
}
]

JOLT filter array elements based on existence of a key field

Is it possible to filter array elements based on presence of a field.
My input JSON is as below:
{
"payload": {
"logical": {
"schemas": [
{
"name": "myschema",
"tables": [
{
"name": "myname",
"alias": "temp_alias",
"keys": [
{
"name": "value1",
"key": "key1",
"match": "match_val"
},
{
"name": "value1",
"key": "key2",
"match": "match_val"
},
{
"name": "value1",
"key": "key3"
},
{
"name": "value1",
"key": "key4"
}
]
}
]
}
]
}
}
}
Expected output is:
{
"payload": {
"logical": {
"schemas": [
{
"tables": [
{
"name": "myname",
"alias": "temp_alias",
"keys": {
"name": "value1",
"match": "match_val",
"key": [
"key1",
"key2"
]
}
}
]
}
]
}
}
}
If there is "match" field in "keys" element, then we will take "key" value from that element and put in the output array "key".
I am using this spec file, but not getting required output. Can anyone please suggest how to do it?
[
{
"operation": "shift",
"spec": {
"*": "&",
"payload": {
"*": "&",
"logical": {
"schemas": {
"*": {
"tables": {
"*": {
"name": "payload.logical.schemas[&3].tables[&1].name",
"alias": "payload.logical.schemas[&3].tables[&1].alias",
"keys": {
"*": {
"match": {
"match1|match2": {
"#2": {
"name": "payload.logical.schemas[&4].tables[&4].keys.name",
"match": "payload.logical.schemas[&4].tables[&4].keys.match",
"key": "payload.logical.schemas[&4].tables[&4].keys.key"
}
}
}
}
}
}
}
}
}
}
}
}
}
]
I got desired output with below spec file:
[
{
"operation": "shift",
"spec": {
"*": "&",
"payload": {
"*": "&",
"logical": {
"schemas": {
"*": {
"tables": {
"*": {
"name": "payload.logical.schemas[&3].tables[&1].name",
"alias": "payload.logical.schemas[&3].tables[&1].alias",
"keys": {
"*": {
"match": {
"*": {
"#(2,name)": "payload.logical.schemas[&7].tables[&5].keys[&3].name",
"#(2,match)": "payload.logical.schemas[&7].tables[&5].keys[&3].match",
"#(2,key)": "payload.logical.schemas[&7].tables[&5].keys[&3].key"
}
}
}
}
}
}
}
}
}
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"payload": {
"*": "&",
"logical": {
"schemas": {
"*": {
"tables": {
"*": {
"name": "payload.logical.schemas[&3].tables[&1].name",
"alias": "payload.logical.schemas[&3].tables[&1].alias",
"keys": {
"*": {
"name": "payload.logical.schemas[&5].tables[&3].keys.name",
"match": "payload.logical.schemas[&5].tables[&3].keys.match",
"key": "payload.logical.schemas[&5].tables[&3].keys.key"
}
}
}
}
}
}
}
}
}
},
{
"operation": "cardinality",
"spec": {
"payload": {
"logical": {
"schemas": {
"*": {
"tables": {
"*": {
"keys": {
"name": "ONE",
"match": "ONE"
}
}
}
}
}
}
}
}
}
]

How to construct a JSON based query on nested object having array items with ES search?

[
{
"name": "Document 1",
"tags": {
"typeATags": ["a1"],
"typeBTags": ["b1"],
"typeCTags": ["c1"],
"typeDTags": ["d1"]
}
},
{
"name": "Document 2",
"tags": {
"typeATags": ["a2"],
"typeBTags": ["b1", "b2"],
"typeCTags": ["c2"],
"typeDTags": ["d1", "d2"]
}
},
{
"name": "Document 3",
"tags": {
"typeATags": ["a1", "a2", "a3"],
"typeBTags": ["b1", "b2", "b3"],
"typeCTags": ["c3"],
"typeDTags": ["d1", "d2", "d3"]
}
}
]
How to build a query on ES 6.0,
That will return all the records that has 'a1' and 'b1' tags ? // should return 1,3
That will return all the records that has 'a1' and 'a2' tags combined? // should return 3
That will return all the records that has 'a1' or 'a2' tags ? //should return 1,2,3
That will return all the records that has 'a1' AND ( 'c1' OR 'c3') tags ? //should return 1,2
Thanks #mickl for the Answer
Edit 1:
Here is my actual Schema,
{
"cmslocal": {
"mappings": {
"video": {
"properties": {
"assetProps": {
"properties": {
"assetType": {
"type": "string"
},
"configPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"contentSha1": {
"type": "string"
},
"originalPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"path": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"thumbnailPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
}
}
},
"channel": {
"type": "string"
},
"configProps": {
"properties": {
"events": {
"type": "nested",
"include_in_root": true,
"properties": {
"Desc": {
"type": "string"
},
"Tags": {
"type": "string"
},
"UUID": {
"type": "string"
}
}
},
"roiUUID": {
"type": "string"
}
}
},
"contentSha1": {
"type": "string"
},
"eventDesc": {
"type": "string"
},
"ext": {
"type": "string"
},
"format": {
"type": "string"
},
"fovProps": {
"properties": {
"description": {
"type": "string"
},
"width": {
"type": "float"
}
}
},
"locationProps": {
"type": "nested",
"properties": {
"address": {
"type": "string"
},
"city": {
"type": "string"
},
"country": {
"type": "string"
},
"county": {
"type": "string"
},
"location": {
"type": "geo_point"
},
"postcode": {
"type": "string"
},
"state": {
"type": "string"
}
}
},
"nodeid": {
"type": "string"
},
"poleHeight": {
"type": "float"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"term": {
"properties": {
"nodeid": {
"type": "string"
}
}
}
}
}
}
}
}
},
"retentionPolicy": {
"type": "string"
},
"siteScopeID": {
"type": "string"
},
"tagProps": {
"type": "nested",
"properties": {
"conditions": {
"type": "string"
},
"environment": {
"type": "string"
},
"events": {
"type": "string"
},
"lighting": {
"type": "string"
},
"objects": {
"type": "string"
},
"other": {
"type": "string"
},
"scenes": {
"type": "string"
},
"useCases": {
"type": "string"
},
"weather": {
"type": "string"
}
}
},
"test": {
"type": "string"
},
"title": {
"type": "string"
},
"uploadTime": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"videoProps": {
"properties": {
"bitrate": {
"type": "float"
},
"datetime": {
"type": "date",
"format": "date_hour_minute_second_millis"
},
"daySegments": {
"type": "string"
},
"duration": {
"type": "long"
},
"framerate": {
"type": "float"
},
"height": {
"type": "integer"
},
"overlaysOn": {
"type": "boolean"
},
"width": {
"type": "integer"
}
}
}
}
}
}
}
}
Please help to build the query so I can search for
Only nodeId, Only Channel, Date Range
Any of the Tags
I'm able to search for nodeId, Channel Id using
{
"query": {
"bool": {
"filter": [
{ "match": { "nodeid": "N02cff15a" } },
{ "match": { "channel": "1" } }
]
}
}
}
and I can able to Search for tagProps using
{
"nested": {
"path": "tagProps",
"query": {
"bool": {
"must": [
{ "match": { "tagProps.objects": "car" } },
{ "match": { "tagProps.objects": "truck" } }
]
}
}
}
}
Help me combine two queries so I can search for NodeId with Combination of Tags.
Since tags is a nested field you should define nested mapping before indexing your documents.
{
"mappings": {
"your_type": {
"properties": {
"tags": {
"type": "nested"
}
}
}
}
}
Now you can index your data and then you can utilize nested query syntax, for your first use case it's like:
{
"query": {
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{ "match": { "tags.typeATags": "a1" }},
{ "match": { "tags.typeBTags": "b1" }}
]
}
}
}
}
}
Next queries can be composed of must and should, like for the last one:
{
"query": {
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{ "match": { "tags.typeATags": "a1" }}
],
"should": [
{"match": {"tags.typeCTags": "c1"}},
{"match": {"tags.typeCTags": "c3"}}
]
}
}
}
}
}

How to score by max relevance match in array elements in ElasticSearch?

I have an autocomplete analyser for a field("keywords"). This field is an array of strings. When I query with a search string I want to show first the documents where a single element of the array keywords matches best. The problem is that if a part of the string matches with more elements of the array "keywords", then this document appears before another that has less but better matches. For example, if I have a query with the word "gas station" the returning documents' keywords are these:
"hits": [
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761acd",
"_score": 3.1974547,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076d8dc536a03b4761cc3",
"_score": 3.0407648,
"_source": {
"keywords": [
"Stationery Store",
"Stationery Store"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761ace",
"_score": 2.903595,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076cddc536a03b4761c87",
"_score": 2.517158,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
}
The "Gas Station" is fourth, although it has the best single element matching. Is there a way to tell ElasticSearch that I do not care about how many times "gas" or "station" appears in keywords? I want the max element of the array keywords match as the score factor.
My settings are:
{
"locali": {
"settings": {
"index": {
"creation_date": "1480937810266",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"keywords": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"char_filter": [
"my_char_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"my_char_filter": {
"type": "mapping",
"mappings": [
"ί => ι",
"Ί => Ι",
"ή => η",
"Ή => Η",
"ύ => υ",
"Ύ => Υ",
"ά => α",
"Ά => Α",
"έ => ε",
"Έ => Ε",
"ό => ο",
"Ό => Ο",
"ώ => ω",
"Ώ => Ω",
"ϊ => ι",
"ϋ => υ",
"ΐ => ι",
"ΰ => υ"
]
}
}
},
"number_of_shards": "1",
"number_of_replicas": "1",
"uuid": "TJjOt9L9QE2HrsUFHM6zJg",
"version": {
"created": "2040099"
}
}
}
}
}
And the mappings:
{
"locali": {
"mappings": {
"places": {
"properties": {
"formattedCategories": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"location": {
"properties": {
"formattedAddress": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"locality": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"neighbourhood": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"rating": {
"properties": {
"rating": {
"type": "long"
}
}
},
"seenDetails": {
"type": "long"
},
"verified": {
"type": "long"
}
}
},
"regions": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"type": {
"type": "long"
},
"weight": {
"type": "long"
}
}
},
"categories": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"weight": {
"type": "long"
}
}
}
}
}
}
Can you post your query here that you are trying here as well.
I tried your example with the following query
{
"query": {"match": {
"keywords": "gas station"
}
}
}
And i got your desired result.
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.081366636,
"hits": [
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6QnpdNp-z_ybGd-L",
"_score": 0.081366636,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP5-u5dNp-z_ybGd-I",
"_score": 0.03182549,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6KiKdNp-z_ybGd-K",
"_score": 0.03182549,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
}
]
}
}
Try this query to see if you are getting desired result. Also you can reply with your mappings, query and ES version if this does't work for you.
Hope this solves your problem. Thanks

Resources