JOLT remove list levels in nested JSON - arrays

I have JSON files where the elements are embeded in lists, with unnecesary sub-levels. I need to simplify this structure with JOLT but I can't figure out how. I looked at similar Q&As but I didn't find examples that would be similar enough.
Here is the input:
{
"datasets": [{
"datasetid": "id_of_1st_dataset",
"metas": {
"domain": "mydomain",
"records_count": 120234,
"keywords": ["foo", "bar"]
},
"fields": [
{
"name": "1st_field_of_1st_dataset",
"type": "text"
},
{
"name": "2nd_field_of_1st_dataset",
"type": "int"
}
]
},
{
"datasetid": "id_of_2nd_dataset",
"metas": {
"domain": "mydomain",
"records_count": 5402,
"keywords": ["banana", "bar"]
},
"fields": [
{
"name": "1st_field_of_2nd_dataset",
"type": "text"
},
{
"name": "2nd_field_of_2nd_dataset",
"type": "int"
}
]
}
]
}
And the desired output:
{
"datasetid": "id_of_1st_dataset",
"domain": "mydomain",
"records_count": 120234,
"keywords": ["foo", "bar"]
"fields": [
{
"name": "1st_field_of_1st_dataset",
"type": "text"
},
{
"name": "2nd_field_of_1st_dataset",
"type": "int"
}
]
},
{
"datasetid": "id_of_2nd_dataset",
"domain": "mydomain",
"records_count": 5402,
"keywords": ["banana", "bar"]
"fields": [
{
"name": "1st_field_of_2nd_dataset",
"type": "text"
},
{
"name": "2nd_field_of_2nd_dataset",
"type": "int"
}
]
}
What would be the right jolt SPEC to transform this input into this output? Thanks in advance for your attention and best regards.

Can be done with the below spec,
[
{
"operation": "shift",
"spec": {
"datasets": {
"*": {
"datasetid": "[&1].datasetid",
"fields": "[&1].fields",
"metas": {
"*": {
"#": "[&3].&"
}
}
}
}
}
}
]

Related

Removing and printing name/value pair from json using jolt

I want to remove a name/value pair from inside a json array and print it outside. I started by trying this and then expanding the whole request to be a json array. The solution mentioned above does not seem to be working.
Input :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Desired Output :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Current Jolt spec:
[
{
"operation": "shift",
"spec": {
"*": "[&]",
"attrs": {
"*": {
"name": {
"*": { "#2": "&4" },
"Details": {
"#(2,value)": "&1"
}
}
}
}
}
}
]
I can't seem to figure out how the jolt spec would change in case of the array
So far so good, just need to combine the attributes at a common node. To do this, I've used the identifiers [&1] and [&5] in order to reach the level of the outermost index within the tree such as
[
{
"operation": "shift",
"spec": {
"*": {
"*": "[&1].&",
"attrs": {
"*": {
"name": {
"*": {
"#2": "[&5].&4"
},
"Details": {
"#(2,value)": "[&5].&1"
}
}
}
}
}
}
}
]

Add and update child arrays value in Mongo db

I want to add value in array of the following json structure in mongo db collection "Scdtls".
I need to add value only where "Type": "Fees" and "IsCurrentVersion": true.
{
"_id": ObjectId("60e71243b484cf3ec22a6845"),
"Item": "School",
"Text": "School Details",
"Types": [
{
"Type": "Library",
"Values": [
{
"IsCurrentVersion": false,
"KeyWords": [
{
"_id": ObjectId("611a4f113800d6af3fc4814f"),
"Value": "Physics"
}
]
}
]
},
{
"Type": "Fees",
"Values": [
{
"IsCurrentVersion": false,
"KeyWords": [
{
"_id": ObjectId("611a4f113800d6af3fc4814f"),
"Value": "Admission"
},
{
"_id": ObjectId("611a4f113800d6af3fc4814k"),
"Value": "Canteen"
}
]
},
{
"IsCurrentVersion": true,
"KeyWords": [
{
"_id": ObjectId("611a4f113800d6af3fc4814g"),
"Value": "Tution"
}
]
}
]
}
]
}
This is how I am trying
db.Scdtls.update({
"Item": "School",
"Types.Type": "Fees",
"Types.Values.IsCurrentVersion": true
},
{
"$push": {
"Types.1.Values.$[].KeyWords": {
"_id": ObjectId(),
"Value": "Sports"
}
}
})
But it is also adding "Value": "Sports" in "Type" : "Fees" where "IsCurrentVersion": false.
Not able to understand the reason.
If I understand correctly, you can do the update you want using "arrayFilters", like this:
db.Scdtls.update({
"Item": "School",
"Types.Type": "Fees",
"Types.Values.IsCurrentVersion": true
},
{
"$push": {
"Types.$[x].Values.$[y].KeyWords": {
"_id": ObjectId("deadbeafcafebabec0deface"),
"Value": "Sports"
}
}
},
{
"arrayFilters": [
{ "x.Type": "Fees" },
{ "y.IsCurrentVersion": true }
]
})
Try it on mongoplayground.net.

Copy a top level element into each element of an array using Jolt

I am working on to push down the top level field into each element of the array inside it.
I want to push 'sourceEntity' into the each element of 'supportedCountries' array and 'supportedCurrencies' Array
input JSON:
{
"description": "SUPPORTED_COUNTRY_CURRENCY",
"id": "20190902025202944",
"type": "devuae-SUPPORTED_COUNTRY_CURRENCY",
"sourceEntity": "EBI",
"sourceChannel": "COR",
"timestamp": "1567421549887",
"supportedCountries": [
{
"code": "IN",
"name": "India",
"supportedCurrencies": [
{
"code": "JOD",
"name": "JORDANIAN DINAR",
"isLocal": true
}
]
}
]
}
Expected OUTPUT:
{
"description": "SUPPORTED_COUNTRY_CURRENCY",
"id": "20190902025202944",
"type": "devuae-SUPPORTED_COUNTRY_CURRENCY",
"sourceEntity": "EBI",
"sourceChannel": "COR",
"timestamp": "1567421549887",
"supportedCountries": [
{
"EntityID": "EBI",
"code": "IN",
"name": "India",
"supportedCurrencies": [
{
"UnitID": "EBI",
"code": "JOD",
"name": "JORDANIAN DINAR",
"isLocal": true
}
]
}
]
}
Help me to solve this
May be this might help,
[
{
"operation": "shift",
"spec": {
"description": "description",
"id": "id",
"type": "type",
"sourceEntity": "sourceEntity",
"sourceChannel": "sourceChannel",
"timestamp": "timestamp",
"supportedCountries": {
"*": {
//Shifting sourceEntity from the level 1
"#(2,sourceEntity)": "supportedCountries[&1].EntityID",
"code": "supportedCountries[&1].code",
"name": "supportedCountries[&1].name",
"supportedCurrencies": {
"*": {
//Shifting sourceEntity from the level 1
"#(4,sourceEntity)": "supportedCountries[&1].supportedCurrencies[&1].UnitID",
"code": "supportedCountries[&1].supportedCurrencies[&1].code",
"name": "supportedCountries[&1].supportedCurrencies[&1].name",
"isLocal": "supportedCountries[&1].supportedCurrencies[&1].isLocal"
}
}
}
}
}
}
]

how to apply if-else for a complex JSON array

here is the Sample Json:-
[
{
"index": 0,
"object": {
"uri": "entities/oAFpSUX",
"type": "configuration/entityTypes/Pet",
"createdBy": "abc#xyz.com",
"createdTime": 1531431176965,
"updatedBy": "abc#xyz.com",
"updatedTime": 1531431177691,
"attributes": {
"Weight": [
{
"label": "5 lbs",
"value": {
"PetWeightMeasurement": [
{
"ov": true,
"value": "5",
}
],
"PetWeightUOM": [
{
"ov": true,
"value": "lbs",
"lookupCode": "lbs",
}
]
},
"ov": true,
"uri": "entities/oAFpSUX/attributes/Weight/1AeFvD8Kj"
}
],
"Identifiers": [
{
"label": "5155445576",
"value": {
"Type": [
{
"ov": false,
"value": "CRMO_Pet_Id",
}
],
"ID": [
{
"ov": true,
"value": "5155445576",
}
]
},
"ov": true,
"uri": "entities/oAFpSUX/attributes/Identifiers/1AeFvCrHh"
}
],
"Vaccination": [
{
"label": "Bordatella - 2018-10-26",
"value": {
"Type": [
{
"type": "configuration/entityTypes/Pet/attributes/Vaccination/attributes/Type",
"ov": true,
"value": "Bordatella",
"lookupCode": "4",
"lookupRawValue": "Bordatella",
"lookupAttributes": [
{
"name": "Sort Order",
"value": "3"
}
],
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9yGr/Type/1AeFvA2X7"
}
],
"ExpirationDate": [
{
"type": "configuration/entityTypes/Pet/attributes/Vaccination/attributes/ExpirationDate",
"ov": true,
"value": "2018-10-26",
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9yGr/ExpirationDate/1AeFvA6nN"
}
]
},
"ov": true,
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9yGr"
},
{
"label": "Distemper - 2018-10-25",
"value": {
"Type": [
{
"type": "configuration/entityTypes/Pet/attributes/Vaccination/attributes/Type",
"ov": true,
"value": "Distemper",
"lookupAttributes": [
{
"name": "Sort Order",
"value": "4"
}
],
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9YhJ/Type/1AeFv9cxZ"
}
],....
My question: I was able to get the value of "$..Vaccination..value.Type..value" as "Bordatella" so that works fine. However what I now want is that if the value is 'Bordatella' then I want to extract the "value" under "ExpirationDate". Can someone please help me how I can extract that "value" under "ExpirationDate" ? I am not sure if I need to do that with some custom Groovy code of using jmeter's if controller. Any help would be greatly appreciated!
Thanks.
There are several ways to run jq queries from within Java - e.g.
https://github.com/eiiches/jackson-jq
https://github.com/arakelian/java-jq (available from Maven Central)
Java Native Access wrapper https://github.com/bskaggs/jjq . Supported platform is Linux only.
Assuming the sample input has been revised in the obvious way to make it valid JSON, the following jq filter will produce the output as shown:
.[].object.attributes.Vaccination[].value
| select(.Type[].value == "Bordatella")
| .ExpirationDate[].value
Output:
"2018-10-26"
Alternative
Here's a jq filter that is agnostic about the relative location of the "Vaccination" object:
..
| objects
| select(has("Vaccination"))
| .Vaccination[].value?
| select(.Type[].value == "Bordatella")
| .ExpirationDate[].value
Given the following JSON response:
[
{
"index": 0,
"object": {
"uri": "entities/oAFpSUX",
"type": "configuration/entityTypes/Pet",
"createdBy": "a...#xyz.com",
"createdTime": 1531431176965,
"updatedBy": "a...#xyz.com",
"updatedTime": 1531431177691,
"attributes": {
"Weight": [
{
"label": "5 lbs",
"value": {
"PetWeightMeasurement": [
{
"ov": true,
"value": "5"
}
],
"PetWeightUOM": [
{
"ov": true,
"value": "lbs",
"lookupCode": "lbs"
}
]
},
"ov": true,
"uri": "entities/oAFpSUX/attributes/Weight/1AeFvD8Kj"
}
],
"Identifiers": [
{
"label": "5155445576",
"value": {
"Type": [
{
"ov": false,
"value": "CRMO_Pet_Id"
}
],
"ID": [
{
"ov": true,
"value": "5155445576"
}
]
},
"ov": true,
"uri": "entities/oAFpSUX/attributes/Identifiers/1AeFvCrHh"
}
],
"Vaccination": [
{
"label": "Bordatella - 2018-10-26",
"value": {
"Type": [
{
"type": "configuration/entityTypes/Pet/attributes/Vaccination/attributes/Type",
"ov": true,
"value": "Bordatella",
"lookupCode": "4",
"lookupRawValue": "Bordatella",
"lookupAttributes": [
{
"name": "Sort Order",
"value": "3"
}
],
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9yGr/Type/1AeFvA2X7"
}
],
"ExpirationDate": [
{
"type": "configuration/entityTypes/Pet/attributes/Vaccination/attributes/ExpirationDate",
"ov": true,
"value": "2018-10-26",
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9yGr/ExpirationDate/1AeFvA6nN"
}
]
},
"ov": true,
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9yGr"
},
{
"label": "Distemper - 2018-10-25",
"value": {
"Type": [
{
"type": "configuration/entityTypes/Pet/attributes/Vaccination/attributes/Type",
"ov": true,
"value": "Distemper",
"lookupAttributes": [
{
"name": "Sort Order",
"value": "4"
}
],
"uri": "entities/oAFpSUX/attributes/Vaccination/1AeFv9YhJ/Type/1AeFv9cxZ"
}
]
}
}
]
}
}
}
]
Add JSR223 PostProcessor as a child of the request which returns the above response
Put the following code into "Script" area:
new groovy.json.JsonSlurper().parse(prev.getResponseData()).get(0).get('object').get('attributes').get('Vaccination').each { vaccination ->
if (vaccination.get('value').get('Type').get(0).get('value').equals('Bordatella')) {
def expirationDate = vaccination.get('value').get('ExpirationDate').get(0).get('value')
log.info('ExpirationDate: ' + expirationDate)
vars.put('ExpirationDate', expirationDate)
}
}
If everything goes well (your JSON response matches the Groovy code) the ExpirationDate will be:
printed to jmeter.log file
stored as ${ExpirationDate} JMeter Variable
Demo:
More information:
Groovy: Parsing and producing JSON
Apache Groovy - Why and How You Should Use It

How to score by max relevance match in array elements in ElasticSearch?

I have an autocomplete analyser for a field("keywords"). This field is an array of strings. When I query with a search string I want to show first the documents where a single element of the array keywords matches best. The problem is that if a part of the string matches with more elements of the array "keywords", then this document appears before another that has less but better matches. For example, if I have a query with the word "gas station" the returning documents' keywords are these:
"hits": [
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761acd",
"_score": 3.1974547,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076d8dc536a03b4761cc3",
"_score": 3.0407648,
"_source": {
"keywords": [
"Stationery Store",
"Stationery Store"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761ace",
"_score": 2.903595,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076cddc536a03b4761c87",
"_score": 2.517158,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
}
The "Gas Station" is fourth, although it has the best single element matching. Is there a way to tell ElasticSearch that I do not care about how many times "gas" or "station" appears in keywords? I want the max element of the array keywords match as the score factor.
My settings are:
{
"locali": {
"settings": {
"index": {
"creation_date": "1480937810266",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"keywords": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"char_filter": [
"my_char_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"my_char_filter": {
"type": "mapping",
"mappings": [
"ί => ι",
"Ί => Ι",
"ή => η",
"Ή => Η",
"ύ => υ",
"Ύ => Υ",
"ά => α",
"Ά => Α",
"έ => ε",
"Έ => Ε",
"ό => ο",
"Ό => Ο",
"ώ => ω",
"Ώ => Ω",
"ϊ => ι",
"ϋ => υ",
"ΐ => ι",
"ΰ => υ"
]
}
}
},
"number_of_shards": "1",
"number_of_replicas": "1",
"uuid": "TJjOt9L9QE2HrsUFHM6zJg",
"version": {
"created": "2040099"
}
}
}
}
}
And the mappings:
{
"locali": {
"mappings": {
"places": {
"properties": {
"formattedCategories": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"location": {
"properties": {
"formattedAddress": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"locality": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"neighbourhood": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"rating": {
"properties": {
"rating": {
"type": "long"
}
}
},
"seenDetails": {
"type": "long"
},
"verified": {
"type": "long"
}
}
},
"regions": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"type": {
"type": "long"
},
"weight": {
"type": "long"
}
}
},
"categories": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"weight": {
"type": "long"
}
}
}
}
}
}
Can you post your query here that you are trying here as well.
I tried your example with the following query
{
"query": {"match": {
"keywords": "gas station"
}
}
}
And i got your desired result.
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.081366636,
"hits": [
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6QnpdNp-z_ybGd-L",
"_score": 0.081366636,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP5-u5dNp-z_ybGd-I",
"_score": 0.03182549,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6KiKdNp-z_ybGd-K",
"_score": 0.03182549,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
}
]
}
}
Try this query to see if you are getting desired result. Also you can reply with your mappings, query and ES version if this does't work for you.
Hope this solves your problem. Thanks

Resources