Elastic - JSON Array nested in Array - arrays

I have to index a json to Elastic which look like the below format. My problem is that the key "variable" is array that contains json objects (I thought about "nested" datatype of Elastic) but some of those objects it's possible to contain nested json arrays inside them. (see variable CUSTOMERS).
POST /example_data/data {
"process_name": "TEST_PROCESS",
"process_version ": 0,
"process_id": "1111",
"activity_id": "111",
"name": "update_data",
"username": "testUser",
"datetime": "2018-01-01 10:00:00",
"variables": [{
"name": "ΒΑΝΚ",
"data_type": "STRING",
"value": "EUROBANK"
},{
"name": "CITY",
"data_type": "STRING",
"value": "LONDON"
}, {
"name": "CUSTOMERS",
"data_type": "ENTITY",
"value": [{
"variables": [{
"name": "CUSTOMER_NAME",
"data_type": "STRING",
"value": "JOHN"
}, {
"name": " CUSTOMER_CITY",
"data_type": "STRING",
"value": "LONDON"
}
]
}
]
}, {
"name": "CUSTOMERS",
"data_type": "ENTITY",
"value": [{
"variables": [{
"name": "CUSTOMER_NAME",
"data_type": "STRING",
"value": "ΑΘΗΝΑ"
}, {
"name": " CUSTOMER_CITY ",
"data_type": "STRING",
"value": "LIVERPOOL"
}, {
"name": " CUSTOMER_NUMBER",
"data_type": "STRING",
"value": "1234567890"
}
]
}
]
}
] }
When I'm trying to index it I get the following error
{ "error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Can't merge a non object mapping [variables.value] with an object mapping [variables.value]"
}
],
"type": "illegal_argument_exception",
"reason": "Can't merge a non object mapping [variables.value] with an object mapping [variables.value]" }, "status": 400 }
Mapping
{ "example_data": {
"mappings": {
"data": {
"properties": {
"activity_id": {
"type": "text"
},
"name": {
"type": "text"
},
"process_name": {
"type": "text"
},
"process_version": {
"type": "integer"
}
"process_id": {
"type": "text"
},
"datetime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"username": {
"type": "text",
"analyzer": "greek"
},
"variables": {
"type": "nested",
"properties": {
"data_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}}}
When I remove the variable CUSTOMERS that contains the array, then It works properly because there are only json objects.
Is there a way to handle that? Thanks in advance

Related

Removing and printing name/value pair from json using jolt

I want to remove a name/value pair from inside a json array and print it outside. I started by trying this and then expanding the whole request to be a json array. The solution mentioned above does not seem to be working.
Input :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Desired Output :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Current Jolt spec:
[
{
"operation": "shift",
"spec": {
"*": "[&]",
"attrs": {
"*": {
"name": {
"*": { "#2": "&4" },
"Details": {
"#(2,value)": "&1"
}
}
}
}
}
}
]
I can't seem to figure out how the jolt spec would change in case of the array
So far so good, just need to combine the attributes at a common node. To do this, I've used the identifiers [&1] and [&5] in order to reach the level of the outermost index within the tree such as
[
{
"operation": "shift",
"spec": {
"*": {
"*": "[&1].&",
"attrs": {
"*": {
"name": {
"*": {
"#2": "[&5].&4"
},
"Details": {
"#(2,value)": "[&5].&1"
}
}
}
}
}
}
}
]

LogicApp - How to check if collection is empty

What I would like to do it if there is addOn array, I would like to append the word "xxx" to the end of the name.
Schema applied to message.
{
"properties": {
"appointment": {
"properties": {
"id": {
"type": "integer"
},
"lines": {
"items": {
"properties": {
"addOn": {
"items": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string"
}
},
"required": [
"id",
"name"
],
"type": "object"
},
"type": "array"
},
"id": {
"type": "integer"
},
"price": {
"type": "integer"
}
},
"required": [
"id",
"price"
],
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"messageId": {
"type": "string"
}
},
"type": "object"
}
Message 1
{
"messageId": "11",
"appointment": {
"id": 22,
"lines": [
{
"id": 33,
"price": 125.0,
"addOn": [
{
"id": 44,
"name": "test"
}
]
}
]
}
}
Message 2
{
"messageId": "11",
"appointment": {
"id": 22,
"lines": [
{
"id": 33,
"price": 125.0
}
]
}
}
Message 1 works fine but whenever I try and use length or Parse Array I get a message that addOn is null.
How can I put a proper Condition express to not get any errors or do nothing when there is no addOn array.
Logic App.
Message1 - Okay
Message2 - Error
Code View.
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"For_each": {
"actions": {
"For_each_2": {
"actions": {
"Condition_3": {
"actions": {},
"expression": {
"and": [
{
"equals": [
"#empty(items('For_each_2'))",
"True"
]
}
]
},
"runAfter": {},
"type": "If"
}
},
"foreach": "#items('For_each')['addOn']",
"runAfter": {},
"type": "Foreach"
}
},
"foreach": "#body('Parse_JSON')?['appointment']?['lines']",
"runAfter": {
"Parse_JSON": [
"Succeeded"
]
},
"type": "Foreach"
},
"Parse_JSON": {
"inputs": {
"content": "#triggerBody()",
"schema": {
"properties": {
"appointment": {
"properties": {
"id": {
"type": "integer"
},
"lines": {
"items": {
"properties": {
"addOn": {
"items": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string"
}
},
"required": [
"id",
"name"
],
"type": "object"
},
"type": "array"
},
"id": {
"type": "integer"
},
"price": {
"type": "integer"
}
},
"required": [
"id",
"price"
],
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"messageId": {
"type": "string"
}
},
"type": "object"
}
},
"runAfter": {},
"type": "ParseJson"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {},
"triggers": {
"manual": {
"inputs": {
"schema": {
"properties": {
"appointment": {
"properties": {
"id": {
"type": "integer"
},
"lines": {
"items": {
"properties": {
"addOn": {
"items": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string"
}
},
"required": [
"id",
"name"
],
"type": "object"
},
"type": "array"
},
"id": {
"type": "integer"
},
"price": {
"type": "integer"
}
},
"required": [
"id",
"price",
"addOn"
],
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"messageId": {
"type": "string"
}
},
"type": "object"
}
},
"kind": "Http",
"type": "Request"
}
}
},
"parameters": {}
}
Steve
I would like to append the word "xxx" to the end of the name.
For appending you can directly add while setting the "name" variable.
Alternatively, you can also use concat function as below.
concat(body('Parse_JSON')?['appointment']?['lines'][0]['addOn'][0]['name'],variables('xxx'))
where both cases would give the result as the following
How can I put a proper Condition express to not get any errors or do nothing when there is no addOn array.
You can do this in many ways where one of the workarounds would be using if the lines contain "addOn" or not.
"contains": ["#body('Parse_JSON')?['appointment']?['lines'][0]","addOn"]
MESSAGE1 RESULTS:
MESSAGE2 RESULTS:
You can reproduce the same in your logic app using the below codeview
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"Compose": {
"inputs": "#variables('Message')",
"runAfter": {
"For_each": [
"Succeeded"
]
},
"type": "Compose"
},
"For_each": {
"actions": {
"Condition": {
"actions": {
"Set_variable": {
"inputs": {
"name": "Message",
"value": {
"appointment": {
"id": "#body('Parse_JSON')?['appointment']?['id']",
"lines": [
{
"addOn": "addOn is NULL",
"id": "#items('For_each')?['id']",
"price": "#items('For_each')?['price']"
}
]
},
"messageId": "#{body('Parse_JSON')?['messageId']}"
}
},
"runAfter": {},
"type": "SetVariable"
}
},
"else": {
"actions": {
"For_each_2": {
"actions": {
"Set_variable_2": {
"inputs": {
"name": "Message",
"value": {
"appointment": {
"id": "#body('Parse_JSON')?['appointment']?['id']",
"lines": [
{
"addOn": [
{
"id": "#items('For_each_2')?['id']",
"name": "#{concat(body('Parse_JSON')?['appointment']?['lines'][0]['addOn'][0]['name'],variables('xxx'))}"
}
],
"id": "#items('For_each')?['id']",
"price": "#items('For_each')?['price']"
}
]
},
"messageId": "#{body('Parse_JSON')?['messageId']}"
}
},
"runAfter": {},
"type": "SetVariable"
}
},
"foreach": "#items('For_each')['addOn']",
"runAfter": {},
"type": "Foreach"
}
}
},
"expression": {
"and": [
{
"not": {
"contains": [
"#body('Parse_JSON')?['appointment']?['lines'][0]",
"addOn"
]
}
}
]
},
"runAfter": {},
"type": "If"
}
},
"foreach": "#body('Parse_JSON')?['appointment']?['lines']",
"runAfter": {
"Initialize_variable_xxx": [
"Succeeded"
]
},
"type": "Foreach"
},
"Initialize_variable": {
"inputs": {
"variables": [
{
"name": "Message",
"type": "object"
}
]
},
"runAfter": {
"Parse_JSON": [
"Succeeded"
]
},
"type": "InitializeVariable"
},
"Initialize_variable_xxx": {
"inputs": {
"variables": [
{
"name": "xxx",
"type": "string",
"value": "xxx"
}
]
},
"runAfter": {
"Initialize_variable": [
"Succeeded"
]
},
"type": "InitializeVariable"
},
"Parse_JSON": {
"inputs": {
"content": "#triggerBody()",
"schema": {
"properties": {
"appointment": {
"properties": {
"id": {
"type": "integer"
},
"lines": {
"items": {
"properties": {
"addOn": {
"items": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string"
}
},
"required": [
"id",
"name"
],
"type": "object"
},
"type": "array"
},
"id": {
"type": "integer"
},
"price": {
"type": "integer"
}
},
"required": [
"id",
"price"
],
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"messageId": {
"type": "string"
}
},
"type": "object"
}
},
"runAfter": {},
"type": "ParseJson"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {},
"triggers": {
"manual": {
"inputs": {
"schema": {
"properties": {
"appointment": {
"properties": {
"id": {
"type": "integer"
},
"lines": {
"items": {
"properties": {
"addOn": {
"items": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string"
}
},
"required": [
"id",
"name"
],
"type": "object"
},
"type": "array"
},
"id": {
"type": "integer"
},
"price": {
"type": "integer"
}
},
"required": [
"id",
"price",
"addOn"
],
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"messageId": {
"type": "string"
}
},
"type": "object"
}
},
"kind": "Http",
"type": "Request"
}
}
},
"parameters": {}
}

How to create array json schema for an array string which contains some fixed values and may have other additonal values

So I have this json schema:-
{
"type": "object",
"properties": {
"campaignType": {
"type": "string",
"enum": [
"export"
]
},
"clientid": {
"type": "integer",
"minimum": 1
},
"select": {
"type": "object",
"minProperties": 1,
"anyOf": [
{
"required": [
"list"
]
},
{
"required": [
"segment"
]
}
],
"properties": {
"list": {
"type": "array",
"items": {
"type": "integer"
}
},
"segment": {
"type": "array",
"items": {
"type": "integer"
}
}
}
},
"attributes": {
"type": "array",
"minItems": 2,
"items": {
"type": "string",
"contains": ["fk", "uid"]
}
}
},
"required": [
"campaignType",
"clientid",
"select",
"attributes"
]
}
Here I want to have attributes field to have value "fk", "uid" fixed and must allow other field values with "fk" and "uid".
with this following code I am getting error while passing additonal values:-
{
"campaignType":"export",
"clientid":107311,
"select":{
"segment":[30]
},
"attributes":["uid","fk", "att1"]
}
error unmarshaling properties from json: error unmarshaling items from json: json: cannot unmarshal object into Go value of type []*jsonschema.Schema
how do I fix it?
The value of contains in your schema must be a schema:
According to your question, maybe change the "attributes" schema to:
"attributes": {
"type": "array",
"minItems": 2,
"items": [ { "const": "fk" }, { "const": "uid" } ],
"additionalItems": {
"type": "string"
}
}

Azure Logic app access json property depending on value

I'm trying to get a list of reviews from an api.
This gives me a JSON with nested properties.
I manage to get almost all the data I need except from the nested array ["value"]["answers"]["value"] because sometimes the array ["answers"] has one item (with "id" being either "goods", "delivery" or "service"), or 2 or all 3 of them.
So I can't use the index to get what I want.
I need to be able to check the "id" and from that append the "value" to my array.
Is there a way to write an expression that checks the "id" and from then on writes the "value" in my array ?
JSON :
...
"surveyData": [
{
"questionRef": "q1",
"type": "CORE_STAR_RATING",
"properties": {
"title": "How do you evaluate ...?"
},
"value": {
"stars": 5,
"notAnswered": false
},
"userDefinedId": "q1"
},
"userDefinedId": "q2-good"
},
{
"questionRef": "q3",
"type": "DIMENSIONS",
"properties": {
"title": "How do you evaluate on those criterias ?"
},
**"value": {
"answers": [
{
"id": "delivery",
"value": 4,
"userDefinedId": "delivery",
"name": "Delivery",
"firstLabel": "Nul",
"lastLabel": "Good"
},
{
"id": "goods",
"value": 5,
"userDefinedId": "goods",
"name": "Goods",
"firstLabel": "Nul",
"lastLabel": "Good"
},
{
"id": "service",
"value": 5,
"userDefinedId": "service",
"name": "Service",
"firstLabel": "Nul",
"lastLabel": "Good"
}
]**
},
"userDefinedId": "q3-group"
}
]
...
My array so far :
"value": {
"Event": "#items('For_each-Service')?['event']?['type']",
"Delivery": "", <= ??
"Goods": "", <= ??
"Service": "#items('For_each-Service')?['surveyData'][2]?['value']?['answers'][2]?['value']", <= doesn't work
"comment": "#items('For_each-Service')?['reply']?['comment']",
"order date": "#items('For_each-Service')?['transaction']?['date']",
"date": "#items('For_each-Service')?['reply']?['createdAt']",
"email": "#items('For_each-Service')?['customer']?['email']",
"id review": "#items('For_each-Service')?['event']?['id']",
"name client": "#items('For_each-Service')?['customer']?['fullName']",
"status": "#items('For_each-Service')?['state']",
"title": "#items('For_each-Service')?['title']"
}
Thank you
There are a couple of ways to skin this cat, some more efficient than others.
Personally, I would take the following approach.
I created a flow where, basically, the first thing it does is initialise a variable that contains your Answers array as the data ...
The next step is to initialize another variable that stores that array as XML ...
This is the expression contained within ...
xml(json(concat('{ "root": { "answer": ', variables('Answers Array'), '}}')))
In the next step, I'm basically doing what you're doing by constructing an object, only this time, the selection for the values are a little more complex, this is the definition of the body ...
{
"Delivery": #{first(xpath(xml(variables('XML')), '//id[contains(text(), "delivery")]/parent::answer/value/text()'))},
"Goods": #{first(xpath(xml(variables('XML')), '//id[contains(text(), "goods")]/parent::answer/value/text()'))},
"Service": #{first(xpath(xml(variables('XML')), '//id[contains(text(), "service")]/parent::answer/value/text()'))}
}
Using the xpath approach for querying the dataset can cut down the amount of steps you need to take to get the desired result.
If an element is missing, it will simply come back as null ...
Below is one of the workarounds to achieve your requirement. Firstly, I have initialised 3 variables for Delivery, Goods and Service Counts.
Then used Parse_JSON to extract the inner values of the JSON provided.
In the next step I'm trying to iterate through Id's using Switch action with body('Parse_JSON')?['value']?['answers']?[iterationIndexes('Until')]?['id'] expression and set the variables using below expression inside an until loop.
body('Parse_JSON')?['value']?['answers']?[iterationIndexes('Until')]?['value']
and finally you can add in your array like below
RESULTS:
You can use below code view to reproduce in your logic app
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"Compose": {
"inputs": {
"value": {
"answers": [
{
"firstLabel": "Nul",
"id": "goods",
"lastLabel": "Good",
"name": "Goods",
"userDefinedId": "goods",
"value": 4
},
{
"firstLabel": "Nul",
"id": "service",
"lastLabel": "Good",
"name": "Service",
"userDefinedId": "service",
"value": 5
}
]
}
},
"runAfter": {
"Initialize_variable_Service_Count": [
"Succeeded"
]
},
"type": "Compose"
},
"Compose_2": {
"inputs": " \"value\": {\n \"Event\": \"#items('For_each-Service')?['event']?['type']\",\n \"Delivery\": \"#{variables('Delivery Count')}\", \n \"Goods\": \"#{variables('Goods Count')}\", \n \"Service\": \"#{variables('Service Count')}\"\n }",
"runAfter": {
"Until": [
"Succeeded"
]
},
"type": "Compose"
},
"Initialize_variable": {
"inputs": {
"variables": [
{
"name": "i",
"type": "integer",
"value": 0
}
]
},
"runAfter": {},
"type": "InitializeVariable"
},
"Initialize_variable_Delivery_count": {
"inputs": {
"variables": [
{
"name": "Delivery Count",
"type": "integer"
}
]
},
"runAfter": {
"Initialize_variable": [
"Succeeded"
]
},
"type": "InitializeVariable"
},
"Initialize_variable_Goods_Count": {
"inputs": {
"variables": [
{
"name": "Goods Count",
"type": "integer"
}
]
},
"runAfter": {
"Initialize_variable_Delivery_count": [
"Succeeded"
]
},
"type": "InitializeVariable"
},
"Initialize_variable_Service_Count": {
"inputs": {
"variables": [
{
"name": "Service Count",
"type": "integer"
}
]
},
"runAfter": {
"Initialize_variable_Goods_Count": [
"Succeeded"
]
},
"type": "InitializeVariable"
},
"Parse_JSON": {
"inputs": {
"content": "#outputs('Compose')",
"schema": {
"properties": {
"value": {
"properties": {
"answers": {
"items": {
"properties": {
"firstLabel": {
"type": "string"
},
"id": {
"type": "string"
},
"lastLabel": {
"type": "string"
},
"name": {
"type": "string"
},
"userDefinedId": {
"type": "string"
},
"value": {
"type": "integer"
}
},
"required": [],
"type": "object"
},
"type": "array"
}
},
"type": "object"
}
},
"type": "object"
}
},
"runAfter": {
"Compose": [
"Succeeded"
]
},
"type": "ParseJson"
},
"Until": {
"actions": {
"Increment_variable": {
"inputs": {
"name": "i",
"value": 1
},
"runAfter": {
"Switch": [
"Succeeded"
]
},
"type": "IncrementVariable"
},
"Switch": {
"cases": {
"Case_Delivery": {
"actions": {
"Set_variable_Delivery_Count": {
"inputs": {
"name": "Delivery Count",
"value": "#body('Parse_JSON')?['value']?['answers']?[iterationIndexes('Until')]?['value']"
},
"runAfter": {},
"type": "SetVariable"
}
},
"case": "delivery"
},
"Case_Goods": {
"actions": {
"Set_variable_Goods_Count": {
"inputs": {
"name": "Goods Count",
"value": "#body('Parse_JSON')?['value']?['answers']?[iterationIndexes('Until')]?['value']"
},
"runAfter": {},
"type": "SetVariable"
}
},
"case": "goods"
},
"Case_Service": {
"actions": {
"Set_variable_Service_Count": {
"inputs": {
"name": "Service Count",
"value": "#body('Parse_JSON')?['value']?['answers']?[iterationIndexes('Until')]?['value']"
},
"runAfter": {},
"type": "SetVariable"
}
},
"case": "service"
}
},
"default": {
"actions": {}
},
"expression": "#body('Parse_JSON')?['value']?['answers']?[iterationIndexes('Until')]?['id']",
"runAfter": {},
"type": "Switch"
}
},
"expression": "#equals(variables('i'), length(body('Parse_JSON')?['value']?['answers']))",
"limit": {
"count": 10,
"timeout": "PT1H"
},
"runAfter": {
"Parse_JSON": [
"Succeeded"
]
},
"type": "Until"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {},
"triggers": {
"manual": {
"inputs": {
"schema": {}
},
"kind": "Http",
"type": "Request"
}
}
},
"parameters": {}
}

In Avro schema, how to wrap the array to an object and put it as a union instead of putting just array as union

I have the following data and I want to wrap the OPTIONAL array as an object and then put in as a union but since I am new to this, I am not sure how to do this.
This is how I have done so far so could someone help me correct the below structure in the expected output. Note that this dataRefs is an optional field and this entire structure may or may not be present.
{
"name": "dataRefs",
"default": null,
"type": ["null",
{
"type": "array",
"items": {
"name": "dataRef",
"type": "record",
"fields": [
{
"name": "dataId",
"type": "string",
"avro.java.string": "String"
},
{
"name": "email",
"type": ["null" ,"string"],
"avro.java.string": "String"
},
{
"name": "phone",
"type": ["null" ,"string"],
"avro.java.string": "String"
},
{
"name": "userName",
"type": ["null" ,"string"],
"avro.java.string": "String"
},
{
"name": "addressRef",
"default": null,
"type": ["null", {
"name": "addressRefRecord",
"type": "record",
"fields": [
{
"name": "addrRefId",
"type": ["null","string"],
"avro.java.string": "String"
},
{
"name": "addrType",
"type": ["null","string"],
"avro.java.string": "String"
},
{
"name": "addressLine1",
"type": ["null","string"],
"avro.java.string": "String"
},
{
"name": "addressLine2",
"type": ["null","string"],
"avro.java.string": "String"
},
{
"name": "city",
"type": ["null","string"],
"avro.java.string": "String"
},
{
"name": "province",
"type": ["null","string"],
"avro.java.string": "String"
},
{
"name": "country",
"type": ["null","string"],
"avro.java.string": "String"
},
{
"name": "postalCode",
"type": ["null","string"],
"avro.java.string": "String"
}
]
}
]
}
]
}
}
]
}
My JSON data that I intend to map to above schema looks like follows:
"dataRefs": [{
"addressRef": {
"addrRefId": "0",
"addrType": "ADDRESS",
"addressLine1": "DA 81",
"addressLine2": "",
"city": "Amsterdam",
"country": "Netherlands",
"postalCode": "xxxx LN",
"province": ""
},
"dataId": "0",
"email": "xyz#abc.com"
}],
This is how I was able to do so:
{
"name": "dataRefs",
"type": [
"null",
{
"type": "record",
"name": "dataRefsObject",
"fields": [
{
"name": "dataRefsArray",
"type": {
"type": "array",
"items": {
"name": "dataRef",
"type": "record",
"fields": [
{
"name": "dataId",
"type": ["null", "string"],
"avro.java.string": "String"
},
{
"name": "userName",
"type": [
"null",
"string"
],
"avro.java.string": "String"
},
....

Resources