Snowflake - flatten multiple nested array values from json variant column - arrays

I have JSON data from which I want to extract the values of the key 'text' into a single delimited row. Any help to achieve the desired output is much appreciated.
Sample JSON data:
{
"expand": "schema,names",
"issues": [
{
"id": "123456",
"key": "XYZ-123",
"fields": {
"customfield_10000": "abcd",
"customfield_10001": 7,
"customfield_10002": null,
"description": {
"version": 1,
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 1"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 2"
},
{
"type": "text",
"text": "some text value 3",
"marks": [
{
"type": "link",
"attrs": {
"href": "some ref"
}
}
]
},
{
"type": "text",
"text": "some text value 4"
}
]
},
{
"type": "blockquote",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 5"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "inlineCard",
"attrs": {
"url": "some url"
}
},
{
"type": "text",
"text": "some text value 6"
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 7"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 8"
},
{
"type": "text",
"text": "some text value 9",
"marks": [
{
"type": "link",
"attrs": {
"href": "some link"
}
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 10"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 11"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 12"
}
]
}
]
}
}
}
]
}
Desired output:
ISSUE_ID ISSUE_KEY CF_10000 CF_10001 CF_10002 DESCRIPTION
123456 XYZ-123 abcd 7 null some text value 1|some text value 2|some text value 3.....
I'm using the below query to get the array values. However, I want the key 'text' values from the arrays to be populated in the desired format above.
select
ISSUE.value:id::number as ISSUE_ID,
ISSUE.value:key::varchar as ISSUE_KEY,
ISSUE.value:fields.customfield_10000::varchar as CF_10000,
ISSUE.value:fields.customfield_10001::number as CF_10001,
ISSUE.value:fields.customfield_10002::varchar as CF_10002,
ISSUE.value:fields.description.content::varchar as DESCRIPTION
from
VARIANT_TABLE,
lateral flatten( input => payload_json:issues, outer => true) as ISSUE
I have a UDF that extracts a given key's values from a JSON array of objects into an array of strings, but that didn't help me get the desired output from the JSON shared above, as it has nested arrays inside objects.
create or replace function UDF_ARRAY_OBJECT_TO_STRING_ARRAY(a array, b varchar)
returns array
language javascript
strict
comment = 'UDF to extract the values of a given key from an array of JSON objects into an array of strings. A is the input array and B is the key to extract from each object'
as $$
return A.map(function(d) {return d[B]});
$$;
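For illustration, a hypothetical call of that UDF (using the VARIANT_TABLE / payload_json names from my query above) only reaches one content array at a time, which is why it falls short here:
select
  UDF_ARRAY_OBJECT_TO_STRING_ARRAY(
    payload_json:issues[0].fields.description.content[0].content::array,
    'text'
  ) as TEXTS  -- returns ["some text value 1"]; deeper paragraphs and blockquotes are out of reach
from VARIANT_TABLE;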

You have a lot more arrays in there than you are handling in your lateral flattens. With a few more flattens and a listagg() function, you should get there. Note: you might need to group by the index rather than the field values, depending on what you are trying to get to, but this gives the result you were looking for in your example:
WITH x AS (
SELECT parse_json('{
"expand": "schema,names",
"issues": [
{
"id": "123456",
"key": "XYZ-123",
"fields": {
"customfield_10000": null,
"customfield_10001": null,
"customfield_10002": null,
"description": {
"version": 1,
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 1"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 2"
},
{
"type": "text",
"text": "some text value 3",
"marks": [
{
"type": "link",
"attrs": {
"href": "some ref"
}
}
]
},
{
"type": "text",
"text": "some text value 4"
}
]
},
{
"type": "blockquote",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 5"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "inlineCard",
"attrs": {
"url": "some url"
}
},
{
"type": "text",
"text": "some text value 6"
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 7"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 8"
},
{
"type": "text",
"text": "some text value 9",
"marks": [
{
"type": "link",
"attrs": {
"href": "some link"
}
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 10"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 11"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 12"
}
]
}
]
}
}
}
]
}') as payload_json)
select
issue.value:id::number as ISSUE_ID,
issue.value:key::varchar as ISSUE_KEY,
issue.value:fields.customfield_10000::varchar as CF_10000,
issue.value:fields.customfield_10001::number as CF_10001,
issue.value:fields.customfield_10002::varchar as CF_10002,
listagg(content2.value:text::varchar,'|') as description
from
x,
lateral flatten( input => x.payload_json:issues, outer => true) as issue,
lateral flatten( input => issue.value:fields:description:content, outer => true) as content,
lateral flatten( input => content.value:content, outer => true) as content2
group by 1,2,3,4,5;
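One caveat: the texts inside the blockquote ("some text value 5" and "some text value 6") sit one level deeper than content2 reaches, so they can be dropped from the aggregate. A sketch of a fix (same CTE, untested beyond this sample): add a third flatten and coalesce across the two levels, ordering the aggregate by position:
select
  issue.value:id::number as ISSUE_ID,
  issue.value:key::varchar as ISSUE_KEY,
  issue.value:fields.customfield_10000::varchar as CF_10000,
  issue.value:fields.customfield_10001::number as CF_10001,
  issue.value:fields.customfield_10002::varchar as CF_10002,
  listagg(coalesce(content2.value:text, content3.value:text)::varchar, '|')
    within group (order by content.index, content2.index, content3.index) as DESCRIPTION
from
  x,
  lateral flatten( input => x.payload_json:issues, outer => true ) as issue,
  lateral flatten( input => issue.value:fields.description.content, outer => true ) as content,
  lateral flatten( input => content.value:content, outer => true ) as content2,
  -- only blockquote items have a second content level; outer => true keeps the plain rows
  lateral flatten( input => content2.value:content, outer => true ) as content3
group by 1,2,3,4,5;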

Related

Azure Data Factory - How to transform object with dynamic keys to array in a data flow?

After spending many hours reading the documentation, following some tutorials, and trial & error, I just can't figure it out: how can I transform the following complex object with key objects to an array using a data flow in Azure Data Factory?
Input
{
"headers": {
"Content-Length": 1234
},
"body": {
"00b50a39-8591-3db3-88f7-635e2ec5c65a": {
"id": "00b50a39-8591-3db3-88f7-635e2ec5c65a",
"name": "Example 1",
"date": "2023-02-09"
},
"0c206312-2348-391b-99f0-261323a94d95": {
"id": "0c206312-2348-391b-99f0-261323a94d95",
"name": "Example 2",
"date": "2023-02-09"
},
"0c82d1e4-a897-32f2-88db-6830a21b0a43": {
"id": "00b50a39-8591-3db3-88f7-635e2ec5c65a",
"name": "Example 3",
"date": "2023-02-09"
}
}
}
Expected output
[
{
"id": "00b50a39-8591-3db3-88f7-635e2ec5c65a",
"name": "Example 1",
"date": "2023-02-09"
},
{
"id": "0c206312-2348-391b-99f0-261323a94d95",
"name": "Example 2",
"date": "2023-02-09"
},
{
"id": "00b50a39-8591-3db3-88f7-635e2ec5c65a",
"name": "Example 3",
"date": "2023-02-09"
}
]
AFAIK, your JSON keys are dynamic, so getting the desired result using a data flow might not be possible.
In this case, you can try the below approach as a workaround. Note that it only works if all of your keys have the same length.
This is my Pipeline:
First, I used a Lookup activity to get the JSON file, converted the lookup output to a string, and stored it in a variable using the below expression.
@substring(string(activity('Lookup1').output.value[0].body),2,sub(length(string(activity('Lookup1').output.value[0].body)),4))
Then I split that string variable on '},"' and stored the result in an array variable using the below expression.
@split(variables('res_str'),'},"')
It will give an array in which each element starts with one of the GUID keys.
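For illustration (assuming string() of the body keeps the shape of the input above), the substring strips the leading {" and the trailing }} from the stringified body, so each element of the split array begins with a 36-character GUID key, e.g.:
00b50a39-8591-3db3-88f7-635e2ec5c65a":{"id":"00b50a39-8591-3db3-88f7-635e2ec5c65a","name":"Example 1","date":"2023-02-09"
which is why take(item(), 36) in the next step recovers the key.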
Give that array to a ForEach, and inside the ForEach use an Append variable activity to store the keys into an array with the below expression.
@take(item(), 36)
Now I have the list of keys in an array. After the above ForEach, use another ForEach activity to get the desired array of objects. Use an Append variable activity inside the ForEach and give it the below expression.
@activity('Lookup1').output.value[0].body[item()]
The result array after this ForEach will match the expected output above.
If you want to store the above JSON into a file, you need to use OPENJSON from SQL. This is because the copy activity's additional columns support only string types, not arrays.
Use a SQL dataset as the copy activity source and give the below SQL script in the query.
DECLARE @json NVARCHAR(MAX)
SET @json =
N'@{variables('json_arr')}'
SELECT * FROM
OPENJSON ( @json )
WITH (
id varchar(200) '$.id' ,
name varchar(32) '$.name',
date varchar(32) '$.date'
)
In Sink, give a JSON dataset and select Array of Objects as File pattern.
Execute the pipeline and you will get the above array inside a file.
This is my Pipeline JSON:
{
"name": "pipeline1",
"properties": {
"activities": [
{
"name": "Lookup1",
"type": "Lookup",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "JsonSource",
"storeSettings": {
"type": "AzureBlobFSReadSettings",
"recursive": true,
"enablePartitionDiscovery": false
},
"formatSettings": {
"type": "JsonReadSettings"
}
},
"dataset": {
"referenceName": "Json1",
"type": "DatasetReference"
},
"firstRowOnly": false
}
},
{
"name": "Lookup output to Str",
"description": "",
"type": "SetVariable",
"dependsOn": [
{
"activity": "Lookup1",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "res_str",
"value": {
"value": "#substring(string(activity('Lookup1').output.value[0].body),2,sub(length(string(activity('Lookup1').output.value[0].body)),4))",
"type": "Expression"
}
}
},
{
"name": "Split Str to array",
"type": "SetVariable",
"dependsOn": [
{
"activity": "Lookup output to Str",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "split_arr",
"value": {
"value": "#split(variables('res_str'),'},\"')",
"type": "Expression"
}
}
},
{
"name": "build keys array using split array",
"type": "ForEach",
"dependsOn": [
{
"activity": "Split Str to array",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "#variables('split_arr')",
"type": "Expression"
},
"isSequential": true,
"activities": [
{
"name": "take first 36 chars of every item",
"type": "AppendVariable",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"variableName": "keys_array",
"value": {
"value": "#take(item(), 36)",
"type": "Expression"
}
}
}
]
}
},
{
"name": "build final array using keys array",
"type": "ForEach",
"dependsOn": [
{
"activity": "build keys array using split array",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"items": {
"value": "#variables('keys_array')",
"type": "Expression"
},
"isSequential": true,
"activities": [
{
"name": "Append variable1",
"description": "append every object to array",
"type": "AppendVariable",
"dependsOn": [],
"userProperties": [],
"typeProperties": {
"variableName": "json_arr",
"value": {
"value": "#activity('Lookup1').output.value[0].body[item()]",
"type": "Expression"
}
}
}
]
}
},
{
"name": "Just for Res show",
"type": "SetVariable",
"dependsOn": [
{
"activity": "build final array using keys array",
"dependencyConditions": [
"Succeeded"
]
}
],
"userProperties": [],
"typeProperties": {
"variableName": "final_res_show",
"value": {
"value": "#variables('json_arr')",
"type": "Expression"
}
}
},
{
"name": "Copy data1",
"type": "Copy",
"dependsOn": [
{
"activity": "Just for Res show",
"dependencyConditions": [
"Succeeded"
]
}
],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "AzureSqlSource",
"sqlReaderQuery": "DECLARE #json NVARCHAR(MAX)\nSET #json = \n N'#{variables('json_arr')}' \n \nSELECT * FROM \n OPENJSON ( #json ) \nWITH ( \n id varchar(200) '$.id' , \n name varchar(32) '$.name', \n date varchar(32) '$.date'\n )",
"queryTimeout": "02:00:00",
"partitionOption": "None"
},
"sink": {
"type": "JsonSink",
"storeSettings": {
"type": "AzureBlobFSWriteSettings"
},
"formatSettings": {
"type": "JsonWriteSettings",
"filePattern": "arrayOfObjects"
}
},
"enableStaging": false
},
"inputs": [
{
"referenceName": "AzureSqlTable1",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "Target_JSON",
"type": "DatasetReference"
}
]
}
],
"variables": {
"res_str": {
"type": "String"
},
"split_arr": {
"type": "Array"
},
"keys_array": {
"type": "Array"
},
"final_res_show": {
"type": "Array"
},
"json_arr": {
"type": "Array"
}
},
"annotations": []
}
}
Result file:

How to change the value of a state object?

I created an add button for adding new objects to the state. The logic is: first copy the last object of the state, then add it at the end.
I can write the code so far, but the problem is that I have to change the value and the key of the objects.
Note: I have to change the keys because I'm creating input fields with the keys, and they should be unique.
This is my code :
addField() {
const index = (this.state?.fields.length)
console.log("state: ", this.state)
const newFields = [this.state.fields[index]].fill(this.state.fields[index - 1])
this.setState(
{
fields: [...this.state.fields, ...newFields],
},
() => {
console.log("updated state", this.state);
}
);
}
outputs:
state:
{
"fields": [
[
{
"key": "input_field_name",
"value": "This is a value"
},
{
"key": "field_name",
"value": "field name"
},
{
"key": "datatype",
"value": "text"
}
],
[
{
"key": "input_field_name",
"value": "This is a value 2"
},
{
"key": "field_name",
"value": "field name's value 2"
},
{
"key": "datatype",
"value": "text"
},
{
"key": "Datatype",
"value": "Label 2"
}
]
]
}
updated state:
{
"fields": [
[
{
"key": "input_field_name",
"value": "This is a value"
},
{
"key": "field_name",
"value": "field name"
},
{
"key": "datatype",
"value": "text"
}
],
[
{
"key": "input_field_name",
"value": "This is a value 2"
},
{
"key": "field_name",
"value": "field name's value 2"
},
{
"key": "datatype",
"value": "text"
},
{
"key": "Datatype",
"value": "Label 2"
}
],
[
{
"key": "input_field_name",
"value": "This is a value 2"
},
{
"key": "field_name",
"value": "field name's value 2"
},
{
"key": "datatype",
"value": "text"
},
{
"key": "Datatype",
"value": "Label 2"
}
]
]
}
my state:
interface IDetailsPageState {
fields: IFieldDefinition[][];
activeFields: {
key: number;
fields: IFieldDefinition[];
};
}
How can I change the key and value of these objects?
<div key={`${this.state.activeFields.key}-${field.key}`}>
<TextInput config={inputConfig} bindings={inputBindings}></TextInput>
</div>
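A minimal sketch of one way to do that (untested against this component; the numeric suffix scheme is an assumption): deep-copy the last group and rewrite each key before appending, so the rendered inputs get unique keys:
addField() {
  const fields = this.state.fields;
  const last = fields[fields.length - 1];
  // Copy each field object so the new group shares no references with the old one,
  // suffix the key to keep it unique, and reset the value for the fresh inputs.
  const newGroup = last.map((f) => ({
    ...f,
    key: `${f.key}_${fields.length}`,
    value: "",
  }));
  this.setState({ fields: [...fields, newGroup] });
}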

MongoDB How to search in nested objects?

How can I search for the value "20044" in all "Barcode" fields, and only in "Barcode" fields, of a nested object in MongoDB, without specifying an absolute path such as "Item.Item.Item.Barcode"?
My current solutions:
Search in all text fields, not only in "Barcode" fields
find({$text: {$search: '20044'}})
Search in one specific absolute path, not in all "Barcode" fields
find({'Item.Item.Item.Barcode': '20044'})
This is my database object:
{
"_id": {
"$oid": "633d7cc238d7f8dafeace6f5"
},
"Number": "2",
"Item": [
{
"Type": "FrameElement",
"Item": [
{
"Type": "Frame",
"Barcode": "20011"
},
{
"Type": "Frame",
"Barcode": "20012"
},
{
"Type": "SashElement",
"Item": [
{
"Type": "Sash",
"Barcode": "20021"
},
{
"Type": "Sash",
"Barcode": "20022"
},
{
"Type": "GlassBarElement",
"Item": [
{
"Type": "GlassBar",
"Barcode": "20031"
},
{
"Type": "GlassBar",
"Barcode": "20032"
}
]
}
]
},
{
"Type": "Glass",
"Barcode": "20016"
},
{
"Type": "GlassBarElement",
"Item": [
{
"Type": "GlassBar",
"Barcode": "20043"
},
{
"Type": "GlassBar",
"Barcode": "20044"
}
]
}
]
}
]
}
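Not a fully generic answer, but if the nesting depth is bounded (as it appears to be in the document above), one workaround is an $or across the known Item paths; dot notation walks the arrays at each level. A sketch, with db.collection standing in for your collection:
db.collection.find({
  $or: [
    { "Item.Barcode": "20044" },
    { "Item.Item.Barcode": "20044" },
    { "Item.Item.Item.Barcode": "20044" },
    { "Item.Item.Item.Item.Barcode": "20044" }
  ]
})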

fetch value from SharePoint JSON output

I am getting the below JSON object from a SharePoint list. How can I get the value for Company in the Data Operations (Select) action in Logic Apps? I tried item()['Company']?['Value'] and it is not working. Any suggestions?
"body": [
{
"Company": {
"#odata.type": "#Microsoft.Azure.Connectors.SharePoint.SPListExpandedReference",
"Id": 0,
"Value": "Test1"
},
"Date From": "2022-03-30",
"Date To": "2022-03-31",
"Title": "Title 1"
},
{
"Company": {
"#odata.type": "#Microsoft.Azure.Connectors.SharePoint.SPListExpandedReference",
"Id": 2,
"Value": "Line2"
},
"Date From": "2022-03-21",
"Date To": "2022-03-29",
"Title": "Title 2"
}
]
}
I am fetching the SharePoint list and then using a Data Operations (Select) action to get the JSON as output.
I need JSON in the below format so that I can pass it to a stored proc and insert into the Azure SQL DB. I have another 12 items in the list.
[
{
"Company": "Test1",
"Date From": "2022-03-30",
"Date To": "2022-03-31",
"Title": "Title 1"
},
{
"Company": "Line2",
"Date From": "2022-03-21",
"Date To": "2022-03-29",
"Title": "Title 2"
}
]
Rather than select, you can set a variable. We're all different but that makes far more sense to me.
Your expression is much the same, I used ...
item()['Company']['Value']
Just make sure you initialise the variable outside and prior to the For each ...
This is the result for the first item in the array ...
To compile a full JSON object and add it to an array, again, simply use a variable and specify the values as needed.
Firstly, initialize your array outside of the For each ...
... and then in the For each, add an object to the array variable on each loop (make sure you include the quotes around the expression where required) ...
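Per loop, the appended object (taken from the Append_to_SQL_Array action in the definition below) looks like this:
{
  "Company": "@{item()['Company']['Value']}",
  "Date From": "@{item()['Date From']}",
  "Date To": "@{item()['Date To']}",
  "Title": "@{item()['Title']}"
}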
You just have to compile the JSON. The end result will look like this ...
This is the JSON in full ...
[
{
"Company": "Line2",
"Date From": "2022-03-21",
"Date To": "2022-03-29",
"Title": "Title 2"
},
{
"Company": "Test1",
"Date From": "2022-03-30",
"Date To": "2022-03-31",
"Title": "Title 1"
}
]
Also, you'll notice my list has come out in a different order; that's because the For each runs in parallel. If you need to avoid that, change the settings so it runs in a single thread ...
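In the definition below, that single-threading shows up as the runtimeConfiguration on the For_each, i.e. "concurrency": { "repetitions": 1 }.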
This is the JSON definition of the LogicApp; you can load it into your tenant and test with it ...
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"For_each": {
"actions": {
"Append_to_SQL_Array": {
"inputs": {
"name": "SQL Array",
"value": {
"Company": "#{item()['Company']['Value']}",
"Date From": "#{item()['Date From']}",
"Date To": "#{item()['Date To']}",
"Title": "#{item()['Title']}"
}
},
"runAfter": {},
"type": "AppendToArrayVariable"
}
},
"foreach": "#variables('SharePoint JSON')",
"runAfter": {
"Initialize_SQL_Array": [
"Succeeded"
]
},
"runtimeConfiguration": {
"concurrency": {
"repetitions": 1
}
},
"type": "Foreach"
},
"Initialize_SQL_Array": {
"inputs": {
"variables": [
{
"name": "SQL Array",
"type": "array"
}
]
},
"runAfter": {
"Initialize_SharePoint_JSON": [
"Succeeded"
]
},
"type": "InitializeVariable"
},
"Initialize_SharePoint_JSON": {
"inputs": {
"variables": [
{
"name": "SharePoint JSON",
"type": "array",
"value": [
{
"Company": {
"Id": 0,
"Value": "Test1",
"odata.type": "#Microsoft.Azure.Connectors.SharePoint.SPListExpandedReference"
},
"Date From": "2022-03-30",
"Date To": "2022-03-31",
"Title": "Title 1"
},
{
"Company": {
"Id": 2,
"Value": "Line2",
"odata.type": "#Microsoft.Azure.Connectors.SharePoint.SPListExpandedReference"
},
"Date From": "2022-03-21",
"Date To": "2022-03-29",
"Title": "Title 2"
}
]
}
]
},
"runAfter": {},
"type": "InitializeVariable"
},
"Initialize_variable": {
"inputs": {
"variables": [
{
"name": "Result",
"type": "array",
"value": "#variables('SQL Array')"
}
]
},
"runAfter": {
"For_each": [
"Succeeded"
]
},
"type": "InitializeVariable"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {},
"triggers": {
"Recurrence": {
"evaluatedRecurrence": {
"frequency": "Month",
"interval": 12
},
"recurrence": {
"frequency": "Month",
"interval": 12
},
"type": "Recurrence"
}
}
},
"parameters": {}
}

Elastic - JSON Array nested in Array

I have to index a JSON document to Elasticsearch which looks like the format below. My problem is that the key "variables" is an array that contains JSON objects (I thought about the "nested" datatype of Elasticsearch), but it's possible for some of those objects to contain nested JSON arrays inside them (see the variable CUSTOMERS).
POST /example_data/data
{
"process_name": "TEST_PROCESS",
"process_version ": 0,
"process_id": "1111",
"activity_id": "111",
"name": "update_data",
"username": "testUser",
"datetime": "2018-01-01 10:00:00",
"variables": [{
"name": "ΒΑΝΚ",
"data_type": "STRING",
"value": "EUROBANK"
},{
"name": "CITY",
"data_type": "STRING",
"value": "LONDON"
}, {
"name": "CUSTOMERS",
"data_type": "ENTITY",
"value": [{
"variables": [{
"name": "CUSTOMER_NAME",
"data_type": "STRING",
"value": "JOHN"
}, {
"name": " CUSTOMER_CITY",
"data_type": "STRING",
"value": "LONDON"
}
]
}
]
}, {
"name": "CUSTOMERS",
"data_type": "ENTITY",
"value": [{
"variables": [{
"name": "CUSTOMER_NAME",
"data_type": "STRING",
"value": "ΑΘΗΝΑ"
}, {
"name": " CUSTOMER_CITY ",
"data_type": "STRING",
"value": "LIVERPOOL"
}, {
"name": " CUSTOMER_NUMBER",
"data_type": "STRING",
"value": "1234567890"
}
]
}
]
}
]
}
When I try to index it, I get the following error:
{ "error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Can't merge a non object mapping [variables.value] with an object mapping [variables.value]"
}
],
"type": "illegal_argument_exception",
"reason": "Can't merge a non object mapping [variables.value] with an object mapping [variables.value]" }, "status": 400 }
Mapping
{ "example_data": {
"mappings": {
"data": {
"properties": {
"activity_id": {
"type": "text"
},
"name": {
"type": "text"
},
"process_name": {
"type": "text"
},
"process_version": {
"type": "integer"
},
"process_id": {
"type": "text"
},
"datetime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"username": {
"type": "text",
"analyzer": "greek"
},
"variables": {
"type": "nested",
"properties": {
"data_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
When I remove the variable CUSTOMERS that contains the array, it works properly, because then there are only JSON objects.
Is there a way to handle that? Thanks in advance
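One possible direction (an assumption on my part, not a verified fix): keep variables.value strictly scalar, and move the object-valued ENTITY payloads to a separate nested field (here called entity_value, a made-up name) before indexing, so each field keeps a single mapping type. A mapping sketch:
"variables": {
  "type": "nested",
  "properties": {
    "name": { "type": "text" },
    "data_type": { "type": "text" },
    "value": { "type": "text" },
    "entity_value": {
      "type": "nested",
      "properties": {
        "variables": {
          "type": "nested",
          "properties": {
            "name": { "type": "text" },
            "data_type": { "type": "text" },
            "value": { "type": "text" }
          }
        }
      }
    }
  }
}
The documents would then need a pre-index transform that renames value to entity_value whenever data_type is ENTITY.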
