Snowflake : JSON data flatten - snowflake-cloud-data-platform

I have the SQL below to fetch data from a JSON file, but my file contains an array of data with multiple values.
SELECT
select
DISTINCT
,REPLACE(DOCUMENT:"_id"::VARCHAR(50),'guests-','') GUEST_ID
,PARSE_JSON(DOCUMENT):"_rev"::string as GUEST_REVISION_ID
,PARSE_JSON(DOCUMENT):personal_info:addresses:address_id::varchar(255) as ADDRESS_ID
,PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"address_type"::varchar(255) as ADDRESS_CODE
,UPPER(regexp_replace(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"address_line1"::VARCHAR(255),'[\n\r]','')) as ADDRESS_LINE_1
,UPPER(regexp_replace(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"address_line2"::VARCHAR(255),'[\n\r]','')) as ADDRESS_LINE_2
,UPPER(regexp_replace(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"city"::VARCHAR(255),'[\n\r]','')) as CITY_NAME
,UPPER(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"state"::varchar(255)) as STATE_CODE
,UPPER(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"country"::varchar(255)) as COUNTRY
,PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"postal_code"::varchar(255) as POSTAL_CODE
,UPPER(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"country_code"::varchar(255)) as COUNTRY_CODE
,UPPER(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"first_name"::varchar(255)) as ADDRESS_FIRST_NAME
,UPPER(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"last_name"::varchar(255)) as ADDRESS_LAST_NAME
,PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"phone_number"::varchar(255) as PHONE_NUMBER
,CASE
WHEN LOWER(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"primary") = 'true' THEN 1
WHEN LOWER(PARSE_JSON(DOCUMENT):"personal_info":"addresses[]":"primary") = 'false' THEN 0
ELSE NULL END as FLAG
from test
Sample Data :
{
"_id":"guests-240c8ef1-65f0-11e9-8e7e-8568b9f986fb",
"personal_info": {
"addresses": [
{
"address_id":"555148381793213101",
"address_line1":"509 BROADLEAF LANE",
"address_type":"generic",
"city":"MCKINNEY",
"country":"United States",
"country_code":"US",
"postal_code":"75070",
"primary": true,
"state":"TX"
},
{
"address_id":"856855604204997103",
"address_line1":"11 Blossom Dr",
"address_line2":"Basking Ridge",
"address_type":"billing",
"city":"Basking Ridge",
"country":"United States",
"country_code":"US",
"email_address":"deb_ron.fischang#att.net",
"first_name":"Deborah",
"last_name":"Fischang",
"phone_number":"9086723249",
"postal_code":"07920",
"primary": false,
"state":"NJ"
},
{
"address_id":"856855604204997103",
"address_line1":"11 Blossom Dr",
"address_line2":"Basking Ridge",
"address_type":"generic",
"city":"Basking Ridge",
"country":"United States",
"country_code":"US",
"email_address":"deb_ron.fischang#att.net",
"first_name":"Deborah",
"last_name":"Fischang",
"phone_number":"9086723249",
"postal_code":"07920",
"primary": false,
"state":"NJ"
}
]
How do I get this data in multiple rows based on the number of addresses in the array? I tried LATERAL FLATTEN but it's not working.

You may use something like this:
-- Flatten the personal_info.addresses array so that each address element
-- becomes its own output row; f.value is the current array element.
SELECT
DISTINCT
-- DOCUMENT is parsed consistently everywhere: the original mixed a raw
-- DOCUMENT:"_id" access with PARSE_JSON(DOCUMENT), which fails when
-- DOCUMENT is a plain string column (as the question's own query implies).
REPLACE(PARSE_JSON(DOCUMENT):"_id"::VARCHAR(50),'guests-','') GUEST_ID
-- f.value is the flatten output qualified by its alias, avoiding the
-- ambiguous bare VALUE keyword.
,f.value:address_id::varchar(255) as ADDRESS_ID
,f.value:"address_type"::varchar(255) as ADDRESS_CODE
,regexp_replace(f.value:"address_line1"::VARCHAR(255),'[\n\r]','') as ADDRESS_LINE_1
,regexp_replace(f.value:"address_line2"::VARCHAR(255),'[\n\r]','') as ADDRESS_LINE_2
,regexp_replace(f.value:"city"::VARCHAR(255),'[\n\r]','') as CITY_NAME
from test, lateral flatten( input => PARSE_JSON(DOCUMENT):"personal_info":"addresses" ) f;

Related

How to unnest my JSON data object and create a simplified JSON using Snowflake?

My current JSON object looks like this:
-- create a sample table
create or replace table json_example(v variant);
-- create sample json record
insert into json_example
select parse_json(
'[
{
"key": "variable_a",
"value": {
"double_value": null,
"float_value": null,
"int_value": null,
"string_value": "https://example.com"
}
},
{
"key": "variable_b",
"value": {
"double_value": null,
"float_value": null,
"int_value": 2,
"string_value": null
}
}
]');
And this is the simplified JSON that I am trying to achieve:
{
"variable_a": "https://example.com",
"variable_b": 2
}
How can I get the simplified JSON from the multilevel JSON object?
This is how I started to think:
select value:key::string as key, value:value:string_value::varchar as value
from json_example, lateral flatten(input => v)
union all
select value:key::string as key, value:value:int_value::varchar as value
from json_example, lateral flatten(input => v)
Thank you in advance.
So if you want everything to be JSON text you can:
-- Flatten the key/value array and surface every typed value column
-- side by side, then coalesce them into a single text value per key.
WITH data as (
select parse_json(
'[
{
"key": "variable_a",
"value": {
"double_value": null,
"float_value": null,
"int_value": null,
"string_value": "https://example.com"
}
},
{
"key": "variable_b",
"value": {
"double_value": null,
"float_value": null,
"int_value": 2,
"string_value": null
}
}
]') as json
)
-- f.value is one {key, value:{...}} element of the array.
SELECT f.value:key::text as t_key
-- try_to_* returns NULL instead of erroring when the field is JSON null.
,try_to_double(f.value:value:double_value::text) as d_val
,try_to_double(f.value:value:float_value::text) as f_val
,try_to_number(f.value:value:int_value::text) as n_val
,f.value:value:string_value::text as s_val
-- Snowflake allows referencing earlier select-list aliases (lateral
-- column aliasing), so c_val can reuse d_val/f_val/n_val/s_val directly.
,coalesce(d_val::text, f_val::text, n_val::text, s_val) as c_val
,object_construct(t_key, c_val) as obj
FROM DATA, lateral flatten(input=>json) f
T_KEY D_VAL
F_VAL
N_VAL
S_VAL
C_VAL
OBJ
variable_a
https://example.com
https://example.com
variable_b
2
2
Which then shows us how to build a CASE statement, and build clean native objects like:
-- Relies on the DATA CTE defined in the previous snippet.
-- Builds one single-key object per array element, keeping the native type:
-- is_null_value() distinguishes a JSON null from SQL NULL, so the first
-- non-JSON-null typed field wins; string_value is the fallback branch.
SELECT
case
when not is_null_value(f.value:value:double_value)
then object_construct(f.value:key::text, try_to_double(f.value:value:double_value::text))
when not is_null_value(f.value:value:float_value)
then object_construct(f.value:key::text, try_to_double(f.value:value:float_value::text))
when not is_null_value(f.value:value:int_value)
then object_construct(f.value:key::text, try_to_number(f.value:value:int_value::text))
else
object_construct(f.value:key::text, f.value:value:string_value::text)
end obj
FROM DATA, lateral flatten(input=>json) f
OBJ
{ "variable_a": "https://example.com" }
{ "variable_b": 2 }
Which can be turned into a single object like so:
-- Relies on the DATA CTE defined in the first snippet.
-- Same typed CASE as above, but object_agg collapses all rows into one
-- combined object: { "variable_a": ..., "variable_b": ... }.
SELECT
object_agg(f.value:key,
case
when not is_null_value(f.value:value:double_value)
then try_to_double(f.value:value:double_value::text)
when not is_null_value(f.value:value:float_value)
then try_to_double(f.value:value:float_value::text)
when not is_null_value(f.value:value:int_value)
then try_to_number(f.value:value:int_value::text)
else
f.value:value:string_value
end
) as obj
FROM DATA, lateral flatten(input=>json) f
OBJ
{ "variable_a": "https://example.com", "variable_b": 2 }
There are three parts to this:
flatten the original JSON array and select the values you want from it
create new JSON objects based on the resulting row values.
combine the JSON objects into a single object.
-- Three-step pipeline: flatten the array, build one single-key object per
-- row, then aggregate those objects into a single combined object.
with json_example(json) as (
select parse_json(
'[
{
"key": "variable_a",
"value": {
"double_value": null,
"float_value": null,
"int_value": null,
"string_value": "https://example.com"
}
},
{
"key": "variable_b",
"value": {
"double_value": null,
"float_value": null,
"int_value": 2,
"string_value": null
}
}
]'
)
),
-- Step 1: one row per array element, with the typed fields pulled out.
flattened_rows as
(
select v.value:key::string as key,
v.value:value:int_value::int as int_value,
v.value:value:string_value::string as string_value
-- other values here
from json_example, lateral flatten(input => json) as v
)
,
-- Step 2: one single-key JSON object per row, choosing the non-null type.
simplified_json as
(
select
case when int_value is not null then object_construct(key, int_value)::variant
else object_construct(key, string_value)
end as json
from flattened_rows
)
-- Step 3: re-flatten the single-key objects and merge them into one object.
select object_agg(j.key, j.value)
from simplified_json, lateral flatten(input => json) AS j
;
flattened_rows looks like this:
KEY
INT_VALUE
STRING_VALUE
variable_a
https://example.com
variable_b
2
simplified_json looks like this:
JSON
{ "variable_a": "https://example.com" }
{ "variable_b": 2 }
Final results:
OBJ
{ "variable_a": "https://example.com", "variable_b": 2 }
Update
I updated the answer above to incorporate the object_agg approach Simeon found. My original answer involved creating a JavaScript UDTF that leveraged Object.assign to combine the json objects.

Updating JSON in postgres based on dynamic input of type json array

I have a column in postgres table which is of JSON type and looks something like
{
"Name": "Some Name",
"Stages": [
{
"Title": "Early Flight",
"Tags": [....],
"Date": "2021-11-05T00:00:00",
"CloseDate": ""
},
{
"Title": "Midway Flight",
"Tags": [....],
"Date": "2021-11-05T00:00:00",
"CloseDate": ""
},
{
"Title": "Pro Flight",
"Tags": [....],
"Date": "2021-11-05T00:00:00",
"CloseDate": ""
},
{
"Title": "Expert Start",
"Tags": [....],
"Date": "2021-11-05T00:00:00",
"CloseDate": ""
}
]
}
I want to update the Date for the number of items that are provided in newInputItem,
meaning the Date for Midway Flight and Expert Flight needs to change.
I tried using CTE as below but the query updates only the first element of the input array in this case its just Midway Flight that gets updated.
WITH newInputItem as
(
select
arr.newInputItem ::json ->> 'Title' as State,
(arr.newInputItem ::json ->> 'NewDate')::timestamp as NewDate
from
json_array_elements('[
{"Title" : "Midway Flight", "Date" : "01 / 01 / 1777"},
{"Title" : "Expert Flight", "Date" : "01 / 01 / 1999"}
]') WITH ORDINALITY arr(newInputItem, index)
),
oldItem AS
(
SELECT
('{Stages,' || index - 1 || ',"Date"}')::TEXT[] AS path,
user_id,
arr.oldItem ::json ->> 'Title' AS title
FROM
department.Process_Instance
jsonb_array_elements(process_instance_data -> 'Stages') WITH ORDINALITY arr(oldItem, index)
WHERE
department.Process_Instance."user_id" = 17
)
UPDATE
department.Process_Instance pi
SET
process_instance_data = jsonb_set(process_instance_data, oldItem.path, to_json(newInputItem.NewDate)::JSONB)
FROM
oldItem,
newInputItem
WHERE
pi.user_id = oldItem.user_id
AND oldItem.title = newInputItem.State;
In order to make several updates into the same jsonb data within the same query, you need to create an aggregate function based on the standard jsonb_set function :
-- Transition function for the aggregate below. It is an overload of the
-- built-in 4-argument jsonb_set: the extra leading jsonb parameter is the
-- aggregate state. COALESCE(x, y) seeds the state with the original
-- document (y) on the first call, then keeps threading the accumulated
-- result (x) through each subsequent jsonb_set.
CREATE OR REPLACE FUNCTION jsonb_set (x jsonb, y jsonb, p text[], z jsonb, b boolean)
RETURNS jsonb LANGUAGE sql IMMUTABLE AS
$$ SELECT jsonb_set (COALESCE(x, y), p, z, b) ; $$ ;
-- Aggregate that applies many jsonb_set path updates to one document.
CREATE AGGREGATE jsonb_set_agg(jsonb, text[], jsonb, boolean)
( sfunc = jsonb_set, stype = jsonb) ;
Then, as you can't call an aggregate function directly in the SET clause of an UPDATE statement, you have to insert an additional cte before your UPDATE statement :
-- Build the jsonb path of every stage's Date field, join on Title to the
-- incoming items, and fold all updates into one document per user via the
-- jsonb_set_agg aggregate (plain jsonb_set in UPDATE would apply only one).
WITH newInputItem as
(
select
arr.newInputItem ::json ->> 'Title' as State,
-- The incoming payload carries the new value under the key 'Date';
-- the original read 'NewDate', which always yielded NULL.
(arr.newInputItem ::json ->> 'Date')::timestamp as NewDate
from
json_array_elements('[
{"Title" : "Midway Flight", "Date" : "01 / 01 / 1777"},
{"Title" : "Expert Flight", "Date" : "01 / 01 / 1999"}
]') WITH ORDINALITY arr(newInputItem, index)
), oldItem AS
(
-- One row per existing stage, with the text[] path to its Date field
-- (jsonb arrays are 0-based, hence index - 1).
SELECT
('{Stages,' || index - 1 || ',"Date"}')::TEXT[] AS path,
user_id,
-- expose the full document so the aggregate below can seed its state
process_instance_data,
arr.oldItem ::json ->> 'Title' AS title
FROM
department.Process_Instance
-- the original was missing the join between the table and the
-- set-returning function, which is a syntax error
CROSS JOIN LATERAL jsonb_array_elements(process_instance_data -> 'Stages') WITH ORDINALITY arr(oldItem, index)
WHERE
department.Process_Instance."user_id" = 17
), final AS
(
-- Collapse all matching stage updates into a single resulting document.
SELECT oldItem.user_id
, jsonb_set_agg( oldItem.process_instance_data, oldItem.path,
to_json(newInputItem.NewDate)::JSONB, True) AS data_final
FROM oldItem
INNER JOIN newInputItem
ON oldItem.title = newInputItem.State
GROUP BY oldItem.user_id
)
UPDATE
department.Process_Instance pi
SET
process_instance_data = final.data_final
FROM
final
WHERE
pi.user_id = final.user_id ;

Is there any way to build below using UDF in snowflake instead of flattening?

i have below tables
table1:
Payload(column)
{
"list": "212=1.00,214"
}
table 2 looks like below
i want result like below using UDF instead of using flatten
{
"test13": {
"code": "212",
"desc": "success",
"value": "1.00"
},
"test15": {
"code": "214",
"desc": "Impression",
"value": ""
}
}
You ought to be able to do JavaScript UDTFs (User-Defined Table Functions) https://docs.snowflake.com/en/sql-reference/udf-js-table-functions.html that can take the single row payload and return multiple rows.
So the SQL to do this, I understand you don't want:
-- End-to-end demo: parse the "code=value,code" list, join it to the lookup
-- rows, build one object per key, then merge them into a single object.
with table1 AS (
select parse_json('{"list": "212=1.00,214"}') as payload
), table2 AS (
-- Lookup rows: payload carries the code id, plus a target key and a label.
-- NOTE(review): key/value/desc are keywords in some SQL contexts — confirm
-- they parse unquoted as aliases on your Snowflake version.
select parse_json(column1) as payload
,column2 as key
,column3 as value
from values ('{"id":"212"}', 'test13', 'success' ),
('{"id":"214"}', 'test15', 'impression' )
), table1_demunged AS (
-- Split the comma list into rows, then each "id=val" pair into columns;
-- entries without '=' (e.g. "214") get a NULL val.
select split(f.value,'=')[0] as id
,split(f.value,'=')[1] as val
from table1 t, lateral flatten(input=>split(t.payload:list,',')) f
), tables_joined as (
-- Match each parsed code against the lookup rows.
select t2.key as obj_key
,t1.id as code
,t2.value as desc
,t1.val as value
from table2 t2
join table1_demunged t1 on t2.payload:id = t1.id
), as_objects AS (
-- One {code, desc, value} object per key; missing values become ''.
select obj_key, object_construct('code', code, 'desc', desc, 'value', coalesce(value,'')) as res
from tables_joined t
)
-- Merge all per-key objects into the single requested result object.
select object_agg(obj_key, res) object
from as_objects
group by true;
gives the result you do want:
OBJECT
{ "test13": { "code": "212", "desc": "success", "value": "1.00" }, "test15": { "code": "214", "desc": "impression", "value": "" } }
But I am not sure whether you really want a UDF to do all of that, given it's a FLATTEN, then a JOIN, and then some OBJECT_ functions; or whether you just want to avoid the FLATTEN because it is "tricky SQL and you want to hide it behind a UDF"; or perhaps you are using some system that cannot parse the => and thus need the flatten hidden behind a UDF — but in that case the UDF cannot do all the joins for you.
It just feels like there is more to the question than has been asked.

How do I parse an unnamed nested json array in the Snowflake database?

I've found the flatten function but the example is simpler than my real example. It can be created with the following code:
create or replace table test2 as
select '5dd17ef333de385a360e0ef1' as id,
parse_json('{
"_id" : "5dd17ef333de385a360e0ef1",
"uid" : "5dd175d3333b85961df27c51",
"task_info" : {
"reps=1" : [
[
{
"cached" : false,
"transform max RAM" : 51445000,
}
],
[
{
"cached" : false,
"transform max RAM" : 51445000,
}
],
[
{
"cached" : true,
"transform max RAM" : 51445000,
}
]
]
}
}')::variant as json_rec
;
Then my attempt to parse it:
select id
, json_rec:_id::string(100) as extracted_id
, value:cached::string as cached
, value
, json_rec
from
test2
, lateral flatten( input => json_rec:task_info )
;
The cached is clearly not going deep enough, although I am unclear of the syntax that is required to fish something like these values out. Thoughts?
If what you want is a separate row for each of the items in the innermost array (i.e. 3 rows for the above example), then you can use recursive=>true and filter on key='cached', like this:
-- recursive=>true expands every nesting level of task_info, producing one
-- row per key at every depth; filtering on key='cached' keeps exactly one
-- row per innermost object (3 rows for the sample data).
select id
, json_rec:_id::string(100) as extracted_id
-- value here is the scalar belonging to the matched 'cached' key.
, value as cached
, json_rec
from
test2
, lateral flatten( input => json_rec:task_info, recursive=>true)
where key='cached';

Query elements in a nested array of a json object in postgresql 9.4 or 9.5

{
"studentID": 1,
"StudentName": "jhon",
"Data":{
"schoolname":"school1",
"enrolmentInfo":
[{
"year":"2015",
"info":
[
{
"courseID":"csc213",
"school":"IT",
"enrollmentdate":"2015-01-01",
"finshdate":"2015-07-01",
"grade": 80 },
{
"courseID":"csc113",
"school":"IT1",
"enrollmentdate":"2015-09-02",
"finshdate":null,
"grade": 90 } ]
},
{
"year":"2014",
"info":
[{
"courseID":"info233",
"school":"IT",
"enrollmentdate":"2014-03-11",
"finshdate":"2014-09-01",
"grade": 81 },
{
"courseID":"csc783",
"school":"IT",
"enrollmentdate":"2014-01-02",
"finshdate":"2014-08-01",
"grade": 87 } ]
} ]
}
}
I have stored JSON objects of the above format in a PostgreSQL database. Each object consists of information about a certain student along with enrollment information. I have complex objects with arrays nested inside arrays. I am trying to select all the elements inside the "info" array. I tried to use the following query:
with recursive x (info) as (select value->'info' from jsontesting r, json_array_elements(r.data->'Data'->'enrolmentinfo')
UNION ALL
SELECT (e).value->'courseID', (e).value->'school', (e).value->'grade',(e).value->'enrollmentdate', (e).value->'finshdate'
from (select json_each(json_array_elements (info)) e from x) p)
select * from x;
This query is not working and it is giving the following error:"cannot call json_array_elements on a scalar". Is there any other query that I can use to extract the elements of the nested array "info"??
-- assuming that jsontesting.data contains your JSON:
-- first unnest the enrolmentInfo array (one row per year), then unnest
-- each year's "info" array (one row per course).
WITH yearly_info AS (
    SELECT year_entry -> 'info' AS courses
    FROM jsontesting AS j
    CROSS JOIN LATERAL json_array_elements(j.data -> 'Data' -> 'enrolmentInfo') AS year_entry
)
SELECT course ->> 'courseID',
       course ->> 'school',
       course ->> 'enrollmentdate',
       course ->> 'finshdate',
       course ->> 'grade'
FROM yearly_info AS y
CROSS JOIN LATERAL json_array_elements(y.courses) AS course;

Resources