I have created a stored procedure in Snowflake to dynamically pivot the table and create a view from JSON data. The query creates a view with 370 columns as I desired but it's too slow to query over the view in fact querying a single row takes up to 20 mins
create or replace procedure create_pivot_view()
returns string
language javascript
execute as caller as
$$
var cols_query = ` select
concat('\\'',
listagg(
distinct f.value:signalName::STRING,'\\',\\''),'\\'') as signal_name_list
FROM
(select *
FROM
PM_POWER),table(flatten(input=>c1:data:baseData:signals, mode=>'ARRAY')) as f`;
var stmt1 = snowflake.createStatement({sqlText: cols_query});
var results1 = stmt1.execute();
results1.next();
var col_list = results1.getColumnValue(1);
pivot_query = `
create or replace view AWSS3_PM.PUBLIC.PM_POWER_CN8000_V2 as
select * from (
select figures,stats,SignalName,id,Latitude,Longitude,Altitude
from (
select
c1:id::STRING as id,
c1:data:baseData:unitID::varchar as UnitID,
c1:data:baseData:latitude::varchar as Latitude,
c1:data:baseData:longitude::varchar as Longitude,
c1:data:baseData:altitude::varchar as Altitude,
c1:timestamp::varchar as TimeStamp,
f.value:"dataValue"::varchar as SignalDataValue,
f.value:"dataValueValid"::varchar as SignalDataValueValid,
f.value:"signalID"::varchar as SignalID,
f.value:"units"::varchar as SignalUnits,
f.value:"dataValueEnum"::varchar as SignalDataEnum,
f.value:"signalName"::varchar as SignalName
from
(
select *
FROM
PM_POWER), table(flatten(input=>c1:data:baseData:signals, mode=>'ARRAY')) as f
) flt
unpivot (figures for stats in(UnitID,SignalDataValue, SignalDataValueValid, SignalID, SignalUnits, SignalDataEnum, TimeStamp))
) up
pivot (min(up.figures) for up.SignalName in (${col_list}
))
`;
var stmt2 = snowflake.createStatement({sqlText: pivot_query});
stmt2.execute();
return pivot_query;
$$;
call create_pivot_view();
Any suggestions to fasten this approach or an alternative would be really grateful to improve the performance of the query.
A single row of JSON data looks like the below:
{
"data": {
"baseData": {
"altitude": 0,
"altitudeValid": "false",
"customerID": "CN",
"kind": "Power",
"latitude": 0,
"latitudeValid": "false",
"longitude": 0,
"longitudeValid": "false",
"name": "Predictive Maintenance Data.Power",
"signals": [
{
"dataValue": 3112900,
"dataValueValid": "true",
"signalID": 3424,
"signalName": "mainGeneratorPower",
"units": "kW"
},
{
"dataValue": 1.4035000801086426,
"dataValueValid": "true",
"signalID": 380,
"signalName": "DB_HandlePos",
"units": "V"
},
{
"dataValue": 2,
"dataValueEnum": "Reverse",
"dataValueValid": "true",
"signalID": 813,
"signalName": "reverserPos",
"units": "none"
},
{
"dataValue": 291400,
"dataValueValid": "true",
"signalID": 586,
"signalName": "tractiveEffort",
"units": "none"
},
{
"dataValue": 8,
"dataValueEnum": "T 8",
"dataValueValid": "true",
"signalID": 899,
"signalName": "throttlePos",
"units": "none"
},
{
"dataValue": 47.05950164794922,
"dataValueValid": "true",
"signalID": 1805,
"signalName": "AWTF",
"units": "°C"
},
{
"dataValue": 0.2971585690975189,
"dataValueValid": "true",
"signalID": 4925,
"signalName": "ChpDty",
"units": "none"
},
{
"dataValue": 20.14109992980957,
"dataValueValid": "true",
"signalID": 4835,
"signalName": "LDBBLWA",
"units": "A"
},
{
"dataValue": 36.02000045776367,
"dataValueValid": "true",
"signalID": 2669,
"signalName": "IcGVelM",
"units": "km/hr"
},
{
"dataValue": 479185.125,
"dataValueValid": "true",
"signalID": 1070,
"signalName": "WPEgILP",
"units": "PSIG"
},
{
"dataValue": 293026.875,
"dataValueValid": "true",
"signalID": 1799,
"signalName": "WPEgOtP",
"units": "PSIG"
},
{
"dataValue": 926750,
"dataValueValid": "true",
"signalID": 4698,
"signalName": "MR2 Prs",
"units": "PSIG"
},
{
"dataValue": 24,
"dataValueEnum": "ON",
"dataValueValid": "true",
"signalID": 664,
"signalName": "MVCC>",
"units": "none"
},
{
"dataValue": 907422.625,
"dataValueValid": "true",
"signalID": 4804,
"signalName": "SR Pres",
"units": "PSIG"
}
],
"unitID": "CN 8000",
"ver": "1.0.0"
},
"baseType": "PredictiveMaintenanceData"
},
"dataName": "CN8000.Prod.PredictiveMaintenanceData",
"id": "18a89f9e-9620-4453-a546-23412025e7c0",
"tags": {
"iaapl.access.level1": "Private",
"iaapl.access.level2": "OEM",
"iaapl.internal.deviceID": "",
"iaapl.internal.deviceName": "",
"iaapl.internal.encodeTime": "2021-02-25T07:41:19.000Z",
"iaapl.internal.sender": "Intelligent",
"iaapl.software.name": "",
"iaapl.software.partNumber": 0,
"iaapl.software.version": ""
},
"timestamp": "2021-02-25T07:32:31.000Z"
}
The result should look like :
Thanks in Advance
the bits where you type
( select * FROM PM_POWER)
could just be
PM_POWER
Your JSON is malformed.
You are reading from data:baseData:signals but there is no signals in you data..
If you want help with your View performance, I would remove the whole Stored Procedure part. And get SELECT that get the result you want from the demo data you provide. Because presently.
Update:
So your example data still does not generate the desired output. So if this is a performance improvement project, it's super frustrating, to not even have the input + code = output
anyways, here's you data and code mixed together:
with pm_power as (
select parse_json('{
"data": {
"baseData": {
"altitude": 0,
"altitudeValid": "false",
"customerID": "CN",
"kind": "Power",
"latitude": 0,
"latitudeValid": "false",
"longitude": 0,
"longitudeValid": "false",
"name": "Predictive Maintenance Data.Power",
"signals": [
{
"dataValue": 3112900,
"dataValueValid": "true",
"signalID": 3424,
"signalName": "mainGeneratorPower",
"units": "kW"
},
{
"dataValue": 1.4035000801086426,
"dataValueValid": "true",
"signalID": 380,
"signalName": "DB_HandlePos",
"units": "V"
},
{
"dataValue": 2,
"dataValueEnum": "Reverse",
"dataValueValid": "true",
"signalID": 813,
"signalName": "reverserPos",
"units": "none"
},
{
"dataValue": 291400,
"dataValueValid": "true",
"signalID": 586,
"signalName": "tractiveEffort",
"units": "none"
},
{
"dataValue": 8,
"dataValueEnum": "T 8",
"dataValueValid": "true",
"signalID": 899,
"signalName": "throttlePos",
"units": "none"
},
{
"dataValue": 47.05950164794922,
"dataValueValid": "true",
"signalID": 1805,
"signalName": "AWTF",
"units": "°C"
},
{
"dataValue": 0.2971585690975189,
"dataValueValid": "true",
"signalID": 4925,
"signalName": "ChpDty",
"units": "none"
},
{
"dataValue": 20.14109992980957,
"dataValueValid": "true",
"signalID": 4835,
"signalName": "LDBBLWA",
"units": "A"
},
{
"dataValue": 36.02000045776367,
"dataValueValid": "true",
"signalID": 2669,
"signalName": "IcGVelM",
"units": "km/hr"
},
{
"dataValue": 479185.125,
"dataValueValid": "true",
"signalID": 1070,
"signalName": "WPEgILP",
"units": "PSIG"
},
{
"dataValue": 293026.875,
"dataValueValid": "true",
"signalID": 1799,
"signalName": "WPEgOtP",
"units": "PSIG"
},
{
"dataValue": 926750,
"dataValueValid": "true",
"signalID": 4698,
"signalName": "MR2 Prs",
"units": "PSIG"
},
{
"dataValue": 24,
"dataValueEnum": "ON",
"dataValueValid": "true",
"signalID": 664,
"signalName": "MVCC>",
"units": "none"
},
{
"dataValue": 907422.625,
"dataValueValid": "true",
"signalID": 4804,
"signalName": "SR Pres",
"units": "PSIG"
}
],
"unitID": "CN 8000",
"ver": "1.0.0"
},
"baseType": "PredictiveMaintenanceData"
},
"dataName": "CN8000.Prod.PredictiveMaintenanceData",
"id": "18a89f9e-9620-4453-a546-23412025e7c0",
"tags": {
"iaapl.access.level1": "Private",
"iaapl.access.level2": "OEM",
"iaapl.internal.deviceID": "",
"iaapl.internal.deviceName": "",
"iaapl.internal.encodeTime": "2021-02-25T07:41:19.000Z",
"iaapl.internal.sender": "Intelligent",
"iaapl.software.name": "",
"iaapl.software.partNumber": 0,
"iaapl.software.version": ""
},
"timestamp": "2021-02-25T07:32:31.000Z"
}')as c1
)
select *
from (
select
figures,
stats,
SignalName,
id,
Latitude,
Longitude,
Altitude
from (
select
c1:id::STRING as id,
c1:data:baseData:unitID::varchar as UnitID,
c1:data:baseData:latitude::varchar as Latitude,
c1:data:baseData:longitude::varchar as Longitude,
c1:data:baseData:altitude::varchar as Altitude,
c1:timestamp::varchar as TimeStamp,
f.value:"dataValue"::varchar as SignalDataValue,
f.value:"dataValueValid"::varchar as SignalDataValueValid,
f.value:"signalID"::varchar as SignalID,
f.value:"units"::varchar as SignalUnits,
f.value:"dataValueEnum"::varchar as SignalDataEnum,
f.value:"signalName"::varchar as SignalName
from PM_POWER
,table( flatten(input=>c1:data:baseData:signals, mode=>'ARRAY') ) as f
) flt
unpivot (figures for stats in(UnitID, SignalDataValue, SignalDataValueValid, SignalID, SignalUnits, SignalDataEnum, TimeStamp))
) up
pivot (min(up.figures) for up.SignalName in (
'mainGeneratorPower','DB_HandlePos','reverserPos','tractiveEffort','throttlePos','AWTF','LDBBLWA','IcGVelM','WPEgOtP','MR2 Prs','SR Pres','ChpDty','MVCC>','WPEgILP'
))
STATS
ID
LATITUDE
LONGITUDE
ALTITUDE
'mainGeneratorPower'
'DB_HandlePos'
'reverserPos'
'tractiveEffort'
'throttlePos'
'AWTF'
'LDBBLWA'
'IcGVelM'
'WPEgOtP'
'MR2 Prs'
'SR Pres'
'ChpDty'
'MVCC>'
'WPEgILP'
UNITID
18a89f9e-9620-4453-a546-23412025e7c0
0
0
0
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
CN 8000
SIGNALID
18a89f9e-9620-4453-a546-23412025e7c0
0
0
0
3424
380
813
586
899
1805
4835
2669
1799
4698
4804
4925
664
1070
SIGNALUNITS
18a89f9e-9620-4453-a546-23412025e7c0
0
0
0
kW
V
none
none
none
°C
A
km/hr
PSIG
PSIG
PSIG
none
none
PSIG
TIMESTAMP
18a89f9e-9620-4453-a546-23412025e7c0
0
0
0
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
2021-02-25T07:32:31.000Z
SIGNALDATAVALUEVALID
18a89f9e-9620-4453-a546-23412025e7c0
0
0
0
true
true
true
true
true
true
true
true
true
true
true
true
true
true
SIGNALDATAENUM
18a89f9e-9620-4453-a546-23412025e7c0
0
0
0
Reverse
T 8
ON
SIGNALDATAVALUE
18a89f9e-9620-4453-a546-23412025e7c0
0
0
0
3112900
1.4035000801086426
2
291400
8
47.05950164794922
20.14109992980957
36.02000045776367
293026.875
926750
907422.625
0.2971585690975189
24
479185.125
An Answer:
so this SQL gets the answer in the same structure BUT timestamp,unitid are not rows as those are all the same value, so it seem having them as rows is a mistake, and they should be a column..
select
case lower(o.key )
when 'datavalue' then 'SignalDataValue'
when 'datavaluevalid' then 'SignalDataValueValid'
when 'signalid' then 'SignalID'
when 'units' then 'SignalUnits'
when 'datavalueenum' then 'SignalDataEnum'
when 'signalname' then 'SignalName'
end as stats,
c1:id::STRING as id,
c1:data:baseData:unitID::varchar as UnitID,
c1:data:baseData:latitude::varchar as Latitude,
c1:data:baseData:longitude::varchar as Longitude,
c1:data:baseData:altitude::varchar as Altitude,
c1:timestamp::varchar as TimeStamp
,max(iff(f.value:"signalName"='mainGeneratorPower', o.value, null)) as "'mainGeneratorPower'"
,max(iff(f.value:"signalName"='DB_HandlePos', o.value, null)) as "'DB_HandlePos'"
,max(iff(f.value:"signalName"='reverserPos', o.value, null)) as "'reverserPos'"
,max(iff(f.value:"signalName"='tractiveEffort', o.value, null)) as "'tractiveEffort'"
,max(iff(f.value:"signalName"='throttlePos', o.value, null)) as "'throttlePos'"
,max(iff(f.value:"signalName"='AWTF', o.value, null)) as "'AWTF'"
,max(iff(f.value:"signalName"='LDBBLWA', o.value, null)) as "'LDBBLWA'"
,max(iff(f.value:"signalName"='IcGVelM', o.value, null)) as "'IcGVelM'"
,max(iff(f.value:"signalName"='WPEgOtP', o.value, null)) as "'WPEgOtP'"
,max(iff(f.value:"signalName"='MR2 Prs', o.value, null)) as "'MR2 Prs'"
,max(iff(f.value:"signalName"='SR Pres', o.value, null)) as "'SR Pres'"
,max(iff(f.value:"signalName"='ChpDty', o.value, null)) as "'ChpDty'"
,max(iff(f.value:"signalName"='MVCC>', o.value, null)) as "'MVCC>'"
,max(iff(f.value:"signalName"='WPEgILP', o.value, null)) as "'WPEgILP'"
from PM_POWER
,table( flatten(input=>c1:data:baseData:signals, mode=>'ARRAY') ) as f
,table( flatten(input=>f.value, mode=>'OBJECT')) as o
WHERE o.key != 'signalName'
GROUP BY 1,2,3,4,5,6,7
ORDER BY 1;
gives:
which means your Store Procedure needs to build that.. which is you are happy that this is more performant, you can solve how to build that above in your SP.
The table:
table Bar(
id int,
my_json jsonb
)
Example of what my_json looks like:
{
"foo": "bar",
"columns": [
{
"type": "integer",
"field": "myField",
"children": [
{
"type": "integer",
"field": "myField"
}
]
},
{
"type": "integer",
"field": "otherField",
"children": [
{
"type": "integer",
"field": "myField"
}
]
},
{
"type": "integer",
"field": "baz",
"children": null
}
]
}
I need to change all the "type": "integer" to "type": "string" where the matching "field" equals to "myField".
I made the lvl 1 (at columns lvl) with this next sql query:
with query as (
select ('{columns,' || index - 1 || ',type}')::text[] as path,
b.id
from Bar b,
jsonb_array_elements(my_json -> 'columns') with ordinality arr(col, index)
where col ->> 'field' = 'myField'
and col ->> 'type' = 'integer'
)
update Bar b
set my_json = jsonb_set(b.my_json, query.path, '"string"', false)
from query
where b.id = query.id;
I have to achieve the lvl 2 (ideally understand any lvl) with the nested array children which may be null, and I don't know how to do it.
I made it! Here's the solution I found:
with non_null_children as (
select index - 1 as idx,
col,
b.id as bar_id
from Bar b,
jsonb_array_elements(my_json -> 'columns') with ordinality arr(col, index)
where col ->> 'children' is not null
), query as (
select ('{columns,' || nnc.idx || ',children,' || index - 1 || ',type}')::text[] as path,
b.id
from Bar b,
non_null_children nnc,
jsonb_array_elements(nnc.col -> 'children') with ordinality arr(col2, index)
where b.id = nnc.bar_id
and col2 ->> 'field' = 'myField'
and col2 ->> 'type' = 'integer'
)
update Bar b
set my_json = jsonb_set(b.my_json, query.path, '"string"', false)
from query
where b.id = query.id;
Explanation:
I list all the items which contain children, each combined with its columns index and its Bar id -> non_null_children
For each Bar id / column index, I can work on the matching field operation, just like the lvl 1.
I don't know if this is the most optimized solution, feel free to improve it if you think it's necessary.
I get the below data from the 'World of Tanks' API and would like to show it in Power BI. The challenge is the arrays I get for damage and piercing_power. I would like to show 3 rows from this data with the damage and piercing power next to each other, how would I do that?
"4": {
"nation_i18n": "U.S.S.R.",
"name": "_76mm_L-11",
"price_gold": 0,
"level": 4,
"damage": [
110,
110,
156
],
"nation": "ussr",
"rate": 12.27,
"price_credit": 25990,
"tanks": [
1,
2049
],
"module_id": 4,
"name_i18n": "76 mm L-11",
"piercing_power": [
68,
75,
38
],
"turrets": [
259,
3,
3843
]
}
After converting to a Table and Expanding, I would Add a Column and use the Table.FromColumns function to construct a nested table on each row from the damage and piercing_power list-type columns. Here's my attempt at that:
= Table.FromColumns ( { [damage] , [piercing_power] } , { "damage" , "piercing_power" } )
That table can then be expanded to get your required output.
Here's my full script:
let
Source = Json.Document(File.Contents("C:\Users\mikeh\Downloads\WOT.json")),
#"Converted to Table" = Record.ToTable(Source),
#"Expanded Value" = Table.ExpandRecordColumn(#"Converted to Table", "Value", {"nation_i18n", "name", "price_gold", "level", "damage", "nation", "rate", "price_credit", "tanks", "module_id", "name_i18n", "piercing_power", "turrets"}, {"nation_i18n", "name", "price_gold", "level", "damage", "nation", "rate", "price_credit", "tanks", "module_id", "name_i18n", "piercing_power", "turrets"}),
#"Added MyTable" = Table.AddColumn(#"Expanded Value", "MyTable", each Table.FromColumns ( { [damage] , [piercing_power] } , { "damage" , "piercing_power" } )),
#"Expanded MyTable" = Table.ExpandTableColumn(#"Added MyTable", "MyTable", {"damage", "piercing_power"}, {"damage.1", "piercing_power.1"}),
#"Renamed Columns" = Table.RenameColumns(#"Expanded MyTable",{{"name", "name.1"}})
in
#"Renamed Columns"
I have an array of cities and states. Looks something like this:
locations = ["Colorado Springs","CO","Denver","CO","Kissimmee","FL","Orlando", "FL"]
I would ultimately like to get this result:
locations = ["Colorado Springs, CO","CO","Denver, CO","CO","Kissimmee, FL","FL","Orlando, FL", "FL"]
I did this to test:
locations[0] << ", #{locations[1]}"
And got this as a result:
locations = ["Colorado Springs, CO", "CO", "Denver", "CO", "Kissimmee", "FL", "Orlando", "FL"]
I am attempting the code below to convert the rest of the array but getting nil as a response:
locations = ["Colorado Springs","CO","Denver","CO","Kissimmee","FL","Orlando", "FL"]
counter0 = 0
counter1 = 1
while counter0 < locations.length
locations[counter0] << locations[counter1]
counter0 += 2
counter1 += 2
end
=> nil
locations.each_slice(2).flat_map { |city, state| ["#{city}, #{state}", state] }
#=> ["Colorado Springs, CO", "CO", "Denver, CO", "CO",
# "Kissimmee, FL", "FL", "Orlando, FL", "FL"]
The key is to use flat_map.
locations.each_slice(2).flat_map{|x, y| [[x, y].join(", "), y]}
# => ["Colorado Springs, CO", "CO", "Denver, CO", "CO", "Kissimmee, FL", "FL", "Orlando, FL", "FL"]