Importing JSON file and discover key names with OUTER APPLY - sql-server

I'm trying to bulk import a JSON file with exercise data from Fitbit. Before I insert the data into a table, I want to find all the distinct key names used across the entire file.
TL;DR: How do I "collapse" the OUTER APPLY results below into a single set of distinct keys?
-- T-SQL local variables use the @ prefix; # denotes temp tables, so "declare #json"
-- is a syntax error. The variable is @json throughout.
declare
@json nvarchar(max) = '[
{
"logId": 5687739287,
"activityName": "Walk",
"activityTypeId": 90013,
"averageHeartRate": 100,
"calories": 140,
"duration": 1178000,
"activeDuration": 1178000,
"steps": 1584,
"logType": "auto_detected",
"manualValuesSpecified": {
"calories": false,
"distance": false,
"steps": false
},
"lastModified": "01/21/17 15:14:05",
"startTime": "01/20/17 20:07:43",
"originalStartTime": "01/20/17 20:07:43",
"originalDuration": 1178000,
"elevationGain": 0.0,
"hasGps": false,
"shouldFetchDetails": false,
"hasActiveZoneMinutes": false
},
{
"logId": 8704352278,
"activityName": "Bike",
"activityTypeId": 90001,
"averageHeartRate": 147,
"calories": 742,
"distance": 10.955718,
"distanceUnit": "Mile",
"duration": 3823000,
"activeDuration": 3579000,
"source": {
"type": "tracker",
"name": "Charge 2",
"id": "86599831",
"url": "https://www.fitbit.com/",
"trackerFeatures": [
"HEARTRATE",
"GPS",
"DISTANCE",
"CALORIES",
"SPEED",
"ELEVATION"
]
},
"logType": "tracker",
"manualValuesSpecified": {
"calories": false,
"distance": false,
"steps": false
},
"tcxLink": "REDACTED",
"speed": 11.020001341156748,
"lastModified": "07/10/17 01:05:32",
"startTime": "07/09/17 23:53:39",
"originalStartTime": "07/09/17 23:53:39",
"originalDuration": 3823000,
"elevationGain": 497.998688,
"hasGps": true,
"shouldFetchDetails": true,
"hasActiveZoneMinutes": false
}
]';
-- #exercise is a genuine temp table, so the # prefix is correct here.
IF OBJECT_ID('tempdb..#exercise') IS NOT NULL DROP TABLE #exercise
-- Shred the JSON array with an explicit schema; keys not listed in WITH are dropped.
SELECT activity.*
FROM OPENJSON (@json)
WITH(
logId bigint
,activityName varchar(max)
,activityTypeId int
,source nvarchar(max) as JSON
,averageHeartRate int
/*
????
not all keys are known
????
*/
) AS activity
/*
I cannot take credit for this trick.
It shows me all the keys, BUT
it's for EACH record, and there are hundreds of records!
How do I collapse these results to see a single set of distinct keys?
*/
-- Default-schema OPENJSON: L1 enumerates array elements, L2 enumerates each element's keys.
SELECT L1.[key], L2.[key], L2.[value]
FROM openjson(@json,'$') AS L1
OUTER APPLY openjson(L1.[value]) AS L2
The source file is relatively consistent, but not all entries will have the same keys, as shown in the example above. The "Bike" activity has more content than the "Walk" activity: source: {}, speed, tcxLink, distanceUnit, and so on.
Although I can target and grab data with FROM OPENJSON, I simply don't know what keys to expect throughout the entire file.
...
FROM OPENJSON (#json)
WITH(
logId bigint
,activityName varchar(max)
,activityTypeId int
,source nvarchar(max) as JSON
,averageHeartRate int
/*
????
not all keys are known
????
*/
)
So... this OUTER APPLY is helpful, but is there any way to "collapse" it so that I see a single set of all used keys? (not repeated for every single activity)

You can use the DISTINCT keyword to condense the results:
-- DISTINCT collapses the per-record key rows into a single set of unique key names.
-- (Variable prefix corrected: T-SQL local variables use @, not #.)
SELECT distinct L2.[key]
FROM openjson(@json,'$') AS L1
OUTER APPLY openjson(L1.[value]) AS L2

Related

Read JSON data from sql server and convert to row and column

{
"product": "car",
"fuel": {
"Maruti": {
"Model": {
"LXI": [
{
"Price": "700000"
}
],
"VXI": [
{
"Price": "800000"
}
],
"ZXI": [
{
"Price": "900000"
}
]
}
}
}
}
I have this JSON stored in a SQL table and need to fetch it from the CarDetails table.
table structure is as below:
ID
JSONData(this col has JSON stored)
CreatedDate
There can be more models as well — 10, 20, or even 1000. I need all the different prices for this in SQL Server. Can anyone assist?
I need this data as:
700000
800000
900000
can anyone help me out in this.
I tried CROSS APPLY but it did not help.
Having the model names as property keys complicates things slightly, but it's still possible to extract this data with multiple cross applies of the OPENJSON() function, e.g.:
-- Table variables use the @ prefix in T-SQL; "declare #demo table" is invalid
-- (# would be a temp table, which is created with CREATE TABLE instead).
declare @demo table (
ID int,
JSONData nvarchar(max)
);
-- NB: the snippet as posted had one closing brace too many, making the literal
-- invalid JSON (OPENJSON would raise an error); the braces are balanced here.
insert @demo (ID, JSONData) values (47, N'{
"product": "car",
"fuel": {
"Maruti": {
"Model": {
"LXI": [
{
"Price": "700000"
}
],
"VXI": [
{
"Price": "800000"
}
],
"ZXI": [
{
"Price": "900000"
}
]
}
}
}');
-- J extracts the Model object as JSON; K enumerates its model-name keys;
-- L shreds each model's array element to pull out the Price.
select ID, J.product, K.[key] as [Model], L.Price
from @demo
cross apply openjson(JSONData) with (
product nvarchar(50),
ModelJSON nvarchar(max) '$.fuel.Maruti.Model' as JSON
) J
cross apply openjson(J.ModelJSON) K -- default columns: [key],[value],[type]
cross apply openjson(K.[value]) with (
Price nvarchar(10)
) L;
Which yields the results:
ID
product
Model
Price
47
car
LXI
700000
47
car
VXI
800000
47
car
ZXI
900000

Parse JSON file - Improve query in SQL Server

From this JSON File (just an example) I need to reach this final result
{
"Id": "101",
"name": "C01",
"testparameters": {
"room": [
{
"Floor": "First_Floor",
"Rooms": ["Room1", "Room2", "Room3"]
},
{
"Floor": "Second_Floor",
"Rooms": ["Room1", "Room2", "Room3"]
}
],
"area": [
{
"Name": "Area1",
"Subarea": ["Subarea1", "Subarea2", "Subarea3"]
},
{
"Name": "Area2",
"Subarea": ["Subarea4", "Subarea5"]
}
],
"requirements": [{
"condition": "",
"type": "type1",
"field1": "",
"field2": "aaaaa",
"operator": "",
"value2": ""
},
{
"condition": "AND",
"type": "type2",
"field1": "",
"field2": "numPersons",
"operator": ">",
"value2": "20"
},
{
"condition": "OR",
"type": "type2",
"field1": "",
"field2": "specification",
"operator": "=",
"value2": "wifi"
}
]
}
}
In one record I need to have all the information that is requested.
This is the first time that I need to parse a JSON file. After asking (a lot) I manage to reach the expected result by doing this:
Parsing JSON Example
However, I had to open the JSON file several times, and process each section apart. I'm wondering, how can I improve the code by reducing the number of times that I need to use the OPENJSON function, and in particular, how to rewrite the code snippet that handle the requirements section.
I must say, your desired result looks pretty de-normalized, you may want to rethink it.
Be that as it may, you can combine these quite easily, by using nested subqueries
-- One row per document: each JSON section is flattened to a single string via
-- STRING_AGG inside a correlated scalar subquery.
SELECT
ID = JSON_VALUE(j.json, '$.Id'),
name = JSON_VALUE(j.json, '$.name'),
area = (
SELECT STRING_AGG(concat(d.a , ':', b.value),' - ')
from openjson(j.json, '$.testparameters.area')
with
(
a nvarchar(250) '$.Name',
s nvarchar(max) '$.Subarea' as json
) as d
cross apply openjson(d.s) as b
),
room = (
SELECT STRING_AGG(concat(c.f, ':', d.value), ' - ')
from openjson(j.json, '$.testparameters.room')
with(
f nvarchar(50) '$.Floor',
r nvarchar(Max) '$.Rooms' as json
) as c
cross apply openjson(c.r) as d
),
requirements = (
-- Strip a leading "AND " (4 chars) or "OR " (3 chars) left over from the
-- aggregation. The original stripped 4 characters in both cases, which
-- swallowed the first letter after a 3-character "OR " prefix.
SELECT IIF(
SUBSTRING(requirements,1,4) = 'AND ',
SUBSTRING(requirements,5,LEN(requirements)),
IIF(
SUBSTRING(requirements,1,3) = 'OR ',
SUBSTRING(requirements,4,LEN(requirements)),
requirements
)
)
from
(
select
STRING_AGG(CONCAT_WS(' ',
a.condition,
a.field2,
operator,
-- quote non-numeric comparison values
IIF (ISNUMERIC(a.value2) = 1,
a.value2,
CONCAT('''',a.value2,'''')
)
),
' ') as requirements
from openjson(j.json, '$.testparameters.requirements' )
with
(
condition nvarchar(255) '$.condition',
type nvarchar(255) '$.type',
field2 nvarchar(255) '$.field2',
operator nvarchar(255) '$.operator',
value2 nvarchar(255) '$.value2'
) a
where a.type = 'type2'
) a
)
-- @ prefix: #json would reference a temp table, not the variable
FROM (VALUES(@json)) AS j(json) -- or you can reference a table

How to write a SQL query in CosmosDB for a JSON document which has nested/multiple array

I need to write a SQL query in the CosmosDB query editor, that will fetch results from JSON documents stored in Collection, as per my requirement shown below
The example JSON
{
"id": "abcdabcd-1234-1234-1234-abcdabcdabcd",
"source": "Example",
"data": [
{
"Laptop": {
"New": "yes",
"Used": "no",
"backlight": "yes",
"warranty": "yes"
}
},
{
"Mobile": [
{
"order": 1,
"quantity": 2,
"price": 350,
"color": "Black",
"date": "07202019"
},
{
"order": 2,
"quantity": 1,
"price": 600,
"color": "White",
"date": "07202019"
}
]
},
{
"Accessories": [
{
"covers": "yes",
"cables": "few"
}
]
}
]
}
Requirement:
SELECT 'warranty' (Laptop), 'quantity' (Mobile), 'color' (Mobile), 'cables' (Accessories) for a specific 'date' (for eg: 07202019)
I've tried the following query
-- Attempted query: the array indexes (data[1], Mobile[0]) are hard-coded, so only
-- the first Mobile entry is ever projected — the limitation described below.
SELECT
c.data[0].Laptop.warranty,
c.data[1].Mobile[0].quantity,
c.data[1].Mobile[0].color,
c.data[2].Accessories[0].cables
FROM c
WHERE ARRAY_CONTAINS(c.data[1].Mobile, {date : '07202019'}, true)
Original Output from above query:
[
{
"warranty": "yes",
"quantity": 2,
"color": "Black",
"cables": "few"
}
]
But how can I get this Expected Output, that has all order details in the array 'Mobile':
[
{
"warranty": "yes",
"quantity": 2,
"color": "Black",
"cables": "few"
},
{
"warranty": "yes",
"quantity": 1,
"color": "White",
"cables": "few"
}
]
Since I wrote c.data[1].Mobile[0].quantity i.e 'Mobile[0]' which is hard-coded, only one entry is returned in the output (i.e. the first one), but I want to have all the entries in the array to be listed out
Please consider using JOIN operator in your sql:
-- Intra-document JOINs flatten each element of c.data and then each element of
-- data.Mobile, yielding one result row per Mobile entry instead of a fixed Mobile[0].
SELECT DISTINCT
c.data[0].Laptop.warranty,
mobile.quantity,
mobile.color,
c.data[2].Accessories[0].cables
FROM c
JOIN data in c.data
JOIN mobile in data.Mobile
WHERE ARRAY_CONTAINS(data.Mobile, {date : '07202019'}, true)
Output:
Update Answer:
Your sql:
-- Follow-up attempt: two parallel intra-document JOINs (Mobile and Accessories).
-- The advice below explains why this does not work as intended in Cosmos DB.
SELECT DISTINCT c.data[0].Laptop.warranty, mobile.quantity, mobile.color, accessories.cables FROM c
JOIN data in c.data JOIN mobile in data.Mobile
JOIN accessories in data.Accessories
WHERE ARRAY_CONTAINS(data.Mobile, {date : '07202019'}, true)
My advice:
I have to say that, actually, a Cosmos DB JOIN operation is limited to the scope of a single document. What is possible is joining a parent object with child objects under the same document. Cross-document joins are NOT supported. However, your SQL tries to perform multiple parallel joins. In other words, Accessories and Mobile are siblings in the hierarchy, not nested within one another.
I suggest using a stored procedure to execute the two SQL queries and then combine the results, or implementing the above process in application code.
Please see this case:CosmosDB Join (SQL API)

Postgresql get elements of a JSON array

Let's say that we have the following JSON in Postgresql:
{ "name": "John", "items": [ { "item_name": "lettuce", "price": 2.65, "units": "no" }, { "item_name": "ketchup", "price": 1.51, "units": "litres" } ] }
The JSONs are stored in the following table:
-- One JSON document per row.
create table testy_response_p (
ID serial NOT NULL PRIMARY KEY,
content_json json NOT NULL
); -- terminator was missing, which breaks the script when both statements run together
insert into testy_response_p (content_json) values (
'{ "name": "John", "items": [ { "item_name": "lettuce", "price": 2.65, "units": "no" }, { "item_name": "ketchup", "price": 1.51, "units": "litres" } ] }'
);
Since the following can return either JSON or text (with -> and ->> respectively select content_json ->> 'items' from testy_response_p) I want to use a subquery in order to get elements of the array under items:
-- This is the failing attempt the question is about: a subquery used as a function
-- argument must be parenthesized, and ->> returns text while json_array_elements()
-- expects json (both points are addressed in the answer below).
select *
from json_array_elements(
select content_json ->> 'items' from testy_response_p
)
All I get is an error but I don't know what I'm doing wrong. The output of the subquery is text. The final output is:
{ "item_name": "lettuce", "price": 2.65, "units": "no" }
{ "item_name": "ketchup", "price": 1.51, "units": "litres" }
You need to join to the function's result. You can't use the ->> operator because that returns text, not json and json_array_elements() only works with a JSON value for its input.
-- Produce one output row per element of the "items" array, keyed by the row id.
-- The -> operator (not ->>) keeps the value as json for json_array_elements().
select t.id, item.*
from testy_response_p as t
cross join lateral json_array_elements(t.content_json -> 'items') as item;
Online example: https://rextester.com/MFGEA29396

Query for array elements inside JSON[] field array type

I have tried to unnest the JSON array with the function json_array_elements() and tried to count the elements of the array using json_array_length(field_name) not being successful. I am using PostgreSQL 9.4.5.
I was looking to query the result for the element "name" this is the data held on the json type array field crew:
[
{
"workHours": "9",
"workers": "50",
"checker_rate": 100,
"rate": 150,
"name": "Ramona",
"last": null,
"boxRate": 2,
"checkTraining": false,
"editing": true,
"ix": 0,
"breakPay": 3.0833333333333335,
"trainingPay": 0
},
{
"workHours": "4",
"workers": "50",
"checker_rate": 120,
"rate": 160,
"name": "Ramon",
"last": "Rosas",
"boxRate": 2,
"checkTraining": false,
"editing": false,
"id": 1,
"breakPay": 1.5416666666666667,
"trainingPay": 0
}
]
Your problem stems from the incorrect use of the type json[]. A json array is a single json object and its type is json, not json[]. Example:
-- Fixture: a json array is a single json value, so the column type is json, not json[].
create table test (id int, crew json);
insert into test values
(1, '
[
{
"workHours": "9",
"workers": "50",
"checker_rate": 100,
"rate": 150,
"name": "Ramona",
"last": null,
"boxRate": 2,
"checkTraining": false,
"editing": true,
"ix": 0,
"breakPay": 3.0833333333333335,
"trainingPay": 0
},
{
"workHours": "4",
"workers": "50",
"checker_rate": 120,
"rate": 160,
"name": "Ramon",
"last": "Rosas",
"boxRate": 2,
"checkTraining": false,
"editing": false,
"id": 1,
"breakPay": 1.5416666666666667,
"trainingPay": 0
}
]');
The function json_array_elements() works as expected:
-- Expand the crew array and extract each element's "name" field (one row per element).
select t.id, member -> 'name' as name
from test as t
cross join lateral json_array_elements(t.crew) as member;
id | name
----+----------
1 | "Ramona"
1 | "Ramon"
(2 rows)
One of the queries (or both) should work well with json[]:
-- NOTE(review): these two variants assume crew is declared as json[] (the type the
-- question used); they are offered as alternatives and were not shown executing above.
-- crew[1]: Postgres arrays are 1-based, so this takes the first json element.
select id, elem->'name' as name
from test
cross join json_array_elements(crew[1]) elem;
-- unnest(crew) expands the json[] array, then each element is itself expanded.
select id, elem->'name' as name
from test
cross join unnest(crew)
cross join json_array_elements(unnest) elem;

Resources