How can I "shred" JSON data already in a SQL database? - sql-server

I have successfully imported a JSON file into a SQL Server 2016 database, and I am now attempting to parse that data so that I can populate a table from each of the fields in the column that holds the JSON data. I am no DBA, and it took me a couple of days to figure out how to import this data successfully. I need to know how to accomplish this using SQL. I am not certain what other information to provide here, so if anything else is needed, let me know and I will provide it.
The table name is dbo.IncapsulaSourceData. This table has 5 columns: Site_ID, JSON_Source, Processed, Date_inserted, Date_processed.
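Roughly, the table looks something like this (column types approximate):
create table dbo.IncapsulaSourceData (
    Site_ID        bigint,
    JSON_Source    nvarchar(max),  -- raw JSON payload, sample below
    Processed      bit,
    Date_inserted  datetime2,
    Date_processed datetime2
);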
Here is a sample of the JSON data that is being stored in the JSON_Source column:
{
  "site_id": 123456,
  "statusEnum": "fully_configured",
  "status": "fully-configured",
  "domain": "site.name.com",
  "account_id": 111111,
  "acceleration_level": "standard",
  "site_creation_date": 1410815844000,
  "ips": [
    "99.99.99.99"
  ],
  "dns": [
    {
      "dns_record_name": "site.name.com",
      "set_type_to": "CNAME",
      "set_data_to": [
        "frgt.x.wafdns.net"
      ]
    }
  ],
  "original_dns": [
    {
      "dns_record_name": "name.com",
      "set_type_to": "A",
      "set_data_to": [
        ""
      ]
    },
    {
      "dns_record_name": "site.name.com",
      "set_type_to": "A",
      "set_data_to": [
        "99.99.99.99"
      ]
    },
    {
      "dns_record_name": "site.name.com",
      "set_type_to": "CNAME",
      "set_data_to": [
        ""
      ]
    }
  ],
  "warnings": [],
  "active": "active",
  "additionalErrors": [],
  "display_name": "site.name.com",
  "security": {
    "waf": {
      "rules": [
        {
          "action": "api.threats.action.block_ip",
          "action_text": "Block IP",
          "id": "api.threats.sql_injection",
          "name": "SQL Injection"
        },
        {
          "action": "api.threats.action.block_request",
          "action_text": "Block Request",
          "id": "api.threats.cross_site_scripting",
          "name": "Cross Site Scripting"
        },
        {
          "action": "api.threats.action.block_ip",
          "action_text": "Block IP",
          "id": "api.threats.illegal_resource_access",
          "name": "Illegal Resource Access"
        },
        {
          "block_bad_bots": true,
          "challenge_suspected_bots": true,
          "exceptions": [
            {
              "values": [
                {
                  "ips": [
                    "99.99.99.99"
                  ],
                  "id": "api.rule_exception_type.client_ip",
                  "name": "IP"
                }
              ],
              "id": 123456789
            },
            {
              "values": [
                {
                  "ips": [
                    "99.99.99.99"
                  ],
                  "id": "api.rule_exception_type.client_ip",
                  "name": "IP"
                }
              ],
              "id": 987654321
            }
          ],
          "id": "api.threats.bot_access_control",
          "name": "Bot Access Control"
        },
        {
          "activation_mode": "api.threats.ddos.activation_mode.auto",
          "activation_mode_text": "Auto",
          "ddos_traffic_threshold": 1000,
          "id": "api.threats.ddos",
          "name": "DDoS"
        },
        {
          "action": "api.threats.action.quarantine_url",
          "action_text": "Auto-Quarantine",
          "id": "api.threats.backdoor",
          "name": "Backdoor Protect"
        },
        {
          "action": "api.threats.action.block_ip",
          "action_text": "Block IP",
          "id": "api.threats.remote_file_inclusion",
          "name": "Remote File Inclusion"
        },
        {
          "action": "api.threats.action.disabled",
          "action_text": "Ignore",
          "id": "api.threats.customRule",
          "name": "wafRules"
        }
      ]
    },
    "acls": {
      "rules": [
        {
          "ips": [
            "99.99.99.99"
          ],
          "id": "api.acl.whitelisted_ips",
          "name": "Visitors from whitelisted IPs"
        },
        {
          "geo": {
            "countries": [
              "BR",
              "NL",
              "PL",
              "RO",
              "RU",
              "TR",
              "TW",
              "UA"
            ]
          },
          "id": "api.acl.blacklisted_countries",
          "name": "Visitors from blacklisted Countries"
        }
      ]
    }
  },
  "sealLocation": {
    "id": "api.seal_location.none",
    "name": "No seal "
  },
  "ssl": {
    "origin_server": {
      "detected": true,
      "detectionStatus": "ok"
    },
    "generated_certificate": {
      "ca": "GS",
      "validation_method": "email",
      "validation_data": "administrator@site.name.com",
      "san": [
        "*.site.name.com"
      ],
      "validation_status": "done"
    }
  },
  "siteDualFactorSettings": {
    "specificUsers": [],
    "enabled": false,
    "customAreas": [],
    "allowAllUsers": true,
    "shouldSuggestApplicatons": true,
    "allowedMedia": [
      "ga",
      "sms"
    ],
    "shouldSendLoginNotifications": true,
    "version": 0
  },
  "login_protect": {
    "enabled": false,
    "specific_users_list": [],
    "send_lp_notifications": true,
    "allow_all_users": true,
    "authentication_methods": [
      "ga",
      "sms"
    ],
    "urls": [],
    "url_patterns": []
  },
  "performance_configuration": {
    "advanced_caching_rules": {
      "never_cache_resources": [],
      "always_cache_resources": []
    },
    "acceleration_level": "standard",
    "async_validation": true,
    "minify_javascript": true,
    "minify_css": true,
    "minify_static_html": true,
    "compress_jepg": true,
    "progressive_image_rendering": false,
    "aggressive_compression": false,
    "compress_png": true,
    "on_the_fly_compression": true,
    "tcp_pre_pooling": true,
    "comply_no_cache": false,
    "comply_vary": false,
    "use_shortest_caching": false,
    "perfer_last_modified": false,
    "accelerate_https": false,
    "disable_client_side_caching": false,
    "cache300x": false,
    "cache_headers": []
  },
  "extended_ddos": 1000,
  "res": 0,
  "res_message": "OK",
  "debug_info": {
    "id-info": "1234"
  }
}

Here is an idea of how to parse the top-level and second-level data from the JSON:
select top 100
    isd.Site_ID,
    isd.JSON_Source,
    isd.Processed,
    isd.Date_inserted,
    isd.Date_processed,
    x.site_id,
    x.statusEnum,
    x.[status],
    x.[domain],
    x.account_id,
    x.acceleration_level,
    x.site_creation_date,
    x.ips as ipsJson,
    x.dns as dnsJson,
    ipsArr.[key] as ipKey,
    ipsArr.[value] as [ip],
    dnsArr.dns_record_name,
    dnsArr.set_type_to,
    dnsArr.set_data_to
from dbo.IncapsulaSourceData isd
outer apply openjson(isd.JSON_Source)
with (
    site_id bigint,
    statusEnum nvarchar(max),
    [status] nvarchar(max),
    [domain] nvarchar(max),
    account_id bigint,
    acceleration_level nvarchar(max),
    site_creation_date bigint,
    ips nvarchar(max) as json,
    dns nvarchar(max) as json
    -- JSON_Source contains only one object, so the original rows
    -- won't be duplicated. That object does contain arrays
    -- (e.g. ips and dns), but we don't expand them in this apply,
    -- so there is no row multiplication here.
) as x
outer apply openjson(isd.JSON_Source, '$.ips') as ipsArr
-- Here we expand the arrays (ips above and dns below).
-- If a source row has 10 ipsArr values, the output will contain
-- 10 rows of <source row, ipsArr.value>.
-- You may prefer to parse each array in a separate query instead.
outer apply openjson(isd.JSON_Source, '$.dns')
with (
    dns_record_name nvarchar(max),
    set_type_to nvarchar(max),
    set_data_to nvarchar(max) as json
) as dnsArr
-- other outer applies here
Sorry, I wrote this without checking it in an IDE, so there may be some mistakes.
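If you need to go a level deeper, for example into the waf rules under security, you can keep chaining applies on the nested paths. A sketch along the same lines (again unchecked; the column types are assumptions based on the sample above):
outer apply openjson(isd.JSON_Source, '$.security.waf.rules')
with (
    [action] nvarchar(max),
    action_text nvarchar(max),
    id nvarchar(max),
    name nvarchar(max)
) as wafRulesArr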
p.s. maaaaaan, your inconsistent naming makes me cry!

Related

Is it possible to get key-value pairs from the Snowflake API instead of rowType?

I'm working with an API from Snowflake, and to deal with the JSON data I would need to receive the data as key-value pairs instead of rowType.
I've been searching for a way to do this but haven't found one.
e.g. a table user with Name and Email attributes:

Name   | Email
-------+------------------
Kelly  | kelly@email.com
Fisher | fisher@email.com
I would request this body:
{
  "statement": "SELECT * FROM user",
  "timeout": 60,
  "database": "DEV",
  "schema": "PLACE",
  "warehouse": "WH",
  "role": "DEV_READER",
  "bindings": {
    "1": {
      "type": "FIXED",
      "value": "123"
    }
  }
}
The results would come like:
{
  "resultSetMetaData": {
    ...
    "rowType": [
      { "name": "Name", ... },
      { "name": "Email", ... }
    ],
  },
  "data": [
    [ "Kelly", "kelly@email.com" ],
    [ "Fisher", "fisher@email.com" ]
  ]
}
And the results needed would be:
{
  "resultSetMetaData": {
    ...
    "data": [
      [
        "Name": "Kelly",
        "Email": "kelly@email.com"
      ],
      [
        "Name": "Fisher",
        "Email": "fisher@email.com"
      ]
    ]
  }
}
Thank you for any inputs
The output you're asking for is not valid JSON, but the result can arrive in a slightly different format:
{
  "resultSetMetaData": {
    ...
    "data": [
      {
        "Name": "Kelly",
        "Email": "kelly@email.com"
      },
      {
        "Name": "Fisher",
        "Email": "fisher@email.com"
      }
    ]
  }
}
To get the API to send it that way, you can change the SQL from select * to:
select object_construct(*) as KVP from "USER";
You can also specify the names of the keys using:
select object_construct('NAME', "NAME", 'EMAIL', EMAIL) from "USER";
The object_construct function takes an arbitrary number of parameters, as long as the count is even (alternating keys and values), so:
object_construct('KEY1', VALUE1, 'KEY2', VALUE2, <'KEY_N'>, <VALUE_N>)
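With the sample user table above, the first query would return one object per row, roughly as follows (note that Snowflake upper-cases unquoted identifiers, so the keys come back as NAME and EMAIL unless you name them explicitly as in the second query):
select object_construct(*) as KVP from "USER";
-- row 1: { "NAME": "Kelly",  "EMAIL": "kelly@email.com" }
-- row 2: { "NAME": "Fisher", "EMAIL": "fisher@email.com" }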

Sort array by two fields in different levels

My input:
[
  {
    "nfStatusNotificationUri": "http://172.19.0.2:32672/callback/nnrf-nfm/v1/onNFStatusEventPost/4e0becf9-c3ec-4002-a32b-2e35b76469b2",
    "subscrCond": {
      "serviceName": "namf-evts"
    },
    "subscriptionId": "36bc52dfdbdd4044b97ef15684706205",
    "validityTime": "2022-04-30T16:40:48.274Z",
    "reqNotifEvents": [
      "NF_DEREGISTERED",
      "NF_PROFILE_CHANGED",
      "NF_REGISTERED"
    ]
  },
  {
    "nfStatusNotificationUri": "http://172.19.0.2:32672/callback/nnrf-nfm/v1/onNFStatusEventPost/5319def1-af0b-4b7b-a94e-b787e614c065",
    "subscrCond": {
      "serviceName": "nbsf-management"
    },
    "subscriptionId": "e2e904bb52ca4fd6b048841c83a4c38e",
    "validityTime": "2022-04-30T16:40:48.26Z",
    "reqNotifEvents": [
      "NF_DEREGISTERED",
      "NF_PROFILE_CHANGED",
      "NF_REGISTERED"
    ]
  },
  {
    "nfStatusNotificationUri": "http://172.19.0.2:32672/callback/nnrf-nfm/v1/onNFStatusEventPost/31dfe10b-4020-47bd-943e-a3e293086b29",
    "subscrCond": {
      "serviceName": "namf-comm"
    },
    "subscriptionId": "e508077fab4f4b8d9dd732176a3777b9",
    "validityTime": "2022-04-30T16:40:48.273Z",
    "reqNotifEvents": [
      "NF_DEREGISTERED",
      "NF_PROFILE_CHANGED",
      "NF_REGISTERED"
    ]
  }
]
I would like to sort it by "subscriptionId" and "serviceName".
I can sort by subscriptionId, but I don't know how to add serviceName to the following expression.
jq -S '.|=sort_by(.subscriptionId)|.[].reqNotifEvents|=sort |del(.[].subscriptionId, .[].validityTime, .[].nfStatusNotificationUri)'
You can pass sort_by a list of keys, like so:
sort_by(.subscriptionId, .subscrCond.serviceName)
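Plugged into the original command from the question, that becomes (the sort runs before the del, so sorting by subscriptionId still works even though the field is deleted afterwards):
jq -S 'sort_by(.subscriptionId, .subscrCond.serviceName)
       | .[].reqNotifEvents |= sort
       | del(.[].subscriptionId, .[].validityTime, .[].nfStatusNotificationUri)'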

Mongodb: Query the size of nested arrays

I have the following Schema:
Schema({
  caller_address: {
    type: String,
    required: true,
  },
  traces: [[{
    type: mongoose.Schema.Types.ObjectId,
    ref: 'Call',
  }]]
});
I would like to retrieve only the objects whose traces contain more Calls than a specified number. In other words, the size of at least one nested array in traces should be bigger than a specified number.
I'm trying to use $elemMatch and $size, but with no success. For now, I have this code:
CallerTraces.find({ 'traces' : { $elemMatch: { $size : { $gt: minTraceSize } }}})
Where minTraceSize is an int.
Could you guys help me?
I would really appreciate it!
Thanks for the sample data. My answer will be a raw MQL solution, not a mongoose solution, so some translation will be required.
I was able to insert two documents based on the comments in your post. I had to change the ObjectId of one of the two sample documents because your samples had the same primary key value, which was generating a duplicate key exception.
Insert Sample Data
db.CallerTraces.insert({
  "_id": ObjectId("6175e7ecc62cff004462d4a6"),
  "traces": [
    [ ObjectId("6175e7ecc62cff004462d4a4") ]
  ],
  "caller_address": "0x4e204793bc4b8acee32edaf1fbba1f3ea45f7990"
})
db.CallerTraces.insert({
  "_id": ObjectId("6175e7ecc62cff004462d4a7"),
  "traces": [
    [
      ObjectId("6175e7ecc62cff004462d4a4"),
      ObjectId("6175e7ecc62cff004462d4a4")
    ],
    [ ObjectId("6175e7ecc62cff004462d4a4") ]
  ],
  "caller_address": "0x4e204793bc4b8acee32edaf1fbba1f3ea45f7990"
})
If I want to find records having more than 0 items in the array traces I can issue the following:
Find more than zero traces
db.CallerTraces.find({ $expr: { $gt: [ { $size: "$traces" }, 0 ] } })
This returns the following:
Enterprise replSet [primary] barrydb> db.CallerTraces.find({ $expr: { $gt: [ { $size: "$traces" }, 0 ] } })
[
{
_id: ObjectId("6175e7ecc62cff004462d4a6"),
traces: [ [ ObjectId("6175e7ecc62cff004462d4a4") ] ],
caller_address: '0x4e204793bc4b8acee32edaf1fbba1f3ea45f7990'
},
{
_id: ObjectId("6175e7ecc62cff004462d4a7"),
traces: [
[
ObjectId("6175e7ecc62cff004462d4a4"),
ObjectId("6175e7ecc62cff004462d4a4")
],
[ ObjectId("6175e7ecc62cff004462d4a4") ]
],
caller_address: '0x4e204793bc4b8acee32edaf1fbba1f3ea45f7990'
}
]
Find more than 1 trace
If instead I want to find more than one trace I simply alter the query slightly:
db.CallerTraces.find({ $expr: { $gt: [ { $size: "$traces" }, 1 ] } })
... and this returns the following results:
Enterprise replSet [primary] barrydb> db.CallerTraces.find({ $expr: { $gt: [ { $size: "$traces" }, 1 ] } })
[
{
_id: ObjectId("6175e7ecc62cff004462d4a7"),
traces: [
[
ObjectId("6175e7ecc62cff004462d4a4"),
ObjectId("6175e7ecc62cff004462d4a4")
],
[ ObjectId("6175e7ecc62cff004462d4a4") ]
],
caller_address: '0x4e204793bc4b8acee32edaf1fbba1f3ea45f7990'
}
]
Conclusion
When attempting to evaluate the length of an array within the query processor, we must elect to use the $expr option, as the plain MQL find syntax does not cover your use case. The $expr operator is somewhat of a catch-all option for things that do not fit nicely into the MQL framework.
UPDATE #1
OP introduced additional requirements. Rather than looking at the count of the outer array, we must consider the count of each array within the array (the nested inner arrays). Since the find() method with $expr cannot evaluate nested arrays, we must instead use the aggregation framework and unwind the outer array. This example stores the original form in a new field called original, then replaces the root after all the evaluation is complete. Since unwinding can result in duplicates in the pipeline, we finalize with a $group to suppress duplicates.
Solution
db.CallerTraces.aggregate([
  {
    $addFields: {
      "original._id": "$_id",
      "original.traces": "$traces",
      "original.caller_address": "$caller_address"
    }
  },
  {
    $unwind: "$traces"
  },
  {
    $match: { $expr: { $gt: [ { $size: "$traces" }, 1 ] } }
  },
  {
    $replaceRoot: { newRoot: "$original" }
  },
  {
    $group: {
      _id: "$_id",
      traces: { "$first": "$traces" },
      caller_address: { "$first": "$caller_address" }
    }
  }
])
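To translate back to mongoose: Model.aggregate() accepts the same raw pipeline stages, so a sketch using the CallerTraces model and the minTraceSize variable from the question might look like this (untested):
CallerTraces.aggregate([
  { $addFields: { "original._id": "$_id", "original.traces": "$traces", "original.caller_address": "$caller_address" } },
  { $unwind: "$traces" },
  // compare each inner array's size against the threshold
  { $match: { $expr: { $gt: [ { $size: "$traces" }, minTraceSize ] } } },
  { $replaceRoot: { newRoot: "$original" } },
  { $group: { _id: "$_id", traces: { $first: "$traces" }, caller_address: { $first: "$caller_address" } } }
])
.then(docs => console.log(docs))
.catch(err => console.error(err));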

MongoDB Track data changes

I want to track changes on MongoDB documents. The big challenge is that MongoDB has nested documents.
Example
[
  {
    "_id": "60f7a86c0e979362a25245eb",
    "email": "walltownsend@delphide.com",
    "friends": [
      { "name": "Hancock Nelson" },
      { "name": "Owen Dotson" },
      { "name": "Cathy Jarvis" }
    ]
  }
]
after the update/change
[
  {
    "_id": "60f7a86c0e979362a25245eb",
    "email": "walltownsend@delphide.com",
    "friends": [
      { "name": "Daphne Kline" },  // <------ changed
      { "name": "Owen Dotson" },
      { "name": "Cathy Jarvis" }
    ]
  }
]
This is a very basic example of a highly expandable real-world use case.
In a SQL database, I would suggest some sort of solution like the following.
The SQL way
users:
_id                      | email
-------------------------+--------------------------
60f7a8b28db7c78b57bbc217 | cathyjarvis@delphide.com

friends:
_id | user_id                  | name
----+--------------------------+----------------
0   | 60f7a8b28db7c78b57bbc217 | Hancock Nelson
1   | 60f7a8b28db7c78b57bbc217 | Suarez Burt
2   | 60f7a8b28db7c78b57bbc217 | Mejia Elliott

after the update/change:

users:
_id                      | email
-------------------------+--------------------------
60f7a8b28db7c78b57bbc217 | cathyjarvis@delphide.com

friends:
_id | user_id                  | name
----+--------------------------+----------------
0   | 60f7a8b28db7c78b57bbc217 | Daphne Kline
1   | 60f7a8b28db7c78b57bbc217 | Suarez Burt
2   | 60f7a8b28db7c78b57bbc217 | Mejia Elliott

history:
_id | friends_id | field | preUpdate      | postUpdate
----+------------+-------+----------------+--------------
0   | 0          | name  | Hancock Nelson | Daphne Kline
If there is an update and the change only has to be tracked until the next update, this would work for NoSQL as well. If there is a second update, we simply get a second row in the SQL history table, which is very clear. In NoSQL, you could keep a list/array of full document versions and compare changes across the indexes, but that stores a great deal of redundant information that hasn't changed.
Have a look at the Set Expression Operators:
$setDifference
$setEquals
$setIntersection
Beware: these operators perform set operations on arrays, treating arrays as sets. If an array contains duplicate entries, they ignore the duplicates, and they ignore the order of the elements.
In your example the update would result in
removed: [ {name: "Hancock Nelson" } ],
added: [ {name: "Daphne Kline" } ]
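For example, a minimal sketch of computing those removed/added lists with $setDifference, assuming the old and new arrays sit side by side as friends and updated_friends (the same layout as the document inserted below):
db.collection.aggregate([
  {
    $set: {
      removed: { $setDifference: ["$friends", "$updated_friends"] },  // in old but not in new
      added: { $setDifference: ["$updated_friends", "$friends"] }     // in new but not in old
    }
  }
])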
If the number of elements is always the same before and after the update, then you could use this one:
db.collection.insertOne({
  friends: [
    { "name": "Hancock Nelson" },
    { "name": "Owen Dotson" },
    { "name": "Cathy Jarvis" }
  ],
  updated_friends: [
    { "name": "Daphne Kline" },
    { "name": "Owen Dotson" },
    { "name": "Cathy Jarvis" }
  ]
})
db.collection.aggregate([
  {
    $set: {
      difference: {
        $map: {
          input: { $range: [0, { $size: "$friends" }] },
          as: "i",
          in: {
            $cond: {
              if: {
                $eq: [
                  { $arrayElemAt: ["$friends", "$$i"] },
                  { $arrayElemAt: ["$updated_friends", "$$i"] }
                ]
              },
              then: null,
              else: {
                old: { $arrayElemAt: ["$friends", "$$i"] },
                new: { $arrayElemAt: ["$updated_friends", "$$i"] }
              }
            }
          }
        }
      }
    }
  },
  {
    $set: {
      difference: {
        $filter: {
          input: "$difference",
          cond: { $ne: ["$$this", null] }
        }
      }
    }
  }
])
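For the sample document inserted above, only index 0 differs, so the pipeline should end with difference holding a single pair, roughly:
difference: [
  {
    old: { name: "Hancock Nelson" },
    new: { name: "Daphne Kline" }
  }
]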

Update array content within another array that doesn't have a key

I have MongoDB content as below:
[
  {
    "_id": { "$oid": "57c6699711bd6a0976cabe8a" },
    "ID": "1111",
    "FullName": "AAA",
    "Category": [
      {
        "CategoryId": { "$oid": "57c66ebedcba0f63c1ceea51" },
        "_id": { "$oid": "57e38a8ad190ea1100649798" },
        "Value": [
          { "Name": "" }
        ]
      },
      {
        "CategoryId": { "$oid": "57c3df061eb1e59d3959cc40" },
        "_id": { "$oid": "57e38a8ad190ea1100649797" },
        "Value": [
          [ "111", "XXXX", "2005" ],
          [ "1212", "YYYY", "2000" ],
          [ "232323", "ZZZZZ", "1999" ]
        ]
      }
    ]
  },
  {
    "_id": { "$oid": "57c6699711bd6a0976cabe8a" },
    "ID": "1111",
    "FullName": "BBB",
    "Category": [
      {
        "CategoryId": { "$oid": "57c66ebedcba0f63c1ceea51" },
        "_id": { "$oid": "57e38a8ad190ea1100649798" },
        "Value": [
          { "Name": "" }
        ]
      },
      {
        "CategoryId": { "$oid": "57c3df061eb1e59d3959cc40" },
        "_id": { "$oid": "57e38a8ad190ea1100649797" },
        "Value": [
          [ "4444", "XXXX", "2005" ],
          [ "7777", "GGGG", "2000" ],
          [ "8888", "ZZZZZ", "1999" ]
        ]
      }
    ]
  }
]
Here I have an array named 'Category' which contains objects with different CategoryIds.
I need to:
1. select a particular CategoryId - '57c3df061eb1e59d3959cc40'
2. from the selected Category element, get its 'Value' array
3. in the Value array, find the inner arrays whose second value equals 'ZZZZZ', i.e. value[1] == 'ZZZZZ'
4. update each matched inner array with a new value appended at the end
E.g.:
[ "232323", "ZZZZZ", "1999" ]
should be updated to
[ "232323", "ZZZZZ", "1999", "update1" ]
and
[ "8888", "ZZZZZ", "1999" ]
should be updated to
[ "8888", "ZZZZZ", "1999", "update1" ]
I have tried as below:
resume.update({
  "Category.CategoryId": new ObjectId('57c3df191eb1e59d3959cc43'),
  "Category.Value.$.1": 'ZZZZZ'
},
{
  "$set": { "Category.Value.$.3": "update1" }
},
function(err, resData){
  res.send(resData);
});
But nothing gets updated. Is there any way to get this to work? Please help me update the inner array.
Thanks in advance.
Your goal is not possible at the moment since you need to update two positional elements.
There is a JIRA trackable for the sort of behaviour you want here: https://jira.mongodb.org/browse/SERVER-831
It's a problem since you need to match two element positions:
the Category element with the matched CategoryId
the Value element in the Value array of arrays
If one of these were not an array, it would have been possible.
Anyway, your update attempt above was wrong. If this feature were possible (and it is not!), it would have been something like this:
db.resume.update(
  {
    Category: {
      $elemMatch: {
        CategoryId: ObjectId('57c3df061eb1e59d3959cc40'),
        Value: {
          $elemMatch: { '1': 'ZZZZZ' }
        }
      }
    }
  },
  {
    $push: { 'Category.$.Value.$': 'update1' }
  }
)
The positional $ operator should be used in the update document, not in the query filter the way you used it, and it updates the first element that matches the query.
Doing the above will return the error:
Too many positional (i.e. '$') elements found in path 'Category.$.Value.$'
Because of the missing feature I explained at the top.
So, currently (version 3.2) you will not be able to do this unless you change your schema.
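(Side note for readers on newer versions: MongoDB 3.6 added filtered positional operators via arrayFilters, which addressed the SERVER-831 ticket linked above, so an update along these lines should now be expressible. A sketch only, untested:)
db.resume.updateMany(
  { "Category.CategoryId": ObjectId('57c3df061eb1e59d3959cc40') },
  // $[cat] and $[val] are resolved by the arrayFilters below
  { $push: { "Category.$[cat].Value.$[val]": "update1" } },
  {
    arrayFilters: [
      { "cat.CategoryId": ObjectId('57c3df061eb1e59d3959cc40') },
      { "val.1": "ZZZZZ" }  // second element of the inner array
    ]
  }
)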
