Query/Flatten all the elements inside an array in nested JSON in Snowflake - snowflake-cloud-data-platform

I am new to Snowflake. I am trying to query/flatten all the fields inside a nested JSON document, but the Transactions array contains multiple transactions and I can't extract all of them; I can only select the first one by using indexing.
My json structure is like this
[
{
"Employee": {
"UID": "value",
"Transactions": [
{
"ORBIS-Tx-ID": "123F",
"ReferenceID": {
"Amount": {
"Charge": 0.3,
"Currency": "USD",
"StatusEntry": {
"Status": "COMPLETED",
"Timestamp": {
"$date": "2021-09-07T11:58:40.672Z"
}
},
"SummaryStatus": "COMPLETED",
"Value": 9.7
},
"Metadata": {
"Notes": "Salary advance paid out",
"TxSubType": {
"From": {
"OrbisUID": "OPA",
"ProviderID": "gbQG0sTWZVmioQ"
},
"To": {
"UID": "59B7",
"ProviderID": "DnYOEs5A"
}
},
"TxType": "ADVANCE"
},
"OrigMessage": {
"EC": "0",
"SC": 200,
"approvalCode": "145382",
"card": {
"expirationDate": "202605",
"last4": "6423"
},
"fees": {
"interchange": "0.10",
"network": "0.01"
},
"network": "VisaFF",
"networkID": "5812198027",
"networkRC": "00"
},
"Provider": {
"ProviderID": "iuMVYeZLHQ",
"ProviderName": "Name"
}
}
},
{
"ORBIS-Tx-ID": "123F",
"ReferenceID": {
"Amount": {
"Charge": 0.45,
"Currency": "USD",
"StatusEntry": {
"Status": "COMPLETED",
"Timestamp": {
"$date": "2021-09-10T15:44:11.896Z"
}
},
"SummaryStatus": "COMPLETED",
"Value": 19.55
},
"Metadata": {
"Notes": "Salary advance paid out",
"TxSubType": {
"From": {
"OrbisUID": "ORA",
"ProviderID": "QIHVsTWZVmioQ"
},
"To": {
"UID": "59EB11-85A9-00155DC29747",
"ProviderID": "DngXSCgkYOEs5A"
}
},
"TxType": "ADVANCE"
},
"OrigMessage": {
"EC": "0",
"SC": 200,
"approvalCode": "164647",
"card": {
"expirationDate": "202605",
"last4": "6423"
},
"fees": {
"interchange": "0.10",
"network": "0.02"
},
"network": "VisaFF",
"networkID": "58112122",
"networkRC": "00"
},
"Provider": {
"ProviderID": "QjS0o4Mg",
"ProviderName": "Name"
}
}
}
]
}
}
]
I want to have all the fields in separate columns and be able to query for eg. Charge inside Transactions.ReferenceID.Amount for all the transactions.
For now i am flattening like this:
select
value:Employer:UID as ER_ORBISUID,
value:Employee:UID EE_ORBISUID,
value:Employee:Transactions[0]['ORBIS-Tx-ID'] as TrasactionID,
array_size(value:Employee:Transactions) as arraySize,
value:Employee:Transactions[0].ReferenceID.Amount.Charge as AmtValue
,value:Employee:Transactions[0]:ReferenceID.Amount.StatusEntry.Status as Staus
from test11,
-- lateral flatten(input => CLMN) as aa,
lateral flatten(input => CLMN:Employee:Transactions);
When I run the above query, I only get the first element of the array because I am using index 0; I am not sure how to do it without using an index. Can anyone help? Thank you.

You can use the (LATERAL) FLATTEN function to extract a nested variant, object, or array from JSON data.
Details & Example: https://community.snowflake.com/s/article/Using-lateral-flatten-to-extract-data-from-JSON-internal-field

Related

How to find array's object property(string) by its numeric value in mongodb query?

I have a restaurant collection with its documents formed like this one:
{
"address": {
"building": "1007",
"coord": [
-73.856077,
40.848447
],
"street": "Morris Park Ave",
"zipcode": "10462"
},
"borough": "Bronx",
"cuisine": "Bakery",
"grades": [
{
"date": {
"$date": 1393804800000
},
"grade": "A",
"score": "81"
},
{
"date": {
"$date": 1378857600000
},
"grade": "A",
"score": "6"
},
{
"date": {
"$date": 1358985600000
},
"grade": "A",
"score": "99"
},
{
"date": {
"$date": 1322006400000
},
"grade": "B",
"score": "14"
},
{
"date": {
"$date": 1288715200000
},
"grade": "B",
"score": "14"
}
],
"name": "Morris Park Bake Shop"
}
My homework asked me to find any restaurants having score from 80 to 100 and I do this
db.restaurants.find({ $expr: {$and: [{$gt: [ { $toInt: "$grades.score" }, 80 ]}, {$lt: [ { $toInt: "$grades.score" }, 100 ]}] } }).pretty()
And received "Executor error during find command :: caused by :: Unsupported conversion from array to int in $convert with no onError value".
I try
db.restaurants.find({ $expr: {$and: [{$gt: [ { $toInt: "$grades.$score" }, 80 ]}, {$lt: [ { $toInt: "$grades.$score" }, 100 ]}] } }).pretty()
And this returned:"FieldPath field names may not start with '$'. Consider using $getField or $setField."
Then i try
db.restaurants.find({$and:[{'grades.score': {$gt: 80}}, {'grade.score':{$lt:100}}]}).collation({locale:'en_US' ,numericOrdering: true})
And that returned nothing. It has to return at least the document I mentioned above, right?
Perhaps this homework is about learning proper field value types or collation. With collation, numericOrdering can be used, as commented by @prasad_. If score is truly numeric, for several reasons it's best to store it as a number.
Unfortunately there doesn't seem to be a way at this time to specify a collation with mongoplayground.net. Without using a collation, there are many ways to achieve your desired output. Here's one way.
db.collection.aggregate([
{
// make grades.score numeric
"$set": {
"grades": {
"$map": {
"input": "$grades",
"as": "grade",
"in": {
"$mergeObjects": [
"$$grade",
{ "score": { "$toDecimal": "$$grade.score" } }
]
}
}
}
}
},
{
"$match": {
"grades.score": {
"$gt": 80,
"$lt": 100
}
}
},
{
"$project": {
"_id": 0,
"name": 1
}
}
])
Try it on mongoplayground.net.

How to parse complex data using react native?

The following is the data that needs to be parsed; it can contain nested arrays and objects. I need
help understanding the easiest way to parse it.
const country= [
{
"place": "sikkim",
"location": 2,
"Extension": "",
"Keys": {
"string": [
"Enabled",
"Disabled"
]
},
"ItemValues": {
"ItemData": [
{
"Name": "Enabled",
"Data": {
"Rows": {
"Row": {
"Values": {
"Type": false
}
}
}
}
},
{
"Name": "Value",
"Data": {
"Rows": {
"DataRow": {
"Values": {
"anyType": "100"
}
}
}
}
}
]
}
},
{
"place": "assam",
"location": 1,
"Extension": "",
"Keys": {
"string": "MinValue"
},
"ItemValues": {
"ItemData": {
"Name": "nValue",
"Data": {
"Rows": {
"DataRow": {
"Values": {
"anyType": "1"
}
}
}
}
}
}
}
]
This array contains 2 objects. I need to parse this complex array of objects and
show it like this -
OUTCOME should be -
sikkim:{
"place": "sikkim",
"location": 2,
"Extension": "",
"string":"Enabled","Disabled",
"Name": "Enabled",
"Type": false,
"Name": "Value",
"anyType": 100
},
assam:{.. so on}
The code that I tried displays only the values (the keys are lost) -
// Recursively collects every primitive (leaf) value found in a nested
// structure of objects and arrays, in traversal order. Keys are discarded.
const getValues = (country) => {
  // Object(x) === x holds only for non-primitives; a primitive is a leaf,
  // so it is returned wrapped in a one-element array.
  if (country !== Object(country)) {
    return [country];
  }
  // Descend into each child value and splice the results into one flat list.
  return Object.values(country).flatMap((child) => getValues(child));
};
console.log(getValues(country));
OUTPUT from above code =
 ["sikkim", 2, "", "Enabled", "Disabled", "Enabled", false, "Value", "100",
"assam", 1, "", "MinValue", "nValue", "1"]
I need a smart, general way to write this logic because my data
is huge. Any leads? I can achieve the output by writing many for and if loops, but that
creates a lot of confusion.

Reverse a match or match every element in an array where some IDs are not set

Struggling with making this ES query. Basically, I have a nested object, something like:
{
"id": "00000000-0000-0000-0000-000000000000",
"exchangeRate": 0.01,
"payments": [
{
"id": "00000000-0000-0000-0000-000000000000",
"paymentId": "some-id",
"currency": "USD",
"amount": 400.0
},
{
"id": "00000000-0000-0000-0000-000000000000",
"currency": "USD",
"paymentId": "some-id2",
"amount": -200.0
},
{
"id": "00000000-0000-0000-0000-000000000000",
"currency": "USD",
"amount": -200.0
}
]
}
And I want to match on an object where some of the "paymentId" keys are defined, but not all. So the above object would be a match. Whereas something like:
{
"id": "00000000-0000-0000-0000-000000000000",
"exchangeRate": 0.01,
"payments": [
{
"id": "00000000-0000-0000-0000-000000000000",
"paymentId": "some-id",
"currency": "USD",
"amount": 400.0
},
{
"id": "00000000-0000-0000-0000-000000000000",
"currency": "USD",
"paymentId": "some-id2",
"amount": -200.0
},
{
"id": "00000000-0000-0000-0000-000000000000",
"currency": "USD",
"paymentId": "some-id3",
"amount": -200.0
}
]
}
Would not match.
I've made a query which matches if all paymentIds are defined and returns all objects where that is true. This query is:
{
"query": {
"bool": {
"must": {
"nested": {
"path": "payments",
"query": {
"exists": {
"field": "payments.paymentId"
}
}
}
}
}
}
}
The question would be how do I reverse this? So that if it matches this query, it doesn't return as a match. As putting "must_not" simply does the opposite. It returns all records that don't have any paymentIds defined at all. Which is something I want to match on, but I need all the ones that even have only some of the paymentIds set.
You could compare the payments objects' field value sizes one by one while in the nested context.
Assuming both payments.id and payments.paymentId are of the keyword mapping type, you could say:
GET your-index/_search
{
"query": {
"nested": {
"path": "payments",
"query": {
"script": {
"script": "doc['payments.id'].size() != doc['payments.paymentId'].size()"
}
}
}
}
}

How to find in specific date interval

I have a problem in MongoDB finding the wanted documents and a fragment of a list field. I have the following document:
{
"_id": "5ed38e5d2a6e74567c7a579c",
"id": "AAA",
"events": [
{
"dayTypeId": "5e71e1918ee9a326ebdf1611",
"startDate": {
"$date": "2020-06-01T00:00:00.000Z"
},
"endDate": {
"$date": "2020-06-04T00:00:00.000Z"
},
"stared": false,
"userId": "XXX",
"mixing": "wjk0ra1v7m88p0x5aaxwndkmwmlxx4pn92hzjlgpl34u8ojx855m8e8spp1v7l57omtoc4qxbv0g22nybqubd3hq5skuff8ezbzdum2a92itwco64tbi5y2p5mboznxiuwynv0rb8eqk9d80ib65cve6ab9p1d1divee3wbywc2st1lkjruqvgu42zgx8mjtsnb8gyeqtxycl4ujpllgxpshdu8o97iiw347bjqv4mrv6jgwq4r21zp5rm4dw6a1"
},
{
"dayTypeId": "5e71e1918ee9a326ebdf1611",
"startDate": {
"$date": "2020-06-10T00:00:00.000Z"
},
"endDate": {
"$date": "2020-06-10T00:00:00.000Z"
},
"stared": false,
"userId": "XXX",
"mixing": "wjk0ra1v7m88p0x5aaxwndkmwmlxx4pn92hzjlgpl34u8ojx855m8e8spp1v7l57omtoc4qxbv0g22nybqubd3hq5skuff8ezbzdum2a92itwco64tbi5y2p5mboznxiuwynv0rb8eqk9d80ib65cve6ab9p1d1divee3wbywc2st1lkjruqvgu42zgx8mjtsnb8gyeqtxycl4ujpllgxpshdu8o97iiw347bjqv4mrv6jgwq4r21zp5rm4dw6a1"
},
{
"dayTypeId": "5e71d8628ee9a326e7df160d",
"startDate": {
"$date": "2020-06-05T00:00:00.000Z"
},
"endDate": {
"$date": "2020-06-09T00:00:00.000Z"
},
"stared": false,
"userId": "XXX",
"mixing": "wjk0ra1v7m88p0x5aaxwndkmwmlxx4pn92hzjlgpl34u8ojx855m8e8spp1v7l57omtoc4qxbv0g22nybqubd3hq5skuff8ezbzdum2a92itwco64tbi5y2p5mboznxiuwynv0rb8eqk9d80ib65cve6ab9p1d1divee3wbywc2st1lkjruqvgu42zgx8mjtsnb8gyeqtxycl4ujpllgxpshdu8o97iiw347bjqv4mrv6jgwq4r21zp5rm4dw6a1"
},
{
"dayTypeId": "5e71d8628ee9a326e7df160d",
"startDate": {
"$date": "2020-07-13T00:00:00.000Z"
},
"endDate": {
"$date": "2020-07-21T00:00:00.000Z"
},
"stared": false,
"userId": "XXXX",
"mixing": "wjk0ra1v7m88p0x5aaxwndkmwmlxx4pn92hzjlgpl34u8ojx855m8e8spp1v7l57omtoc4qxbv0g22nybqubd3hq5skuff8ezbzdum2a92itwco64tbi5y2p5mboznxiuwynv0rb8eqk9d80ib65cve6ab9p1d1divee3wbywc2st1lkjruqvgu42zgx8mjtsnb8gyeqtxycl4ujpllgxpshdu8o97iiw347bjqv4mrv6jgwq4r21zp5rm4dw6a1"
}
]
}
I want to get fragment of "events" list where either the startDate or the endDate is in the range of 02.06.2020-11.06.2020.
For example out of my sample data I want all but the last element to be returned.
You can use $filter to filter out any events that do not match your conditions:
db.collection.aggregate([
{
$match: {
$or: [
{
"events.startDate": {$gte: new Date("2020-06-02"), $lte: new Date("2020-06-11")}
},
{
"events.endDate": {$gte: new Date("2020-06-02"), $lte: new Date("2020-06-11")}
}
]
}
},
{
$project: {
events: {
$filter: {
input: "$events",
as: "event",
cond: {
$or: [
{
$and: [
{$gte: ["$$event.startDate", new Date("2020-06-02")]},
{$lte: ["$$event.startDate", new Date("2020-06-11")]},
]
},
{
$and: [
{$gte: ["$$event.endDate", new Date("2020-06-02")]},
{$lte: ["$$event.endDate", new Date("2020-06-11")]},
]
}
]
}
}
}
}
}
])

Aggregating array of values in elasticsearch

I need to aggregate an array as follows
Two document examples:
{
"_index": "log",
"_type": "travels",
"_id": "tnQsGy4lS0K6uT3Hwzzo-g",
"_score": 1,
"_source": {
"state": "saopaulo",
"date": "2014-10-30T17",
"traveler": "patrick",
"registry": "123123",
"cities": {
"saopaulo": 1,
"riodejaneiro": 2,
"total": 2
},
"reasons": [
"Entrega de encomenda"
],
"from": [
"CompraRapida"
]
}
},
{
"_index": "log",
"_type": "travels",
"_id": "tnQsGy4lS0K6uT3Hwzzo-g",
"_score": 1,
"_source": {
"state": "saopaulo",
"date": "2014-10-31T17",
"traveler": "patrick",
"registry": "123123",
"cities": {
"saopaulo": 1,
"curitiba": 1,
"total": 2
},
"reasons": [
"Entrega de encomenda"
],
"from": [
"CompraRapida"
]
}
},
I want to aggregate the cities array, to find out all the cities the traveler has gone to. I want something like this:
{
"traveler":{
"name":"patrick"
},
"cities":{
"saopaulo":2,
"riodejaneiro":2,
"curitiba":1,
"total":3
}
}
Where the total is the length of the cities array minus 1. I tried the terms aggregation and the sum aggregation, but couldn't produce the desired output.
Changes in the document structure can be made, so if anything like that would help me, I'd be pleased to know.
In the document posted above, "cities" is not a JSON array; it is a JSON object.
If changing the document structure is a possibility, I would change cities in the document to be an array of objects.
example document:
cities : [
{
"name": "saopaulo",
"visit_count": "2"
},
{
"name": "riodejaneiro",
"visit_count": "1"
}
]
You would then need to set cities to be of type nested in the index mapping
"mappings": {
"<type_name>": {
"properties": {
"cities": {
"type": "nested",
"properties": {
"city": {
"type": "string"
},
"count": {
"type": "integer"
},
"value": {
"type": "long"
}
}
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"registry": {
"type": "string"
},
"state": {
"type": "string"
},
"traveler": {
"type": "string"
}
}
}
}
After which you could use nested aggregation to get the city count per user.
The query would look something along these lines:
{
"query": {
"match": {
"traveler": "patrick"
}
},
"aggregations": {
"city_travelled": {
"nested": {
"path": "cities"
},
"aggs": {
"citycount": {
"cardinality": {
"field": "cities.city"
}
}
}
}
}
}

Resources