getting null value from parsed xml file - snowflake-cloud-data-platform

I have a column where each cell has the same type of data. I need the value at "EXILITY", which is "1". I tried get(column_name, 6), but the position is not fixed, so I tried using the name instead, get(column_name, 'EXILITY'), but got a NULL value.
[
{
"$": 5,
"#": "BASE",
"#source": "ice"
},
{
"$": 3.6,
"#": "TEAL"
},
{
"$": "C:C",
"#": "VER_STG"
},
{
"$": [
{
"$": 3,
"#": "VEOR"
},
{
"$": 1,
"#": "COTY"
}
],
"#": "ACS",
"CITY": 1,
"VR": 0
},
{
"$": [
{
"$": 2,
"#": "CONFLITY"
},
{
"$": 1,
"#": "INGRITY"
},
{
"$": 1,
"#": "AVITY"
}
],
"#": "IT",
"AVAILATY": 2,
"CONFIDY": 0,
"IY": 1
},
{
"$": 1,
"#": "AION"
},
{
"$": 1,
"#": "EXILITY"
},
{
"$": 3,
"#": "RL"
},
{
"$": 1,
"#": "RE"
}
]
I am trying to get the value at "#": "EXILITY" but am not able to.
I simply tried using the GET function but had no luck.

Sharing the XML would be more useful. Anyway, based on the JSON you provided, you can fetch the value using the FLATTEN and GET functions:
with json as (
select parse_json ('[
{
"$": 5,
"#": "BASE",
"#source": "ice"
},
{
"$": 3.6,
"#": "TEAL"
},
{
"$": "C:C",
"#": "VER_STG"
},
{
"$": [
{
"$": 3,
"#": "VEOR"
},
{
"$": 1,
"#": "COTY"
}
],
"#": "ACS",
"CITY": 1,
"VR": 0
},
{
"$": [
{
"$": 2,
"#": "CONFLITY"
},
{
"$": 1,
"#": "INGRITY"
},
{
"$": 1,
"#": "AVITY"
}
],
"#": "IT",
"AVAILATY": 2,
"CONFIDY": 0,
"IY": 1
},
{
"$": 1,
"#": "AION"
},
{
"$": 1,
"#": "EXILITY"
},
{
"$": 3,
"#": "RL"
},
{
"$": 1,
"#": "RE"
}
]') x
)
select parse_xml(items.VALUE) xml, get(xml,'$') xml_value
from json,
lateral flatten( x ) items
where get(XML,'#') = 'EXILITY';
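As a side note, if the column already holds the parsed VARIANT (rather than raw XML text), the same lookup can be written without PARSE_XML. A minimal sketch, assuming a hypothetical table my_table with a VARIANT column my_col containing the array shown above:
-- my_table / my_col are placeholder names; my_col holds the JSON array as a VARIANT
select f.value:"$"::string as exility_value
from my_table,
     lateral flatten(input => my_col) f
where f.value:"#"::string = 'EXILITY';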

Related

Jolt Transform complex json to a flat json

I am looking to Jolt-transform a complex JSON into the desired JSON below.
Input JSON:
{
"Rating": 1,
"SecondaryRatings": [
{
"Design": 4,
"Price": 2,
"RatingDimension3": 1,
"Arr1": [
{
"Val1": 34
},
{
"Val2": 45
}
]
},
{
"Design": 44,
"Price": 23,
"RatingDimension3": 12,
"Arr1": [
{
"Val1": 56
},
{
"Val2": 22
}
]
}
]
}
Desired Output
[
{
"Design": 4,
"Price": 2,
"RatingDimension3": 1,
"rating-primary": 1,
"Val1": 34,
"Val2": 45
},
{
"Design": 44,
"Price": 23,
"RatingDimension3": 12,
"rating-primary": 1,
"Val1": 56,
"Val2": 22
}
]
I tried the following Jolt spec:
[
{
"operation": "shift",
"spec": {
"SecondaryRatings": {
"*": {
"*": {
"*": {
"#(2,Design)": "[&3].Design",
"#(2,Price)": "[&3].Price",
"#(2,RatingDimension3)": "[&3].RatingDimension3",
"Val1": "[&3].Val1",
"Val2": "[&3].Val2",
"#(4,Rating)": "[&3].rating-primary"
}
}
}
}
}
}
]
But got the following output:
[
{
"Design" : [ 4, 4, 4, 4, 4 ],
"Price" : [ 2, 2, 2, 2, 2 ],
"RatingDimension3" : [ 1, 1, 1, 1, 1 ],
"rating-primary" : [ 1, 1, 1, 1, 1 ],
"Val1" : 34,
"Val2" : 45
},
{
"Design" : [ 44, 44, 44, 44, 44 ],
"Price" : [ 23, 23, 23, 23, 23 ],
"RatingDimension3" : [ 12, 12, 12, 12, 12 ],
"rating-primary" : [ 1, 1, 1, 1, 1 ],
"Val1" : 56,
"Val2" : 22
}
]
As you can see, except for the last-level values, everything else ends up as an array of repeated values. Can anybody point out what is missing or wrong in the Jolt spec?
You can walk directly through the indexes of the outer array (SecondaryRatings) rather than the inner one (Arr1), such as:
[
{
"operation": "shift",
"spec": {
"SecondaryRatings": {
"*": {
"*": "[&1].&",
"#(2,Rating)": "[&].rating-primary",
"Arr1": {
"*": {
"*": "[&3].&"
}
}
}
}
}
}
]
In this case there is no need to write out every element individually, except for Rating.
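If it helps, here is a minimal sketch of applying a spec like this with the Jolt Java library (assuming the com.bazaarvoice.jolt jolt-core and json-utils dependencies, and hypothetical spec.json / input.json files on the classpath):
import com.bazaarvoice.jolt.Chainr;
import com.bazaarvoice.jolt.JsonUtils;

public class JoltDemo {
    public static void main(String[] args) {
        // Load the shift spec (a JSON array of operations) and the input document.
        Object spec = JsonUtils.classpathToList("/spec.json");
        Object input = JsonUtils.classpathToObject("/input.json");

        // Build the transform chain and apply it to the input.
        Chainr chainr = Chainr.fromSpec(spec);
        Object output = chainr.transform(input);

        System.out.println(JsonUtils.toJsonString(output));
    }
}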

How to find matching elements from an array of objects using mongoDB query

I want to find all elements matching product "bat". My database structure is as follows:
[
{
"key": 1,
"productArray" : [
{
"requirementId": 5,
"product": "bat"
},
{
"requirementId": 6,
"product": "Pen"
}
]
},
{
"key": 2
},
{
"key": 3,
"productArray": [
{
"requirementId": 1,
"product": "bat"
},
{
"requirementId": 2,
"product": "Pen"
},
{
"requirementId": 3,
"product": "bat"
},
{
"requirementId": 4,
"product": "bat"
}
]
}
]
I have tried the following query, but it returns only one matching element.
db.collection.find({"key": 3}, {"productArray": {"$elemMatch": { "product": "bat"}}})
The result of the above query is as follows:
[
{
"_id": ObjectId("5a934e000102030405000002"),
"productArray": [
{
"product": "bat",
"requirementId": 1
}
]
}
]
Can I get the expected output below using a MongoDB query, or should I use another approach for my case?
My expected output is as follows:
[
{
"productArray": [
{
"requirementId": 1,
"product": "bat"
},
{
"requirementId": 3,
"product": "bat"
},
{
"requirementId": 4,
"product": "bat"
}
]
}
]
As you found, $elemMatch (and also $) are lazy operators and return only the first matching element.
You could use an aggregation expression in find's projection (MongoDB 4.4+, see the sketch after the pipeline below), but aggregation is better supported:
db.collection.aggregate([{
$match: {
key: 3
}
},
{
$project: {
productArray: {
"$filter": {
"input": "$productArray",
"as": "p",
"cond": {
$eq: [
"$$p.product",
"bat"
]
}
}
}
}
}])
Live version
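A minimal sketch of the find() variant mentioned above (MongoDB 4.4+, which allows aggregation expressions in the projection; collection and field names as in the question):
// Return only the productArray entries whose product is "bat" for key 3
db.collection.find(
  { key: 3 },
  {
    productArray: {
      $filter: {
        input: "$productArray",
        as: "p",
        cond: { $eq: [ "$$p.product", "bat" ] }
      }
    }
  }
)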

Elasticsearch aggregation only on specific entries in an array

I'm new to Elasticsearch and can't figure out how to solve the following problem.
The easiest way to explain my problem is to show you an example.
The following array "listing" is part of all my files in Elasticsearch, but the entries vary, so the "person" with "id" 42 might be in 50% of my files. What I'm trying to do is get the average "ranking.position.standard" of all the persons with id 42 across all my files in Elasticsearch.
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 2
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 7
}
}
}
]
}
Thanks for your help!
First of all, do you store listing as an object or as a nested data type? I don't think it's going to work if it's an object, so try the following example:
PUT /test
{
"mappings": {
"_default_": {
"properties": {
"listing": {
"type": "nested"
}
}
}
}
}
PUT /test/aa/1
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 2
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 7
}
}
}
]
}
PUT /test/aa/2
{
"listing": [
{
"person": {
"id": 42
},
"ranking": {
"position": {
"standard": 5
}
}
},
{
"person": {
"id": 55
},
"ranking": {
"position": {
"standard": 6
}
}
}
]
}
GET test/_search
{
"size": 0,
"aggs": {
"nest": {
"nested": {
"path": "listing"
},
"aggs": {
"persons": {
"terms": {
"field": "listing.person.id",
"size": 10
},
"aggs": {
"avg_standard": {
"avg": {
"field": "listing.ranking.position.standard"
}
}
}
}
}
}
}
}
This returned the following result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"nest": {
"doc_count": 4,
"persons": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 42,
"doc_count": 2,
"avg_standard": {
"value": 3.5
}
},
{
"key": 55,
"doc_count": 2,
"avg_standard": {
"value": 6.5
}
}
]
}
}
}
}
It does seem correct.

Query ElasticSearch by array filter length

I have a prop containing an array of integers:
_source: {
counts: [
11,
7,
18,
3,
22
]
}
From another post I know that I can filter by a range using:
{
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"range": {
"counts": {
"gte": 10,
"lte": 20
}
}
}
}
}
}
However, I additionally need to constrain how many values match the range. For instance, I only want records back which have fewer than 3 counts between 10 and 20.
Mapping used:
{
"properties" : {
"counts" : {
"type" : "integer"
}
}
}
These are the docs I indexed:
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "test",
"_id": "2",
"_score": 1,
"_source": {
"counts": [
13,
17
]
}
},
{
"_index": "test_index",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"counts": [
11,
7,
18,
3,
22
]
}
},
{
"_index": "test_index",
"_type": "test",
"_id": "3",
"_score": 1,
"_source": {
"counts": [
11,
19
]
}
}
]
}
}
Now try this query:
{
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": [
{"script" : { "script" : "doc['counts'].values.size() < 4" }},
{"range": { "counts": { "gte": 10, "lte": 20 } }}
]
}
}
}
Results: Only doc id 2 and 3 are returned. Doc 1 is not returned.
{
"took": 29,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "test",
"_id": "2",
"_score": 1,
"_source": {
"counts": [
13,
17
]
}
},
{
"_index": "test_index",
"_type": "test",
"_id": "3",
"_score": 1,
"_source": {
"counts": [
11,
19
]
}
}
]
}
}
Is this what you are trying to do?
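If instead you need to count only the values that fall inside the range (rather than the total array length as above), here is a rough, untested sketch using a Painless script query (ES 5.6+; field name and bounds taken from the question):
{
  "query": {
    "bool": {
      "filter": {
        "script": {
          "script": {
            "lang": "painless",
            "source": "int c = 0; for (def v : doc['counts']) { if (v >= params.low && v <= params.high) { c++; } } return c < params.max;",
            "params": { "low": 10, "high": 20, "max": 3 }
          }
        }
      }
    }
  }
}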

ElasticSearch Read element on array

I'm working with Elasticsearch; currently I have a structure like this:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "999999",
"_type": "content",
"_id": "NmYTku",
"_score": 1,
"_source": {
"internal_id": "NmYTk4",
"external_id": "Ga_UI502",
"
"images": [
{
"uri_id": "2939306",
"url": "14mast_head.jpg",
"type": "Masthead",
"orientation": "Landscape",
"x_resolution": 3280,
"y_resolution": 1480
},
{
"uri_id": "Galavision/POST_poster/2939306",
"url": "140603_29un_erro_poster.jpg",
"type": "Poster",
"orientation": "Portrait",
"x_resolution": 720,
"y_resolution": 405
},
{
"uri_id": "Galavision/POST_poster_title/2939306",
"url": "140603_29un_erro_poster_title.jpg",
"type": "PosterWithTitle",
"orientation": "Portrait",
"x_resolution": 924,
"y_resolution": 518
},
{
"uri_id": "Galavision/POST_poster_cover/2939306",
"url": "140603_29poster_cover.jpg",
"type": "Poster",
"orientation": "Landscape",
"x_resolution": 600,
"y_resolution": 868
}
]
}
}
]
}
}
I was wondering how I can get only certain values from my array, e.g.
I want only the images with orientation Landscape and type Poster. I tried this query, but it returns all the image elements.
{
"query": {
"filtered": {
"filter": { "term":{"_id":"NmYTku"} }
}
},
"_source": ["images"]
}
I have no idea how to filter on the elements.
Are you using nested or child fields for the images? If not, that doc is actually being indexed like:
...
images.uri_id = [1, 2, 3, 4, etc..]
images.url = [1, 2, 3, 4, etc..]
images.type = [1, 2, 3, 4, etc..]
...
so the distinction between individual elements is gone. Try giving this a read:
http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/complex-core-fields.html
If you don't need to query, why not just filter out the ones you like client side?
Try this:
{
  "query": {
    "filtered": {
      "query": { "match": { "_id": "NmYTku" } },
      "filter": {
        "bool": {
          "must": [
            { "term": { "images.orientation": "Landscape" } },
            { "term": { "images.type": "Poster" } }
          ]
        }
      }
    }
  },
  "_source": [ "images" ]
}
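For the nested-field approach the first answer points to, a rough sketch: if images is mapped as a nested type, a nested query with inner_hits returns only the matching image objects (inner_hits exists since Elasticsearch 1.5; my_index is a placeholder, and this assumes orientation and type are indexed not_analyzed / keyword):
PUT /my_index
{
  "mappings": {
    "content": {
      "properties": {
        "images": { "type": "nested" }
      }
    }
  }
}
GET /my_index/content/_search
{
  "_source": false,
  "query": {
    "bool": {
      "must": [
        { "ids": { "values": [ "NmYTku" ] } },
        {
          "nested": {
            "path": "images",
            "query": {
              "bool": {
                "must": [
                  { "term": { "images.orientation": "Landscape" } },
                  { "term": { "images.type": "Poster" } }
                ]
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}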
