golang json unstructured data - arrays

I have unstructured JSON and want to retrieve each and every key from it.
Then I want to loop through the keys and values. If a value is itself a nested JSON object or an array, the traversal should keep descending into it.
I have found an example of parsing structured JSON, but I cannot get it to work for this case.
I checked this code but could not work out the complete solution:
err := json.Unmarshal([]byte(input), &customers)
Sample json:
{
"components": [
{
"key": "d1",
"components": [
{
"key": "custname",
"value": "Abraham",
"input": true,
"tableView": true
},
{
"key": "type",
"type": "radio",
"label": "Fisrt",
"values": [
{
"label": "Sole",
"value": "sole",
"shortcut": ""
},
{
"label": "Bata",
"value": "Bata",
"shortcut": ""
}
],
"validate": {
"required": true
},
"tableView": false
},
{
"key": "registeredField",
"value": "reg 111",
"input": true
},
{
"key": "dirc",
"value": "abraham"
},
{
"key": "gst",
"value": "textfield",
"useLocaleSettings": false
},
{
"key": "pan",
"value": "AAAAA0000",
"useLocaleSettings": false
}
],
"collapsible": false
}
]
}
Expected output:
Key: custname Value: Abraham
Key: type Value: {
"label": "Sole",
"value": "sole",
"shortcut": ""
}, {
"label": "Bata",
"value": "Bata",
"shortcut": ""
}
Key: registeredField Value: reg 111

So you have an object with a key components, that is a slice of components. Each of these components can have a number of keys. First thing to do is to take stock of all possible fields a component can have and define a type with said fields:
// Validation holds the validation rules attached to a component.
type Validation struct {
	Required bool `json:"required"`
}

// Value is one selectable entry of a component that offers several values
// (for example the options of a radio component).
type Value struct {
	Label    string `json:"label"`
	Value    string `json:"value"`
	Shortcut string `json:"shortcut"`
}

// Data is one node of the component tree. A node is either a container that
// carries nested Components, or a leaf that carries a single Value or a slice
// of Values.
type Data struct {
	Components []Data `json:"components,omitempty"`
	// Collapsable keeps its original Go name for compatibility, but the tag
	// uses the spelling found in the input ("collapsible"); with the old
	// "collapsable" tag the key never matched and the field stayed false.
	Collapsable       bool        `json:"collapsible"`
	Input             bool        `json:"input"`
	Key               string      `json:"key"`
	TableView         bool        `json:"tableView"`
	Type              string      `json:"type"`
	Value             string      `json:"value"`
	UseLocaleSettings bool        `json:"useLocaleSettings"`
	Values            []Value     `json:"values,omitempty"`
	Validate          *Validation `json:"validate,omitempty"`
}
Now you simply take the input and unmarshal it into the Data type:
data := Data{}
if err := json.Unmarshal([]byte(input), &data); err != nil {
// handle error
fmt.Printf("Oops, something went wrong: %+v", err)
return
}
At this point, we have all the data in a struct, so we can start printing it all out. First thing we notice is how Data basically contains a slice of Data types. A recursive function to print it all out would make sense:
// PrintComponents walks the component tree and prints one "Key: ... Value: ..."
// line per leaf. Container nodes (those with nested Components) are descended
// into and not printed themselves. A leaf that has Values is printed with
// those values rendered as indented JSON; otherwise its plain Value is used.
func PrintComponents(data []Data) {
	for i := range data {
		comp := &data[i]
		switch {
		case len(comp.Components) != 0:
			// Container: only its children are printed.
			PrintComponents(comp.Components)
		case len(comp.Values) == 0:
			// Plain leaf with a single string value.
			fmt.Printf("Key: %s Value: %s\n", comp.Key, comp.Value)
		default:
			// Leaf with a slice of values: render the slice as JSON.
			encoded, err := json.MarshalIndent(comp.Values, "", " ")
			if err != nil {
				fmt.Printf("Oops, looks like we couldn't format something: %+v\n", err)
				return // stop printing; the caller gets no error value
			}
			fmt.Printf("Key: %s Value: %s\n", comp.Key, string(encoded))
		}
	}
}
You could alter this function a bit to pass in an indent parameter for each level of recursion, so you can print out the components in indented blocks:
// PrintComponents prints the component tree with one extra level of
// indentation per nesting level.
//
// data is the slice of components to print and indent is the prefix written
// in front of every line at the current level. Container nodes print a
// "Component block" header and then their children one level deeper; leaves
// print their Value, or their Values rendered as JSON.
func PrintComponents(data []Data, indent string) {
	for _, c := range data {
		if len(c.Components) > 0 {
			// Print the key for this block of components at the current level.
			fmt.Printf("%sComponent block: %s\n", indent, c.Key)
			// Descend into the children of THIS component. Passing data here
			// instead would re-enter the same slice and never terminate.
			PrintComponents(c.Components, indent+" ")
			continue // we're done with this component
		}
		val := c.Value
		if len(c.Values) > 0 {
			// The current indent is used as the JSON line prefix so the
			// rendered values line up with the surrounding output.
			vals, err := json.MarshalIndent(c.Values, indent, " ")
			if err != nil {
				fmt.Printf("Oops, looks like we couldn't format something: %+v\n", err)
				return
			}
			val = string(vals)
		}
		fmt.Printf("%sKey: %s Value: %s\n", indent, c.Key, val)
	}
}
Putting it all together, we get this:
// main decodes the JSON held in input into a Data tree and prints its
// components twice: first flat, then with per-level indentation.
func main() {
	var data Data
	err := json.Unmarshal(input, &data)
	if err != nil {
		fmt.Println(err.Error())
		return
	}

	// Components may be nested, so both printers are recursive.
	fmt.Println("Printing with simple recursive function")
	PrintComponents(data.Components)

	fmt.Println("\n\nPrinting with indented recursion:")
	PrintComponentsIndent(data.Components, "") // top level starts with no indent
}
// PrintComponents prints every leaf component as "Key: <key> Value: <value>".
// Components that contain nested components are traversed recursively and do
// not produce a line of their own. When a leaf has Values, those are printed
// as indented JSON in place of the plain Value string.
func PrintComponents(data []Data) {
	const lineFormat = "Key: %s Value: %s\n"
	for _, item := range data {
		if len(item.Components) != 0 {
			PrintComponents(item.Components)
			continue
		}
		if len(item.Values) == 0 {
			fmt.Printf(lineFormat, item.Key, item.Value)
			continue
		}
		// This component has a slice of values, not a single value.
		raw, err := json.MarshalIndent(item.Values, "", " ")
		if err != nil {
			fmt.Printf("Oops, looks like we couldn't format something: %+v\n", err)
			return
		}
		fmt.Printf(lineFormat, item.Key, string(raw))
	}
}
// PrintComponentsIndent prints the component tree, prefixing every line with
// indent and extending that prefix for each level of nesting.
//
// Container nodes print a "Component block" header followed by their children
// one level deeper. Leaves print their Value, or their Values rendered as
// JSON with indent as the line prefix. If the values cannot be rendered, a
// message is printed and the function returns early.
func PrintComponentsIndent(data []Data, indent string) {
	for _, c := range data {
		if len(c.Components) > 0 {
			fmt.Printf("%sComponent block: %s\n", indent, c.Key)
			PrintComponentsIndent(c.Components, indent+" ")
			continue
		}
		val := c.Value
		if len(c.Values) > 0 {
			// This component has a slice of values, not a single value.
			vals, err := json.MarshalIndent(c.Values, indent, " ")
			if err != nil {
				// Report the failure the same way PrintComponents does
				// instead of silently printing an empty value.
				fmt.Printf("Oops, looks like we couldn't format something: %+v\n", err)
				return
			}
			val = string(vals) // marshalled values as string
		}
		fmt.Printf("%sKey: %s Value: %s\n", indent, c.Key, val)
	}
}
Which outputs:
Printing with simple recursive function
Key: custname Value: Abraham
Key: type Value: [
{
"label": "Sole",
"value": "sole",
"shortcut": ""
},
{
"label": "Bata",
"value": "Bata",
"shortcut": ""
}
]
Key: registeredField Value: reg 111
Key: dirc Value: abraham
Key: gst Value: textfield
Key: pan Value: AAAAA0000
Printing with indented recursion:
Component block: d1
Key: custname Value: Abraham
Key: type Value: [
{
"label": "Sole",
"value": "sole",
"shortcut": ""
},
{
"label": "Bata",
"value": "Bata",
"shortcut": ""
}
]
Key: registeredField Value: reg 111
Key: dirc Value: abraham
Key: gst Value: textfield
Key: pan Value: AAAAA0000
Your desired output doesn't include the square brackets for the values slice. Well, that's a really easy thing to get rid of. The square brackets are always the first and last characters of the string, and json.Marshal returns a byte slice ([]byte). Chopping off the first and last characters is as easy as:
// Drop only the leading '[' and the trailing ']'. The upper slice bound is
// exclusive, so len(vals)-1 keeps everything up to offset len(vals)-2.
val = string(vals[1 : len(vals)-1])
Taking a sub-slice of the byte slice returned by json.Marshal, starting at offset 1 (cutting off offset 0, which is [), and keeping everything until the next to last character (offset len(vals)-2). For the indented example, that will leave you with a blank line, containing an unknown number of spaces (indentation). You can trim the right-hand side of the string using the strings package:
// Remove the square brackets (first and last byte only; the upper slice
// bound is exclusive), then trim the trailing new-line and spaces.
val = strings.TrimRight(string(vals[1:len(vals)-1]), "\n ")

Related

How to compare list of array to another array using typescript

I have a list of objects and a single object. I want to compare them like this: if both productId and attributesData match, return something. My list structure is given below.
List1:
{
"unitPrice": "800.0",
"productTypeTitle": "TYPE",
"productId": "470",
"attributesData": [
{
"attributeName": "COLOR",
"attributeData": "BLUE"
},
{
"attributeName": "SIZE",
"attributeData": "36"
},
{..}
],
"count": 2,
"shopid": "53",
"sessionid": "1643195257593",
...
},
{
},...
]
List2:
{
"unitPrice": "800.0",
"productTypeTitle": "TYPE",
"productId": "470",
"attributesData": [
{
"attributeName": "SIZE",
"attributeData": "42"
},
{
"attributeName": "COLOR",
"attributeData": "Orange"
},{...}
]
...
}
Here productId is the same but attributesData is not; how can I detect that? I am able to check whether productId is the same, but I am unable to compare attributesData. How can I solve this in an efficient way?
You can use lodash isEqual, https://docs-lodash.com/v4/is-equal/
// Imported under an alias so that the wrapper below does not shadow it.
import { isEqual as lodashIsEqual } from "lodash";

/**
 * Reports whether list1 and list2 are deeply equal, delegating the
 * comparison to lodash's isEqual.
 *
 * The wrapper has the same name as the lodash function, so calling
 * `isEqual` from inside it would call the wrapper itself and recurse
 * forever; the aliased import avoids that.
 */
function isEqual(list1, list2): boolean {
  return lodashIsEqual(list1, list2);
}
if not using lodash
-then you need to cycle through all the properties and compare
/**
 * Structural equality check.
 *
 * Returns true when a and b are identical (===), or when both are non-null
 * objects that have the same number of enumerable properties, every property
 * of b is also present in a, and the corresponding values are themselves
 * deepEqual. Anything else yields false.
 */
function deepEqual(a, b) {
  if (a === b) {
    return true;
  }

  // Only two non-null objects can still be equal at this point.
  const isObjectLike = (value) => value != null && typeof value == "object";
  if (!isObjectLike(a) || !isObjectLike(b)) {
    return false;
  }

  // Count a's enumerable properties (for...in, so inherited ones count too).
  let countA = 0;
  for (const unused in a) {
    countA++;
  }

  // Walk b's properties, comparing each against a as we count them.
  let countB = 0;
  for (const key in b) {
    countB++;
    if (!(key in a)) {
      return false;
    }
    if (!deepEqual(a[key], b[key])) {
      return false;
    }
  }

  return countA == countB;
}
this link source
enter link description here
A recursive comparison like this is needed because objects are compared by reference, not by content, when you test them with == or ===.

How to format JSON correctly with arrays

I'm trying to send a JSON payload in my POST request but I'm not sure on how to format it correctly to use arrays. This below is what the correct JSON itself looks like:
{
"animal": "dog",
"contents": [{
"name": "daisy",
"VAL": "234.92133",
"age": 3
}]
}
I have this so far:
group := map[string]interface{}{
"animal": "dog",
"contents": map[string]interface{}{
"name": "daisy",
"VAL": "234.92133",
"age": 3,
},
}
But I can't figure out how to do array of contents (the square brackets), only the curly brackets from "contents".
The quick answer:
group := map[string]interface{}{
"animal": "dog",
"contents": []map[string]interface{}{
{
"name": "daisy",
"VAL": "234.92133",
"age": 3,
},
},
}
But as already said in the comments it is better (type safety) to use structs instead:
type (
	// Animal is the top-level payload: the kind of animal plus a list of
	// content entries, encoded under the "animal" and "contents" keys.
	Animal struct {
		Type     string          `json:"animal"`
		Contents []AnimalContent `json:"contents"`
	}

	// AnimalContent is a single entry of Animal.Contents, encoded under the
	// "name", "VAL" and "age" keys.
	AnimalContent struct {
		Name  string `json:"name"`
		Value string `json:"VAL"`
		Age   int    `json:"age"`
	}
)
Then create with:
group := Animal{
Type: "dog",
Contents: []AnimalContent{
{
Name: "daisy",
Value: "234.92133",
Age: 3,
},
},
}
// to transform to json format
bts, err := json.Marshal(group)
if err != nil {
log.Fatal(err)
}
fmt.Println(string(bts))

How to return content of a nested indexed field with some top level fields using Mongodb?

Consider these documents:
{
"chapterNumber": "1",
"contents": [
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 1",
"contents": [
"not clean content",
{
"p": null
}
]
},
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 2",
"contents": [
"not clean content",
{
"p": null
}
]
},
]
}
{
"chapterNumber": "2",
"contents": [
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 3",
"contents": [
"not clean content",
{
"p": null
}
]
},
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 4",
"contents": [
"not clean content",
{
"p": null
}
]
},
]
}
If I do an index on the field paragraphCleanText and then issue a query to search for this string cleaned content 3, is there a way to return the following structure from a single or optimized query?
{
"chapterNumber": "2",
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 3"
}
You need to use aggregation-pipeline for this :
1.If your "contents.paragraphCleanText" is unique :
Query :
db.collection.aggregate([
/** Filter docs based on condition */
{ $match: { "contents.paragraphCleanText": "cleaned content 3" } },
{
$project: {
_id: 0,
chapterNumber: 1,
/** contents will be an object, which is matched object, as `$filter` will return an array of matched objects, we're picking first object - assuming `contents.paragraphCleanText` is unique,
* Just in case if you've multiple objects that matches with given condition use `$reduce` instead of `$filter + $arrayElemAt` */
contents: {
$arrayElemAt: [
{ $filter: { input: "$contents", cond: { $eq: [ "$$this.paragraphCleanText", "cleaned content 3" ] } } },
0
]
}
}
},
{
$project: {
chapterNumber: 1,
paragraphNumber: "$contents.paragraphNumber",
paragraphCleanText: "$contents.paragraphCleanText"
}
}
])
Test : mongoplayground
Note : If you want entire object from contents array that matches condition then you can simply use $elemmatch-projection-operator or $-positional-projection-operator, but as you don't want the entire object and just couple of fields from the matched object then you need to use aggregation cause projection option in .find() is not capable of transforming fields - it's only capable of including or excluding fields from document, So you'll use $project stage of aggregation to do this.
If your "contents.paragraphCleanText" can have duplicates i.e; there can be multiple objects inside contents array with "contents.paragraphCleanText": "cleaned content 3" :
Query :
db.collection.aggregate([
{ $match: { "contents.paragraphCleanText": "cleaned content 3" } },
{
$project: {
_id: 0,
chapterNumber: 1,
contents: {
$reduce: {
input: "$contents",
initialValue: [], // create an empty array to start with
in: {
$cond: [
{
$eq: [ "$$this.paragraphCleanText", "cleaned content 3" ] // Condition to check
},
{ // If an object has matched with condition concatinate array converted that object into holding array
$concatArrays: [ "$$value", [ { paragraphCleanText: "$$this.paragraphCleanText", paragraphNumber: "$$this.paragraphNumber" } ] ]
},
"$$value" // If current object is not matched return holding array as is
]
}
}
}
}
},
{$unwind : '$contents'} // Purely optional stage - add this stage & test output to decide whether to include or not & add `$project` stage after this stage
])
Test : mongoplayground
Found a pretty simple solution for my use case:
add an index on paragraphCleanText field, plus chapterNumber and paragraphNumber as additional fields
make the following query db.yourCollection.find({"contents.paragraphCleanText" : { $regex: /^ some text that is present in the field .*/ } })._addSpecial( "$returnKey", true )
`

Get specified no of array elements from elasticsearch query

I am having an index on elasticsearch having an array in its record.
Say the field name is "samples" and the array is :
["abc","xyz","mnp".....]
So is there any query that lets me specify the number of elements to retrieve from the array?
Say I want the retrieved record to contain only the first 2 elements of the samples array.
Assuming you have array of strings as a document. I have a couple of ideas in my mind which might help you.
PUT /arrayindex/
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"spacelyzer": {
"tokenizer": "whitespace"
},
"commalyzer": {
"type": "custom",
"tokenizer": "commatokenizer",
"char_filter": "square_bracket"
}
},
"tokenizer": {
"commatokenizer": {
"type": "pattern",
"pattern": ","
}
},
"char_filter": {
"square_bracket": {
"type": "mapping",
"mappings": [
"[=>",
"]=>"
]
}
}
}
}
},
"mappings": {
"array_set": {
"properties": {
"array_space": {
"analyzer": "spacelyzer",
"type": "string"
},
"array_comma": {
"analyzer": "commalyzer",
"type": "string"
}
}
}
}
}
POST /arrayindex/array_set/1
{
"array_space": "qwer qweee trrww ooenriwu njj"
}
POST /arrayindex/array_set/2
{
"array_comma": "[qwer,qweee,trrww,ooenriwu,njj]"
}
The above DSL accepts two types of arrays: one is a whitespace-separated string where every word represents an element of the array, and the other is the bracketed, comma-separated form that you specified. The latter form is what you get from Python: if you index such a list as a string, it is automatically converted, i.e. ["abc","xyz","mnp".....] would be converted to "["abc","xyz","mnp".....]".
spacelyzer tokenizes according to the whitespaces and commalyzer tokenizes according to the commas and removes [ and ] from the string.
Now if you call the Termvector API like this:
GET arrayindex/array_set/1/_termvector
{
"fields" : ["array_space", "array_comma"],
"term_statistics" : true,
"field_statistics" : true
}
GET arrayindex/array_set/2/_termvector
{
"fields" : ["array_space", "array_comma"],
"term_statistics" : true,
"field_statistics" : true
}
You can simply get the position of the element from their responses e.g. to find the position of "njj" use
termvector_response["term_vectors"]["array_comma"]["terms"]["njj"]["tokens"][0]["position"] or,
termvector_response["term_vectors"]["array_space"]["terms"]["njj"]["tokens"][0]["position"]
Both will give you 4, which is the actual index in the array specified. I suggest you go with the whitespace design.
The Python code for this can be:
from elasticsearch import Elasticsearch
ES_HOST = {"host" : "localhost", "port" : 9200}
ES_CLIENT = Elasticsearch(hosts = [ES_HOST], timeout = 180)
def getTermVector(doc_id):
    """Fetch the term vector of document ``doc_id`` via ``ES_CLIENT``.

    The request targets index "arrayindex" / doc type "array_set", asks for
    the ``array_space`` and ``array_comma`` fields, and enables both field and
    term statistics. The client's response is returned unchanged.
    """
    request = {
        "index": "arrayindex",
        "doc_type": "array_set",
        "id": doc_id,
        "field_statistics": True,
        "fields": ['array_space', 'array_comma'],
        "term_statistics": True,
    }
    return ES_CLIENT.termvector(**request)
def getElements(num, array_no):
    """Print the first ``num`` elements of the array stored in a document.

    ``array_no`` is the document id passed to ``getTermVector``. The terms of
    the ``array_space`` field are read from its response, and every term whose
    token position is below ``num`` is printed as ``<term> # index <i>``, in
    ascending position order.
    """
    all_terms = getTermVector(array_no)['term_vectors']['array_space']['terms']

    # Bucket the terms by token position in a single pass, instead of
    # rescanning every term once per requested index.
    terms_at = {}
    for term in all_terms:
        for token in all_terms[term]['tokens']:
            terms_at.setdefault(token['position'], []).append(term)

    for i in range(num):
        for term in terms_at.get(i, ()):
            # A formatted string prints the same text on Python 2 and 3;
            # the original Python 2 print statement does not parse on 3.
            print("{} # index {}".format(term, i))
getElements(3, 1)
# qwer # index 0
# qweee # index 1
# trrww # index 2

AppEngine Datastore Encoding a Slice of Slices with Go

I need to store a slice of slices in the Datastore. Since the Datastore doesn't support two levels of slices, I have to encode the child slice as JSON and store it as a []byte. I'm using the PropertyLoadSaver to accomplish this. The saving and loading works, but I've noticed that the saved entity has an empty []byte for every parent element. After some investigation I discovered this is caused by the Variants.OptionsJSON property being sent in the Save() method and also being an attribute of the struct.
So my questions are:
Is this a bug or am I doing something wrong?
Is there a better way to accomplish this?
The one constraint I have is that I need to be able to query the entity by one of the "Variant" attributes, but not the "Variant.Options".
Thanks!
Example:
Given JSON
{
"variants": [{
"options": [
{
"name": "One",
"value": "one"
},{
"name": "Two",
"value": "two"
}]
}, {
"options": [
{
"name": "Three",
"value": "three"
},{
"name": "Four",
"value": "four"
}]
}]
}
How it's stored in the datastore, notice the two empty '', '' attributes at the end:
Entity Kind: Products
Entity Key: ahBkZXZ-c3RvcmVza2ktY29tchULEghQcm9kdWN0cxiAgICAgNDHCww
ID: 6507459568992256
Variants.OptionsJSON (list): `['[{"name":"One","value":"one"},{"name":"Two","value":"two"}]', '[{"name":"Three","value":"three"},{"name":"Four","value":"four"}]', '', '']`
model.go
import (
"appengine/datastore"
"encoding/json"
)
// Option is a single name/value pair belonging to a Variant.
type Option struct {
Name string `json:"name"`
Value string `json:"value"`
}
// Variant holds its options in two forms: Options is the decoded slice that
// appears in JSON but is excluded from the datastore (datastore:"-"), while
// OptionsJSON is the byte form stored in the datastore under "OptionsJSON"
// and hidden from JSON (json:"-").
type Variant struct {
OptionsJSON []byte `json:"-" datastore:"OptionsJSON"`
Options []Option `json:"options" datastore:"-"`
}
// Product is the entity being stored; it carries a slice of Variants.
type Product struct {
Variants []Variant `json:"variants"`
}
// Load fills x from the properties received on c and then decodes each
// variant's OptionsJSON bytes into its Options slice.
//
// It returns the error from datastore.LoadStruct, or the first JSON decoding
// error. A variant whose OptionsJSON is empty is treated as having no options
// rather than as a decoding failure.
func (x *Product) Load(c <-chan datastore.Property) error {
	if err := datastore.LoadStruct(x, c); err != nil {
		return err
	}
	for i := range x.Variants {
		raw := x.Variants[i].OptionsJSON
		if len(raw) == 0 {
			// Nothing was stored for this variant; json.Unmarshal would
			// reject empty input, so skip decoding.
			x.Variants[i].Options = nil
			continue
		}
		var opts []Option
		if err := json.Unmarshal(raw, &opts); err != nil {
			return err
		}
		x.Variants[i].Options = opts
	}
	return nil
}
// Save sends one unindexed, multi-valued "Variants.OptionsJSON" property per
// variant, holding that variant's Options encoded as JSON, and then hands the
// rest of the struct to datastore.SaveStruct.
//
// It returns the first JSON encoding error, or the result of SaveStruct.
//
// NOTE(review): Variant.OptionsJSON is also a datastore-tagged struct field,
// so SaveStruct presumably emits "Variants.OptionsJSON" a second time with
// the (empty) field values. This looks like the source of the extra empty
// entries described in the question; confirm before relying on it.
func (x *Product) Save(c chan<- datastore.Property) error {
	// Encode everything before sending anything, so an encoding failure
	// cannot leave a partially written property list on the channel.
	encoded := make([][]byte, 0, len(x.Variants))
	for _, v := range x.Variants {
		b, err := json.Marshal(v.Options)
		if err != nil {
			// The PropertyLoadSaver contract asks Save to close the channel
			// even when it fails; on the normal path SaveStruct is expected
			// to do that. TODO confirm against the datastore package docs.
			close(c)
			return err
		}
		encoded = append(encoded, b)
	}
	for _, b := range encoded {
		c <- datastore.Property{
			Name:     "Variants.OptionsJSON",
			Value:    b,
			NoIndex:  true,
			Multiple: true,
		}
	}
	return datastore.SaveStruct(x, c)
}

Resources