We're in the process of migrating from Elasticsearch 5.6 to 7.9. On 5.6 we have two indices, one with 3.4k documents taking up 111.2 MB and another with 81.6k documents taking up 845.6 MB. On 7.9 we have the same two indices (written to by the same process) with similar mappings, yet they use 14.3 GB and 15.6 GB respectively.
I don't understand what could be making these indices so much larger on 7.9 than on 5.6.
If you are curious, here are the mappings (I've obfuscated the names of many fields to protect our data):
ES 5.6
{
"blah-state-37c088aea98d4b60ad58fb04abe55aa7": {
"mappings": {
"blahblah": {
"properties": {
"blahStatus": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"blah": {
"type": "integer"
},
"blahblah": {
"type": "long"
},
"blahblahblah": {
"type": "text"
},
"blahblahblahblah": {
"type": "integer"
},
"blahblahblahzzz": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "ngram_suggest"
},
"blahblahblahhh": {
"type": "text",
"index": false,
"store": true
},
"blahblahblaaaa": {
"type": "keyword"
},
"created": {
"type": "text"
},
"ended": {
"type": "text"
},
"blaaaaah": {
"type": "boolean"
},
"blaahaah": {
"type": "integer"
},
"bloop": {
"type": "boolean"
},
"bloopibob": {
"type": "integer"
},
"blabiba": {
"type": "keyword"
},
"blah": {
"type": "long"
},
"bleeeep": {
"type": "boolean"
},
"blahhh": {
"type": "boolean"
},
"blahah": {
"type": "text"
},
"hidden": {
"type": "boolean"
},
"blah1": {
"type": "boolean"
},
"blah2": {
"type": "boolean"
},
"blah3": {
"type": "boolean"
},
"blah4": {
"type": "boolean"
},
"blah5": {
"type": "boolean"
},
"blah6": {
"type": "boolean"
},
"blah7": {
"type": "boolean"
},
"blah8": {
"type": "boolean"
},
"blah9": {
"type": "boolean"
},
"blah10": {
"type": "boolean"
},
"blah11": {
"type": "boolean"
},
"blah12": {
"type": "boolean"
},
"blah13": {
"type": "boolean"
},
"isInvalid": {
"type": "boolean"
},
"blah14": {
"type": "boolean"
},
"isNew": {
"type": "boolean"
},
"blah15": {
"type": "boolean"
},
"keywords": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"languages": {
"type": "keyword"
},
"blah16": {
"type": "integer"
},
"blah17": {
"type": "integer"
},
"blah18": {
"type": "keyword"
},
"maxWait": {
"type": "integer"
},
"minBuyIn": {
"type": "float"
},
"nickname": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"nicknamePartial": {
"type": "text",
"analyzer": "ngram_partial"
},
"nicknameSuggest": {
"type": "text",
"analyzer": "ngram_suggest"
},
"blah19": {
"type": "text"
},
"blah20": {
"type": "boolean"
},
"DocumentID": {
"type": "keyword"
},
"pledgedAmt": {
"type": "float"
},
"preferredLanguage": {
"type": "text"
},
"blah21": {
"type": "integer"
},
"blah22": {
"type": "integer"
},
"rating": {
"type": "integer"
},
"region": {
"type": "keyword"
},
"requestedAmt": {
"type": "float"
},
"showInFreeAreas": {
"type": "boolean"
},
"blah23": {
"type": "boolean"
},
"blah24": {
"type": "text"
},
"blah25": {
"type": "scaled_float",
"scaling_factor": 100000
},
"sortScore": {
"type": "long"
},
"started": {
"type": "text"
},
"statusKey": {
"type": "text"
},
"blah26": {
"type": "long"
},
"blah27": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"tagName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"tagNameRaw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "normalized"
},
"tagNameSuggest": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "ngram_suggest"
},
"blah28": {
"type": "boolean"
},
"traceId": {
"type": "object",
"enabled": false
},
"updated": {
"type": "long"
},
"blah29": {
"type": "boolean"
}
}
}
}
}
}
and 7.9
{
"blah-state-37c088aea98d4b60ad58fb04abe55aa7" : {
"mappings" : {
"properties" : {
"accountStatus" : {
"type" : "keyword"
},
"boost" : {
"type" : "integer"
},
"age" : {
"type" : "integer"
},
"bleeeeeep" : {
"type" : "keyword"
},
"bleeeep" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "ngram_suggest"
},
"bleeep" : {
"type" : "keyword"
},
"bleep" : {
"type" : "keyword"
},
"blah0" : {
"type" : "boolean"
},
"blah1" : {
"type" : "boolean"
},
"blah2" : {
"type" : "text"
},
"hidden" : {
"type" : "boolean"
},
"blah3" : {
"type" : "boolean"
},
"blah4" : {
"type" : "boolean"
},
"blah5" : {
"type" : "boolean"
},
"blah6" : {
"type" : "boolean"
},
"blah7" : {
"type" : "boolean"
},
"blah8" : {
"type" : "boolean"
},
"blah9" : {
"type" : "boolean"
},
"blah10" : {
"type" : "boolean"
},
"blah11" : {
"type" : "boolean"
},
"blah12" : {
"type" : "boolean"
},
"blah13" : {
"type" : "boolean"
},
"blah14" : {
"type" : "boolean"
},
"blah15" : {
"type" : "boolean"
},
"blah16" : {
"type" : "boolean"
},
"isNew" : {
"type" : "boolean"
},
"blah17" : {
"type" : "boolean"
},
"keywords" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"languages" : {
"type" : "keyword"
},
"blah18" : {
"type" : "integer"
},
"blah19" : {
"type" : "integer"
},
"nickname" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"nicknamePartial" : {
"type" : "text",
"analyzer" : "ngram_partial"
},
"nicknameSuggest" : {
"type" : "text",
"analyzer" : "ngram_suggest"
},
"blah20" : {
"type" : "boolean"
},
"blah21" : {
"type" : "boolean"
},
"DocumentId" : {
"type" : "keyword"
},
"preferredLanguage" : {
"type" : "keyword"
},
"rating" : {
"type" : "integer"
},
"region" : {
"type" : "keyword"
},
"blah22" : {
"type" : "boolean"
},
"blah23" : {
"type" : "boolean"
},
"blah24" : {
"type" : "scaled_float",
"scaling_factor" : 100000.0
},
"sortScore" : {
"type" : "integer"
},
"blah25" : {
"type" : "keyword"
},
"tagName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"tagNameRaw" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "normalized"
},
"tagNameSuggest" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "ngram_suggest"
},
"blah26" : {
"type" : "boolean"
},
"traceId" : {
"type" : "object",
"enabled" : false
},
"updated" : {
"type" : "long"
},
"blah27" : {
"type" : "boolean"
}
}
}
}
}
Edit: here are the settings:
5.6:
"settings": {
"index": {
"analysis": {
"filter": {
"english_stemmer": {
"type": "stemmer",
"language": "english"
}
},
"analyzer": {
"ngram_partial": {
"filter": [
"standard",
"asciifolding",
"lowercase"
],
"tokenizer": "ngram"
},
"ngram_suggest": {
"filter": [
"standard",
"asciifolding",
"lowercase"
],
"tokenizer": "edge_ngram"
},
"normalized": {
"filter": [
"standard",
"asciifolding",
"lowercase",
"english_stemmer"
],
"type": "custom",
"tokenizer": "standard"
}
},
"tokenizer": {
"edge_ngram": {
"token_chars": [
"letter",
"digit",
"punctuation"
],
"min_gram": "1",
"type": "edge_ngram",
"max_gram": "20"
},
"ngram": {
"token_chars": [
"letter",
"digit",
"punctuation"
],
"min_gram": "2",
"type": "ngram",
"max_gram": "20"
}
}
},
"number_of_shards": "12"
}
}
and 7.9:
"settings" : {
"index" : {
"analysis" : {
"filter" : {
"english_stemmer" : {
"type" : "stemmer",
"language" : "english"
}
},
"analyzer" : {
"ngram_partial" : {
"filter" : [
"asciifolding",
"lowercase"
],
"tokenizer" : "ngram"
},
"ngram_suggest" : {
"filter" : [
"asciifolding",
"lowercase"
],
"tokenizer" : "edge_ngram"
},
"normalized" : {
"filter" : [
"asciifolding",
"lowercase",
"english_stemmer"
],
"type" : "custom",
"tokenizer" : "standard"
}
},
"tokenizer" : {
"edge_ngram" : {
"token_chars" : [
"letter",
"digit",
"punctuation"
],
"min_gram" : "1",
"type" : "edge_ngram",
"max_gram" : "20"
},
"ngram" : {
"token_chars" : [
"letter",
"digit",
"punctuation"
],
"min_gram" : "3",
"type" : "ngram",
"max_gram" : "3"
}
}
},
"number_of_shards" : "12"
}
}
Results of _cat/shards
5.6:
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 p STARTED 960 8mb 000.00.000.84 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 960 8mb 000.00.000.89 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 960 8.1mb 000.00.000.80 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 960 7.7mb 000.00.000.86 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 978 9.2mb 000.00.000.90 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 978 8.9mb 000.00.000.81 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 p STARTED 978 8.7mb 000.00.000.87 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 978 8.6mb 000.00.000.83 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 p STARTED 990 8.1mb 000.00.000.85 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 990 7.6mb 000.00.000.91 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 990 8.5mb 000.00.000.88 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 990 7.9mb 000.00.000.82 host3
and 7.9:
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8 p STARTED 262 673.4mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 8 r STARTED 286 667.8mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9 p STARTED 278 754.9mb 000.00.000.124 host10
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 9 r STARTED 196 729.7mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7 p STARTED 247 654.2mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 7 r STARTED 262 645.1mb 000.00.000.126 host12
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4 p STARTED 225 719.8mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 4 r STARTED 282 660.9mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6 p STARTED 274 715.6mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 6 r STARTED 334 706.3mb 000.00.000.119 host5
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11 r STARTED 194 691.6mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 11 p STARTED 255 713.1mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3 p STARTED 212 716.6mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 3 r STARTED 292 709.3mb 000.00.000.121 host7
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 p STARTED 249 749.5mb 000.00.000.118 host4
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 1 r STARTED 289 695.5mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5 p STARTED 243 701.4mb 000.00.000.122 host8
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 5 r STARTED 204 680.9mb 000.00.000.125 host11
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 p STARTED 246 685.8mb 000.00.000.116 host2
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 2 r STARTED 305 676.7mb 000.00.000.117 host3
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10 p STARTED 235 701.2mb 000.00.000.123 host9
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 10 r STARTED 276 690.5mb 000.00.000.115 host1
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 p STARTED 245 674.7mb 000.00.000.120 host6
redacted-state-37c088aea98d4b60ad58fb04abe55aa7 0 r STARTED 301 623.5mb 000.00.000.118 host4
After much experimenting, we've determined that the cause of this was soft deletes. Unfortunately, disabling soft deletes is deprecated, so this will be a problem for us in the future.
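For anyone who hits the same symptom, these are roughly the checks and mitigations that apply. The index name is the one from above and the retention period is only an example value, so treat this as a sketch rather than a definitive fix:
# See how many deleted docs each segment is still carrying
GET _cat/segments/blah-state-37c088aea98d4b60ad58fb04abe55aa7?v&h=shard,segment,docs.count,docs.deleted,size
# Shorten how long soft-deleted operations are retained (the default is 12h)
PUT blah-state-37c088aea98d4b60ad58fb04abe55aa7/_settings
{
  "index.soft_deletes.retention_lease.period": "1h"
}
# Reclaim the space held by deleted docs
POST blah-state-37c088aea98d4b60ad58fb04abe55aa7/_forcemerge?only_expunge_deletes=true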
Related
I want to always replace:
abcd with abcd.india
xyxvv with ind.hello
india.gateway.url/time/123/v1 with india.ios.gw.url/time/123/v2
'someText' with 'diffText'
wherever they are present in the website field under the information array, without modifying the prefix and suffix.
Input :
{
"requestId": 1122344,
"Name": "testing",
"information": [
{
"website": "abcd/122/ty",
"city": "pune",
"pincode": false,
"client_name": 5
},
{
"website": "http://xyxvv/122/ty",
"city": "delhi",
"pincode": false,
"client_name": 5
},
{
"website": "http://someText",
"city": "delhi",
"pincode": false,
"client_name": 5
},
{
"website": "http://india.gateway.url/time/123/v1",
"city": "maharashtra",
"pincode": false,
"client_name": 6
}
],
"ReasonText": "something",
"Code": "ABCD"
}
Desired Output :
{
"requestId" : 1122344,
"Name" : "testing",
"information" : [ {
"website" : "abcd.india/122/ty",
"city" : "pune",
"pincode" : false,
"client_name" : 5
}, {
"website" : "http://ind.hello/122/ty",
"city" : "delhi",
"pincode" : false,
"client_name" : 5
}, {
"website" : "http://diffText",
"city" : "delhi",
"pincode" : false,
"client_name" : 5
},
{
"website" : "http://india.ios.gw.url/time/123/v2",
"city" : "delhi",
"pincode" : false,
"client_name" : 6
} ],
"ReasonText" : "something",
"Code" : "ABCD"
}
You can use the split, join and concat functions consecutively to split the string, conditionally convert the substrings, and then concatenate all the pieces back together, such as:
[
{
"operation": "modify-overwrite-beta",
"spec": {
"information": {
"*": {
"ht": "=split('://',#(1,website))",
"ws": "=split('/',#(1,ht[1]))",
"last_ws": "=lastElement(#(1,ws))",
"size_ws": "=size(#(1,ws))"
}
}
}
},
{
"operation": "shift",
"spec": {
"*": "&",
"information": {
"*": {
"ws": {
"0": { // for the first omponent of the array composed splitting the sting by / characters
"abcd": {
"#abcd.india": "&5[&4].ws"
},
"xyxvv": {
"#ind.hello": "&5[&4].ws"
},
"india.gateway.url": {
"#india.ios.gw.hello": "&5[&4].ws"
}
},
"*": {
"v1": {
"#(3,last_ws)": {//whenever match occurs with v1, set it to v2(occurence assumed only at this leaf level)
"#v2": "&6[&5].ws"
}
},
"*": {
"#ind.hello": "&5[&4].ws"
}
}
},
"*": "&2[&1].&"
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"information": {
"*": {
"ws": "=join('/',#(1,ws))",
"website": "=concat(#(1,ht[0]),'://',#(1,ws))"
}
}
}
},
{
"operation": "remove",
"spec": {
"information": {
"*": {
"ht": "",
"ws": "",
"*ws": ""
}
}
}
}
]
Total newb here.
I'm wondering if someone can help because I'm at a loss! I'm trying to assert that the response from a GET API call meets this monster schema. Using console.log() it looks like each of the properties is accessed successfully, and if I change the object type the test will fail as I'd expect.
Working down the schema: if I change a property type in the data array, e.g. from string to boolean, the test still passes even though the response no longer meets the schema I've defined.
Any guidance is massively appreciated!
//Test the schema is valid
const schema = {
"type": "object",
"properties": {
"current_page": {
"type": "integer"
},
"data": {
"type": "array",
"properties": {
"id": {
"type": "integer"
},
"first_name": {
"type": "string"
},
"last_name": {
"type": "string"
},
"telephone": {
"type": "string"
},
"postcode": {
"type": "string"
},
"date_of_birth": {
"type": "string"
},
"profession": {
"type": "string"
},
"time_served": {
"type": "integer"
},
"national_insurance_number": {
"type": "string"
},
"employer": {
"type": "string"
},
"lco_id": {
"type": "integer"
},
"company_employed": {
"type": "null"
},
"company_employed_email": {
"type": "null"
},
"company_employed_telephone": {
"type": "string"
},
"company_employed_address": {
"type": "null"
},
"apprentice": {
"type": "boolean"
},
"apprentice_trade": {
"type": "string"
},
"apprentice_course": {
"type": "string"
},
"apprentice_started_at": {
"type": "string"
},
"apprentice_ended_at": {
"type": "string"
},
"work_experience": {
"type": "boolean"
},
"work_experience_trade": {
"type": "null"
},
"work_experience_education": {
"type": "null"
},
"work_experience_started_at": {
"type": "null"
},
"work_experience_ended_at": {
"type": "null"
},
"nvq": {
"type": "boolean"
},
"nvq_trade": {
"type": "string"
},
"nvq_education": {
"type": "string"
},
"nvq_started_at": {
"type": "string"
},
"nvq_ended_at": {
"type": "string"
},
"unemployed": {
"type": "boolean"
},
"unemployed_months": {
"type": "null"
},
"company_employed_postcode": {
"type": "string"
},
"partner_relationship": {
"type": "null"
},
"self_partner_relationship": {
"type": "null"
},
"emergency_contact_first_name": {
"type": "string"
},
"emergency_contact_last_name": {
"type": "string"
},
"emergency_contact_telephone": {
"type": "string"
},
"enrollment_id": {
"type": "integer"
},
"created_at": {
"type": "string"
},
"updated_at": {
"type": "string"
},
"mode_of_travel": {
"type": "string"
},
"driver_or_passenger": {
"type": "string"
},
"fuel_type": {
"type": "string"
},
"engine_capacity": {
"type": "string"
},
"rtw_declaration": {
"type": "boolean"
},
"rtw_proof1_upload_id": {
"type": "null"
},
"rtw_proof2_upload_id": {
"type": "null"
},
"card_type": {
"type": "string"
},
"gender": {
"type": "string"
},
"self_gender": {
"type": "null"
},
"marital_status": {
"type": "string"
},
"disability_act": {
"type": "string"
},
"disability_description": {
"type": "null"
},
"ethnic_origin": {
"type": "string"
},
"religion": {
"type": "string"
},
"nationality": {
"type": "string"
},
"sexual_orient": {
"type": "string"
},
"checked_membership": {
"type": "integer"
},
"training_checked": {
"type": "integer"
},
"enrollment": {
"type": "object",
"properties": {
"id": {
"type": "integer"
},
"inducted": {
"type": "boolean"
},
"user_id": {
"type": "integer"
},
"created_at": {
"type": "string"
},
"updated_at": {
"type": "string"
},
"expiry_date": {
"type": "string"
},
"user": {
"type": "object",
"properties": {
"id": {
"type": "integer"
},
"email": {
"type": "string"
},
"role": {
"type": "integer"
},
"created_at": {
"type": "string"
},
"updated_at": {
"type": "string"
},
"state": {
"type": "integer"
},
"last_login_at": {
"type": "null"
}
},
"required": [
"id",
"email",
"role",
"created_at",
"updated_at",
"state",
"last_login_at"
]
}
},
"required": [
"id",
"inducted",
"user_id",
"created_at",
"updated_at",
"expiry_date",
"user"
]
}
},
"required": [
"id",
"first_name",
"last_name",
"telephone",
"postcode",
"date_of_birth",
"profession",
"time_served",
"national_insurance_number",
"employer",
"lco_id",
"company_employed",
"company_employed_email",
"company_employed_telephone",
"company_employed_address",
"apprentice",
"apprentice_trade",
"apprentice_course",
"apprentice_started_at",
"apprentice_ended_at",
"work_experience",
"work_experience_trade",
"work_experience_education",
"work_experience_started_at",
"work_experience_ended_at",
"nvq",
"nvq_trade",
"nvq_education",
"nvq_started_at",
"nvq_ended_at",
"unemployed",
"unemployed_months",
"company_employed_postcode",
"partner_relationship",
"self_partner_relationship",
"emergency_contact_first_name",
"emergency_contact_last_name",
"emergency_contact_telephone",
"enrollment_id",
"created_at",
"updated_at",
"mode_of_travel",
"driver_or_passenger",
"fuel_type",
"engine_capacity",
"rtw_declaration",
"rtw_proof1_upload_id",
"rtw_proof2_upload_id",
"card_type",
"gender",
"self_gender",
"marital_status",
"disability_act",
"disability_description",
"ethnic_origin",
"religion",
"nationality",
"sexual_orient",
"checked_membership",
"training_checked",
"enrollment",
]
},
}
}
// Use tiny validator to validate the results - Error if there are additional properties, and check recursion.
pm.test("Validate schema contains the relevant details", () => {
tv4.validateMultiple(jsonData, schema, true, true);
var jsonData = JSON.parse(responseBody);
// Log all errors to the console
var validationResult = tv4.validateMultiple(jsonData, schema, true, true);
for (var i = 0; i < validationResult.errors.length; i++) {
console.log("path :" + validationResult.errors[i].dataPath + " message :" + validationResult.errors[i].message);
}
});
const schema = {
"type": "object",
"properties": {
"current_page": {
"type": "boolean"
}
}
}
pm.test("Validate schema contains the relevant details", () => {
var jsonData = { "current_page": 2 }
// Log all errors to the console
var validationResult = tv4.validateMultiple(jsonData, schema, true, true);
validationResult.valid ? null : console.log(JSON.stringify(validationResult, null, 2))
pm.expect(validationResult.valid, JSON.stringify(validationResult, null, 2)).to.be.true
});
You don't have an expect to assert the result, which is why it always passes. Also, as Danny mentioned, use:
const schema = {
"type": "object",
"properties": {
"current_page": {
"type": "boolean"
}
}
}
pm.test("Validate schema contains the relevant details", () => {
var jsonData = { "current_page": 2 }
pm.response.to.have.jsonSchema(schema)
});
And for arrays you should use items instead of properties:
const schema = {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string"
}
},
"required":["id","name"]
}
}
}
}
pm.test("Validate schema contains the relevant details", () => {
var jsonData = { "data": [{ "id": 2,"name":"test" }] }
// Log all errors to the console
var validationResult = tv4.validateMultiple(jsonData, schema, true, true);
validationResult.valid ? null : console.log(JSON.stringify(validationResult, null, 2))
pm.expect(validationResult.valid, JSON.stringify(validationResult, null, 2)).to.be.true
});
You can create a schema with ease using:
https://www.jsonschema.net/login
I want to replace the value of Country with something else wherever it appears as "India". I have multiple documents with the same structure and would like to update them all at once. It should not affect any other keys; I just want to update the country.
I tried with the $set operator but couldn't get it right.
{
"_id" : "1",
"teams" :
[
{
"type" : "local",
"isEnabled" : "true",
"Country":"India"
"names" :
[
{ "name": "kumar","Nationality":"indian","BirthPlace":"Goa","Age":"U25" },
{ "name": "kannan","Nationality":"indian","BirthPlace":"Kerala","Age":"U25"}
]
},
{
"type" : "national",
"isEnabled" : "true",
"Country":"India"
"names" :
[
{ "name": "kumar","Nationality":"indian","BirthPlace":"Goa","Age":"U25" },
{ "name": "kannan","Nationality":"indian","BirthPlace":"Kerala","Age":"U25"}
]
},
{
"type" : "international",
"isEnabled" : "true",
"Country":"England"
"names" :
[
{ "name": "kumar","Nationality":"indian","BirthPlace":"Goa","Age":"U25" },
{ "name": "kannan","Nationality":"indian","BirthPlace":"Kerala","Age":"U25"}
]
},
{
"type" : "national",
"isEnabled" : "true",
"Country":"India"
"names" :
[
{ "name": "kumar","Nationality":"indian","BirthPlace":"Goa","Age":"U25" },
{ "name": "kannan","Nationality":"indian","BirthPlace":"Kerala","Age":"U25"}
]
},
{
"type" : "international",
"isEnabled" : "true",
"Country":"Newzealand"
"names" :
[
{ "name": "kumar","Nationality":"indian","BirthPlace":"Goa","Age":"U25" },
{ "name": "kannan","Nationality":"indian","BirthPlace":"Kerala","Age":"U25"}
]
}
]
}
Try this one:
db.collection.updateMany(
{},
{ $set: { "teams.$[element].Country": "Republic of India" } },
{ arrayFilters: [{ "element.Country": "India" }] }
);
Try this one:
db.collection.updateMany({},
{
$set: {
"teams.$[].Country": "India"
}
})
I believe this issue has to do with the syntax changes from Elasticsearch 6.x to 7.x.
This is my first time using Elasticsearch and I haven't been able to work out where the issue is.
How would I fix my mapping?
{
"settings": {
"analysis": {
"analyzer": {
"ssdeep_analyzer": {
"tokenizer": "ssdeep_tokenizer"
}
},
"tokenizer": {
"ssdeep_tokenizer": {
"type": "ngram",
"min_gram": 7,
"max_gram": 7,
"token_chars": [
"letter",
"digit",
"symbol"
]
}
}
}
},
"mappings": {
"_default_": {
"_all": {
"enabled": false
},
"dynamic": "strict",
"properties": {
"chunksize": {
"type": "integer"
},
"chunk": {
"analyzer": "ssdeep_analyzer",
"type": "text"
},
"double_chunk": {
"analyzer": "ssdeep_analyzer",
"type": "text"
},
"ssdeep": {
"type": "keyword"
},
"sha256": {
"type": "keyword"
}
}
},
"record": {}
}
}
The error that is being raised is:
elasticsearch.exceptions.RequestError: RequestError(400, 'mapper_parsing_exception', 'Root mapping definition has unsupported parameters: [_default_ : {dynamic=strict, _all={enabled=false}, properties={sha256={type=keyword}, chunksize={type=integer}, chunk={analyzer=ssdeep_analyzer, type=text}, double_chunk={analyzer=ssdeep_analyzer, type=text}, ssdeep={type=keyword}}}] [record : {}]')
A bunch of issues here:
The _all field is deprecated (and has been since v6) and is no longer accepted in 7.x.
"record": {} is not a valid parameter; it is a second mapping type, and mapping types have been removed.
_default_ is deprecated too and was removed in 7.x.
Here's a working mapping:
{
"settings": {
"analysis": {
"analyzer": {
"ssdeep_analyzer": {
"tokenizer": "ssdeep_tokenizer"
}
},
"tokenizer": {
"ssdeep_tokenizer": {
"type": "ngram",
"min_gram": 7,
"max_gram": 7,
"token_chars": [
"letter",
"digit",
"symbol"
]
}
}
}
},
"mappings": {
"dynamic": "strict",
"properties": {
"chunksize": {
"type": "integer"
},
"chunk": {
"analyzer": "ssdeep_analyzer",
"type": "text"
},
"double_chunk": {
"analyzer": "ssdeep_analyzer",
"type": "text"
},
"ssdeep": {
"type": "keyword"
},
"sha256": {
"type": "keyword"
}
}
}
}
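Not part of the original question, but as a quick sanity check: assuming the settings/mappings body above was used to create an index named ssdeep_demo (a placeholder name), you can index a document and run an ngram match against it; the values below are made-up placeholders.
# Index a sample document (dynamic is strict, so only the mapped fields are allowed)
PUT ssdeep_demo/_doc/1
{
  "chunksize": 196608,
  "chunk": "RlzmEH3PhI8mEH3PhI8mEH3PhI8",
  "double_chunk": "RlzmEH3PhI8mEH3PhI8RlzmEH3PhI8mEH3PhI8",
  "ssdeep": "196608:RlzmEH3PhI8:Rlz",
  "sha256": "0000000000000000000000000000000000000000000000000000000000000000"
}
# Any fragment of 7+ characters should match, because ssdeep_tokenizer emits fixed 7-grams
GET ssdeep_demo/_search
{
  "query": {
    "match": { "chunk": "mEH3PhI8" }
  }
}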
[
{
"name": "Document 1",
"tags": {
"typeATags": ["a1"],
"typeBTags": ["b1"],
"typeCTags": ["c1"],
"typeDTags": ["d1"]
}
},
{
"name": "Document 2",
"tags": {
"typeATags": ["a2"],
"typeBTags": ["b1", "b2"],
"typeCTags": ["c2"],
"typeDTags": ["d1", "d2"]
}
},
{
"name": "Document 3",
"tags": {
"typeATags": ["a1", "a2", "a3"],
"typeBTags": ["b1", "b2", "b3"],
"typeCTags": ["c3"],
"typeDTags": ["d1", "d2", "d3"]
}
}
]
How do I build a query on ES 6.0:
That will return all the records that have the 'a1' and 'b1' tags? // should return 1,3
That will return all the records that have the 'a1' and 'a2' tags combined? // should return 3
That will return all the records that have the 'a1' or 'a2' tags? // should return 1,2,3
That will return all the records that have the 'a1' tag AND ('c1' OR 'c3') tags? // should return 1,3
Thanks @mickl for the answer.
Edit 1:
Here is my actual schema:
{
"cmslocal": {
"mappings": {
"video": {
"properties": {
"assetProps": {
"properties": {
"assetType": {
"type": "string"
},
"configPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"contentSha1": {
"type": "string"
},
"originalPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"path": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
},
"thumbnailPath": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"analyzer": "standard"
}
}
},
"channel": {
"type": "string"
},
"configProps": {
"properties": {
"events": {
"type": "nested",
"include_in_root": true,
"properties": {
"Desc": {
"type": "string"
},
"Tags": {
"type": "string"
},
"UUID": {
"type": "string"
}
}
},
"roiUUID": {
"type": "string"
}
}
},
"contentSha1": {
"type": "string"
},
"eventDesc": {
"type": "string"
},
"ext": {
"type": "string"
},
"format": {
"type": "string"
},
"fovProps": {
"properties": {
"description": {
"type": "string"
},
"width": {
"type": "float"
}
}
},
"locationProps": {
"type": "nested",
"properties": {
"address": {
"type": "string"
},
"city": {
"type": "string"
},
"country": {
"type": "string"
},
"county": {
"type": "string"
},
"location": {
"type": "geo_point"
},
"postcode": {
"type": "string"
},
"state": {
"type": "string"
}
}
},
"nodeid": {
"type": "string"
},
"poleHeight": {
"type": "float"
},
"query": {
"properties": {
"bool": {
"properties": {
"filter": {
"properties": {
"term": {
"properties": {
"nodeid": {
"type": "string"
}
}
}
}
}
}
}
}
},
"retentionPolicy": {
"type": "string"
},
"siteScopeID": {
"type": "string"
},
"tagProps": {
"type": "nested",
"properties": {
"conditions": {
"type": "string"
},
"environment": {
"type": "string"
},
"events": {
"type": "string"
},
"lighting": {
"type": "string"
},
"objects": {
"type": "string"
},
"other": {
"type": "string"
},
"scenes": {
"type": "string"
},
"useCases": {
"type": "string"
},
"weather": {
"type": "string"
}
}
},
"test": {
"type": "string"
},
"title": {
"type": "string"
},
"uploadTime": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"videoProps": {
"properties": {
"bitrate": {
"type": "float"
},
"datetime": {
"type": "date",
"format": "date_hour_minute_second_millis"
},
"daySegments": {
"type": "string"
},
"duration": {
"type": "long"
},
"framerate": {
"type": "float"
},
"height": {
"type": "integer"
},
"overlaysOn": {
"type": "boolean"
},
"width": {
"type": "integer"
}
}
}
}
}
}
}
}
Please help me build a query so I can search for:
only nodeId, only channel, a date range
any of the tags
I'm able to search by nodeId and channel using:
{
"query": {
"bool": {
"filter": [
{ "match": { "nodeid": "N02cff15a" } },
{ "match": { "channel": "1" } }
]
}
}
}
and I'm able to search on tagProps using:
{
"nested": {
"path": "tagProps",
"query": {
"bool": {
"must": [
{ "match": { "tagProps.objects": "car" } },
{ "match": { "tagProps.objects": "truck" } }
]
}
}
}
}
Help me combine the two queries so I can search for nodeId together with a combination of tags.
Since tags is a nested field, you should define a nested mapping before indexing your documents.
{
"mappings": {
"your_type": {
"properties": {
"tags": {
"type": "nested"
}
}
}
}
}
Now you can index your data and use the nested query syntax. For your first use case it looks like this:
{
"query": {
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{ "match": { "tags.typeATags": "a1" }},
{ "match": { "tags.typeBTags": "b1" }}
]
}
}
}
}
}
The next queries can be composed of must and should clauses; for the last one, for example:
{
"query": {
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{ "match": { "tags.typeATags": "a1" }}
],
"should": [
{"match": {"tags.typeCTags": "c1"}},
{"match": {"tags.typeCTags": "c3"}}
]
}
}
}
}
}
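The queries above use the simplified tags documents from the top of the question. For the real schema in Edit 1 (which the answer doesn't cover directly), the top-level filters and the nested tagProps query can presumably live in the same bool, along the lines of the sketch below; the field names and example values are taken from the question, and the uploadTime range is only illustrative.
{
  "query": {
    "bool": {
      "filter": [
        { "match": { "nodeid": "N02cff15a" } },
        { "match": { "channel": "1" } },
        { "range": { "uploadTime": { "gte": "now-7d" } } },
        {
          "nested": {
            "path": "tagProps",
            "query": {
              "bool": {
                "must": [
                  { "match": { "tagProps.objects": "car" } },
                  { "match": { "tagProps.objects": "truck" } }
                ]
              }
            }
          }
        }
      ]
    }
  }
}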