Update nested object using positional-filters on different levels - MongoDb update - arrays

Considering I have the following collection (ignoring documents _id):
Collection
[
{
"Id": "1",
"Level2": [
{
"Id": "1.1",
"Level3": [
{
"Id": "1.1.1",
"Level3_4": {
"Transport": "Car",
"Level4": [
{
"Id": "1.1.1.1",
"Status": "run",
"Direction": "N"
},
{
"Id": "1.1.1.2",
"Status": "run",
"Direction": "S"
},
{
"Id": "1.1.1.3",
"Status": "pause",
"Direction": "S"
},
{
"Id": "1.1.1.4",
"Status": "run",
"Direction": "W"
}
]
}
},
{
"Id": "1.1.2",
"Level3_4": {
"Transport": "Bus",
"Level4": [
{
"Id": "1.1.2.1",
"Status": "run",
"Direction": "S"
},
{
"Id": "1.1.2.2",
"Status": "stop",
"Direction": "N"
},
{
"Id": "1.1.2.3",
"Status": "stop",
"Direction": "W"
},
{
"Id": "1.1.2.4",
"Status": "run",
"Direction": "E"
}
]
}
}
]
},
{
"Id": "1.2",
"Level3": [
{
"Id": "1.2.1",
"Level3_4": {
"Transport": "Train",
"Level4": [
{
"Id": "1.2.1.1",
"Status": "run",
"Direction": "N"
}
]
}
},
{
"Id": "1.2.2",
"Level3_4": {
"Transport": "Bus",
"Level4": []
}
}
]
}
]
}
]
Update
I want to update all documents in the collection such as:
Level2.Level3.Level3_4.Level4.Status: "pause"
Using updateOne and without upsert
All Level4 that match the next conditions
Conditions
where:
Level2.Level3.Id $in ["1.1.1","1.1.2"] AND
Level2.Level3.Level3_4.Level4.Status $in ["run", "stop"] AND
Level2.Level3.Level3_4.Level4.Direction $in ["N", "S"]
RESULT
The result collection should be:
[
{
"Id": "1",
"Level2": [
{
"Id": "1.1",
"Level3": [
{
"Id": "1.1.1",
"Level3_4": {
"Transport": "Car",
"Level4": [
{
"Id": "1.1.1.1",
"Status": "pause",
"Direction": "N"
},
{
"Id": "1.1.1.2",
"Status": "pause",
"Direction": "S"
},
{
"Id": "1.1.1.3",
"Status": "pause",
"Direction": "S"
},
{
"Id": "1.1.1.4",
"Status": "run",
"Direction": "W"
}
]
}
},
{
"Id": "1.1.2",
"Level3_4": {
"Transport": "Bus",
"Level4": [
{
"Id": "1.1.2.1",
"Status": "pause",
"Direction": "S"
},
{
"Id": "1.1.2.2",
"Status": "pause",
"Direction": "N"
},
{
"Id": "1.1.2.3",
"Status": "stop",
"Direction": "W"
},
{
"Id": "1.1.2.4",
"Status": "run",
"Direction": "E"
}
]
}
}
]
} ...
]
}
]
I'm struggling with the $set operator and arrayFilters. How can I accomplish this?
This is where I went so far:
db.test.update({},
{
$set: {
"Level2.$[level2].Level3.$[level3].Level3_4.Level4.$[level4].Status": "pause"
}
},
{
arrayFilters: [
{
"level3.Id": {
$in: [
"1.1.1",
"1.1.2"
]
}
},
{
"level4.Status": {
$in: [
"run",
"stop"
]
}
},
{
"level4.Direction": {
$in: [
"N",
"S"
]
}
}
]
}
)
However it doesn't work... What am I doing wrong?

You were almost there :)
Just the issue was with the last array filter and in $set
I have Updated the query:
db.session.update(
{},
{
$set: {
"Level2.$[].Level3.$[level3].Level3_4.Level4.$[level4].Status": "pause"
}
},
{
arrayFilters: [
{
"level3.Id": {
$in: [
"1.1.1",
"1.1.2"
]
}
},
{
"level4.Status": {
$in: [
"run",
"stop"
]
},
"level4.Direction": {
$in: [
"N",
"S"
]
}
}
]
}
)
Hope this will help :)

Related

Removing and printing name/value pair from json using jolt

I want to remove a name/value pair from inside a json array and print it outside. I started by trying this and then expanding the whole request to be a json array. The solution mentioned above does not seem to be working.
Input :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
},
{
"name": "Details",
"value": {
"createdDate": "today",
"description": "offer",
"id": null
}
}
],
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Desired Output :
[
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
},
{
"createdBy": "Admin",
"createdDate": "2022-09-08",
"modifiedBy": "Admin",
"attrs": [
{
"name": "Type",
"value": "Postpaid"
},
{
"name": "subscriber",
"value": "Paid"
}
],
"Details": {
"createdDate": "today",
"description": "offer",
"id": null
},
"relatedInfo": [
{
"type": "Number",
"name": "000000"
},
{
"type": "Type",
"name": "Post"
}
]
}
]
Current Jolt spec:
[
{
"operation": "shift",
"spec": {
"*": "[&]",
"attrs": {
"*": {
"name": {
"*": { "#2": "&4" },
"Details": {
"#(2,value)": "&1"
}
}
}
}
}
}
]
I can't seem to figure out how the jolt spec would change in case of the array
So far so good, just need to combine the attributes at a common node. To do this, I've used the identifiers [&1] and [&5] in order to reach the level of the outermost index within the tree such as
[
{
"operation": "shift",
"spec": {
"*": {
"*": "[&1].&",
"attrs": {
"*": {
"name": {
"*": {
"#2": "[&5].&4"
},
"Details": {
"#(2,value)": "[&5].&1"
}
}
}
}
}
}
}
]

Add and update child arrays value in Mongo db

I want to add value in array of the following json structure in mongo db collection "Scdtls".
I need to add value only where "Type": "Fees" and "IsCurrentVersion": true.
{
"_id": ObjectId("60e71243b484cf3ec22a6845"),
"Item": "School",
"Text": "School Details",
"Types": [
{
"Type": "Library",
"Values": [
{
"IsCurrentVersion": false,
"KeyWords": [
{
"_id": ObjectId("611a4f113800d6af3fc4814f"),
"Value": "Physics"
}
]
}
]
},
{
"Type": "Fees",
"Values": [
{
"IsCurrentVersion": false,
"KeyWords": [
{
"_id": ObjectId("611a4f113800d6af3fc4814f"),
"Value": "Admission"
},
{
"_id": ObjectId("611a4f113800d6af3fc4814k"),
"Value": "Canteen"
}
]
},
{
"IsCurrentVersion": true,
"KeyWords": [
{
"_id": ObjectId("611a4f113800d6af3fc4814g"),
"Value": "Tution"
}
]
}
]
}
]
}
This is how I am trying
db.Scdtls.update({
"Item": "School",
"Types.Type": "Fees",
"Types.Values.IsCurrentVersion": true
},
{
"$push": {
"Types.1.Values.$[].KeyWords": {
"_id": ObjectId(),
"Value": "Sports"
}
}
})
But it is also adding "Value": "Sports" in "Type" : "Fees" where "IsCurrentVersion": false.
Not able to understand the reason.
If I understand correctly, you can do the update you want using "arrayFilters", like this:
db.Scdtls.update({
"Item": "School",
"Types.Type": "Fees",
"Types.Values.IsCurrentVersion": true
},
{
"$push": {
"Types.$[x].Values.$[y].KeyWords": {
"_id": ObjectId("deadbeafcafebabec0deface"),
"Value": "Sports"
}
}
},
{
"arrayFilters": [
{ "x.Type": "Fees" },
{ "y.IsCurrentVersion": true }
]
})
Try it on mongoplayground.net.

Performance issue running mongodb aggregation

I need to run a query that joins documents from two collections, I wrote an aggregation query but it takes too much time when running in the production database with many documents. Is there any way to write this query in a more efficient way?
Query in Mongo playground: https://mongoplayground.net/p/dLb3hsJHNYt
There are two collections users and activities. I need to run a query to get some users (from users collection), and also their last activity (from activities collection).
Database:
db={
"users": [
{
"_id": 1,
"email": "user1#gmail.com",
"username": "user1",
"country": "BR",
"creation_date": 1646873628
},
{
"_id": 2,
"email": "user2#gmail.com",
"username": "user2",
"country": "US",
"creation_date": 1646006402
}
],
"activities": [
{
"_id": 1,
"email": "user1#gmail.com",
"activity": "like",
"timestamp": 1647564787
},
{
"_id": 2,
"email": "user1#gmail.com",
"activity": "comment",
"timestamp": 1647564834
},
{
"_id": 3,
"email": "user2#gmail.com",
"activity": "like",
"timestamp": 1647564831
}
]
}
Inefficient Query:
db.users.aggregate([
{
// Get users using some filters
"$match": {
"$expr": {
"$and": [
{ "$not": { "$in": [ "$country", [ "AR", "CA" ] ] } },
{ "$gte": [ "$creation_date", 1646006400 ] },
{ "$lte": [ "$creation_date", 1648684800 ] }
]
}
}
},
{
// Get the last activity within the time range
"$lookup": {
"from": "activities",
"as": "last_activity",
"let": { "cur_email": "$email" },
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{ "$eq": [ "$email", "$$cur_email" ] },
{ "$gte": [ "$timestamp", 1647564787 ] },
{ "$lte": [ "$timestamp", 1647564834 ] }
]
}
}
},
{ "$sort": { "timestamp": -1 } },
{ "$limit": 1 }
]
}
},
{
// Remove users with no activity
"$match": {
"$expr": {
"$gt": [ { "$size": "$last_activity" }, 0 ] }
}
}
])
Result:
[
{
"_id": 1,
"country": "BR",
"creation_date": 1.646873628e+09,
"email": "user1#gmail.com",
"last_activity": [
{
"_id": 2,
"activity": "comment",
"email": "user1#gmail.com",
"timestamp": 1.647564788e+09
}
],
"username": "user1"
},
{
"_id": 2,
"country": "US",
"creation_date": 1.646006402e+09,
"email": "user2#gmail.com",
"last_activity": [
{
"_id": 3,
"activity": "like",
"email": "user2#gmail.com",
"timestamp": 1.647564831e+09
}
],
"username": "user2"
}
]
I'm more familiar with relational databases, so I'm struggling a little to run this query efficiently.
Thanks!

JQ: Remove json objects from array

I have this json file with hundreds of entries that I need to strip from data I do not need.
Snippet:
{
"entries": [
{
"metadata": {
"tags": [
]
},
"sys": {
"space": {
"sys": {
"type": "Link",
"linkType": "Space",
"id": "9kn72w8zc6fh"
}
},
"id": "vcLKKhJ3mZNfGMvVZZi07",
"type": "Entry",
"createdAt": "2021-05-20T15:14:01.358Z",
"updatedAt": "2021-09-20T15:28:30.799Z",
"environment": {
"sys": {
"id": "production",
"type": "Link",
"linkType": "Environment"
}
},
"publishedVersion": 47,
"publishedAt": "2021-09-20T15:28:30.799Z",
"firstPublishedAt": "2021-05-25T10:26:56.722Z",
"createdBy": {
"sys": {
"type": "Link",
"linkType": "User",
"id": "6F84RwUIY9cXNNXBoQemqX"
}
},
"updatedBy": {
"sys": {
"type": "Link",
"linkType": "User",
"id": "6F84RwUIY9cXNNXBoQemqX"
}
},
"publishedCounter": 4,
"version": 48,
"publishedBy": {
"sys": {
"type": "Link",
"linkType": "User",
"id": "6F84RwUIY9cXNNXBoQemqX"
}
},
"contentType": {
"sys": {
"type": "Link",
"linkType": "ContentType",
"id": "page"
}
}
},
"fields": {
"title": {
"de-DE": "Startseite",
"en-US": "Home"
},
"description": {
"en-US": "foo"
},
"keywords": {
"en-US": "bar"
},
"stageModules": {
"en-US": [
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "11AfBBuNK8bx3EygAS3WTY"
}
}
]
},
"contentModules": {
"en-US": [
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "7uyuyIBsXWApHqpR7Pgkac"
}
},
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "4HILHPLjqQkP2H1hA2FeBG"
}
},
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "QuwRHL3XMSkguqrL1hUzC"
}
},
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "4ZyVef5oWhQWXK9V1lr3vz"
}
}
]
},
"layout": {
"en-US": "Wide"
}
}
}
]
}
From the entries array, I actually only need:
entries.sys.id
entries.sys.contentType.sys.id
entries.fields
I came up with:
jq \
'.entries | .[] .sys, .[] .fields | del(.createdAt, .createdBy, .environment, .firstPublishedAt, .metadata, .publishedAt, .publishedBy, .publishedCounter, .publishedVersion, .space, .type, .updatedAt, .updatedBy, .version)' \
$infile >| $outfile
However, this changes the structure of the document. The entries node is missing (due to the .entries filter):
{
"id": "vcLKKhJ3mZNfGMvVZZi07",
"contentType": {
"sys": {
"type": "Link",
"linkType": "ContentType",
"id": "page"
}
}
}
{
"id": "1UgOmHIvsWrFEf1VCa84kz",
"contentType": {
"sys": {
"type": "Link",
"linkType": "ContentType",
"id": "moduleText"
}
}
}
{
"title": {
"de-DE": "Startseite",
"en-US": "Home"
},
"description": {
"en-US": "Foo"
},
"keywords": {
"en-US": "Bar"
},
"stageModules": {
"en-US": [
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "11AfBBuNK8bx3EygAS3WTY"
}
}
]
},
"contentModules": {
"en-US": [
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "7uyuyIBsXWApHqpR7Pgkac"
}
},
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "4HILHPLjqQkP2H1hA2FeBG"
}
},
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "QuwRHL3XMSkguqrL1hUzC"
}
},
{
"sys": {
"type": "Link",
"linkType": "Entry",
"id": "4ZyVef5oWhQWXK9V1lr3vz"
}
}
]
},
"layout": {
"en-US": "Wide"
}
}
I have 2 questions:
How can I delete deeper objects, eg. .entries.sys.space.sys.linkType?
How can I keep the .entries node in the outfile?
Thank you for your help.
If you want full control over the output, I'd just re-create the desired format.
It sounds like you're trying to accieve the following format:
{
"entries": [
{
"sys": {
"id": ...
},
"contentType": {
"sys": {
"id": ...
}
},
"fields": ...
}
}
]
}
We can achieve this by using the following JQ selector:
.entries |= map({ "sys": { "id": .sys.id }, "contentType": { "sys": { "id": .sys.contentType.sys.id } }, fields })
Try it online!

How to score by max relevance match in array elements in ElasticSearch?

I have an autocomplete analyser for a field("keywords"). This field is an array of strings. When I query with a search string I want to show first the documents where a single element of the array keywords matches best. The problem is that if a part of the string matches with more elements of the array "keywords", then this document appears before another that has less but better matches. For example, if I have a query with the word "gas station" the returning documents' keywords are these:
"hits": [
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761acd",
"_score": 3.1974547,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076d8dc536a03b4761cc3",
"_score": 3.0407648,
"_source": {
"keywords": [
"Stationery Store",
"Stationery Store"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761ace",
"_score": 2.903595,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076cddc536a03b4761c87",
"_score": 2.517158,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
}
The "Gas Station" is fourth, although it has the best single element matching. Is there a way to tell ElasticSearch that I do not care about how many times "gas" or "station" appears in keywords? I want the max element of the array keywords match as the score factor.
My settings are:
{
"locali": {
"settings": {
"index": {
"creation_date": "1480937810266",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"keywords": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"char_filter": [
"my_char_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"my_char_filter": {
"type": "mapping",
"mappings": [
"ί => ι",
"Ί => Ι",
"ή => η",
"Ή => Η",
"ύ => υ",
"Ύ => Υ",
"ά => α",
"Ά => Α",
"έ => ε",
"Έ => Ε",
"ό => ο",
"Ό => Ο",
"ώ => ω",
"Ώ => Ω",
"ϊ => ι",
"ϋ => υ",
"ΐ => ι",
"ΰ => υ"
]
}
}
},
"number_of_shards": "1",
"number_of_replicas": "1",
"uuid": "TJjOt9L9QE2HrsUFHM6zJg",
"version": {
"created": "2040099"
}
}
}
}
}
And the mappings:
{
"locali": {
"mappings": {
"places": {
"properties": {
"formattedCategories": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"location": {
"properties": {
"formattedAddress": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"locality": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"neighbourhood": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"rating": {
"properties": {
"rating": {
"type": "long"
}
}
},
"seenDetails": {
"type": "long"
},
"verified": {
"type": "long"
}
}
},
"regions": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"type": {
"type": "long"
},
"weight": {
"type": "long"
}
}
},
"categories": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"weight": {
"type": "long"
}
}
}
}
}
}
Can you post your query here that you are trying here as well.
I tried your example with the following query
{
"query": {"match": {
"keywords": "gas station"
}
}
}
And i got your desired result.
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.081366636,
"hits": [
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6QnpdNp-z_ybGd-L",
"_score": 0.081366636,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP5-u5dNp-z_ybGd-I",
"_score": 0.03182549,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "stack",
"_type": "type",
"_id": "AVjP6KiKdNp-z_ybGd-K",
"_score": 0.03182549,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
}
]
}
}
Try this query to see if you are getting desired result. Also you can reply with your mappings, query and ES version if this does't work for you.
Hope this solves your problem. Thanks

Resources