Transform MongoDB arrays of values into arrays of objects

I have a collection, which contains documents like
{
"_id": ObjectId("60f561eb0d022a4c614966c1"),
"vehicleId": 288,
"startTime": [
ISODate("2021-06-19T13:00:19Z"),
ISODate("2021-06-19T13:00:40Z")
],
"odo": [0, 1116.0746443123298],
"location": [
{ "type": "Point", "coordinates": [75.759973, 26.977889] },
{ "type": "Point", "coordinates": [75.771209, 26.97858] }
]
}
I want to change the location array so that the document looks like
{
"_id": ObjectId("60f561eb0d022a4c614966c1"),
"vehicleId": 288,
"startTime": [
ISODate("2021-06-19T13:00:19Z"),
ISODate("2021-06-19T13:00:40Z")
],
"odo": [0, 1116.0746443123298],
"locations": [
{ loc: { "type": "Point", "coordinates": [75.759973, 26.977889] } },
{ loc: { "type": "Point", "coordinates": [75.771209, 26.97858] } }
]
}
That is, each entry becomes an object. I may want to do the same thing to odo and startTime.
I'm experimenting with aggregation pipelines, but I haven't found a way except using application code to read and transform the object.
I'd rather do it in a MongoDB-only way and not have to use application code to parse the collection document by document, so any help with a MongoDB-only approach would be much appreciated.

Would be this one:
db.collection.aggregate([
{
$set: {
location: "$$REMOVE",
locations: {
$map: {
input: "$location",
in: { loc: "$$this" }
}
}
}
}
])

Related

JOLT: Make a nested array part of the main array

I am trying to transform the JSON below into the expected output shown further down, but I am stuck with the spec below. Can someone help with this?
There is an inner array named "content" inside each element of the "results" array, and I want its elements to become part of the main (top-level) array.
Input JSON
{
"total": 100,
"start": 1,
"page-length": 10,
"results": [
{
"index": 1,
"uri": "uri1/uri2",
"extracted": {
"kind": "object",
"content": [
{
"code": "A1",
"region": "APAC"
}
]
}
},
{
"index": 2,
"uri": "uri1/uri2",
"extracted": {
"kind": "object",
"content": [
{
"code": "B1",
"region": "AMER"
}
]
}
},
{
"index": 3,
"uri": "uri1/uri2",
"extracted": {
"kind": "object",
"content": [
{
"code": "C1",
"region": "APAC"
}
]
}
}
]
}
Expected json output
[
{
"code": "A1",
"region": "APAC"
},
{
"code": "B1",
"region": "AMER"
},
{
"code": "C1",
"region": "APAC"
}
]
Spec Tried
[
{
"operation": "shift",
"spec": {
"results": {
"*": {
"extracted": { "content": { "#": "&" } }
}
}
}
},
{
"operation": "shift",
"spec": {
"content": {
"#": "&"
}
}
}
]
Find the output below I am getting on Jolt tool
You can use # wildcard nested within square brackets in order to reach the level of the indexes of the "results" array such that
[
{
"operation": "shift",
"spec": {
"results": {
"*": {//the indexes of the "results" array
"extracted": {
"content": {
"*": {//the indexes of the "content" array
"*": "[#5].&"
}
}
}
}
}
}
}
]
Also the following spec, which repeats the content of the inner array without keys, will give the same result :
[
{
"operation": "shift",
"spec": {
"results": {
"*": {
"extracted": {
"content": {
"*": "[]"// [] seems like redundant but kept for the case the array has a single object.
}
}
}
}
}
}
]
which is quite similar to your tried one.

Mongo DB query to match a field1 and loop thru another field2 and get output as a single array with all fields of field2

Need help with mongo db query
MongoDB query: search for parents with state good and children with state bad or missing. The output should be an array of all the children with state bad or missing whose parents have a good state.
Below is the JSON list
[
{
"name": "parent-a",
"status": {
"state": "good"
},
"children": [
"child-1",
"child-2"
]
},
{
"name": "child-1",
"state": "good",
"parent": "parent-a"
},
{
"name": "child-2",
"state": {},
"parent": "parent-a"
},
{
"name": "parent-b",
"status": {
"state": "good"
},
"children": [
"child-3",
"child-4"
]
},
{
"name": "child-3",
"state": "good",
"parent": "parent-b"
},
{
"name": "child-4",
"state": "bad",
"parent": "parent-b"
},
{
"name": "parent-c",
"status": {
"state": "bad"
},
"children": [
"child-5",
"child-6"
]
},
{
"name": "child-5",
"state": "good",
"parent": "parent-c"
},
{
"name": "child-6",
"state": "bad",
"parent": "parent-c"
}
]
Expected output
"children": [
{
"name": "child-2",
"state": {}
},
{
"name": "child-4",
"state": "bad"
}
]
Any inputs would be appreciated. Thanks in advance :)
One option is to use $lookup* for this:
db.collection.aggregate([
{$match: {state: {$in: ["bad", {}]}}},
{$lookup: {
from: "collection",
localField: "parent",
foreignField: "name",
pipeline: [
{$match: {"status.state": "good"}}
],
as: "hasGoodParent"
}},
{$match: {"hasGoodParent.0": {$exists: true}}},
{$project: {name: 1, state: 1, _id: 0}}
])
See how it works on the playground example
*If your mongoDB version is lower than 5.0 you need to change the syntax a bit. Drop the localField and foreignField of the $lookup and replace with let and equality match on the pipeline
Here is an approach that does all of this without a "$lookup" stage, since performance usually suffers when one is involved. Basically, we match all relevant children and parents and group by the child id. If a group has a parent (which means the parent has a "good" state) and a child (which means the child has a "bad"/{} state), then it's matched.
You should make sure you have the appropriate indexes to support the initial query.
Additionally, I would personally recommend adding a boolean field on each document to mark whether it's a parent or a child. Right now we have to rely on the field structure of your input to infer this type, which I would consider a bad practice.
Another thing we did not discuss, and which doesn't seem possible with the current structure, is recursion: can a child have children of its own? Just some things to consider.
db.collection.aggregate([
{
$match: {
$or: [
{
$and: [
{
"status.state": "good"
},
{
parent: {
$exists: false
}
},
{
"children.0": {
$exists: true
}
}
]
},
{
$and: [
{
"state": {
$in: [
"bad",
null,
{}
]
}
},
{
parent: {
$exists: true
}
}
]
}
]
}
},
{
$unwind: {
path: "$children",
preserveNullAndEmptyArrays: true
}
},
{
$addFields: {
isParent: {
$cond: [
{
$eq: [
null,
{
$ifNull: [
"$parent",
null
]
}
]
},
1,
0
]
}
}
},
{
$group: {
_id: {
$cond: [
"$isParent",
"$children",
"$name"
]
},
hasParnet: {
$sum: "$isParent"
},
hasChild: {
$sum: {
$subtract: [
1,
"$isParent"
]
}
},
state: {
"$mergeObjects": {
$cond: [
"$isParent",
{},
{
state: "$state"
}
]
}
}
}
},
{
$match: {
hasChild: {
$gt: 0
},
hasParnet: {
$gt: 0
}
}
},
{
$group: {
_id: null,
children: {
$push: {
name: "$_id",
state: "$state.state"
}
}
}
}
])
Mongo Playground

MongoDB get only selected elements from objects inside an array

What I have is a collection of documents in MongoDB that have the structure something like this
[
{
"userid": "user1",
"addresses": [
{
"type": "abc",
"street": "xyz"
},
{
"type": "def",
"street": "www"
},
{
"type": "hhh",
"street": "mmm"
},
]
},
{
"userid": "user2",
"addresses": [
{
"type": "abc",
"street": "ccc"
},
{
"type": "def",
"street": "zzz"
},
{
"type": "hhh",
"street": "yyy"
},
]
}
]
If I can give the "type" and "userid", how can I get the result as
[
{
"userid": "user2",
"type": "abc",
"street": "ccc",
}
]
It would also be great even if I can get the "street" only as the result. The only constraint is I need to get it in the root element itself and not inside an array
Something like this:
db.collection.aggregate([
{
$match: {
userid: "user1" , "addresses.type":"abc"
}
},
{
$project: {
userid: 1,
address: {
$filter: {
input: "$addresses",
as: "a",
cond: {
$eq: [
"$$a.type",
"abc"
]
}
}
}
}
},
{
$unwind: "$address"
},
{
$project: {
userid: 1,
street: "$address.street",
_id: 0
}
}
])
explained:
Filter only documents with the userid & addresses.type you need
Project/Filter only the addresses elements with the needed type
unwind the address array
project only the needed elements as requested
For best results create an index on the { userid: 1 } field or a compound index on the { userid: 1, "addresses.type": 1 } fields
playground
You should be able to use unwind, match and project as shown below:
db.collection.aggregate([
{
"$unwind": "$addresses"
},
{
"$match": {
"addresses.type": "abc",
"userid": "user1"
}
},
{
"$project": {
"_id": 0,
"street": "$addresses.street"
}
}
])
You can also duplicate the match step as the first step to reduce the number of documents to unwind.
Here is the playground link.
There is a similar question/answer here.

how to create a key value pair from fields in mongodb?

i have some data structured like this :
"location_identifiers": [
{
"permalink": "olivette-missouri",
"uuid": "e1774b1c-634d-4ea4-1414-cd8be15df631",
"location_type": "city",
"entity_def_id": "location",
"value": "Olivette"
},
{
"permalink": "missouri-united-states",
"uuid": "51a065b8-05d5-1a28-3fcd-1ad143f1f725",
"location_type": "region",
"entity_def_id": "location",
"value": "Missouri"
}
i want to restructure it to look like this, i am using mongodb compass:
"location_identifiers": [
{
"city": "Olivette",
"region": "Missouri"
}
]
i have tried unwind and project query but i am stuck, any help would be greatly appreciated.
Use $arrayToObject
db.collection.aggregate([
{
"$match": {}
},
{
"$set": {
"location_identifiers": [
{
"$arrayToObject": {
"$map": {
"input": "$location_identifiers",
"as": "item",
"in": {
k: "$$item.location_type",
v: "$$item.value"
}
}
}
}
]
}
}
])
mongoplayground

ElasticSearch: Retrieve string concatenation, or partial array

I have many indexed documents such as this one:
{
"_index":"myindex",
"_type":"somedata",
"_id":"31d3255d-67b4-40e6-b9d4-637383eb72ad",
"_version":1,
"_score":1,
"_source":{
"otherID":"b4c95332-daed-49ae-99fe-c32482696d1c",
"data":[
{
"data":"d2454d41-a74e-43af-b3b0-0febeaf67a99",
"iD":"9362f2eb-9bd7-4924-8b0e-77c27bb0aa56"
},
{
"data":"some text",
"iD":"c554b8ce-c873-4fef-b306-ec65d2f40394"
},
{
"data":"5256983c-ef69-4363-9787-97074297c646",
"iD":"8c90e2be-6042-4450-b0fd-0732900f8f65"
},
{
"data":"other text",
"iD":"8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
},
{
"data":"3",
"iD":"c880bfdf-eb4b-4c80-9871-fd44e06b2ed2"
}
],
"iD":"31d3255d-67b4-40e6-b9d4-637383eb72ad"
}
}
Its type mapping is configured this way:
{
"somedata":{
"dynamic_templates":[
{
"defaultIDs":{
"match_pattern":"regex",
"mapping":{
"index":"not_analyzed",
"type":"string"
},
"match":".*(id|ID|iD)"
}
}
],
"properties":{
"otherID":{
"index":"not_analyzed",
"type":"string"
},
"data":{
"properties":{
"data":{
"type":"string"
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
}
}
I wish to be able to retrieve a string concatenation of data based on its ID.
For example, given the id c554b8ce-c873-4fef-b306-ec65d2f40394, and the id 8d8f8a61-02d6-4d3e-9912-9ebb5d213c15, I would like to retrieve some text other text.
These IDs repeat in other documents of the same type with different data.
If this is not possible (which I suspect is the case), I would like to at least retrieve a partial array containing my requested data.
Those arrays can become large (and so is the number of documents) and I would only need one or two elements from each hit.
If both my requests are not possible, how would you suggest changing my mappings in order to facilitate my needs?
Thanks in advance, Jonathan.
I have found a way to do exactly what I needed without changing my data structure.
(I actually did end up changing my data structure, but for reasons of space and efficiency).
All you have to do is enjoy the groovy goodness ElasticSearch has to offer:
{
"query" : { "term" : { "otherID" : "b4c95332-daed-49ae-99fe-c32482696d1c" } },
"script_fields" : { "requestedFields" : { "script" : "_source.data.findAll({ it.iD == 'c554b8ce-c873-4fef-b306-ec65d2f40394' || it.iD == '8d8f8a61-02d6-4d3e-9912-9ebb5d213c15'}) data.join(' ') " } }
}
Just goes to show how strong ElasticSearch really is.
I cannot help you with the field concatenation (maybe it's possible with scripting, but I'm not experienced enough with it; I would assume a new field would have to be generated, etc.), but I can show how to retrieve only the partial data.
It requires at least ES 1.5 because it uses inner_hits and you need to change the mapping.
I added "type": "nested" and "include_in_parent": true to your data field:
DELETE somedata
PUT somedata
PUT somedata/sometype/_mapping
{
"sometype":{
"dynamic_templates":[
{
"defaultIDs":{
"match_pattern":"regex",
"mapping":{
"index":"not_analyzed",
"type":"string"
},
"match":".*(id|ID|iD)"
}
}
],
"properties":{
"otherID":{
"index":"not_analyzed",
"type":"string"
},
"data":{
"type": "nested",
"include_in_parent": true,
"properties":{
"data":{
"type":"string"
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
},
"iD":{
"index":"not_analyzed",
"type":"string"
}
}
}
}
Now indexing your document:
PUT somedata/sometype/1
{
"otherID":"b4c95332-daed-49ae-99fe-c32482696d1c",
"data":[
{
"data":"d2454d41-a74e-43af-b3b0-0febeaf67a99",
"iD":"9362f2eb-9bd7-4924-8b0e-77c27bb0aa56"
},
{
"data":"some text",
"iD":"c554b8ce-c873-4fef-b306-ec65d2f40394"
},
{
"data":"5256983c-ef69-4363-9787-97074297c646",
"iD":"8c90e2be-6042-4450-b0fd-0732900f8f65"
},
{
"data":"other text",
"iD":"8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
},
{
"data":"3",
"iD":"c880bfdf-eb4b-4c80-9871-fd44e06b2ed2"
}
],
"iD":"31d3255d-67b4-40e6-b9d4-637383eb72ad"
}
And here's how you can match and retrieve with inner_hits:
POST somedata/sometype/_search
{
"query": {
"nested": {
"path": "data",
"query": {
"bool": {
"should": [
{
"term": {
"data.iD": "c554b8ce-c873-4fef-b306-ec65d2f40394"
}
},
{
"term": {
"data.iD": "8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
}
}
]
}
},
"inner_hits": {}
}
}
}
In the result now look at this path: hits.hits[0].inner_hits.data.hits.hits[0]._source.data; it only contains your two requested matches:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.5986179,
"hits": [
{
"_index": "somedata",
"_type": "sometype",
"_id": "1",
"_score": 0.5986179,
"_source": {
"otherID": "b4c95332-daed-49ae-99fe-c32482696d1c",
"data": [
{
"data": "d2454d41-a74e-43af-b3b0-0febeaf67a99",
"iD": "9362f2eb-9bd7-4924-8b0e-77c27bb0aa56"
},
{
"data": "some text",
"iD": "c554b8ce-c873-4fef-b306-ec65d2f40394"
},
{
"data": "5256983c-ef69-4363-9787-97074297c646",
"iD": "8c90e2be-6042-4450-b0fd-0732900f8f65"
},
{
"data": "other text",
"iD": "8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
},
{
"data": "3",
"iD": "c880bfdf-eb4b-4c80-9871-fd44e06b2ed2"
}
],
"iD": "31d3255d-67b4-40e6-b9d4-637383eb72ad"
},
"inner_hits": {
"data": {
"hits": {
"total": 2,
"max_score": 0.5986179,
"hits": [
{
"_index": "somedata",
"_type": "sometype",
"_id": "1",
"_nested": {
"field": "data",
"offset": 3
},
"_score": 0.5986179,
"_source": {
"data": "other text",
"iD": "8d8f8a61-02d6-4d3e-9912-9ebb5d213c15"
}
},
{
"_index": "somedata",
"_type": "sometype",
"_id": "1",
"_nested": {
"field": "data",
"offset": 1
},
"_score": 0.5986179,
"_source": {
"data": "some text",
"iD": "c554b8ce-c873-4fef-b306-ec65d2f40394"
}
}
]
}
}
}
}
]
}
}
Now, inner_hits is fairly new and the documentation also states:
Warning: This functionality is experimental and may be changed or removed completely in a future release.
YMMV.
Another thing to watch out for: the inner_hits are sorted by score. In your original document they're in an array, which is ordered, but that ordering is lost in the actual result. If you need them in the same order in the inner_hits, I think you need to add a separate field for sorting (it could just be the array index...) and sort the inner_hits by it.

Resources