Group and Sum multi-dimensional array after unwinding elements - arrays

I have documents containing a multi-dimensional (nested) array; my JSON data is below, followed by the output I want. Thanks for the help. I have tried two methods: 1. $project with $map, but there I am unable to group the data.
2. $unwind with $group, but there I am unable to get the recordCount inside the errorFields array ("recordCount" : 73).
The results need to be grouped by the groupBy key: if the field key matches and, inside that field, the detail key matches, we need to merge those documents.
My JSON:
[
{
"_id":{
"$oid":"60f5c60fbc43cb00965ac641"
},
"groupBy":{
"$oid":"60f5c60fbc43cb00965ac641"
},
"employer":{
"$oid":"60d0e4001a6ccd764cb26df4"
},
"parameters":{
"begin":"2020-01-01",
"end":"2020-12-31"
},
"recordCount":74,
"errorFields":[
{
"field":"employeeStatus",
"recordCount":62,
"errorDetails":[
{
"recordIds":[
"10000986",
"10000990",
"10001020"
],
"detail":"Active employees should not have term date. Termed employees should have term date.",
"recordCount":3
},
{
"recordIds":[
"10001032"
],
"detail":"Stale pay period data (no new check for over 30 days from queried end date)",
"recordCount":1
}
]
},
{
"field":"ytdGrossWages.ytdTotalGrossWages",
"recordCount":8,
"errorDetails":[
{
"recordIds":[
"10001211",
"10001269",
"10001328",
"10001395"
],
"detail":"YTD total does not equal sum of YTD wage buckets",
"recordCount":4
}
]
}
],
"timestamp":{
"$date":"2021-07-19T18:35:59.031Z"
},
"__v":0
},
{
"_id":{
"$oid":"60f5c615bc43cb00965ac647"
},
"groupBy":{
"$oid":"60f5c60fbc43cb00965ac641"
},
"employer":{
"$oid":"60d0e4001a6ccd764cb26df4"
},
"parameters":{
"begin":"2020-01-01",
"end":"2020-12-31"
},
"recordCount":11,
"errorFields":[
{
"field":"employeeStatus",
"recordCount":11,
"errorDetails":[
{
"recordIds":[
"10003644",
"10003680"
],
"detail":"Active employees should not have term date. Termed employees should have term date.",
"recordCount":2
},
{
"recordIds":[
"10003667",
"10003694",
"10003807",
"10003789"
],
"detail":"Stale pay period data (no new check for over 30 days from queried end date)",
"recordCount":4
}
]
},
{
"field":"ssn",
"recordCount":2,
"errorDetails":[
{
"recordIds":[
"10003667"
],
"detail":"The ssn field is required.",
"recordCount":1
},
{
"recordIds":[
"10003694"
],
"detail":"The ssn must be 9 digits.",
"recordCount":1
}
]
},
{
"field":"employeeHomeAddressCountry",
"recordCount":1,
"errorDetails":[
{
"recordIds":[
"10003694"
],
"detail":"The employeeHomeAddressCountry field is required.",
"recordCount":1
}
]
}
],
"timestamp":{
"$date":"2021-07-19T18:36:05.135Z"
},
"__v":0
}
]
I want output like this:
{
"_id" : ObjectId("60f5c60fbc43cb00965ac641"),
"errorFields" : [
{
"field" : "employeeStatus",
"recordCount" : 73,
"errorDetails" : [
{
"recordIds" : [
"10001032",
"10003667",
"10003694",
"10003807",
"10003789"
],
"detail" : "Stale pay period data (no new check for over 30 days from queried end date)",
"recordCount" : 5
},
{
"recordIds" : [
"10000986",
"10000990",
"10001020",
"10001031",
"10001035"
],
"detail" : "Active employees should not have term date. Termed employees should have term date.",
"recordCount" : 5
}
]
},
{
"field" : "ytdGrossWages.ytdTotalGrossWages",
"recordCount" : 8,
"errorDetails" : [
{
"recordIds" : [
"10001211",
"10001269",
"10001328",
"10001395"
],
"detail" : "YTD total does not equal sum of YTD wage buckets",
"recordCount" : 8
}
]
},
{
"field" : "ssn",
"recordCount" : 2,
"errorDetails" : [
{
"recordIds" : [
"10003667"
],
"detail" : "The ssn field is required.",
"recordCount" : 1
},
{
"recordIds" : [
"10003694"
],
"detail" : "The ssn must be 9 digits.",
"recordCount" : 1
}
]
},
{
"field":"employeeHomeAddressCountry",
"recordCount":1,
"errorDetails":[
{
"recordIds":[
"10003694"
],
"detail":"The employeeHomeAddressCountry field is required.",
"recordCount":1
}
]
}
]
}
Here is my code, method 1:
// Attempt 1: reshape each matching document on its own with nested $map.
// $project works on one document at a time, so nothing here merges entries
// across documents that share the same groupBy value.
db.collection.aggregate([
  { $match: { groupBy: ObjectId("60f5c60fbc43cb00965ac641") } },
  {
    $project: {
      _id: "$groupBy",
      errorFields: {
        $map: {
          input: "$errorFields",
          as: "errField",
          in: {
            field: "$$errField.field",
            recordCount: { $sum: "$$errField.recordCount" },
            errorDetails: {
              $map: {
                input: "$$errField.errorDetails",
                as: "errDetail",
                in: {
                  detail: "$$errDetail.detail",
                  // Single-argument $concat passes each id string through unchanged.
                  recordIds: {
                    $map: {
                      input: "$$errDetail.recordIds",
                      as: "recordId",
                      in: { $concat: ["$$recordId"] }
                    }
                  },
                  // Recomputed from the id list instead of copying the stored value.
                  recordCount: { $size: "$$errDetail.recordIds" }
                }
              }
            }
          }
        }
      }
    }
  }
]).pretty()
Here is my code, method 2:
// Attempt 2: flatten down to individual record ids with three $unwind stages,
// then rebuild the nesting with three successive $group stages.
db.collection.aggregate([
{ $match: { groupBy: ObjectId("60f5c60fbc43cb00965ac641") } },
{ $unwind: "$errorFields" },
{ $unwind: "$errorFields.errorDetails" },
{ $unwind: "$errorFields.errorDetails.recordIds" },
// Group 1: one bucket per (groupBy, field, detail); collects the record ids
// and counts them.
{ "$group": {
"_id": {
"_id": "$groupBy",
"errorFields": {
"field": "$errorFields.field",
"errorDetails": {
"detail": "$errorFields.errorDetails.detail"
}
}
},
"recordIds": {
"$push" : "$errorFields.errorDetails.recordIds",
},
"Idscount": { $sum: 1 }
}},
// Group 2: one bucket per (groupBy, field); gathers that field's detail entries.
// Its only outputs are _id and errorDetails.
{ "$group": {
"_id": {
"_id": "$_id._id",
"errorFields": {
"field": "$_id.errorFields.field"
}
},
"errorDetails": {
"$push": {
"recordIds": "$recordIds",
"detail": "$_id.errorFields.errorDetails.detail",
"recordCount" : "$Idscount"
}
}
}},
// Group 3: collapse everything into a single document under a constant _id.
{ "$group": {
"_id": 0,
"errorFields": {
"$push": {
"field": "$_id.errorFields.field",
// NOTE(review): no earlier stage emits a `fieldCount` field, so this
// reference has nothing to resolve to.
"recordCount": "$fieldCount",
"errorDetails": "$errorDetails"
}
}
}}
]).pretty()

Related

Nested Query on Array in MongoDB collection

I have the User collection below, and I want a find/update query that updates nested array elements.
{
_id:"000-0000-0001",
Roles :{
"0000-0000-0011": //EngagementId
["0000-0000-0111", "0000-0000-0112", "0000-0000-3333"],//RoleId
"0000-0000-0012" :
["0000-0000-0121", "0000-0000-0112"]
}
},
{
_id:"000-0000-0002",
Roles :{
"0000-0000-0021" : [ "0000-0000-0222", "0000-0000-0112"],
"0000-0000-0022" : [ "0000-0000-0121", "0000-0000-0112"],
"0000-0000-0022" : [ "0000-0000-0121", "0000-0000-0112", "0000-0000-3333"]
}
}
Requirement: I want to pull RoleId 0000-0000-3333 if the array contains both 0000-0000-3333 and 0000-0000-0112.
Below is expected result :
{
_id:"000-0000-0001",
Roles :{
"0000-0000-0011" : ["0000-0000-0111", "0000-0000-0112"],
"0000-0000-0012" : ["0000-0000-0121", "0000-0000-0112"]
}
},
{
_id:"000-0000-0002",
Roles :{
"0000-0000-0021" : [ "0000-0000-0222", "0000-0000-0112"],
"0000-0000-0022" : [ "0000-0000-0121", "0000-0000-0112"],
"0000-0000-0022" : [ "0000-0000-0121", "0000-0000-0112"]
}
}
Note: Find/update works on key:value or, if it is nested, on key.key:value, but in the example above we have key.value.[values]:$pull(value), and that's the challenge.
The data model necessitates a complicated update with a pipeline.
Here's one way to do it.
// For every document that has a Roles object, rebuild Roles entry by entry:
// a role list loses "0000-0000-3333" only when it holds both "0000-0000-0112"
// and "0000-0000-3333"; every other list is kept unchanged.
db.collection.update(
  { Roles: { $exists: true } },
  [
    {
      $set: {
        Roles: {
          $arrayToObject: {
            $map: {
              // Turn the Roles object into [{ k, v }] pairs so its keys can be iterated.
              input: { $objectToArray: "$Roles" },
              as: "entry",
              in: {
                k: "$$entry.k",
                v: {
                  $cond: {
                    if: {
                      $and: [
                        { $in: ["0000-0000-0112", "$$entry.v"] },
                        { $in: ["0000-0000-3333", "$$entry.v"] }
                      ]
                    },
                    then: {
                      $filter: {
                        input: "$$entry.v",
                        cond: { $ne: ["$$this", "0000-0000-3333"] }
                      }
                    },
                    else: "$$entry.v"
                  }
                }
              }
            }
          }
        }
      }
    }
  ],
  { multi: true }
)
Try it on mongoplayground.net.

Aggregating a total from subdocuments in MongoDB

I have a document like the one below, I'd essentially like to produce an aggregate for the items in a sub document.
Essentially each document is a sales record, which has details of the sales and a sub document / array with the qtys of each item sold.
I'd like to produce a summary of all the items sold.
So an example collection is:
{
non_relevant_1: "ABC",
non_relevant_2: "DEF",
items_array: {
"item_1": 1,
"item_2": 2,
"item_3": 1,
"item_4": 1
}
},
{
non_relevant_1: "HIJ",
non_relevant_2: "KLM",
items_array: {
"item_1": 3,
"item_2": 2,
"item_3": 4
}
}
I'd then like to be able to produce something like:
{
items_array: {
"item_1": 4,
"item_2": 4,
"item_3": 5,
"item_4": 1
}
}
Many thanks in advance.
I think you need to change your schema; you are saving data in keys.
MongoDB operators are not made to work with unknown keys; for example, we can't group by an unknown key. To do that we have to do complicated and slow things like $objectToArray.
Also, the data that you want as the result has the same problem.
If you look at the query, only the middle $unwind and $group would be needed with a changed schema and a result that does not store data in keys.
I mean instead of
items_array: {
"item_1": 1,
"item_2": 2,
"item_3": 1,
"item_4": 1
}
Your collection should have been like this (the first part of the query makes that schema change):
items_array: [
{"name": "item_1",
"qty" : 1},
{"name": "item_2",
"qty" : 2},
{"name": "item_3",
"qty" : 1},
{"name": "item_4",
"qty" : 1}
]
Also the results should have known keys only.
Maybe that is the reason you were stuck. You will make things much easier for yourself.
Test code here
Query (the query works for your schema, but I have told you what I think)
// Sum the per-item quantities stored under `items_array` across all documents
// and return them as a single { item_name: total } object.
db.collection.aggregate([
  // 1. Schema fix-up: turn { item_1: 1, ... } into [{ name: "item_1", qty: 1 }, ...].
  //    $objectToArray already yields { k, v } pairs, so they are read directly.
  {
    $addFields: {
      items_array: {
        $map: {
          input: { $objectToArray: "$items_array" },
          as: "kv",
          in: { name: "$$kv.k", qty: "$$kv.v" }
        }
      }
    }
  },
  // 2. One document per (sale, item) pair.
  { $unwind: { path: "$items_array" } },
  // 3. Total quantity per item name.
  {
    $group: {
      _id: "$items_array.name",
      total_qty: { $sum: "$items_array.qty" }
    }
  },
  // 4. Gather the totals as { k, v } pairs, a shape $arrayToObject accepts.
  //    Fields are referenced by name instead of by their position in $$ROOT.
  {
    $group: {
      _id: null,
      items_array: { $push: { k: "$_id", v: "$total_qty" } }
    }
  },
  // 5. Back to the keys-as-data form that was asked for: { items_array: { item_1: 4, ... } }.
  {
    $project: {
      _id: 0,
      items_array: { $arrayToObject: "$items_array" }
    }
  }
])

Simple calculation on array of arrays

I am having the following collection:
{
"price" : [
55800000,
62800000
],
"surface" : [
81.05,
97.4
],
}
I want to calculate the price per m2. I have tried the following but got this error: "$divide only supports numeric types, not array and array".
// Original attempt: $divide only accepts numbers, so handing it the two arrays
// raises "$divide only supports numeric types, not array and array".
db.entries.aggregate([
  {
    $project: {
      url: 1,
      // The field is named `surface` in the sample document.
      pricePerSquareMeter: { $divide: ["$price", "$surface"] }
    }
  }
])
Would you know how to solve this? Ultimately would want to have an array like this:
{
"price" : [
55800000,
62800000
],
"surface" : [
81.05,
97.4
],
"pricePerSquareMeter" : [
688463.91,
644763.86
]
}
Important: The price and surface should also be ordered so that the calculation is valid.
You can use below aggregation
// Walk the index range of `price` and divide each price by the surface found
// at the same position.
db.collection.aggregate([
  {
    $addFields: {
      pricePerSquareMeter: {
        $map: {
          input: { $range: [0, { $size: "$price" }] },
          as: "idx",
          in: {
            $divide: [
              { $arrayElemAt: ["$price", "$$idx"] },
              { $arrayElemAt: ["$surface", "$$idx"] }
            ]
          }
        }
      }
    }
  }
])
MongoPlayground
You can use below aggregation
// Pair each price with the surface at the same position and divide them.
// $zip is used instead of locating the price with $indexOfArray, because
// $indexOfArray returns the first matching index and would pick the wrong
// surface whenever two entries share the same price.
db.entries.aggregate([
  {
    $addFields: {
      pricePerSquareMeter: {
        $map: {
          input: { $zip: { inputs: ["$price", "$surface"] } },
          as: "pair",
          in: {
            $divide: [
              { $arrayElemAt: ["$$pair", 0] },
              { $arrayElemAt: ["$$pair", 1] }
            ]
          }
        }
      }
    }
  }
])

What improvements I can make on student attendance NoSQL database?

I’m designing a NoSQL database for a student attendance system. I would like your advice on improving it, since I’m new to this field.
I want to query: 1. the classes for a specific student, 2. the students in a specific class, 3. the attendees of a specific class on a specific date, 4. the attendance and absence counts for a specific student in a specific class.
{
"attendance" : {
"CS 331" : {
"7-3-2019" : {
"2014901001" : true
}
}
},
"class" : {
"class01" : {
"id" : "CS 331",
"name" : "Software Design",
"students" : {
"2014901001" : true
}
}
},
"classEnrollment" : {
"CS 331" : {
"2014901001" : {
"absence" : 0,
"attendant" : 1
}
}
},
"instructor" : {
"instructor01" : {
"id" : "01"
}
},
"instructorEnrollment" : {
"01" : {
"CS 331" : true
}
},
"student" : {
"student01" : {
"id" : "2014901001",
"name" : "Paul Howard"
}
},
"studentEnrollment" : {
"2014901001" : {
"CS 331" : true
}
}
}
This is what I would design for such a user requirement:
{
"class_collection": {
"documents": [
{
"id": "CS_331_2019_1",
"type": "CS 331",
"name": "Software Design",
"enrolled_instructor": [
"instructor_id_1"
],
"enrolled_students": [
{
"id": "student_id_1",
"absence": 0,
"attendant": 2
},
{
"id": "student_id_2",
"absence": 2,
"attendant": 3
}
]
}
]
},
"class_attendance_collection": {
"documents": [
{
"2019-03-12T18:00:00": {
"class_id": "CS_331_2019_1",
"attended": ["student_id_1"],
"not_attended": ["student_id_2"]
}
},
{
"2019-03-13T16:00:00": {
"class_id": "CS_331_2019_1",
"attended": ["student_id_1","student_id_2"]
}
}
]
},
"student_collection": {
"documents": [
{
"id": "student_id_1",
"name": "name_1"
},
{
"id": "student_id_2",
"name": "name_2"
}
]
},
"instructor_collection": {
"documents": [
{
"id": "instructor_id_1",
"name": "name_1"
},
{
"id": "instructor_id_2",
"name": "name_2"
}
]
}
}
Have fun coding :)

Mongo regex with array of elements not accurate

Document schema is as follows
"events": [
{
"title": "title1"
},
{
"title": "title2"
},
{
"title": "title3"
}
]
I have a requirement to search (by regex) in the events.title field and get only the matching element/object from the array. For that I am querying like this:
// Original attempt: the two regexes sit in a top-level $or, while the only
// $elemMatch on `events` tests title != null.
db.collection.find(
  {
    $or: [
      { "events.title": { $regex: ".*title2.*", $options: "i" } },
      { "events.title": { $regex: ".*title5.*", $options: "i" } }
    ],
    events: { $elemMatch: { title: { $ne: null } } }
  },
  { "events.$": 1, _id: 0 }
);
I was expecting the result would be { "events" : [ { "title" : "title2"} ] } , but it returns
{ "events" : [ { "title" : "title1"} ] }
How can i update the query so that only matching element in array is returned in result ?
UPDATE
// Both regexes are placed inside a single $elemMatch on `events`.
events: {
  $elemMatch: {
    $or: [
      { title: { $regex: ".*title2.*", $options: "i" } },
      { title: { $regex: ".*title5.*", $options: "i" } }
    ]
  }
}
did the trick . Now it returns only matching element in the array irrespective of the position.
Thanks for the help
Try this
// Match documents that have an `events` element whose title matches either
// regex, and use the positional `events.$` projection with _id suppressed.
db.collection.find(
  {
    events: {
      $elemMatch: {
        title: { $in: [/.*title2.*/i, /.*title5.*/i] }
      }
    }
  },
  { "events.$": 1, _id: 0 }
);
// $elemMatch on `events` accepting a title that matches either regex.
{
  events: {
    $elemMatch: {
      $or: [
        { title: { $regex: ".*title2.*", $options: "i" } },
        { title: { $regex: ".*title5.*", $options: "i" } }
      ]
    }
  }
}
Using this in the projection did the trick. Now it returns only the matching element in the array, irrespective of its position.

Resources