Getting all data with highest value in MongoDB - database

I have a collection called "Vulnerabilities" in MongoDB 5. It looks like this:
[
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 1 },
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 2 },
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 2 },
{ _id: "...", "project": 244, "scanner": "shellcheck", "version": 1 },
{ _id: "...", "project": 244, "scanner": "shellcheck", "version": 2 },
{ _id: "...", "project": 244, "scanner": "shellcheck", "version": 3 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 2 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 3 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 4 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 4 }
]
I would like to retrieve, for each scanner, the documents with the highest version. It should return:
[
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 2 },
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 2 },
{ _id: "...", "project": 244, "scanner": "shellcheck", "version": 3 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 4 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 4 }
]
I would like to do it with only one query.
Any suggestions?
Many thanks

db.collection.aggregate([
{
$sort: {
version: -1
}
},
{
$group: {
_id: "$scanner",
version: {
$first: "$version"
},
test: {
$push: {
v: "$version",
id: "$_id",
p: "$project"
}
}
}
},
{
$project: {
items: {
$filter: {
input: "$test",
as: "item",
cond: {
$eq: [
"$$item.v",
"$version"
]
}
}
}
}
},
{
$unwind: "$items"
},
{
$project: {
scanner: "$_id",
_id: "$items.id",
project: "$items.p",
version: "$items.v"
}
}
])
Explained:
Order(Sort) descending by version
group by scanner taking the first value from the list per version and pushing all values in test array so we have suitable for filter in next stage
Filter from test array only the elements with the max version we need
Unwind the test array where only max values are filtered
In the final $project stage rename the fields as per original names you need
playground

Related

MOngoDB find maximum

Good Morning:
I have the following MongoDB structure:
{
"_id": 19,
"name": "Fredia Donan",
"faculty": "Zoomzone",
"lectures": [
{
"lecturerID": 25,
"name": "Sigismondo Brecknell",
"email": "sbrecknello#pinterest.com",
"imparts": [
{
"_id": 76,
"codCourse": 23,
"language": "Malay",
"max_students": 59,
"students": [
{
"studentID": 25
}
],
"classes": [
{
"date": ISODate("2022-02-02T09:23:32.59"),
"cod_classroom": 76
}
]
}
]
}
]
}
Having that I want to find the lecturer that imparts class to the maximum number of students. So I have to count the number of students that a lecturer teaches to and the find the max from across all lecturers from the department. How could I do it? Can it be done within the same code of the aggregation bellow? It is way over my MongoDB knowledge and I would be forever grateful if anyone could help.
I have read about $size and $count but everywhere I try it gives me different errors.
Thank you very much in advance!
The output above is done by an aggregation of two collections department and group with the following code:
[{"$unwind": "$lecturers"}, {"$unwind": "$lecturers.imparts"},
{"$lookup":
{"from": "group",
"localField": "lecturers.imparts.groupID",
"foreignField": "_id",
"as": "lecturers.imparts"}},
{"$group":
{"_id": {"_id": "$_id", "lecturersID": "$lecturers.lecturerID"},
"name": {"$first": "$name"},
"faculty": {"$first": "$faculty"},
"lecturers":
{"$first":
{"lecturerID": "$lecturers.lecturerID",
"name": "$lecturers.name",
"email": "$lecturers.email"}},
"imparts": {"$push": "$lecturers.imparts"}}},
{"$set":
{"lecturers":
{"$mergeObjects":
["$lecturers",
{"imparts":
{"$reduce":
{"input": "$imparts",
"initialValue": [],
"in": {"$concatArrays": ["$$value", "$$this"]}}}}]},
"imparts": "$$REMOVE"}},
{"$group":
{"_id": "$_id._id",
"name": {"$first": "$name"},
"faculty": {"$first": "$faculty"},
"lectures": {"$push": "$lecturers"}}}])
My desired output would be to get the lecturer of each department that teaches the most students. For instance, if I have this:
{
"_id": 19,
"name": "Fredia Donan",
"faculty": "Zoomzone",
"lectures": [
{
"lecturerID": 25,
"name": "Sigismondo Brecknell",
"email": "sbrecknello#pinterest.com",
"imparts": [
{
"_id": 76,
"codCourse": 23,
"language": "Malay",
"max_students": 59,
"students": [
{
"studentID": 25
}
],
"classes": [
{
"date": ISODate("2022-02-09T23:32:59.000Z"),
"cod_classroom": 76
}
]
},
{
"_id": 77,
"codCourse": 24,
"language": "Malayff",
"max_students": 59,
"students": [
{
"studentID": 28
}
],
"classes": [
{
"date": ISODate("2022-02-09T23:32:59.000Z"),
"cod_classroom": 77
}
]
}
]
},
{
"lecturerID": 36,
"name": "Sigismondo Brecknell",
"email": "sbrecknello#pinterest.com",
"imparts": [
{
"_id": 76,
"codCourse": 23,
"language": "Malay",
"max_students": 59,
"students": [
{
"studentID": 45
}
],
"classes": [
{
"date": ISODate("2022-02-09T23:32:59.000Z"),
"cod_classroom": 76
}
]
},
{
"_id": 77,
"codCourse": 24,
"language": "Malayff",
"max_students": 59,
"students": [
{
"studentID": 54
},
{
"studentID": 435
},
{
"studentID": 45
}
],
"classes": [
{
"date": ISODate("2022-02-09T23:32:59.000Z"),
"cod_classroom": 77
}
]
}
]
}
]
}
]
I would like to get that for the department with _id 19 the lecturer that teaches the most students is the one with id 36 since he imparts class to 4 students while the lecturer with id 25 imparts class to only 2 students.
So I would like to get the following output:
"_id": 19,
"name": "Fredia Donan",
"lecturerID": 36,
"maxImpartsStudents": 4
}
And I would like to get this information for every existing department. Every document in the collection department
Welcome Cristina Aranda!
EDIT:
For each lecturer, iterate all imparts using $map and creates one set that includes all students, returning its size as maxImpartsStudents. We use $setUnion to avoid counting the same student more than once, if they are in more than one class of the same lecturer.
$reduce the lecturers in each document to include only the one with maximal value of maxImpartsStudents .
Format
db.collection.aggregate([
{
$project: {
name: 1,
lectures: {
$map: {
input: "$lectures",
as: "item",
in: {
$mergeObjects: [
{
maxImpartsStudents: {
$size: {
$reduce: {
input: "$$item.imparts",
initialValue: [],
in: {$setUnion: ["$$value", "$$this.students"]}
}
}
}
},
{
name: "$$item.name",
lecturerID: "$$item.lecturerID"
}
]
}
}
}
}
},
{
$set: {
lectures: {
$reduce: {
input: "$lectures",
initialValue: {maxImpartsStudents: 0},
in: {
$cond: [
{$gte: ["$$this.maxImpartsStudents", "$$value.maxImpartsStudents"]},
"$$this", "$$value"]
}
}
}
}
},
{
$project: {
lecturerID: "$lectures.lecturerID",
maxImpartsStudents: "$lectures.maxImpartsStudents",
departmentName: "$name"
}
}
])
See how it works on the playground example

MongoDB filter by retrieved value in group

I have this collection in MongoDB:
[
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 1 },
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 2 },
{ _id: "...", "project": 244, "scanner": "sonarqube", "version": 2 },
{ _id: "...", "project": 244, "scanner": "shellcheck", "version": 1 },
{ _id: "...", "project": 244, "scanner": "shellcheck", "version": 2 },
{ _id: "...", "project": 244, "scanner": "shellcheck", "version": 3 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 2 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 3 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 4 },
{ _id: "...", "project": 244, "scanner": "powershell", "version": 4 }
]
I would like to return the number of items, grouped by scanner, with the latest (highest) version:
{ scanner: "sonarqube", count: 2 }, // All items with sonarqube and version: 2
{ scanner: "shellcheck", count: 1 }, // All items with shellcheck and version: 3
{ scanner: "powershell", count: 2 } // All items with powershell and version: 4
So far, I came out with this:
db.getCollection("vulnerabilities").aggregate([
{
$match: { project_id: 422 }
},
{
$sort: { version: -1 }
},
{
$group: {
_id: "$scanner",
'count': { $first: "$version"},
}
}
])
This returns the latest (highest) version for each group:
sonarqube | 2 |
shellcheck | 3 |
powershell | 4 |
Now, I need to tell Mongo:
filter the item with that version
group by scanner
add the count
Any ideas, suggestion?
Thanks
The simplest way would be to get the count first, before sorting by version, perhaps:
db.getCollection("vulnerabilities").aggregate([
{
$match: { project_id: 422 }
},
{
$group:{
_id: { scanner: "$scanner", version: "$version"},
count: { $sum: 1 }
}
},
{
$sort: { "_id.version": -1 }
},
{
$group: {
_id: "$_id.scanner",
count: { $first: "$count" },
version: { $first: "$_id.version" }
}
}
])
You can use $max in the $group stage
db.test.aggregate([
{$group : {_id : "$scanner", count : {$sum : 1}, version : {$max : "$version"}}}
])
or you can $sort by version and use $first
db.test.aggregate([
{$sort : {version: -1}},
{$group : {_id : "$scanner", count : {$sum : 1}, version : {$first : "$version"}}}
])

Filter an array of obj based on another array value

I have an array of object like this :
[
{
"url": "https://recipes-gatsby-react.netlify.app/",
"stack": [
{
"id": 1,
"title": "GatsbyJs"
},
{
"id": 2,
"title": "React"
},
{
"id": 3,
"title": "GraphQl"
}
],
"id": "Project_1",
"featured": false,
"title": "Gatsby recipes app"
},
{
"url": "https://resort-react-mr.netlify.app",
"stack": [
{
"id": 4,
"title": "React"
},
{
"id": 5,
"title": "Contentful"
}
],
"id": "Project_2",
"featured": false,
"title": "react resort"
},
{
"url": "https://color-generator-react-mr.netlify.app/",
"stack": [
{
"id": 6,
"title": "React"
}
],
"id": "Project_3",
"featured": false,
"title": "color generator"
},
{
"url": "",
"stack": [
{
"id": 7,
"title": "React Native"
},
{
"id": 8,
"title": "Firebase"
}
],
"id": "Project_4",
"featured": false,
"title": "Book-Worm"
},
{
"url": "",
"stack": [
{
"id": 9,
"title": "React Native"
},
{
"id": 10,
"title": "Firebase"
},
{
"id": 11,
"title": "Stripe"
},
{
"id": 12,
"title": "Google Api"
}
],
"id": "Project_5",
"featured": false,
"title": "Delivery App"
},
{
"url": "https://cocktails-react-app-mr.netlify.app/",
"stack": [
{
"id": 13,
"title": "React"
}
],
"id": "Project_6",
"featured": false,
"title": "Cocktails App"
},
{
"url": "https://www.ideacasaitalia.it/index.php",
"stack": [
{
"id": 14,
"title": "Prestashop"
}
],
"id": "Project_7",
"featured": false,
"title": "Idea Casa Italia"
}
]
and I have another (simple) array like this one for example
["react","prestashop"]
I would like to filter the first array based on STACK key on the value of the second array, i.e. I would like to be returned all elements that have at least one element of the array (variable).
I have come to this
const filter = value => {
const newProj = relProjects.filter(obj =>
obj.stack.some(st => st.title.toLowerCase().includes(value))
)
setProjRel(newProj)
}
console.log(filter(["react","prestashop"])
however in this way I can only get the elements that contain all the elements of the array. Instead I would like to have returned all the elements that have AT LEAST 1 stack of the ones I passedThanks to who will answer me
Using Array#filter and Array#includes
Filter through the data array. With the predicate being that the stack array has at least one item that is included in the matches array
const data = [{"url":"https://recipes-gatsby-react.netlify.app/","stack":[{"id":1,"title":"GatsbyJs"},{"id":2,"title":"React"},{"id":3,"title":"GraphQl"}],"id":"Project_1","featured":false,"title":"Gatsby recipes app"},{"url":"https://resort-react-mr.netlify.app","stack":[{"id":4,"title":"React"},{"id":5,"title":"Contentful"}],"id":"Project_2","featured":false,"title":"react resort"},{"url":"https://color-generator-react-mr.netlify.app/","stack":[{"id":6,"title":"React"}],"id":"Project_3","featured":false,"title":"color generator"},{"url":"","stack":[{"id":7,"title":"React Native"},{"id":8,"title":"Firebase"}],"id":"Project_4","featured":false,"title":"Book-Worm"},{"url":"","stack":[{"id":9,"title":"React Native"},{"id":10,"title":"Firebase"},{"id":11,"title":"Stripe"},{"id":12,"title":"Google Api"}],"id":"Project_5","featured":false,"title":"Delivery App"},{"url":"https://cocktails-react-app-mr.netlify.app/","stack":[{"id":13,"title":"React"}],"id":"Project_6","featured":false,"title":"Cocktails App"},{"url":"https://www.ideacasaitalia.it/index.php","stack":[{"id":14,"title":"Prestashop"}],"id":"Project_7","featured":false,"title":"Idea Casa Italia"}];
const matches = ["react", "prestashop"];
const filtered = data.filter(({ stack }) => stack.filter(({ title }) => matches.includes(title.toLowerCase())).length > 0);
console.log(filtered);

How to reverse $unwind or re-assemble after $lookup?

I´ve been trying to reverse $unwind in nested array. Please, if you could help me it would be great. Thanks in advance.
Here are the details:
checklists collection, this collection has steps and each step has many areas, and I'd like to lookup to fill the area by id. I did it but I cannot reverse $unwind.
{
"steps": [{
"name": "paso1",
"description": "paso1",
"estimated_time": 50,
"active": true,
"areas": [{
"area_id": "60b6e728c44f0365c0d547d6"
}, {
"area_id": "60b6e7a2c44f0365c0d547d8"
}]
}, {
"name": "paso2",
"description": "o",
"estimated_time": 7,
"active": true,
"areas": [{
"area_id": "60b6e76ac44f0365c0d547d7"
}]
}, {
"name": "paso2",
"description": "l",
"estimated_time": 7,
"active": true,
"areas": [{
"area_id": "60b6e728c44f0365c0d547d6"
}]
}],
"name": "prueba",
"description": "prueba",
"type": "prueba",
"active": true,
"updated_at": {
"$date": "2021-06-02T23:56:02.232Z"
},
"created_at": {
"$date": "2021-06-01T22:44:57.114Z"
},
"__v": 0
}
area collection
{
"_id":"60b6e706c44f0365c0d547d5"
"name": "Development",
"short_name": "DEV",
"description": "Development area",
"updated_at": {
"$date": "2021-06-02T02:03:50.383Z"
},
"created_at": {
"$date": "2021-06-02T02:03:50.383Z"
},
"__v": 0,
"active": true
}
My aggregation
db.checklists.aggregate([
{
"$unwind": "$steps"
},
{
"$unwind": "$steps.areas"
},
{
"$lookup": {
"from": "areas",
"let": {
"area_id": {
"$toObjectId": "$steps.areas.area_id"
}
},
"pipeline": [
{
"$match": {
"$expr": {
"$eq": [
"$_id",
"$$area_id"
]
}
}
}
],
"as": "convertedItems"
}
},
{
"$group": {
"_id": "$steps.name",
"root": {
"$first": "$$ROOT"
},
"items": {
"$push": {
"$mergeObjects": [
"$steps.areas",
{
"$arrayElemAt": [
"$convertedItems",
0
]
}
]
}
},
}
},
{
"$addFields": {
"values": {
"$reduce": {
"input": "$items",
"initialValue": [],
"in": {
"$concatArrays": [
"$$value",
{
"$cond": [
{
"$in": [
"$$this.area_id",
"$$value.area_id"
]
},
[],
[
"$$this"
]
]
}
]
}
}
}
}
},
{
"$addFields": {
"root.steps.areas": "$values"
}
},
{
"$replaceRoot": {
"newRoot": "$root"
}
},
{
"$group": {
"_id": "$_id",
"root": {
"$first": "$$ROOT"
},
"steps": {
"$push": "$steps"
}
}
},
{
"$addFields": {
"root.steps": "$steps"
}
},
{
"$replaceRoot": {
"newRoot": "$root"
}
},
{
"$project": {
"convertedItems": 0
}
}
])
I don´t get to form this output:
{
"steps": [{
"name": "paso1",
"description": "paso1",
"estimated_time": 50,
"active": true,
"areas": [{
"_id": "60b6e728c44f0365c0d547d6",
"name":"Development",
..... //join or lookup
}, {
"_id": "60b6e7a2c44f0365c0d547d8",
"name":"Development",
..... //join or lookup
}]
}],
"name": "prueba",
"description": "prueba",
"type": "prueba",
"active": true,
"updated_at": {
"$date": "2021-06-02T23:56:02.232Z"
},
"created_at": {
"$date": "2021-06-01T22:44:57.114Z"
},
"__v": 0
}
Thank you very much!
$unwind deconstruct steps array
$lookup with areas collection pass area_id in let
$match to check is _id in area_ids after converting to string
$project to show required fields
$group by _id and reconstruct the steps array and pass your required fields
db.checklists.aggregate([
{ $unwind: "$steps" },
{
$lookup: {
from: "areas",
let: { area_id: "$steps.areas.area_id" },
pipeline: [
{
$match: {
$expr: { $in: [{ $toString: "$_id" }, "$$area_id"] }
}
},
{ $project: { name: 1 } }
],
as: "steps.areas"
}
},
{
$group: {
_id: "$_id",
steps: { $push: "$steps" },
name: { $first: "$name" },
description: { $first: "$description" },
type: { $first: "$type" },
active: { $first: "$active" },
updated_at: { $first: "$updated_at" },
created_at: { $first: "$created_at" },
__v: { $first: "$__v" }
}
}
])
Playground

Jq to add the element in json array conditionally and print the entire modified file

I have a json file with the below format.
I would like to add the element {"test" : "2"}in propDefs[] if the .children[].type=="environmentApprovalTask" and .children[].role.name== "GCM approver" and output that to the new file. I want the entire file with the modified content.The .children[] array may not have always 3 elements.
{
"edges": [
{
"to": "de32e562319310b7b4fe3736e22009",
"from": "99d5f0b278f08721adba7741b782d8",
"type": "SUCCESS",
"value": ""
},
{
"to": "06916609ad7fd4127815be3f075c81",
"from": "de32e562319310b7b4fe3736e22009",
"type": "SUCCESS",
"value": ""
},
{
"to": "99d5f0b278f08721adba7741b782d8",
"type": "ALWAYS",
"value": ""
}
],
"offsets": [
{
"name": "99d5f0b278f08721adba7741b782d8",
"x": -91,
"y": 100,
"h": 70,
"w": 290
},
{
"name": "06916609ad7fd4127815be3f075c81",
"x": -5,
"y": 420,
"h": 80,
"w": 120
},
{
"name": "de32e562319310b7b4fe3736e22009",
"x": 69,
"y": 240,
"h": 70,
"w": 240
}
],
"layoutMode": "manual",
"type": "graph",
"id": "d5d9c4c4-0c5f-4642-872c-ac892039eaa4",
"name": "79e8b952-dd59-4cfd-9c0c-e6c08a81d4ca",
"children": [
{
"type": "finish",
"id": "833e959c-6825-413d-afc4-7b74c0a87c3e",
"name": "06916609ad7fd4127815be3f075c81",
"children": []
},
{
"id": "e976041d-af9d-48cf-b838-735bb5efd483",
"type": "envApprovalTask",
"children": [],
"name": "de32e562319310b7b4fe3736e22009",
"roleRestrictionData": {
"contextType": "ENVIRONMENT",
"roleRestrictions": [
{
"roleId": "087175fb-5d38-42d1-b65a-2b6a6958bc21"
}
]
},
"propDefs": [{"test": "1"}],
"templateName": "ApprovalCreated",
"commentRequired": false,
"commentPrompt": "",
"role": {
"id": "087175fb-5d38-42d1-b65a-2b6a6958bc21",
"name": "Approver",
"isDeletable": true
}
},
{
"id": "513fbc6a-3c4a-4a10-9eb0-7dc893c69413",
"type": "environmentApprovalTask",
"children": [],
"name": "99d5f0b278f08721adba7741b782d8",
"roleRestrictionData": {
"contextType": "ENVIRONMENT",
"roleRestrictions": [
{
"roleId": "116b8cd5-e7e4-403d-9599-35fe25d3cba2"
}
]
},
"propDefs": [],
"templateName": "ApprovalCreated",
"commentRequired": false,
"commentPrompt": "",
"role": {
"id": "116b8cd5-e7e4-403d-9599-35fe25d3cba2",
"name": "Manager Approver",
"isDeletable": true
}
}
]
}
My final attempt with the code
cat file.json |jq '.|.children[]| select(.type=="envApprovalTask")|select(.role.name=="Approver") |.propDefs[.profDefs|length] |= .+ {"test" : "2"}'
This only produces the modified element, didnt produce the entire file as output. Please help how can i get the desired output.
TIL about complex assignments in jq. This actually works:
(.children[] | select(.type=="envApprovalTask" and .role.name=="Approver") | .propDefs) |= .+[{"test":"2"}]
The only significant difference from your version is the parenthesised left side of the assignment.

Resources