Perform a join on a collection only WHERE substring matches - arrays

I would like to perform a join on a collection where the student name is equal in both collections AND where the last string after "_" in the log column's value equals the variable id.
I got the join to work but the issue is on the match statement. How can I match with a substring of a string on the logs column in the collection that I am about to join?
I can split the log column value into an array like this:
{ $split: [ "$studentInfo.log", "_" ]}
I just need to get the last value after underscore now to match variable id
var id = "123"; // wanted suffix of `log`; not yet used by this pipeline (that is the open question)
dbo.collection("student").aggregate([
  {
    // Join every student to the `data` documents carrying the same student name.
    $lookup: {
      from: "data",
      localField: "name",
      // `studentName` is a top-level field of the sample `data` documents,
      // so it is referenced directly rather than as "data.studentName".
      foreignField: "studentName",
      as: "studentInfo"
    }
  }
]).toArray(function (err, results) {
  // Surface driver errors instead of silently logging `undefined`.
  if (err) throw err;
  console.log(results);
});
The issue is that student name is not unique so in order to get the join to work correctly we need to join by name AND make sure the characters after underscore match with the variable we have id
student collection
{
"_id" : ObjectId("(Object ID here"),
"name": "Test"
}
data collection
{
"_id" : ObjectId("(Object ID here"),
"studentName": "Test",
"log": "NC_Test_123"
},
{
"_id" : ObjectId("(Object ID here"),
"studentName": "Test",
"log": "FC_Test_444"
}
I need to get NC_Test_123 when the variable I have for id is 123.

You need to define a custom lookup condition that covers both conditions: the student name and the log field. To get the last value of the split string you can use $arrayElemAt with the index set to -1
// Join each student to the `data` documents that share the student's name
// AND whose `log` value ends (after the last "_") with the wanted id.
var id = "123"; // suffix to compare with the last "_"-separated segment of `log`
db.student.aggregate([
  {
    $lookup: {
      from: "data",
      // Expose the student's name to the sub-pipeline as $$student_name.
      let: { student_name: "$name" },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                { $eq: [ "$$student_name", "$studentName" ] },
                // Split `log` on "_" and take the element at index -1 (the last one).
                { $eq: [ { $arrayElemAt: [ { $split: [ "$log", "_" ] }, -1 ] }, id ] }
              ]
            }
          }
        }
      ],
      as: "studentInfo"
    }
  },
  // Turn the `studentInfo` array into one output document per joined entry.
  { $unwind: "$studentInfo" }
])

Related

Is there a way to use value from field which is a collection name as value for 'from' parameter in $lookup (mongodb)

Mongo V5.03
I am using Compass for building a pipeline. And I am stuck here.
collection: hello
{
"_id" : "...",
"collection_name" : "world"
}
collection: world
{
"_id" : "..."
}
while building a pipeline with mongodb aggregation, to call another collection, we can use $lookup operator. Syntax of $lookup looks like this :
{
* from: The target collection.
* localField: The local join field.
* foreignField: The target join field.
* as: The name for the results.
* pipeline: The pipeline to run on the joined collection.
* let: Optional variables to use in the pipeline field stages.
}
For one time use, I can directly write { from : 'world' , ...}. But I want to do this instead { from : '$collection_name', ... } so that I can keep calling field value because that collection_names field is an array which I $unwind it.
Any tips, suggestions, or solutions are welcome.
There is probably no way to $lookup from a dynamic collection name as of now. However, if all possible collection names are known in advance, we can use multiple $lookup stages to perform conditional lookups and use $setUnion to merge the lookup results together.
Here is an example of knowing all 2 possibilities, world and foo; syntax in MongoDB 5.0+:
// One conditional $lookup per known collection name ("world" and "foo").
// Each sub-pipeline only lets documents through when the outer document's
// collection_name equals that collection's name; the two result arrays are
// then merged into `allLookup` with $setUnion.
db.hello.aggregate([
  {
    $lookup: {
      from: "world",
      localField: "key",
      foreignField: "_id",
      let: { c: "$collection_name" },
      pipeline: [
        { $match: { $expr: { $eq: [ "$$c", "world" ] } } }
      ],
      as: "worldLookup"
    }
  },
  {
    $lookup: {
      from: "foo",
      localField: "key",
      foreignField: "_id",
      let: { c: "$collection_name" },
      pipeline: [
        { $match: { $expr: { $eq: [ "$$c", "foo" ] } } }
      ],
      as: "fooLookup"
    }
  },
  {
    $project: {
      collection_name: 1,
      key: 1,
      allLookup: { $setUnion: [ "$worldLookup", "$fooLookup" ] }
    }
  }
])
Here is the Mongo playground for your reference.

How to return content of a nested indexed field with some top level fields using Mongodb?

Consider these documents:
{
"chapterNumber": "1",
"contents": [
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 1",
"contents": [
"not clean content",
{
"p": null
}
]
},
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 2",
"contents": [
"not clean content",
{
"p": null
}
]
},
]
}
{
"chapterNumber": "2",
"contents": [
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 3",
"contents": [
"not clean content",
{
"p": null
}
]
},
{
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 4",
"contents": [
"not clean content",
{
"p": null
}
]
},
]
}
If I do an index on the field paragraphCleanText and then issue a query to search for this string cleaned content 3, is there a way to return the following structure from a single or optimized query?
{
"chapterNumber": "2",
"paragraphNumber": "1 ",
"paragraphCleanText": "cleaned content 3"
}
You need to use aggregation-pipeline for this :
1.If your "contents.paragraphCleanText" is unique :
Query :
db.collection.aggregate([
  // Stage 1: keep only chapters containing a paragraph with the searched text.
  { $match: { "contents.paragraphCleanText": "cleaned content 3" } },
  // Stage 2: replace `contents` with the single matching paragraph object.
  // `$filter` returns an array of matching objects and `$arrayElemAt ... 0`
  // picks the first one, which assumes `contents.paragraphCleanText` is unique.
  // If several objects can match, use `$reduce` instead of `$filter + $arrayElemAt`.
  {
    $project: {
      _id: 0,
      chapterNumber: 1,
      contents: {
        $arrayElemAt: [
          {
            $filter: {
              input: "$contents",
              cond: { $eq: [ "$$this.paragraphCleanText", "cleaned content 3" ] }
            }
          },
          0
        ]
      }
    }
  },
  // Stage 3: lift the two wanted paragraph fields next to chapterNumber.
  {
    $project: {
      chapterNumber: 1,
      paragraphNumber: "$contents.paragraphNumber",
      paragraphCleanText: "$contents.paragraphCleanText"
    }
  }
])
Test : mongoplayground
Note : If you want the entire object from the contents array that matches the condition, you can simply use the $elemMatch projection operator or the $ positional projection operator. But as you don't want the entire object, only a couple of fields from the matched object, you need to use aggregation, because the projection option in .find() is not capable of transforming fields - it's only capable of including or excluding fields from the document. So you'll use the $project stage of aggregation to do this.
2.If your "contents.paragraphCleanText" can have duplicates, i.e. there can be multiple objects inside the contents array with "contents.paragraphCleanText": "cleaned content 3" :
Query :
db.collection.aggregate([
  // Keep only chapters that contain at least one paragraph with the searched text.
  { $match: { "contents.paragraphCleanText": "cleaned content 3" } },
  {
    $project: {
      _id: 0,
      chapterNumber: 1,
      // Fold `contents` into an array holding only the matching paragraphs,
      // each trimmed down to paragraphCleanText and paragraphNumber.
      contents: {
        $reduce: {
          input: "$contents",
          initialValue: [], // the holding array starts out empty
          in: {
            $cond: [
              // Condition: does the current paragraph carry the searched text?
              { $eq: [ "$$this.paragraphCleanText", "cleaned content 3" ] },
              // Matched: concatenate a trimmed copy of it onto the holding array.
              {
                $concatArrays: [
                  "$$value",
                  [ { paragraphCleanText: "$$this.paragraphCleanText", paragraphNumber: "$$this.paragraphNumber" } ]
                ]
              },
              // Not matched: return the holding array as is.
              "$$value"
            ]
          }
        }
      }
    }
  },
  // Purely optional stage: compare the output with and without it before
  // deciding to keep it; if kept, add a `$project` stage after it.
  { $unwind: '$contents' }
])
Test : mongoplayground
Found a pretty simple solution for my use case:
1. Add an index on the paragraphCleanText field, plus chapterNumber and paragraphNumber as additional fields.
2. Run the following query: db.yourCollection.find({"contents.paragraphCleanText" : { $regex: /^ some text that is present in the field .*/ } })._addSpecial( "$returnKey", true )

Use the value of a key as a key in mongodb aggregation

I am working on a project where user can upload a *.csv* file to enter user data, also he can choose which one is the email field (user may have used Email, email, E-mail or something else as the email field).
Previous Developer saved the data in following format.
[
{
"_id" : ObjectId("5de7d0d65223850135eac968"),
groupName:"AGroup",
primaryField:"EMail",
FieldsData:[
{EMail:"abc#gmail.com",otherField:123},
{EMail:"def#outlook.com",otherField:345}
]
},
{
"_id" : ObjectId("5de7d0d65223850135eac969"),
groupName:"BGroup",
primaryField:"userEmail",
FieldsData:[
{userEmail:"hij#hotmail.com",otherField:678},
{userEmail:"kl#outlook.com",otherField:910}
]
}
]
Here FieldsData is the data from the .csv file and primaryField is the field the user has chosen as the email field.
I have to get the primaryField name and get the corresponding field value from FieldsData.
For example: for the first group, i.e. AGroup, primaryField is EMail, hence the MailList will contain the values of FieldsData.EMail
Right now I have to do this in following steps:
Get the primaryField of all the groups
// Project each group's _id together with its primaryField value (exposed as PrimaryKey).
db.Group.aggregate(
  { $project: { _id: 1, PrimaryKey: "$primaryField" } }
)
Loop over all the data
Make query with each groupId and primaryField
// Select one group by _id and project the EMail values of its FieldsData
// entries into MailList (the field name EMail is hard-coded here).
db.Group.aggregate(
  { $match: { _id: ObjectId("5de7d0d65223850135eac968") } },
  { $project: { MailList: "$FieldsData.EMail" } }
)
Please suggest a way to do the job in one query. Basically I need the data in following format
[
{ "_id" : ObjectId("5de7d0d65223850135eac968"), "MailList" : [ "abc#gmail.com", "def#outlook.com" ] }
{ "_id" : ObjectId("5de7d0d65223850135eac969"), "MailList" : [ "hij#hotmail.com", "kl#outlook.com" ] }
]
To read and compare object keys dynamically you need to use $objectToArray. You can use $let to define a temporary variable element which will hold the first k-v pair whose key matches your primaryField, and then return its v part.
// Build MailList by reading, from every FieldsData entry, the value stored
// under the key named by the document's primaryField.
db.collection.aggregate([
  {
    $project: {
      _id: 1,
      MailList: {
        $map: {
          input: "$FieldsData",
          in: {
            $let: {
              vars: {
                // `element` = first { k, v } pair of the current entry whose
                // key equals primaryField.
                element: {
                  $arrayElemAt: [
                    {
                      $filter: {
                        input: { $objectToArray: "$$this" },
                        cond: { $eq: [ "$$this.k", "$primaryField" ] }
                      }
                    },
                    0
                  ]
                }
              },
              // Emit only the value part of that pair.
              in: "$$element.v"
            }
          }
        }
      }
    }
  }
])
Mongo Playground

MongoDB perform $match with two input array values?

In MongoDB, I am trying to write a query where I have two input array Bills, Names where the first one contains billids and the second one contains names of the person. Also in reality Bills at index i and Names at index i is the actual document which we want to search in MongoDB.
I want to write a query such that Bills[i] = db_billid && Names[i] = db_name which means I want to return the result where at a particular index both billid and name matches.
I thought of using $in but the thing is I can apply $in in Bills but I don't know at which index that billid is found.
{ $and: [{ billid: { $in: Bills } }, {name: Names[/* index at which this bill is found */]}] }
Can anyone please help me how can I solve this ??
MongoDB Schema
var transactionsschema = new Schema({
transactionid: {type: String},
billid: {type: String},
name: {type: String}
});
Sample documents in MongoDB
{ _id: XXXXXXXXXXX, transactionid: 1, billid : bnb1234, name: "sudhanshu"}, { _id: XXXXXXXXXXX, transactionid: 2, billid : bnb1235, name: "michael"}, { _id: XXXXXXXXXXX, transactionid: 3, billid : bnb1236, name: "Morgot"}
Sample arrays
Bills = ["bill1", "bill2", "bill3"], Names = ["name1", "name2", "name"]
Edit - If $in can work in array of objects then I can have array of object with keys as billid and name
var arr = [{ billid: "bill1", "name": "name1"}, {billid: "bill2", "name": "name2"}, {billid: "bill3", "name": "name3"}]
But the thing is then how can put below query
{ $and: [{ billid: { $in: arr.Bills } }, {name: arr.Names}] }
Knowing that bills are unique but names may have duplicates you can use $indexOfArray to get index of matching bill and then use that index to compare names array at evaluated index (using $arrayElemAt to retrieve value). Also you have to check if value returned by $indexOfArray is not equal to -1 (no match)
// Parallel input arrays: names[i] belongs to bills[i].
var bills = ["bnb1234", "bnb1235"];
var names = ["sudhanshu", "michael"];

db.col.aggregate([
  {
    $match: {
      $expr: {
        $and: [
          // The document's billid must occur in `bills` (-1 means no match).
          { $ne: [ { $indexOfArray: [ bills, "$billid" ] }, -1 ] },
          // The document's name must equal the entry of `names` at that same index.
          { $eq: [ "$name", { $arrayElemAt: [ names, { $indexOfArray: [ bills, "$billid" ] } ] } ] }
        ]
      }
    }
  }
])
alternatively $let can be used to avoid duplication:
// Same filter as the two-condition $and form, but the position of billid
// inside `bills` is computed once and reused through the $$index variable.
db.col.aggregate([
  {
    $match: {
      $expr: {
        $let: {
          vars: {
            index: { $indexOfArray: [ bills, "$billid" ] }
          },
          in: {
            $and: [
              // -1 means billid was not found in `bills`.
              { $ne: [ "$$index", -1 ] },
              { $eq: [ "$name", { $arrayElemAt: [ names, "$$index" ] } ] }
            ]
          }
        }
      }
    }
  }
])

MongoDB query to find document with duplicate value in array

tldr; I'm struggling to construct a query to
Make an aggregation to get a count of values on a certain key ("original_text_source"), which
Is in a sub-document that is in an array
Full description
I have embedded documents with arrays that are structured like this:
{
"_id" : ObjectId("0123456789"),
"type" : "some_object",
"relationships" : {
"x" : [ ObjectId("0123456789") ],
"y" : [ ObjectId("0123456789") ],
},
"properties" : [
{
"a" : "1"
},
{
"b" : "1"
},
{
"original_text_source" : "foo.txt"
},
]
}
The docs were created from exactly 10k text files, sorted in various folders. During inserting documents into the MongoDB (in batches) I messed up and moved a few files around, causing one file to be imported twice (my database has a count of exactly 10001 docs), but obviously I don't know which one it is. Since one of the "original_text_source" values has to have a count of 2, I was planning on just deleting one.
I read up on solutions with $elemMatch, but since my array element is a document, I'm not sure how to proceed. Maybe with mapReduce? But I can't transfer the logic to my doc structure.
I also could just create a new collection and reupload all, but in case I mess up again, I'd rather like to learn how to query for duplicates. It seems more elegant :-)
You can find duplicates with a simple aggregation like this:
// Group documents by their original_text_source value(s) and keep only the
// groups that contain more than one document, i.e. the duplicates.
// The stages are passed as a single pipeline array, matching the other
// pipelines in this document, so a second stage is never read as `options`.
db.collection.aggregate([
  {
    $group: {
      _id: "$properties.original_text_source",
      docIds: { $push: "$_id" },  // _id of every document in the group
      docCount: { $sum: 1 }       // number of documents in the group
    }
  },
  { $match: { docCount: { $gt: 1 } } }
])
which gives you something like this:
{
"_id" : [
"foo.txt"
],
"docIds" : [
ObjectId("59d6323613940a78ba1d5ffa"),
ObjectId("59d6324213940a78ba1d5ffc")
],
"docCount" : 2.0
}
Run the following:
// Report every original_text_source value shared by two or more documents,
// together with the _ids of those documents, largest groups first.
db.collection.aggregate([
  {
    $group: {
      _id: { name: "$properties.original_text_source" },
      idsForDuplicatedDocs: { $addToSet: "$_id" },
      count: { $sum: 1 }
    }
  },
  // Keep only groups with at least two members.
  { $match: { count: { $gte: 2 } } },
  // Descending by group size.
  { $sort: { count: -1 } }
]);
Given a collection which contains two copies of the document you showed in your question, the above command will return:
{
"_id" : {
"name" : [
"foo.txt"
]
},
"idsForDuplicatedDocs" : [
ObjectId("59d631d2c26584cd8b7b3337"),
ObjectId("59d631cbc26584cd8b7b3333")
],
"count" : 2
}
Where ...
The attribute _id.name is the value of the duplicated properties.original_text_source
The attribute idsForDuplicatedDocs contains the _id values for each of the documents which have a duplicated properties.original_text_source
"reviewAndRating": [
{
"review": "aksjdhfkashdfkashfdkjashjdkfhasdkjfhsafkjhasdkjfhasdjkfhsdakfj",
"productId": "5bd956f29fcaca161f6b7517",
"_id": "5bd9745e2d66162a6dd1f0ef",
"rating": "5"
},
{
"review": "aksjdhfkashdfkashfdkjashjdkfhasdkjfhsafkjhasdkjfhasdjkfhsdakfj",
"productId": "5bd956f29fcaca161f6b7518",
"_id": "5bd974612d66162a6dd1f0f0",
"rating": "5"
},
{
"review": "aksjdhfkashdfkashfdkjashjdkfhasdkjfhsafkjhasdkjfhasdjkfhsdakfj",
"productId": "5bd956f29fcaca161f6b7517",
"_id": "5bd974622d66162a6dd1f0f1",
"rating": "5"
}
]

Resources