I'm a novice with mongodb so please excuse me if the question is a little basic. I have a mongo collection with a relatively complex document structure. The documents contain sub documents and arrays. I need to add additional data to some of the documents in this collection. A cut down version of the document is:
"date" : ISODate("2018-08-07T08:00:00.000+0000"),
.
. <<-- Other fields
.
"basket" :
[
{
"assetId" : NumberInt(639),
"securityId" : NumberInt(12470),
.
. <<-- Other fields
.
"exGroup" : [
. << -- Fields......
.
. << -- New Data will go here
]
}
.
. << More elements
]
The following (abridged) aggregation query finds the documents that need modifying:
// Stage 1: keep only documents whose "date" falls on 2018-08-07
// (>= 2018-08-07T00:00 and < 2018-08-08T00:00, +0000 offset).
{
"$match" : {
"date" : {
"$gte" : ISODate("2018-08-07T00:00:00.000+0000"),
"$lt" : ISODate("2018-08-08T00:00:00.000+0000")
}
}
},
// Stage 2: emit one document per element of the "basket" array.
{
"$unwind" : {
"path" : "$basket"
}
},
// Stage 3: emit one document per element of that basket entry's "exGroup" array.
{
"$unwind" : {
"path" : "$basket.exGroup"
}
},
// Stage 4: keep _id and date, and lift the basket fields to the top level.
{
"$project" : {
"_id" : 1.0,
"date" : 1.0,
"assetId" : "$basket.assetId",
"securityId" : "$basket.securityId",
"exGroup" : "$basket.exGroup"
}
},
// Stage 5: unwind the projected "exGroup" field.
// NOTE(review): exGroup was already unwound in stage 3, so this only changes the
// output if each exGroup element is itself an array - confirm it is needed.
{
"$unwind" : {
"path" : "$exGroup"
}
},
// Stage 6: keep only the results whose exGroup element has an "order" field.
{
"$match" : {
"exGroup.order" : {
"$exists" : true
}
}
}
For each document returned by the mongo query I need to (in python) retrieve a set of additional data from a SQL database and then append this data to the original mongo document as shown above. The set of new fields will be the same, the data will be different. What is not clear to me is how, once I have the data I go about updating the array values.
Could somebody give me a pointer?
Try this, it works for me!
// Runs the aggregation, then updates one array element per returned document.
// `value` is a placeholder for the data to write; `idField`, `myArray.ArrayId`
// and the field names are placeholders for your own schema.
mySchema.aggregate([
    //your aggregation code
], function(err, docList){
    if (err) {
        //code if the aggregation itself failed
        return;
    }
    //for each doc in docList
    async.each(docList, function(doc, callback){
        // Match the parent document AND the array element, so that the
        // positional "$" in the update refers to the matched element.
        var query = {$and:[{idField:doc.idField},{"myArray.ArrayId":doc.myArray.ArrayId}]};
        //Update or create field in array
        var update = {$set:{"myArray.$.FieldNameToCreateOrUpdate":value}};
        // NOTE(review): Mongoose documents the third argument of findOneAndUpdate as an
        // options object - confirm a bare field list is accepted by your version.
        var projection = {field1:1, field2:1, field3:1};
        mySchema.findOneAndUpdate(query, update, projection, function(err, done){
            // Return after reporting the error so the callback fires exactly once.
            if (err) { return callback(err); }
            callback(null);
        });
    }, function(err){
        //code if error
        //code if no error
    });
});
Related
Below is MongoDB document.
`{
"_id" : ObjectId("588f09c8d466d7054114b456"),
"phonebook" : [
{
"pb_name_first" : "Aasu bhai",
"pb_phone_number" : [
{
"ph_id" : 2,
"ph_no" : "+91111111",
"ph_type" : "Mobile"
}
],
"pb_email_id" : [
{
"email_id" : "temp#gmail.com",
"email_type" : "Home",
"em_id" :1
},
{
"email_id" : "test#gmail.com",
"email_type" : "work",
"em_id" :2
}
],
"pb_name_prefix" : "MR."
}
]
}`
I want a MongoDB query that updates the email_id value in the pb_email_id array based on em_id. If I select em_id=1, the temp#gmail.com record should be updated; if I select em_id=2, the test#gmail.com record should be updated.
I don't think you can apply if-else logic in update call, you can run two separate update calls
// pb_email_id is nested inside the phonebook array, so the path has to go through
// both arrays. A single positional "$" cannot address an element of a nested array;
// "$[]" (every phonebook entry) plus a filtered "$[email]" does. Requires MongoDB 3.6+.
db.collection.update(
    {'phonebook.pb_email_id.em_id': 1},
    {$set: {'phonebook.$[].pb_email_id.$[email].email_id': 'temp#gmail.com'}},
    {arrayFilters: [{'email.em_id': 1}], multi: true}
);
db.collection.update(
    {'phonebook.pb_email_id.em_id': 2},
    {$set: {'phonebook.$[].pb_email_id.$[email].email_id': 'test#gmail.com'}},
    {arrayFilters: [{'email.em_id': 2}], multi: true}
);
However you can run a script on collection to apply multiple logic
// Walks every document and rewrites email_id according to em_id.
// `db.collection` stands for your own collection (e.g. db.phonebook).
db.collection.find({}).forEach(function(doc){
    var changed = false;
    // pb_email_id lives inside each element of the phonebook array.
    (doc.phonebook || []).forEach(function(entry){
        (entry.pb_email_id || []).forEach(function(email){
            if(email.em_id === 1){
                email.email_id = "temp#gmail.com";
                changed = true;
            }
            else if(email.em_id === 2){
                email.email_id = "test#gmail.com";
                changed = true;
            }
        });
    });
    // Write the modified document back once, and only when something changed.
    // Shells without save() can use db.collection.replaceOne({_id: doc._id}, doc).
    if(changed){
        db.collection.save(doc);
    }
})
If you have to apply multiple logic, you can run script, otherwise two update calls if that's as much as needed.
P.S. - since you didn't mention the collection name, I used db.collection.update; replace "collection" with your actual collection name, e.g. db.phonebook.find.
I'm using a model tree structures with an array of ancestors and I need to check if any document is missing.
{
"_id" : "GbxvxMdQ9rv8p6b8M",
"type" : "article",
"ancestors" : [ ]
}
{
"_id" : "mtmTBW8nA4YoCevf4",
"parent" : "GbxvxMdQ9rv8p6b8M",
"ancestors" : [
"GbxvxMdQ9rv8p6b8M"
]
}
{
"_id" : "J5Dg4fB5Kmdbi8mwj",
"parent" : "mtmTBW8nA4YoCevf4",
"ancestors" : [
"GbxvxMdQ9rv8p6b8M",
"mtmTBW8nA4YoCevf4"
]
}
{
"_id" : "tYmH8fQeTLpe4wxi7",
"refType" : "reference",
"parent" : "J5Dg4fB5Kmdbi8mwj",
"ancestors" : [
"GbxvxMdQ9rv8p6b8M",
"mtmTBW8nA4YoCevf4",
"J5Dg4fB5Kmdbi8mwj"
]
}
My attempt would be to check, for each ancestor id, whether a document with that id exists. If a lookup fails, that ancestor document is missing and the data structure is corrupted.
// _ids of the documents that reference at least one ancestor which does not exist.
const missing = [];
Collection.find().forEach(r => {
  // some() stops at the first ancestor that cannot be found, so each
  // affected document is recorded once and no further lookups are made for it.
  if (r.ancestors && r.ancestors.some(a => !Collection.findOne(a))) {
    missing.push(r._id);
  }
});
But doing it like this will need MANY db calls. Is it possible to optimize this?
Maybe I could get an array with all unique ancestor ids first and check if these documents are existing within one db call??
First, collect all distinct ancestor IDs from your collection.
var allAncesstorIds = db.<collectionName>.distinct("ancestors");
Then check if any of the ancesstor IDs are not in the collection.
var cursor = db.<collectionName>.find({_id : {$nin : allAncesstorIds}}, {_id : 1})
Iterate the cursor and insert all missing docs in a collection.
cursor.forEach(function (missingDocId) {
db.missing.insert(missingDocId);
});
I have a collection in mongo that I need to migrate to Neo4j. To do that, I will export it to CSV. Then, I'll import the resulting CSV into Neo4j using Cypher. The documents in the collection have an object with an array that contains objects with arrays inside them. Take a look at the JSON below:
"services" : [
{
"max_id" : "646767779849326594",
"log" : [
{
"date" : 1443024000,
"steps" : 6
},
{
"date" : 1442512800,
"steps" : 1
}
],
"service" : "home_timeline"
},
{
"max_id" : 0.0,
"log" : [
{
"date" : 1443024000,
"steps" : 4
},
{
"date" : 1442512800,
"steps" : 1
}
],
"service" : "user_timeline"
},
{
"max_id" : 0.0,
"log" : [
{
"date" : 1443024000,
"steps" : 6
},
{
"date" : 1442512800,
"steps" : 1
}
],
"service" : "mentions_timeline"
}
]
How can I import this into Neo4j properly? I already found a solution for importing arrays, but I didn't find anything similar to my problem. What should the CSV header look like? What should the Cypher code to load these objects look like?
You can use JSON as a parameter to a Cypher query. There are a few examples of this here and here.
With your example something like this:
// {json} is the legacy parameter syntax; newer Neo4j versions expect $json instead.
WITH {json} AS data
UNWIND data.services AS service
// Insert data for each service.
// Property keys in a map are written unquoted.
MERGE (s:Service {service_name: service.service})
SET s.max_id = service.max_id
// The sample document stores its entries under "log" (singular).
FOREACH (log IN service.log | CREATE (l:Log {date: log.date, steps: log.steps})<-[:LOGGED]-(s))
There is also a tool for translating data from MongoDB document data model to Neo4j property graph model that you might find useful: https://github.com/neo4j-contrib/neo4j_doc_manager
I am trying to query a single embedded document in an array in MongoDB. I don't know what I am doing wrong. Programmatically, I will query this document and insert new embedded documents into the currently empty trips arrays.
{
"_id" : ObjectId("564b3300953d9d51429163c3"),
"agency_key" : "DDOT",
"routes" : [
{
"route_id" : "6165",
"route_type" : "3",
"trips" : [ ]
},
{
"route_id" : "6170",
"route_type" : "3",
"trips" : [ ]
},
...
]
}
The following queries, which I run in the mongo shell, return nothing:
db.tm_routes.find( { routes : {$elemMatch: { route_id:6165 } } } ).pretty();
db.tm_routes.find( { routes : {$elemMatch: { route_id:6165,route_type:3 } } } ).pretty();
db.tm_routes.find({'routes.route_id':6165}).pretty()
also db.tm_routes.find({'routes.route_id':6165}).count() is 0.
The following query returns every document in the array
db.tm_routes.find({'routes.route_id':'6165'}).pretty();
{
"_id" : ObjectId("564b3300953d9d51429163c3"),
"agency_key" : "DDOT",
"routes" : [
{
"route_id" : "6165",
"route_type" : "3",
"trips" : [ ]
},
{
"route_id" : "6170",
"route_type" : "3",
"trips" : [ ]
},
...
]}
but db.tm_routes.find({'routes.route_id':'6165'}).count() returns 1.
And finally, here is how I inserted data in the first place -in Node.JS-:
async.waterfall([
...
//RETRIEVE ALL ROUTEIDS FOR EVERY AGENCY
function(agencyKeys, callback) {
var routeIds = [];
var routesArr = [];
var routes = db.collection('routes');
//CALL GETROUTES FUNCTION FOR EVERY AGENCY
async.map(agencyKeys, getRoutes, function(err, results){
if (err) throw err;
else {
callback(null, results);
}
});
//GET ROUTE IDS
function getRoutes(agencyKey, callback){
var cursor = routes.find({agency_key:agencyKey});
cursor.toArray(function(err, docs){
if(err) throw err;
for(i in docs){
routeIds.push(docs[i].route_id);
var routeObj = {
route_id:docs[i].route_id,
route_type:docs[i].route_type,
trips:[]
};
routesArr.push(routeObj);
/* I TRIED 3 DIFFERENT WAYS TO PUSH DATA
//1->
collection.update({agency_key:agencyKey}, {$push:{"routes":{
'route_id':docs[i].route_id,
'route_type':docs[i].route_type,
'trips':[]
}}});
//2->
collection.update({agency_key:agencyKey}, {$push:{"routes":routeObj}});
*/
}
// 3->
collection.update({agency_key:agencyKey}, {$push:{routes:{$each:routesArr}}});
callback(null, routeIds);
});
};
},
...
var collection = newCollection(db, 'tm_routes',[]);
/**
 * Drops the collection called `name` (when db.collection returns a handle for it),
 * asks the database to create it again with `options`, and returns a new handle.
 *
 * NOTE(review): drop() and createCollection() are called without callbacks and
 * nothing here waits for them - if the driver runs them asynchronously, the
 * returned handle may be used before they have completed.
 *
 * @param {object} db - database handle exposing collection() and createCollection()
 * @param {string} name - name of the collection to recreate
 * @param {*} options - passed through unchanged to db.createCollection
 * @returns {object} the result of a second db.collection(name) call
 */
function newCollection(db, name, options){
    var previous = db.collection(name);
    if (previous) previous.drop();

    db.createCollection(name, options);

    var recreated = db.collection(name);
    return recreated;
}
Note: I am not using Mongoose and don't want to use if possible.
Melis,
I see what you are asking for, and what you need is help understanding how things are stored in mongodb. Things to understand:
A document is the basic unit of data for MongoDB and can be roughly compared to a row in a relational database.
A collection can be thought of as a table with a dynamic schema
So documents are stored in collections. Every document has a special _id that is unique within a collection. What you showed us above, in the following format, is one document.
{
"_id" : ObjectId("564b3300953d9d51429163c3"),
"agency_key" : "DDOT",
"routes" : [
{
"route_id" : "6165",
"route_type" : "3",
"trips" : [ ]
},
{
"route_id" : "6170",
"route_type" : "3",
"trips" : [ ]
},
...
]}
When you run a query against your tm_routes collection, find() returns each document in the collection that matches that query. So when you run db.tm_routes.find({'routes.route_id':'6165'}).pretty(); it returns the entire document that matches the query. Therefore this statement is wrong:
The following query returns every document in the array
If you need to find a specific route in that document and return only that route, then, because routes is an array, you may have to use the positional $ operator or the aggregation framework, depending on your use case.
For Node and Mongodb users using Mongoose, this is one of the ways to write the query to the above problem:
// Finds the document containing the given route and pushes a new entry into
// that route's trips array; "$" refers to the routes element matched by the filter.
db.tm_routes.updateOne(
    {
        routes: {
            $elemMatch: {
                // route_id is stored as a string in the sample document, so match a string;
                // in a route handler, use req.params.routeid in place of the literal.
                route_id: "6165"
            }
        }
    },
    {
        $push: {
            "routes.$.trips": {
                //the content you want to push into the trips array goes here
            }
        }
    }
)
I'm using MongoDB shell version: 2.4.8, and would simply like to know why a nested array search doesn't work quite as expected.
Assume we have 2 document collections, (a) Users:
{
"_id" : ObjectId("u1"),
"username" : "user1",
"org_ids" : [
ObjectId("o1"),
ObjectId("o2")
]
}
{
"_id" : ObjectId("u2"),
"username" : "user2",
"org_ids" : [
ObjectId("o1")
]
}
and (b) Organisations:
{
"_id" : ObjectId("o1"),
"name" : "Org 1"
}
{
"_id" : ObjectId("o2"),
"name" : "Org 2"
}
Collections have indexes defined for
Users._id, Users.org_id, Organisations._id
I would like to find all Organisations a specific user is a member of.
I've tried this:
> myUser = db.Users.find( { _id: ObjectId("u1") })
> db.Organisations.find( { _id : { $in : [myUser.org_ids] }})
yet it yields nothing as a result. I've also tried this:
> myUser = db.Users.find( { _id: ObjectId("u1") })
> db.Organisations.find( { _id : { $in : myUser.org_ids }})
but it outputs the error:
error: { "$err" : "invalid query", "code" : 12580 }
(which basically says you need to pass $in an array) ... but that's what I thought I was doing originally ? baffled.
Any ideas what I'm doing wrong?
According to the documentation, db.collection.find() returns a cursor, not a document. So myUser.org_ids is undefined, while $in requires an array. Let's see the solution!
_id is unique in a collection. So you can do findOne:
// findOne() hands back the matching document itself rather than a cursor,
// so myUser.org_ids is the stored array and can be given to $in directly.
// NOTE(review): presumably myUser is null when no user matches - guard before use.
myUser = db.Users.findOne( { _id: ObjectId("u1") })
db.Organisations.find( { _id : { $in : myUser.org_ids }})
If you are searching for a non-unique field you can use toArray:
myUsers = db.Users.find( { username: /^user/ }).toArray()
Then myUsers will be an array of objects matching to the query.