Mapreduce split input string into output array - arrays

I am dealing with documents like the following one:
> db.productData.find({"upc" : "XXX"}).pretty()
{
"_id" : ObjectId("538dfa3d44e19b2bcf590a77"),
"upc" : "XXX",
"productDescription" : "bla foo bar bla bla fooX barY",
"productSize" : "",
"ingredients" : "foo; bar; foo1; bar1.",
"notes" : "bla bla bla"
}
>
I would like to have a document containing, among the fields, a list/array of splitted ingredients (on the ;). I want to split the string of the original collection into an array of strings.
I would like to map only some of the input fields in the output collection.
I would like to use mapreduce on MongoDB.
I've tried many different ways moving stuff from the map function to the reduce function failing to find a proper solution.
From all the attempts I performed, now I know I need to check for null values etc, so the following one is my last attempt:
The map function:
var mapperProductData = function () {
var ingredientsSplitted = values.ingredientsString.split(';');
var objToEmit = {barcode : "", description : "", ingredients : []};
// checking for null (is this strictly necessary? why?)
if (
this.hasOwnProperty('ingredients')
&& this.hasOwnProperty('productDescription')
&& this.hasOwnProperty('upc')
) {
for (var i = 0; i < ingredientsSplitted.length; i++) {
// I want to emit a new document only when I have all the splitted strings inside the array
if (i == ingredientsSplitted.length - 1) {
objToEmit.barcode = this.upc;
objToEmit.description = this.productDescription;
objToEmit.ingredients = ingredientsSplitted;
emit(this.upc, objToEmit);
}
}
}
};
The reduce function:
var reducerNewMongoCollection = function(key, values) {
return values;
};
The map-reduce call:
db.productData.mapReduce(
mapperProductData,
reducerNewMongoCollection,
{
out : "newMongoCollection" ,
query: { "values" : {$exists: true} }
}
);
I am getting an empty collection in output (newMongoCollection is empty).
What am I doing wrong?

Let's start from the beginning. Your map function should look like this:
var mapperProductData = function () {
var ingredientsSplitted = this.ingredients.split(';');
var objToEmit = {
barcode : this.upc,
description : this.productDescription,
ingredients : ingredientsSplitted
};
emit(this.upc, objToEmit);
};
Your map-reduce call should be:
db.productData.mapReduce(
mapperProductData,
reducerNewMongoCollection,
{
out : "newMongoCollection",
query : {
upc : { $exists : true },
productDescription : { $exists : true },
ingredients : { $exists : true , $type : 4 }
}
}
);
The query part will filter the documents that do have relevant fields. Also the query parameter $type will match only documents where ingredients is an array. This way you don't need to do complicated checking inside your map function and the number of documents sent to map function will be lower.
The result for your test document document will look like this:
key : XXX,
value: {
"barcode" : "XXX",
"description" : "bla foo bar bla bla fooX barY",
"ingredients" : [
"foo",
" bar",
" foo1",
" bar1."
]
}

Related

Doing a 1:many:many join in mongodb

On this earlier thread, some of my code worked perfectly for the scenario in question.
I want to adapt the same code for another similar scenario and I'm yet to understand what could be wrong. This time around I have a coursemodule collection, a many:many relationship collection between courses and modules which only stores coursesId and moduleId. Since the code worked perfectly, I simply copied, did a little modification and arrived at the code below:
courses(){
var theslug = FlowRouter.getParam('myslug');
var mySchoolDocs = SchoolDb.findOne({slug: theslug});
var arrayModuleSchools = ModuleSchool.find({schoolId: mySchoolDocs._id});
// Transform the array of document into an array with only the ids
var arrayModuleId = [];
arrayModuleSchools.forEach(function(moduleSchools){
arrayModuleId.push(moduleSchools.moduleId);
});
var coursetoMod = CourseModules.find({}, {moduleId: {$in: arrayModuleId}});
if (coursetoMod) {
coursesArrayIds = [];
console.log(coursetoSchool);
coursetoMod.forEach(function (courseToModules) {
coursesArrayIds.push(courseToModules.coursesId);
});
return Courses.find({_id: {$in: coursesArrayIds}}).fetch();
}
}
To be specific, only 2 modules exist in the Modules collection, with ids - xfLM9DEzhCMYQpQ32 and PTbZQ9cTG9pByFsY2. The CourseModule collection has this has docs:
{
"_id" : "iXX4unJZRNcCw9bAm",
"moduleId" : "PTbZQ9cTG9pByFsY2",
"coursesId" : "FbgcdZxADHKRBj98z",
"createdAt" : ISODate("2017-08-25T16:36:17.173Z"),
"userId" : "n5rqFSHbhm7zqADyB"
}
{
"_id" : "RAJJFjqAjGoDeNhko",
"moduleId" : "PTbZQ9cTG9pByFsY2",
"coursesId" : "ESAf6NGpZzXeioecp",
"createdAt" : ISODate("2017-08-25T16:36:17.182Z"),
"userId" : "n5rqFSHbhm7zqADyB"
}
{
"_id" : "8ceuFwZK8Qduo5J5P",
"moduleId" : "xfLM9DEzhCMYQpQ32",
"coursesId" : "KnNj4GLcyMtvF8JmB",
"createdAt" : ISODate("2017-08-25T16:38:15.368Z"),
"userId" : "n5rqFSHbhm7zqADyB"
}
At the point where I log into the console I got that the selectorId is undefined:
L…n.Cursor {collection: LocalCollection, sorter: null, matcher:
M…o.Matcher, _selectorId: undefined, skip: undefined…}_projectionFn:
(obj)_selectorId: undefined_transform: nullcollection:
LocalCollectionfields: undefinedlimit: undefinedmatcher:
Minimongo.Matcherreactive: trueskip: undefinedsorter: null__proto__:
Object_depend: (changers, _allow_unordered)_getCollectionName:
()_getRawObjects: (options)_publishCursor: (sub)constructor:
(collection, selector, options)count: ()fetch: ()forEach: (callback,
thisArg)getTransform: ()map: (callback, thisArg)observe:
(options)observeChanges: (options)rewind: ()proto: Object
view.js:30 L…n.Cursor {collection: LocalCollection, sorter: null, matcher: M…o.Matcher, _selectorId: undefined, skip: undefined…}
All I want to do is to fetch the courses attached to a specific school currently displayed via the modules.
You are using the find function the wrong way:
var coursetoMod = CourseModules.find({}, {moduleId: {$in: arrayModuleId}});
The find() function takes two parameters : myCollection.find(query, projection). When you are filtering documents by field, it must be inside the query parameter. And the projection parameter is used to chose which fields to return.
In your case, here's the parameters you are using: query: {} and projection: {moduleId: {$in: arrayModuleId}}. But it needs to be: query: {moduleId: {$in: arrayModuleId}}
So you just have to use the $in as first parameter:
var coursetoMod = CourseModules.find({moduleId: {$in: arrayModuleId}});
By the way, if you want to see directly the documents returned by the find function inside a console.log, use .fetch() :
var coursetoMod = CourseModules.find({moduleId: {$in: arrayModuleId}}).fetch();
MongoDB find function documentation: https://docs.mongodb.com/manual/reference/method/db.collection.find/
#gaetan is on the right track with the answer, you need to use the query parameter instead of the projection parameter. There are some other simplifications that can be made in your code as well using the underscore library that is packaged with Meteor.
courses() {
const slug = FlowRouter.getParam('myslug');
const schoolId = SchoolDb.findOne({ slug })._id;
const Modules = ModuleSchool.find({ schoolId });
const ModuleIds = _.pluck(Modules,'moduleId');
const coursetoMod = CourseModules.find({ moduleId: { $in: ModuleIds }});
if (coursetoMod.length) {
coursesIds = _.pluck(coursetoMod,'coursesId');
return Courses.find({ _id: { $in: coursesArrayIds }}).fetch();
}
}

Grouping Of Data in AngularJs

I am building a site for a restaurant . On the site I am rendering the restaurant menu on a page .
The Menu Items Json, I am getting from server looks like this : -
{
"_id" : ObjectId("54c3c6e6cd7fe7df22cae87e"),
"restaurant" : ObjectId("54b3a7ef613e89f64654f2b3"),
"name" : "Pasta",
"itemCode" : "PST",
"price" : 240,
"sellingPrice" : 280,
"cuisine" : "Ittalian",
"category" : "starter",
"type" : "non-veg",
"created" : ISODate("2015-01-24T16:23:02.652Z"),
"Head" : pasta
"__v" : 0
}
So basically the requirement is to group menu items on basis of head field (which can be soups, pizza, pastas,etc).
so that all the item with "head" : "soups" will list down together and all item with "head" : "pizza" will list down together .
The way I thought of doing this is through designing custom filter, where I will pass all the unique "head" attribute and I will get data accordingly .
Just need a better and more optimize approach for the situation .
Depending on how often that filter will be called, you should probably just postprocess the data and create a new result set, grouped by the correct value. Something like (untested):
var jsonResults = [{head:'pasta',...},{head:'soups',...}];
, jsonResultsProcessed = { };
, rec
for(var i=0; i<jsonResults.length; i++) {
rec = jsonResults[i];
if(!jsonResultsProcessed[rec.head]) {
jsonResultsProcessed[rec.head] = [ rec ];
} else {
jsonResultsProcessed[rec.head].push(rec);
}
}
$scope.results = jsonResultsProcessed;
and then in your view, you can just refer to results.pizza or whatever.
var dishesPerHead = {};
var heads = [];
angular.forEach(dishes, function(dish) {
var dishesForHead = dishesPerHead[head];
if (!dishesForHead) {
dishesForHead = [];
heads.push(dish.head);
dishesPerHead[dish.head] = dishesForHead;
}
dishesForHead.push(dish);
}
Now you have an array of distinct heads, and for each head, dishesPerHead contains the array of dishes having this head.

Count how many and which index of a array

I have an array of objects:
result = [
{ _id: 53d0dfe3c42047c81386df9d, video_id: '1' },
{ _id: 53d0dfe3c42047c81386df9e, video_id: '1' },
{ _id: 53d0dfe3c42047c81386df9f, video_id: '1' },
{ _id: 53d0dfe3c42047c81386dfa0, video_id: '2' },
{ _id: 53d0dfe3c42047c81386dfa1, video_id: '2' },
{ _id: 53d0dfe3c42047c81386dfa2, video_id: '1' },
{ _id: 53d0dfe3c42047c81386dfa3, video_id: '2' },
{ _id: 53d0dfe3c42047c81386dfa4, video_id: '1' }
]
I need to create another array, which takes video_id as the index, and contains how many times this video_id appears in the first array:
list = [
{'1' : 5},
{'2' : 4}
]
Currently, I use this code:
while (i < result.length)
{
if(list[result[i].video_id] === undefined) {
list[result[i].video_id] = 0;
}
list[result[i].video_id] = list[result[i].video_id] + 1;
i = i + 1;
}
It works, but I wonder if there is any faster and cleaner way to do so? (the real result array has over 10k elements, and I doubt >10k conditional statements are optimal...).
I am using node.js, result is from a mongoose (mongoDB) query, and I didn't see any way to get this done by mongoose itself:
var now = new Date();
//M_logs is a mongoose model
query = M_logs.where('time').gt(new Date(now.getFullYear(), 0, 1).getTime() / 1000).lt(now.getTime() / 1000).select('video_id');
(PS: I wonder if this isn't more a Code Review question, please tell me if I am off-topic so I can migrate the question).
EDIT:
To answer to Juan Carlos Farah:
S_logs = new mongoose.Schema({
user_ip : String,
user_id : String,
user_agent : String,
canal_id : String,
theme_id : String,
video_id : String,
osef : String,
time : Number,
action: String,
is_newuser : String,
operator : String,
template : String,
catalogue : String,
referer : String,
from : String,
osef1 : String
});
M_logs = mongoose.model('logs', S_logs);
You can do this using the aggregation framework. The idea is to do something as follows:
Match the documents you are looking for. Based on your current query, I understand it would be documents where time is between new Date(now.getFullYear(), 0, 1).getTime() / 1000 and now.getTime() / 1000.
Group the matched documents by video_id and keep track of their count.
Optionally sort by _id, which would be equivalent to the original video_id.
The following is in mongo shell syntax:
var now = new Date();
db.M_logs.aggregate([
{
"$match" : {
"time" : {
"$gt" : new Date(now.getFullYear(), 0, 1).getTime() / 1000,
"$lt" : now.getTime() / 1000
}
}
},
{
"$group" : {
"_id" : "$video_id",
"count" : { "$sum" : 1 }
}
},
{
"$sort" : { "_id" : 1 }
}
]);
If this works for you, you can easily implement it in Mongoose or Node.js driver syntax. Note that the aggregation framework returns a cursor, which you can iterate through to populate your array.
EDIT:
Using the Node.js driver, you can access the results from the aggregation query in the callback function. Something as follows:
...
, function(err, result) {
console.dir(result);
db.close();
}
Note that the Mongoose syntax for aggregation queries is slightly different.
Example:
Model.aggregate([ <QUERY> ]).exec( <CALLBACK> );
For more information, consult the documentation here.
I would suggest that you use aggregation framework to count number of documents. It will be significantly faster than iterating all your documents and counting them.
Using mongoose you can do it like this:
var now = new Date();
var startTime = new Date(now.getFullYear(), 0, 1).getTime() / 1000):
var endTime = now.getTime() / 1000;
M_logs.aggregate([
// filter the documents you're looking for
{"$match" : { "time" : {"$gt": startTime, "$lt": endTime}}},
// group by to get the count for each video_id
{"$group" : {"_id" : "$video_id", "count" : {"$sum" : 1}}},
// make the output more explanatory; this part is optional
{"$project" : { "video_id" : "$_id", "count" : "$count", _id : 0}}
]).exec(function(err, docs){
if (err) console.err(err);
console.log(docs);
});
The output of the docs will be:
[ { count: 4, video_id: '2' }, { count: 5, video_id: '1' } ]
use
var list = {};
result.forEach(function (el) {
list[el.video_id] = (list[el.video_id] || 0) + 1;
});
the resuling list will look something like this:
var list = {
'1': 5,
'2': 4
};

How can i find object from an array in mongoose

I have a schema like following : -
var P = {
s : [{
data : [],
sec : mongoose.Schema.Types.ObjectId
}]
};
Now I want to find only the object of section not entire the row. Like If I pass sec value I want only the value of s.data of that sec object.
example : -
{ s : [
{
data : [],
sec : '52b9830cadaa9d273200000d'
},{
data : [],
sec : '52b9830cadaa9d2732000005'
}
]
}
Result should be look like -
{
data : [],
sec : '52b9830cadaa9d2732000005'
}
I do not want all entire row. Is it possible? If yes, then please help me.
You can use the $elemMatch projection operator to limit an array field like s to a single, matched element, but you can't remove the s level of your document using find.
db.test.find({}, {_id: 0, s: {$elemMatch: {sec: '52b9830cadaa9d2732000005'}}})
outputs:
{
"s": [
{
"data": [ ],
"sec": "52b9830cadaa9d2732000005"
}
]
}
You can always get the value of some field by using find(). For example in your case:
db.collectionName.find({},{s.data:1})
So the first bracket is to apply any condition or query, and in the second bracket you have to define the field as 1(to fetch only those fields value).
Please check http://docs.mongodb.org/manual/reference/method/db.collection.find for more information.
Let me know if it solves your problem.
Not into Mongo or db but working with Pure JavaSript skills here is the Solution as you mentioned Node.js which would do the execution task of the below.
Schema
var P = { s : [
{
data : [],
sec : '52b9830cadaa9d273200000d'
},{
data : [],
sec : '52b9830cadaa9d2732000005'
}
]
};
Search Method Code
var search = function (search_sec){
for (var i=0; i<(P.s.length);i++){
var pointer = P.s[i].sec;
var dataRow = P.s[i];
if((pointer) === search_sec ){
console.log(dataRow);
}
}
};
Here is How you can call - search('search_id');
For example input :
search('52b9830cadaa9d2732000005');
Output:
[object Object] {
data: [],
sec: "52b9830cadaa9d2732000005"
}
Working Demo here - http://jsbin.com/UcobuVOf/1/watch?js,console

How to get unique/Distinct data from store?

i am using the extjs. i need to retrieve distinct data from store as per requirement. In my store contains single list not nested list in that have regions like AMERICAS , North Sea and SE Asia. these region it self have subRegion,vesselName and vesselType values. I need to retrive unique value based on region, bacasue it contains many duplicate records. I have tried like as below, but it is not working me. Can anybody tel me how to achieve ?. great appreciated. Thank you.
var vesselStore=Ext.getStore('VesselStatusReportStore');
var arr=new Array();
var obj;
vesselStore.each(function(rec,index)
{
obj=new Object();
if(rec.get('region')=='AMERICAS'){
obj.subRegionAmerica=rec.get('subRegion');
obj.vesselNameAmerica=rec.get('vesselName');
obj.vesselTypeAmerica=rec.get('vesselType');
}
if(rec.get('region')=='NorthSea'){
obj.subRegionNorthSea=rec.get('subRegion');
obj.vesselNameNorthSea=rec.get('vesselName');
obj.vesselTypeNorthSea=rec.get('vesselType');
}
if(rec.get('region')=='SE Asia'){
obj.subRegionSEAsia=rec.get('subRegion');
obj.vesselNameSEAsia=rec.get('vesselName');
obj.vesselTypeSEAsia=rec.get('vesselType');
}
arr.push(obj);
console.log(obj);
});
Json:
[ {
"region" : "AMERICAS",
"startDate" : null,
"subRegion" : "US",
"vesselName" : "Thoma-Sea � Hull #147",
"vesselType" : "PSV"
},
{
"region" : "AMERICAS",
"startDate" : null,
"subRegion" : "US",
"vesselName" : "Thoma-Sea � Hull #148",
"vesselType" : "PSV"
},
{
"region" : "AMERICAS",
"startDate" : null,
"subRegion" : "Mexico",
"vesselName" : "Thoma-Sea � Hull #148",
"vesselType" : "PSV"
}]
It looks like you want to use collect. http://docs.sencha.com/ext-js/4-1/#!/api/Ext.data.Store-method-collect
vesselStore.collect('region')
This section doesn't make sense:
obj=new Object();
if(rec.get('region')=='AMERICAS'){
obj.subRegionAmerica=rec.get('subRegion');
obj.vesselNameAmerica=rec.get('vesselName');
obj.vesselTypeAmerica=rec.get('vesselType');
}
if(rec.get('region')=='NorthSea'){
obj.subRegionNorthSea=rec.get('subRegion');
obj.vesselNameNorthSea=rec.get('vesselName');
obj.vesselTypeNorthSea=rec.get('vesselType');
}
if(rec.get('region')=='SE Asia'){
obj.subRegionSEAsia=rec.get('subRegion');
obj.vesselNameSEAsia=rec.get('vesselName');
obj.vesselTypeSEAsia=rec.get('vesselType');
}
arr.push(obj);
You are basically saying "Whatever the region is, copy the record to obj, then add that obj to my array".
I believe you meant something more along these lines:
var vesselStore=Ext.getStore('VesselStatusReportStore');
var iRegions = [];
vesselStore.each(function(rec,index)
{
var iRegionName = rec.get('region');
// Make sure theres a array item with the region name,
// if not create a blank array;
iRegions[ iRegionName ] = iRegions[ iRegionName ] || [];
// Add the record to its corresponding array item.
// (If you don't want the full record, you can just get the
// fields individually like you did in your code).
iRegions[ iRegionName ] = rec;
}
console.log( iRegions );

Resources