Currently I need to push a large CSV file into a mongo DB and the order of the values needs to determine the key for the DB entry:
Example CSV file:
9,1557,358,286,Mutantville,4368,2358026,,M,0,0,0,1,0
9,1557,359,147,Wroogny,4853,2356061,,D,0,0,0,1,0
Code to parse it into arrays:
var fs = require("fs");
var csv = require("fast-csv");
// Stream rank.txt through the CSV parser: print each parsed row as it
// arrives and report once the whole file has been read.
var source = fs.createReadStream("rank.txt");
var rowParser = source.pipe(csv());
rowParser.on("data", function (row) {
  console.log(row);
});
rowParser.on("end", function () {
  console.log("Read Finished");
});
Code Output:
[ '9',
'1557',
'358',
'286',
'Mutantville',
'4368',
'2358026',
'',
'M',
'0',
'0',
'0',
'1',
'0' ]
[ '9',
'1557',
'359',
'147',
'Wroogny',
'4853',
'2356061',
'',
'D',
'0',
'0',
'0',
'1',
'0' ]
How do I insert the arrays into my mongoose schema to go into mongo db?
Schema:
var mongoose = require("mongoose");
// Mongoose schema for one ranking row. It declares 14 fields, one per column
// of the sample CSV rows shown above, in the same left-to-right order.
var rankSchema = new mongoose.Schema({
serverid: Number,
resetid: Number,
rank: Number,
number: Number,
name: String,
land: Number,
networth: Number,
tag: String,
gov: String,
gdi: Number,
protection: Number,
vacation: Number,
alive: Number,
deleted: Number
});
// Export the compiled model under the name "Rank".
module.exports = mongoose.model("Rank", rankSchema);
The order of the array needs to match the order of the schema: for instance, the first value in the array (9) must always be saved under the key "serverid", and so forth. I'm using Node.js.
You can do it with fast-csv by getting the headers from the schema definition which will return the parsed lines as "objects". You actually have some mismatches, so I've marked them with corrections:
const fs = require('mz/fs');
const csv = require('fast-csv');
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost/test';
mongoose.Promise = global.Promise;
mongoose.set('debug', true);
// Schema whose field order doubles as the CSV column order: its path names are
// read with Object.keys(Rank.schema.paths) and handed to the parser as headers.
// NOTE(review): with the sample row this order puts "286" into name and
// "Mutantville" into land (see the parsed object printed further down) —
// confirm the column alignment against the real data.
const rankSchema = new Schema({
serverid: Number,
resetid: Number,
rank: Number,
name: String,
land: String, // <-- You have this as Number but it's a string
networth: Number,
tag: String,
stuff: String, // the empty field in the csv
gov: String,
gdi: Number,
protection: Number,
vacation: Number,
alive: Number,
deleted: Number
});
const Rank = mongoose.model('Rank', rankSchema);
const log = data => console.log(JSON.stringify(data, undefined, 2));
/**
 * Imports input.csv into the Rank collection in batches.
 *
 * Connects to MongoDB, empties every registered model, derives the CSV
 * headers from the schema paths, then streams the file and writes the
 * parsed rows with insertMany() once BATCH_SIZE rows have accumulated.
 * The process always exits when done; the exit code is 1 on failure.
 */
(async function() {
  // Rows collected before each insertMany() round trip.
  const BATCH_SIZE = 10000;

  try {
    const conn = await mongoose.connect(uri);
    // Start from empty collections so repeated runs do not duplicate rows.
    await Promise.all(Object.values(conn.models).map(m => m.remove()));

    let headers = Object.keys(Rank.schema.paths)
      .filter(k => ['_id','__v'].indexOf(k) === -1);
    console.log(headers);

    await new Promise((resolve,reject) => {
      let buffer = [];

      // Write out whatever is buffered; a no-op when the buffer is empty.
      const flush = async () => {
        if ( buffer.length === 0 ) return;
        const batch = buffer;
        buffer = [];
        await Rank.insertMany(batch);
      };

      // pipe() does not forward errors, so the file stream needs its own
      // handler (e.g. for a missing input file).
      const input = fs.createReadStream('input.csv')
        .on("error", reject);

      const stream = input
        .pipe(csv({ headers }))
        .on("error", reject)
        .on("data", async doc => {
          // Hold back further rows while a batch may be in flight.
          stream.pause();
          buffer.push(doc);
          log(doc);
          try {
            if ( buffer.length >= BATCH_SIZE ) {
              await flush();
            }
            stream.resume();
          } catch(e) {
            // Leave the stream paused and surface the failure to the caller.
            reject(e);
          }
        })
        .on("end", async () => {
          try {
            await flush();
            // Always settle, even when nothing was left to write.
            resolve();
          } catch(e) {
            reject(e);
          }
        });
    });
  } catch(e) {
    console.error(e)
    process.exitCode = 1;
  } finally {
    process.exit()
  }
})()
As long as the schema actually lines up to the provided CSV then it's okay. These are the corrections that I can see but if you need the actual field names aligned differently then you need to adjust. But there was basically a Number in the position where there is a String and essentially an extra field, which I'm presuming is the blank one in the CSV.
The general things are getting the array of field names from the schema and passing that into the options when making the csv parser instance:
let headers = Object.keys(Rank.schema.paths)
.filter(k => ['_id','__v'].indexOf(k) === -1);
let stream = fs.createReadStream('input.csv')
.pipe(csv({ headers }))
Once you actually do that then you get an "Object" back instead of an array:
{
"serverid": "9",
"resetid": "1557",
"rank": "358",
"name": "286",
"land": "Mutantville",
"networth": "4368",
"tag": "2358026",
"stuff": "",
"gov": "M",
"gdi": "0",
"protection": "0",
"vacation": "0",
"alive": "1",
"deleted": "0"
}
Don't worry about the "types" because Mongoose will cast the values according to schema.
The rest happens within the handler for the data event. For maximum efficiency we are using insertMany() to only write to the database once every 10,000 lines. How that actually goes to the server and processes depends on the MongoDB version, but 10,000 should be pretty reasonable based on the average number of fields you would import for a single collection in terms of the "trade-off" for memory usage and writing a reasonable network request. Make the number smaller if necessary.
The important parts are to mark these calls as async functions and await the result of the insertMany() before continuing. Also we need to pause() the stream and resume() on each item otherwise we run the risk of overwriting the buffer of documents to insert before they are actually sent. The pause() and resume() are necessary to put "back-pressure" on the pipe, otherwise items just keep "coming out" and firing the data event.
Naturally the control for the 10,000 entries requires we check that both on each iteration and on stream completion in order to empty the buffer and send any remaining documents to the server.
That's really what you want to do, as you certainly don't want to fire off an async request to the server both on "every" iteration through the data event or essentially without waiting for each request to complete. You'll get away with not checking that for "very small files", but for any real world load you're certain to exceed the call stack due to "in flight" async calls which have not yet completed.
FYI - a package.json used. The mz is optional as it's just a modernized Promise enabled library of standard node "built-in" libraries that I'm simply used to using. The code is of course completely interchangeable with the fs module.
{
"description": "",
"main": "index.js",
"dependencies": {
"fast-csv": "^2.4.1",
"mongoose": "^5.1.1",
"mz": "^2.7.0"
},
"keywords": [],
"author": "",
"license": "ISC"
}
Actually with Node v8.9.x and above then we can even make this much simpler with an implementation of AsyncIterator through the stream-to-iterator module. It's still in Iterator<Promise<T>> mode, but it should do until Node v10.x becomes stable LTS:
const fs = require('mz/fs');
const csv = require('fast-csv');
const streamToIterator = require('stream-to-iterator');
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost/test';
mongoose.Promise = global.Promise;
mongoose.set('debug', true);
// Schema for one imported row; the order of these fields is reused as the CSV
// header list via Object.keys(Rank.schema.paths) in the import routine below.
// NOTE(review): confirm this order really matches the CSV columns.
const rankSchema = new Schema({
serverid: Number,
resetid: Number,
rank: Number,
name: String,
land: String,
networth: Number,
tag: String,
stuff: String, // the empty field
gov: String,
gdi: Number,
protection: Number,
vacation: Number,
alive: Number,
deleted: Number
});
const Rank = mongoose.model('Rank', rankSchema);
const log = data => console.log(JSON.stringify(data, undefined, 2));
/**
 * Imports input.csv into the Rank collection in batches, consuming the
 * parser stream through stream-to-iterator instead of stream events.
 *
 * The process always exits when done; the exit code is 1 on failure.
 */
(async function() {
  // Rows collected before each insertMany() round trip.
  const BATCH_SIZE = 10000;

  try {
    const conn = await mongoose.connect(uri);
    // Start from empty collections so repeated runs do not duplicate rows.
    await Promise.all(Object.values(conn.models).map(m => m.remove()));

    let headers = Object.keys(Rank.schema.paths)
      .filter(k => ['_id','__v'].indexOf(k) === -1);

    let stream = fs.createReadStream('input.csv')
      .pipe(csv({ headers }));

    const iterator = await streamToIterator(stream).init();

    let buffer = [];

    for ( let docPromise of iterator ) {
      let doc = await docPromise;
      buffer.push(doc);

      if ( buffer.length >= BATCH_SIZE ) {
        await Rank.insertMany(buffer);
        buffer = [];
      }
    }

    // Send whatever is left over after the last full batch.
    if ( buffer.length > 0 ) {
      await Rank.insertMany(buffer);
    }
  } catch(e) {
    console.error(e)
    process.exitCode = 1;
  } finally {
    process.exit()
  }
})()
Basically, all of the stream "event" handling and pausing and resuming gets replaced by a simple for loop:
const iterator = await streamToIterator(stream).init();
for ( let docPromise of iterator ) {
let doc = await docPromise;
// ... The things in the loop
}
Easy! This gets cleaned up in later Node releases with for..await..of once it becomes more stable. But the above runs fine from the specified version onwards.
Adding to what @Neil Lunn said: the approach below needs a header line within the CSV itself.
Example using csvtojson module.
const csv = require('csvtojson');
// Reads a CSV file (with a header line) into memory and bulk-creates the
// documents once parsing is done.
// `filePath`, `res` and `Model` are expected to be provided by the enclosing
// request handler, which is not part of this snippet.
const csvArray = [];
csv()
  .fromFile(filePath)
  .on('json', (jsonObj) => {
    // Keep only the columns the model needs.
    csvArray.push({ name: jsonObj.name, id: jsonObj.id });
  })
  .on('done', (error) => {
    if (error) {
      return res.status(500).json({ error });
    }
    Model.create(csvArray)
      .then((result) => res.status(200).json({ result }))
      // Report the failure of create(), not the (empty) parse error above.
      .catch((err) => res.status(500).json({ error: err }));
  });
Related
const config = require(`${process.cwd()}/botconfig/config.json`)
var ee = require(`${process.cwd()}/botconfig/embed.json`)
const fetch = require("node-fetch");
const { MessageEmbed } = require(`discord.js`);
// Command module: exports the metadata of the "glifestats" command.
module.exports = {
//definition
name: "glifestats", //the name of the command
category: "⌨️ Programming", //the category this will be listed at, for the help cmd
aliases: [""], //every parameter can be an alias
cooldown: 4, //this will set it to a 4 second cooldown
usage: "glifestats <id>", //this is for the help command for EACH cmd
description: "check stats", //the description of the command
};
// NOTE(review): the object literal ends on the line above, so the following
// `run:` is parsed as a labeled statement holding an arrow function that is
// never called and is not a property of module.exports.
run: async (client, message, args, cmduser, text, prefix) => {
// NOTE(review): `interaction` is not one of this function's parameters.
await interaction.deferReply();
const term = interaction.options.getString('term');
// URLSearchParams stringifies to "term=<value>", and that whole string is
// what gets appended to the URL path below.
const query = new URLSearchParams({ term });
const { list } = await fetch(`https://api.gtaliferp.fr:8443/v1/extinction/profiles/main/${query}`)
.then(response => response.json());
}
When I try to run this command, it does nothing.
It needs to take the query from the user's message and append it to the URL, for example:
.command 4443
The bot fetches the data from https://api.gtaliferp.fr:8443/v1/extinction/profiles/main/4443 and posts it.
Also, I want to show the different fields in an embed like this: data 1: data, data 2: data, ..., but I can't get it to work (check the URL provided for the field names if you want to help with that).
So you separated your run function from the rest of the command by closing the exported object at the line marked below:
module.exports = {
//definition
name: "glifestats", //the name of the command
category: "⌨️ Programming", //the category this will be listed at, for the help cmd
aliases: [""], //every parameter can be an alias
cooldown: 4, //this will set it to a 4 second cooldown
usage: "glifestats <id>", //this is for the help command for EACH cmd
description: "check stats", //the description of the command
} //here
Also, term is requesting an option that is not defined:
const term = interaction.options.getString('term');
Interaction is not defined
run: async (client, message, args, cmduser, text, prefix) => {
await interaction.deferReply();
const term = interaction.options.getString('term');
Try this
const config = require(`${process.cwd()}/botconfig/config.json`) // is this needed if not, delete
var ee = require(`${process.cwd()}/botconfig/embed.json`) // is this needed if not, delete
const fetch = require("node-fetch");
const {
MessageEmbed
} = require(`discord.js`); // is this needed if not, delete
module.exports = {
name: "glifestats", //the name of the command
category: "⌨️ Programming", //the category this will be listed at, for the help cmd
aliases: [""], //every parameter can be an alias
cooldown: 4, //this will set it to a 4 second cooldown
usage: "glifestats <id>", //this is for the help command for EACH cmd
description: "check stats", //the description of the command
options: [{
name: "term",
description: "Code to search",
required: true,
type: "STRING",
}],
run: async (client, interaction) => {
await interaction.deferReply();
const term = interaction.options.getString('term');
const url = `https://api.gtaliferp.fr:8443/v1/extinction/profiles/main/${term}`
const list = await fetch(url).then(response => response.json())
// Do something with list
console.log(list)
return interaction.followUp({
content: `List has been logged but can't be sent yet because it is an object and need to be further worked to get specific elements from it`
})
// Above seemed cleaner to me
/*
const query = new URLSearchParams({
term
});
const {
list
} = await fetch(`https://api.gtaliferp.fr:8443/v1/extinction/profiles/main/${query}`)
.then(response => response.json());
*/
}
}
// Starts a 1000 ms interval that loads every profile and decrements
// ShieldPoints by one for each profile whose ShieldPoints is above zero.
function shield() {
setInterval(async function () {
const ProfileModelS = require("../models/ProfileSchema");
await ProfileModelS.find({}).then((doc) => {
// NOTE(review): forEach does not wait for the async callback, so the updates
// are started but not awaited by the surrounding function.
doc.forEach(async (u) => {
// Profiles at or below zero are only logged by name and left untouched.
if (u.ShieldPoints <= 0) return console.log(u.Name);
if (u.ShieldPoints > 0) {
await ProfileModelS.findOneAndUpdate(
{ userID: u.userID },
{
$inc: {
ShieldPoints: -1,
},
},
// NOTE(review): console.log("done") runs immediately while the arguments are
// evaluated; its return value (undefined) is what is passed as third argument.
console.log("done")
);
}
});
});
}, 1000);
}
module.exports = shield
I want my mongodb to fetch the model on every Interval but its not doing that, whenever i run my code it fetches the model for example, it will fetch
[{name: 'Joseph' , Points: 10}, {name: 'carman' , Points: -1}, {name: 'thee' , Points: 2}]
According to the code, it correctly leaves alone the objects whose points are 0 or less,
but it keeps decreasing the points of objects above 0. I want it to stop reducing an object's points once they reach 0, while it continues to decrease the points of objects that are still greater than 0.
In short, the process for a particular object should stop once its points reach 0.
You could try using Promise.all to iterate over all of the retrieved models, as you seem to want to perform an async operation on every instance with ShieldPoints > 0 and your findOneAndUpdate operations are independent of one another.
function shield() {
// Import the model schema
const ProfileModelS = require("../models/ProfileSchema");
setInterval(async function () {
// Retrieve all models
const users = await ProfileModelS.find();
// Parallelise the process of updating the models that need to be updated
Promise.all(
users.map(async (user) => {
if (user.ShieldPoints > 0) {
await ProfileModelS.findOneAndUpdate(
{ userID: user.userID },
{
$inc: {
ShieldPoints: -1,
},
},
);
};
})
);
}, 1000);
}
module.exports = shield
However, if your findOneAndUpdate operations do not complete within the 1000ms interval, then the code will issue a second findOneAndUpdate operation for those same model instances. This could then cause your models to update more than once, which is unintended behaviour. To address this, you would need to add some form of guard against this.
Following is a code I implemented to create a bar chart using chart js in React app. Here it creates a bar chart with all the data in an array. But, I want to change this code only to give the output in the x-axis - destination, y-axis - no. of occurrence of this destination since it has many repeated destinations.
I searched methods to this but I couldn't get a correct solution.
Can anyone help me to do this?
// Y values: the time of every row; X labels: the destination of every row
// (one entry per row, so repeated destinations appear repeatedly).
const dataArrayY4 = res.data.map(item => item.time);
const dataArrayX4 = res.data.map(item => item.destination);
this.setState({ data4: dataArrayY4, labels4: dataArrayX4 });
This could be done as follows:
// Sample response used to demonstrate the grouping.
const res = {
  data: [
    { time: 1, destination: 'A'},
    { time: 3, destination: 'A'},
    { time: 2, destination: 'B'}
  ]
};

// Collapse the rows to one entry per destination, adding up `time` for
// destinations that occur more than once (first-seen order is kept).
let tmp4 = [];
res.data.forEach((o) => {
  const existing = tmp4.find(e => e.destination === o.destination);
  if (existing) {
    existing.time += o.time;
  } else {
    tmp4.push({time: o.time, destination: o.destination});
  }
});

this.setState({
  data4: tmp4.map(o => o.time),
  labels4: tmp4.map(o => o.destination),
});
The code above could be further simplified by using Array.reduce() to build the grouped result directly, instead of pushing into an outer array from inside the iteration callback.
I would make the code more efficient. Instead of dataArrayY4 being an array, I would make it an object that has a key of value and the number of occurrence of each value. This way, you can count all the number of occurrences of the all items in res.data
// Count how many times each destination occurs: { destination: occurrences }.
const dataArrayY4 = {};
res.data.forEach(item => {
  dataArrayY4[item.destination] = (dataArrayY4[item.destination] || 0) + 1;
});

// One label per distinct destination, so labels and counts line up.
const dataArrayX4 = Object.keys(dataArrayY4);

this.setState({
  data4: dataArrayY4,
  labels4: dataArrayX4,
});
Then if you want to look for the occurrence of a particular value you
use this eg. Sri Lanka
this.state.data4['Sri Lanka']
I am using findOne() to retrieve a document like this:
// Load one staff document by id and read its history array.
let staffToUpdate = await Staff.findOne({
_id: request.parameters.id
}).exec();
// NOTE(review): the query result above is stored in `staffToUpdate`;
// `crewToUpdate` is not declared anywhere in this snippet.
let historyArray = await crewToUpdate.history;
console.log("historyArray: ", await historyArray);
console.log(Array.isArray(historyArray)); // returns true
The data looks like this:
history: [
{
status: "active",
startDate: <Date>,
endDate: <Date>,
completed: false
},
{
status: "training",
startDate: <Date>,
endDate: <Date>,
completed: true
}
]
When I do the above I get an array of objects printed out, as well as a return of "true" on the check to see if "historyArray" is indeed an array.
So now that I have this array, I'd like to run a transformation on the objects found within it, like so:
// Attempt to mark every history entry as completed with an end date of now.
// NOTE(review): `updatedHistoryArray` is used inside its own initializer, and
// the forEach call is evaluated as the argument passed to then().
let updatedHistoryArray = historyArray.then(
updatedHistoryArray.forEach(history => {
history.completed = true;
history.endDate = new Date();
})
);
However, this is the part that's not working. When I try this I get this error:
Reason: ReferenceError: historyArray is not defined
What am I missing here?
UPDATE: After a suggestion from a commenter below, I tried this:
// Second attempt: convert the loaded document to a plain object first, then
// mark every history entry as completed with an end date of now.
let staffToUpdate = await Staff.findOne({
_id: request.parameters.id
}).exec();
let staffObject = staffToUpdate.toObject();
let historyArray = await staffObject.history;
console.log(await historyArray); // prints the array
console.log(Array.isArray(historyArray)); // returns true
historyArray.forEach(history => { // this is where the error occurs
history.completed = true;
history.endDate = new Date();
});
With this last block of code I get this error:
Reason: ReferenceError: historyArray is not defined
historyArray is not a Promise and you can not run then on it.
When this code runs
let staffToUpdate = await Staff.findOne({
_id: request.parameters.id
}).exec();
it waits until the query has executed and assigns the actual result (a Mongoose Document, not a Promise) to staffToUpdate. You need to call toObject() on the Mongoose Document to get a plain object without the wrapper:
const unwrappedStaffToUpdate = staffToUpdate.toObject();
After that you don't need to use await on crewToUpdate.history, because it is not a Promise and reading it is synchronous. That is also why you cannot call then on historyArray: it is a normal Array, not a Promise.
Try this code:
// Mark every history entry as completed, in place. The array lives under the
// document's `history` field.
unwrappedStaffToUpdate.history.forEach(history => {
  history.completed = true;
  history.endDate = new Date();
});
Or if you do not want to mutate your Array use map instead of forEach:
// Non-mutating variant: build new entries that copy each history item and
// override `completed` and `endDate`.
const updatedHistoryArray = unwrappedStaffToUpdate.history.map(history => ({
  ...history,
  completed: true,
  endDate: new Date(),
}));
I have looked around at a few answers/questions regarding this issue but yet to find a solution.
I have a collection with documents (simplified) as such:
{
"id": 123,
"stuff": "abc",
"array":[
{
"id2":456,
"properties": [
{
"id3": 789,
"important": true
}
]
}
]
}
I want to check for each document in my collection, for each array object within array, for each properties, if it has important: true for example. Then return:
"id": 123
"id2": 456
"id3": 789
I have tried using:
// Run the query, materialise all results, and hand the first one (or the
// error) to the callback.
const documentQuery = client.queryDocuments(self.collection._self, querySpec);
documentQuery.toArray((err, results) => {
  if (err) {
    callback(err);
    return;
  }
  callback(null, results[0]);
});
But the issue is an array has a maximum character limit. If my collection has millions of documents, this would presumably be exceeded. (Javascript Increase max array size)
Or, am I misunderstanding the above question? Is it talking about the number of objects in an array (of which, each can have unlimited object character length?)
Thus I am looking a for loop-esque solution, where each document is returned, I do my analysis, then move to then next/do them in parallel.
Any insight would be greatly appreciated.
But the issue is an array has a maximum character limit. If my
collection has millions of documents, this would presumably be
exceeded. (Javascript Increase max array size)
Based on my research, the longest possible array in JavaScript can have 2^32 − 1 = 4,294,967,295 (about 4.29 billion) elements, which is more than enough for millions of documents. That said, you should not try to pull such a huge volume of data in a single query anyway.
Whether about throughput constraints(RUs settings) or query efficiency factors, you should consider batching large volumes of data anyway.
Thus I am looking a for loop-esque solution, where each document is
returned, I do my analysis, then move to then next/do them in
parallel.
Maybe you could use v2 js sdk for cosmos db sql api.Please refer to the sample code:
// Cosmos DB client setup. The package is scoped, so the specifier starts with "@".
const cosmos = require('@azure/cosmos');
const CosmosClient = cosmos.CosmosClient;

const endpoint = "https://***.documents.azure.com:443/"; // Add your endpoint
const masterKey = "***"; // Add the masterkey of the endpoint
const client = new CosmosClient({ endpoint, auth: { masterKey } });

// Database and container the sample reads from.
const databaseId = "db";
const containerId = "coll";
/**
 * Runs the sample query and processes it one result page at a time.
 *
 * Each call to executeNext() yields one page plus its response headers.
 * The loop stops when the iterator reports no more results or a page comes
 * back undefined.
 */
async function run() {
  const { container } = await init();

  const querySpec = {
    query: "SELECT r.id,r._ts FROM root r"
  };
  const queryOptions = {
    maxItemCount : -1
  };

  const queryIterator = await container.items.query(querySpec, queryOptions);
  while (queryIterator.hasMoreResults()) {
    const { result: results, headers } = await queryIterator.executeNext();
    // Check for the end of the feed before touching the page.
    if (results === undefined) {
      // no more results
      break;
    }
    console.log(results)
    console.log(headers)
    //do what you want to do
  }
}
/**
 * Makes sure the configured database and container exist.
 *
 * @returns {Promise<{database: object, container: object}>} handles to both.
 */
async function init() {
  const databaseResponse = await client.databases.createIfNotExists({ id: databaseId });
  const database = databaseResponse.database;
  const containerResponse = await database.containers.createIfNotExists({ id: containerId });
  const container = containerResponse.container;
  return { database, container };
}
run().catch(err => {
console.error(err);
});
For more details about the continuation token, please refer to my previous case. If you have any concerns, please let me know.
I am using Cosmos DB SQL API Node.js library. I am unable to find the Continuation Token from this library so that I can return it to client. The idea is to get it back from the client for the next pagination request.
I have a working code which iterates multiple times to get all the documents. What changes will be required here to get the continuation token?
/**
 * Runs the paged query to completion and collects every document.
 *
 * Pages are requested one after another with executeNext(); each page is
 * appended to the running list until the iterator has no more results.
 *
 * @returns {Promise<Array>} resolves with all documents (also logged),
 *   rejects with the first error reported by executeNext().
 */
function queryCollectionPaging() {
  return new Promise((resolve, reject) => {
    const feedOptions = {
      maxItemCount: 1,
      enableCrossPartitionQuery: true
    };
    let collected = [];
    const pager = client.queryDocuments( collectionUrl, 'SELECT r.partitionkey, r.documentid, r._ts FROM root r WHERE r.partitionkey in ("user1", "user2") ORDER BY r._ts', feedOptions);

    // Settle the promise once paging has finished or failed.
    const finish = (failure) => {
      if (failure) {
        reject(failure);
        return;
      }
      console.log(collected);
      resolve(collected);
    };

    // Request one page, then either continue with the next or finish.
    const fetchPage = () => {
      pager.executeNext((failure, page) => {
        if (failure) {
          finish(failure);
          return;
        }
        collected = collected.concat(page);
        if (pager.hasMoreResults()) {
          fetchPage();
          return;
        }
        finish();
      });
    };

    fetchPage();
  });
};