The Node.js driver is a package for interacting with MongoDB from a Node.js application.
The mongoimport command allows us to import human-readable JSON into a specific database and collection. To import a JSON file into a specific database and collection, type:
mongoimport -d databaseName -c collectionName jsonFileName.json
var MongoClient = require('mongodb').MongoClient,assert = require('assert');
// Connect to the local "crunchbase" database and print every biotech company.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(connectErr, db) {
    assert.equal(connectErr, null);
    console.log("Successfully connected to MongoDB.");

    var query = {"category_code": "biotech"};
    var companies = db.collection('companies');

    // toArray() hands the callback the complete result set as one array.
    companies.find(query).toArray(function(findErr, docs) {
        assert.equal(findErr, null);
        assert.notEqual(docs.length, 0);

        for (var i = 0; i < docs.length; i++) {
            console.log(docs[i].name + " is a " + docs[i].category_code + " company.");
        }

        db.close();
    });
});
Notice that the call to .toArray() makes the application fetch the entire dataset before any document is processed.
var MongoClient = require('mongodb').MongoClient,assert = require('assert');
// Connect to the local "crunchbase" database and stream every biotech company
// through a cursor instead of materialising the whole result set.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var query = {"category_code": "biotech"};

    // find() only describes the query; the cursor is consumed by forEach below.
    var cursor = db.collection('companies').find(query);

    cursor.forEach(
        // First callback: invoked once per document.
        function(doc) {
            console.log(doc.name + " is a " + doc.category_code + " company.");
        },
        // Second callback: invoked when the cursor is exhausted or an error occurs.
        function(err) {
            assert.equal(err, null);
            return db.close();
        }
    );
});
Notice that the cursor returned by find() is assigned to var cursor. With this approach, instead of fetching all the data into memory and consuming it at once, we’re streaming the data to our application. find() can create a cursor immediately because it doesn’t actually make a request to the database until we try to use some of the documents it will provide. The point of the cursor is to describe our query. The second parameter to cursor.forEach says what to do when the cursor is exhausted or an error occurs.
In the initial version of the above code, it was toArray() which forced the database call. It meant we needed ALL the documents and wanted them to be in an array.
Also, MongoDB returns data in batches. The image below shows the requests made by cursors (in the application) to MongoDB.
forEach is better than toArray because we can process documents as they come in until we reach the end. Contrast it with toArray, where we wait for ALL the documents to be retrieved and the entire array to be built. With toArray we’re not getting any advantage from the fact that the driver and the database system are working together to batch results to our application. Batching is meant to provide efficiency in terms of memory overhead and execution time. Take advantage of it in your application if you can.
Remember that projections allow us to explicitly include or exclude fields in a MongoDB query. We use **1** to indicate that we want to include a field and **0** to indicate that we wish to exclude the field. Remember that the _id field is special: it gets included by default, unless we explicitly exclude it.
Also, since we’re working in JavaScript, we can construct our projection documents and the documents to be inserted into our collections in a way that is very similar to the way we do this in the mongo shell. What’s different is that the driver provides one set of classes and methods we use to interact with MongoDB and the mongo shell provides its own API.
With respect to CRUD, as of MongoDB 3.2 the driver and the mongo shell adhere to the same spec. How you access these methods in the driver, and how they’re implemented, varies of course from the mongo shell.
var MongoClient = require('mongodb').MongoClient,assert = require('assert');
// Connect to the local "crunchbase" database and print biotech companies,
// restricting the returned fields with a projection set on the cursor.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var query = {"category_code": "biotech"};
    // Include name and category_code; _id must be excluded explicitly.
    var projection = {"name": 1, "category_code": 1, "_id": 0};

    var cursor = db.collection('companies').find(query);
    cursor.project(projection);

    // Called once per document delivered by the cursor.
    function printCompany(doc) {
        console.log(doc.name + " is a " + doc.category_code + " company.");
        console.log(doc);
    }

    // Called when iteration ends or fails.
    function finish(err) {
        assert.equal(err, null);
        return db.close();
    }

    cursor.forEach(printCompany, finish);
});
The current best practice in the Node.js driver is to chain a call to project onto our cursor, i.e. cursor.project. This call to project sets a field projection for the query. It does not force a request to retrieve documents from the database, as the forEach method does. Rather, it adds some additional details to the query representation maintained by our cursor. There are a number of cursor methods we can chain together to fully express the operation we wish to execute against our MongoDB database. The call to db.collection is synchronous, and so is the call to find, which gives us the cursor. We then modify that cursor with a field projection using the project method on the cursor.
var MongoClient = require('mongodb').MongoClient,commandLineArgs = require('command-line-args'),assert = require('assert');
// Parse the command line first so that a usage error exits before connecting.
var options = commandLineOptions();

// Connect to the local "crunchbase" database, run the query described by the
// command-line options, print each match and finally a summary.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var query = queryDocument(options);
    var projection = {
        "_id": 1,
        "name": 1,
        "founded_year": 1,
        "number_of_employees": 1,
        "crunchbase_url": 1
    };

    // Set the projection with cursor.project() instead of passing it as the
    // second positional argument to find(): newer driver versions interpret
    // that argument as an options object, so the projection would be ignored.
    var cursor = db.collection('companies').find(query);
    cursor.project(projection);

    var numMatches = 0;

    cursor.forEach(
        // Invoked once per matching document.
        function(doc) {
            numMatches = numMatches + 1;
            console.log( doc );
        },
        // Invoked when the cursor is exhausted or an error occurs.
        function(err) {
            assert.equal(err, null);
            console.log("Our query was:" + JSON.stringify(query));
            console.log("Matching documents: " + numMatches);
            return db.close();
        }
    );
});
/**
 * Builds the MongoDB filter for companies founded within a range of years.
 *
 * The received options object is logged to the console before the filter is
 * built.
 *
 * @param {Object} options - Parsed command-line options. Reads `firstYear`
 *     and `lastYear`, plus `employees` when that key is present.
 * @returns {Object} Filter on `founded_year` (inclusive range) and, when
 *     `employees` is present, a `$gte` filter on `number_of_employees`.
 */
function queryDocument(options) {
    console.log(options);

    var yearRange = {};
    yearRange["$gte"] = options.firstYear;
    yearRange["$lte"] = options.lastYear;

    var filter = {"founded_year": yearRange};

    // The employee threshold is optional; without it the year range is the whole filter.
    if (!("employees" in options)) {
        return filter;
    }

    filter["number_of_employees"] = {"$gte": options.employees};
    return filter;
}
/**
 * Parses the command-line options for the year-range query.
 *
 * Recognised options: `firstYear` (-f), `lastYear` (-l) and `employees` (-e),
 * all numeric. When `firstYear` or `lastYear` is missing, the usage text is
 * printed and process.exit() is called.
 *
 * @returns {Object} The options object produced by the parser.
 */
function commandLineOptions() {
    var cli = commandLineArgs([
        { name: "firstYear", alias: "f", type: Number },
        { name: "lastYear", alias: "l", type: Number },
        { name: "employees", alias: "e", type: Number }
    ]);

    // The statement must be terminated before the `if`; running the two
    // together (`cli.parse()if (...)`) is a syntax error.
    var options = cli.parse();

    if (!(("firstYear" in options) && ("lastYear" in options))) {
        console.log(cli.getUsage({
            title: "Usage",
            description: "The first two options below are required. The rest are optional."
        }));
        process.exit();
    }

    return options;
}
To call it, use:
node app.js -f 2004 -l 2008 -e 100
Notice that the command-line arguments are parsed by commandLineArgs (through the call to cli.parse()). Also, you might see a difference between the _id field representation in the mongo shell and in the Node.js driver:
_id : ObjectId("507f1f77bcf86cd799439011") in the mongo shell
_id : 507f1f77bcf86cd799439011 in the Node.js driver
This happens because of the difference in the way the Node.js driver and the mongo shell choose to stringify the _id field.
var MongoClient = require('mongodb').MongoClient,commandLineArgs = require('command-line-args'),assert = require('assert');
// Parse the command line before connecting.
var options = commandLineOptions();

// Connect to the local "crunchbase" database, run the regex search described
// by the command-line options, print each match and finally a summary.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var query = queryDocument(options);
    var projection = projectionDocument(options);

    var cursor = db.collection('companies').find(query);
    cursor.project(projection);

    var numMatches = 0;

    // Called once per matching document.
    function onDocument(doc) {
        numMatches += 1;
        console.log( doc );
    }

    // Called when the cursor is exhausted or an error occurs.
    function onEnd(err) {
        assert.equal(err, null);
        console.log("Our query was:" + JSON.stringify(query));
        console.log("Matching documents: " + numMatches);
        return db.close();
    }

    cursor.forEach(onDocument, onEnd);
});
/**
 * Builds a case-insensitive regex filter from the command-line options.
 *
 * The received options object is logged to the console before the filter is
 * built.
 *
 * @param {Object} options - Parsed command-line options. `overview` is matched
 *     against the `overview` field and `milestones` against
 *     `milestones.source_description`; each is used only when its key is present.
 * @returns {Object} Filter with one `$regex` clause per supplied option
 *     (an empty object when neither is present).
 */
function queryDocument(options) {
    console.log(options);

    // [option name, document field] pairs, in the order the clauses are added.
    var searchable = [
        ["overview", "overview"],
        ["milestones", "milestones.source_description"]
    ];

    var filter = {};
    searchable.forEach(function(pair) {
        var optionName = pair[0];
        var fieldName = pair[1];
        if (optionName in options) {
            filter[fieldName] = {"$regex": options[optionName], "$options": "i"};
        }
    });

    return filter;
}
/**
 * Builds the projection matching the fields that were searched.
 *
 * `_id` is always excluded and `name` and `founded_year` are always included.
 *
 * @param {Object} options - Parsed command-line options; only the presence of
 *     the `overview` and `milestones` keys is checked.
 * @returns {Object} Projection document that also includes `overview` and/or
 *     `milestones.source_description` when the matching option is present.
 */
function projectionDocument(options) {
    var fields = {};
    fields["_id"] = 0;
    fields["name"] = 1;
    fields["founded_year"] = 1;

    if ("overview" in options) {
        fields["overview"] = 1;
    }

    if ("milestones" in options) {
        fields["milestones.source_description"] = 1;
    }

    return fields;
}
/**
 * Parses the command-line options for the regex search.
 *
 * Recognised options: `overview` (-o) and `milestones` (-m), both strings.
 * When no option at all is supplied, the usage text is printed and
 * process.exit() is called.
 *
 * @returns {Object} The options object produced by the parser.
 */
function commandLineOptions() {
    var cli = commandLineArgs([
        { name: "overview", alias: "o", type: String },
        { name: "milestones", alias: "m", type: String }
    ]);

    // The statement must be terminated before the `if`; running the two
    // together (`cli.parse()if (...)`) is a syntax error.
    var options = cli.parse();

    if (Object.keys(options).length < 1) {
        console.log(cli.getUsage({
            title: "Usage",
            description: "You must supply at least one option. See below."
        }));
        process.exit();
    }

    return options;
}
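The $options value set to i indicates that the regex search needs to be case-insensitive. MongoDB supports Perl-like regular expressions. There are a few ways in which we can set the values in the query and projection documents: an object literal, dot notation, and bracket notation (needed when the key itself contains a dot):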
{ "_id": 0, "name": 1, "founded_year": 1 }
projection.overview = 1;
projection["milestones.source_description"] = 1;
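To sort documents, we can apply sort on a cursor object. To enforce the order in which the sort keys are applied, instead of passing an object we need to pass an array of [field, direction] pairs to the sort method, because the order of keys in a JavaScript object is not guaranteed.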
var MongoClient = require('mongodb').MongoClient,commandLineArgs = require('command-line-args'),assert = require('assert');
// Parse the command line before connecting.
var options = commandLineOptions();

// Connect to the local "crunchbase" database, run the year-range query with
// projection, limit, skip and sort applied, print each company and a summary.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var query = queryDocument(options);
    var projection = {
        "_id": 0,
        "name": 1,
        "founded_year": 1,
        "number_of_employees": 1
    };

    var cursor = db.collection('companies').find(query);
    cursor.project(projection);
    cursor.limit(options.limit);
    cursor.skip(options.skip);
    // Array form fixes the key precedence: founded_year ascending first,
    // then number_of_employees descending.
    cursor.sort([
        ["founded_year", 1],
        ["number_of_employees", -1]
    ]);

    var numMatches = 0;

    // Called once per document delivered by the cursor.
    function onDocument(doc) {
        numMatches += 1;
        console.log(doc.name + "\n\tfounded " + doc.founded_year +
                    "\n\t" + doc.number_of_employees + " employees");
    }

    // Called when the cursor is exhausted or an error occurs.
    function onEnd(err) {
        assert.equal(err, null);
        console.log("Our query was:" + JSON.stringify(query));
        console.log("Documents displayed: " + numMatches);
        return db.close();
    }

    cursor.forEach(onDocument, onEnd);
});
/**
 * Builds the MongoDB filter for companies founded within a range of years.
 *
 * The received options object is logged to the console before the filter is
 * built.
 *
 * @param {Object} options - Parsed command-line options. Reads `firstYear`
 *     and `lastYear`, plus `employees` when that key is present.
 * @returns {Object} Filter on `founded_year` (inclusive range) and, when
 *     `employees` is present, a `$gte` filter on `number_of_employees`.
 */
function queryDocument(options) {
    console.log(options);

    var foundedBetween = {
        "$gte": options.firstYear,
        "$lte": options.lastYear
    };
    var filter = {};
    filter["founded_year"] = foundedBetween;

    // Minimum head count is optional and only added when the option was given.
    var hasEmployeeFloor = "employees" in options;
    if (hasEmployeeFloor) {
        filter["number_of_employees"] = {"$gte": options.employees};
    }

    return filter;
}
/**
 * Parses the command-line options for the paginated, sorted year-range query.
 *
 * Recognised options: `firstYear` (-f), `lastYear` (-l), `employees` (-e),
 * `skip` (default 0) and `limit` (default 20000), all numeric. When
 * `firstYear` or `lastYear` is missing, the usage text is printed and
 * process.exit() is called.
 *
 * @returns {Object} The options object produced by the parser.
 */
function commandLineOptions() {
    var cli = commandLineArgs([
        { name: "firstYear", alias: "f", type: Number },
        { name: "lastYear", alias: "l", type: Number },
        { name: "employees", alias: "e", type: Number },
        { name: "skip", type: Number, defaultValue: 0 },
        { name: "limit", type: Number, defaultValue: 20000 }
    ]);

    // The statement must be terminated before the `if`; running the two
    // together (`cli.parse()if (...)`) is a syntax error.
    var options = cli.parse();

    if (!(("firstYear" in options) && ("lastYear" in options))) {
        console.log(cli.getUsage({
            title: "Usage",
            description: "The first two options below are required. The rest are optional."
        }));
        process.exit();
    }

    return options;
}
One thing to notice is the order in which MongoDB applies skip, limit and sort: regardless of the order in which we call these methods on the cursor, MongoDB always sorts first, then skips, and then limits.
The sort itself happens on the MongoDB side, and it can take advantage of an index, provided that we’ve set one up.
Notice that the MongoDB driver sends the query only when we call a cursor method that takes a callback function to process the query results.
Originally published at xameeramir.github.io.