  1. Start server > mongod
  2. Launch the mongo shell > mongo


show database names

> show dbs

show collections in current database

> show collections


db.movies.insertOne({ "title": "Jaws", "year": 1975, "imdb": "tt0073195" });


All db.movies.find()

One db.movies.findOne({"year":1975})


Create a cursor var cursor = db.movies.find()

Iterate through a cursor

Cursor finished cursor.hasNext()


Connection var url = 'mongodb://<MACHINE>:<PORT>/<DB_NAME>';

	var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

	var url = 'mongodb://localhost:27017/video';

	MongoClient.connect(url, function(err, db) {

	    assert.equal(null, err);
	    console.log("Successfully connected to server");

	    // Find some documents in our collection
	    db.collection('movies').find({}).toArray(function(err, docs) {

	        // Print the documents returned
	        docs.forEach(function(doc) {

	        // Close the DB

	    // Declare success
	    console.log("Called find()");


consolidate Allow express to use several templating engines consolidate.js

__dirname This file path

	var express = require('express'),
    app = express(),
    engines = require('consolidate');

	app.engine('html', engines.nunjucks);
	app.set('view engine', 'html');
	app.set('views', __dirname + '/views');

	app.get('/', function(req, res) {
	    res.render('hello', { name : 'Templates' });

	app.use(function(req, res){

	var server = app.listen(3000, function() {
	    var port = server.address().port;
	    console.log('Express server listening on port %s', port);


	var express = require('express'),
	    app = express(),
	    engines = require('consolidate'),
	    MongoClient = require('mongodb').MongoClient,
	    assert = require('assert');

	app.engine('html', engines.nunjucks);
	app.set('view engine', 'html');
	app.set('views', __dirname + '/views');

	MongoClient.connect('mongodb://localhost:27017/video', function(err, db) {

	    assert.equal(null, err);
	    console.log("Successfully connected to MongoDB.");

	    app.get('/', function(req, res){

	        db.collection('movies').find({}).toArray(function(err, docs) {
	            res.render('movies', { 'movies': docs } );


	    app.use(function(req, res){
	    var server = app.listen(3000, function() {
	        var port = server.address().port;
	        console.log('Express server listening on port %s.', port);




mongod --dbpath=/data/db mongorestore dump #2. CRUD

Creating Documents


	db.moviesScratch.insertOne({ "title": "Rocky", "year": "1976", "imdb": "tt0075148"});
	db.moviesScratch.insertOne({ "_id": "tt0075148", "title": "Rocky", "year": "1976" });


		    "_id" : "tt0084726",
		    "title" : "Star Trek II: The Wrath of Khan",
		    "year" : 1982,
		    "type" : "movie"
		    "_id" : "tt0796366",
		    "title" : "Star Trek",
		    "year" : 2009,
		    "type" : "movie"
		    "_id" : "tt0084726",
		    "title" : "Star Trek II: The Wrath of Khan",
		    "year" : 1982,
		    "type" : "movie"
		    "_id" : "tt1408101",
		    "title" : "Star Trek Into Darkness",
		    "year" : 2013,
		    "type" : "movie"
		    "_id" : "tt0117731",
		    "title" : "Star Trek: First Contact",
		    "year" : 1996,
		    "type" : "movie"
	        "ordered": false  

"ordered": false lets MongoDB carry on with the remaining inserts after one fails, so every document is inserted except the ones that raise errors. With the default "ordered": true, no more inserts take place after the first error.

Reading Documents

db.movies.find({"name":"Star Wars"}) db.movies.find({rated:"PG"}) db.movies.find({"tomato.meter":"100"})

Equality matches on Scalars

Equality matches on embedded Documents

Equality matches on arrays

  • On the entire Array
  • Based on any element
  • Based on specific element
  • More complex matches using operators

On the entire Array

writers is an array of elements
They have to be in the same order

Based on any element

actor is an array of elements
Find any document with an occurrence.
db.movies.find({"actor":"John Doe"})

Based on specific element

Find any document with an occurrence as the first element.
db.movies.find({"actor.0":"John Doe"})


find returns a cursor. If we add it to a variable we can use it. var cursor = db.movies.find({"tomato.meter":"100"})

We can use a function to use the cursor
var doc = function(){return cursor.hasNext() ? cursor.next() : null;}

See how many objects are left in the Batch cursor.objsLeftInBatch()


Reducing size of elements returned by the queries. db.movies.find({rated:"PG"},{title:1})
Explicit _id exclusion db.movies.find({rated:"PG"},{title:1, _id:0})
Explicit exclusion db.movies.find({rated:"PG"},{writers:0, actors:0})

Comparison Operators

Query Selectors

More, less, equals than $gt,$gte,$lt,$lte

db.movies.find({runtime:{$gte:90, $lte:120}})

Not equal and In $ne, $in

Also returns the ones that do not have the field at all.

Element Operators

Element Operators


Matches documents that have the specified field


Selects documents if a field is of the specified type. db.movies.find({"_id":{$type:"string"}})

Logical Operators

Logical Operators




Is the same as: db.movies.find({"tomato.meter":{$gt:95}, "metacritic":{$gt:95}})
$and is used if we need to specify the same field more than once in the same query. db.movies.find({"$and":[{"metacritic":{$ne:null}},{"metacritic":{$exists:true}}]})

Regex Operators

Regex Operators db.movies.find({"awards.text":{$regex:/^Won\s.*/}})

Array Operators

Array Operators





Element Match

For embedded documents db.movies.find({boxOffice:{$elemMatch:{country:"UK", revenue:{$gt:15}}}})
Is not the same as:
db.movies.find({boxOffice:{country:"UK", revenue:{$gt:15}}})
Here the query searches the boxOffice array as a whole, so it will match if there is an element in the array with country UK and any element (not necessarily the same one) with revenue greater than 15.


Update one

Update Field

Update Field


  • $set Sets the value of a field in a document.
  • $unset Removes the specified field from a document.
  • $inc Increments the value of the field by the specified amount.
  • $min Only updates the field if the specified value is less than the existing field value.
  • $max Only updates the field if the specified value is greater than the existing field value.
  • $mul Multiplies the value of the field by the specified amount.
  • $rename Renames a field.
  • $setOnInsert Sets the value of a field if an update results in an insert of a document.
  • $currentDate Sets the value of a field to current date, either as a Date or a Timestamp.


Update Array

Update Array

  • $addToSet Adds elements to an array only if they do not already exist in the set.
  • $pop Removes the first or last item of an array.
  • $pullAll Removes all matching values from an array.
  • $ Acts as a placeholder to update the first element that matches the query condition in an update.
  • $pull Removes all array elements that match a specified query.
  • $pushAll Deprecated. Adds several items to an array.
  • $push Adds an item to an array.
	db.movieDetails.updateOne({title: "The Martian"},
                          {$push: { reviews: { rating: 4.5,
                                               date: ISODate("2016-01-12T09:00:00Z"),
                                               reviewer: "Spencer H.",
                                               text: ".34.."} } })

Update Array modifiers

Update Operator Modifiers

  • $each Modifies the $push and $addToSet operators to append multiple items for array updates.
  • $slice Modifies the $push operator to limit the size of updated arrays.
  • $sort Modifies the $push operator to reorder documents stored in an array.
  • $position Modifies the $push operator to specify the position in the array to add elements.

For more than one use

	db.movieDetails.updateOne({title: "The Martian"},
                          {$push: { reviews:
                                    { $each: [
                                        { rating: 0.5,
                                          date: ISODate("2016-01-12T07:00:00Z"),
                                          reviewer: "Yabo A.",
                                          text: "..."},
                                        { rating: 4.5,
                                          date: ISODate("2016-01-12T09:00:00Z"),
                                          reviewer: "Spencer H.",
                                          text: "..."} ] } } } )

To keep a maximum number of elements in an array (do not forget to set the position $position)

	db.movieDetails.updateOne({ title: "The Martian" },
                          {$push: { reviews:
                                    { $each: [
                                        { rating: 0.5,
                                          date: ISODate("2016-01-13T07:00:00Z"),
                                          reviewer: "Shannon B.",
                                          text: "Enjoyed watching with my kids!" } ],
                                      $position: 0,
                                      $slice: 5 } } } )

Update many

db.movieDetails.updateMany( { rated: null },{ $unset: { rated: "" } } )


    	{$set: detail},
    	{upsert: true}




find() and Cursors in the Node.js Driver

mongoimport > mongoimport -d crunchbase -c companies companies.json


	var MongoClient = require('mongodb').MongoClient,
	    assert = require('assert');

	MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

	    assert.equal(err, null);
	    console.log("Successfully connected to MongoDB.");


	    var query = {"category_code": "biotech"};

	    db.collection('companies').find(query).toArray(function(err, docs) {

	        assert.equal(err, null);
	        assert.notEqual(docs.length, 0);
	        docs.forEach(function(doc) {
	            console.log( doc.name + " is a " + doc.category_code + " company." );


You get a cursor object and then iterate through it. No call is made to the database until iteration begins.

	var query = {"category_code": "biotech"};

    var cursor = db.collection('companies').find(query);

        function(doc) {
            console.log( doc.name + " is a " + doc.category_code + " company." );
        function(err) { // Called always when there are no more documents
            assert.equal(err, null);
            return db.close(); 

toArray vs forEach

forEach works with batches of data that it automatically retrieves from the database every time the previous batch runs out, until it reaches the end of the result set. With forEach we can process the data as it comes from the database.

The toArray callback is not called until all the data has been retrieved from the database and the entire array has been built.

Projection in the Node.js Driver

Get only the fields we really need, we can reduce the amount of data transferred.

	var query = {"category_code": "biotech"};
    var projection = {"name": 1, "category_code": 1, "_id": 0};

    var cursor = db.collection('companies').find(query);

Query Operators in the Node.js Driver

	function queryDocument(options) {

	    var query = {
	        "founded_year": {
	            "$gte": options.firstYear,
	            "$lte": options.lastYear

	    if ("employees" in options) {
	        query.number_of_employees = { "$gte": options.employees };
	    return query;

$regex in the Node.js Driver

"$options": "i" case insensitive.

>node app.js -m "billion.+valuation"

	function queryDocument(options) {

	    var query = {};

	    if ("overview" in options) {
	        query.overview = {"$regex": options.overview, "$options": "i"};

	    if ("milestones" in options) {
	        query["milestones.source_description"] =
	            {"$regex": options.milestones, "$options": "i"};

	    return query;

	function projectionDocument(options) {

	    var projection = {
	        "_id": 0,
	        "name": 1,
	        "founded_year": 1

	    if ("overview" in options) {
	        projection.overview = 1;

	    if ("milestones" in options) {
	        projection["milestones.source_description"] = 1;

	    return projection;

Dot Notation in the Node.js Driver

	if ("ipo" in options) {
        if (options.ipo == "yes") {
            query["ipo.valuation_amount"] = {"$exists": true, "$ne": null};
        } else if (options.ipo == "no") {
            query["ipo.valuation_amount"] = null;

Dot Notation on Embedded Documents in Arrays

    if ("country" in options) {
        query["offices.country_code"] = options.country;

Sort, Skip, and Limit in the Node.js Driver

The order in which we call sort, skip and limit doesn't matter. MongoDB always applies them in the same order:

  • 1.-sort
  • 2.-skip
  • 3.-limit


Passing an array of tuples (not objects) we set the order of the sorts.

	cursor.sort([["founded_year", 1], ["number_of_employees", -1]]); 


    cursor.sort({"founded_year": 1});

insertOne() and insertMany() in the Node.js Driver


	db.collection("statuses").insertOne(status, function(err, res) {
        console.log("Inserted document with _id: " + res.insertedId + "\n");


	db.collection("statuses").insertMany(statuses, function(err, res) {

        done += 1;
        if (done == screenNames.length) {

deleteOne() and deleteMany() in the Node.js Driver


	db.collection('companies').deleteOne(filter, function(err, res) {
        assert.equal(err, null);



	var filter = {"_id": {"$in": markedForRemoval}};

	db.collection("companies").deleteMany(filter, function(err, res) {
        console.log(markedForRemoval.length + " documents removed.");

        return db.close();


  • Rich Documents
  • PreJoin / Embed Data
  • No Mongo Joins
  • No Constraints
  • Atomic Operations
  • No Declared Schema

No Constraints

Keeping your data consistent even though MongoDB lacks foreign key constraints, by embedding documents.

No transactions (ACID)

  • Restructure: Use atomic operations to achieve it, because the documents (if they are so prepared) are embedded and there is no need to access several documents.
  • Implement in Software
  • Tolerate

One to One Relationships

Employees Resume Sample


  • 1.-SQL style: With foreign _id, in one or the other side
  • 2.-MongoDB Style: Embed one into another

Considerations to avoid embedding

  • 1.-Frequency of access (READ): If we rarely access the Resume and it is a big document, we don't want to embed the Resume in the Employee document.
    If you only access the info of one of the types, you don't want it embedded.

  • 2.-Growing documents (WRITE): If we write a lot to the Resume document but not to the Employee, i.e. the writes go mostly to one document, we don't want to incur overhead on the other.

  • 3.-Size If the document is larger than 16MB it can not be embedded.

  • 4.-Not Atomic: If you need to update both types at the same time, it is better to have one document embedded in the other.

One to Many Relationships

City Person Sample

  • 1.- Embed people in city: Too many people in one city document
  • 2.- Embed city in people: Too much duplicated city info. (In some cases this can be OK.)
  • 3.- "TRUE LINKING": in People. save the id of city.
  • 4.- "ONE TO FEW": Blog vs Comments. Embed the many in the one. One post with few comments.

Many to Many

Books Authors, Students Teachers Samples

Normally they are "FEW TO FEW"


  • 1.- Array of ids of the other type. Put it depending on the access patterns. You can also have ids in both documents.
  • 2.- Embedded: Books in the Authors. Data can be duplicated and become inconsistent after updates, and it won't work if you want to insert a Teacher before it has students.

Multikey indexes

Students Teachers Sample


		name: "Hugo",


		name: "Dr Who",

Find all teachers from a Student. Direct. Find all students from a teacher. Use a Multikey index. Add index
db.students.ensureIndex({'teachers':1}) Find
db.students.find({'teachers':{$all:[0,1]}}).explain() teachers whose ids are 0 and 1

explain will tell us how the query was executed, including whether or not the index was used.


Use ancestors


## When Denormalize
We normalize to avoid inconsistencies caused by duplicated data.

We can avoid having duplicate data:

* 1:1 Embed There is no duplication
* 1:Many From the many to the one
* Many : Many Link



db.students.explain(true).find({student_id:5})// more detail

db.students.createIndex({student_id:1}) // Take some time

db.students.createIndex({student_id:1, class_id:-1}) // -1 descending (good for sorting)





Multikey indexes

One array and the other a scalar{a:1,b:1}){a:1,b:1}){a:1,b:[2,4,5]}){a:1,b:1}) // isMultikey : true{a:[5,6,8],b:[2,4,5]}) // Can't put index when both a and b are arrays{a:[5,6,8],b:2})// Legal

Dot Notations and Multikey

db.students.createIndex({'scores.score':1}) db.people.createIndex({'':-1}) db.students.explain().find({'scores': {$elemMatch:{type:'exam', score:{'$gt':99.8}}}})

Unique indexes


Sparse Indexes


Sparse option: lets an index (including a unique one) skip documents that are missing the indexed field. db.people.createIndex({'phone_number':1},{sparse:true})

Index creation in background

Foreground: Fast, blocks writes and reads
Background: Slow, doesn't block writes and reads


	db.example.find( { a : 1, b : 2 } ).explain()
	db.example.explain().remove( { a : 1, b : 2 } )
	var exp = db.example.explain(); exp.find( { a : 1, b : 2 } )
	db.example.remove( { a : 1, b : 2 } ).explain()// Does not work because remove does not return a cursor
	db.example.explain().find( { a : 1, b : 2 } )
	curs = db.example.find( { a : 1, b : 2 } ); curs.explain()

Explain Verbosity


  • executionStats: Stat for the winning plan
  • allPlansExecution: Stat for all the plans

Covered Queries

Satisfy a query only with the index
Make searches that project only what is in the index

	{ name : 1, dob : 1 }
	{ _id : 1 }
	{ hair : 1, name : 1 }

	db.example.find( { name : { $in : [ "Alfred", "Bruce" ] } }, { name : 1, hair : 1 } )
	db.example.find( { _id : 1117008 }, { _id : 0, name : 1, dob : 1 } )
	db.example.find( { name : { $in : [ "Bart", "Homer" ] } }, {_id : 0, hair : 1, name : 1} )
    ->db.example.find( { name : { $in : [ "Bart", "Homer" ] } }, {_id : 0, dob : 1, name : 1} )// Only this one is covered

Special Indexes

Geospatial Index

shops = {'shop_name':'name', 'location':[x,y]}



Geospatial spherical Index


db.stores.find({ loc:{ $near: { $geometry: { type: "Point", coordinates: [-130, 39]}, $maxDistance:1000000 } } })

Full Text Search Indexes

	db.shops.find({$text:{$search:'dog cat food'}},{score:{$meta:'textScore'}}).sort({score:{$meta:'textScore'}})


hint Use it to tell MongoDB which index you want it to use.




  • 0: off
  • 1: slow ones
  • 2: all

db.system.profile.find( { millis : { $gt:1000 } } ).sort( { ts : -1 } )


Where mongo is spending its time


Get statistics on what happened in mongo over 1-second intervals



  • Match (find)
  • Project
  • Sort
  • Skip
  • Limit
	    { $match: { founded_year: 2004 } },
	    { $project: {
	        _id: 0,
	        name: 1,
	        founded_year: 1
	    } }

Be careful with the order: here, if limit goes before skip, the result will be wrong.

	    { $match: { founded_year: 2004 } },
	    { $sort: { name: 1} },
	    { $skip: 10 },
	    { $limit: 5 },
	    { $project: {
	        _id: 0,
	        name: 1 } },

Aggregation Pipeline Quick Reference


Expressions reference

Reshaping. Promoting Nested fields

	    { $match: {"": "greylock" } },
	    { $project: {
	        _id: 0, 
	        name: 1,
	        ipo: "$ipo.pub_year",
	        valuation: "$ipo.valuation_amount",
	        funders: "$"
	    } }

Create our own objects

	    { $match: {"": "greylock" } },
	    { $project: {
	        _id: 0, 
	        name: 1,
	        founded: {
	            year: "$founded_year",
	            month: "$founded_month",
	            day: "$founded_day"
	    } }


Convert a document with an array into multiple documents, one for each element of the array.

	// unwind
	    { $match: {"": "greylock" } },
	    { $unwind: "$funding_rounds" },
	    { $project: {
	        _id: 0,
	        name: 1,
	        amount: "$funding_rounds.raised_amount",
	        year: "$funding_rounds.funded_year"
	    } }

Multiple stages

	// If we don't care about the funder we can simplify.
	// Let's sort as well.
	    { $match: {"": "greylock" } },
	    { $unwind: "$funding_rounds" },
	    { $match: {"": "greylock" } },
	    { $project: {
	        _id: 0,
	        name: 1,
	        amount: "$funding_rounds.raised_amount",
	        year: "$funding_rounds.funded_year" } },
	    { $sort: { year: 1 } }

Array Expressions


	    { $match: {"": "greylock" } },
	    { $project: {
	        _id: 0,
	        name: 1,
	        founded_year: 1,
	        rounds: { $filter: {
	            input: "$funding_rounds",
	            as: "round",
	            cond: { $gte: ["$$round.raised_amount", 100000000] } } }
	    } },
	    { $match: {"": "greylock" } },    


	    { $match: { "founded_year": 2010 } },
	    { $project: {
	        _id: 0,
	        name: 1,
	        founded_year: 1,
	        first_round: { $arrayElemAt: [ "$funding_rounds", 0 ] },
	        last_round: { $arrayElemAt: [ "$funding_rounds", -1 ] }
	    } }



		    { $match: { "founded_year": 2010 } },
		    { $project: {
		        _id: 0,
		        name: 1,
		        founded_year: 1,
		        first_round: { $slice: [ "$funding_rounds", 1 ] },
		        last_round: { $slice: [ "$funding_rounds", -1 ] }
		    } }


	    { $match: { "founded_year": 2004 } },
	    { $project: {
	        _id: 0,
	        name: 1,
	        founded_year: 1,
	        total_rounds: { $size: "$funding_rounds" }
	    } }


$max, $min, $avg, $first...

	    { $match: { "funding_rounds": { $exists: true, $ne: [ ]} } },
	    { $project: {
	        _id: 0,
	        name: 1,
	        largest_round: { $max: "$funding_rounds.raised_amount" }
	    } }
	    { $match: { "funding_rounds": { $exists: true, $ne: [ ]} } },
	    { $project: {
	        _id: 0,
	        name: 1,
	        total_funding: { $sum: "$funding_rounds.raised_amount" }
	    } }
	    { $group: {
	        _id: { founded_year: "$founded_year" },
	        average_number_of_employees: { $avg: "$number_of_employees" }
	    } },
	    { $sort: { average_number_of_employees: -1 } }


	    { $group: {
	        _id: { founded_year: "$founded_year" },
	        average_number_of_employees: { $avg: "$number_of_employees" }
	    } },
	    { $sort: { average_number_of_employees: -1 } }
	db.companies.aggregate( [
	    { $match: { "relationships.person": { $ne: null } } },
	    { $project: { relationships: 1, _id: 0 } },
	    { $unwind: "$relationships" },
	    { $group: {
	        _id: "$relationships.person",
	        count: { $sum: 1 }
	    } },
	    { $sort: { count: -1 } }
	] )

Set a proper _id


	    { $match: { "relationships.person": { $ne: null } } },
	    { $project: { name: 1, relationships: 1, _id: 0 } },
	    { $unwind: "$relationships" }, {
	        $group: {
	            _id: "$relationships.person.permalink",
	            company: { $addToSet: "$name" }
	    { $unwind: "$company" }, {
	        $group: {
	            _id: "$_id",
	            count: { $sum: 1 }
	    { $sort: { count: -1 } }


	    { $project: { "class_id": 1, "student_id": 1, "scores.type": 1, "scores.score": 1, _id: 0 } },
	    { $unwind: "$scores" },
	    { $match: { "scores.type": { $ne: "quiz" } } }, {
	        $group: {
	            _id: "$class_id",
	            stdDev: { $stdDevPop: "$scores.score" }

Why is the result different if we don't have "student_id": 1 in the project stage?


	    { $match: { founded_year: 2004 } }, {
	        $project: {
	            _id: 1,
	            name: 1,

	            rounds: { $size: "$funding_rounds" },
	            founded_year: 1,
	            "funding_rounds.raised_amount": 1
	    { $match: { rounds: { $gte: 5 } } },
	    { $unwind: "$funding_rounds" }, {
	        $group: {
	            _id: "$name",
	            stdDev: { $sum: "$funding_rounds.raised_amount" }
	    { $sort: { stdDev: 1 } }


Write Concern

Journal: the part of memory where writes are logged before they are written to disk.
w (default 1): wait for the server to acknowledge the write. j (default false): wait for the journal to be written to disk.

w j
1 false wait for the server but not for the journal Fast, Small window of vulnerability
1 true Wait until is write directly in the disk Slow
0 Unacknowledged write Don't

Network Errors

What if we can not see the response?

  • Insert: Just try again until is done
  • Update: problematic for operations like $inc, where retrying could apply the change twice. If you need to avoid this error, use inserts instead.

##Introduction to Replication

  • Availability
  • Fault Tolerance

Replica Set

Primary and secondaries
Writes only in primaries
Minimum number of nodes in a Replica set is 3
If the primary is down, there is an election to decide which of the secondaries becomes the new primary.

Replica Set Elections

Type of nodes:

  • Regular
  • Arbiter: for voting purposes. No data on it.
  • Delayed: for back ups. Priority = 0 can not be primary
  • Hidden: Can not be primary. Priority= 0

Write Consistency

  • Writes goes to the primary
  • Reads can go to secondaries, but there is a chance that the data is not up to date.
  • Replication is asynchronous
  • Use for read scaling.

Creating Replica Set

	#!/usr/bin/env bash
	mkdir -p /data/rs1 /data/rs2 /data/rs3
	mongod --replSet m101 --logpath "1.log" --dbpath /data/rs1 --port 27017 --oplogSize 64 --fork --smallfiles
	mongod --replSet m101 --logpath "2.log" --dbpath /data/rs2 --port 27018 --oplogSize 64 --smallfiles --fork
	mongod --replSet m101 --logpath "3.log" --dbpath /data/rs3 --port 27019 --oplogSize 64 --smallfiles --fork


	config = { _id: "m101", members:[
          { _id : 0, host : "localhost:27017", priority:0, slaveDelay:5 },
          { _id : 1, host : "localhost:27018"},
          { _id : 2, host : "localhost:27019"} ]


By default you cannot read from a secondary; run rs.slaveOk() to allow it.

Replica Set Internals

  • oplog is the operations log. Inserts, updates...
  • oplog is in sync.
  • Secondaries are constantly reading the primary oplog

Command to see which mongod servers are running
ps -ef | grep mongod

On the server, in the local database (use local), there is a collection, oplog.rs

		"ts" : Timestamp(1467796049, 1),
		"t" : NumberLong(1),
		"h" : NumberLong("-2180104970022902937"),
		"v" : 2,
		"op" : "c",
		"ns" : "test.$cmd",
		"o" : {
			"create" : "people"
		"ts" : Timestamp(1467796049, 2),
		"t" : NumberLong(1),
		"h" : NumberLong("7319531361958625272"),
		"v" : 2,
		"op" : "i",
		"ns" : "test.people",
		"o" : {
			"_id" : ObjectId("577cca5151c935c6195553ad"),
			"name" : "Hugo"

rs.status() Give us the optime

			"_id" : 0,
			"name" : "localhost:27017",
			"health" : 1,
			"state" : 2,
			"stateStr" : "SECONDARY",
			"uptime" : 1088,
			"optime" : { 			//<-- when was the last update
				"ts" : Timestamp(1467796049, 2),
				"t" : NumberLong(1)
			"optimeDate" : ISODate("2016-07-06T09:07:29Z"),
			"lastHeartbeat" : ISODate("2016-07-06T09:12:47.037Z"),
			"lastHeartbeatRecv" : ISODate("2016-07-06T09:12:47.683Z"),
			"pingMs" : NumberLong(0),
			"syncingTo" : "localhost:27018", //<-- where the info comes from
			"configVersion" : 1

The oplog is a capped collection, so old entries roll off after a certain amount of time. Have an oplog big enough to cover periods when a secondary cannot see the primary. How big depends on how fast the oplog is growing.

Failover and Rollback

If the primary goes down and, when it comes back up, sees that it has writes that are not in the 'new' primary, these writes are rolled back and saved to a file in case we want to add them back manually.

Connecting to a Replica Set from the Node.js Driver

If you leave a replica set node out of the seedlist within the driver, the missing node will be discovered as long as you list at least one valid node.

	var MongoClient = require('mongodb').MongoClient;

	MongoClient.connect("mongodb://localhost:30001,localhost:30002,localhost:30003/course", function(err, db) {
	    if (err) throw err;

	    db.collection("repl").insert({ 'x' : 1 }, function(err, doc) {
	        if (err) throw err;

	        db.collection("repl").findOne({ 'x' : 1 }, function(err, doc) {
	            if (err) throw err;


Failover in the Node.js Driver

If an insert happens during a primary election, the insert will be buffered until the election completes; the callback will then be called after the operation is sent and a response is received.

Write Concern Revisited

w j
1 false wait for the server but not for the journal Fast, Small window of vulnerability
1 true Wait until is write directly in the disk Slow
0 Unacknowledged write Don't
X false Wait until X nodes have acknowledged the write Slow
majority Wait for the majority of nodes; will (in most cases) avoid rollbacks

wtimeout How long you wait

j only waits for the journal write on the primary node.

                        w=3, wtimeout=10000, j=True, 

Read Preferences

Read and writes go to the primary.

  • Primary
  • Primary Preferred
  • Secondary
  • Secondary Preferred
  • Nearest

When reading from secondaries we get Eventually consistent reads

Review of Implications of Replication

  • Seed Lists
  • Write Concern: w,j, wtimeout
  • Read Preferences
  • Errors can happen

Introduction to Sharding

Use for scalability.
Shards typically are replica sets.
mongos routes requests to the shards. Each shard contains chunks (groups of documents) ordered by the shard_key, and each document is accessed in the shard that owns it. Based on the shard_key, mongos knows which shard to send each request to. To work with shards we connect to mongos instead of mongod; mongod is used to work with replica sets but not with shards.

Building a Sharded Environment

How to separate documents into shards:

  • Range based: documents from 1..100 to s1
  • Hash based: using a hash of the key to map a document to a shard.
    • Does not need to be unique.
    • Every document must have a shard_key

Implications of Sharding

  • Every doc includes the shard_key
  • shard_key is immutable
  • An index that starts with the shard_key is required (it can be a compound index)
  • On updates shard_keys must be specified
  • No shard_key -> scatter gather operation (expensive)
  • You cannot have a unique key unless it is part of the shard_key.

Sharding + Replication

Drivers ¯_(ツ)_/¯

Choosing a Shard Key

  • Sufficient cardinality (enough values)
  • Avoid hotspotting writes: writing everything to the same place, as happens when the key is something like a creation timestamp.

