- Start server
> mongod
- Launch the mongo shell
> mongo
show database names
> show dbs
show collections in current database
> show collections
db.movies.insertOne({ "title": "Jaws", "year": 1975, "imdb": "tt0073195" });
All
db.movies.find()
One
db.movies.findOne({"year":1975})
Create a cursor
var cursor = db.movies.find()
Iterate through a cursor
cursor.next()
Cursor finished
cursor.hasNext()
Connection
var url = 'mongodb://<MACHINE>:<PORT>/<DB_NAME>';
// Connects to the local "video" database and prints the title of every
// document in the "movies" collection.
var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

var url = 'mongodb://localhost:27017/video';

MongoClient.connect(url, function(err, db) {
    assert.equal(null, err);
    console.log("Successfully connected to server");

    // Find some documents in our collection
    db.collection('movies').find({}).toArray(function(err, docs) {
        // Fail loudly if the query failed: docs is not usable in that case.
        assert.equal(null, err);

        // Print the documents returned
        docs.forEach(function(doc) {
            console.log(doc.title);
        });

        // Close the DB
        db.close();
    });

    // Logged right after find() has been issued; the titles are printed
    // later, from the toArray callback above.
    console.log("Called find()");
});
consolidate Allow express to use several templating engines consolidate.js
__dirname This file path
var express = require('express'),
app = express(),
engines = require('consolidate');
app.engine('html', engines.nunjucks);
app.set('view engine', 'html');
app.set('views', __dirname + '/views');
app.get('/', function(req, res) {
res.render('hello', { name : 'Templates' });
});
app.use(function(req, res){
res.sendStatus(404);
});
var server = app.listen(3000, function() {
var port = server.address().port;
console.log('Express server listening on port %s', port);
});
// Express application that lists the documents of the "movies" collection
// through the "movies" template.
var express = require('express'),
    app = express(),
    engines = require('consolidate'),
    MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

// Render *.html views from ./views with the nunjucks engine from consolidate.
app.engine('html', engines.nunjucks);
app.set('view engine', 'html');
app.set('views', __dirname + '/views');

MongoClient.connect('mongodb://localhost:27017/video', function(err, db) {
    assert.equal(null, err);
    console.log("Successfully connected to MongoDB.");

    // Routes and the HTTP listener are set up inside the connect callback,
    // so every handler has a usable db handle.
    app.get('/', function(req, res){
        db.collection('movies').find({}).toArray(function(err, docs) {
            // Report a failed query instead of rendering with undefined docs.
            if (err) {
                console.error(err);
                return res.sendStatus(500);
            }
            res.render('movies', { 'movies': docs } );
        });
    });

    // Any request not handled above gets a 404.
    app.use(function(req, res){
        res.sendStatus(404);
    });

    var server = app.listen(3000, function() {
        var port = server.address().port;
        console.log('Express server listening on port %s.', port);
    });
});
mongod --dbpath=/data/db
mongorestore dump
#2. CRUD
insert
db.moviesScratch.insertOne({ "title": "Rocky", "year": "1976", "imdb": "tt0075148"});
db.moviesScratch.insertOne({ "_id": "tt0075148", "title": "Rocky", "year": "1976" });
insertMany
db.moviesScratch.insertMany(
[
{
"_id" : "tt0084726",
"title" : "Star Trek II: The Wrath of Khan",
"year" : 1982,
"type" : "movie"
},
{
"_id" : "tt0796366",
"title" : "Star Trek",
"year" : 2009,
"type" : "movie"
},
{
"_id" : "tt0084726",
"title" : "Star Trek II: The Wrath of Khan",
"year" : 1982,
"type" : "movie"
},
{
"_id" : "tt1408101",
"title" : "Star Trek Into Darkness",
"year" : 2013,
"type" : "movie"
},
{
"_id" : "tt0117731",
"title" : "Star Trek: First Contact",
"year" : 1996,
"type" : "movie"
}
],
{
"ordered": false
}
);
"ordered": false
lets MongoDB attempt every insert and skip only the ones that give errors. With the default, "ordered": true,
no more inserts take place once there is an error.
db.movies.find({"name":"Star Wars"})
db.movies.find({rated:"PG"})
// Dot notation: matches on the "meter" field nested inside "tomato".
db.movies.find({"tomato.meter":"100"})
- On the entire Array
- Based on any element
- Based on specific element
- More complex matches using operators
writers
is an array of elements
They have to be in the same order.
// Matches the whole array: same elements, same order.
db.movies.find({"writers":["Joel","Ethan"]})
actor
is an array of elements
Find any document with an occurrence.
db.movies.find({"actor":"John Doe"})
Find any document where the value occurs as the first element.
db.movies.find({"actor.0":"John Doe"})
find
returns a cursor.
If we add it to a variable we can use it.
var cursor = db.movies.find({"tomato.meter":"100"})
We can use a function to use the cursor
// Returns the next document of the cursor, or null once it is exhausted.
var doc = function(){return cursor.hasNext() ? cursor.next() : null;}
See how many objects are left in the Batch
cursor.objsLeftInBatch()
Reducing size of elements returned by the queries.
db.movies.find({rated:"PG"},{title:1})
Explicit _id
exclusion
db.movies.find({rated:"PG"},{title:1, _id:0})
Explicit exclusion
db.movies.find({rated:"PG"},{writers:0, actors:0})
db.movies.find({runtime:{$gt:90}})
db.movies.find({runtime:{$gte:90, $lte:120}})
Also returns the ones that do not have the field at all.
db.movies.find({rated:{$ne:"UNRATED"}})
In
db.movies.find({rated:{$in:["G","PG"]}})
Matches documents that have the specified field
db.movies.find({"tomato.meter":{$exists:true}})
Selects documents if a field is of the specified type.
db.movies.find({"_id":{$type:"string"}})
// Keys that use dot notation must be quoted.
db.movies.find({"$or":[{"tomato.meter":{$gt:90}},{"metacritic":{$gt:80}}]})
db.movies.find({"$and":[{"tomato.meter":{$gt:95}},{"metacritic":{$gt:95}}]})
Is the same as:
// Implicit AND: both conditions go in the same filter document
// (a second argument to find() would be treated as the projection).
db.movies.find({"tomato.meter":{$gt:95},"metacritic":{$gt:95}})
$and
is used if we need to specify the same field more than once in the same query.
db.movies.find({"$and":[{"metacritic":{$ne:null}},{"metacritic":{$exists:true}}]})
Regex Operators
db.movies.find({"awards.text":{$regex:/^Won\s.*/}})
db.movies.find({genres:{$all:["Comedy","Action"]}})
db.movies.find({countries:{$size:1}})
For embedded documents
// Both conditions must hold for the same element of the boxOffice array.
db.movies.find({boxOffice:{$elemMatch:{country:"UK", revenue:{$gt:15}}}})
Is not the same as:
// Dot notation: each condition may be satisfied by a different array element.
db.movies.find({"boxOffice.country":"UK", "boxOffice.revenue":{$gt:15}})
Here the query looks at the boxOffice array as a whole, so the conditions do not have to hold for the same element: a document is returned if there is an element in the array with country UK and there is any element (possibly a different one) with revenue greater than 15.
db.movies.updateOne({title:"StarWars"},{$set:{poster:"img.jpg"}})
$set
Sets the value of a field in a document.$unset
Removes the specified field from a document.$inc
Increments the value of the field by the specified amount.$min
Only updates the field if the specified value is less than the existing field value.$max
Only updates the field if the specified value is greater than the existing field value.$mul
Multiplies the value of the field by the specified amount.$rename
Renames a field.$setOnInsert
Sets the value of a field if an update results in an insert of a document.$currentDate
Sets the value of a field to current date, either as a Date or a Timestamp.
// Adds 5 to the nested tomato.reviews counter.
db.movies.updateOne({title:"StarWars"},{$inc:{"tomato.reviews":5}})
$addToSet
Adds elements to an array only if they do not already exist in the set.$pop
Removes the first or last item of an array.$pullAll
Removes all matching values from an array.$
Acts as a placeholder to update the first element that matches the query condition in an update.$pull
Removes all array elements that match a specified query.$pushAll
Deprecated. Adds several items to an array.$push
Adds an item to an array.
db.movieDetails.updateOne({title: "The Martian"},
{$push: { reviews: { rating: 4.5,
date: ISODate("2016-01-12T09:00:00Z"),
reviewer: "Spencer H.",
text: ".34.."} } })
$each
Modifies the $push and $addToSet operators to append multiple items for array updates.$slice
Modifies the $push operator to limit the size of updated arrays.$sort
Modifies the $push operator to reorder documents stored in an array.$position
Modifies the $push operator to specify the position in the array to add elements.
For more than one use
$each
db.movieDetails.updateOne({title: "The Martian"},
{$push: { reviews:
{ $each: [
{ rating: 0.5,
date: ISODate("2016-01-12T07:00:00Z"),
reviewer: "Yabo A.",
text: "..."},
{ rating: 4.5,
date: ISODate("2016-01-12T09:00:00Z"),
reviewer: "Spencer H.",
text: "..."} ] } } } )
To keep a maximum number of elements in an array (do not forget to set the position $position
)
$slice
db.movieDetails.updateOne({ title: "The Martian" },
{$push: { reviews:
{ $each: [
{ rating: 0.5,
date: ISODate("2016-01-13T07:00:00Z"),
reviewer: "Shannon B.",
text: "Enjoyed watching with my kids!" } ],
$position: 0,
$slice: 5 } } } )
db.movieDetails.updateMany( { rated: null },{ $unset: { rated: "" } } )
db.movieDetails.updateOne(
{"imdb.id": detail.imdb.id},
{$set: detail},
{upsert: true}
);
db.movies.replaceOne({"imdb": detail.imdb.id},detail);
mongoimport
> mongoimport -d crunchbase -c companies companies.json
var MongoClient = require('mongodb').MongoClient,
assert = require('assert');
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
assert.equal(err, null);
console.log("Successfully connected to MongoDB.");
...
});
var query = {"category_code": "biotech"};
db.collection('companies').find(query).toArray(function(err, docs) {
assert.equal(err, null);
assert.notEqual(docs.length, 0);
docs.forEach(function(doc) {
console.log( doc.name + " is a " + doc.category_code + " company." );
});
db.close();
});
You get a cursor object and then you iterate through it. No call is made to the database until you start iterating.
var query = {"category_code": "biotech"};
var cursor = db.collection('companies').find(query);
cursor.forEach(
function(doc) {
console.log( doc.name + " is a " + doc.category_code + " company." );
},
function(err) { // Called always when there are no more documents
assert.equal(err, null);
return db.close();
}
);
forEach
works with batches of data that it automatically retrieves from the database every time the previous batch runs out, until it reaches the end of the result set. With forEach
we can process the data as it comes from the database.
toArray
callback is not called until the entire data is retrieved from the database system, and the entire array is built.
Get only the fields we really need, we can reduce the amount of data transferred.
var query = {"category_code": "biotech"};
var projection = {"name": 1, "category_code": 1, "_id": 0};
var cursor = db.collection('companies').find(query);
cursor.project(projection);
/**
 * Builds the filter for companies founded within a range of years.
 *
 * @param {Object} options - Uses `firstYear` and `lastYear` as the inclusive
 *     bounds of `founded_year`; when an `employees` key is present it is used
 *     as the lower bound of `number_of_employees`.
 * @returns {Object} The query document.
 */
function queryDocument(options) {
    var yearRange = {};
    yearRange["$gte"] = options.firstYear;
    yearRange["$lte"] = options.lastYear;

    var filter = { "founded_year": yearRange };

    // Without an employees option the year range is the whole filter.
    if (!("employees" in options)) {
        return filter;
    }

    filter["number_of_employees"] = { "$gte": options.employees };
    return filter;
}
"$options": "i"
case insensitive.
>node app.js -m "billion.+valuation"
/**
 * Builds a filter of case-insensitive regular-expression matches.
 *
 * @param {Object} options - An `overview` key adds a match on `overview`;
 *     a `milestones` key adds a match on `milestones.source_description`.
 * @returns {Object} The query document (empty when neither key is present).
 */
function queryDocument(options) {
    // Wraps a pattern in a $regex condition with the "i" option.
    function caseInsensitive(pattern) {
        return { "$regex": pattern, "$options": "i" };
    }

    var filter = {};

    if ("overview" in options) {
        filter["overview"] = caseInsensitive(options.overview);
    }

    if ("milestones" in options) {
        filter["milestones.source_description"] = caseInsensitive(options.milestones);
    }

    return filter;
}
/**
 * Builds the projection that goes with the regex query.
 *
 * @param {Object} options - An `overview` key adds the `overview` field;
 *     a `milestones` key adds `milestones.source_description`.
 * @returns {Object} Projection that always includes `name` and
 *     `founded_year` and excludes `_id`.
 */
function projectionDocument(options) {
    var fields = { "_id": 0, "name": 1, "founded_year": 1 };

    // [option name, field to include when the option is present]
    var optionalFields = [
        ["overview", "overview"],
        ["milestones", "milestones.source_description"]
    ];

    optionalFields.forEach(function(pair) {
        if (pair[0] in options) {
            fields[pair[1]] = 1;
        }
    });

    return fields;
}
// Optional IPO filter, applied only when the caller passed an "ipo" option.
if ("ipo" in options) {
if (options.ipo == "yes") {
// "yes": require a valuation amount that exists and is not null.
query["ipo.valuation_amount"] = {"$exists": true, "$ne": null};
} else if (options.ipo == "no") {
// "no": match on a null valuation amount.
query["ipo.valuation_amount"] = null;
}
}
// Optional country filter, matched against the offices' country_code.
if ("country" in options) {
query["offices.country_code"] = options.country;
}
It doesn't matter in which order we apply sort, skip and limit. Mongo always applies them in the same order.
- 1.-
sort
- 2.-
skip
- 3.-
limit
Passing an array of tuples (not objects) we set the order of the sorts.
cursor.sort([["founded_year", 1], ["number_of_employees", -1]]);
cursor.limit(options.limit);
cursor.skip(options.skip);
// Object form of a single-field sort: field name mapped to its direction.
cursor.sort({"founded_year": 1});
// Inserts a single status document and logs the _id it was stored under.
db.collection("statuses").insertOne(status, function(err, res) {
    // Stop on failure instead of reading insertedId from a missing result.
    assert.equal(err, null);
    console.log("Inserted document with _id: " + res.insertedId + "\n");
});
db.collection("statuses").insertMany(statuses, function(err, res) {
console.log(res);
done += 1;
if (done == screenNames.length) {
db.close();
}
});
db.collection('companies').deleteOne(filter, function(err, res) {
assert.equal(err, null);
console.log(res.result);
});
var filter = {"_id": {"$in": markedForRemoval}};
db.collection("companies").deleteMany(filter, function(err, res) {
console.log(res.result);
console.log(markedForRemoval.length + " documents removed.");
return db.close();
});
- Rich Documents
- PreJoin / Embed Data
- No Mongo Joins
- No Constrains
- Atomic Operations
- No Declared Schema
Keeping your data consistent even though MongoDB lacks foreign key constraints, by embedding documents.
- Restructure: Use atomic operations to achieve it, because the documents (if they are so prepared) are embedded and there is no need to access several documents.
- Implement in Software
- Tolerate
Options
- 1.-SQL style: With foreign _id, in one or the other side
- 2.-MongoDB Style: Embed one into another
- 1.- Frequency of access (READ): If we rarely access the Resume and it is a big document, we don't want to embed the Resume in the Employee document. If you mostly access the info of only one of the two types, you don't want them embedded.
- 2.- Growing documents (WRITE): If we write a lot to the Resume document but not to the Employee. If the writes are mostly in one document we don't want to incur overhead in the other.
- 3.- Size: If the document is larger than 16MB it cannot be embedded.
- 4.- Atomicity: If you need to update both types at the same time, it is better to have one document embedded in the other.
- 1.- Embed people in city: Too many people in one city document
- 2.- Embed city in people: Too much duplicated city info. (In some cases this can be OK.)
- 3.- "TRUE LINKING": In People, save the id of the city.
- 4.- "ONE TO FEW": Blog vs Comments. Embed the many in the one. One post with few comments.
Normally they are "FEW TO FEW"
Options
- 1.- Array of ids of the other type. Put it depending on the access patterns.You can also have ids in both documents.
- 2.- Embedded: Books in the Authors. Data can be duplicated and become inconsistent after updates, and it won't work if you want to insert a Teacher before it has students.
Student
{
_id:0,
name: "Hugo",
teachers:[1,4,6,8]
}
Teacher
{
_id:0,
name: "Dr Who",
}
Find all teachers from a Student. Direct.
Find all students from a teacher. Use a Multikey index.
Add index
// Index on the teachers array (a multikey index).
db.students.createIndex({'teachers':1})
Find
db.students.find({'teachers':{$all:[0,1]}}).explain()
teachers whose id
is 0 and 1
explain
will tell us how the query was execute. Use or not of the index
Use ancestors
{
_id:89
category_name:"movies"
parent_id:6,
ancestors:[2,5,7,8]
}
## When Denormalize
We normalize to avoid inconsistencies caused by duplicate data.
We can avoid having duplicate data:
* 1:1 Embed There is no duplication
* 1:Many From the many to the one
* Many : Many Link
db.students.explain().find({student_id:5})
db.students.explain(true).find({student_id:5})
// more detail
db.students.createIndex({student_id:1})
// Take some time
db.students.createIndex({student_id:1, class_id:-1})
// -1 descending (good for sorting)
db.students.getIndexes()
db.students.dropIndexes({student_id:1})
One array and the other a scalar
db.foo.createIndex({a:1,b:1})
db.foo.explain().find({a:1,b:1})
db.foo.insert({a:1,b:[2,4,5]})
db.foo.explain().find({a:1,b:1})
// isMultikey : true
db.foo.insert({a:[5,6,8],b:[2,4,5]})
// Can't put index when both a
and b
are arrays
db.foo.insert({a:[5,6,8],b:2})
// Legal
db.students.createIndex({'scores.score':1})
db.people.createIndex({'work_history.company':-1})
// Both conditions must hold for the same element of the scores array.
db.students.explain().find({'scores': {$elemMatch:{type:'exam', score:{'$gt':99.8}}}})
// Unique index: no two documents may share the same student_id.
db.students.createIndex({'student_id':1},{unique:true})
{a:1,b:1,c:5}
{a:10,b:5,c:10}
{a:31,b:41}
{a:12,b:23}
Sparse option: the index only includes documents that have the indexed field, so a unique index can be created even when many documents are missing that field.
db.people.createIndex({'phone_number':1},{sparse:true})
Foreground: Fast, blocks writes and reads
Background: Slow, doesn't block writes and reads
// Ways of asking which index an operation would use.
db.example.find( { a : 1, b : 2 } ).explain()
db.example.explain().remove( { a : 1, b : 2 } )
var exp = db.example.explain(); exp.find( { a : 1, b : 2 } )
db.example.remove( { a : 1, b : 2 } ).explain()// Does not work because remove does not return a cursor
db.example.explain().find( { a : 1, b : 2 } )
curs = db.example.find( { a : 1, b : 2 } ); curs.explain()
Options:
- executionStats: Stat for the winning plan
- allPlansExecution: Stat for all the plans
Satisfy a query only with the index
Make searches that project only what is in the index
//Indexes
{ name : 1, dob : 1 }
{ _id : 1 }
{ hair : 1, name : 1 }
db.example.find( { name : { $in : [ "Alfred", "Bruce" ] } }, { name : 1, hair : 1 } )
db.example.find( { _id : 1117008 }, { _id : 0, name : 1, dob : 1 } )
db.example.find( { name : { $in : [ "Bart", "Homer" ] } }, {_id : 0, hair : 1, name : 1} )
->db.example.find( { name : { $in : [ "Bart", "Homer" ] } }, {_id : 0, dob : 1, name : 1} )// Only this one is covered
shops = {'shop_name':'name', 'location':[x,y]}
db.shops.ensureIndex({'location':'2d'})
db.shops.find({'location':{$near:[50,50]}})
db.shops.ensureIndex({'location':'2dsphere'})
db.stores.find({ loc:{ $near: { $geometry: { type: "Point", coordinates: [-130, 39]}, $maxDistance:1000000 } } })
// $text needs a text index on the collection being queried, so the index
// and the searches use the same collection.
db.sentences.createIndex({'words':'text'})
db.sentences.find({$text:{$search:'dog'}})
// Project the relevance score and sort by it.
db.sentences.find({$text:{$search:'dog cat food'}},{score:{$meta:'textScore'}}).sort({score:{$meta:'textScore'}})
hint
Use it to give to mongo the index you want to use.
Status:
- 0: off
- 1: slow ones
- 2: all
db.system.profile.find( { millis : { $gt:1000 } } ).sort( { ts : -1 } )
Where mongo is spending his time
get statistics that happened in mongo in 1 seconds
- Match (find)
- Project
- Sort
- Skip
- Limit
db.companies.aggregate([
{ $match: { founded_year: 2004 } },
{ $project: {
_id: 0,
name: 1,
founded_year: 1
} }
])
Be careful with the order: here, if limit
goes before skip
the result will be wrong.
db.companies.aggregate([
{ $match: { founded_year: 2004 } },
{ $sort: { name: 1} },
{ $skip: 10 },
{ $limit: 5 },
{ $project: {
_id: 0,
name: 1 } },
])
Aggregation Pipeline Quick Reference
db.companies.aggregate([
{ $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
{ $project: {
_id: 0,
name: 1,
ipo: "$ipo.pub_year",
valuation: "$ipo.valuation_amount",
funders: "$funding_rounds.investments.financial_org.permalink"
} }
]).pretty()
Create our own objects
db.companies.aggregate([
{ $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
{ $project: {
_id: 0,
name: 1,
founded: {
year: "$founded_year",
month: "$founded_month",
day: "$founded_day"
}
} }
]).pretty()
Convert a document that holds an array into several documents, one per element of the array.
// unwind
db.companies.aggregate([
{ $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
{ $unwind: "$funding_rounds" },
{ $project: {
_id: 0,
name: 1,
amount: "$funding_rounds.raised_amount",
year: "$funding_rounds.funded_year"
} }
])
Multiple stages
// If we don't care about the funder we can simplify.
// Let's sort as well.
db.companies.aggregate([
{ $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
{ $unwind: "$funding_rounds" },
{ $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
{ $project: {
_id: 0,
name: 1,
amount: "$funding_rounds.raised_amount",
year: "$funding_rounds.funded_year" } },
{ $sort: { year: 1 } }
])
db.companies.aggregate([
{ $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
{ $project: {
_id: 0,
name: 1,
founded_year: 1,
rounds: { $filter: {
input: "$funding_rounds",
as: "round",
cond: { $gte: ["$$round.raised_amount", 100000000] } } }
} },
{ $match: {"rounds.investments.financial_org.permalink": "greylock" } },
]).pretty()
$arrayElemAt
db.companies.aggregate([
{ $match: { "founded_year": 2010 } },
{ $project: {
_id: 0,
name: 1,
founded_year: 1,
first_round: { $arrayElemAt: [ "$funding_rounds", 0 ] },
last_round: { $arrayElemAt: [ "$funding_rounds", -1 ] }
} }
]).pretty()
$slice
db.companies.aggregate([
{ $match: { "founded_year": 2010 } },
{ $project: {
_id: 0,
name: 1,
founded_year: 1,
first_round: { $slice: [ "$funding_rounds", 1 ] },
last_round: { $slice: [ "$funding_rounds", -1 ] }
} }
]).pretty()
$size
db.companies.aggregate([
{ $match: { "founded_year": 2004 } },
{ $project: {
_id: 0,
name: 1,
founded_year: 1,
total_rounds: { $size: "$funding_rounds" }
} }
]).pretty()
$max, $min, $avg, $first...
db.companies.aggregate([
{ $match: { "funding_rounds": { $exists: true, $ne: [ ]} } },
{ $project: {
_id: 0,
name: 1,
largest_round: { $max: "$funding_rounds.raised_amount" }
} }
])
db.companies.aggregate([
{ $match: { "funding_rounds": { $exists: true, $ne: [ ]} } },
{ $project: {
_id: 0,
name: 1,
total_funding: { $sum: "$funding_rounds.raised_amount" }
} }
])
db.companies.aggregate([
{ $group: {
_id: { founded_year: "$founded_year" },
average_number_of_employees: { $avg: "$number_of_employees" }
} },
{ $sort: { average_number_of_employees: -1 } }
])
db.companies.aggregate([
{ $group: {
_id: { founded_year: "$founded_year" },
average_number_of_employees: { $avg: "$number_of_employees" }
} },
{ $sort: { average_number_of_employees: -1 } }
])
db.companies.aggregate( [
{ $match: { "relationships.person": { $ne: null } } },
{ $project: { relationships: 1, _id: 0 } },
{ $unwind: "$relationships" },
{ $group: {
_id: "$relationships.person",
count: { $sum: 1 }
} },
{ $sort: { count: -1 } }
] )
Set a proper _id
Ex1:
db.companies.aggregate([
{ $match: { "relationships.person": { $ne: null } } },
{ $project: { name: 1, relationships: 1, _id: 0 } },
{ $unwind: "$relationships" }, {
$group: {
_id: "$relationships.person.permalink",
company: { $addToSet: "$name" }
}
},
{ $unwind: "$company" }, {
$group: {
_id: "$_id",
count: { $sum: 1 }
}
},
{ $sort: { count: -1 } }
]);
Ex2
db.grades.aggregate([
{ $project: { "class_id": 1, "student_id": 1, "scores.type": 1, "scores.score": 1, _id: 0 } },
{ $unwind: "$scores" },
{ $match: { "scores.type": { $ne: "quiz" } } }, {
$group: {
_id: "$class_id",
stdDev: { $stdDevPop: "$scores.score" }
}
}
]);
Why is the result different if we don't have "student_id": 1
in the project stage?
Ex3
db.companies.aggregate([
{ $match: { founded_year: 2004 } }, {
$project: {
_id: 1,
name: 1,
rounds: { $size: "$funding_rounds" },
founded_year: 1,
"funding_rounds.raised_amount": 1
}
},
{ $match: { rounds: { $gte: 5 } } },
{ $unwind: "$funding_rounds" }, {
$group: {
_id: "$name",
stdDev: { $sum: "$funding_rounds.raised_amount" }
}
},
{ $sort: { stdDev: 1 } }
])
Journal: the part in memory where writes are recorded before they are written to disk.
w = 1
wait for the server to respond to the write.
j = false
whether to wait for the journal to be written to disk (false: don't wait).
w | j | ||
---|---|---|---|
1 | false | wait for the server but not for the journal | Fast, Small window of vulnerability |
1 | true | Wait until is write directly in the disk | Slow |
0 | | Unacknowledged write | Don't |
What if we can not see the response?
- Insert: Just try again until is done
- Update: problem in cases like in
$inc
. In the case of the need of to avoid this error use inserts instead.
##Introduction to Replication
- Availability
- Fault Tolerance
Nodes
Primary and secondaries
Writes only in primaries
Minimum number of nodes in a replica set is 3
If the primary is down, there is an election
to decide which of the secondaries becomes the new primary.
Type of nodes:
- Regular
- Arbiter: for voting purposes. No data on it.
- Delayed: for backups. Priority = 0, so it cannot be primary.
- Hidden: cannot be primary. Priority = 0
- Writes goes to the primary
- Reads can go to secondaries, but there are options that the info is not updated.
- Replication is asynchronous
- Use for read scaling.
#!/usr/bin/env bash
mkdir -p /data/rs1 /data/rs2 /data/rs3
mongod --replSet m101 --logpath "1.log" --dbpath /data/rs1 --port 27017 --oplogSize 64 --fork --smallfiles
mongod --replSet m101 --logpath "2.log" --dbpath /data/rs2 --port 27018 --oplogSize 64 --smallfiles --fork
mongod --replSet m101 --logpath "3.log" --dbpath /data/rs3 --port 27019 --oplogSize 64 --smallfiles --fork
Configuration
// Replica set "m101" with three local members. Member 0 has priority 0,
// so it cannot become primary, and is configured with slaveDelay.
config = { _id: "m101", members:[
    { _id : 0, host : "localhost:27017", priority:0, slaveDelay:5 },
    { _id : 1, host : "localhost:27018"},
    { _id : 2, host : "localhost:27019"} ]
};
rs.initiate(config);
rs.status();
Can not read in a secondary by default
rs.slaveOk()
- oplog is the operations log. Inserts, updates...
- oplog is in sync.
- Secondaries are constantly reading the primary oplog
Command to see which mongo servers are we running
ps -ef | grep mongod
In the server in the local database use local
there is a oplog.rs collection
m101:PRIMARY> db.oplog.rs.find().pretty()
...
{
"ts" : Timestamp(1467796049, 1),
"t" : NumberLong(1),
"h" : NumberLong("-2180104970022902937"),
"v" : 2,
"op" : "c",
"ns" : "test.$cmd",
"o" : {
"create" : "people"
}
}
{
"ts" : Timestamp(1467796049, 2),
"t" : NumberLong(1),
"h" : NumberLong("7319531361958625272"),
"v" : 2,
"op" : "i",
"ns" : "test.people",
"o" : {
"_id" : ObjectId("577cca5151c935c6195553ad"),
"name" : "Hugo"
}
}
rs.status()
Give us the optime
{
"_id" : 0,
"name" : "localhost:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1088,
"optime" : { //<-- when was the last update
"ts" : Timestamp(1467796049, 2),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2016-07-06T09:07:29Z"),
"lastHeartbeat" : ISODate("2016-07-06T09:12:47.037Z"),
"lastHeartbeatRecv" : ISODate("2016-07-06T09:12:47.683Z"),
"pingMs" : NumberLong(0),
"syncingTo" : "localhost:27018", //<-- where the info comes from
"configVersion" : 1
},
oplog is a capped collection. It is going to roll off after a certain amount of time. Have an oplog big enough to cover the periods when a secondary cannot see the primary. How big depends on how fast the oplog is growing.
If the primary stops and, when it comes back up, sees that it has writes that are not in the 'new' primary, these writes are rolled back and saved in a file in case we want to add them manually.
If you leave a replica set node out of the seedlist within the driver, the missing node will be discovered as long as you list at least one valid node.
var MongoClient = require('mongodb').MongoClient;
MongoClient.connect("mongodb://localhost:30001,localhost:30002,localhost:30003/course", function(err, db) {
if (err) throw err;
db.collection("repl").insert({ 'x' : 1 }, function(err, doc) {
if (err) throw err;
db.collection("repl").findOne({ 'x' : 1 }, function(err, doc) {
if (err) throw err;
console.log(doc);
db.close();
});
});
});
If a insert happens during a primary election, the insert will be buffered until the election completes, then the callback will be called after the operation is sent and a response is received.
w | j | ||
---|---|---|---|
1 | false | wait for the server but not for the journal | Fast, Small window of vulnerability |
1 | true | Wait until is write directly in the disk | Slow |
0 | Unacknowledged write | Don't | |
X | false | Wait until X nodes are acknowledge the write | Slow |
majority | Wait for the majority of Nodes | will (in most cases) avoid rollbacks |
wtimeout How long you wait
j
only waits for the journal to be written on the primary node.
pymongo.MongoClient(host="mongodb://localhost:27017",
replicaSet="rs1",
w=3, wtimeout=10000, j=True,
read_preference=read_pref)
Read and writes go to the primary.
- Primary
- Primary Preferred
- Secondary
- Secondary Preferred
- Nearest
When reading from secondaries we get Eventually consistent reads
- Seed Lists
- Write Concern: w,j, wtimeout
- Read Preferences
- Errors can happen
Use for scalability.
Shards typically are replica sets.
mongos
handle the shards.
Shards contain chunks (bunches of documents) sorted in some way (by the shard_key); any access to a document is done in the shard that holds it. mongos knows, depending on the shard_key, to which shard to send the request.
To work with shards we will work with mongos instead of mongod. mongod is used to work with replica sets but not with shards.
How to separate documents into shards:
- Range based: documents from 1..100 to s1
- Hash based: using a key that maps a document to a shard.
- Does not need to be unique.
- Every document must have a shard_key
- Every doc includes the shard_key
- shard_key is immutable
- An index that starts with the shard_key is needed (could be multi index)
- On updates the shard_key must be specified
- No shard_key -> scatter gather operation (expensive)
- You cannot have a unique key unless it is part of the shard_key.
Drivers ¯_(ツ)_/¯
- Sufficient cardinality (enough values)
- Hotspotting writes: Write everything in the same place. Like using time creation values.