Continuo a pensare che le prestazioni di MongoDB siano veramente buone, ancora meglio se unite ad un ambiente di esecuzione JavaScript come Node.js.
Ecco un esempio di utilizzo su moli di dati consistenti (testato con server MongoDB in locale quindi con poche risorse di sistema).
Resources:
Installare i seguenti moduli
MongoDB ODM Mongoose
npm install mongoose
Modulo per leggere file di grosse dimensioni
npm install carrier
Libreria per misurare i tempi di esecuzione delle query
npm install moment
Definiamo la struttura della collection, poi proseguiamo con l'inserimento dei dati.
var mongoose = require('mongoose');
var databaseName = 'geonames';
var connection_string = 'mongodb://localhost/'+databaseName;
// `db` is kept because other code may still refer to it. The model below is
// deliberately NOT registered through it (see the note on `Geonames`).
var db = mongoose.connect(connection_string);

// Shape of one document in the collection: one property per column of the
// tab-separated GeoNames dump, with the two coordinate columns merged into
// the `loc` pair. Each property is declared exactly once (the original listed
// `alternatenames` twice, which is a SyntaxError in ES5 strict mode).
var schema_geonames = mongoose.Schema({
  geonameid : Number,
  name : String,
  asciiname : String,
  alternatenames : String,
  loc : [Number],
  feature_class : String,
  feature_code : String,
  country_code : String,
  cc2 : String,
  admin1 : String,
  admin2 : String,
  admin3 : String,
  admin4 : String,
  population : String,
  elevation : Number,
  dem : String,
  timezone : String,
  modification_date : String
});

// Compound index: "2d" geospatial index on `loc` (required by $near queries)
// followed by ascending keys on the two name fields.
schema_geonames.index({
  loc : "2d",
  asciiname : 1,
  name : 1
});

// Register the model on the mongoose singleton rather than on the value
// returned by connect(): in old Mongoose releases connect() returns the
// mongoose object itself, so this is equivalent, while in releases where
// connect() returns a promise `db.model` would not exist.
var Geonames = mongoose.model('Geonames', schema_geonames);
var carrier = require('carrier');
var fs = require('fs');
var filename = '/path/of/file/allCountries.txt';
var inStream = fs.createReadStream(filename, {flags:'r'});

// Upper bound on saves that may be in flight at once. Without a bound every
// line of the dump queues a save immediately, and memory grows with the size
// of the file.
var MAX_PENDING_SAVES = 1000;
var pendingSaves = 0;   // saves started but not yet called back
var failedSaves = 0;    // saves whose callback reported an error
var inputEnded = false; // set once the line reader has emitted 'end'
var inputPaused = false;

/**
 * Maps one tab-separated line of the GeoNames dump to the plain object
 * expected by the Geonames model.
 *
 * Column order follows the GeoNames dump layout (19 columns, indexes 0-18):
 * geonameid, name, asciiname, alternatenames, latitude, longitude,
 * feature class, feature code, country code, cc2, admin1-4, population,
 * elevation, dem, timezone, modification date.
 *
 * @param {string} line - One raw line of the dump.
 * @returns {Object} Field values keyed by schema property name.
 */
function toGeonamesDoc(line) {
  var fields = line.split('\t');
  return {
    geonameid : fields[0],
    name : fields[1],
    asciiname : fields[2],
    alternatenames : fields[3],
    // Stored in file order (columns 4 and 5); the $near query in this
    // article passes its point in the same order.
    loc : [fields[4], fields[5]],
    feature_class : fields[6],
    feature_code : fields[7],
    country_code : fields[8],
    // The original assigned column 9 to `alternatenames` a second time,
    // which shifted every following property by one column.
    cc2 : fields[9],
    admin1 : fields[10],
    admin2 : fields[11],
    admin3 : fields[12],
    admin4 : fields[13],
    population : fields[14],
    elevation : fields[15],
    dem : fields[16],
    timezone : fields[17],
    modification_date : fields[18]
  };
}

/**
 * Exits the process once the whole file has been read AND every save has
 * called back. The exit code is 0 when all saves succeeded, 1 otherwise.
 */
function finishIfDone() {
  if (!inputEnded || pendingSaves > 0) {
    return;
  }
  console.log('end');
  process.exit(failedSaves > 0 ? 1 : 0);
}

carrier.carry(inStream)
  .on('line', function (line) {
    // Skip blank lines so they do not become empty documents.
    if (line === '') {
      return;
    }

    var geonames = new Geonames(toGeonamesDoc(line));

    pendingSaves++;
    if (pendingSaves >= MAX_PENDING_SAVES && !inputPaused) {
      // Stop reading until the database catches up.
      inputPaused = true;
      inStream.pause();
    }

    geonames.save(function (err) {
      pendingSaves--;
      if (err) {
        failedSaves++;
        // Include the error itself so the failure can be diagnosed.
        console.log('Error save geonames', err);
      }
      if (inputPaused && pendingSaves < MAX_PENDING_SAVES) {
        inputPaused = false;
        inStream.resume();
      }
      finishIfDone();
    });
  })
  .on('end', function () {
    // Do not exit here: saves issued for the last lines may still be pending.
    inputEnded = true;
    finishIfDone();
  });
// Counts the documents in the `geonames` collection; the line that follows is its output.
// NOTE(review): presumably typed in the mongo shell, not part of the Node script — confirm.
db.geonames.find().count();
8309522
Dopo aver atteso che la procedura di import sia terminata, possiamo eseguire qualche semplice query, ad esempio con $near:
// Query at most 20 documents whose feature_class is 'P', ordered by proximity
// ($near) to the given point, then print how many came back followed by one
// "asciiname (admin1)" line per document. A query error is logged and rethrown.
GeonamesModel.find(
  { feature_class: 'P', loc : { $near : [45.32306, 8.41533] } },
  {},
  { limit: 20 },
  function (err, docs) {
    if (err) {
      console.log("error in finding near", err);
      throw err;
    }

    console.log('docs.length : ' , docs.length);

    for (var i = 0; i < docs.length; i++) {
      var place = docs[i];
      var label = place.asciiname + ' (' + place.admin1 + ')';
      console.log(label);
    }
  }
);
Risultato:
docs.length : 20
Vercelli (VC) 45.32306,8.41533
Larizzate (VC) 45.3,8.38333
Caresanablot (VC) 45.35736,8.39203
Torrione (VC) 45.31667,8.46667
Borgo Vercelli (VC) 45.35786,8.46303
Scavarda (NO) 45.33218,8.47477
Asigliano Vercellese (VC) 45.26146,8.40853
Villata (VC) 45.38776,8.43263
Prarolo (VC) 45.28206,8.47814
Desana (VC) 45.26966,8.35973
Quinto Vercellese (VC) 45.37966,8.36173
Lignana (VC) 45.28606,8.34393
Oldenico (VC) 45.40276,8.38103
Sali Vercellese (VC) 45.30986,8.32893
Pertengo (VC) 45.23556,8.41754
Casalvolone (NO) 45.40096,8.46463
Pezzana (VC) 45.26206,8.48504
Costanzana (VC) 45.23816,8.36813
Collobiano (VC) 45.39686,8.34833
Stroppiana (VC) 45.23026,8.45384
Tot time 205ms
[Sorgenti completi]
2 commenti:
Hi,
Thank you for your script. I would like to use it, but the processing stopped at record 268000. What is the problem?
Best regards,
Zsolt Boszormenyi
from Hungary
HI!
hello,
I downloaded the whole project and the new file Geonames.
While I'm writing I came to:
> db.geonames.count ()
730 347
without any problems.
to run the mongod using:
/opt/mongo/mongodb/bin/mongod --dbpath /media/usbkey/mongo/mongodatastandalone/ --logpath "standalone.log" --fork --smallfiles
let me know if you still have problems!
thank you!
Posta un commento