mongodb - Mongo Map-Reduce - Top Venues By Users in a Radius -


I'm having issues with my mapReduce function - the goal is to get a list of the top venues within a radius of a lat/lng, grouped by vid and ordered by the number of distinct user_ids.

Here is a sample data set:

  { "_id" : ObjectId("51f9234feb97ff0700000046"), "checkin_id" : 39286249, "created_at" : ISODate("2013-07-31T14:47:11Z"), "loc" : { "lat" : 42.3672, "lon" : -86.2681 }, "icv" : 1, "ipv" : 1, "vid" : 348442, "user_id" : 151556, "bid" : 9346, "pid" : 549 }   { "_id" : ObjectId("51f9234b488fff0700000006"), "checkin_id" : 39286247, "created_at" : ISODate("2013-07-31T14:47:07Z"), "loc" : { "lat" : 55.6721, "lon" : 12.5576 }, "icv" : 1, "ipv" : 1, "vid" : 3124, "user_id" : 472486, "bid" : 7983, "pid" : 2813 }   ... 

Here is my map function:

/**
 * Map step of the "top venues by users" map-reduce job.
 *
 * Runs with `this` bound to one check-in document and emits one value per
 * document, keyed by the venue id (`vid`). The emitted value holds:
 *   - users: an object used as a set, with the document's user_id as its only key
 *   - count: 1, i.e. one check-in
 */
var map1 = function () {
  // Use an object as a set so that merging later de-duplicates user ids.
  var users = {};
  users[this.user_id] = 1;

  emit(this.vid, {
    users: users,
    count: 1
  });
};

and reduce:

/**
 * Reduce step of the "top venues by users" map-reduce job.
 *
 * Merges all values emitted for one venue into a single summary.
 *
 * MongoDB may call reduce several times for the same key, feeding the output
 * of one call back in as an input of the next, and it skips reduce entirely
 * for keys that have only one value. The returned object therefore has to
 * have exactly the same shape as the value emitted by the map function:
 * { users: {...}, count: N }. Returning a differently named field (such as
 * `total`) makes the results inconsistent between venues.
 *
 * @param {*} key - The venue id (`vid`) the values belong to (unused).
 * @param {Array<{users: Object, count: number}>} values - Emitted or
 *   previously reduced values for this venue.
 * @returns {{users: Object, count: number}} Union of all user ids and the
 *   summed check-in count.
 */
var reduce1 = function (key, values) {
  var summary = {
    users: {},
    count: 0
  };

  values.forEach(function (doc) {
    // Add up the check-ins of every partial value.
    summary.count += doc.count;

    // Copy every user id over; ids that are already present are simply
    // overwritten, so the object keeps acting as a set.
    var users = doc.users || {};
    Object.keys(users).forEach(function (userId) {
      summary.users[userId] = users[userId];
    });
  });

  return summary;
};

my geo_query:

// Only check-ins created at or after this instant are considered.
var d = new Date("2013-07-31T14:47:11Z");

// Filter passed to mapReduce: check-ins near the given point, with icv = 1,
// created on or after `d`.
var geo_query = {
  loc: { $near: [40.758318, -73.952985], $maxDistance: 25 },
  "icv": 1,
  "created_at": { $gte: d }
};

and mapreduce query:

// Run the map-reduce job over the documents matched by geo_query and return
// the results inline instead of writing them to an output collection.
var res = db.mycolelction.mapReduce(map1, reduce1, {
  out: { inline: 1 },
  query: geo_query
});

The results returned match the output of my reduce function, but they are not hitting my finalize1 function:

... {     "_id" : 609096,     "value" : {         "users" : {             "487586" : 1         },         "count" : 1     } }, {     "_id" : 622448,     "value" : {         "users" : {             "313755" : 1,             "443180" : 1         },         "total" : 4     } }, ... 

At this point, I think I have a result set, but the $near function only scans the 100 nearest venues, and I want to scan all venues (all documents that match the radius (25m)), look at those venues, group them, and count the unique users in the time period. I've searched around and looked at the documentation, and I'm not sure of a solution. Any takers?

The final result for me would be to sort and limit the result by the "total" attribute. Ideally, I want to sort by total descending and limit to 15.

I would do the following. First of all, you have your coordinates the wrong way around. MongoDB wants longitude, latitude, preferably in GeoJSON format:

loc: { type: 'Point', coordinates: [-73.952985, 40.758318] }, 

MongoDB does not care about the lat and lon field names, and will ignore them.

But you should avoid Map/Reduce, as it's slow and complex. Instead, you can use the aggregation framework to do something similar:

db.so.aggregate([
  // Search all (well, up to a million) venues within 250 km.
  // With a GeoJSON point and spherical: true, maxDistance is given in meters.
  { $geoNear: {
      near: { type: 'Point', coordinates: [-73.952985, 40.758318] },
      spherical: true,
      distanceField: 'd',
      maxDistance: 250 * 1000,
      limit: 1000000
  } },
  // Find only the items with icv = 1.
  { $match: { icv: 1 } },
  // Group by venue and user, counting the check-ins of each pair.
  { $group: {
      _id: { vid: '$vid', user_id: '$user_id' },
      count: { $sum: 1 }
  } },
  // Regroup by venue, collecting the per-user counts and the overall total.
  { $group: {
      _id: '$_id.vid',
      users: { $addToSet: { user_id: '$_id.user_id', count: '$count' } },
      total: { $sum: '$count' }
  } },
  // Sort by "total", descending.
  { $sort: { 'total': -1 } },
  // And limit to 15.
  { $limit: 15 }
]);

I've used $geoNear as the first stage, and the match on $icv as the second stage, because the $geoNear index is going to do a lot better than the one for $icv being 1 (as, I guess, you only have the values 0 or 1 anyway).

Please note that in this example, I used 250 km (250 * 1000 meters), and not 25 km.

With the following input:

// Sample check-ins: one for venue 348442 and three for venue 3124
// (one by user 472486 and two by user 47286).
db.so.insert( { "_id" : ObjectId("51f9234feb97ff0700000046"), "loc" : { type: 'Point', coordinates: [ -73.2681, 40.3672 ] }, "vid" : 348442, "user_id" : 151556 } );
db.so.insert( { "_id" : ObjectId("51f9234b488fff0700000006"), "loc" : { type: 'Point', coordinates: [ -73.5576, 40.6721 ] }, "vid" : 3124, "user_id" : 472486 } );
db.so.insert( { "_id" : ObjectId("51f92345488fff0700000006"), "loc" : { type: 'Point', coordinates: [ -73.5576, 40.6721 ] }, "vid" : 3124, "user_id" : 47286 } );
db.so.insert( { "_id" : ObjectId("52f92345488fff0700000006"), "loc" : { type: 'Point', coordinates: [ -73.5576, 40.6721 ] }, "vid" : 3124, "user_id" : 47286 } );

You get the result:

{     "result" : [         {             "_id" : 3124,             "users" : [                 { "user_id" : 472486, "count" : 1 },                 { "user_id" : 47286, "count" : 2 }             ],             "total" : 3         },         {             "_id" : 348442,             "users" : [                 { "user_id" : 151556, "count" : 1 }             ],             "total" : 1         }     ],     "ok" : 1 } 

There is one difference from your wanted output: the user_id is not a key with the count as its value, but a field in a sub-document. In general, you can't change a value into a key, or a key into a value, with the aggregation framework.


Comments

Popular posts from this blog

c++ - Creating new partition disk winapi -

Android Prevent Bluetooth Pairing Dialog -

VBA function to include CDATA -