Getting Counts of Distinct Key Values

The following example shows how to get results of an aggregate operation where the counts of distinct key values are in the format { key: count }.

Suppose you have a test collection with the sample documents:

db.test.insert([
    { "_id": 1, "name" : "t1", "loglevel" : "ERROR" },
    { "_id": 2, "name" : "t1", "loglevel" : "ERROR" },
    { "_id": 3, "name" : "t1", "loglevel" : "WARNING" },
    { "_id": 4, "name" : "t1", "loglevel" : "WARNING" },
    { "_id": 5, "name" : "t1", "loglevel" : "WARNING" },
    { "_id": 6, "name" : "t1", "loglevel" : "INFO" },
    { "_id": 7, "name" : "t2", "loglevel" : "INFO" },
    { "_id": 8, "name" : "t2", "loglevel" : "ERROR" },
    { "_id": 9, "name" : "t2", "loglevel" : "ERROR" },
    { "_id": 10, "name" : "t2", "loglevel" : "ERROR" },
    { "_id": 11, "name" : "t2", "loglevel" : "ERROR" },
    { "_id": 12, "name" : "t3", "loglevel" : "INFO" }
])

and would like to aggregate the collection to return the following result

/* 1 */
{
    "_id" : "t1",
    "error" : 2,
    "warning" : 3,
    "info" : 1
}

/* 2 */
{
    "_id" : "t2",
    "error" : 4,
    "warning" : 0,
    "info" : 1
}

/* 3 */
{
    "_id" : "t3",
    "error" : 0,
    "warning" : 0,
    "info" : 1
}

Accomplishing the above would require using the $cond operator in the $sum accumulator operator. The $cond operator will evaluate a logical condition based on its first argument (if) and then returns the second argument where the evaluation is true (then) or the third argument where false (else). This converts the true/false logic into 1 and 0 numerical values that feed into $sum respectively:

{
    "$sum": {
        "$cond": [ { "$eq": [ "$loglevel", "WARNING" ] }, 1, 0 ]
    }
}

As a resulting pipeline, one needs to run the aggregation operation

db.test.aggregate([
    {
        "$group": {
            "_id": "$name",
            "error": {
                "$sum": {
                   "$cond": [ { "$eq": [ "$loglevel",  "ERROR" ] }, 1, 0]
                }
            },
            "warning":{
                "$sum": {
                   "$cond": [ { "$eq": [ "$loglevel", "WARNING" ] }, 1, 0 ]
                }
            },
            "info": {
                "$sum": {
                   "$cond": [ { "$eq": [ "$loglevel",  "INFO" ] }, 1, 0 ]
                }
            }
        }
    }
])

The above pipeline can also be dynamically generated, given an array of possible statuses:

var statuses = ["ERROR", "WARNING", "INFO"],
    groupOperator = { "$group": { "_id": "$name" } };

statuses.forEach(function (status){
    groupOperator["$group"][status.toLowerCase()] = {
       "$sum": {
           "$cond": [ { "$eq": [ "$loglevel",  status ] }, 1, 0]
       }
   }
});

db.test.aggregate([groupOperator]);

If the possible key values are not known in advance, an initial step which queries for this list is necessary by running the distinct command on the loglevel field. This will give you an object that contains a list of the distinct roles:

var result = db.runCommand ( { distinct: "test", key: "loglevel" } )
var statuses = result.values;
printjson(statuses); // this will print ["ERROR", "WARNING", "INFO"]

Now given the list above, you can assemble your pipeline by creating an object that will have its properties set using JavaScript’s reduce() method. The following demonstrates this:


var groupObj = { "$group": { "_id": "$name" } };
var groupPipeline = statuses.reduce(function(obj, status) {  
    obj["$group"][status.toLowerCase()] = {
        "$sum": {
            "$cond": [ { "$eq": [ "$loglevel", status ] }, 1, 0 ]
        }
    };
    return obj;
}, groupObj );

Use the resulting document in the final aggregation pipeline as:

db.test.aggregate([groupPipeline]);

For a more flexible and better performant approach which executes much faster than the above, consider running an alternative pipeline as follows

db.test.aggregate([
    { 
        "$group": {
            "_id": { 
                "name": "$name",
                "status": { "$toLower": "$loglevel" }
            },
            "count": { "$sum": 1 }
        }
    },
    { 
        "$group": {
            "_id": "$_id.name",
            "counts": {
                "$push": {
                    "loglevel": "$_id.status",
                    "count": "$count"
                }
            }
        }
    }
])

Sample Output

/* 1 */
{
    "_id" : "t2",
    "counts" : [ 
        {
            "loglevel" : "error",
            "count" : 4
        }, 
        {
            "loglevel" : "info",
            "count" : 1
        }
    ]
}

/* 2 */
{
    "_id" : "t1",
    "counts" : [ 
        {
            "loglevel" : "error",
            "count" : 2
        }, 
        {
            "loglevel" : "warning",
            "count" : 3
        }, 
        {
            "loglevel" : "info",
            "count" : 1
        }
    ]
}

/* 3 */
{
    "_id" : "t3",
    "counts" : [ 
        {
            "loglevel" : "info",
            "count" : 1
        }
    ]
}