MongoDB 聚合:对 $group 的分组结果使用 $match

声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow 原文地址: http://stackoverflow.com/questions/29958579/
Warning: these are provided under cc-by-sa 4.0 license. You are free to use/share it, But you must attribute it to the original authors (not me): StackOverFlow

提示:将鼠标放在中文语句上可以显示对应的英文。显示中英文
时间:2020-09-08 20:24:31  来源:igfitidea点击:

MongoDB aggregate $group and $match with group result

mongodb

提问by sasidharan

I have a collection as follows

我有一个集合如下

{

    "_id" : ObjectId("553b2c740f12bb30f85bd41c"),
    "symbol" : "EUR/GBP",
    "order_id" : "PW_BarclaysTrades60530",
    "ticket_id" : "PW_BarclaysTrades.60530",
    "basketid" : "TESTBASKET-1428483828043",
    "date_sent" : ISODate("2015-04-07T18:30:00.000Z"),
    "destination" : "BarclaysTrades",
    "order_price" : 0.0000000000000000,
    "order_quantity" : 4000000.0000000000000000,
    "order_type" : 1.0000000000000000,
    "parent_quantity" : 250000000.0000000000000000,
    "time_sent" : "09:03:48",
    "side" : 1,
    "tif" : "0",
    "execution_id" : 88939,
    "date_recvd" : ISODate("2015-04-07T18:30:00.000Z"),
    "exe_quantity" : 50000.0000000000000000,
    "time_recvd" : "09:03:48",
    "execution_price" : 2.5000000000000000,
    "execution_type" : 1
}

I would like to get the documents whose execution_price is greater than the average execution_price for each destination in the collection.

我想获取集合中每个目的地的 execution_price 大于 average(execution_price) 的文档

Trying to aggregate as follows:

尝试聚合如下:

// Asker's original attempt (kept as posted; it does not work as intended).
// Idea: compute the average execution_price per destination, keep every
// source document in "data", then unwind and filter against the average.
db.orders_by_symbol.aggregate( [
{ $limit:300000 },
{ $match:{ destination: "PAPER" } },
// One group per destination: the average plus every original document.
{ $group:{_id:{Destination:"$destination"},avg_exec_price:  
              {$avg:"$execution_price"} ,"data":{"$push": "$$ROOT"}}},
// Back to one document per original order, each carrying avg_exec_price.
{$unwind:"$data"},
// NOTE(review): this is the stage that misbehaves. Query operators in
// $match take literal operands, so "$avg_exec_price" is compared as a
// plain string rather than resolved as a field. In addition, after the
// $unwind the price lives at data.execution_price, not at the top level.
// That is why $ne lets every document through and $gt matches none.
{$match:{execution_price:{$ne: "$avg_exec_price"}}},
// Flatten the wanted fields out of "data" for the final output.
{$project:{_id:0,symbol:"$data.symbol",destination:"$data.destination",
          execution_id:"$data.execution_id",
          exec_price:"$data.execution_price",
         avg_ex_price:"$avg_exec_price"}}], 
{allowDiskUse:true})

Getting the following Result

得到以下结果

{

    "result" : [ 
        {
            "symbol" : "EUR/GBP",

            "destination" : "PAPER",
            "execution_id" : 89109,
            "exec_price" : 6.5000000000000000,
            "avg_ex_price" : 95.0747920857049140
        }, 
        {
            "symbol" : "EUR/GBP",
            "destination" : "PAPER",
            "execution_id" : 89110,
            "exec_price" : 6.0000000000000000,
            "avg_ex_price" : 95.0747920857049140
        }, 
        {
            "symbol" : "EUR/GBP",
            "destination" : "PAPER",
            "execution_id" : 89111,
            "exec_price" : 6.5000000000000000,
            "avg_ex_price" : 95.0747920857049140
        }

But when I replace the '$ne' operator with '$gt', no result is produced. Both exec_price and avg_ex_price are of the double datatype. Not sure why it is not working as expected.

但是,当我把 '$ne' 运算符换成 '$gt' 时,不会产生任何结果。exec_price 和 avg_ex_price 都是 double 数据类型。不知道为什么它没有按预期工作。

回答by chridam

Using MongoDB Server 3.6 and newer:

使用 MongoDB Server 3.6 及更新版本:

// Aggregation pipeline for MongoDB 3.6+ that returns the "PAPER" orders whose
// execution_price is greater than the average execution_price of all "PAPER"
// orders. Each output document has the shape:
//   { symbol, destination, execution_id, exec_price, avg_exec_price }
// NOTE(review): $facet gathers all matched documents into one intermediate
// document, so very large result sets may hit the BSON document size limit.
var pipeline = [
    // Restrict the input to a single destination.
    { "$match": { "destination": "PAPER" } },

    // Run two sub-pipelines over the same input:
    //   "average" -> single-element array holding the overall average price
    //   "data"    -> the matched documents trimmed to the fields we need
    { "$facet": {
        "average": [
            { "$group": {
                "_id": null,
                "avg_exec_price": { "$avg": "$execution_price" }
            } }
        ],
        "data": [
            { "$project": {
                "_id": 0,
                "symbol": 1,
                "destination": 1,
                "execution_id": 1,
                "execution_price": 1
            } }
        ]
    } },

    // Unwrap the one-element "average" array into a plain sub-document.
    { "$addFields": {
        "average": { "$arrayElemAt": ["$average", 0] }
    } },

    // Attach the average to every element, then keep only the elements whose
    // price is above it.
    { "$addFields": {
        "data": {
            "$filter" : {
                "input": {
                    "$map": {
                        "input": "$data",
                        "as": "el",
                        "in": {
                            // Each output field must read its own source
                            // field from the element being mapped.
                            "symbol": "$$el.symbol",
                            "destination": "$$el.destination",
                            "execution_id": "$$el.execution_id",
                            "exec_price": "$$el.execution_price",
                            "avg_exec_price": "$average.avg_exec_price"
                        }
                    }
                },
                "as": "doc",
                "cond": {
                    "$gt" : [
                        "$$doc.exec_price",
                        "$$doc.avg_exec_price"
                    ]
                }
            }
        }
    } },

    // Emit one top-level document per surviving element.
    { "$unwind": "$data" },
    { "$replaceRoot": {  "newRoot": "$data" } }
];


For MongoDB versions which do not support the above operators and pipelines, use the $project operator to create an additional field that stores the comparison of the two fields via the $gt aggregation operator:

对于不支持上述运算符和管道的 MongoDB 版本,使用$project运算符创建一个附加字段,通过$gt聚合运算符存储两个字段的比较:

// Pipeline for servers without $facet/$addFields/$replaceRoot: compute the
// average in a $group, carry every source document along in "data", then
// materialise the price-vs-average comparison as a boolean field that a
// plain $match can filter on.
var pipeline = [
    // Only orders sent to the PAPER destination.
    { $match: { destination: "PAPER" } },

    // Single group (_id: null): overall average plus the source documents.
    {
        $group: {
            _id: null,
            avg_exec_price: { $avg: "$execution_price" },
            data: { $addToSet: "$$ROOT" }
        }
    },

    // One document per source order, each still carrying avg_exec_price.
    { $unwind: "$data" },

    // $match cannot compare two fields directly, so compute the comparison
    // here with the $gt aggregation expression.
    {
        $project: {
            _id: 0,
            data: 1,
            avg_exec_price: 1,
            isGreaterThanAverage: {
                $gt: ["$data.execution_price", "$avg_exec_price"]
            }
        }
    },

    // Keep the orders priced above the average.
    { $match: { isGreaterThanAverage: true } },

    // Flatten the result into the shape the asker wanted.
    {
        $project: {
            _id: 0,
            symbol: "$data.symbol",
            destination: "$data.destination",
            execution_id: "$data.execution_id",
            exec_price: "$data.execution_price",
            avg_ex_price: "$avg_exec_price"
        }
    }
];

Now to test the above aggregation, suppose you have the following minimum test case collection:

现在要测试上述聚合,假设您有以下最小测试用例集合:

// Minimal fixture: four "PAPER" orders (prices 1.8, 6.8, 1.1, 9.4) and one
// "foo" order (3.1). The PAPER average is (1.8 + 6.8 + 1.1 + 9.4) / 4 = 4.775,
// so only the 6.8 and 9.4 orders are above average.
db.test.insert([{
    "symbol" : "EUR/GBP",    
    "destination" : "PAPER",    
    "execution_id" : 88939,    
    "execution_price" : 1.8
},
{
    "symbol" : "EUR/GBP",    
    "destination" : "PAPER",    
    "execution_id" : 88921,    
    "execution_price" : 6.8
},
{
    "symbol" : "USD/GBP",    
    "destination" : "foo",    
    "execution_id" : 88955,    
    "execution_price" : 3.1
},
{
    "symbol" : "AUD/GBP",    
    "destination" : "PAPER",    
    "execution_id" : 88941,    
    "execution_price" : 1.1
},
{
    "symbol" : "EUR/GBP",    
    "destination" : "PAPER",    
    "execution_id" : 88907,    
    "execution_price" : 9.4
}]);

Running the above aggregation

运行上面的聚合

db.test.aggregate(pipeline);

will produce the result:

将产生结果:

/* 0 */
{
    "result" : [ 
        {
            "symbol" : "EUR/GBP",
            "destination" : "PAPER",
            "execution_id" : 88907,
            "exec_price" : 9.4,
            "avg_ex_price" : 4.775
        }, 
        {
            "symbol" : "EUR/GBP",
            "destination" : "PAPER",
            "execution_id" : 88921,
            "exec_price" : 6.8,
            "avg_ex_price" : 4.775
        }
    ],
    "ok" : 1
}

回答by Yogesh

After reading your question, you should use $cond in your aggregation as below:

阅读您的问题后,您应该在聚合中使用$cond,如下所示:

// Alternative approach using $cond: tag each order with either the full
// document (price above the destination average) or an empty string, then
// drop the empty ones. Stages are passed as separate arguments rather than
// as a single array.
db.collectionName.aggregate({
  // Stage 1: restrict to the PAPER destination.
  "$match": {
    "destination": "PAPER"
  }
}, {
  // Stage 2: per-destination average, keeping every source document.
  "$group": {
    "_id": "$destination",
    "avg_exec_price": {
      "$avg": "$execution_price"
    },
    "data": {
      "$push": "$$ROOT"
    }
  }
}, {
  // Stage 3: one document per source order, each carrying avg_exec_price.
  "$unwind": "$data"
}, {
  // Stage 4: regroup, replacing each order with a { check: ... } marker.
  "$group": {
    "_id": "$_id",
    "data": {
      "$push": {
        "check": {
          "$cond": [{
              "$gt": ["$data.execution_price", "$avg_exec_price"] // condition: order price is above the group average
            }, "$data", ""] // keep the order document when true, otherwise an empty string
        }
      }
    }
  }
}, {
  // Stage 5: one document per marker.
  "$unwind": "$data"
}, {
  // Stage 6: discard the markers that were set to the empty string.
  "$match": {
    "data.check": {
      "$exists": true, // the marker must be present and must not be the "" placeholder
      "$ne": ""
    }
  }
}, {
  // Stage 7: output { _id: <destination>, data: <order document> }.
  "$project": {
    "_id": "$_id",
    "data": "$data.check"
  }
}).pretty()