Find MongoDB records where array field is not empty
After some more looking, especially in the mongodb documents, and puzzling bits together, this was the answer:
ME.find({pictures: {$exists: true, $not: {$size: 0}}})
You care about two things when querying - accuracy and performance. With that in mind, I tested a few different approaches in MongoDB v3.0.14.
TL;DR db.doc.find({ nums: { $gt: -Infinity }})
is the fastest and most reliable (at least in the MongoDB version I tested).
EDIT: This no longer works in MongoDB v3.6! See the comments under this post for a potential solution.
Setup
I inserted 1k docs w/o a list field, 1k docs with an empty list, and 5 docs with a non-empty list.
for (var i = 0; i < 1000; i++) { db.doc.insert({}); }
for (var i = 0; i < 1000; i++) { db.doc.insert({ nums: [] }); }
for (var i = 0; i < 5; i++) { db.doc.insert({ nums: [1, 2, 3] }); }
db.doc.createIndex({ nums: 1 });
I recognize this isn't enough of a scale to take performance as seriously as I am in the tests below, but it's enough to present the correctness of various queries and behavior of chosen query plans.
Tests
db.doc.find({'nums': {'$exists': true}})
returns wrong results (for what we're trying to accomplish).
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': {'$exists': true}}).count()
1005
--
db.doc.find({'nums.0': {'$exists': true}})
returns correct results, but it's also slow using a full collection scan (notice COLLSCAN
stage in the explanation).
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums.0': {'$exists': true}}).count()
5
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums.0': {'$exists': true}}).explain()
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.doc",
"indexFilterSet": false,
"parsedQuery": {
"nums.0": {
"$exists": true
}
},
"winningPlan": {
"stage": "COLLSCAN",
"filter": {
"nums.0": {
"$exists": true
}
},
"direction": "forward"
},
"rejectedPlans": [ ]
},
"serverInfo": {
"host": "MacBook-Pro",
"port": 27017,
"version": "3.0.14",
"gitVersion": "08352afcca24bfc145240a0fac9d28b978ab77f3"
},
"ok": 1
}
--
db.doc.find({'nums': { $exists: true, $gt: { '$size': 0 }}})
returns wrong results. That's because of an invalid index scan advancing no documents. It will likely be accurate but slow without the index.
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $exists: true, $gt: { '$size': 0 }}}).count()
0
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $exists: true, $gt: { '$size': 0 }}}).explain('executionStats').executionStats.executionStages
{
"stage": "KEEP_MUTATIONS",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 2,
"advanced": 0,
"needTime": 0,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"inputStage": {
"stage": "FETCH",
"filter": {
"$and": [
{
"nums": {
"$gt": {
"$size": 0
}
}
},
{
"nums": {
"$exists": true
}
}
]
},
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 0,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"nums": 1
},
"indexName": "nums_1",
"isMultiKey": true,
"direction": "forward",
"indexBounds": {
"nums": [
"({ $size: 0.0 }, [])"
]
},
"keysExamined": 0,
"dupsTested": 0,
"dupsDropped": 0,
"seenInvalidated": 0,
"matchTested": 0
}
}
}
--
db.doc.find({'nums': { $exists: true, $not: { '$size': 0 }}})
returns correct results, but the performance is bad. It technically does an index scan, but then it still advances all the docs and then has to filter through them).
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $exists: true, $not: { '$size': 0 }}}).count()
5
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $exists: true, $not: { '$size': 0 }}}).explain('executionStats').executionStats.executionStages
{
"stage": "KEEP_MUTATIONS",
"nReturned": 5,
"executionTimeMillisEstimate": 0,
"works": 2016,
"advanced": 5,
"needTime": 2010,
"needFetch": 0,
"saveState": 15,
"restoreState": 15,
"isEOF": 1,
"invalidates": 0,
"inputStage": {
"stage": "FETCH",
"filter": {
"$and": [
{
"nums": {
"$exists": true
}
},
{
"$not": {
"nums": {
"$size": 0
}
}
}
]
},
"nReturned": 5,
"executionTimeMillisEstimate": 0,
"works": 2016,
"advanced": 5,
"needTime": 2010,
"needFetch": 0,
"saveState": 15,
"restoreState": 15,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 2005,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 2005,
"executionTimeMillisEstimate": 0,
"works": 2015,
"advanced": 2005,
"needTime": 10,
"needFetch": 0,
"saveState": 15,
"restoreState": 15,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"nums": 1
},
"indexName": "nums_1",
"isMultiKey": true,
"direction": "forward",
"indexBounds": {
"nums": [
"[MinKey, MaxKey]"
]
},
"keysExamined": 2015,
"dupsTested": 2015,
"dupsDropped": 10,
"seenInvalidated": 0,
"matchTested": 0
}
}
}
--
db.doc.find({'nums': { $exists: true, $ne: [] }})
returns correct results and is slightly faster, but the performance is still not ideal. It uses IXSCAN which only advances docs with an existing list field, but then has to filter out the empty lists one by one.
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $exists: true, $ne: [] }}).count()
5
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $exists: true, $ne: [] }}).explain('executionStats').executionStats.executionStages
{
"stage": "KEEP_MUTATIONS",
"nReturned": 5,
"executionTimeMillisEstimate": 0,
"works": 1018,
"advanced": 5,
"needTime": 1011,
"needFetch": 0,
"saveState": 15,
"restoreState": 15,
"isEOF": 1,
"invalidates": 0,
"inputStage": {
"stage": "FETCH",
"filter": {
"$and": [
{
"$not": {
"nums": {
"$eq": [ ]
}
}
},
{
"nums": {
"$exists": true
}
}
]
},
"nReturned": 5,
"executionTimeMillisEstimate": 0,
"works": 1017,
"advanced": 5,
"needTime": 1011,
"needFetch": 0,
"saveState": 15,
"restoreState": 15,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 1005,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 1005,
"executionTimeMillisEstimate": 0,
"works": 1016,
"advanced": 1005,
"needTime": 11,
"needFetch": 0,
"saveState": 15,
"restoreState": 15,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"nums": 1
},
"indexName": "nums_1",
"isMultiKey": true,
"direction": "forward",
"indexBounds": {
"nums": [
"[MinKey, undefined)",
"(undefined, [])",
"([], MaxKey]"
]
},
"keysExamined": 1016,
"dupsTested": 1015,
"dupsDropped": 10,
"seenInvalidated": 0,
"matchTested": 0
}
}
}
--
db.doc.find({'nums': { $gt: [] }})
IS DANGEROUS BECAUSE DEPENDING ON THE INDEX USED IT MIGHT GIVE UNEXPECTED RESULTS. That's because of an invalid index scan which advances no documents.
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $gt: [] }}).count()
0
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $gt: [] }}).hint({ nums: 1 }).count()
0
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $gt: [] }}).hint({ _id: 1 }).count()
5
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $gt: [] }}).explain('executionStats').executionStats.executionStages
{
"stage": "KEEP_MUTATIONS",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"inputStage": {
"stage": "FETCH",
"filter": {
"nums": {
"$gt": [ ]
}
},
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 0,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"nums": 1
},
"indexName": "nums_1",
"isMultiKey": true,
"direction": "forward",
"indexBounds": {
"nums": [
"([], BinData(0, ))"
]
},
"keysExamined": 0,
"dupsTested": 0,
"dupsDropped": 0,
"seenInvalidated": 0,
"matchTested": 0
}
}
}
--
db.doc.find({'nums.0’: { $gt: -Infinity }})
returns correct results, but has bad performance (uses a full collection scan).
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums.0': { $gt: -Infinity }}).count()
5
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums.0': { $gt: -Infinity }}).explain('executionStats').executionStats.executionStages
{
"stage": "COLLSCAN",
"filter": {
"nums.0": {
"$gt": -Infinity
}
},
"nReturned": 5,
"executionTimeMillisEstimate": 0,
"works": 2007,
"advanced": 5,
"needTime": 2001,
"needFetch": 0,
"saveState": 15,
"restoreState": 15,
"isEOF": 1,
"invalidates": 0,
"direction": "forward",
"docsExamined": 2005
}
--
db.doc.find({'nums': { $gt: -Infinity }})
surprisingly, this works very well! It gives the right results and it's fast, advancing 5 docs from the index scan phase.
MacBook-Pro(mongod-3.0.14) test> db.doc.find({'nums': { $gt: -Infinity }}).explain('executionStats').executionStats.executionStages
{
"stage": "FETCH",
"nReturned": 5,
"executionTimeMillisEstimate": 0,
"works": 16,
"advanced": 5,
"needTime": 10,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 5,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 5,
"executionTimeMillisEstimate": 0,
"works": 15,
"advanced": 5,
"needTime": 10,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"nums": 1
},
"indexName": "nums_1",
"isMultiKey": true,
"direction": "forward",
"indexBounds": {
"nums": [
"(-inf.0, inf.0]"
]
},
"keysExamined": 15,
"dupsTested": 15,
"dupsDropped": 10,
"seenInvalidated": 0,
"matchTested": 0
}
}
This might also work for you:
ME.find({'pictures.0': {$exists: true}});
If you also have documents that don't have the key, you can use:
ME.find({ pictures: { $exists: true, $not: {$size: 0} } })
MongoDB doesn't use indexes if $size
is involved, so here is a better solution:
ME.find({ pictures: { $exists: true, $ne: [] } })
If your property can have invalid values (like null
boolean
or others) , then you an add an additional check using $types
as proposed in this answer:
With mongo >= 3.2:
ME.find({ pictures: { $exists: true, $type: 'array', $ne: [] } })
With mongo < 3.2:
ME.find({ pictures: { $exists: true, $type: 4, $ne: [] } })
Since the MongoDB 2.6 release, you can compare with the operator $gt
, but this could lead to unexpected results (you can find a detailed explanation in this answer):
ME.find({ pictures: { $gt: [] } })