Short-circuit row group evaluation
By returning as soon as one constraint rules out a row group, we can avoid eagerly computing bitmasks for the other constraints. Possible future work: order the constraints so that the one that is cheapest or most likely to prune a row group is evaluated first. This reduces the cyclist query from ~65ms to ~60ms.
This commit is contained in:
parent
fd87c44ccd
commit
16cdd70f2b
|
@ -18,13 +18,15 @@ SQLITE_EXTENSION_INIT1
|
|||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <iomanip>
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <memory>
|
||||
|
||||
#include "parquet_table.h"
|
||||
#include "parquet_cursor.h"
|
||||
#include "parquet_filter.h"
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
/* Forward references to the various virtual table methods implemented
|
||||
* in this file. */
|
||||
static int parquetCreate(sqlite3*, void*, int, const char*const*,
|
||||
|
@ -532,7 +534,13 @@ static int parquetFilter(
|
|||
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
|
||||
|
||||
#ifdef DEBUG
|
||||
printf("xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", idxNum, (long unsigned int)idxStr, argc);
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
unsigned long long millisecondsSinceEpoch =
|
||||
(unsigned long long)(tv.tv_sec) * 1000 +
|
||||
(unsigned long long)(tv.tv_usec) / 1000;
|
||||
|
||||
printf("%llu xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", millisecondsSinceEpoch, idxNum, (long unsigned int)idxStr, argc);
|
||||
debugConstraints(indexInfo, cursor->getTable(), argc, argv);
|
||||
#endif
|
||||
std::vector<Constraint> constraints;
|
||||
|
@ -631,8 +639,15 @@ static int parquetBestIndex(
|
|||
try {
|
||||
|
||||
#ifdef DEBUG
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
unsigned long long millisecondsSinceEpoch =
|
||||
(unsigned long long)(tv.tv_sec) * 1000 +
|
||||
(unsigned long long)(tv.tv_usec) / 1000;
|
||||
|
||||
|
||||
ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table;
|
||||
printf("xBestIndex: nConstraint=%d, nOrderBy=%d\n", pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
|
||||
printf("%llu xBestIndex: nConstraint=%d, nOrderBy=%d\n", millisecondsSinceEpoch, pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
|
||||
debugConstraints(pIdxInfo, table, 0, NULL);
|
||||
#endif
|
||||
|
||||
|
@ -647,6 +662,7 @@ static int parquetBestIndex(
|
|||
if(pIdxInfo->aConstraint[i].usable) {
|
||||
j++;
|
||||
pIdxInfo->aConstraintUsage[i].argvIndex = j;
|
||||
// pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -517,7 +517,6 @@ bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) {
|
|||
// This avoids opening rowgroups that can't return useful
|
||||
// data, which provides substantial performance benefits.
|
||||
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
||||
bool overallRv = true;
|
||||
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||
int column = constraints[i].column;
|
||||
int op = constraints[i].op;
|
||||
|
@ -567,12 +566,12 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
|||
if(!rv) {
|
||||
constraints[i].bitmap.setEstimatedMembership(rowGroupId, rv);
|
||||
constraints[i].bitmap.setActualMembership(rowGroupId, rv);
|
||||
return rv;
|
||||
}
|
||||
overallRv = overallRv && rv;
|
||||
}
|
||||
|
||||
// printf("rowGroup %d %s\n", rowGroupId, overallRv ? "may satisfy" : "does not satisfy");
|
||||
return overallRv;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue