Short-circuit row group evaluation

By returning as soon as one constraint rules out a row group, we avoid eagerly computing bitmasks for the remaining constraints.

Possible future work: order the constraints so that the one that is
cheapest to evaluate, or most likely to prune a row group, is checked first.

This reduces the cyclist query from ~65ms to ~60ms.
This commit is contained in:
Colin Dellow 2018-06-24 11:08:56 -04:00
parent fd87c44ccd
commit 16cdd70f2b
2 changed files with 21 additions and 6 deletions

View File

@ -18,13 +18,15 @@ SQLITE_EXTENSION_INIT1
#include <ctype.h> #include <ctype.h>
#include <stdio.h> #include <stdio.h>
#include <iomanip> #include <iomanip>
#include <sys/time.h>
#include <memory> #include <memory>
#include "parquet_table.h" #include "parquet_table.h"
#include "parquet_cursor.h" #include "parquet_cursor.h"
#include "parquet_filter.h" #include "parquet_filter.h"
//#define DEBUG
/* Forward references to the various virtual table methods implemented /* Forward references to the various virtual table methods implemented
* in this file. */ * in this file. */
static int parquetCreate(sqlite3*, void*, int, const char*const*, static int parquetCreate(sqlite3*, void*, int, const char*const*,
@ -532,7 +534,13 @@ static int parquetFilter(
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr; sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
#ifdef DEBUG #ifdef DEBUG
printf("xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", idxNum, (long unsigned int)idxStr, argc); struct timeval tv;
gettimeofday(&tv, NULL);
unsigned long long millisecondsSinceEpoch =
(unsigned long long)(tv.tv_sec) * 1000 +
(unsigned long long)(tv.tv_usec) / 1000;
printf("%llu xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", millisecondsSinceEpoch, idxNum, (long unsigned int)idxStr, argc);
debugConstraints(indexInfo, cursor->getTable(), argc, argv); debugConstraints(indexInfo, cursor->getTable(), argc, argv);
#endif #endif
std::vector<Constraint> constraints; std::vector<Constraint> constraints;
@ -631,8 +639,15 @@ static int parquetBestIndex(
try { try {
#ifdef DEBUG #ifdef DEBUG
struct timeval tv;
gettimeofday(&tv, NULL);
unsigned long long millisecondsSinceEpoch =
(unsigned long long)(tv.tv_sec) * 1000 +
(unsigned long long)(tv.tv_usec) / 1000;
ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table; ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table;
printf("xBestIndex: nConstraint=%d, nOrderBy=%d\n", pIdxInfo->nConstraint, pIdxInfo->nOrderBy); printf("%llu xBestIndex: nConstraint=%d, nOrderBy=%d\n", millisecondsSinceEpoch, pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
debugConstraints(pIdxInfo, table, 0, NULL); debugConstraints(pIdxInfo, table, 0, NULL);
#endif #endif
@ -647,6 +662,7 @@ static int parquetBestIndex(
if(pIdxInfo->aConstraint[i].usable) { if(pIdxInfo->aConstraint[i].usable) {
j++; j++;
pIdxInfo->aConstraintUsage[i].argvIndex = j; pIdxInfo->aConstraintUsage[i].argvIndex = j;
// pIdxInfo->aConstraintUsage[i].omit = 1;
} }
} }
} }

View File

@ -517,7 +517,6 @@ bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) {
// This avoids opening rowgroups that can't return useful // This avoids opening rowgroups that can't return useful
// data, which provides substantial performance benefits. // data, which provides substantial performance benefits.
bool ParquetCursor::currentRowGroupSatisfiesFilter() { bool ParquetCursor::currentRowGroupSatisfiesFilter() {
bool overallRv = true;
for(unsigned int i = 0; i < constraints.size(); i++) { for(unsigned int i = 0; i < constraints.size(); i++) {
int column = constraints[i].column; int column = constraints[i].column;
int op = constraints[i].op; int op = constraints[i].op;
@ -567,12 +566,12 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() {
if(!rv) { if(!rv) {
constraints[i].bitmap.setEstimatedMembership(rowGroupId, rv); constraints[i].bitmap.setEstimatedMembership(rowGroupId, rv);
constraints[i].bitmap.setActualMembership(rowGroupId, rv); constraints[i].bitmap.setActualMembership(rowGroupId, rv);
return rv;
} }
overallRv = overallRv && rv;
} }
// printf("rowGroup %d %s\n", rowGroupId, overallRv ? "may satisfy" : "does not satisfy"); // printf("rowGroup %d %s\n", rowGroupId, overallRv ? "may satisfy" : "does not satisfy");
return overallRv; return true;
} }