Short-circuit row group evaluation
Returning as soon as one constraint rules out a row group lets us avoid eagerly computing bitmasks for the remaining constraints. Possible future work: order the constraints so that the one that is cheapest to evaluate, or most likely to prune a row group, is checked first. This change reduces the cyclist query from ~65ms to ~60ms.
This commit is contained in:
parent
fd87c44ccd
commit
16cdd70f2b
|
@ -18,13 +18,15 @@ SQLITE_EXTENSION_INIT1
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
#include <sys/time.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "parquet_table.h"
|
#include "parquet_table.h"
|
||||||
#include "parquet_cursor.h"
|
#include "parquet_cursor.h"
|
||||||
#include "parquet_filter.h"
|
#include "parquet_filter.h"
|
||||||
|
|
||||||
|
//#define DEBUG
|
||||||
|
|
||||||
/* Forward references to the various virtual table methods implemented
|
/* Forward references to the various virtual table methods implemented
|
||||||
* in this file. */
|
* in this file. */
|
||||||
static int parquetCreate(sqlite3*, void*, int, const char*const*,
|
static int parquetCreate(sqlite3*, void*, int, const char*const*,
|
||||||
|
@ -532,7 +534,13 @@ static int parquetFilter(
|
||||||
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
|
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", idxNum, (long unsigned int)idxStr, argc);
|
struct timeval tv;
|
||||||
|
gettimeofday(&tv, NULL);
|
||||||
|
unsigned long long millisecondsSinceEpoch =
|
||||||
|
(unsigned long long)(tv.tv_sec) * 1000 +
|
||||||
|
(unsigned long long)(tv.tv_usec) / 1000;
|
||||||
|
|
||||||
|
printf("%llu xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", millisecondsSinceEpoch, idxNum, (long unsigned int)idxStr, argc);
|
||||||
debugConstraints(indexInfo, cursor->getTable(), argc, argv);
|
debugConstraints(indexInfo, cursor->getTable(), argc, argv);
|
||||||
#endif
|
#endif
|
||||||
std::vector<Constraint> constraints;
|
std::vector<Constraint> constraints;
|
||||||
|
@ -631,8 +639,15 @@ static int parquetBestIndex(
|
||||||
try {
|
try {
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
struct timeval tv;
|
||||||
|
gettimeofday(&tv, NULL);
|
||||||
|
unsigned long long millisecondsSinceEpoch =
|
||||||
|
(unsigned long long)(tv.tv_sec) * 1000 +
|
||||||
|
(unsigned long long)(tv.tv_usec) / 1000;
|
||||||
|
|
||||||
|
|
||||||
ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table;
|
ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table;
|
||||||
printf("xBestIndex: nConstraint=%d, nOrderBy=%d\n", pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
|
printf("%llu xBestIndex: nConstraint=%d, nOrderBy=%d\n", millisecondsSinceEpoch, pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
|
||||||
debugConstraints(pIdxInfo, table, 0, NULL);
|
debugConstraints(pIdxInfo, table, 0, NULL);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -647,6 +662,7 @@ static int parquetBestIndex(
|
||||||
if(pIdxInfo->aConstraint[i].usable) {
|
if(pIdxInfo->aConstraint[i].usable) {
|
||||||
j++;
|
j++;
|
||||||
pIdxInfo->aConstraintUsage[i].argvIndex = j;
|
pIdxInfo->aConstraintUsage[i].argvIndex = j;
|
||||||
|
// pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -517,7 +517,6 @@ bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) {
|
||||||
// This avoids opening rowgroups that can't return useful
|
// This avoids opening rowgroups that can't return useful
|
||||||
// data, which provides substantial performance benefits.
|
// data, which provides substantial performance benefits.
|
||||||
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
||||||
bool overallRv = true;
|
|
||||||
for(unsigned int i = 0; i < constraints.size(); i++) {
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
int column = constraints[i].column;
|
int column = constraints[i].column;
|
||||||
int op = constraints[i].op;
|
int op = constraints[i].op;
|
||||||
|
@ -567,12 +566,12 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
||||||
if(!rv) {
|
if(!rv) {
|
||||||
constraints[i].bitmap.setEstimatedMembership(rowGroupId, rv);
|
constraints[i].bitmap.setEstimatedMembership(rowGroupId, rv);
|
||||||
constraints[i].bitmap.setActualMembership(rowGroupId, rv);
|
constraints[i].bitmap.setActualMembership(rowGroupId, rv);
|
||||||
|
return rv;
|
||||||
}
|
}
|
||||||
overallRv = overallRv && rv;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// printf("rowGroup %d %s\n", rowGroupId, overallRv ? "may satisfy" : "does not satisfy");
|
// printf("rowGroup %d %s\n", rowGroupId, overallRv ? "may satisfy" : "does not satisfy");
|
||||||
return overallRv;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue