Cache clauses -> row group mapping
Create a shadow table. For `stats`, it'd be `_stats_rowgroups`. It contains three columns: - the clause (eg `city = 'Dawson Creek'`) - the initial estimate, as a bitmap of rowgroups based on stats - the actual observed rowgroups, as a bitmap This papers over poorly sorted parquet files, at the cost of some disk space. It makes interactive queries much more natural -- drilldown style queries are much faster, as they can leverage work done by previous queries. eg 'SELECT * FROM stats WHERE city = 'Dawson Creek' and question_id >= 1935 and question_id <= 1940` takes ~584ms on first run, but 9ms on subsequent runs. We only create entries when the estimates don't match the actual results. Fixes #6
This commit is contained in:
parent
d2c736f25a
commit
d3ab5ff3e7
12
README.md
12
README.md
|
@ -72,6 +72,18 @@ constraints before returning control to SQLite's virtual machine. This minimizes
|
||||||
the number of allocations performed when many rows are filtered out by
|
the number of allocations performed when many rows are filtered out by
|
||||||
the user's criteria.
|
the user's criteria.
|
||||||
|
|
||||||
|
### Memoized slices
|
||||||
|
|
||||||
|
Individual clauses are mapped to the row groups they match.
|
||||||
|
|
||||||
|
eg going on row group statistics, which store minimum and maximum values, a clause
|
||||||
|
like `WHERE city = 'Dawson Creek'` may match 80% of row groups.
|
||||||
|
|
||||||
|
In reality, it may only be present in one or two row groups.
|
||||||
|
|
||||||
|
This is recorded in a shadow table so future queries that contain that clause
|
||||||
|
can read only the necessary row groups.
|
||||||
|
|
||||||
### Types
|
### Types
|
||||||
|
|
||||||
These Parquet types are supported:
|
These Parquet types are supported:
|
||||||
|
|
|
@ -17,6 +17,7 @@ SQLITE_EXTENSION_INIT1
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
@ -32,6 +33,7 @@ static int parquetConnect(sqlite3*, void*, int, const char*const*,
|
||||||
sqlite3_vtab**,char**);
|
sqlite3_vtab**,char**);
|
||||||
static int parquetBestIndex(sqlite3_vtab*,sqlite3_index_info*);
|
static int parquetBestIndex(sqlite3_vtab*,sqlite3_index_info*);
|
||||||
static int parquetDisconnect(sqlite3_vtab*);
|
static int parquetDisconnect(sqlite3_vtab*);
|
||||||
|
static int parquetDestroy(sqlite3_vtab*);
|
||||||
static int parquetOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
|
static int parquetOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
|
||||||
static int parquetClose(sqlite3_vtab_cursor*);
|
static int parquetClose(sqlite3_vtab_cursor*);
|
||||||
static int parquetFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
|
static int parquetFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
|
||||||
|
@ -45,6 +47,7 @@ static int parquetRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
|
||||||
typedef struct sqlite3_vtab_parquet {
|
typedef struct sqlite3_vtab_parquet {
|
||||||
sqlite3_vtab base; /* Base class. Must be first */
|
sqlite3_vtab base; /* Base class. Must be first */
|
||||||
ParquetTable* table;
|
ParquetTable* table;
|
||||||
|
sqlite3* db;
|
||||||
} sqlite3_vtab_parquet;
|
} sqlite3_vtab_parquet;
|
||||||
|
|
||||||
|
|
||||||
|
@ -54,6 +57,21 @@ typedef struct sqlite3_vtab_cursor_parquet {
|
||||||
ParquetCursor* cursor;
|
ParquetCursor* cursor;
|
||||||
} sqlite3_vtab_cursor_parquet;
|
} sqlite3_vtab_cursor_parquet;
|
||||||
|
|
||||||
|
static int parquetDestroy(sqlite3_vtab *pVtab) {
|
||||||
|
sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet*)pVtab;
|
||||||
|
|
||||||
|
// Clean up our shadow table. This is useful if the user has recreated
|
||||||
|
// the parquet file, and our mappings would now be invalid.
|
||||||
|
std::string drop = "DROP TABLE IF EXISTS _";
|
||||||
|
drop.append(p->table->getTableName());
|
||||||
|
drop.append("_rowgroups");
|
||||||
|
int rv = sqlite3_exec(p->db, drop.data(), 0, 0, 0);
|
||||||
|
if(rv != 0)
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
return SQLITE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** This method is the destructor fo a sqlite3_vtab_parquet object.
|
** This method is the destructor fo a sqlite3_vtab_parquet object.
|
||||||
*/
|
*/
|
||||||
|
@ -78,6 +96,7 @@ static int parquetConnect(
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string tableName = argv[2];
|
||||||
// Remove the delimiting single quotes
|
// Remove the delimiting single quotes
|
||||||
std::string fname = argv[3];
|
std::string fname = argv[3];
|
||||||
fname = fname.substr(1, fname.length() - 2);
|
fname = fname.substr(1, fname.length() - 2);
|
||||||
|
@ -87,7 +106,7 @@ static int parquetConnect(
|
||||||
memset(vtab.get(), 0, sizeof(*vtab.get()));
|
memset(vtab.get(), 0, sizeof(*vtab.get()));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
std::unique_ptr<ParquetTable> table(new ParquetTable(fname));
|
std::unique_ptr<ParquetTable> table(new ParquetTable(fname, tableName));
|
||||||
|
|
||||||
std::string create = table->CreateStatement();
|
std::string create = table->CreateStatement();
|
||||||
int rc = sqlite3_declare_vtab(db, create.data());
|
int rc = sqlite3_declare_vtab(db, create.data());
|
||||||
|
@ -95,6 +114,7 @@ static int parquetConnect(
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
vtab->table = table.release();
|
vtab->table = table.release();
|
||||||
|
vtab->db = db;
|
||||||
*ppVtab = (sqlite3_vtab*)vtab.release();
|
*ppVtab = (sqlite3_vtab*)vtab.release();
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
} catch (const std::exception& e) {
|
} catch (const std::exception& e) {
|
||||||
|
@ -119,16 +139,81 @@ static int parquetCreate(
|
||||||
sqlite3_vtab **ppVtab,
|
sqlite3_vtab **ppVtab,
|
||||||
char **pzErr
|
char **pzErr
|
||||||
){
|
){
|
||||||
return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr);
|
try {
|
||||||
|
// Create shadow table for storing constraint -> rowid mappings
|
||||||
|
std::string create = "CREATE TABLE IF NOT EXISTS _";
|
||||||
|
create.append(argv[2]);
|
||||||
|
create.append("_rowgroups(clause TEXT, estimate BLOB, actual BLOB)");
|
||||||
|
int rv = sqlite3_exec(db, create.data(), 0, 0, 0);
|
||||||
|
if(rv != 0)
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
create = "CREATE UNIQUE INDEX IF NOT EXISTS _";
|
||||||
|
create.append(argv[2]);
|
||||||
|
create.append("_index ON _");
|
||||||
|
create.append(argv[2]);
|
||||||
|
create.append("_rowgroups(clause)");
|
||||||
|
rv = sqlite3_exec(db, create.data(), 0, 0, 0);
|
||||||
|
|
||||||
|
return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr);
|
||||||
|
} catch (std::bad_alloc& ba) {
|
||||||
|
return SQLITE_NOMEM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string quoteBlob(const std::vector<unsigned char>& bytes) {
|
||||||
|
std::ostringstream ss;
|
||||||
|
ss << "X'" << std::hex;
|
||||||
|
for(unsigned int i = 0; i < bytes.size(); i++) {
|
||||||
|
ss << std::setfill('0') << std::setw(2) << (unsigned int)(unsigned char)bytes[i];
|
||||||
|
}
|
||||||
|
ss << "'";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
void persistConstraints(sqlite3* db, ParquetCursor* cursor) {
|
||||||
|
for(unsigned int i = 0; i < cursor->getNumConstraints(); i++) {
|
||||||
|
const Constraint& constraint = cursor->getConstraint(i);
|
||||||
|
const std::vector<unsigned char>& estimated = constraint.bitmap.estimatedMembership;
|
||||||
|
const std::vector<unsigned char>& actual = constraint.bitmap.actualMembership;
|
||||||
|
if(estimated == actual) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
std::string desc = constraint.describe();
|
||||||
|
|
||||||
|
std::string estimatedStr = quoteBlob(estimated);
|
||||||
|
std::string actualStr = quoteBlob(actual);
|
||||||
|
|
||||||
|
// This is only advisory, so ignore failures.
|
||||||
|
char* sql = sqlite3_mprintf(
|
||||||
|
"INSERT OR REPLACE INTO _%s_rowgroups(clause, estimate, actual) VALUES ('%q', %s, %s)",
|
||||||
|
cursor->getTable()->getTableName().c_str(),
|
||||||
|
desc.c_str(),
|
||||||
|
estimatedStr.c_str(),
|
||||||
|
actualStr.c_str());
|
||||||
|
|
||||||
|
|
||||||
|
if(sql == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
sqlite3_exec(db, sql, 0, 0, 0);
|
||||||
|
sqlite3_free(sql);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Destructor for a sqlite3_vtab_cursor_parquet.
|
** Destructor for a sqlite3_vtab_cursor_parquet.
|
||||||
*/
|
*/
|
||||||
static int parquetClose(sqlite3_vtab_cursor *cur){
|
static int parquetClose(sqlite3_vtab_cursor *cur){
|
||||||
sqlite3_vtab_cursor_parquet* p = (sqlite3_vtab_cursor_parquet*)cur;
|
sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur;
|
||||||
p->cursor->close();
|
sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab);
|
||||||
delete p->cursor;
|
ParquetCursor* cursor = vtab_cursor_parquet->cursor;
|
||||||
|
persistConstraints(vtab_parquet->db, cursor);
|
||||||
|
|
||||||
|
vtab_cursor_parquet->cursor->close();
|
||||||
|
delete vtab_cursor_parquet->cursor;
|
||||||
sqlite3_free(cur);
|
sqlite3_free(cur);
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
|
@ -196,7 +281,8 @@ const char* opName(int op) {
|
||||||
*/
|
*/
|
||||||
static int parquetNext(sqlite3_vtab_cursor *cur){
|
static int parquetNext(sqlite3_vtab_cursor *cur){
|
||||||
try {
|
try {
|
||||||
ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
|
sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur;
|
||||||
|
ParquetCursor* cursor = vtab_cursor_parquet->cursor;
|
||||||
cursor->next();
|
cursor->next();
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
} catch(std::bad_alloc& ba) {
|
} catch(std::bad_alloc& ba) {
|
||||||
|
@ -395,6 +481,38 @@ ConstraintOperator constraintOperatorFromSqlite(int op) {
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<unsigned char> getRowGroupsForClause(sqlite3* db, std::string table, std::string clause) {
|
||||||
|
std::vector<unsigned char> rv;
|
||||||
|
|
||||||
|
std::unique_ptr<char, void(*)(void*)> sql(sqlite3_mprintf(
|
||||||
|
"SELECT actual FROM _%s_rowgroups WHERE clause = '%q'",
|
||||||
|
table.c_str(),
|
||||||
|
clause.c_str()), sqlite3_free);
|
||||||
|
|
||||||
|
if(sql.get() == NULL)
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
sqlite3_stmt* pStmt = NULL;
|
||||||
|
int rc = sqlite3_prepare_v2(db, sql.get(), -1, &pStmt, NULL);
|
||||||
|
if(rc != 0)
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
rc = sqlite3_step(pStmt);
|
||||||
|
if(rc == SQLITE_ROW) {
|
||||||
|
int size = sqlite3_column_bytes(pStmt, 0);
|
||||||
|
unsigned char* blob = (unsigned char*)sqlite3_column_blob(pStmt, 0);
|
||||||
|
// TODO: there is a memory leak here if we get a std::bad_alloc while populating rv;
|
||||||
|
// we fail to free pStmt
|
||||||
|
for(int i = 0; i < size; i++) {
|
||||||
|
rv.push_back(blob[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlite3_finalize(pStmt);
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Only a full table scan is supported. So xFilter simply rewinds to
|
** Only a full table scan is supported. So xFilter simply rewinds to
|
||||||
** the beginning.
|
** the beginning.
|
||||||
|
@ -407,7 +525,10 @@ static int parquetFilter(
|
||||||
sqlite3_value **argv
|
sqlite3_value **argv
|
||||||
){
|
){
|
||||||
try {
|
try {
|
||||||
ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
|
sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur;
|
||||||
|
sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab);
|
||||||
|
sqlite3* db = vtab_parquet->db;
|
||||||
|
ParquetCursor* cursor = vtab_cursor_parquet->cursor;
|
||||||
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
|
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
@ -451,13 +572,40 @@ static int parquetFilter(
|
||||||
type = Null;
|
type = Null;
|
||||||
}
|
}
|
||||||
|
|
||||||
Constraint constraint(
|
std::string columnName = "rowid";
|
||||||
|
if(indexInfo->aConstraint[i].iColumn >= 0) {
|
||||||
|
columnName = cursor->getTable()->columnName(indexInfo->aConstraint[i].iColumn);
|
||||||
|
}
|
||||||
|
|
||||||
|
RowGroupBitmap bitmap = RowGroupBitmap(cursor->getNumRowGroups());
|
||||||
|
Constraint dummy(
|
||||||
|
bitmap,
|
||||||
indexInfo->aConstraint[i].iColumn,
|
indexInfo->aConstraint[i].iColumn,
|
||||||
|
columnName,
|
||||||
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op),
|
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op),
|
||||||
type,
|
type,
|
||||||
intValue,
|
intValue,
|
||||||
doubleValue,
|
doubleValue,
|
||||||
blobValue);
|
blobValue);
|
||||||
|
|
||||||
|
std::vector<unsigned char> actual = getRowGroupsForClause(db, cursor->getTable()->getTableName(), dummy.describe());
|
||||||
|
if(actual.size() > 0) {
|
||||||
|
// Initialize the estimate to be the actual -- eventually they'll converge
|
||||||
|
// and we'll stop writing back to the db.
|
||||||
|
std::vector<unsigned char> estimate = actual;
|
||||||
|
bitmap = RowGroupBitmap(estimate, actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
Constraint constraint(
|
||||||
|
bitmap,
|
||||||
|
indexInfo->aConstraint[i].iColumn,
|
||||||
|
columnName,
|
||||||
|
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op),
|
||||||
|
type,
|
||||||
|
intValue,
|
||||||
|
doubleValue,
|
||||||
|
blobValue);
|
||||||
|
|
||||||
constraints.push_back(constraint);
|
constraints.push_back(constraint);
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
|
@ -555,7 +703,7 @@ static sqlite3_module ParquetModule = {
|
||||||
parquetConnect, /* xConnect */
|
parquetConnect, /* xConnect */
|
||||||
parquetBestIndex, /* xBestIndex */
|
parquetBestIndex, /* xBestIndex */
|
||||||
parquetDisconnect, /* xDisconnect */
|
parquetDisconnect, /* xDisconnect */
|
||||||
parquetDisconnect, /* xDestroy */
|
parquetDestroy, /* xDestroy */
|
||||||
parquetOpen, /* xOpen - open a cursor */
|
parquetOpen, /* xOpen - open a cursor */
|
||||||
parquetClose, /* xClose - close a cursor */
|
parquetClose, /* xClose - close a cursor */
|
||||||
parquetFilter, /* xFilter - configure scan constraints */
|
parquetFilter, /* xFilter - configure scan constraints */
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#include "parquet_cursor.h"
|
#include "parquet_cursor.h"
|
||||||
|
|
||||||
ParquetCursor::ParquetCursor(ParquetTable* table) {
|
ParquetCursor::ParquetCursor(ParquetTable* table): table(table) {
|
||||||
this->table = table;
|
|
||||||
reader = NULL;
|
reader = NULL;
|
||||||
reset(std::vector<Constraint>());
|
reset(std::vector<Constraint>());
|
||||||
}
|
}
|
||||||
|
@ -518,6 +517,7 @@ bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) {
|
||||||
// This avoids opening rowgroups that can't return useful
|
// This avoids opening rowgroups that can't return useful
|
||||||
// data, which provides substantial performance benefits.
|
// data, which provides substantial performance benefits.
|
||||||
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
||||||
|
bool overallRv = true;
|
||||||
for(unsigned int i = 0; i < constraints.size(); i++) {
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
int column = constraints[i].column;
|
int column = constraints[i].column;
|
||||||
int op = constraints[i].op;
|
int op = constraints[i].op;
|
||||||
|
@ -527,47 +527,52 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
||||||
rv = currentRowGroupSatisfiesRowIdFilter(constraints[i]);
|
rv = currentRowGroupSatisfiesRowIdFilter(constraints[i]);
|
||||||
} else {
|
} else {
|
||||||
std::unique_ptr<parquet::ColumnChunkMetaData> md = rowGroupMetadata->ColumnChunk(column);
|
std::unique_ptr<parquet::ColumnChunkMetaData> md = rowGroupMetadata->ColumnChunk(column);
|
||||||
if(!md->is_stats_set()) {
|
if(md->is_stats_set()) {
|
||||||
continue;
|
std::shared_ptr<parquet::RowGroupStatistics> stats = md->statistics();
|
||||||
}
|
|
||||||
std::shared_ptr<parquet::RowGroupStatistics> stats = md->statistics();
|
|
||||||
|
|
||||||
// SQLite is much looser with types than you might expect if you
|
// SQLite is much looser with types than you might expect if you
|
||||||
// come from a Postgres background. The constraint '30.0' (that is,
|
// come from a Postgres background. The constraint '30.0' (that is,
|
||||||
// a string containing a floating point number) should be treated
|
// a string containing a floating point number) should be treated
|
||||||
// as equal to a field containing an integer 30.
|
// as equal to a field containing an integer 30.
|
||||||
//
|
//
|
||||||
// This means that even if the parquet physical type is integer,
|
// This means that even if the parquet physical type is integer,
|
||||||
// the constraint type may be a string, so dispatch to the filter
|
// the constraint type may be a string, so dispatch to the filter
|
||||||
// fn based on the Parquet type.
|
// fn based on the Parquet type.
|
||||||
|
|
||||||
if(op == IsNull) {
|
if(op == IsNull) {
|
||||||
rv = stats->null_count() > 0;
|
rv = stats->null_count() > 0;
|
||||||
} else if(op == IsNotNull) {
|
} else if(op == IsNotNull) {
|
||||||
rv = stats->num_values() > 0;
|
rv = stats->num_values() > 0;
|
||||||
} else {
|
} else {
|
||||||
parquet::Type::type pqType = types[column];
|
parquet::Type::type pqType = types[column];
|
||||||
|
|
||||||
if(pqType == parquet::Type::BYTE_ARRAY && logicalTypes[column] == parquet::LogicalType::UTF8) {
|
if(pqType == parquet::Type::BYTE_ARRAY && logicalTypes[column] == parquet::LogicalType::UTF8) {
|
||||||
rv = currentRowGroupSatisfiesTextFilter(constraints[i], stats);
|
rv = currentRowGroupSatisfiesTextFilter(constraints[i], stats);
|
||||||
} else if(pqType == parquet::Type::BYTE_ARRAY) {
|
} else if(pqType == parquet::Type::BYTE_ARRAY) {
|
||||||
rv = currentRowGroupSatisfiesBlobFilter(constraints[i], stats);
|
rv = currentRowGroupSatisfiesBlobFilter(constraints[i], stats);
|
||||||
} else if(pqType == parquet::Type::INT32 ||
|
} else if(pqType == parquet::Type::INT32 ||
|
||||||
pqType == parquet::Type::INT64 ||
|
pqType == parquet::Type::INT64 ||
|
||||||
pqType == parquet::Type::INT96 ||
|
pqType == parquet::Type::INT96 ||
|
||||||
pqType == parquet::Type::BOOLEAN) {
|
pqType == parquet::Type::BOOLEAN) {
|
||||||
rv = currentRowGroupSatisfiesIntegerFilter(constraints[i], stats);
|
rv = currentRowGroupSatisfiesIntegerFilter(constraints[i], stats);
|
||||||
} else if(pqType == parquet::Type::FLOAT || pqType == parquet::Type::DOUBLE) {
|
} else if(pqType == parquet::Type::FLOAT || pqType == parquet::Type::DOUBLE) {
|
||||||
rv = currentRowGroupSatisfiesDoubleFilter(constraints[i], stats);
|
rv = currentRowGroupSatisfiesDoubleFilter(constraints[i], stats);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!rv)
|
// and it with the existing actual, which may have come from a previous run
|
||||||
return false;
|
rv = rv && constraints[i].bitmap.getActualMembership(rowGroupId);
|
||||||
|
if(!rv) {
|
||||||
|
constraints[i].bitmap.setEstimatedMembership(rowGroupId, rv);
|
||||||
|
constraints[i].bitmap.setActualMembership(rowGroupId, rv);
|
||||||
|
}
|
||||||
|
overallRv = overallRv && rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
// printf("rowGroup %d %s\n", rowGroupId, overallRv ? "may satisfy" : "does not satisfy");
|
||||||
|
return overallRv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -609,9 +614,22 @@ start:
|
||||||
// Increment rowId so currentRowGroupSatisfiesRowIdFilter can access it;
|
// Increment rowId so currentRowGroupSatisfiesRowIdFilter can access it;
|
||||||
// it'll get decremented by our caller
|
// it'll get decremented by our caller
|
||||||
rowId++;
|
rowId++;
|
||||||
|
|
||||||
|
// We're going to scan this row group; reset the expectation of discovering
|
||||||
|
// a row
|
||||||
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
|
if(rowGroupId > 0 && constraints[i].rowGroupId == rowGroupId - 1) {
|
||||||
|
constraints[i].bitmap.setActualMembership(rowGroupId - 1, constraints[i].hadRows);
|
||||||
|
}
|
||||||
|
constraints[i].hadRows = false;
|
||||||
|
}
|
||||||
|
|
||||||
if(!currentRowGroupSatisfiesFilter())
|
if(!currentRowGroupSatisfiesFilter())
|
||||||
goto start;
|
goto start;
|
||||||
|
|
||||||
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
|
constraints[i].rowGroupId = rowGroupId;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -623,6 +641,7 @@ start:
|
||||||
// and the extension, which can add up on a dataset of tens
|
// and the extension, which can add up on a dataset of tens
|
||||||
// of millions of rows.
|
// of millions of rows.
|
||||||
bool ParquetCursor::currentRowSatisfiesFilter() {
|
bool ParquetCursor::currentRowSatisfiesFilter() {
|
||||||
|
bool overallRv = true;
|
||||||
for(unsigned int i = 0; i < constraints.size(); i++) {
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
bool rv = true;
|
bool rv = true;
|
||||||
int column = constraints[i].column;
|
int column = constraints[i].column;
|
||||||
|
@ -648,13 +667,18 @@ bool ParquetCursor::currentRowSatisfiesFilter() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!rv)
|
// it defaults to false; so only set it if true
|
||||||
return false;
|
// ideally we'd short-circuit if we'd already set this group as visited
|
||||||
|
if(rv) {
|
||||||
|
constraints[i].hadRows = true;
|
||||||
|
}
|
||||||
|
overallRv = overallRv && rv;
|
||||||
}
|
}
|
||||||
return true;
|
return overallRv;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ParquetCursor::next() {
|
void ParquetCursor::next() {
|
||||||
|
// Returns true if we've crossed a row group boundary
|
||||||
start:
|
start:
|
||||||
if(rowsLeftInRowGroup == 0) {
|
if(rowsLeftInRowGroup == 0) {
|
||||||
if(!nextRowGroup()) {
|
if(!nextRowGroup()) {
|
||||||
|
@ -672,7 +696,6 @@ start:
|
||||||
rowId++;
|
rowId++;
|
||||||
if(!currentRowSatisfiesFilter())
|
if(!currentRowSatisfiesFilter())
|
||||||
goto start;
|
goto start;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int ParquetCursor::getRowId() {
|
int ParquetCursor::getRowId() {
|
||||||
|
@ -939,7 +962,7 @@ void ParquetCursor::reset(std::vector<Constraint> constraints) {
|
||||||
// TODO: consider having a long lived handle in ParquetTable that can be borrowed
|
// TODO: consider having a long lived handle in ParquetTable that can be borrowed
|
||||||
// without incurring the cost of opening the file from scratch twice
|
// without incurring the cost of opening the file from scratch twice
|
||||||
reader = parquet::ParquetFileReader::OpenFile(
|
reader = parquet::ParquetFileReader::OpenFile(
|
||||||
table->file.data(),
|
table->getFile().data(),
|
||||||
true,
|
true,
|
||||||
parquet::default_reader_properties(),
|
parquet::default_reader_properties(),
|
||||||
table->getMetadata());
|
table->getMetadata());
|
||||||
|
@ -955,4 +978,10 @@ void ParquetCursor::reset(std::vector<Constraint> constraints) {
|
||||||
numRowGroups = reader->metadata()->num_row_groups();
|
numRowGroups = reader->metadata()->num_row_groups();
|
||||||
}
|
}
|
||||||
|
|
||||||
ParquetTable* ParquetCursor::getTable() { return table; }
|
ParquetTable* ParquetCursor::getTable() const { return table; }
|
||||||
|
|
||||||
|
unsigned int ParquetCursor::getNumRowGroups() const { return numRowGroups; }
|
||||||
|
unsigned int ParquetCursor::getNumConstraints() const { return constraints.size(); }
|
||||||
|
const Constraint& ParquetCursor::getConstraint(unsigned int i) const { return constraints[i]; }
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -56,9 +56,12 @@ public:
|
||||||
|
|
||||||
void ensureColumn(int col);
|
void ensureColumn(int col);
|
||||||
bool isNull(int col);
|
bool isNull(int col);
|
||||||
|
unsigned int getNumRowGroups() const;
|
||||||
|
unsigned int getNumConstraints() const;
|
||||||
|
const Constraint& getConstraint(unsigned int i) const;
|
||||||
parquet::Type::type getPhysicalType(int col);
|
parquet::Type::type getPhysicalType(int col);
|
||||||
parquet::LogicalType::type getLogicalType(int col);
|
parquet::LogicalType::type getLogicalType(int col);
|
||||||
ParquetTable* getTable();
|
ParquetTable* getTable() const;
|
||||||
|
|
||||||
int getInt32(int col);
|
int getInt32(int col);
|
||||||
long getInt64(int col);
|
long getInt64(int col);
|
||||||
|
|
|
@ -1,19 +1,25 @@
|
||||||
#include "parquet_filter.h"
|
#include "parquet_filter.h"
|
||||||
|
|
||||||
Constraint::Constraint(
|
Constraint::Constraint(
|
||||||
|
RowGroupBitmap bitmap,
|
||||||
int column,
|
int column,
|
||||||
|
std::string columnName,
|
||||||
ConstraintOperator op,
|
ConstraintOperator op,
|
||||||
ValueType type,
|
ValueType type,
|
||||||
int64_t intValue,
|
int64_t intValue,
|
||||||
double doubleValue,
|
double doubleValue,
|
||||||
std::vector<unsigned char> blobValue
|
std::vector<unsigned char> blobValue
|
||||||
) {
|
): bitmap(bitmap),
|
||||||
this->column = column;
|
column(column),
|
||||||
this->op = op;
|
columnName(columnName),
|
||||||
this->type = type;
|
op(op),
|
||||||
this->intValue = intValue;
|
type(type),
|
||||||
this->doubleValue = doubleValue;
|
intValue(intValue),
|
||||||
this->blobValue = blobValue;
|
doubleValue(doubleValue),
|
||||||
|
blobValue(blobValue),
|
||||||
|
hadRows(false) {
|
||||||
|
RowGroupBitmap bm = bitmap;
|
||||||
|
this->bitmap = bm;
|
||||||
|
|
||||||
if(type == Text) {
|
if(type == Text) {
|
||||||
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
||||||
|
@ -34,3 +40,72 @@ Constraint::Constraint(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string Constraint::describe() const {
|
||||||
|
std::string rv;
|
||||||
|
rv.append(columnName);
|
||||||
|
rv.append(" ");
|
||||||
|
switch(op) {
|
||||||
|
case Equal:
|
||||||
|
rv.append("=");
|
||||||
|
break;
|
||||||
|
case GreaterThan:
|
||||||
|
rv.append(">");
|
||||||
|
break;
|
||||||
|
case LessThanOrEqual:
|
||||||
|
rv.append("<=");
|
||||||
|
break;
|
||||||
|
case LessThan:
|
||||||
|
rv.append("<");
|
||||||
|
break;
|
||||||
|
case GreaterThanOrEqual:
|
||||||
|
rv.append(">=");
|
||||||
|
break;
|
||||||
|
case Match:
|
||||||
|
rv.append("MATCH");
|
||||||
|
break;
|
||||||
|
case Like:
|
||||||
|
rv.append("LIKE");
|
||||||
|
break;
|
||||||
|
case Glob:
|
||||||
|
rv.append("GLOB");
|
||||||
|
break;
|
||||||
|
case Regexp:
|
||||||
|
rv.append("REGEXP");
|
||||||
|
break;
|
||||||
|
case NotEqual:
|
||||||
|
rv.append("<>");
|
||||||
|
break;
|
||||||
|
case IsNot:
|
||||||
|
rv.append("IS NOT");
|
||||||
|
break;
|
||||||
|
case IsNotNull:
|
||||||
|
rv.append("IS NOT NULL");
|
||||||
|
break;
|
||||||
|
case IsNull:
|
||||||
|
rv.append("IS NULL");
|
||||||
|
break;
|
||||||
|
case Is:
|
||||||
|
rv.append("IS");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
rv.append(" ");
|
||||||
|
|
||||||
|
switch(type) {
|
||||||
|
case Null:
|
||||||
|
rv.append("NULL");
|
||||||
|
break;
|
||||||
|
case Integer:
|
||||||
|
rv.append(std::to_string(intValue));
|
||||||
|
break;
|
||||||
|
case Double:
|
||||||
|
rv.append(std::to_string(doubleValue));
|
||||||
|
break;
|
||||||
|
case Blob:
|
||||||
|
break;
|
||||||
|
case Text:
|
||||||
|
rv.append(stringValue);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
|
@ -30,11 +30,63 @@ enum ValueType {
|
||||||
Text
|
Text
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class RowGroupBitmap {
|
||||||
|
void setBit(std::vector<unsigned char>& membership, unsigned int rowGroup, bool isSet) {
|
||||||
|
int byte = rowGroup / 8;
|
||||||
|
int offset = rowGroup % 8;
|
||||||
|
unsigned char c = membership[byte];
|
||||||
|
c &= ~(1UL << offset);
|
||||||
|
if(isSet) {
|
||||||
|
c |= 1UL << offset;
|
||||||
|
}
|
||||||
|
membership[byte] = c;
|
||||||
|
}
|
||||||
|
// Compares estimated rowGroupFilter results against observed results
|
||||||
|
// when we explored the row group. This lets us cache
|
||||||
|
public:
|
||||||
|
RowGroupBitmap(unsigned int totalRowGroups) {
|
||||||
|
// Initialize everything to assume that all row groups match.
|
||||||
|
// As we discover otherwise, we'll update that assumption.
|
||||||
|
for(unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) {
|
||||||
|
estimatedMembership.push_back(0xFF);
|
||||||
|
actualMembership.push_back(0xFF);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RowGroupBitmap(
|
||||||
|
std::vector<unsigned char> estimatedMembership,
|
||||||
|
std::vector<unsigned char> actualMembership) :
|
||||||
|
estimatedMembership(estimatedMembership),
|
||||||
|
actualMembership(actualMembership) {
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<unsigned char> estimatedMembership;
|
||||||
|
std::vector<unsigned char> actualMembership;
|
||||||
|
// Pass false only if definitely does not have rows
|
||||||
|
void setEstimatedMembership(unsigned int rowGroup, bool hasRows) {
|
||||||
|
setBit(estimatedMembership, rowGroup, hasRows);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass false only after exhausting all rows
|
||||||
|
void setActualMembership(unsigned int rowGroup, bool hadRows) {
|
||||||
|
setBit(actualMembership, rowGroup, hadRows);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool getActualMembership(unsigned int rowGroup) {
|
||||||
|
int byte = rowGroup / 8;
|
||||||
|
int offset = rowGroup % 8;
|
||||||
|
|
||||||
|
return (actualMembership[byte] >> offset) & 1U;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class Constraint {
|
class Constraint {
|
||||||
public:
|
public:
|
||||||
// Kind of a messy constructor function, but it's just for internal use, so whatever.
|
// Kind of a messy constructor function, but it's just for internal use, so whatever.
|
||||||
Constraint(
|
Constraint(
|
||||||
|
RowGroupBitmap bitmap,
|
||||||
int column,
|
int column,
|
||||||
|
std::string columnName,
|
||||||
ConstraintOperator op,
|
ConstraintOperator op,
|
||||||
ValueType type,
|
ValueType type,
|
||||||
int64_t intValue,
|
int64_t intValue,
|
||||||
|
@ -42,7 +94,9 @@ public:
|
||||||
std::vector<unsigned char> blobValue
|
std::vector<unsigned char> blobValue
|
||||||
);
|
);
|
||||||
|
|
||||||
|
RowGroupBitmap bitmap;
|
||||||
int column; // underlying column in the query
|
int column; // underlying column in the query
|
||||||
|
std::string columnName;
|
||||||
ConstraintOperator op;
|
ConstraintOperator op;
|
||||||
ValueType type;
|
ValueType type;
|
||||||
|
|
||||||
|
@ -54,6 +108,15 @@ public:
|
||||||
|
|
||||||
// Only set when stringValue is set and op == Like
|
// Only set when stringValue is set and op == Like
|
||||||
std::string likeStringValue;
|
std::string likeStringValue;
|
||||||
|
|
||||||
|
// A unique identifier for this constraint, e.g.
|
||||||
|
// col0 = 'Dawson Creek'
|
||||||
|
std::string describe() const;
|
||||||
|
|
||||||
|
// This is a temp field used while evaluating if a rowgroup had rows
|
||||||
|
// that matched this constraint.
|
||||||
|
int rowGroupId;
|
||||||
|
bool hadRows;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2,9 +2,7 @@
|
||||||
|
|
||||||
#include "parquet/api/reader.h"
|
#include "parquet/api/reader.h"
|
||||||
|
|
||||||
ParquetTable::ParquetTable(std::string file) {
|
ParquetTable::ParquetTable(std::string file, std::string tableName): file(file), tableName(tableName) {
|
||||||
this->file = file;
|
|
||||||
|
|
||||||
std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data());
|
std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data());
|
||||||
metadata = reader->metadata();
|
metadata = reader->metadata();
|
||||||
}
|
}
|
||||||
|
@ -138,3 +136,6 @@ std::string ParquetTable::CreateStatement() {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; }
|
std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; }
|
||||||
|
|
||||||
|
const std::string& ParquetTable::getFile() { return file; }
|
||||||
|
const std::string& ParquetTable::getTableName() { return tableName; }
|
||||||
|
|
|
@ -6,16 +6,19 @@
|
||||||
#include "parquet/api/reader.h"
|
#include "parquet/api/reader.h"
|
||||||
|
|
||||||
class ParquetTable {
|
class ParquetTable {
|
||||||
|
std::string file;
|
||||||
|
std::string tableName;
|
||||||
std::vector<std::string> columnNames;
|
std::vector<std::string> columnNames;
|
||||||
std::shared_ptr<parquet::FileMetaData> metadata;
|
std::shared_ptr<parquet::FileMetaData> metadata;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ParquetTable(std::string file);
|
ParquetTable(std::string file, std::string tableName);
|
||||||
std::string CreateStatement();
|
std::string CreateStatement();
|
||||||
std::string file;
|
|
||||||
std::string columnName(int idx);
|
std::string columnName(int idx);
|
||||||
std::shared_ptr<parquet::FileMetaData> getMetadata();
|
std::shared_ptr<parquet::FileMetaData> getMetadata();
|
||||||
|
const std::string& getFile();
|
||||||
|
const std::string& getTableName();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -47,7 +47,7 @@ main() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cat "$root"/parquet-generator/*.sql > "$root"/testcase-bootstrap.sql
|
cat "$root"/parquet-generator/*.sql > "$root"/testcase-bootstrap.sql
|
||||||
rm test.db
|
rm -f test.db
|
||||||
"$root"/sqlite/sqlite3 test.db -init "$root"/testcase-bootstrap.sql < /dev/null
|
"$root"/sqlite/sqlite3 test.db -init "$root"/testcase-bootstrap.sql < /dev/null
|
||||||
if [ ! -v NO_DEBUG ] && [ "$(cat testcases.txt | wc -l)" == "1" ]; then
|
if [ ! -v NO_DEBUG ] && [ "$(cat testcases.txt | wc -l)" == "1" ]; then
|
||||||
set -x
|
set -x
|
||||||
|
|
Loading…
Reference in New Issue