1
0
mirror of https://github.com/cldellow/sqlite-parquet-vtable.git synced 2025-09-16 22:49:59 +00:00

Cache clauses -> row group mapping

Create a shadow table. For `stats`, it'd be `_stats_rowgroups`.

It contains three columns:

- the clause (eg `city = 'Dawson Creek'`)
- the initial estimate, as a bitmap of rowgroups based on stats
- the actual observed rowgroups, as a bitmap

This papers over poorly sorted parquet files, at the cost of some disk
space. It makes interactive queries much more natural -- drilldown style
queries are much faster, as they can leverage work done by previous
queries.

e.g. `SELECT * FROM stats WHERE city = 'Dawson Creek' and question_id >= 1935 and question_id <= 1940`
takes ~584ms on the first run, but 9ms on subsequent runs.

We only create entries when the estimates don't match the actual
results.

Fixes #6
This commit is contained in:
Colin Dellow
2018-03-24 23:51:15 -04:00
parent d2c736f25a
commit d3ab5ff3e7
9 changed files with 397 additions and 63 deletions

View File

@@ -1,19 +1,25 @@
#include "parquet_filter.h"
Constraint::Constraint(
RowGroupBitmap bitmap,
int column,
std::string columnName,
ConstraintOperator op,
ValueType type,
int64_t intValue,
double doubleValue,
std::vector<unsigned char> blobValue
) {
this->column = column;
this->op = op;
this->type = type;
this->intValue = intValue;
this->doubleValue = doubleValue;
this->blobValue = blobValue;
): bitmap(bitmap),
column(column),
columnName(columnName),
op(op),
type(type),
intValue(intValue),
doubleValue(doubleValue),
blobValue(blobValue),
hadRows(false) {
RowGroupBitmap bm = bitmap;
this->bitmap = bm;
if(type == Text) {
stringValue = std::string((char*)&blobValue[0], blobValue.size());
@@ -34,3 +40,72 @@ Constraint::Constraint(
}
}
}
std::string Constraint::describe() const {
std::string rv;
rv.append(columnName);
rv.append(" ");
switch(op) {
case Equal:
rv.append("=");
break;
case GreaterThan:
rv.append(">");
break;
case LessThanOrEqual:
rv.append("<=");
break;
case LessThan:
rv.append("<");
break;
case GreaterThanOrEqual:
rv.append(">=");
break;
case Match:
rv.append("MATCH");
break;
case Like:
rv.append("LIKE");
break;
case Glob:
rv.append("GLOB");
break;
case Regexp:
rv.append("REGEXP");
break;
case NotEqual:
rv.append("<>");
break;
case IsNot:
rv.append("IS NOT");
break;
case IsNotNull:
rv.append("IS NOT NULL");
break;
case IsNull:
rv.append("IS NULL");
break;
case Is:
rv.append("IS");
break;
}
rv.append(" ");
switch(type) {
case Null:
rv.append("NULL");
break;
case Integer:
rv.append(std::to_string(intValue));
break;
case Double:
rv.append(std::to_string(doubleValue));
break;
case Blob:
break;
case Text:
rv.append(stringValue);
break;
}
return rv;
}