mirror of
https://github.com/cldellow/sqlite-parquet-vtable.git
synced 2025-09-16 22:49:59 +00:00
Cache clauses -> row group mapping
Create a shadow table. For `stats`, it'd be `_stats_rowgroups`. It contains three columns:
- the clause (e.g. `city = 'Dawson Creek'`)
- the initial estimate, as a bitmap of rowgroups based on stats
- the actual observed rowgroups, as a bitmap

This papers over poorly sorted parquet files, at the cost of some disk space. It makes interactive queries much more natural -- drilldown-style queries are much faster, as they can leverage work done by previous queries.

E.g. `SELECT * FROM stats WHERE city = 'Dawson Creek' and question_id >= 1935 and question_id <= 1940` takes ~584ms on the first run, but 9ms on subsequent runs.

We only create entries when the estimates don't match the actual results.

Fixes #6
This commit is contained in:
@@ -1,19 +1,25 @@
|
||||
#include "parquet_filter.h"
|
||||
|
||||
Constraint::Constraint(
|
||||
RowGroupBitmap bitmap,
|
||||
int column,
|
||||
std::string columnName,
|
||||
ConstraintOperator op,
|
||||
ValueType type,
|
||||
int64_t intValue,
|
||||
double doubleValue,
|
||||
std::vector<unsigned char> blobValue
|
||||
) {
|
||||
this->column = column;
|
||||
this->op = op;
|
||||
this->type = type;
|
||||
this->intValue = intValue;
|
||||
this->doubleValue = doubleValue;
|
||||
this->blobValue = blobValue;
|
||||
): bitmap(bitmap),
|
||||
column(column),
|
||||
columnName(columnName),
|
||||
op(op),
|
||||
type(type),
|
||||
intValue(intValue),
|
||||
doubleValue(doubleValue),
|
||||
blobValue(blobValue),
|
||||
hadRows(false) {
|
||||
RowGroupBitmap bm = bitmap;
|
||||
this->bitmap = bm;
|
||||
|
||||
if(type == Text) {
|
||||
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
||||
@@ -34,3 +40,72 @@ Constraint::Constraint(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string Constraint::describe() const {
|
||||
std::string rv;
|
||||
rv.append(columnName);
|
||||
rv.append(" ");
|
||||
switch(op) {
|
||||
case Equal:
|
||||
rv.append("=");
|
||||
break;
|
||||
case GreaterThan:
|
||||
rv.append(">");
|
||||
break;
|
||||
case LessThanOrEqual:
|
||||
rv.append("<=");
|
||||
break;
|
||||
case LessThan:
|
||||
rv.append("<");
|
||||
break;
|
||||
case GreaterThanOrEqual:
|
||||
rv.append(">=");
|
||||
break;
|
||||
case Match:
|
||||
rv.append("MATCH");
|
||||
break;
|
||||
case Like:
|
||||
rv.append("LIKE");
|
||||
break;
|
||||
case Glob:
|
||||
rv.append("GLOB");
|
||||
break;
|
||||
case Regexp:
|
||||
rv.append("REGEXP");
|
||||
break;
|
||||
case NotEqual:
|
||||
rv.append("<>");
|
||||
break;
|
||||
case IsNot:
|
||||
rv.append("IS NOT");
|
||||
break;
|
||||
case IsNotNull:
|
||||
rv.append("IS NOT NULL");
|
||||
break;
|
||||
case IsNull:
|
||||
rv.append("IS NULL");
|
||||
break;
|
||||
case Is:
|
||||
rv.append("IS");
|
||||
break;
|
||||
}
|
||||
rv.append(" ");
|
||||
|
||||
switch(type) {
|
||||
case Null:
|
||||
rv.append("NULL");
|
||||
break;
|
||||
case Integer:
|
||||
rv.append(std::to_string(intValue));
|
||||
break;
|
||||
case Double:
|
||||
rv.append(std::to_string(doubleValue));
|
||||
break;
|
||||
case Blob:
|
||||
break;
|
||||
case Text:
|
||||
rv.append(stringValue);
|
||||
break;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
Reference in New Issue
Block a user