Dispatch row group filtering based on parquet type

This commit is contained in:
Colin Dellow 2018-03-15 20:25:02 -04:00
parent 92ba5f94e0
commit dc431aee20
1 changed files with 22 additions and 6 deletions

View File

@ -91,18 +91,34 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() {
} }
std::shared_ptr<parquet::RowGroupStatistics> stats = md->statistics(); std::shared_ptr<parquet::RowGroupStatistics> stats = md->statistics();
// SQLite is much looser with types than you might expect if you
// come from a Postgres background. The constraint '30.0' (that is,
// a string containing a floating point number) should be treated
// as equal to a field containing an integer 30.
//
// This means that even if the Parquet physical type is integer,
// the constraint type may be a string, so dispatch to the filter
// function based on the Parquet type, not the constraint type.
if(op == IsNull) { if(op == IsNull) {
rv = stats->null_count() > 0; rv = stats->null_count() > 0;
} else if(op == IsNotNull) { } else if(op == IsNotNull) {
rv = stats->num_values() > 0; rv = stats->num_values() > 0;
} else if(type == Text) { } else {
parquet::Type::type pqType = types[column];
if(pqType == parquet::Type::BYTE_ARRAY) {
rv = currentRowGroupSatisfiesTextFilter(constraints[i], stats); rv = currentRowGroupSatisfiesTextFilter(constraints[i], stats);
} else if(type == Integer) { } else if(pqType == parquet::Type::INT32 ||
pqType == parquet::Type::INT64 ||
pqType == parquet::Type::INT96 ||
pqType == parquet::Type::BOOLEAN) {
rv = currentRowGroupSatisfiesIntegerFilter(constraints[i], stats); rv = currentRowGroupSatisfiesIntegerFilter(constraints[i], stats);
} else if(type == Double) { } else if(pqType == parquet::Type::FLOAT || pqType == parquet::Type::DOUBLE) {
rv = currentRowGroupSatisfiesDoubleFilter(constraints[i], stats); rv = currentRowGroupSatisfiesDoubleFilter(constraints[i], stats);
} }
} }
}
if(!rv) if(!rv)
return false; return false;