diff --git a/parquet/parquet_cursor.cc b/parquet/parquet_cursor.cc
index e2afbbd..6222926 100644
--- a/parquet/parquet_cursor.cc
+++ b/parquet/parquet_cursor.cc
@@ -6,7 +6,7 @@ ParquetCursor::ParquetCursor(ParquetTable* table) {
   reset(std::vector());
 }

-bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint constraint) {
+bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint) {
   int64_t target = constraint.getInt();
   switch(constraint.getOperator()) {
     case IsNull:
@@ -29,7 +29,7 @@ bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint constraint) {
   }
 }

-bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint constraint, std::shared_ptr _stats) {
+bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr _stats) {
   std::vector target = constraint.getBytes();
   parquet::TypedRowGroupStatistics>* stats =
     (parquet::TypedRowGroupStatistics>*)_stats.get();
@@ -66,14 +66,57 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint constraint, st
   }
 }

-bool ParquetCursor::currentRowGroupSatisfiesIntegerFilter(Constraint constraint, std::shared_ptr stats) {
+bool ParquetCursor::currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr stats) {
   return true;
 }

-bool ParquetCursor::currentRowGroupSatisfiesDoubleFilter(Constraint constraint, std::shared_ptr stats) {
+bool ParquetCursor::currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr stats) {
   return true;
 }

+// Row-level check of one constraint against the current row's byte-array value.
+// Only Is/Equal are evaluated (exact length and byte match); every other
+// operator, and any constraint whose value type is not Text, returns true.
+bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) {
+  if(constraint.getType() != Text) {
+    return true;
+  }
+
+  // getBytes() returns a const reference after this patch; bind to it instead of copying.
+  const auto& blob = constraint.getBytes();
+  parquet::ByteArray* ba = getByteArray(constraint.getColumn());
+
+  switch(constraint.getOperator()) {
+    case Is:
+    case Equal:
+      if(blob.size() != ba->len)
+        return false;
+
+      // Lengths are equal here; two empty values match without indexing an empty vector.
+      return blob.empty() || 0 == memcmp(blob.data(), ba->ptr, ba->len);
+    case GreaterThan:
+    case GreaterThanOrEqual:
+    case LessThan:
+    case LessThanOrEqual:
+    case IsNot:
+    case NotEqual:
+    case Like:
+
+    default:
+      return true;
+  }
+
+}
+
+bool ParquetCursor::currentRowSatisfiesIntegerFilter(Constraint& constraint) {
+  return true; // unconditional: no row-level integer comparison is performed
+}
+
+bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) {
+  return true; // unconditional: no row-level floating-point comparison is performed
+}
+
+
 // Return true if it is _possible_ that the current
 // rowgroup satisfies the constraints. Only return false
 // if it definitely does not.
@@ -194,6 +237,19 @@ bool ParquetCursor::currentRowSatisfiesFilter() {
       rv = isNull(column);
     } else if(op == IsNotNull) {
       rv = !isNull(column);
+    } else {
+      parquet::Type::type pqType = types[column];
+
+      if(pqType == parquet::Type::BYTE_ARRAY) {
+        rv = currentRowSatisfiesTextFilter(constraints[i]);
+      } else if(pqType == parquet::Type::INT32 ||
+                pqType == parquet::Type::INT64 ||
+                pqType == parquet::Type::INT96 ||
+                pqType == parquet::Type::BOOLEAN) {
+        rv = currentRowSatisfiesIntegerFilter(constraints[i]);
+      } else if(pqType == parquet::Type::FLOAT || pqType == parquet::Type::DOUBLE) {
+        rv = currentRowSatisfiesDoubleFilter(constraints[i]);
+      } // any other physical type has no branch here, so rv is not reassigned
     }

     if(!rv)
diff --git a/parquet/parquet_cursor.h b/parquet/parquet_cursor.h
index 26c7316..af553fe 100644
--- a/parquet/parquet_cursor.h
+++ b/parquet/parquet_cursor.h
@@ -35,10 +35,15 @@ class ParquetCursor {

   bool currentRowSatisfiesFilter();
   bool currentRowGroupSatisfiesFilter();
-  bool currentRowGroupSatisfiesRowIdFilter(Constraint constraint);
-  bool currentRowGroupSatisfiesTextFilter(Constraint constraint, std::shared_ptr stats);
-  bool currentRowGroupSatisfiesIntegerFilter(Constraint constraint, std::shared_ptr stats);
-  bool currentRowGroupSatisfiesDoubleFilter(Constraint constraint, std::shared_ptr stats);
+  bool currentRowGroupSatisfiesRowIdFilter(Constraint& constraint);
+  bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr stats);
+  bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr stats);
+  bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr stats);
+
+  bool currentRowSatisfiesTextFilter(Constraint& constraint);
+  bool currentRowSatisfiesIntegerFilter(Constraint& constraint);
+  bool currentRowSatisfiesDoubleFilter(Constraint& constraint);
+

 public:
   ParquetCursor(ParquetTable* table);
diff --git a/parquet/parquet_filter.cc b/parquet/parquet_filter.cc
index 503ff4a..1af14eb 100644
--- a/parquet/parquet_filter.cc
+++ b/parquet/parquet_filter.cc
@@ -39,10 +39,10 @@ double Constraint::getDouble() {
   return doubleValue;
 }

-const std::vector Constraint::getBytes() {
+const std::vector& Constraint::getBytes() {
   return blobValue;
 }

-std::string Constraint::getString() {
+const std::string& Constraint::getString() {
   return stringValue;
 }
diff --git a/parquet/parquet_filter.h b/parquet/parquet_filter.h
index 55e6d7d..ae5a535 100644
--- a/parquet/parquet_filter.h
+++ b/parquet/parquet_filter.h
@@ -57,8 +57,8 @@ public:
   ValueType getType();
   int64_t getInt();
   double getDouble();
-  const std::vector getBytes();
-  std::string getString();
+  const std::vector& getBytes();
+  const std::string& getString();
 };

 #endif