diff --git a/parquet/parquet_cursor.cc b/parquet/parquet_cursor.cc index 2468a58..bf4ae37 100644 --- a/parquet/parquet_cursor.cc +++ b/parquet/parquet_cursor.cc @@ -630,12 +630,9 @@ start: // a row for(unsigned int i = 0; i < constraints.size(); i++) { if(rowGroupId > 0 && constraints[i].rowGroupId == rowGroupId - 1) { - if(constraints[i].valid || constraints[i].hadRows) { - constraints[i].bitmap.setActualMembership(rowGroupId - 1, constraints[i].hadRows); - } + constraints[i].bitmap.setActualMembership(rowGroupId - 1, constraints[i].hadRows); } constraints[i].hadRows = false; - constraints[i].valid = true; } if(!currentRowGroupSatisfiesFilter()) @@ -655,6 +652,7 @@ start: // and the extension, which can add up on a dataset of tens // of millions of rows. bool ParquetCursor::currentRowSatisfiesFilter() { + bool overallRv = true; for(unsigned int i = 0; i < constraints.size(); i++) { bool rv = true; int column = constraints[i].column; @@ -686,15 +684,10 @@ bool ParquetCursor::currentRowSatisfiesFilter() { // ideally we'd short-circuit if we'd already set this group as visited if(rv) { constraints[i].hadRows = true; - } else { - // When we short circuit, mark the other constraints as not evaluated to avoid persisting incorrect data. - for(unsigned int j = i + 1; j < constraints.size(); j++) { - constraints[j].valid = false; - } - return false; } + overallRv = overallRv && rv; } - return true; + return overallRv; } void ParquetCursor::next() { diff --git a/parquet/parquet_filter.h b/parquet/parquet_filter.h index d350bfb..d9f336e 100644 --- a/parquet/parquet_filter.h +++ b/parquet/parquet_filter.h @@ -117,7 +117,6 @@ public: // that matched this constraint. int rowGroupId; bool hadRows; - bool valid; }; #endif