From e1a86954e517cdc00b61e3ede70e789524419298 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Sat, 23 Jun 2018 20:48:39 -0400 Subject: [PATCH] Revert "Don't eagerly evaluate constraints" This reverts commit cbde3c73b601383fff33ce501d0b26047326e93f. This regresses: ``` WITH inputs AS ( SELECT geo_name, CASE WHEN profile_id = 1930 THEN 'total' ELSE 'cyclist' END AS mode, female, male FROM census WHERE profile_id IN ( '1930', '1935') AND csd_type_name = 'CY' AND geo_name IN ('Victoria', 'Dawson Creek', 'Kitchener') ) SELECT total.geo_name, cyclist.male, cyclist.female, 100.0 * cyclist.male / total.male, 100.0 * cyclist.female / total.female FROM inputs AS total JOIN inputs AS cyclist USING (geo_name) WHERE total.mode = 'total' AND cyclist.mode = 'cyclist'; ``` while improving: ``` select count(*) from census where geo_name in ('Dawson Creek', 'Kitchener', 'Victoria') and csd_type_name = 'CY' and profile_id = '1930'; ``` which seems like a bad tradeoff. --- parquet/parquet_cursor.cc | 15 ++++----------- parquet/parquet_filter.h | 1 - 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/parquet/parquet_cursor.cc b/parquet/parquet_cursor.cc index 2468a58..bf4ae37 100644 --- a/parquet/parquet_cursor.cc +++ b/parquet/parquet_cursor.cc @@ -630,12 +630,9 @@ start: // a row for(unsigned int i = 0; i < constraints.size(); i++) { if(rowGroupId > 0 && constraints[i].rowGroupId == rowGroupId - 1) { - if(constraints[i].valid || constraints[i].hadRows) { - constraints[i].bitmap.setActualMembership(rowGroupId - 1, constraints[i].hadRows); - } + constraints[i].bitmap.setActualMembership(rowGroupId - 1, constraints[i].hadRows); } constraints[i].hadRows = false; - constraints[i].valid = true; } if(!currentRowGroupSatisfiesFilter()) @@ -655,6 +652,7 @@ start: // and the extension, which can add up on a dataset of tens // of millions of rows. bool ParquetCursor::currentRowSatisfiesFilter() { + bool overallRv = true; for(unsigned int i = 0; i < constraints.size(); i++) { bool rv = true; int column = constraints[i].column; @@ -686,15 +684,10 @@ bool ParquetCursor::currentRowSatisfiesFilter() { // ideally we'd short-circuit if we'd already set this group as visited if(rv) { constraints[i].hadRows = true; - } else { - // When we short circuit, mark the other constraints as not evaluated to avoid persisting incorrect data. - for(unsigned int j = i + 1; j < constraints.size(); j++) { - constraints[j].valid = false; - } - return false; } + overallRv = overallRv && rv; } - return true; + return overallRv; } void ParquetCursor::next() { diff --git a/parquet/parquet_filter.h b/parquet/parquet_filter.h index d350bfb..d9f336e 100644 --- a/parquet/parquet_filter.h +++ b/parquet/parquet_filter.h @@ -117,7 +117,6 @@ public: // that matched this constraint. int rowGroupId; bool hadRows; - bool valid; }; #endif