From 8ba13f44d504791fedd90cdd80dcd0509848a1ce Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Thu, 15 Mar 2018 22:10:45 -0400 Subject: [PATCH] Remove unnecessary copy Now the `== 'Dawson Creek'` query is ~210ms, which is approx the same as a `count(*)` query. This seems maybe OK, since the row group filter is only excluding 30% of records. --- parquet/Makefile | 1 - parquet/parquet_cursor.cc | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/parquet/Makefile b/parquet/Makefile index 07fa587..6fedd2e 100644 --- a/parquet/Makefile +++ b/parquet/Makefile @@ -9,7 +9,6 @@ ARROW_LIB = /usr/local/lib/libarrow.so BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so LDFLAGS = -O3 $(PARQUET_LIB) $(THRIFT_LIB) $(ARROW_LIB) $(BOOST_LIB) -DEPS = hellomake.h OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o libparquet.so: $(OBJ) diff --git a/parquet/parquet_cursor.cc b/parquet/parquet_cursor.cc index 6222926..de0eeaf 100644 --- a/parquet/parquet_cursor.cc +++ b/parquet/parquet_cursor.cc @@ -42,9 +42,9 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, s return true; } - std::string str = constraint.getString(); - parquet::ByteArray min = stats->min(); - parquet::ByteArray max = stats->max(); + const std::string& str = constraint.getString(); + const parquet::ByteArray& min = stats->min(); + const parquet::ByteArray& max = stats->max(); std::string minStr((const char*)min.ptr, min.len); std::string maxStr((const char*)max.ptr, max.len); // printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data()); @@ -79,7 +79,7 @@ bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) { return true; } - std::vector blob = constraint.getBytes(); + const std::vector& blob = constraint.getBytes(); parquet::ByteArray* ba = getByteArray(constraint.getColumn()); switch(constraint.getOperator()) { @@ -300,7 +300,6 @@ void ParquetCursor::ensureColumn(int col) { if(scanners[col].get() == NULL) { std::shared_ptr colReader = rowGroup->Column(col); scanners[col] = parquet::Scanner::Make(colReader); - // TODO: potentially skip rows if rowsLeftInRowGroup != rowGroupMetadata->num_rows() } // Actually fetch a value, stash data in colRows, colNulls, colValues