Remove unnecessary copy

With this change, the `== 'Dawson Creek'` query takes ~210ms, which is
approximately the same as a `count(*)` query. That seems plausible, since
the row group filter is only excluding 30% of records.
This commit is contained in:
Colin Dellow 2018-03-15 22:10:45 -04:00
parent f7f1ed03d1
commit 8ba13f44d5
2 changed files with 4 additions and 6 deletions

View File

@ -9,7 +9,6 @@ ARROW_LIB = /usr/local/lib/libarrow.so
BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so
LDFLAGS = -O3 $(PARQUET_LIB) $(THRIFT_LIB) $(ARROW_LIB) $(BOOST_LIB)
DEPS = hellomake.h
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
libparquet.so: $(OBJ)

View File

@ -42,9 +42,9 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, s
return true;
}
std::string str = constraint.getString();
parquet::ByteArray min = stats->min();
parquet::ByteArray max = stats->max();
const std::string& str = constraint.getString();
const parquet::ByteArray& min = stats->min();
const parquet::ByteArray& max = stats->max();
std::string minStr((const char*)min.ptr, min.len);
std::string maxStr((const char*)max.ptr, max.len);
// printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data());
@ -79,7 +79,7 @@ bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) {
return true;
}
std::vector<unsigned char> blob = constraint.getBytes();
const std::vector<unsigned char>& blob = constraint.getBytes();
parquet::ByteArray* ba = getByteArray(constraint.getColumn());
switch(constraint.getOperator()) {
@ -300,7 +300,6 @@ void ParquetCursor::ensureColumn(int col) {
if(scanners[col].get() == NULL) {
std::shared_ptr<parquet::ColumnReader> colReader = rowGroup->Column(col);
scanners[col] = parquet::Scanner::Make(colReader);
// TODO: potentially skip rows if rowsLeftInRowGroup != rowGroupMetadata->num_rows()
}
// Actually fetch a value, stash data in colRows, colNulls, colValues