Remove unnecessary copy
Now the `== 'Dawson Creek'` query takes ~210 ms, which is approximately the same as a `count(*)` query. This is probably OK, since the row group filter excludes only 30% of records.
This commit is contained in:
parent
f7f1ed03d1
commit
8ba13f44d5
|
@ -9,7 +9,6 @@ ARROW_LIB = /usr/local/lib/libarrow.so
|
||||||
BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so
|
BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so
|
||||||
|
|
||||||
LDFLAGS = -O3 $(PARQUET_LIB) $(THRIFT_LIB) $(ARROW_LIB) $(BOOST_LIB)
|
LDFLAGS = -O3 $(PARQUET_LIB) $(THRIFT_LIB) $(ARROW_LIB) $(BOOST_LIB)
|
||||||
DEPS = hellomake.h
|
|
||||||
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
|
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
|
||||||
|
|
||||||
libparquet.so: $(OBJ)
|
libparquet.so: $(OBJ)
|
||||||
|
|
|
@ -42,9 +42,9 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, s
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string str = constraint.getString();
|
const std::string& str = constraint.getString();
|
||||||
parquet::ByteArray min = stats->min();
|
const parquet::ByteArray& min = stats->min();
|
||||||
parquet::ByteArray max = stats->max();
|
const parquet::ByteArray& max = stats->max();
|
||||||
std::string minStr((const char*)min.ptr, min.len);
|
std::string minStr((const char*)min.ptr, min.len);
|
||||||
std::string maxStr((const char*)max.ptr, max.len);
|
std::string maxStr((const char*)max.ptr, max.len);
|
||||||
// printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data());
|
// printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data());
|
||||||
|
@ -79,7 +79,7 @@ bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<unsigned char> blob = constraint.getBytes();
|
const std::vector<unsigned char>& blob = constraint.getBytes();
|
||||||
parquet::ByteArray* ba = getByteArray(constraint.getColumn());
|
parquet::ByteArray* ba = getByteArray(constraint.getColumn());
|
||||||
|
|
||||||
switch(constraint.getOperator()) {
|
switch(constraint.getOperator()) {
|
||||||
|
@ -300,7 +300,6 @@ void ParquetCursor::ensureColumn(int col) {
|
||||||
if(scanners[col].get() == NULL) {
|
if(scanners[col].get() == NULL) {
|
||||||
std::shared_ptr<parquet::ColumnReader> colReader = rowGroup->Column(col);
|
std::shared_ptr<parquet::ColumnReader> colReader = rowGroup->Column(col);
|
||||||
scanners[col] = parquet::Scanner::Make(colReader);
|
scanners[col] = parquet::Scanner::Make(colReader);
|
||||||
// TODO: potentially skip rows if rowsLeftInRowGroup != rowGroupMetadata->num_rows()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Actually fetch a value, stash data in colRows, colNulls, colValues
|
// Actually fetch a value, stash data in colRows, colNulls, colValues
|
||||||
|
|
Loading…
Reference in New Issue