Don't use accessors on Constraint; read its fields directly

This drops the `= 'Dawson Creek'` query from 210ms to 145ms.

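Maybe inlining would have been an option here? The accessors were defined
out of line in the .cpp file, so defining them in the header might have let
the compiler inline them. I'm not familiar enough with g++ to know. :(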
This commit is contained in:
Colin Dellow 2018-03-15 23:04:11 -04:00
parent 8ba13f44d5
commit 1f4cebe2a6
3 changed files with 22 additions and 59 deletions

View File

@ -7,8 +7,8 @@ ParquetCursor::ParquetCursor(ParquetTable* table) {
} }
bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint) { bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint) {
int64_t target = constraint.getInt(); int64_t target = constraint.intValue;
switch(constraint.getOperator()) { switch(constraint.op) {
case IsNull: case IsNull:
return false; return false;
case Is: case Is:
@ -30,7 +30,6 @@ bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint)
} }
bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) { bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) {
std::vector<unsigned char> target = constraint.getBytes();
parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>* stats = parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>* stats =
(parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>*)_stats.get(); (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>*)_stats.get();
@ -38,18 +37,18 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, s
return true; return true;
} }
if(constraint.getType() != Text) { if(constraint.type != Text) {
return true; return true;
} }
const std::string& str = constraint.getString(); const std::string& str = constraint.stringValue;
const parquet::ByteArray& min = stats->min(); const parquet::ByteArray& min = stats->min();
const parquet::ByteArray& max = stats->max(); const parquet::ByteArray& max = stats->max();
std::string minStr((const char*)min.ptr, min.len); std::string minStr((const char*)min.ptr, min.len);
std::string maxStr((const char*)max.ptr, max.len); std::string maxStr((const char*)max.ptr, max.len);
// printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data()); // printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data());
switch(constraint.getOperator()) { switch(constraint.op) {
case Is: case Is:
case Equal: case Equal:
return str >= minStr && str <= maxStr; return str >= minStr && str <= maxStr;
@ -75,14 +74,14 @@ bool ParquetCursor::currentRowGroupSatisfiesDoubleFilter(Constraint& constraint,
} }
bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) { bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) {
if(constraint.getType() != Text) { if(constraint.type != Text) {
return true; return true;
} }
const std::vector<unsigned char>& blob = constraint.getBytes(); const std::vector<unsigned char>& blob = constraint.blobValue;
parquet::ByteArray* ba = getByteArray(constraint.getColumn()); parquet::ByteArray* ba = getByteArray(constraint.column);
switch(constraint.getOperator()) { switch(constraint.op) {
case Is: case Is:
case Equal: case Equal:
if(blob.size() != ba->len) if(blob.size() != ba->len)
@ -120,8 +119,8 @@ bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) {
// data, which provides substantial performance benefits. // data, which provides substantial performance benefits.
bool ParquetCursor::currentRowGroupSatisfiesFilter() { bool ParquetCursor::currentRowGroupSatisfiesFilter() {
for(unsigned int i = 0; i < constraints.size(); i++) { for(unsigned int i = 0; i < constraints.size(); i++) {
int column = constraints[i].getColumn(); int column = constraints[i].column;
int op = constraints[i].getOperator(); int op = constraints[i].op;
bool rv = true; bool rv = true;
if(column == -1) { if(column == -1) {
@ -224,9 +223,9 @@ start:
bool ParquetCursor::currentRowSatisfiesFilter() { bool ParquetCursor::currentRowSatisfiesFilter() {
for(unsigned int i = 0; i < constraints.size(); i++) { for(unsigned int i = 0; i < constraints.size(); i++) {
bool rv = true; bool rv = true;
int column = constraints[i].getColumn(); int column = constraints[i].column;
ensureColumn(column); ensureColumn(column);
int op = constraints[i].getOperator(); int op = constraints[i].op;
if(op == IsNull) { if(op == IsNull) {
rv = isNull(column); rv = isNull(column);

View File

@ -18,31 +18,3 @@ Constraint::Constraint(
if(type == Text) if(type == Text)
stringValue = std::string((char*)&blobValue[0], blobValue.size()); stringValue = std::string((char*)&blobValue[0], blobValue.size());
} }
int Constraint::getColumn() {
return column;
}
ConstraintOperator Constraint::getOperator() {
return op;
}
ValueType Constraint::getType() {
return type;
}
int64_t Constraint::getInt() {
return intValue;
}
double Constraint::getDouble() {
return doubleValue;
}
const std::vector<unsigned char>& Constraint::getBytes() {
return blobValue;
}
const std::string& Constraint::getString() {
return stringValue;
}

View File

@ -31,16 +31,6 @@ enum ValueType {
}; };
class Constraint { class Constraint {
int column; // underlying column in the query
ConstraintOperator op;
ValueType type;
int64_t intValue;
double doubleValue;
std::vector<unsigned char> blobValue;
// Only set when blobValue is set
std::string stringValue;
public: public:
// Kind of a messy constructor function, but it's just for internal use, so whatever. // Kind of a messy constructor function, but it's just for internal use, so whatever.
Constraint( Constraint(
@ -52,13 +42,15 @@ public:
std::vector<unsigned char> blobValue std::vector<unsigned char> blobValue
); );
int getColumn(); int column; // underlying column in the query
ConstraintOperator getOperator(); ConstraintOperator op;
ValueType getType(); ValueType type;
int64_t getInt();
double getDouble(); int64_t intValue;
const std::vector<unsigned char>& getBytes(); double doubleValue;
const std::string& getString(); std::vector<unsigned char> blobValue;
// Only set when blobValue is set
std::string stringValue;
}; };
#endif #endif