Don't use accessors
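Reading the `Constraint` fields directly instead of going through accessor methods drops the `= 'Dawson Creek'` query from 210ms to 145ms. Maybe having the compiler inline the accessors would have been an option here? I'm not familiar enough with g++ to know. :(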
This commit is contained in:
parent
8ba13f44d5
commit
1f4cebe2a6
|
@ -7,8 +7,8 @@ ParquetCursor::ParquetCursor(ParquetTable* table) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint) {
|
bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint) {
|
||||||
int64_t target = constraint.getInt();
|
int64_t target = constraint.intValue;
|
||||||
switch(constraint.getOperator()) {
|
switch(constraint.op) {
|
||||||
case IsNull:
|
case IsNull:
|
||||||
return false;
|
return false;
|
||||||
case Is:
|
case Is:
|
||||||
|
@ -30,7 +30,6 @@ bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint)
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) {
|
bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) {
|
||||||
std::vector<unsigned char> target = constraint.getBytes();
|
|
||||||
parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>* stats =
|
parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>* stats =
|
||||||
(parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>*)_stats.get();
|
(parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>*)_stats.get();
|
||||||
|
|
||||||
|
@ -38,18 +37,18 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, s
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(constraint.getType() != Text) {
|
if(constraint.type != Text) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string& str = constraint.getString();
|
const std::string& str = constraint.stringValue;
|
||||||
const parquet::ByteArray& min = stats->min();
|
const parquet::ByteArray& min = stats->min();
|
||||||
const parquet::ByteArray& max = stats->max();
|
const parquet::ByteArray& max = stats->max();
|
||||||
std::string minStr((const char*)min.ptr, min.len);
|
std::string minStr((const char*)min.ptr, min.len);
|
||||||
std::string maxStr((const char*)max.ptr, max.len);
|
std::string maxStr((const char*)max.ptr, max.len);
|
||||||
// printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data());
|
// printf("min=%s [%d], max=%s [%d], target=%s\n", minStr.data(), min.len, maxStr.data(), max.len, str.data());
|
||||||
|
|
||||||
switch(constraint.getOperator()) {
|
switch(constraint.op) {
|
||||||
case Is:
|
case Is:
|
||||||
case Equal:
|
case Equal:
|
||||||
return str >= minStr && str <= maxStr;
|
return str >= minStr && str <= maxStr;
|
||||||
|
@ -75,14 +74,14 @@ bool ParquetCursor::currentRowGroupSatisfiesDoubleFilter(Constraint& constraint,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) {
|
bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) {
|
||||||
if(constraint.getType() != Text) {
|
if(constraint.type != Text) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<unsigned char>& blob = constraint.getBytes();
|
const std::vector<unsigned char>& blob = constraint.blobValue;
|
||||||
parquet::ByteArray* ba = getByteArray(constraint.getColumn());
|
parquet::ByteArray* ba = getByteArray(constraint.column);
|
||||||
|
|
||||||
switch(constraint.getOperator()) {
|
switch(constraint.op) {
|
||||||
case Is:
|
case Is:
|
||||||
case Equal:
|
case Equal:
|
||||||
if(blob.size() != ba->len)
|
if(blob.size() != ba->len)
|
||||||
|
@ -120,8 +119,8 @@ bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) {
|
||||||
// data, which provides substantial performance benefits.
|
// data, which provides substantial performance benefits.
|
||||||
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
||||||
for(unsigned int i = 0; i < constraints.size(); i++) {
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
int column = constraints[i].getColumn();
|
int column = constraints[i].column;
|
||||||
int op = constraints[i].getOperator();
|
int op = constraints[i].op;
|
||||||
bool rv = true;
|
bool rv = true;
|
||||||
|
|
||||||
if(column == -1) {
|
if(column == -1) {
|
||||||
|
@ -224,9 +223,9 @@ start:
|
||||||
bool ParquetCursor::currentRowSatisfiesFilter() {
|
bool ParquetCursor::currentRowSatisfiesFilter() {
|
||||||
for(unsigned int i = 0; i < constraints.size(); i++) {
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
bool rv = true;
|
bool rv = true;
|
||||||
int column = constraints[i].getColumn();
|
int column = constraints[i].column;
|
||||||
ensureColumn(column);
|
ensureColumn(column);
|
||||||
int op = constraints[i].getOperator();
|
int op = constraints[i].op;
|
||||||
|
|
||||||
if(op == IsNull) {
|
if(op == IsNull) {
|
||||||
rv = isNull(column);
|
rv = isNull(column);
|
||||||
|
|
|
@ -18,31 +18,3 @@ Constraint::Constraint(
|
||||||
if(type == Text)
|
if(type == Text)
|
||||||
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
int Constraint::getColumn() {
|
|
||||||
return column;
|
|
||||||
}
|
|
||||||
|
|
||||||
ConstraintOperator Constraint::getOperator() {
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
ValueType Constraint::getType() {
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t Constraint::getInt() {
|
|
||||||
return intValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
double Constraint::getDouble() {
|
|
||||||
return doubleValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::vector<unsigned char>& Constraint::getBytes() {
|
|
||||||
return blobValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string& Constraint::getString() {
|
|
||||||
return stringValue;
|
|
||||||
}
|
|
||||||
|
|
|
@ -31,16 +31,6 @@ enum ValueType {
|
||||||
};
|
};
|
||||||
|
|
||||||
class Constraint {
|
class Constraint {
|
||||||
int column; // underlying column in the query
|
|
||||||
ConstraintOperator op;
|
|
||||||
ValueType type;
|
|
||||||
|
|
||||||
int64_t intValue;
|
|
||||||
double doubleValue;
|
|
||||||
std::vector<unsigned char> blobValue;
|
|
||||||
// Only set when blobValue is set
|
|
||||||
std::string stringValue;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Kind of a messy constructor function, but it's just for internal use, so whatever.
|
// Kind of a messy constructor function, but it's just for internal use, so whatever.
|
||||||
Constraint(
|
Constraint(
|
||||||
|
@ -52,13 +42,15 @@ public:
|
||||||
std::vector<unsigned char> blobValue
|
std::vector<unsigned char> blobValue
|
||||||
);
|
);
|
||||||
|
|
||||||
int getColumn();
|
int column; // underlying column in the query
|
||||||
ConstraintOperator getOperator();
|
ConstraintOperator op;
|
||||||
ValueType getType();
|
ValueType type;
|
||||||
int64_t getInt();
|
|
||||||
double getDouble();
|
int64_t intValue;
|
||||||
const std::vector<unsigned char>& getBytes();
|
double doubleValue;
|
||||||
const std::string& getString();
|
std::vector<unsigned char> blobValue;
|
||||||
|
// Only set when blobValue is set
|
||||||
|
std::string stringValue;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue