2018-03-11 17:58:10 +00:00
|
|
|
#ifndef PARQUET_FILTER_H
|
|
|
|
#define PARQUET_FILTER_H
|
|
|
|
|
|
|
|
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
|
|
|
|
// Operator of a filter constraint; stored in Constraint::op.
//
// The numeric values are written out explicitly and are exactly the values
// the compiler assigned implicitly from declaration order.
// NOTE(review): the names look like they mirror SQLite's virtual-table
// constraint operators -- confirm against the code that builds Constraints.
enum ConstraintOperator {
  Equal              = 0,
  GreaterThan        = 1,
  LessThanOrEqual    = 2,
  LessThan           = 3,
  GreaterThanOrEqual = 4,
  Match              = 5,
  Like               = 6,
  Glob               = 7,
  Regexp             = 8,
  NotEqual           = 9,
  IsNot              = 10,
  IsNotNull          = 11,
  IsNull             = 12,
  Is                 = 13
};
|
|
|
|
|
|
|
|
// Type tag for the value a constraint compares against; stored in
// Constraint::type.
//
// The numeric values are written out explicitly and are exactly the values
// the compiler assigned implicitly from declaration order.
enum ValueType {
  Null    = 0,
  Integer = 1,
  Double  = 2,
  Blob    = 3,
  Text    = 4
};
|
|
|
|
|
2018-03-25 03:51:15 +00:00
|
|
|
// Two packed bitmaps with one bit per row group:
//   - estimatedMembership: updated through setEstimatedMembership()
//   - actualMembership:    updated through setActualMembership()
//
// Row group `r` lives in bit (r % 8) of byte (r / 8). A set bit means the row
// group is assumed to have (estimated) or was found to have (actual) matching
// rows; every bit starts out set when the bitmap is sized from a row-group
// count.
//
// NOTE(review): the original comment here said this compares estimated
// rowGroupFilter results against the results observed when the row group was
// explored, and then stopped mid-sentence ("This lets us cache"). Presumably
// the point is that the observed results can be cached -- confirm.
//
// No bounds checking is performed: every `rowGroup` argument must satisfy
// rowGroup / 8 < size of the corresponding vector.
class RowGroupBitmap {
  // Sets (isSet == true) or clears (isSet == false) the bit belonging to
  // `rowGroup` inside `membership`, leaving all other bits untouched.
  static void setBit(std::vector<unsigned char>& membership, unsigned int rowGroup, bool isSet) {
    const unsigned int byte = rowGroup / 8;
    const unsigned char mask = static_cast<unsigned char>(1U << (rowGroup % 8));

    if(isSet) {
      membership[byte] = static_cast<unsigned char>(membership[byte] | mask);
    } else {
      membership[byte] = static_cast<unsigned char>(membership[byte] & ~mask);
    }
  }

public:
  // Sizes both bitmaps to hold `totalRowGroups` bits (rounded up to whole
  // bytes) and sets every bit: all row groups are assumed to match until
  // setEstimatedMembership()/setActualMembership() record otherwise.
  RowGroupBitmap(unsigned int totalRowGroups) :
    estimatedMembership((totalRowGroups + 7) / 8, static_cast<unsigned char>(0xFF)),
    actualMembership((totalRowGroups + 7) / 8, static_cast<unsigned char>(0xFF)) {
  }

  // Adopts previously built bitmaps. The arguments are taken by value and
  // moved into place, so passing temporaries costs no extra copy.
  RowGroupBitmap(
    std::vector<unsigned char> estimatedMembership,
    std::vector<unsigned char> actualMembership) :
    estimatedMembership(std::move(estimatedMembership)),
    actualMembership(std::move(actualMembership)) {
  }

  std::vector<unsigned char> estimatedMembership;
  std::vector<unsigned char> actualMembership;

  // Pass false only if the row group definitely does not have rows.
  void setEstimatedMembership(unsigned int rowGroup, bool hasRows) {
    setBit(estimatedMembership, rowGroup, hasRows);
  }

  // Pass false only after exhausting all rows of the row group.
  void setActualMembership(unsigned int rowGroup, bool hadRows) {
    setBit(actualMembership, rowGroup, hadRows);
  }

  // Returns the bit recorded for `rowGroup` in actualMembership.
  bool getActualMembership(unsigned int rowGroup) const {
    const unsigned int byte = rowGroup / 8;
    const unsigned int offset = rowGroup % 8;

    return ((actualMembership[byte] >> offset) & 1U) != 0;
  }
};
|
|
|
|
|
2018-03-11 17:58:10 +00:00
|
|
|
class Constraint {
|
|
|
|
public:
|
|
|
|
// Kind of a messy constructor function, but it's just for internal use, so whatever.
|
|
|
|
Constraint(
|
2018-03-25 03:51:15 +00:00
|
|
|
RowGroupBitmap bitmap,
|
2018-03-11 17:58:10 +00:00
|
|
|
int column,
|
2018-03-25 03:51:15 +00:00
|
|
|
std::string columnName,
|
2018-03-11 17:58:10 +00:00
|
|
|
ConstraintOperator op,
|
|
|
|
ValueType type,
|
2018-03-13 00:42:50 +00:00
|
|
|
int64_t intValue,
|
2018-03-11 17:58:10 +00:00
|
|
|
double doubleValue,
|
|
|
|
std::vector<unsigned char> blobValue
|
|
|
|
);
|
|
|
|
|
2018-03-25 03:51:15 +00:00
|
|
|
RowGroupBitmap bitmap;
|
2018-03-16 03:04:11 +00:00
|
|
|
int column; // underlying column in the query
|
2018-03-25 03:51:15 +00:00
|
|
|
std::string columnName;
|
2018-03-16 03:04:11 +00:00
|
|
|
ConstraintOperator op;
|
|
|
|
ValueType type;
|
|
|
|
|
|
|
|
int64_t intValue;
|
|
|
|
double doubleValue;
|
|
|
|
std::vector<unsigned char> blobValue;
|
|
|
|
// Only set when blobValue is set
|
|
|
|
std::string stringValue;
|
2018-03-17 19:28:51 +00:00
|
|
|
|
|
|
|
// Only set when stringValue is set and op == Like
|
|
|
|
std::string likeStringValue;
|
2018-03-25 03:51:15 +00:00
|
|
|
|
|
|
|
// A unique identifier for this constraint, e.g.
|
|
|
|
// col0 = 'Dawson Creek'
|
|
|
|
std::string describe() const;
|
|
|
|
|
|
|
|
// This is a temp field used while evaluating if a rowgroup had rows
|
|
|
|
// that matched this constraint.
|
|
|
|
int rowGroupId;
|
|
|
|
bool hadRows;
|
2018-06-24 00:31:03 +00:00
|
|
|
bool valid;
|
2018-03-11 17:58:10 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|