Row-filtering for other string ops
This commit is contained in:
parent
03a20a9432
commit
a3af16eb54
|
@ -66,22 +66,10 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, s
|
||||||
return !(minStr == maxStr && str == minStr);
|
return !(minStr == maxStr && str == minStr);
|
||||||
case Like:
|
case Like:
|
||||||
{
|
{
|
||||||
std::string truncated = str;
|
const std::string& likeStringValue = constraint.likeStringValue;
|
||||||
size_t idx = truncated.find_first_of("%");
|
std::string truncatedMin = minStr.substr(0, likeStringValue.size());
|
||||||
if(idx != std::string::npos) {
|
std::string truncatedMax = maxStr.substr(0, likeStringValue.size());
|
||||||
truncated = truncated.substr(0, idx);
|
return likeStringValue.empty() || (likeStringValue >= truncatedMin && likeStringValue <= truncatedMax);
|
||||||
}
|
|
||||||
idx = truncated.find_first_of("_");
|
|
||||||
if(idx != std::string::npos) {
|
|
||||||
truncated = truncated.substr(0, idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
// This permits more rowgroups than is strictly needed
|
|
||||||
// since it assumes an implicit wildcard. But it's
|
|
||||||
// simple to implement, so we'll go with it.
|
|
||||||
std::string truncatedMin = minStr.substr(0, truncated.size());
|
|
||||||
std::string truncatedMax = maxStr.substr(0, truncated.size());
|
|
||||||
return truncated.empty() || (truncated >= truncatedMin && truncated <= truncatedMax);
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return true;
|
return true;
|
||||||
|
@ -245,33 +233,87 @@ bool ParquetCursor::currentRowSatisfiesTextFilter(Constraint& constraint) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<unsigned char>& blob = constraint.blobValue;
|
|
||||||
parquet::ByteArray* ba = getByteArray(constraint.column);
|
parquet::ByteArray* ba = getByteArray(constraint.column);
|
||||||
|
|
||||||
switch(constraint.op) {
|
switch(constraint.op) {
|
||||||
case Is:
|
case Is:
|
||||||
case Equal:
|
case Equal:
|
||||||
|
{
|
||||||
|
const std::vector<unsigned char>& blob = constraint.blobValue;
|
||||||
|
|
||||||
if(blob.size() != ba->len)
|
if(blob.size() != ba->len)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return 0 == memcmp(&blob[0], ba->ptr, ba->len);
|
return 0 == memcmp(&blob[0], ba->ptr, ba->len);
|
||||||
|
}
|
||||||
case IsNot:
|
case IsNot:
|
||||||
case NotEqual:
|
case NotEqual:
|
||||||
|
{
|
||||||
|
const std::vector<unsigned char>& blob = constraint.blobValue;
|
||||||
|
|
||||||
if(blob.size() != ba->len)
|
if(blob.size() != ba->len)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return 0 != memcmp(&blob[0], ba->ptr, ba->len);
|
return 0 != memcmp(&blob[0], ba->ptr, ba->len);
|
||||||
|
}
|
||||||
case GreaterThan:
|
case GreaterThan:
|
||||||
|
{
|
||||||
|
const std::vector<unsigned char>& blob = constraint.blobValue;
|
||||||
|
|
||||||
|
return std::lexicographical_compare(
|
||||||
|
&blob[0],
|
||||||
|
&blob[0] + blob.size(),
|
||||||
|
ba->ptr,
|
||||||
|
ba->ptr + ba->len);
|
||||||
|
}
|
||||||
case GreaterThanOrEqual:
|
case GreaterThanOrEqual:
|
||||||
|
{
|
||||||
|
const std::vector<unsigned char>& blob = constraint.blobValue;
|
||||||
|
|
||||||
|
bool equal = blob.size() == ba->len && 0 == memcmp(&blob[0], ba->ptr, ba->len);
|
||||||
|
|
||||||
|
return equal || std::lexicographical_compare(
|
||||||
|
&blob[0],
|
||||||
|
&blob[0] + blob.size(),
|
||||||
|
ba->ptr,
|
||||||
|
ba->ptr + ba->len);
|
||||||
|
}
|
||||||
case LessThan:
|
case LessThan:
|
||||||
|
{
|
||||||
|
const std::vector<unsigned char>& blob = constraint.blobValue;
|
||||||
|
|
||||||
|
return std::lexicographical_compare(
|
||||||
|
ba->ptr,
|
||||||
|
ba->ptr + ba->len,
|
||||||
|
&blob[0],
|
||||||
|
&blob[0] + blob.size());
|
||||||
|
}
|
||||||
case LessThanOrEqual:
|
case LessThanOrEqual:
|
||||||
|
{
|
||||||
|
const std::vector<unsigned char>& blob = constraint.blobValue;
|
||||||
|
|
||||||
|
bool equal = blob.size() == ba->len && 0 == memcmp(&blob[0], ba->ptr, ba->len);
|
||||||
|
|
||||||
|
return equal || std::lexicographical_compare(
|
||||||
|
ba->ptr,
|
||||||
|
ba->ptr + ba->len,
|
||||||
|
&blob[0],
|
||||||
|
&blob[0] + blob.size());
|
||||||
|
}
|
||||||
case Like:
|
case Like:
|
||||||
|
{
|
||||||
|
const std::string& likeStringValue = constraint.likeStringValue;
|
||||||
|
if(likeStringValue.size() > ba->len)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
size_t len = ba->len;
|
||||||
|
if(likeStringValue.size() < len)
|
||||||
|
len = likeStringValue.size();
|
||||||
|
return 0 == memcmp(&likeStringValue[0], ba->ptr, len);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ParquetCursor::currentRowSatisfiesIntegerFilter(Constraint& constraint) {
|
bool ParquetCursor::currentRowSatisfiesIntegerFilter(Constraint& constraint) {
|
||||||
|
|
|
@ -15,6 +15,22 @@ Constraint::Constraint(
|
||||||
this->doubleValue = doubleValue;
|
this->doubleValue = doubleValue;
|
||||||
this->blobValue = blobValue;
|
this->blobValue = blobValue;
|
||||||
|
|
||||||
if(type == Text)
|
if(type == Text) {
|
||||||
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
||||||
|
|
||||||
|
if(op == Like) {
|
||||||
|
// This permits more rowgroups than is strictly needed
|
||||||
|
// since it assumes an implicit wildcard. But it's
|
||||||
|
// simple to implement, so we'll go with it.
|
||||||
|
likeStringValue = stringValue;
|
||||||
|
size_t idx = likeStringValue.find_first_of("%");
|
||||||
|
if(idx != std::string::npos) {
|
||||||
|
likeStringValue = likeStringValue.substr(0, idx);
|
||||||
|
}
|
||||||
|
idx = likeStringValue.find_first_of("_");
|
||||||
|
if(idx != std::string::npos) {
|
||||||
|
likeStringValue = likeStringValue.substr(0, idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,6 +51,9 @@ public:
|
||||||
std::vector<unsigned char> blobValue;
|
std::vector<unsigned char> blobValue;
|
||||||
// Only set when blobValue is set
|
// Only set when blobValue is set
|
||||||
std::string stringValue;
|
std::string stringValue;
|
||||||
|
|
||||||
|
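// Only set when stringValue is set and op == Like; holds the literal prefix of the LIKE pattern, i.e. the part of stringValue before the first '%' or '_' wildcard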
|
||||||
|
std::string likeStringValue;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where string_8 <= '003'
|
||||||
|
4
|
Loading…
Reference in New Issue