LIKE row group filtering

~1.7s -> ~1.0s for the census data set on `LIKE 'Dawson %'`
This commit is contained in:
Colin Dellow 2018-03-17 00:11:38 -04:00
parent 753a490687
commit 03a20a9432
1 changed file with 18 additions and 2 deletions

View File

@@ -65,8 +65,24 @@ bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, s
// If min == max == str, we can skip this. // If min == max == str, we can skip this.
return !(minStr == maxStr && str == minStr); return !(minStr == maxStr && str == minStr);
case Like: case Like:
// TODO: We could do something here where we filter based on the leading characters {
// of the target. For now, do nothing. std::string truncated = str;
size_t idx = truncated.find_first_of("%");
if(idx != std::string::npos) {
truncated = truncated.substr(0, idx);
}
idx = truncated.find_first_of("_");
if(idx != std::string::npos) {
truncated = truncated.substr(0, idx);
}
// This permits more rowgroups than is strictly needed
// since it assumes an implicit wildcard. But it's
// simple to implement, so we'll go with it.
std::string truncatedMin = minStr.substr(0, truncated.size());
std::string truncatedMax = maxStr.substr(0, truncated.size());
return truncated.empty() || (truncated >= truncatedMin && truncated <= truncatedMax);
}
default: default:
return true; return true;
} }