Scaffolding for row group filters, tests
rowid is special since its column index is -1, so add explicit tests around it
This commit is contained in:
parent
5559a7b563
commit
095b576cc2
|
@ -6,8 +6,35 @@ ParquetCursor::ParquetCursor(ParquetTable* table) {
|
||||||
reset(std::vector<Constraint>());
|
reset(std::vector<Constraint>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return true if it is _possible_ that the current
|
||||||
|
// rowgroup satisfies the constraints. Only return false
|
||||||
|
// if it definitely does not.
|
||||||
|
//
|
||||||
|
// This avoids opening rowgroups that can't return useful
|
||||||
|
// data, which provides substantial performance benefits.
|
||||||
|
bool ParquetCursor::currentRowGroupSatisfiesFilter() {
|
||||||
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
|
int column = constraints[i].getColumn();
|
||||||
|
int op = constraints[i].getOperator();
|
||||||
|
bool rv = true;
|
||||||
|
|
||||||
|
// printf("column = %d\n", column);
|
||||||
|
// std::unique_ptr<parquet::ColumnChunkMetaData> md = rowGroupMetadata->ColumnChunk(column);
|
||||||
|
|
||||||
|
if(op == IsNull) {
|
||||||
|
} else if(op == IsNotNull) {
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!rv)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool ParquetCursor::nextRowGroup() {
|
bool ParquetCursor::nextRowGroup() {
|
||||||
// TODO: skip row groups that cannot satisfy the constraints
|
start:
|
||||||
if((rowGroupId + 1) >= numRowGroups)
|
if((rowGroupId + 1) >= numRowGroups)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -36,23 +63,34 @@ bool ParquetCursor::nextRowGroup() {
|
||||||
colRows[i] = rowId;
|
colRows[i] = rowId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!currentRowGroupSatisfiesFilter())
|
||||||
|
goto start;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return true if it is _possible_ that the current
|
// Return true if it is _possible_ that the current
|
||||||
// row satisfies the constraints. Only return false
|
// row satisfies the constraints. Only return false
|
||||||
// if it definitely does not.
|
// if it definitely does not.
|
||||||
|
//
|
||||||
|
// This avoids pointless transitions between the SQLite VM
|
||||||
|
// and the extension, which can add up on a dataset of tens
|
||||||
|
// of millions of rows.
|
||||||
bool ParquetCursor::currentRowSatisfiesFilter() {
|
bool ParquetCursor::currentRowSatisfiesFilter() {
|
||||||
for(unsigned int i = 0; i < constraints.size(); i++) {
|
for(unsigned int i = 0; i < constraints.size(); i++) {
|
||||||
|
bool rv = true;
|
||||||
int column = constraints[i].getColumn();
|
int column = constraints[i].getColumn();
|
||||||
ensureColumn(column);
|
ensureColumn(column);
|
||||||
int op = constraints[i].getOperator();
|
int op = constraints[i].getOperator();
|
||||||
|
|
||||||
if(op == IsNull) {
|
if(op == IsNull) {
|
||||||
return isNull(column);
|
rv = isNull(column);
|
||||||
} else if(op == IsNotNull) {
|
} else if(op == IsNotNull) {
|
||||||
return !isNull(column);
|
rv = !isNull(column);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!rv)
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,6 +36,7 @@ public:
|
||||||
ParquetCursor(ParquetTable* table);
|
ParquetCursor(ParquetTable* table);
|
||||||
int getRowId();
|
int getRowId();
|
||||||
bool currentRowSatisfiesFilter();
|
bool currentRowSatisfiesFilter();
|
||||||
|
bool currentRowGroupSatisfiesFilter();
|
||||||
void next();
|
void next();
|
||||||
void close();
|
void close();
|
||||||
void reset(std::vector<Constraint> constraints);
|
void reset(std::vector<Constraint> constraints);
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid > 100
|
||||||
|
0
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid >= 100
|
||||||
|
0
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid < 0
|
||||||
|
0
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid < -1
|
||||||
|
0
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid <= 0
|
||||||
|
1
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid < 1
|
||||||
|
1
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid <> 1
|
||||||
|
98
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid is null
|
||||||
|
0
|
|
@ -0,0 +1,2 @@
|
||||||
|
select count(*) from no_nulls1 where rowid is not null
|
||||||
|
99
|
Loading…
Reference in New Issue