1
0
mirror of https://github.com/cldellow/sqlite-parquet-vtable.git synced 2025-09-10 22:28:53 +00:00

Add query test framework, fix xFilter

This commit is contained in:
Colin Dellow
2018-03-04 21:05:26 -05:00
parent 4c54ab89ae
commit bb3a9440f7
8 changed files with 284 additions and 32 deletions

View File

@@ -121,6 +121,7 @@ static int parquetCreate(
*/
static int parquetClose(sqlite3_vtab_cursor *cur){
sqlite3_vtab_cursor_parquet* p = (sqlite3_vtab_cursor_parquet*)cur;
p->cursor->close();
delete p->cursor;
sqlite3_free(cur);
return SQLITE_OK;
@@ -249,28 +250,14 @@ static int parquetEof(sqlite3_vtab_cursor *cur){
** the beginning.
*/
static int parquetFilter(
sqlite3_vtab_cursor *pVtabCursor,
sqlite3_vtab_cursor *cur,
int idxNum, const char *idxStr,
int argc, sqlite3_value **argv
){
// NOTE(review): this span lists two cursor parameters (pVtabCursor and cur)
// and contains two return statements. It looks like an older and a newer
// version of the function shown interleaved -- confirm which lines are
// current before relying on this text.
// Debug trace written to stdout on every call.
printf("xFilter\n");
//sqlite3_vtab_cursor_parquet *pCur = (sqlite3_vtab_cursor_parquet*)pVtabCursor;
//sqlite3_vtab_parquet *pTab = (sqlite3_vtab_parquet*)pVtabCursor->pVtab;
// The block comment below holds disabled code and has no effect.
/*
pCur->iRowid = 0;
if( pCur->rdr.in==0 ){
assert( pCur->rdr.zIn==pTab->zData );
assert( pTab->iStart>=0 );
assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
pCur->rdr.iIn = pTab->iStart;
}else{
fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
pCur->rdr.iIn = 0;
pCur->rdr.nIn = 0;
}
*/
// Delegates to parquetNext and returns its result. As written, the
// statements after this return are never reached.
return parquetNext(pVtabCursor);
// Fetches the ParquetCursor held by the sqlite3_vtab_cursor_parquet wrapper,
// calls reset() on it, then returns whatever parquetNext reports for the
// same sqlite cursor.
ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
cursor->reset();
return parquetNext(cur);
}
/*

View File

@@ -2,27 +2,17 @@
// Builds a cursor over the given table: stores the table pointer and
// initialises the row / row-group bookkeeping fields and the file reader.
// NOTE(review): the body opens the reader and fills in the counters directly,
// then sets reader to NULL and calls reset(). That looks like an older and a
// newer version of the constructor shown interleaved -- confirm which lines
// are current before relying on this text.
ParquetCursor::ParquetCursor(ParquetTable* table) {
this->table = table;
this->rowId = -1;
// TODO: consider having a long lived handle in ParquetTable that can be borrowed
// without incurring the cost of opening the file from scratch twice
this->reader = parquet::ParquetFileReader::OpenFile(this->table->file.data());
this->rowGroupId = -1;
// TODO: handle the case where rowgroups have disjoint schemas?
// TODO: or at least, fail fast if detected
this->rowsLeftInRowGroup = 0;
// Row and row-group totals are read from the file-level metadata.
this->numRows = reader->metadata()->num_rows();
this->numRowGroups = reader->metadata()->num_row_groups();
// reader is nulled before reset() runs, so the close() call inside reset()
// finds no reader to close.
reader = NULL;
reset();
}
bool ParquetCursor::nextRowGroup() {
// TODO: skip row groups that cannot satisfy the constraints
if((this->rowGroupId + 1) >= this->numRowGroups)
if((rowGroupId + 1) >= numRowGroups)
return false;
rowGroupId++;
rowGroupMetadata = this->reader->metadata()->RowGroup(0);
rowGroupMetadata = reader->metadata()->RowGroup(0);
rowsLeftInRowGroup = rowGroupMetadata->num_rows();
rowGroup = reader->RowGroup(rowGroupId);
for(unsigned int i = 0; i < scanners.size(); i++)
@@ -235,3 +225,25 @@ parquet::Type::type ParquetCursor::getPhysicalType(int col) {
// Looks up logicalTypes[col] and returns it.
// `col` is used as an index as-is; this function performs no validation.
parquet::LogicalType::type ParquetCursor::getLogicalType(int col) {
  const auto& logicalType = this->logicalTypes[col];
  return logicalType;
}
// Closes the Parquet file reader if one is currently set.
// Does nothing when reader is null. The reader member itself is left as-is
// (not cleared) after Close() returns.
void ParquetCursor::close() {
  if(!reader)
    return;

  reader->Close();
}
void ParquetCursor::reset() {
close();
rowId = -1;
// TODO: consider having a long lived handle in ParquetTable that can be borrowed
// without incurring the cost of opening the file from scratch twice
reader = parquet::ParquetFileReader::OpenFile(table->file.data());
rowGroupId = -1;
// TODO: handle the case where rowgroups have disjoint schemas?
// TODO: or at least, fail fast if detected
rowsLeftInRowGroup = 0;
numRows = reader->metadata()->num_rows();
numRowGroups = reader->metadata()->num_row_groups();
}

View File

@@ -32,6 +32,8 @@ public:
ParquetCursor(ParquetTable* table);
int getRowId();
void next();
void close();
void reset();
bool eof();
void ensureColumn(int col);