diff --git a/parquet/cmds.txt b/parquet/cmds.txt
index 482a267..5db014f 100644
--- a/parquet/cmds.txt
+++ b/parquet/cmds.txt
@@ -22,3 +22,4 @@ create virtual table parquet using parquet('/home/cldellow/src/csv2parquet/12m.p
 --select * from parquet limit 10;
 --select sum(length(col3)) from parquet;
 select * from parquet where rowid = 123 or (col3 = 'Dawson Creeks') or col9 LIKE '%Bicycqq%' limit 20000;
+select * from parquet limit 1;
diff --git a/parquet/go b/parquet/go
index 2891431..b6f4088 100755
--- a/parquet/go
+++ b/parquet/go
@@ -1,6 +1,8 @@
 #!/bin/bash
 
 set -euo pipefail
+
+cd "$(dirname "${BASH_SOURCE[0]}")"
 
 make
 
diff --git a/parquet/parquet.cc b/parquet/parquet.cc
index 67e4bdc..80d1a62 100644
--- a/parquet/parquet.cc
+++ b/parquet/parquet.cc
@@ -277,6 +277,75 @@ static int parquetEof(sqlite3_vtab_cursor *cur){
   return 0;
 }
 
+/*
+** Debug helper: print one line per constraint described by pIdxInfo.
+**
+** xBestIndex passes argv == NULL, so every value prints as "?". xFilter
+** passes its own argv/argc; argv holds only argc values, which may be fewer
+** than nConstraint, so a value is read only while i < argc.
+** NOTE(review): assumes argv[i] belongs to constraint i (argvIndex == i + 1);
+** confirm against the argvIndex assignment in xBestIndex.
+*/
+void debugConstraints(sqlite3_index_info *pIdxInfo, ParquetTable *table, sqlite3_value** argv, int argc = 0) {
+  // xFilter obtains pIdxInfo by casting idxStr; bail out if it is NULL.
+  if(pIdxInfo == NULL) {
+    return;
+  }
+  for(int i = 0; i < pIdxInfo->nConstraint; i++) {
+    std::string valueStr = "?";
+    if(argv != NULL && i < argc) {
+      int type = sqlite3_value_type(argv[i]);
+      switch(type) {
+        case SQLITE_INTEGER:
+        {
+          sqlite3_int64 rv = sqlite3_value_int64(argv[i]);
+          std::ostringstream ss;
+          ss << rv;
+          valueStr = ss.str();
+          break;
+        }
+        case SQLITE_FLOAT:
+        {
+          double rv = sqlite3_value_double(argv[i]);
+          std::ostringstream ss;
+          ss << rv;
+          valueStr = ss.str();
+          break;
+        }
+        case SQLITE_TEXT:
+        {
+          // sqlite3_value_text() can return NULL (e.g. on OOM); never stream a null pointer.
+          const unsigned char* rv = sqlite3_value_text(argv[i]);
+          std::ostringstream ss;
+          ss << "'" << (rv != NULL ? (const char*)rv : "") << "'";
+          valueStr = ss.str();
+          break;
+        }
+        case SQLITE_BLOB:
+        {
+          int sizeBytes = sqlite3_value_bytes(argv[i]);
+          std::ostringstream ss;
+          ss << "'..." << sizeBytes << "-byte blob...'";
+          valueStr = ss.str();
+          break;
+        }
+        case SQLITE_NULL:
+        {
+          valueStr = "NULL";
+          break;
+        }
+      }
+    }
+    printf(" constraint %d: col %s %s %s, usable %d\n",
+        i,
+        table->columnName(pIdxInfo->aConstraint[i].iColumn).data(),
+        opName(pIdxInfo->aConstraint[i].op),
+        valueStr.data(),
+        pIdxInfo->aConstraint[i].usable);
+  }
+}
+
+
 /*
 ** Only a full table scan is supported. So xFilter simply rewinds to
 ** the beginning.
@@ -288,10 +357,9 @@ static int parquetFilter(
   int argc,
   sqlite3_value **argv
 ){
-  printf("xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", idxNum, (long unsigned int)idxStr, argc);
-  const unsigned char* needle = sqlite3_value_text(argv[0]);
-  printf(" ...%s\n", needle);
   ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
+  printf("xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", idxNum, (long unsigned int)idxStr, argc);
+  debugConstraints((sqlite3_index_info*)idxStr, cursor->getTable(), argv, argc);
   cursor->reset();
   return parquetNext(cur);
 }
@@ -307,16 +375,7 @@ static int parquetBestIndex(
   ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table;
 
   printf("xBestIndex: nConstraint=%d, nOrderBy=%d\n", pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
-  // Duplicate pIdxInfo and stash it in pIdxInfo->idxStr.
-  for(int i = 0; i < pIdxInfo->nConstraint; i++) {
-    printf(" constraint %d: col %d[%s], op %d[%s], usable %d\n",
-        i,
-        pIdxInfo->aConstraint[i].iColumn,
-        table->columnName(pIdxInfo->aConstraint[i].iColumn).data(),
-        pIdxInfo->aConstraint[i].op,
-        opName(pIdxInfo->aConstraint[i].op),
-        pIdxInfo->aConstraint[i].usable);
-  }
+  debugConstraints(pIdxInfo, table, NULL);
 
   if((pIdxInfo->nConstraint == 0 && pIdxInfo->nOrderBy == 0)) {
     pIdxInfo->estimatedCost = 1000000000000;
diff --git a/parquet/parquet_cursor.cc b/parquet/parquet_cursor.cc
index 2543a79..51fdc99 100644
--- a/parquet/parquet_cursor.cc
+++ b/parquet/parquet_cursor.cc
@@ -328,3 +328,5 @@ void ParquetCursor::reset() {
   numRows = reader->metadata()->num_rows();
   numRowGroups = reader->metadata()->num_row_groups();
 }
+
+ParquetTable* ParquetCursor::getTable() { return table; }
diff --git a/parquet/parquet_cursor.h b/parquet/parquet_cursor.h
index 8347bb7..1f8d1e7 100644
--- a/parquet/parquet_cursor.h
+++ b/parquet/parquet_cursor.h
@@ -41,6 +41,7 @@ public:
   bool isNull(int col);
   parquet::Type::type getPhysicalType(int col);
   parquet::LogicalType::type getLogicalType(int col);
+  ParquetTable* getTable();
 
   int getInt32(int col);
   long getInt64(int col);