mirror of
				https://github.com/cldellow/sqlite-parquet-vtable.git
				synced 2025-10-27 01:59:56 +00:00 
			
		
		
		
	Cache clauses -> row group mapping
Create a shadow table. For `stats`, it'd be `_stats_rowgroups`. It contains three columns: - the clause (eg `city = 'Dawson Creek'`) - the initial estimate, as a bitmap of rowgroups based on stats - the actual observed rowgroups, as a bitmap This papers over poorly sorted parquet files, at the cost of some disk space. It makes interactive queries much more natural -- drilldown style queries are much faster, as they can leverage work done by previous queries. eg 'SELECT * FROM stats WHERE city = 'Dawson Creek' and question_id >= 1935 and question_id <= 1940` takes ~584ms on first run, but 9ms on subsequent runs. We only create entries when the estimates don't match the actual results. Fixes #6
This commit is contained in:
		
							
								
								
									
										12
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								README.md
									
									
									
									
									
								
							| @@ -72,6 +72,18 @@ constraints before returning control to SQLite's virtual machine. This minimizes | |||||||
| the number of allocations performed when many rows are filtered out by | the number of allocations performed when many rows are filtered out by | ||||||
| the user's criteria. | the user's criteria. | ||||||
|  |  | ||||||
|  | ### Memoized slices | ||||||
|  |  | ||||||
|  | Individual clauses are mapped to the row groups they match. | ||||||
|  |  | ||||||
|  | eg going on row group statistics, which store minimum and maximum values, a clause | ||||||
|  | like `WHERE city = 'Dawson Creek'` may match 80% of row groups. | ||||||
|  |  | ||||||
|  | In reality, it may only be present in one or two row groups. | ||||||
|  |  | ||||||
|  | This is recorded in a shadow table so future queries that contain that clause | ||||||
|  | can read only the necessary row groups. | ||||||
|  |  | ||||||
| ### Types | ### Types | ||||||
|  |  | ||||||
| These Parquet types are supported: | These Parquet types are supported: | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ SQLITE_EXTENSION_INIT1 | |||||||
| #include <stdarg.h> | #include <stdarg.h> | ||||||
| #include <ctype.h> | #include <ctype.h> | ||||||
| #include <stdio.h> | #include <stdio.h> | ||||||
|  | #include <iomanip> | ||||||
|  |  | ||||||
| #include <memory> | #include <memory> | ||||||
|  |  | ||||||
| @@ -32,6 +33,7 @@ static int parquetConnect(sqlite3*, void*, int, const char*const*, | |||||||
|                            sqlite3_vtab**,char**); |                            sqlite3_vtab**,char**); | ||||||
| static int parquetBestIndex(sqlite3_vtab*,sqlite3_index_info*); | static int parquetBestIndex(sqlite3_vtab*,sqlite3_index_info*); | ||||||
| static int parquetDisconnect(sqlite3_vtab*); | static int parquetDisconnect(sqlite3_vtab*); | ||||||
|  | static int parquetDestroy(sqlite3_vtab*); | ||||||
| static int parquetOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); | static int parquetOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); | ||||||
| static int parquetClose(sqlite3_vtab_cursor*); | static int parquetClose(sqlite3_vtab_cursor*); | ||||||
| static int parquetFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, | static int parquetFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, | ||||||
| @@ -45,6 +47,7 @@ static int parquetRowid(sqlite3_vtab_cursor*,sqlite3_int64*); | |||||||
| typedef struct sqlite3_vtab_parquet { | typedef struct sqlite3_vtab_parquet { | ||||||
|   sqlite3_vtab base;              /* Base class.  Must be first */ |   sqlite3_vtab base;              /* Base class.  Must be first */ | ||||||
|   ParquetTable* table; |   ParquetTable* table; | ||||||
|  |   sqlite3* db; | ||||||
| } sqlite3_vtab_parquet; | } sqlite3_vtab_parquet; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -54,6 +57,21 @@ typedef struct sqlite3_vtab_cursor_parquet { | |||||||
|   ParquetCursor* cursor; |   ParquetCursor* cursor; | ||||||
| } sqlite3_vtab_cursor_parquet; | } sqlite3_vtab_cursor_parquet; | ||||||
|  |  | ||||||
|  | static int parquetDestroy(sqlite3_vtab *pVtab) { | ||||||
|  |   sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet*)pVtab; | ||||||
|  |  | ||||||
|  |   // Clean up our shadow table. This is useful if the user has recreated | ||||||
|  |   // the parquet file, and our mappings would now be invalid. | ||||||
|  |   std::string drop = "DROP TABLE IF EXISTS _"; | ||||||
|  |   drop.append(p->table->getTableName()); | ||||||
|  |   drop.append("_rowgroups"); | ||||||
|  |   int rv = sqlite3_exec(p->db, drop.data(), 0, 0, 0); | ||||||
|  |   if(rv != 0) | ||||||
|  |     return rv; | ||||||
|  |  | ||||||
|  |   return SQLITE_OK; | ||||||
|  | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
| ** This method is the destructor fo a sqlite3_vtab_parquet object. | ** This method is the destructor fo a sqlite3_vtab_parquet object. | ||||||
| */ | */ | ||||||
| @@ -78,6 +96,7 @@ static int parquetConnect( | |||||||
|       return SQLITE_ERROR; |       return SQLITE_ERROR; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     std::string tableName = argv[2]; | ||||||
|     // Remove the delimiting single quotes |     // Remove the delimiting single quotes | ||||||
|     std::string fname = argv[3]; |     std::string fname = argv[3]; | ||||||
|     fname = fname.substr(1, fname.length() - 2); |     fname = fname.substr(1, fname.length() - 2); | ||||||
| @@ -87,7 +106,7 @@ static int parquetConnect( | |||||||
|     memset(vtab.get(), 0, sizeof(*vtab.get())); |     memset(vtab.get(), 0, sizeof(*vtab.get())); | ||||||
|  |  | ||||||
|     try { |     try { | ||||||
|       std::unique_ptr<ParquetTable> table(new ParquetTable(fname)); |       std::unique_ptr<ParquetTable> table(new ParquetTable(fname, tableName)); | ||||||
|  |  | ||||||
|       std::string create = table->CreateStatement(); |       std::string create = table->CreateStatement(); | ||||||
|       int rc = sqlite3_declare_vtab(db, create.data()); |       int rc = sqlite3_declare_vtab(db, create.data()); | ||||||
| @@ -95,6 +114,7 @@ static int parquetConnect( | |||||||
|         return rc; |         return rc; | ||||||
|  |  | ||||||
|       vtab->table = table.release(); |       vtab->table = table.release(); | ||||||
|  |       vtab->db = db; | ||||||
|       *ppVtab = (sqlite3_vtab*)vtab.release(); |       *ppVtab = (sqlite3_vtab*)vtab.release(); | ||||||
|       return SQLITE_OK; |       return SQLITE_OK; | ||||||
|     } catch (const std::exception& e) { |     } catch (const std::exception& e) { | ||||||
| @@ -119,16 +139,81 @@ static int parquetCreate( | |||||||
|   sqlite3_vtab **ppVtab, |   sqlite3_vtab **ppVtab, | ||||||
|   char **pzErr |   char **pzErr | ||||||
| ){ | ){ | ||||||
|  return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr); |   try { | ||||||
|  |     // Create shadow table for storing constraint -> rowid mappings | ||||||
|  |     std::string create = "CREATE TABLE IF NOT EXISTS _"; | ||||||
|  |     create.append(argv[2]); | ||||||
|  |     create.append("_rowgroups(clause TEXT, estimate BLOB, actual BLOB)"); | ||||||
|  |     int rv = sqlite3_exec(db, create.data(), 0, 0, 0); | ||||||
|  |     if(rv != 0) | ||||||
|  |       return rv; | ||||||
|  |  | ||||||
|  |     create = "CREATE UNIQUE INDEX IF NOT EXISTS _"; | ||||||
|  |     create.append(argv[2]); | ||||||
|  |     create.append("_index ON _"); | ||||||
|  |     create.append(argv[2]); | ||||||
|  |     create.append("_rowgroups(clause)"); | ||||||
|  |     rv = sqlite3_exec(db, create.data(), 0, 0, 0); | ||||||
|  |  | ||||||
|  |     return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr); | ||||||
|  |   } catch (std::bad_alloc& ba) { | ||||||
|  |     return SQLITE_NOMEM; | ||||||
|  |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::string quoteBlob(const std::vector<unsigned char>& bytes) { | ||||||
|  |   std::ostringstream ss; | ||||||
|  |   ss << "X'" << std::hex; | ||||||
|  |   for(unsigned int i = 0; i < bytes.size(); i++) { | ||||||
|  |     ss << std::setfill('0') << std::setw(2) << (unsigned int)(unsigned char)bytes[i]; | ||||||
|  |   } | ||||||
|  |   ss << "'"; | ||||||
|  |  | ||||||
|  |   return ss.str(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void persistConstraints(sqlite3* db, ParquetCursor* cursor) { | ||||||
|  |   for(unsigned int i = 0; i < cursor->getNumConstraints(); i++) { | ||||||
|  |     const Constraint& constraint = cursor->getConstraint(i); | ||||||
|  |     const std::vector<unsigned char>& estimated = constraint.bitmap.estimatedMembership; | ||||||
|  |     const std::vector<unsigned char>& actual = constraint.bitmap.actualMembership; | ||||||
|  |     if(estimated == actual) { | ||||||
|  |       continue; | ||||||
|  |     } | ||||||
|  |     std::string desc = constraint.describe(); | ||||||
|  |  | ||||||
|  |     std::string estimatedStr = quoteBlob(estimated); | ||||||
|  |     std::string actualStr = quoteBlob(actual); | ||||||
|  |  | ||||||
|  |     // This is only advisory, so ignore failures. | ||||||
|  |     char* sql = sqlite3_mprintf( | ||||||
|  |         "INSERT OR REPLACE INTO _%s_rowgroups(clause, estimate, actual) VALUES ('%q', %s, %s)", | ||||||
|  |         cursor->getTable()->getTableName().c_str(), | ||||||
|  |         desc.c_str(), | ||||||
|  |         estimatedStr.c_str(), | ||||||
|  |         actualStr.c_str()); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     if(sql == NULL) | ||||||
|  |       return; | ||||||
|  |  | ||||||
|  |     sqlite3_exec(db, sql, 0, 0, 0); | ||||||
|  |     sqlite3_free(sql); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
| ** Destructor for a sqlite3_vtab_cursor_parquet. | ** Destructor for a sqlite3_vtab_cursor_parquet. | ||||||
| */ | */ | ||||||
| static int parquetClose(sqlite3_vtab_cursor *cur){ | static int parquetClose(sqlite3_vtab_cursor *cur){ | ||||||
|   sqlite3_vtab_cursor_parquet* p = (sqlite3_vtab_cursor_parquet*)cur; |   sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur; | ||||||
|   p->cursor->close(); |   sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab); | ||||||
|   delete p->cursor; |   ParquetCursor* cursor = vtab_cursor_parquet->cursor; | ||||||
|  |   persistConstraints(vtab_parquet->db, cursor); | ||||||
|  |  | ||||||
|  |   vtab_cursor_parquet->cursor->close(); | ||||||
|  |   delete vtab_cursor_parquet->cursor; | ||||||
|   sqlite3_free(cur); |   sqlite3_free(cur); | ||||||
|   return SQLITE_OK; |   return SQLITE_OK; | ||||||
| } | } | ||||||
| @@ -196,7 +281,8 @@ const char* opName(int op) { | |||||||
| */ | */ | ||||||
| static int parquetNext(sqlite3_vtab_cursor *cur){ | static int parquetNext(sqlite3_vtab_cursor *cur){ | ||||||
|   try { |   try { | ||||||
|     ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; |     sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur; | ||||||
|  |     ParquetCursor* cursor = vtab_cursor_parquet->cursor; | ||||||
|     cursor->next(); |     cursor->next(); | ||||||
|     return SQLITE_OK; |     return SQLITE_OK; | ||||||
|   } catch(std::bad_alloc& ba) { |   } catch(std::bad_alloc& ba) { | ||||||
| @@ -395,6 +481,38 @@ ConstraintOperator constraintOperatorFromSqlite(int op) { | |||||||
|   throw std::invalid_argument(ss.str()); |   throw std::invalid_argument(ss.str()); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::vector<unsigned char> getRowGroupsForClause(sqlite3* db, std::string table, std::string clause) { | ||||||
|  |   std::vector<unsigned char> rv; | ||||||
|  |  | ||||||
|  |   std::unique_ptr<char, void(*)(void*)> sql(sqlite3_mprintf( | ||||||
|  |       "SELECT actual FROM _%s_rowgroups WHERE clause = '%q'", | ||||||
|  |       table.c_str(), | ||||||
|  |       clause.c_str()), sqlite3_free); | ||||||
|  |  | ||||||
|  |   if(sql.get() == NULL) | ||||||
|  |     return rv; | ||||||
|  |  | ||||||
|  |   sqlite3_stmt* pStmt = NULL; | ||||||
|  |   int rc = sqlite3_prepare_v2(db, sql.get(), -1, &pStmt, NULL); | ||||||
|  |   if(rc != 0) | ||||||
|  |     return rv; | ||||||
|  |  | ||||||
|  |   rc = sqlite3_step(pStmt); | ||||||
|  |   if(rc == SQLITE_ROW) { | ||||||
|  |     int size = sqlite3_column_bytes(pStmt, 0); | ||||||
|  |     unsigned char* blob = (unsigned char*)sqlite3_column_blob(pStmt, 0); | ||||||
|  |     // TODO: there is a memory leak here if we get a std::bad_alloc while populating rv; | ||||||
|  |     // we fail to free pStmt | ||||||
|  |     for(int i = 0; i < size; i++) { | ||||||
|  |       rv.push_back(blob[i]); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   sqlite3_finalize(pStmt); | ||||||
|  |   return rv; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
| ** Only a full table scan is supported.  So xFilter simply rewinds to | ** Only a full table scan is supported.  So xFilter simply rewinds to | ||||||
| ** the beginning. | ** the beginning. | ||||||
| @@ -407,7 +525,10 @@ static int parquetFilter( | |||||||
|   sqlite3_value **argv |   sqlite3_value **argv | ||||||
| ){ | ){ | ||||||
|   try { |   try { | ||||||
|     ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; |     sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur; | ||||||
|  |     sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab); | ||||||
|  |     sqlite3* db = vtab_parquet->db; | ||||||
|  |     ParquetCursor* cursor = vtab_cursor_parquet->cursor; | ||||||
|     sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr; |     sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr; | ||||||
|  |  | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
| @@ -451,13 +572,40 @@ static int parquetFilter( | |||||||
|         type = Null; |         type = Null; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       Constraint constraint( |       std::string columnName = "rowid"; | ||||||
|  |       if(indexInfo->aConstraint[i].iColumn >= 0) { | ||||||
|  |         columnName = cursor->getTable()->columnName(indexInfo->aConstraint[i].iColumn); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       RowGroupBitmap bitmap = RowGroupBitmap(cursor->getNumRowGroups()); | ||||||
|  |       Constraint dummy( | ||||||
|  |         bitmap, | ||||||
|         indexInfo->aConstraint[i].iColumn, |         indexInfo->aConstraint[i].iColumn, | ||||||
|  |         columnName, | ||||||
|         constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), |         constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), | ||||||
|         type, |         type, | ||||||
|         intValue, |         intValue, | ||||||
|         doubleValue, |         doubleValue, | ||||||
|         blobValue); |         blobValue); | ||||||
|  |  | ||||||
|  |       std::vector<unsigned char> actual = getRowGroupsForClause(db, cursor->getTable()->getTableName(), dummy.describe()); | ||||||
|  |       if(actual.size() > 0) { | ||||||
|  |         // Initialize the estimate to be the actual -- eventually they'll converge | ||||||
|  |         // and we'll stop writing back to the db. | ||||||
|  |         std::vector<unsigned char> estimate = actual; | ||||||
|  |         bitmap = RowGroupBitmap(estimate, actual); | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       Constraint constraint( | ||||||
|  |         bitmap, | ||||||
|  |         indexInfo->aConstraint[i].iColumn, | ||||||
|  |         columnName, | ||||||
|  |         constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), | ||||||
|  |         type, | ||||||
|  |         intValue, | ||||||
|  |         doubleValue, | ||||||
|  |         blobValue); | ||||||
|  |  | ||||||
|       constraints.push_back(constraint); |       constraints.push_back(constraint); | ||||||
|       j++; |       j++; | ||||||
|     } |     } | ||||||
| @@ -555,7 +703,7 @@ static sqlite3_module ParquetModule = { | |||||||
|   parquetConnect,           /* xConnect */ |   parquetConnect,           /* xConnect */ | ||||||
|   parquetBestIndex,         /* xBestIndex */ |   parquetBestIndex,         /* xBestIndex */ | ||||||
|   parquetDisconnect,        /* xDisconnect */ |   parquetDisconnect,        /* xDisconnect */ | ||||||
|   parquetDisconnect,        /* xDestroy */ |   parquetDestroy,           /* xDestroy */ | ||||||
|   parquetOpen,              /* xOpen - open a cursor */ |   parquetOpen,              /* xOpen - open a cursor */ | ||||||
|   parquetClose,             /* xClose - close a cursor */ |   parquetClose,             /* xClose - close a cursor */ | ||||||
|   parquetFilter,            /* xFilter - configure scan constraints */ |   parquetFilter,            /* xFilter - configure scan constraints */ | ||||||
|   | |||||||
| @@ -1,7 +1,6 @@ | |||||||
| #include "parquet_cursor.h" | #include "parquet_cursor.h" | ||||||
|  |  | ||||||
| ParquetCursor::ParquetCursor(ParquetTable* table) { | ParquetCursor::ParquetCursor(ParquetTable* table): table(table) { | ||||||
|   this->table = table; |  | ||||||
|   reader = NULL; |   reader = NULL; | ||||||
|   reset(std::vector<Constraint>()); |   reset(std::vector<Constraint>()); | ||||||
| } | } | ||||||
| @@ -518,6 +517,7 @@ bool ParquetCursor::currentRowSatisfiesDoubleFilter(Constraint& constraint) { | |||||||
| // This avoids opening rowgroups that can't return useful | // This avoids opening rowgroups that can't return useful | ||||||
| // data, which provides substantial performance benefits. | // data, which provides substantial performance benefits. | ||||||
| bool ParquetCursor::currentRowGroupSatisfiesFilter() { | bool ParquetCursor::currentRowGroupSatisfiesFilter() { | ||||||
|  |   bool overallRv = true; | ||||||
|   for(unsigned int i = 0; i < constraints.size(); i++) { |   for(unsigned int i = 0; i < constraints.size(); i++) { | ||||||
|     int column = constraints[i].column; |     int column = constraints[i].column; | ||||||
|     int op = constraints[i].op; |     int op = constraints[i].op; | ||||||
| @@ -527,47 +527,52 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() { | |||||||
|       rv = currentRowGroupSatisfiesRowIdFilter(constraints[i]); |       rv = currentRowGroupSatisfiesRowIdFilter(constraints[i]); | ||||||
|     } else { |     } else { | ||||||
|       std::unique_ptr<parquet::ColumnChunkMetaData> md = rowGroupMetadata->ColumnChunk(column); |       std::unique_ptr<parquet::ColumnChunkMetaData> md = rowGroupMetadata->ColumnChunk(column); | ||||||
|       if(!md->is_stats_set()) { |       if(md->is_stats_set()) { | ||||||
|         continue; |         std::shared_ptr<parquet::RowGroupStatistics> stats = md->statistics(); | ||||||
|       } |  | ||||||
|       std::shared_ptr<parquet::RowGroupStatistics> stats = md->statistics(); |  | ||||||
|  |  | ||||||
|       // SQLite is much looser with types than you might expect if you |         // SQLite is much looser with types than you might expect if you | ||||||
|       // come from a Postgres background. The constraint '30.0' (that is, |         // come from a Postgres background. The constraint '30.0' (that is, | ||||||
|       // a string containing a floating point number) should be treated |         // a string containing a floating point number) should be treated | ||||||
|       // as equal to a field containing an integer 30. |         // as equal to a field containing an integer 30. | ||||||
|       // |         // | ||||||
|       // This means that even if the parquet physical type is integer, |         // This means that even if the parquet physical type is integer, | ||||||
|       // the constraint type may be a string, so dispatch to the filter |         // the constraint type may be a string, so dispatch to the filter | ||||||
|       // fn based on the Parquet type. |         // fn based on the Parquet type. | ||||||
|  |  | ||||||
|       if(op == IsNull) { |         if(op == IsNull) { | ||||||
|         rv = stats->null_count() > 0; |           rv = stats->null_count() > 0; | ||||||
|       } else if(op == IsNotNull) { |         } else if(op == IsNotNull) { | ||||||
|         rv = stats->num_values() > 0; |           rv = stats->num_values() > 0; | ||||||
|       } else { |         } else { | ||||||
|         parquet::Type::type pqType = types[column]; |           parquet::Type::type pqType = types[column]; | ||||||
|  |  | ||||||
|         if(pqType == parquet::Type::BYTE_ARRAY && logicalTypes[column] == parquet::LogicalType::UTF8) { |           if(pqType == parquet::Type::BYTE_ARRAY && logicalTypes[column] == parquet::LogicalType::UTF8) { | ||||||
|           rv = currentRowGroupSatisfiesTextFilter(constraints[i], stats); |             rv = currentRowGroupSatisfiesTextFilter(constraints[i], stats); | ||||||
|         } else if(pqType == parquet::Type::BYTE_ARRAY) { |           } else if(pqType == parquet::Type::BYTE_ARRAY) { | ||||||
|           rv = currentRowGroupSatisfiesBlobFilter(constraints[i], stats); |             rv = currentRowGroupSatisfiesBlobFilter(constraints[i], stats); | ||||||
|         } else if(pqType == parquet::Type::INT32 || |           } else if(pqType == parquet::Type::INT32 || | ||||||
|                   pqType == parquet::Type::INT64 || |                     pqType == parquet::Type::INT64 || | ||||||
|                   pqType == parquet::Type::INT96 || |                     pqType == parquet::Type::INT96 || | ||||||
|                   pqType == parquet::Type::BOOLEAN) { |                     pqType == parquet::Type::BOOLEAN) { | ||||||
|           rv = currentRowGroupSatisfiesIntegerFilter(constraints[i], stats); |             rv = currentRowGroupSatisfiesIntegerFilter(constraints[i], stats); | ||||||
|         } else if(pqType == parquet::Type::FLOAT || pqType == parquet::Type::DOUBLE) { |           } else if(pqType == parquet::Type::FLOAT || pqType == parquet::Type::DOUBLE) { | ||||||
|           rv = currentRowGroupSatisfiesDoubleFilter(constraints[i], stats); |             rv = currentRowGroupSatisfiesDoubleFilter(constraints[i], stats); | ||||||
|  |           } | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(!rv) |     // and it with the existing actual, which may have come from a previous run | ||||||
|       return false; |     rv = rv && constraints[i].bitmap.getActualMembership(rowGroupId); | ||||||
|  |     if(!rv) { | ||||||
|  |       constraints[i].bitmap.setEstimatedMembership(rowGroupId, rv); | ||||||
|  |       constraints[i].bitmap.setActualMembership(rowGroupId, rv); | ||||||
|  |     } | ||||||
|  |     overallRv = overallRv && rv; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   return true; | //  printf("rowGroup %d %s\n", rowGroupId, overallRv ? "may satisfy" : "does not satisfy"); | ||||||
|  |   return overallRv; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -609,9 +614,22 @@ start: | |||||||
|   // Increment rowId so currentRowGroupSatisfiesRowIdFilter can access it; |   // Increment rowId so currentRowGroupSatisfiesRowIdFilter can access it; | ||||||
|   // it'll get decremented by our caller |   // it'll get decremented by our caller | ||||||
|   rowId++; |   rowId++; | ||||||
|  |  | ||||||
|  |   // We're going to scan this row group; reset the expectation of discovering | ||||||
|  |   // a row | ||||||
|  |   for(unsigned int i = 0; i < constraints.size(); i++) { | ||||||
|  |     if(rowGroupId > 0 && constraints[i].rowGroupId == rowGroupId - 1) { | ||||||
|  |       constraints[i].bitmap.setActualMembership(rowGroupId - 1, constraints[i].hadRows); | ||||||
|  |     } | ||||||
|  |     constraints[i].hadRows = false; | ||||||
|  |   } | ||||||
|  |  | ||||||
|   if(!currentRowGroupSatisfiesFilter()) |   if(!currentRowGroupSatisfiesFilter()) | ||||||
|     goto start; |     goto start; | ||||||
|  |  | ||||||
|  |   for(unsigned int i = 0; i < constraints.size(); i++) { | ||||||
|  |     constraints[i].rowGroupId = rowGroupId; | ||||||
|  |   } | ||||||
|   return true; |   return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -623,6 +641,7 @@ start: | |||||||
| // and the extension, which can add up on a dataset of tens | // and the extension, which can add up on a dataset of tens | ||||||
| // of millions of rows. | // of millions of rows. | ||||||
| bool ParquetCursor::currentRowSatisfiesFilter() { | bool ParquetCursor::currentRowSatisfiesFilter() { | ||||||
|  |   bool overallRv = true; | ||||||
|   for(unsigned int i = 0; i < constraints.size(); i++) { |   for(unsigned int i = 0; i < constraints.size(); i++) { | ||||||
|     bool rv = true; |     bool rv = true; | ||||||
|     int column = constraints[i].column; |     int column = constraints[i].column; | ||||||
| @@ -648,13 +667,18 @@ bool ParquetCursor::currentRowSatisfiesFilter() { | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(!rv) |     // it defaults to false; so only set it if true | ||||||
|       return false; |     // ideally we'd short-circuit if we'd already set this group as visited | ||||||
|  |     if(rv) { | ||||||
|  |       constraints[i].hadRows = true; | ||||||
|  |     } | ||||||
|  |     overallRv = overallRv && rv; | ||||||
|   } |   } | ||||||
|   return true; |   return overallRv; | ||||||
| } | } | ||||||
|  |  | ||||||
| void ParquetCursor::next() { | void ParquetCursor::next() { | ||||||
|  |   // Returns true if we've crossed a row group boundary | ||||||
| start: | start: | ||||||
|   if(rowsLeftInRowGroup == 0) { |   if(rowsLeftInRowGroup == 0) { | ||||||
|     if(!nextRowGroup()) { |     if(!nextRowGroup()) { | ||||||
| @@ -672,7 +696,6 @@ start: | |||||||
|   rowId++; |   rowId++; | ||||||
|   if(!currentRowSatisfiesFilter()) |   if(!currentRowSatisfiesFilter()) | ||||||
|     goto start; |     goto start; | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| int ParquetCursor::getRowId() { | int ParquetCursor::getRowId() { | ||||||
| @@ -939,7 +962,7 @@ void ParquetCursor::reset(std::vector<Constraint> constraints) { | |||||||
|   // TODO: consider having a long lived handle in ParquetTable that can be borrowed |   // TODO: consider having a long lived handle in ParquetTable that can be borrowed | ||||||
|   // without incurring the cost of opening the file from scratch twice |   // without incurring the cost of opening the file from scratch twice | ||||||
|   reader = parquet::ParquetFileReader::OpenFile( |   reader = parquet::ParquetFileReader::OpenFile( | ||||||
|       table->file.data(), |       table->getFile().data(), | ||||||
|       true, |       true, | ||||||
|       parquet::default_reader_properties(), |       parquet::default_reader_properties(), | ||||||
|       table->getMetadata()); |       table->getMetadata()); | ||||||
| @@ -955,4 +978,10 @@ void ParquetCursor::reset(std::vector<Constraint> constraints) { | |||||||
|   numRowGroups = reader->metadata()->num_row_groups(); |   numRowGroups = reader->metadata()->num_row_groups(); | ||||||
| } | } | ||||||
|  |  | ||||||
| ParquetTable* ParquetCursor::getTable() { return table; } | ParquetTable* ParquetCursor::getTable() const { return table; } | ||||||
|  |  | ||||||
|  | unsigned int ParquetCursor::getNumRowGroups() const { return numRowGroups; } | ||||||
|  | unsigned int ParquetCursor::getNumConstraints() const { return constraints.size(); } | ||||||
|  | const Constraint& ParquetCursor::getConstraint(unsigned int i) const { return constraints[i]; } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -56,9 +56,12 @@ public: | |||||||
|  |  | ||||||
|   void ensureColumn(int col); |   void ensureColumn(int col); | ||||||
|   bool isNull(int col); |   bool isNull(int col); | ||||||
|  |   unsigned int getNumRowGroups() const; | ||||||
|  |   unsigned int getNumConstraints() const; | ||||||
|  |   const Constraint& getConstraint(unsigned int i) const; | ||||||
|   parquet::Type::type getPhysicalType(int col); |   parquet::Type::type getPhysicalType(int col); | ||||||
|   parquet::LogicalType::type getLogicalType(int col); |   parquet::LogicalType::type getLogicalType(int col); | ||||||
|   ParquetTable* getTable(); |   ParquetTable* getTable() const; | ||||||
|  |  | ||||||
|   int getInt32(int col); |   int getInt32(int col); | ||||||
|   long getInt64(int col); |   long getInt64(int col); | ||||||
|   | |||||||
| @@ -1,19 +1,25 @@ | |||||||
| #include "parquet_filter.h" | #include "parquet_filter.h" | ||||||
|  |  | ||||||
| Constraint::Constraint( | Constraint::Constraint( | ||||||
|  |   RowGroupBitmap bitmap, | ||||||
|   int column, |   int column, | ||||||
|  |   std::string columnName, | ||||||
|   ConstraintOperator op, |   ConstraintOperator op, | ||||||
|   ValueType type, |   ValueType type, | ||||||
|   int64_t intValue, |   int64_t intValue, | ||||||
|   double doubleValue, |   double doubleValue, | ||||||
|   std::vector<unsigned char> blobValue |   std::vector<unsigned char> blobValue | ||||||
| ) { | ): bitmap(bitmap), | ||||||
|   this->column = column; |    column(column), | ||||||
|   this->op = op; |    columnName(columnName), | ||||||
|   this->type = type; |    op(op), | ||||||
|   this->intValue = intValue; |    type(type), | ||||||
|   this->doubleValue = doubleValue; |    intValue(intValue), | ||||||
|   this->blobValue = blobValue; |    doubleValue(doubleValue), | ||||||
|  |    blobValue(blobValue), | ||||||
|  |    hadRows(false) { | ||||||
|  |      RowGroupBitmap bm = bitmap; | ||||||
|  |      this->bitmap = bm; | ||||||
|  |  | ||||||
|   if(type == Text) { |   if(type == Text) { | ||||||
|     stringValue = std::string((char*)&blobValue[0], blobValue.size()); |     stringValue = std::string((char*)&blobValue[0], blobValue.size()); | ||||||
| @@ -34,3 +40,72 @@ Constraint::Constraint( | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::string Constraint::describe() const { | ||||||
|  |   std::string rv; | ||||||
|  |   rv.append(columnName); | ||||||
|  |   rv.append(" "); | ||||||
|  |   switch(op) { | ||||||
|  |     case Equal: | ||||||
|  |       rv.append("="); | ||||||
|  |       break; | ||||||
|  |     case GreaterThan: | ||||||
|  |       rv.append(">"); | ||||||
|  |       break; | ||||||
|  |     case LessThanOrEqual: | ||||||
|  |       rv.append("<="); | ||||||
|  |       break; | ||||||
|  |     case LessThan: | ||||||
|  |       rv.append("<"); | ||||||
|  |       break; | ||||||
|  |     case GreaterThanOrEqual: | ||||||
|  |       rv.append(">="); | ||||||
|  |       break; | ||||||
|  |     case Match: | ||||||
|  |       rv.append("MATCH"); | ||||||
|  |       break; | ||||||
|  |     case Like: | ||||||
|  |       rv.append("LIKE"); | ||||||
|  |       break; | ||||||
|  |     case Glob: | ||||||
|  |       rv.append("GLOB"); | ||||||
|  |       break; | ||||||
|  |     case Regexp: | ||||||
|  |       rv.append("REGEXP"); | ||||||
|  |       break; | ||||||
|  |     case NotEqual: | ||||||
|  |       rv.append("<>"); | ||||||
|  |       break; | ||||||
|  |     case IsNot: | ||||||
|  |       rv.append("IS NOT"); | ||||||
|  |       break; | ||||||
|  |     case IsNotNull: | ||||||
|  |       rv.append("IS NOT NULL"); | ||||||
|  |       break; | ||||||
|  |     case IsNull: | ||||||
|  |       rv.append("IS NULL"); | ||||||
|  |       break; | ||||||
|  |     case Is: | ||||||
|  |       rv.append("IS"); | ||||||
|  |       break; | ||||||
|  |   } | ||||||
|  |   rv.append(" "); | ||||||
|  |  | ||||||
|  |   switch(type) { | ||||||
|  |     case Null: | ||||||
|  |       rv.append("NULL"); | ||||||
|  |       break; | ||||||
|  |     case Integer: | ||||||
|  |       rv.append(std::to_string(intValue)); | ||||||
|  |       break; | ||||||
|  |     case Double: | ||||||
|  |       rv.append(std::to_string(doubleValue)); | ||||||
|  |       break; | ||||||
|  |     case Blob: | ||||||
|  |       break; | ||||||
|  |     case Text: | ||||||
|  |       rv.append(stringValue); | ||||||
|  |       break; | ||||||
|  |   } | ||||||
|  |   return rv; | ||||||
|  | } | ||||||
|   | |||||||
| @@ -30,11 +30,63 @@ enum ValueType { | |||||||
|   Text |   Text | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | class RowGroupBitmap { | ||||||
|  |   void setBit(std::vector<unsigned char>& membership, unsigned int rowGroup, bool isSet) { | ||||||
|  |     int byte = rowGroup / 8; | ||||||
|  |     int offset = rowGroup % 8; | ||||||
|  |     unsigned char c = membership[byte]; | ||||||
|  |     c &= ~(1UL << offset); | ||||||
|  |     if(isSet) { | ||||||
|  |       c |= 1UL << offset; | ||||||
|  |     } | ||||||
|  |     membership[byte] = c; | ||||||
|  |   } | ||||||
|  | // Compares estimated rowGroupFilter results against observed results | ||||||
|  | // when we explored the row group. This lets us cache  | ||||||
|  | public: | ||||||
|  |   RowGroupBitmap(unsigned int totalRowGroups) { | ||||||
|  |     // Initialize everything to assume that all row groups match. | ||||||
|  |     // As we discover otherwise, we'll update that assumption. | ||||||
|  |     for(unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) { | ||||||
|  |       estimatedMembership.push_back(0xFF); | ||||||
|  |       actualMembership.push_back(0xFF); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   RowGroupBitmap( | ||||||
|  |       std::vector<unsigned char> estimatedMembership, | ||||||
|  |       std::vector<unsigned char> actualMembership) : | ||||||
|  |     estimatedMembership(estimatedMembership), | ||||||
|  |     actualMembership(actualMembership) { | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   std::vector<unsigned char> estimatedMembership; | ||||||
|  |   std::vector<unsigned char> actualMembership; | ||||||
|  |   // Pass false only if definitely does not have rows | ||||||
|  |   void setEstimatedMembership(unsigned int rowGroup, bool hasRows) { | ||||||
|  |     setBit(estimatedMembership, rowGroup, hasRows); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Pass false only after exhausting all rows | ||||||
|  |   void setActualMembership(unsigned int rowGroup, bool hadRows) { | ||||||
|  |     setBit(actualMembership, rowGroup, hadRows); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   bool getActualMembership(unsigned int rowGroup) { | ||||||
|  |     int byte = rowGroup / 8; | ||||||
|  |     int offset = rowGroup % 8; | ||||||
|  |  | ||||||
|  |     return (actualMembership[byte] >> offset) & 1U; | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
| class Constraint { | class Constraint { | ||||||
| public: | public: | ||||||
|   // Kind of a messy constructor function, but it's just for internal use, so whatever. |   // Kind of a messy constructor function, but it's just for internal use, so whatever. | ||||||
|   Constraint( |   Constraint( | ||||||
|  |     RowGroupBitmap bitmap, | ||||||
|     int column, |     int column, | ||||||
|  |     std::string columnName, | ||||||
|     ConstraintOperator op, |     ConstraintOperator op, | ||||||
|     ValueType type, |     ValueType type, | ||||||
|     int64_t intValue, |     int64_t intValue, | ||||||
| @@ -42,7 +94,9 @@ public: | |||||||
|     std::vector<unsigned char> blobValue |     std::vector<unsigned char> blobValue | ||||||
|   ); |   ); | ||||||
|  |  | ||||||
|  |   RowGroupBitmap bitmap; | ||||||
|   int column; // underlying column in the query |   int column; // underlying column in the query | ||||||
|  |   std::string columnName; | ||||||
|   ConstraintOperator op; |   ConstraintOperator op; | ||||||
|   ValueType type; |   ValueType type; | ||||||
|  |  | ||||||
| @@ -54,6 +108,15 @@ public: | |||||||
|  |  | ||||||
|   // Only set when stringValue is set and op == Like |   // Only set when stringValue is set and op == Like | ||||||
|   std::string likeStringValue; |   std::string likeStringValue; | ||||||
|  |  | ||||||
|  |   // A unique identifier for this constraint, e.g. | ||||||
|  |   // col0 = 'Dawson Creek' | ||||||
|  |   std::string describe() const; | ||||||
|  |  | ||||||
|  |   // This is a temp field used while evaluating if a rowgroup had rows | ||||||
|  |   // that matched this constraint. | ||||||
|  |   int rowGroupId; | ||||||
|  |   bool hadRows; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -2,9 +2,7 @@ | |||||||
|  |  | ||||||
| #include "parquet/api/reader.h" | #include "parquet/api/reader.h" | ||||||
|  |  | ||||||
| ParquetTable::ParquetTable(std::string file) { | ParquetTable::ParquetTable(std::string file, std::string tableName): file(file), tableName(tableName) { | ||||||
|   this->file = file; |  | ||||||
|  |  | ||||||
|   std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data()); |   std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data()); | ||||||
|   metadata = reader->metadata(); |   metadata = reader->metadata(); | ||||||
| } | } | ||||||
| @@ -138,3 +136,6 @@ std::string ParquetTable::CreateStatement() { | |||||||
| } | } | ||||||
|  |  | ||||||
| std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; } | std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; } | ||||||
|  |  | ||||||
|  | const std::string& ParquetTable::getFile() { return file; } | ||||||
|  | const std::string& ParquetTable::getTableName() { return tableName; } | ||||||
|   | |||||||
| @@ -6,16 +6,19 @@ | |||||||
| #include "parquet/api/reader.h" | #include "parquet/api/reader.h" | ||||||
|  |  | ||||||
| class ParquetTable { | class ParquetTable { | ||||||
|  |   std::string file; | ||||||
|  |   std::string tableName; | ||||||
|   std::vector<std::string> columnNames; |   std::vector<std::string> columnNames; | ||||||
|   std::shared_ptr<parquet::FileMetaData> metadata; |   std::shared_ptr<parquet::FileMetaData> metadata; | ||||||
|  |  | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   ParquetTable(std::string file); |   ParquetTable(std::string file, std::string tableName); | ||||||
|   std::string CreateStatement(); |   std::string CreateStatement(); | ||||||
|   std::string file; |  | ||||||
|   std::string columnName(int idx); |   std::string columnName(int idx); | ||||||
|   std::shared_ptr<parquet::FileMetaData> getMetadata(); |   std::shared_ptr<parquet::FileMetaData> getMetadata(); | ||||||
|  |   const std::string& getFile(); | ||||||
|  |   const std::string& getTableName(); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -47,7 +47,7 @@ main() { | |||||||
|   fi |   fi | ||||||
|  |  | ||||||
|   cat "$root"/parquet-generator/*.sql > "$root"/testcase-bootstrap.sql |   cat "$root"/parquet-generator/*.sql > "$root"/testcase-bootstrap.sql | ||||||
|   rm test.db |   rm -f test.db | ||||||
|   "$root"/sqlite/sqlite3 test.db -init "$root"/testcase-bootstrap.sql < /dev/null |   "$root"/sqlite/sqlite3 test.db -init "$root"/testcase-bootstrap.sql < /dev/null | ||||||
|   if [ ! -v NO_DEBUG ] && [ "$(cat testcases.txt | wc -l)" == "1" ]; then |   if [ ! -v NO_DEBUG ] && [ "$(cat testcases.txt | wc -l)" == "1" ]; then | ||||||
|     set -x |     set -x | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Colin Dellow
					Colin Dellow