mirror of
				https://github.com/cldellow/sqlite-parquet-vtable.git
				synced 2025-10-31 02:19:56 +00:00 
			
		
		
		
	Run a formatting pass with clang-format to minimize future git churn
This commit is contained in:
		
							
								
								
									
										508
									
								
								src/parquet.cc
									
									
									
									
									
								
							
							
						
						
									
										508
									
								
								src/parquet.cc
									
									
									
									
									
								
							| @@ -1,66 +1,65 @@ | |||||||
| /* | /* | ||||||
| * This file contains the implementation of an SQLite virtual table for |  * This file contains the implementation of an SQLite virtual table for | ||||||
| * reading Parquet files. |  * reading Parquet files. | ||||||
| * |  * | ||||||
| * Usage: |  * Usage: | ||||||
| * |  * | ||||||
| *    .load ./parquet |  *    .load ./parquet | ||||||
| *    CREATE VIRTUAL TABLE demo USING parquet(FILENAME); |  *    CREATE VIRTUAL TABLE demo USING parquet(FILENAME); | ||||||
| *    SELECT * FROM demo; |  *    SELECT * FROM demo; | ||||||
| * |  * | ||||||
| */ |  */ | ||||||
| #include <sqlite3ext.h> | #include <sqlite3ext.h> | ||||||
| SQLITE_EXTENSION_INIT1 | SQLITE_EXTENSION_INIT1 | ||||||
| #include <string.h> |  | ||||||
| #include <stdlib.h> |  | ||||||
| #include <assert.h> | #include <assert.h> | ||||||
| #include <stdarg.h> |  | ||||||
| #include <ctype.h> | #include <ctype.h> | ||||||
| #include <stdio.h> |  | ||||||
| #include <iomanip> | #include <iomanip> | ||||||
| #include <sys/time.h> |  | ||||||
| #include <memory> | #include <memory> | ||||||
|  | #include <stdarg.h> | ||||||
|  | #include <stdio.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <string.h> | ||||||
|  | #include <sys/time.h> | ||||||
|  |  | ||||||
| #include "parquet_table.h" |  | ||||||
| #include "parquet_cursor.h" | #include "parquet_cursor.h" | ||||||
| #include "parquet_filter.h" | #include "parquet_filter.h" | ||||||
|  | #include "parquet_table.h" | ||||||
|  |  | ||||||
| //#define DEBUG | //#define DEBUG | ||||||
|  |  | ||||||
| /* Forward references to the various virtual table methods implemented | /* Forward references to the various virtual table methods implemented | ||||||
|  * in this file. */ |  * in this file. */ | ||||||
| static int parquetCreate(sqlite3*, void*, int, const char*const*,  | static int parquetCreate(sqlite3 *, void *, int, const char *const *, | ||||||
|                            sqlite3_vtab**,char**); |                          sqlite3_vtab **, char **); | ||||||
| static int parquetConnect(sqlite3*, void*, int, const char*const*,  | static int parquetConnect(sqlite3 *, void *, int, const char *const *, | ||||||
|                            sqlite3_vtab**,char**); |                           sqlite3_vtab **, char **); | ||||||
| static int parquetBestIndex(sqlite3_vtab*,sqlite3_index_info*); | static int parquetBestIndex(sqlite3_vtab *, sqlite3_index_info *); | ||||||
| static int parquetDisconnect(sqlite3_vtab*); | static int parquetDisconnect(sqlite3_vtab *); | ||||||
| static int parquetDestroy(sqlite3_vtab*); | static int parquetDestroy(sqlite3_vtab *); | ||||||
| static int parquetOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); | static int parquetOpen(sqlite3_vtab *, sqlite3_vtab_cursor **); | ||||||
| static int parquetClose(sqlite3_vtab_cursor*); | static int parquetClose(sqlite3_vtab_cursor *); | ||||||
| static int parquetFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, | static int parquetFilter(sqlite3_vtab_cursor *, int idxNum, const char *idxStr, | ||||||
|                          int argc, sqlite3_value **argv); |                          int argc, sqlite3_value **argv); | ||||||
| static int parquetNext(sqlite3_vtab_cursor*); | static int parquetNext(sqlite3_vtab_cursor *); | ||||||
| static int parquetEof(sqlite3_vtab_cursor*); | static int parquetEof(sqlite3_vtab_cursor *); | ||||||
| static int parquetColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); | static int parquetColumn(sqlite3_vtab_cursor *, sqlite3_context *, int); | ||||||
| static int parquetRowid(sqlite3_vtab_cursor*,sqlite3_int64*); | static int parquetRowid(sqlite3_vtab_cursor *, sqlite3_int64 *); | ||||||
|  |  | ||||||
| /* An instance of the Parquet virtual table */ | /* An instance of the Parquet virtual table */ | ||||||
| typedef struct sqlite3_vtab_parquet { | typedef struct sqlite3_vtab_parquet { | ||||||
|   sqlite3_vtab base; /* Base class.  Must be first */ |   sqlite3_vtab base; /* Base class.  Must be first */ | ||||||
|   ParquetTable* table; |   ParquetTable *table; | ||||||
|   sqlite3* db; |   sqlite3 *db; | ||||||
| } sqlite3_vtab_parquet; | } sqlite3_vtab_parquet; | ||||||
|  |  | ||||||
|  |  | ||||||
| /* A cursor for the Parquet virtual table */ | /* A cursor for the Parquet virtual table */ | ||||||
| typedef struct sqlite3_vtab_cursor_parquet { | typedef struct sqlite3_vtab_cursor_parquet { | ||||||
|   sqlite3_vtab_cursor base; /* Base class.  Must be first */ |   sqlite3_vtab_cursor base; /* Base class.  Must be first */ | ||||||
|   ParquetCursor* cursor; |   ParquetCursor *cursor; | ||||||
| } sqlite3_vtab_cursor_parquet; | } sqlite3_vtab_cursor_parquet; | ||||||
|  |  | ||||||
| static int parquetDestroy(sqlite3_vtab *pVtab) { | static int parquetDestroy(sqlite3_vtab *pVtab) { | ||||||
|   sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet*)pVtab; |   sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet *)pVtab; | ||||||
|  |  | ||||||
|   // Clean up our shadow table. This is useful if the user has recreated |   // Clean up our shadow table. This is useful if the user has recreated | ||||||
|   // the parquet file, and our mappings would now be invalid. |   // the parquet file, and our mappings would now be invalid. | ||||||
| @@ -68,7 +67,7 @@ static int parquetDestroy(sqlite3_vtab *pVtab) { | |||||||
|   drop.append(p->table->getTableName()); |   drop.append(p->table->getTableName()); | ||||||
|   drop.append("_rowgroups"); |   drop.append("_rowgroups"); | ||||||
|   int rv = sqlite3_exec(p->db, drop.data(), 0, 0, 0); |   int rv = sqlite3_exec(p->db, drop.data(), 0, 0, 0); | ||||||
|   if(rv != 0) |   if (rv != 0) | ||||||
|     return rv; |     return rv; | ||||||
|  |  | ||||||
|   return SQLITE_OK; |   return SQLITE_OK; | ||||||
| @@ -77,24 +76,20 @@ static int parquetDestroy(sqlite3_vtab *pVtab) { | |||||||
| /* | /* | ||||||
| ** This method is the destructor for a sqlite3_vtab_parquet object. | ** This method is the destructor for a sqlite3_vtab_parquet object. | ||||||
| */ | */ | ||||||
| static int parquetDisconnect(sqlite3_vtab *pVtab){ | static int parquetDisconnect(sqlite3_vtab *pVtab) { | ||||||
|   sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet*)pVtab; |   sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet *)pVtab; | ||||||
|   delete p->table; |   delete p->table; | ||||||
|   sqlite3_free(p); |   sqlite3_free(p); | ||||||
|   return SQLITE_OK; |   return SQLITE_OK; | ||||||
| } | } | ||||||
|  |  | ||||||
| static int parquetConnect( | static int parquetConnect(sqlite3 *db, void *pAux, int argc, | ||||||
|   sqlite3 *db, |                           const char *const *argv, sqlite3_vtab **ppVtab, | ||||||
|   void *pAux, |                           char **pzErr) { | ||||||
|   int argc, |  | ||||||
|   const char *const*argv, |  | ||||||
|   sqlite3_vtab **ppVtab, |  | ||||||
|   char **pzErr |  | ||||||
| ){ |  | ||||||
|   try { |   try { | ||||||
|     if(argc != 4 || strlen(argv[3]) < 2) { |     if (argc != 4 || strlen(argv[3]) < 2) { | ||||||
|       *pzErr = sqlite3_mprintf("must provide exactly one argument, the path to a parquet file"); |       *pzErr = sqlite3_mprintf( | ||||||
|  |           "must provide exactly one argument, the path to a parquet file"); | ||||||
|       return SQLITE_ERROR; |       return SQLITE_ERROR; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -102,8 +97,8 @@ static int parquetConnect( | |||||||
|     // Remove the delimiting single quotes |     // Remove the delimiting single quotes | ||||||
|     std::string fname = argv[3]; |     std::string fname = argv[3]; | ||||||
|     fname = fname.substr(1, fname.length() - 2); |     fname = fname.substr(1, fname.length() - 2); | ||||||
|     std::unique_ptr<sqlite3_vtab_parquet, void(*)(void*)> vtab( |     std::unique_ptr<sqlite3_vtab_parquet, void (*)(void *)> vtab( | ||||||
|         (sqlite3_vtab_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_parquet)), |         (sqlite3_vtab_parquet *)sqlite3_malloc(sizeof(sqlite3_vtab_parquet)), | ||||||
|         sqlite3_free); |         sqlite3_free); | ||||||
|     memset(vtab.get(), 0, sizeof(*vtab.get())); |     memset(vtab.get(), 0, sizeof(*vtab.get())); | ||||||
|  |  | ||||||
| @@ -112,20 +107,20 @@ static int parquetConnect( | |||||||
|  |  | ||||||
|       std::string create = table->CreateStatement(); |       std::string create = table->CreateStatement(); | ||||||
|       int rc = sqlite3_declare_vtab(db, create.data()); |       int rc = sqlite3_declare_vtab(db, create.data()); | ||||||
|       if(rc) |       if (rc) | ||||||
|         return rc; |         return rc; | ||||||
|  |  | ||||||
|       vtab->table = table.release(); |       vtab->table = table.release(); | ||||||
|       vtab->db = db; |       vtab->db = db; | ||||||
|       *ppVtab = (sqlite3_vtab*)vtab.release(); |       *ppVtab = (sqlite3_vtab *)vtab.release(); | ||||||
|       return SQLITE_OK; |       return SQLITE_OK; | ||||||
|     } catch (const std::exception& e) { |     } catch (const std::exception &e) { | ||||||
|       *pzErr = sqlite3_mprintf(e.what()); |       *pzErr = sqlite3_mprintf(e.what()); | ||||||
|       return SQLITE_ERROR; |       return SQLITE_ERROR; | ||||||
|     } |     } | ||||||
|   } catch(std::bad_alloc& ba) { |   } catch (std::bad_alloc &ba) { | ||||||
|     return SQLITE_NOMEM; |     return SQLITE_NOMEM; | ||||||
|   } catch(std::exception& e) { |   } catch (std::exception &e) { | ||||||
|     return SQLITE_ERROR; |     return SQLITE_ERROR; | ||||||
|   } |   } | ||||||
| } | } | ||||||
| @@ -134,20 +129,16 @@ static int parquetConnect( | |||||||
| ** The xConnect and xCreate methods do the same thing, but they must be | ** The xConnect and xCreate methods do the same thing, but they must be | ||||||
| ** different so that the virtual table is not an eponymous virtual table. | ** different so that the virtual table is not an eponymous virtual table. | ||||||
| */ | */ | ||||||
| static int parquetCreate( | static int parquetCreate(sqlite3 *db, void *pAux, int argc, | ||||||
|   sqlite3 *db, |                          const char *const *argv, sqlite3_vtab **ppVtab, | ||||||
|   void *pAux, |                          char **pzErr) { | ||||||
|   int argc, const char *const*argv, |  | ||||||
|   sqlite3_vtab **ppVtab, |  | ||||||
|   char **pzErr |  | ||||||
| ){ |  | ||||||
|   try { |   try { | ||||||
|     // Create shadow table for storing constraint -> rowid mappings |     // Create shadow table for storing constraint -> rowid mappings | ||||||
|     std::string create = "CREATE TABLE IF NOT EXISTS _"; |     std::string create = "CREATE TABLE IF NOT EXISTS _"; | ||||||
|     create.append(argv[2]); |     create.append(argv[2]); | ||||||
|     create.append("_rowgroups(clause TEXT, estimate BLOB, actual BLOB)"); |     create.append("_rowgroups(clause TEXT, estimate BLOB, actual BLOB)"); | ||||||
|     int rv = sqlite3_exec(db, create.data(), 0, 0, 0); |     int rv = sqlite3_exec(db, create.data(), 0, 0, 0); | ||||||
|     if(rv != 0) |     if (rv != 0) | ||||||
|       return rv; |       return rv; | ||||||
|  |  | ||||||
|     create = "CREATE UNIQUE INDEX IF NOT EXISTS _"; |     create = "CREATE UNIQUE INDEX IF NOT EXISTS _"; | ||||||
| @@ -158,28 +149,31 @@ static int parquetCreate( | |||||||
|     rv = sqlite3_exec(db, create.data(), 0, 0, 0); |     rv = sqlite3_exec(db, create.data(), 0, 0, 0); | ||||||
|  |  | ||||||
|     return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr); |     return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr); | ||||||
|   } catch (std::bad_alloc& ba) { |   } catch (std::bad_alloc &ba) { | ||||||
|     return SQLITE_NOMEM; |     return SQLITE_NOMEM; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string quoteBlob(const std::vector<unsigned char>& bytes) { | std::string quoteBlob(const std::vector<unsigned char> &bytes) { | ||||||
|   std::ostringstream ss; |   std::ostringstream ss; | ||||||
|   ss << "X'" << std::hex; |   ss << "X'" << std::hex; | ||||||
|   for(unsigned int i = 0; i < bytes.size(); i++) { |   for (unsigned int i = 0; i < bytes.size(); i++) { | ||||||
|     ss << std::setfill('0') << std::setw(2) << (unsigned int)(unsigned char)bytes[i]; |     ss << std::setfill('0') << std::setw(2) | ||||||
|  |        << (unsigned int)(unsigned char)bytes[i]; | ||||||
|   } |   } | ||||||
|   ss << "'"; |   ss << "'"; | ||||||
|  |  | ||||||
|   return ss.str(); |   return ss.str(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void persistConstraints(sqlite3* db, ParquetCursor* cursor) { | void persistConstraints(sqlite3 *db, ParquetCursor *cursor) { | ||||||
|   for(unsigned int i = 0; i < cursor->getNumConstraints(); i++) { |   for (unsigned int i = 0; i < cursor->getNumConstraints(); i++) { | ||||||
|     const Constraint& constraint = cursor->getConstraint(i); |     const Constraint &constraint = cursor->getConstraint(i); | ||||||
|     const std::vector<unsigned char>& estimated = constraint.bitmap.estimatedMembership; |     const std::vector<unsigned char> &estimated = | ||||||
|     const std::vector<unsigned char>& actual = constraint.bitmap.actualMembership; |         constraint.bitmap.estimatedMembership; | ||||||
|     if(estimated == actual) { |     const std::vector<unsigned char> &actual = | ||||||
|  |         constraint.bitmap.actualMembership; | ||||||
|  |     if (estimated == actual) { | ||||||
|       continue; |       continue; | ||||||
|     } |     } | ||||||
|     std::string desc = constraint.describe(); |     std::string desc = constraint.describe(); | ||||||
| @@ -188,15 +182,13 @@ void persistConstraints(sqlite3* db, ParquetCursor* cursor) { | |||||||
|     std::string actualStr = quoteBlob(actual); |     std::string actualStr = quoteBlob(actual); | ||||||
|  |  | ||||||
|     // This is only advisory, so ignore failures. |     // This is only advisory, so ignore failures. | ||||||
|     char* sql = sqlite3_mprintf( |     char *sql = | ||||||
|         "INSERT OR REPLACE INTO _%s_rowgroups(clause, estimate, actual) VALUES ('%q', %s, %s)", |         sqlite3_mprintf("INSERT OR REPLACE INTO _%s_rowgroups(clause, " | ||||||
|  |                         "estimate, actual) VALUES ('%q', %s, %s)", | ||||||
|                         cursor->getTable()->getTableName().c_str(), |                         cursor->getTable()->getTableName().c_str(), | ||||||
|         desc.c_str(), |                         desc.c_str(), estimatedStr.c_str(), actualStr.c_str()); | ||||||
|         estimatedStr.c_str(), |  | ||||||
|         actualStr.c_str()); |  | ||||||
|  |  | ||||||
|  |     if (sql == NULL) | ||||||
|     if(sql == NULL) |  | ||||||
|       return; |       return; | ||||||
|  |  | ||||||
|     sqlite3_exec(db, sql, 0, 0, 0); |     sqlite3_exec(db, sql, 0, 0, 0); | ||||||
| @@ -204,12 +196,12 @@ void persistConstraints(sqlite3* db, ParquetCursor* cursor) { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
| ** Destructor for a sqlite3_vtab_cursor_parquet. | ** Destructor for a sqlite3_vtab_cursor_parquet. | ||||||
| */ | */ | ||||||
| static int parquetClose(sqlite3_vtab_cursor *cur){ | static int parquetClose(sqlite3_vtab_cursor *cur) { | ||||||
|   sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur; |   sqlite3_vtab_cursor_parquet *vtab_cursor_parquet = | ||||||
|  |       (sqlite3_vtab_cursor_parquet *)cur; | ||||||
|   vtab_cursor_parquet->cursor->close(); |   vtab_cursor_parquet->cursor->close(); | ||||||
|   delete vtab_cursor_parquet->cursor; |   delete vtab_cursor_parquet->cursor; | ||||||
|   sqlite3_free(cur); |   sqlite3_free(cur); | ||||||
| @@ -219,39 +211,40 @@ static int parquetClose(sqlite3_vtab_cursor *cur){ | |||||||
| /* | /* | ||||||
| ** Constructor for a new sqlite3_vtab_parquet cursor object. | ** Constructor for a new sqlite3_vtab_parquet cursor object. | ||||||
| */ | */ | ||||||
| static int parquetOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ | static int parquetOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { | ||||||
|   try { |   try { | ||||||
|     std::unique_ptr<sqlite3_vtab_cursor_parquet, void(*)(void*)> cursor( |     std::unique_ptr<sqlite3_vtab_cursor_parquet, void (*)(void *)> cursor( | ||||||
|         (sqlite3_vtab_cursor_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_cursor_parquet)), |         (sqlite3_vtab_cursor_parquet *)sqlite3_malloc( | ||||||
|  |             sizeof(sqlite3_vtab_cursor_parquet)), | ||||||
|         sqlite3_free); |         sqlite3_free); | ||||||
|     memset(cursor.get(), 0, sizeof(*cursor.get())); |     memset(cursor.get(), 0, sizeof(*cursor.get())); | ||||||
|  |  | ||||||
|     sqlite3_vtab_parquet* pParquet = (sqlite3_vtab_parquet*)p; |     sqlite3_vtab_parquet *pParquet = (sqlite3_vtab_parquet *)p; | ||||||
|     cursor->cursor = new ParquetCursor(pParquet->table); |     cursor->cursor = new ParquetCursor(pParquet->table); | ||||||
|  |  | ||||||
|     *ppCursor = (sqlite3_vtab_cursor*)cursor.release(); |     *ppCursor = (sqlite3_vtab_cursor *)cursor.release(); | ||||||
|     return SQLITE_OK; |     return SQLITE_OK; | ||||||
|   } catch(std::bad_alloc& ba) { |   } catch (std::bad_alloc &ba) { | ||||||
|     return SQLITE_NOMEM; |     return SQLITE_NOMEM; | ||||||
|   } catch(std::exception& e) { |   } catch (std::exception &e) { | ||||||
|     return SQLITE_ERROR; |     return SQLITE_ERROR; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
| ** Advance a sqlite3_vtab_cursor_parquet to its next row of input. | ** Advance a sqlite3_vtab_cursor_parquet to its next row of input. | ||||||
| ** Set the EOF marker if we reach the end of input. | ** Set the EOF marker if we reach the end of input. | ||||||
| */ | */ | ||||||
| static int parquetNext(sqlite3_vtab_cursor *cur){ | static int parquetNext(sqlite3_vtab_cursor *cur) { | ||||||
|   try { |   try { | ||||||
|     sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur; |     sqlite3_vtab_cursor_parquet *vtab_cursor_parquet = | ||||||
|     ParquetCursor* cursor = vtab_cursor_parquet->cursor; |         (sqlite3_vtab_cursor_parquet *)cur; | ||||||
|  |     ParquetCursor *cursor = vtab_cursor_parquet->cursor; | ||||||
|     cursor->next(); |     cursor->next(); | ||||||
|     return SQLITE_OK; |     return SQLITE_OK; | ||||||
|   } catch(std::bad_alloc& ba) { |   } catch (std::bad_alloc &ba) { | ||||||
|     return SQLITE_NOMEM; |     return SQLITE_NOMEM; | ||||||
|   } catch(std::exception& e) { |   } catch (std::exception &e) { | ||||||
|     return SQLITE_ERROR; |     return SQLITE_ERROR; | ||||||
|   } |   } | ||||||
| } | } | ||||||
| @@ -260,73 +253,70 @@ static int parquetNext(sqlite3_vtab_cursor *cur){ | |||||||
| ** Return values of columns for the row at which the sqlite3_vtab_cursor_parquet | ** Return values of columns for the row at which the sqlite3_vtab_cursor_parquet | ||||||
| ** is currently pointing. | ** is currently pointing. | ||||||
| */ | */ | ||||||
| static int parquetColumn( | static int | ||||||
|   sqlite3_vtab_cursor *cur,   /* The cursor */ | parquetColumn(sqlite3_vtab_cursor *cur, /* The cursor */ | ||||||
|               sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ |               sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ | ||||||
|               int col               /* Which column to return */ |               int col               /* Which column to return */ | ||||||
| ){ | ) { | ||||||
|   try { |   try { | ||||||
|     ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; |     ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet *)cur)->cursor; | ||||||
|     cursor->ensureColumn(col); |     cursor->ensureColumn(col); | ||||||
|  |  | ||||||
|     if(cursor->isNull(col)) { |     if (cursor->isNull(col)) { | ||||||
|       sqlite3_result_null(ctx); |       sqlite3_result_null(ctx); | ||||||
|     } else { |     } else { | ||||||
|       switch(cursor->getPhysicalType(col)) { |       switch (cursor->getPhysicalType(col)) { | ||||||
|       case parquet::Type::BOOLEAN: |       case parquet::Type::BOOLEAN: | ||||||
|         case parquet::Type::INT32: |       case parquet::Type::INT32: { | ||||||
|         { |  | ||||||
|         int rv = cursor->getInt32(col); |         int rv = cursor->getInt32(col); | ||||||
|         sqlite3_result_int(ctx, rv); |         sqlite3_result_int(ctx, rv); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|       case parquet::Type::FLOAT: |       case parquet::Type::FLOAT: | ||||||
|         case parquet::Type::DOUBLE: |       case parquet::Type::DOUBLE: { | ||||||
|         { |  | ||||||
|         double rv = cursor->getDouble(col); |         double rv = cursor->getDouble(col); | ||||||
|         sqlite3_result_double(ctx, rv); |         sqlite3_result_double(ctx, rv); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|         case parquet::Type::BYTE_ARRAY: |       case parquet::Type::BYTE_ARRAY: { | ||||||
|         { |         parquet::ByteArray *rv = cursor->getByteArray(col); | ||||||
|           parquet::ByteArray* rv = cursor->getByteArray(col); |         if (cursor->getLogicalType(col) == parquet::LogicalType::UTF8) { | ||||||
|           if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) { |           sqlite3_result_text(ctx, (const char *)rv->ptr, rv->len, | ||||||
|             sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT); |                               SQLITE_TRANSIENT); | ||||||
|         } else { |         } else { | ||||||
|             sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT); |           sqlite3_result_blob(ctx, (void *)rv->ptr, rv->len, SQLITE_TRANSIENT); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|       case parquet::Type::INT96: |       case parquet::Type::INT96: | ||||||
|         // This type exists to store timestamps in nanoseconds due to legacy |         // This type exists to store timestamps in nanoseconds due to legacy | ||||||
|         // reasons. We just interpret it as a timestamp in milliseconds. |         // reasons. We just interpret it as a timestamp in milliseconds. | ||||||
|         case parquet::Type::INT64: |       case parquet::Type::INT64: { | ||||||
|         { |  | ||||||
|         long rv = cursor->getInt64(col); |         long rv = cursor->getInt64(col); | ||||||
|         sqlite3_result_int64(ctx, rv); |         sqlite3_result_int64(ctx, rv); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|         case parquet::Type::FIXED_LEN_BYTE_ARRAY: |       case parquet::Type::FIXED_LEN_BYTE_ARRAY: { | ||||||
|         { |         parquet::ByteArray *rv = cursor->getByteArray(col); | ||||||
|           parquet::ByteArray* rv = cursor->getByteArray(col); |         sqlite3_result_blob(ctx, (void *)rv->ptr, rv->len, SQLITE_TRANSIENT); | ||||||
|           sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT); |  | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|       default: |       default: | ||||||
|         // Should be impossible to get here as we should have forbidden this at |         // Should be impossible to get here as we should have forbidden this at | ||||||
|         // CREATE time -- maybe file changed underneath us? |         // CREATE time -- maybe file changed underneath us? | ||||||
|         std::ostringstream ss; |         std::ostringstream ss; | ||||||
|           ss << __FILE__ << ":" << __LINE__ << ": column " << col << " has unsupported type: " << |         ss << __FILE__ << ":" << __LINE__ << ": column " << col | ||||||
|             parquet::TypeToString(cursor->getPhysicalType(col)); |            << " has unsupported type: " | ||||||
|  |            << parquet::TypeToString(cursor->getPhysicalType(col)); | ||||||
|  |  | ||||||
|         throw std::invalid_argument(ss.str()); |         throw std::invalid_argument(ss.str()); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     return SQLITE_OK; |     return SQLITE_OK; | ||||||
|   } catch(std::bad_alloc& ba) { |   } catch (std::bad_alloc &ba) { | ||||||
|     return SQLITE_NOMEM; |     return SQLITE_NOMEM; | ||||||
|   } catch(std::exception& e) { |   } catch (std::exception &e) { | ||||||
|     return SQLITE_ERROR; |     return SQLITE_ERROR; | ||||||
|   } |   } | ||||||
| } | } | ||||||
| @@ -334,8 +324,8 @@ static int parquetColumn( | |||||||
| /* | /* | ||||||
| ** Return the rowid for the current row. | ** Return the rowid for the current row. | ||||||
| */ | */ | ||||||
| static int parquetRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ | static int parquetRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { | ||||||
|   ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; |   ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet *)cur)->cursor; | ||||||
|   *pRowid = cursor->getRowId(); |   *pRowid = cursor->getRowId(); | ||||||
|   return SQLITE_OK; |   return SQLITE_OK; | ||||||
| } | } | ||||||
| @@ -344,11 +334,13 @@ static int parquetRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ | |||||||
| ** Return TRUE if the cursor has been moved off of the last | ** Return TRUE if the cursor has been moved off of the last | ||||||
| ** row of output. | ** row of output. | ||||||
| */ | */ | ||||||
| static int parquetEof(sqlite3_vtab_cursor *cur){ | static int parquetEof(sqlite3_vtab_cursor *cur) { | ||||||
|   ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; |   ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet *)cur)->cursor; | ||||||
|   if(cursor->eof()) { |   if (cursor->eof()) { | ||||||
|     sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur; |     sqlite3_vtab_cursor_parquet *vtab_cursor_parquet = | ||||||
|     sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab); |         (sqlite3_vtab_cursor_parquet *)cur; | ||||||
|  |     sqlite3_vtab_parquet *vtab_parquet = | ||||||
|  |         (sqlite3_vtab_parquet *)(vtab_cursor_parquet->base.pVtab); | ||||||
|     persistConstraints(vtab_parquet->db, cursor); |     persistConstraints(vtab_parquet->db, cursor); | ||||||
|     return 1; |     return 1; | ||||||
|   } |   } | ||||||
| @@ -356,8 +348,8 @@ static int parquetEof(sqlite3_vtab_cursor *cur){ | |||||||
| } | } | ||||||
|  |  | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
| const char* opName(int op) { | const char *opName(int op) { | ||||||
|   switch(op) { |   switch (op) { | ||||||
|   case SQLITE_INDEX_CONSTRAINT_EQ: |   case SQLITE_INDEX_CONSTRAINT_EQ: | ||||||
|     return "="; |     return "="; | ||||||
|   case SQLITE_INDEX_CONSTRAINT_GT: |   case SQLITE_INDEX_CONSTRAINT_GT: | ||||||
| @@ -391,66 +383,60 @@ const char* opName(int op) { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void debugConstraints(sqlite3_index_info *pIdxInfo, ParquetTable *table, int argc, sqlite3_value** argv) { | void debugConstraints(sqlite3_index_info *pIdxInfo, ParquetTable *table, | ||||||
|  |                       int argc, sqlite3_value **argv) { | ||||||
|   printf("debugConstraints, argc=%d\n", argc); |   printf("debugConstraints, argc=%d\n", argc); | ||||||
|   int j = 0; |   int j = 0; | ||||||
|   for(int i = 0; i < pIdxInfo->nConstraint; i++) { |   for (int i = 0; i < pIdxInfo->nConstraint; i++) { | ||||||
|     std::string valueStr = "?"; |     std::string valueStr = "?"; | ||||||
|     if(argv != NULL && pIdxInfo->aConstraint[i].usable) { |     if (argv != NULL && pIdxInfo->aConstraint[i].usable) { | ||||||
|       int type = sqlite3_value_type(argv[j]); |       int type = sqlite3_value_type(argv[j]); | ||||||
|       switch(type) { |       switch (type) { | ||||||
|         case SQLITE_INTEGER: |       case SQLITE_INTEGER: { | ||||||
|         { |  | ||||||
|         sqlite3_int64 rv = sqlite3_value_int64(argv[j]); |         sqlite3_int64 rv = sqlite3_value_int64(argv[j]); | ||||||
|         std::ostringstream ss; |         std::ostringstream ss; | ||||||
|         ss << rv; |         ss << rv; | ||||||
|         valueStr = ss.str(); |         valueStr = ss.str(); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|         case SQLITE_FLOAT: |       case SQLITE_FLOAT: { | ||||||
|         { |  | ||||||
|         double rv = sqlite3_value_double(argv[j]); |         double rv = sqlite3_value_double(argv[j]); | ||||||
|         std::ostringstream ss; |         std::ostringstream ss; | ||||||
|         ss << rv; |         ss << rv; | ||||||
|         valueStr = ss.str(); |         valueStr = ss.str(); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|         case SQLITE_TEXT: |       case SQLITE_TEXT: { | ||||||
|         { |         const unsigned char *rv = sqlite3_value_text(argv[j]); | ||||||
|           const unsigned char* rv = sqlite3_value_text(argv[j]); |  | ||||||
|         std::ostringstream ss; |         std::ostringstream ss; | ||||||
|         ss << "'" << rv << "'"; |         ss << "'" << rv << "'"; | ||||||
|         valueStr = ss.str(); |         valueStr = ss.str(); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|         case SQLITE_BLOB: |       case SQLITE_BLOB: { | ||||||
|         { |  | ||||||
|         int sizeBytes = sqlite3_value_bytes(argv[j]); |         int sizeBytes = sqlite3_value_bytes(argv[j]); | ||||||
|         std::ostringstream ss; |         std::ostringstream ss; | ||||||
|         ss << "'..." << sizeBytes << "-byte blob...'"; |         ss << "'..." << sizeBytes << "-byte blob...'"; | ||||||
|         valueStr = ss.str(); |         valueStr = ss.str(); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|         case SQLITE_NULL: |       case SQLITE_NULL: { | ||||||
|         { |  | ||||||
|         valueStr = "NULL"; |         valueStr = "NULL"; | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|       } |       } | ||||||
|       j++; |       j++; | ||||||
|     } |     } | ||||||
|     printf("  constraint %d: col %s %s %s, usable %d\n", |     printf("  constraint %d: col %s %s %s, usable %d\n", i, | ||||||
|         i, |  | ||||||
|            table->columnName(pIdxInfo->aConstraint[i].iColumn).data(), |            table->columnName(pIdxInfo->aConstraint[i].iColumn).data(), | ||||||
|         opName(pIdxInfo->aConstraint[i].op), |            opName(pIdxInfo->aConstraint[i].op), valueStr.data(), | ||||||
|         valueStr.data(), |  | ||||||
|            pIdxInfo->aConstraint[i].usable); |            pIdxInfo->aConstraint[i].usable); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| ConstraintOperator constraintOperatorFromSqlite(int op) { | ConstraintOperator constraintOperatorFromSqlite(int op) { | ||||||
|   switch(op) { |   switch (op) { | ||||||
|   case SQLITE_INDEX_CONSTRAINT_EQ: |   case SQLITE_INDEX_CONSTRAINT_EQ: | ||||||
|     return Equal; |     return Equal; | ||||||
|   case SQLITE_INDEX_CONSTRAINT_GT: |   case SQLITE_INDEX_CONSTRAINT_GT: | ||||||
| @@ -482,29 +468,30 @@ ConstraintOperator constraintOperatorFromSqlite(int op) { | |||||||
|   throw std::invalid_argument(ss.str()); |   throw std::invalid_argument(ss.str()); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::vector<unsigned char> getRowGroupsForClause(sqlite3* db, std::string table, std::string clause) { | std::vector<unsigned char> getRowGroupsForClause(sqlite3 *db, std::string table, | ||||||
|  |                                                  std::string clause) { | ||||||
|   std::vector<unsigned char> rv; |   std::vector<unsigned char> rv; | ||||||
|  |  | ||||||
|   std::unique_ptr<char, void(*)(void*)> sql(sqlite3_mprintf( |   std::unique_ptr<char, void (*)(void *)> sql( | ||||||
|       "SELECT actual FROM _%s_rowgroups WHERE clause = '%q'", |       sqlite3_mprintf("SELECT actual FROM _%s_rowgroups WHERE clause = '%q'", | ||||||
|       table.c_str(), |                       table.c_str(), clause.c_str()), | ||||||
|       clause.c_str()), sqlite3_free); |       sqlite3_free); | ||||||
|  |  | ||||||
|   if(sql.get() == NULL) |   if (sql.get() == NULL) | ||||||
|     return rv; |     return rv; | ||||||
|  |  | ||||||
|   sqlite3_stmt* pStmt = NULL; |   sqlite3_stmt *pStmt = NULL; | ||||||
|   int rc = sqlite3_prepare_v2(db, sql.get(), -1, &pStmt, NULL); |   int rc = sqlite3_prepare_v2(db, sql.get(), -1, &pStmt, NULL); | ||||||
|   if(rc != 0) |   if (rc != 0) | ||||||
|     return rv; |     return rv; | ||||||
|  |  | ||||||
|   rc = sqlite3_step(pStmt); |   rc = sqlite3_step(pStmt); | ||||||
|   if(rc == SQLITE_ROW) { |   if (rc == SQLITE_ROW) { | ||||||
|     int size = sqlite3_column_bytes(pStmt, 0); |     int size = sqlite3_column_bytes(pStmt, 0); | ||||||
|     unsigned char* blob = (unsigned char*)sqlite3_column_blob(pStmt, 0); |     unsigned char *blob = (unsigned char *)sqlite3_column_blob(pStmt, 0); | ||||||
|     // TODO: there is a memory leak here if we get a std::bad_alloc while populating rv; |     // TODO: there is a memory leak here if we get a std::bad_alloc while | ||||||
|     // we fail to free pStmt |     // populating rv; we fail to free pStmt | ||||||
|     for(int i = 0; i < size; i++) { |     for (int i = 0; i < size; i++) { | ||||||
|       rv.push_back(blob[i]); |       rv.push_back(blob[i]); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
| @@ -513,24 +500,20 @@ std::vector<unsigned char> getRowGroupsForClause(sqlite3* db, std::string table, | |||||||
|   return rv; |   return rv; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
| ** Only a full table scan is supported.  So xFilter simply rewinds to | ** Only a full table scan is supported.  So xFilter simply rewinds to | ||||||
| ** the beginning. | ** the beginning. | ||||||
| */ | */ | ||||||
| static int parquetFilter( | static int parquetFilter(sqlite3_vtab_cursor *cur, int idxNum, | ||||||
|   sqlite3_vtab_cursor *cur, |                          const char *idxStr, int argc, sqlite3_value **argv) { | ||||||
|   int idxNum, |  | ||||||
|   const char *idxStr, |  | ||||||
|   int argc, |  | ||||||
|   sqlite3_value **argv |  | ||||||
| ){ |  | ||||||
|   try { |   try { | ||||||
|     sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur; |     sqlite3_vtab_cursor_parquet *vtab_cursor_parquet = | ||||||
|     sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab); |         (sqlite3_vtab_cursor_parquet *)cur; | ||||||
|     sqlite3* db = vtab_parquet->db; |     sqlite3_vtab_parquet *vtab_parquet = | ||||||
|     ParquetCursor* cursor = vtab_cursor_parquet->cursor; |         (sqlite3_vtab_parquet *)(vtab_cursor_parquet->base.pVtab); | ||||||
|     sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr; |     sqlite3 *db = vtab_parquet->db; | ||||||
|  |     ParquetCursor *cursor = vtab_cursor_parquet->cursor; | ||||||
|  |     sqlite3_index_info *indexInfo = (sqlite3_index_info *)idxStr; | ||||||
|  |  | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
|     struct timeval tv; |     struct timeval tv; | ||||||
| @@ -539,13 +522,14 @@ static int parquetFilter( | |||||||
|         (unsigned long long)(tv.tv_sec) * 1000 + |         (unsigned long long)(tv.tv_sec) * 1000 + | ||||||
|         (unsigned long long)(tv.tv_usec) / 1000; |         (unsigned long long)(tv.tv_usec) / 1000; | ||||||
|  |  | ||||||
|     printf("%llu xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", millisecondsSinceEpoch, idxNum, (long unsigned int)idxStr, argc); |     printf("%llu xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", | ||||||
|  |            millisecondsSinceEpoch, idxNum, (long unsigned int)idxStr, argc); | ||||||
|     debugConstraints(indexInfo, cursor->getTable(), argc, argv); |     debugConstraints(indexInfo, cursor->getTable(), argc, argv); | ||||||
| #endif | #endif | ||||||
|     std::vector<Constraint> constraints; |     std::vector<Constraint> constraints; | ||||||
|     int j = 0; |     int j = 0; | ||||||
|     for(int i = 0; i < indexInfo->nConstraint; i++) { |     for (int i = 0; i < indexInfo->nConstraint; i++) { | ||||||
|       if(!indexInfo->aConstraint[i].usable) { |       if (!indexInfo->aConstraint[i].usable) { | ||||||
|         continue; |         continue; | ||||||
|       } |       } | ||||||
|  |  | ||||||
| @@ -555,86 +539,76 @@ static int parquetFilter( | |||||||
|       std::vector<unsigned char> blobValue; |       std::vector<unsigned char> blobValue; | ||||||
|       int sqliteType = sqlite3_value_type(argv[j]); |       int sqliteType = sqlite3_value_type(argv[j]); | ||||||
|  |  | ||||||
|       if(sqliteType == SQLITE_INTEGER) { |       if (sqliteType == SQLITE_INTEGER) { | ||||||
|         type = Integer; |         type = Integer; | ||||||
|         intValue = sqlite3_value_int64(argv[j]); |         intValue = sqlite3_value_int64(argv[j]); | ||||||
|       } else if(sqliteType == SQLITE_FLOAT) { |       } else if (sqliteType == SQLITE_FLOAT) { | ||||||
|         type = Double; |         type = Double; | ||||||
|         doubleValue = sqlite3_value_double(argv[j]); |         doubleValue = sqlite3_value_double(argv[j]); | ||||||
|       } else if(sqliteType == SQLITE_TEXT) { |       } else if (sqliteType == SQLITE_TEXT) { | ||||||
|         type = Text; |         type = Text; | ||||||
|         int len = sqlite3_value_bytes(argv[j]); |         int len = sqlite3_value_bytes(argv[j]); | ||||||
|         const unsigned char* ptr = sqlite3_value_text(argv[j]); |         const unsigned char *ptr = sqlite3_value_text(argv[j]); | ||||||
|         for(int k = 0; k < len; k++) { |         for (int k = 0; k < len; k++) { | ||||||
|           blobValue.push_back(ptr[k]); |           blobValue.push_back(ptr[k]); | ||||||
|         } |         } | ||||||
|       } else if(sqliteType == SQLITE_BLOB) { |       } else if (sqliteType == SQLITE_BLOB) { | ||||||
|         type = Blob; |         type = Blob; | ||||||
|         int len = sqlite3_value_bytes(argv[j]); |         int len = sqlite3_value_bytes(argv[j]); | ||||||
|         const unsigned char* ptr = (const unsigned char*)sqlite3_value_blob(argv[j]); |         const unsigned char *ptr = | ||||||
|         for(int k = 0; k < len; k++) { |             (const unsigned char *)sqlite3_value_blob(argv[j]); | ||||||
|  |         for (int k = 0; k < len; k++) { | ||||||
|           blobValue.push_back(ptr[k]); |           blobValue.push_back(ptr[k]); | ||||||
|         } |         } | ||||||
|       } else if(sqliteType == SQLITE_NULL) { |       } else if (sqliteType == SQLITE_NULL) { | ||||||
|         type = Null; |         type = Null; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       std::string columnName = "rowid"; |       std::string columnName = "rowid"; | ||||||
|       if(indexInfo->aConstraint[i].iColumn >= 0) { |       if (indexInfo->aConstraint[i].iColumn >= 0) { | ||||||
|         columnName = cursor->getTable()->columnName(indexInfo->aConstraint[i].iColumn); |         columnName = | ||||||
|  |             cursor->getTable()->columnName(indexInfo->aConstraint[i].iColumn); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       RowGroupBitmap bitmap = RowGroupBitmap(cursor->getNumRowGroups()); |       RowGroupBitmap bitmap = RowGroupBitmap(cursor->getNumRowGroups()); | ||||||
|       Constraint dummy( |       Constraint dummy( | ||||||
|         bitmap, |           bitmap, indexInfo->aConstraint[i].iColumn, columnName, | ||||||
|         indexInfo->aConstraint[i].iColumn, |           constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), type, | ||||||
|         columnName, |           intValue, doubleValue, blobValue); | ||||||
|         constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), |  | ||||||
|         type, |  | ||||||
|         intValue, |  | ||||||
|         doubleValue, |  | ||||||
|         blobValue); |  | ||||||
|  |  | ||||||
|       std::vector<unsigned char> actual = getRowGroupsForClause(db, cursor->getTable()->getTableName(), dummy.describe()); |       std::vector<unsigned char> actual = getRowGroupsForClause( | ||||||
|       if(actual.size() > 0) { |           db, cursor->getTable()->getTableName(), dummy.describe()); | ||||||
|         // Initialize the estimate to be the actual -- eventually they'll converge |       if (actual.size() > 0) { | ||||||
|         // and we'll stop writing back to the db. |         // Initialize the estimate to be the actual -- eventually they'll | ||||||
|  |         // converge and we'll stop writing back to the db. | ||||||
|         std::vector<unsigned char> estimate = actual; |         std::vector<unsigned char> estimate = actual; | ||||||
|         bitmap = RowGroupBitmap(estimate, actual); |         bitmap = RowGroupBitmap(estimate, actual); | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       Constraint constraint( |       Constraint constraint( | ||||||
|         bitmap, |           bitmap, indexInfo->aConstraint[i].iColumn, columnName, | ||||||
|         indexInfo->aConstraint[i].iColumn, |           constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), type, | ||||||
|         columnName, |           intValue, doubleValue, blobValue); | ||||||
|         constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), |  | ||||||
|         type, |  | ||||||
|         intValue, |  | ||||||
|         doubleValue, |  | ||||||
|         blobValue); |  | ||||||
|  |  | ||||||
|       constraints.push_back(constraint); |       constraints.push_back(constraint); | ||||||
|       j++; |       j++; | ||||||
|     } |     } | ||||||
|     cursor->reset(constraints); |     cursor->reset(constraints); | ||||||
|     return parquetNext(cur); |     return parquetNext(cur); | ||||||
|   } catch(std::bad_alloc& ba) { |   } catch (std::bad_alloc &ba) { | ||||||
|     return SQLITE_NOMEM; |     return SQLITE_NOMEM; | ||||||
|   } catch(std::exception& e) { |   } catch (std::exception &e) { | ||||||
|     return SQLITE_ERROR; |     return SQLITE_ERROR; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
| * We'll always indicate to SQLite that we prefer it to use an index so that it will |  * We'll always indicate to SQLite that we prefer it to use an index so that it | ||||||
| * pass additional context to xFilter, which we may or may not use. |  * will pass additional context to xFilter, which we may or may not use. | ||||||
| * |  * | ||||||
| * We copy the sqlite3_index_info structure, as is, into idxStr for later use. |  * We copy the sqlite3_index_info structure, as is, into idxStr for later use. | ||||||
| */ |  */ | ||||||
| static int parquetBestIndex( | static int parquetBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo) { | ||||||
|   sqlite3_vtab *tab, |  | ||||||
|   sqlite3_index_info *pIdxInfo |  | ||||||
| ){ |  | ||||||
|   try { |   try { | ||||||
|  |  | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
| @@ -644,18 +618,19 @@ static int parquetBestIndex( | |||||||
|         (unsigned long long)(tv.tv_sec) * 1000 + |         (unsigned long long)(tv.tv_sec) * 1000 + | ||||||
|         (unsigned long long)(tv.tv_usec) / 1000; |         (unsigned long long)(tv.tv_usec) / 1000; | ||||||
|  |  | ||||||
|  |     ParquetTable *table = ((sqlite3_vtab_parquet *)tab)->table; | ||||||
|     ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table; |     printf("%llu xBestIndex: nConstraint=%d, nOrderBy=%d\n", | ||||||
|     printf("%llu xBestIndex: nConstraint=%d, nOrderBy=%d\n", millisecondsSinceEpoch, pIdxInfo->nConstraint, pIdxInfo->nOrderBy); |            millisecondsSinceEpoch, pIdxInfo->nConstraint, pIdxInfo->nOrderBy); | ||||||
|     debugConstraints(pIdxInfo, table, 0, NULL); |     debugConstraints(pIdxInfo, table, 0, NULL); | ||||||
| #endif | #endif | ||||||
|     // We traverse in rowid ascending order, so if they're asking for it to be ordered like that, |     // We traverse in rowid ascending order, so if they're asking for it to be | ||||||
|     // we can tell SQLite that it's guaranteed. This speeds up some DB viewer utilities that |     // ordered like that, we can tell SQLite that it's guaranteed. This speeds | ||||||
|     // use rowids for pagination. |     // up some DB viewer utilities that use rowids for pagination. | ||||||
|     if(pIdxInfo->nOrderBy == 1 && pIdxInfo->aOrderBy[0].iColumn == -1 && pIdxInfo->aOrderBy[0].desc == 0) |     if (pIdxInfo->nOrderBy == 1 && pIdxInfo->aOrderBy[0].iColumn == -1 && | ||||||
|  |         pIdxInfo->aOrderBy[0].desc == 0) | ||||||
|       pIdxInfo->orderByConsumed = 1; |       pIdxInfo->orderByConsumed = 1; | ||||||
|  |  | ||||||
|     if(pIdxInfo->nConstraint == 0) { |     if (pIdxInfo->nConstraint == 0) { | ||||||
|       pIdxInfo->estimatedCost = 1000000000000; |       pIdxInfo->estimatedCost = 1000000000000; | ||||||
|       pIdxInfo->idxNum = 0; |       pIdxInfo->idxNum = 0; | ||||||
|     } else { |     } else { | ||||||
| @@ -663,61 +638,69 @@ static int parquetBestIndex( | |||||||
|       pIdxInfo->idxNum = 1; |       pIdxInfo->idxNum = 1; | ||||||
|       int j = 0; |       int j = 0; | ||||||
|  |  | ||||||
|       for(int i = 0; i < pIdxInfo->nConstraint; i++) { |       for (int i = 0; i < pIdxInfo->nConstraint; i++) { | ||||||
|         if(pIdxInfo->aConstraint[i].usable) { |         if (pIdxInfo->aConstraint[i].usable) { | ||||||
|           j++; |           j++; | ||||||
|           pIdxInfo->aConstraintUsage[i].argvIndex = j; |           pIdxInfo->aConstraintUsage[i].argvIndex = j; | ||||||
| //          pIdxInfo->aConstraintUsage[i].omit = 1; |           //          pIdxInfo->aConstraintUsage[i].omit = 1; | ||||||
|         } |         } | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     size_t dupeSize = sizeof(sqlite3_index_info) + |     size_t dupeSize = | ||||||
|       //pIdxInfo->nConstraint * sizeof(sqlite3_index_constraint) + |         sizeof(sqlite3_index_info) + | ||||||
|       pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) + |         // pIdxInfo->nConstraint * sizeof(sqlite3_index_constraint) + | ||||||
|  |         pIdxInfo->nConstraint * | ||||||
|  |             sizeof(sqlite3_index_info::sqlite3_index_constraint) + | ||||||
|         pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby) + |         pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby) + | ||||||
|       pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint_usage); |         pIdxInfo->nConstraint * | ||||||
|     sqlite3_index_info* dupe = (sqlite3_index_info*)sqlite3_malloc(dupeSize); |             sizeof(sqlite3_index_info::sqlite3_index_constraint_usage); | ||||||
|     pIdxInfo->idxStr = (char*)dupe; |     sqlite3_index_info *dupe = (sqlite3_index_info *)sqlite3_malloc(dupeSize); | ||||||
|  |     pIdxInfo->idxStr = (char *)dupe; | ||||||
|     pIdxInfo->needToFreeIdxStr = 1; |     pIdxInfo->needToFreeIdxStr = 1; | ||||||
|  |  | ||||||
|     memset(dupe, 0, dupeSize); |     memset(dupe, 0, dupeSize); | ||||||
|     memcpy(dupe, pIdxInfo, sizeof(sqlite3_index_info)); |     memcpy(dupe, pIdxInfo, sizeof(sqlite3_index_info)); | ||||||
|  |  | ||||||
|     dupe->aConstraint = (sqlite3_index_info::sqlite3_index_constraint*)((char*)dupe + sizeof(sqlite3_index_info)); |     dupe->aConstraint = (sqlite3_index_info::sqlite3_index_constraint | ||||||
|     dupe->aOrderBy = (sqlite3_index_info::sqlite3_index_orderby*)((char*)dupe + |                              *)((char *)dupe + sizeof(sqlite3_index_info)); | ||||||
|         sizeof(sqlite3_index_info) + |     dupe->aOrderBy = | ||||||
|         pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint)); |         (sqlite3_index_info::sqlite3_index_orderby | ||||||
|     dupe->aConstraintUsage = (sqlite3_index_info::sqlite3_index_constraint_usage*)((char*)dupe + |              *)((char *)dupe + sizeof(sqlite3_index_info) + | ||||||
|         sizeof(sqlite3_index_info) + |                 pIdxInfo->nConstraint * | ||||||
|         pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) + |                     sizeof(sqlite3_index_info::sqlite3_index_constraint)); | ||||||
|         pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby)); |     dupe->aConstraintUsage = | ||||||
|  |         (sqlite3_index_info::sqlite3_index_constraint_usage | ||||||
|  |              *)((char *)dupe + sizeof(sqlite3_index_info) + | ||||||
|  |                 pIdxInfo->nConstraint * | ||||||
|  |                     sizeof(sqlite3_index_info::sqlite3_index_constraint) + | ||||||
|  |                 pIdxInfo->nOrderBy * | ||||||
|  |                     sizeof(sqlite3_index_info::sqlite3_index_orderby)); | ||||||
|  |  | ||||||
|  |     for (int i = 0; i < pIdxInfo->nConstraint; i++) { | ||||||
|     for(int i = 0; i < pIdxInfo->nConstraint; i++) { |  | ||||||
|       dupe->aConstraint[i].iColumn = pIdxInfo->aConstraint[i].iColumn; |       dupe->aConstraint[i].iColumn = pIdxInfo->aConstraint[i].iColumn; | ||||||
|       dupe->aConstraint[i].op = pIdxInfo->aConstraint[i].op; |       dupe->aConstraint[i].op = pIdxInfo->aConstraint[i].op; | ||||||
|       dupe->aConstraint[i].usable = pIdxInfo->aConstraint[i].usable; |       dupe->aConstraint[i].usable = pIdxInfo->aConstraint[i].usable; | ||||||
|       dupe->aConstraint[i].iTermOffset = pIdxInfo->aConstraint[i].iTermOffset; |       dupe->aConstraint[i].iTermOffset = pIdxInfo->aConstraint[i].iTermOffset; | ||||||
|  |  | ||||||
|       dupe->aConstraintUsage[i].argvIndex = pIdxInfo->aConstraintUsage[i].argvIndex; |       dupe->aConstraintUsage[i].argvIndex = | ||||||
|  |           pIdxInfo->aConstraintUsage[i].argvIndex; | ||||||
|       dupe->aConstraintUsage[i].omit = pIdxInfo->aConstraintUsage[i].omit; |       dupe->aConstraintUsage[i].omit = pIdxInfo->aConstraintUsage[i].omit; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     for(int i = 0; i < pIdxInfo->nOrderBy; i++) { |     for (int i = 0; i < pIdxInfo->nOrderBy; i++) { | ||||||
|       dupe->aOrderBy[i].iColumn = pIdxInfo->aOrderBy[i].iColumn; |       dupe->aOrderBy[i].iColumn = pIdxInfo->aOrderBy[i].iColumn; | ||||||
|       dupe->aOrderBy[i].desc = pIdxInfo->aOrderBy[i].desc; |       dupe->aOrderBy[i].desc = pIdxInfo->aOrderBy[i].desc; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return SQLITE_OK; |     return SQLITE_OK; | ||||||
|   } catch(std::bad_alloc& ba) { |   } catch (std::bad_alloc &ba) { | ||||||
|     return SQLITE_NOMEM; |     return SQLITE_NOMEM; | ||||||
|   } catch(std::exception& e) { |   } catch (std::exception &e) { | ||||||
|     return SQLITE_ERROR; |     return SQLITE_ERROR; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| static sqlite3_module ParquetModule = { | static sqlite3_module ParquetModule = { | ||||||
|     0,                 /* iVersion */ |     0,                 /* iVersion */ | ||||||
|     parquetCreate,     /* xCreate */ |     parquetCreate,     /* xCreate */ | ||||||
| @@ -742,19 +725,16 @@ static sqlite3_module ParquetModule = { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| /* | /* | ||||||
| * This routine is called when the extension is loaded.  The new |  * This routine is called when the extension is loaded.  The new | ||||||
| * Parquet virtual table module is registered with the calling database |  * Parquet virtual table module is registered with the calling database | ||||||
| * connection. |  * connection. | ||||||
| */ |  */ | ||||||
| extern "C" { | extern "C" { | ||||||
|   int sqlite3_parquet_init( | int sqlite3_parquet_init(sqlite3 *db, char **pzErrMsg, | ||||||
|     sqlite3 *db,  |                          const sqlite3_api_routines *pApi) { | ||||||
|     char **pzErrMsg,  |  | ||||||
|     const sqlite3_api_routines *pApi |  | ||||||
|   ){ |  | ||||||
|   int rc; |   int rc; | ||||||
|   SQLITE_EXTENSION_INIT2(pApi); |   SQLITE_EXTENSION_INIT2(pApi); | ||||||
|   rc = sqlite3_create_module(db, "parquet", &ParquetModule, 0); |   rc = sqlite3_create_module(db, "parquet", &ParquetModule, 0); | ||||||
|   return rc; |   return rc; | ||||||
|   } | } | ||||||
| } | } | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,13 +1,13 @@ | |||||||
| #ifndef PARQUET_CURSOR_H | #ifndef PARQUET_CURSOR_H | ||||||
| #define PARQUET_CURSOR_H | #define PARQUET_CURSOR_H | ||||||
|  |  | ||||||
|  | #include "parquet/api/reader.h" | ||||||
| #include "parquet_filter.h" | #include "parquet_filter.h" | ||||||
| #include "parquet_table.h" | #include "parquet_table.h" | ||||||
| #include "parquet/api/reader.h" |  | ||||||
|  |  | ||||||
| class ParquetCursor { | class ParquetCursor { | ||||||
|  |  | ||||||
|   ParquetTable* table; |   ParquetTable *table; | ||||||
|   std::unique_ptr<parquet::ParquetFileReader> reader; |   std::unique_ptr<parquet::ParquetFileReader> reader; | ||||||
|   std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata; |   std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata; | ||||||
|   std::shared_ptr<parquet::RowGroupReader> rowGroup; |   std::shared_ptr<parquet::RowGroupReader> rowGroup; | ||||||
| @@ -35,19 +35,26 @@ class ParquetCursor { | |||||||
|  |  | ||||||
|   bool currentRowSatisfiesFilter(); |   bool currentRowSatisfiesFilter(); | ||||||
|   bool currentRowGroupSatisfiesFilter(); |   bool currentRowGroupSatisfiesFilter(); | ||||||
|   bool currentRowGroupSatisfiesRowIdFilter(Constraint& constraint); |   bool currentRowGroupSatisfiesRowIdFilter(Constraint &constraint); | ||||||
|   bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |   bool currentRowGroupSatisfiesTextFilter( | ||||||
|   bool currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |       Constraint &constraint, | ||||||
|   bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|   bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |   bool currentRowGroupSatisfiesBlobFilter( | ||||||
|  |       Constraint &constraint, | ||||||
|   bool currentRowSatisfiesTextFilter(Constraint& constraint); |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|   bool currentRowSatisfiesIntegerFilter(Constraint& constraint); |   bool currentRowGroupSatisfiesIntegerFilter( | ||||||
|   bool currentRowSatisfiesDoubleFilter(Constraint& constraint); |       Constraint &constraint, | ||||||
|  |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|  |   bool currentRowGroupSatisfiesDoubleFilter( | ||||||
|  |       Constraint &constraint, | ||||||
|  |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|  |  | ||||||
|  |   bool currentRowSatisfiesTextFilter(Constraint &constraint); | ||||||
|  |   bool currentRowSatisfiesIntegerFilter(Constraint &constraint); | ||||||
|  |   bool currentRowSatisfiesDoubleFilter(Constraint &constraint); | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   ParquetCursor(ParquetTable* table); |   ParquetCursor(ParquetTable *table); | ||||||
|   int getRowId(); |   int getRowId(); | ||||||
|   void next(); |   void next(); | ||||||
|   void close(); |   void close(); | ||||||
| @@ -58,16 +65,15 @@ public: | |||||||
|   bool isNull(int col); |   bool isNull(int col); | ||||||
|   unsigned int getNumRowGroups() const; |   unsigned int getNumRowGroups() const; | ||||||
|   unsigned int getNumConstraints() const; |   unsigned int getNumConstraints() const; | ||||||
|   const Constraint& getConstraint(unsigned int i) const; |   const Constraint &getConstraint(unsigned int i) const; | ||||||
|   parquet::Type::type getPhysicalType(int col); |   parquet::Type::type getPhysicalType(int col); | ||||||
|   parquet::LogicalType::type getLogicalType(int col); |   parquet::LogicalType::type getLogicalType(int col); | ||||||
|   ParquetTable* getTable() const; |   ParquetTable *getTable() const; | ||||||
|  |  | ||||||
|   int getInt32(int col); |   int getInt32(int col); | ||||||
|   long getInt64(int col); |   long getInt64(int col); | ||||||
|   double getDouble(int col); |   double getDouble(int col); | ||||||
|   parquet::ByteArray* getByteArray(int col); |   parquet::ByteArray *getByteArray(int col); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,40 +1,29 @@ | |||||||
| #include "parquet_filter.h" | #include "parquet_filter.h" | ||||||
|  |  | ||||||
| Constraint::Constraint( | Constraint::Constraint(RowGroupBitmap bitmap, int column, | ||||||
|   RowGroupBitmap bitmap, |                        std::string columnName, ConstraintOperator op, | ||||||
|   int column, |                        ValueType type, int64_t intValue, double doubleValue, | ||||||
|   std::string columnName, |                        std::vector<unsigned char> blobValue) | ||||||
|   ConstraintOperator op, |     : bitmap(bitmap), column(column), columnName(columnName), op(op), | ||||||
|   ValueType type, |       type(type), intValue(intValue), doubleValue(doubleValue), | ||||||
|   int64_t intValue, |       blobValue(blobValue), hadRows(false) { | ||||||
|   double doubleValue, |  | ||||||
|   std::vector<unsigned char> blobValue |  | ||||||
| ): bitmap(bitmap), |  | ||||||
|    column(column), |  | ||||||
|    columnName(columnName), |  | ||||||
|    op(op), |  | ||||||
|    type(type), |  | ||||||
|    intValue(intValue), |  | ||||||
|    doubleValue(doubleValue), |  | ||||||
|    blobValue(blobValue), |  | ||||||
|    hadRows(false) { |  | ||||||
|   RowGroupBitmap bm = bitmap; |   RowGroupBitmap bm = bitmap; | ||||||
|   this->bitmap = bm; |   this->bitmap = bm; | ||||||
|  |  | ||||||
|   if(type == Text) { |   if (type == Text) { | ||||||
|     stringValue = std::string((char*)&blobValue[0], blobValue.size()); |     stringValue = std::string((char *)&blobValue[0], blobValue.size()); | ||||||
|  |  | ||||||
|     if(op == Like) { |     if (op == Like) { | ||||||
|       // This permits more rowgroups than is strictly needed |       // This permits more rowgroups than is strictly needed | ||||||
|       // since it assumes an implicit wildcard. But it's |       // since it assumes an implicit wildcard. But it's | ||||||
|       // simple to implement, so we'll go with it. |       // simple to implement, so we'll go with it. | ||||||
|       likeStringValue = stringValue; |       likeStringValue = stringValue; | ||||||
|       size_t idx = likeStringValue.find_first_of("%"); |       size_t idx = likeStringValue.find_first_of("%"); | ||||||
|       if(idx != std::string::npos) { |       if (idx != std::string::npos) { | ||||||
|         likeStringValue = likeStringValue.substr(0, idx); |         likeStringValue = likeStringValue.substr(0, idx); | ||||||
|       } |       } | ||||||
|       idx = likeStringValue.find_first_of("_"); |       idx = likeStringValue.find_first_of("_"); | ||||||
|       if(idx != std::string::npos) { |       if (idx != std::string::npos) { | ||||||
|         likeStringValue = likeStringValue.substr(0, idx); |         likeStringValue = likeStringValue.substr(0, idx); | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
| @@ -45,7 +34,7 @@ std::string Constraint::describe() const { | |||||||
|   std::string rv; |   std::string rv; | ||||||
|   rv.append(columnName); |   rv.append(columnName); | ||||||
|   rv.append(" "); |   rv.append(" "); | ||||||
|   switch(op) { |   switch (op) { | ||||||
|   case Equal: |   case Equal: | ||||||
|     rv.append("="); |     rv.append("="); | ||||||
|     break; |     break; | ||||||
| @@ -85,7 +74,7 @@ std::string Constraint::describe() const { | |||||||
|   } |   } | ||||||
|   rv.append(" "); |   rv.append(" "); | ||||||
|  |  | ||||||
|   switch(type) { |   switch (type) { | ||||||
|   case Null: |   case Null: | ||||||
|     rv.append("NULL"); |     rv.append("NULL"); | ||||||
|     break; |     break; | ||||||
|   | |||||||
| @@ -1,9 +1,9 @@ | |||||||
| #ifndef PARQUET_FILTER_H | #ifndef PARQUET_FILTER_H | ||||||
| #define PARQUET_FILTER_H | #define PARQUET_FILTER_H | ||||||
|  |  | ||||||
| #include <vector> |  | ||||||
| #include <string> |  | ||||||
| #include <cstdint> | #include <cstdint> | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
| enum ConstraintOperator { | enum ConstraintOperator { | ||||||
|   Equal, |   Equal, | ||||||
| @@ -20,43 +20,36 @@ enum ConstraintOperator { | |||||||
|   Is |   Is | ||||||
| }; | }; | ||||||
|  |  | ||||||
| enum ValueType { | enum ValueType { Null, Integer, Double, Blob, Text }; | ||||||
|   Null, |  | ||||||
|   Integer, |  | ||||||
|   Double, |  | ||||||
|   Blob, |  | ||||||
|   Text |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class RowGroupBitmap { | class RowGroupBitmap { | ||||||
|   void setBit(std::vector<unsigned char>& membership, unsigned int rowGroup, bool isSet) { |   void setBit(std::vector<unsigned char> &membership, unsigned int rowGroup, | ||||||
|  |               bool isSet) { | ||||||
|     int byte = rowGroup / 8; |     int byte = rowGroup / 8; | ||||||
|     int offset = rowGroup % 8; |     int offset = rowGroup % 8; | ||||||
|     unsigned char c = membership[byte]; |     unsigned char c = membership[byte]; | ||||||
|     c &= ~(1UL << offset); |     c &= ~(1UL << offset); | ||||||
|     if(isSet) { |     if (isSet) { | ||||||
|       c |= 1UL << offset; |       c |= 1UL << offset; | ||||||
|     } |     } | ||||||
|     membership[byte] = c; |     membership[byte] = c; | ||||||
|   } |   } | ||||||
| // Compares estimated rowGroupFilter results against observed results |   // Compares estimated rowGroupFilter results against observed results | ||||||
| // when we explored the row group. This lets us cache  |   // when we explored the row group. This lets us cache | ||||||
| public: | public: | ||||||
|   RowGroupBitmap(unsigned int totalRowGroups) { |   RowGroupBitmap(unsigned int totalRowGroups) { | ||||||
|     // Initialize everything to assume that all row groups match. |     // Initialize everything to assume that all row groups match. | ||||||
|     // As we discover otherwise, we'll update that assumption. |     // As we discover otherwise, we'll update that assumption. | ||||||
|     for(unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) { |     for (unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) { | ||||||
|       estimatedMembership.push_back(0xFF); |       estimatedMembership.push_back(0xFF); | ||||||
|       actualMembership.push_back(0xFF); |       actualMembership.push_back(0xFF); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   RowGroupBitmap( |   RowGroupBitmap(std::vector<unsigned char> estimatedMembership, | ||||||
|       std::vector<unsigned char> estimatedMembership, |                  std::vector<unsigned char> actualMembership) | ||||||
|       std::vector<unsigned char> actualMembership) : |       : estimatedMembership(estimatedMembership), | ||||||
|     estimatedMembership(estimatedMembership), |         actualMembership(actualMembership) {} | ||||||
|     actualMembership(actualMembership) { |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::vector<unsigned char> estimatedMembership; |   std::vector<unsigned char> estimatedMembership; | ||||||
|   std::vector<unsigned char> actualMembership; |   std::vector<unsigned char> actualMembership; | ||||||
| @@ -80,17 +73,11 @@ public: | |||||||
|  |  | ||||||
| class Constraint { | class Constraint { | ||||||
| public: | public: | ||||||
|   // Kind of a messy constructor function, but it's just for internal use, so whatever. |   // Kind of a messy constructor function, but it's just for internal use, so | ||||||
|   Constraint( |   // whatever. | ||||||
|     RowGroupBitmap bitmap, |   Constraint(RowGroupBitmap bitmap, int column, std::string columnName, | ||||||
|     int column, |              ConstraintOperator op, ValueType type, int64_t intValue, | ||||||
|     std::string columnName, |              double doubleValue, std::vector<unsigned char> blobValue); | ||||||
|     ConstraintOperator op, |  | ||||||
|     ValueType type, |  | ||||||
|     int64_t intValue, |  | ||||||
|     double doubleValue, |  | ||||||
|     std::vector<unsigned char> blobValue |  | ||||||
|   ); |  | ||||||
|  |  | ||||||
|   RowGroupBitmap bitmap; |   RowGroupBitmap bitmap; | ||||||
|   int column; // underlying column in the query |   int column; // underlying column in the query | ||||||
|   | |||||||
| @@ -2,61 +2,61 @@ | |||||||
|  |  | ||||||
| #include "parquet/api/reader.h" | #include "parquet/api/reader.h" | ||||||
|  |  | ||||||
| ParquetTable::ParquetTable(std::string file, std::string tableName): file(file), tableName(tableName) { | ParquetTable::ParquetTable(std::string file, std::string tableName) | ||||||
|   std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data()); |     : file(file), tableName(tableName) { | ||||||
|  |   std::unique_ptr<parquet::ParquetFileReader> reader = | ||||||
|  |       parquet::ParquetFileReader::OpenFile(file.data()); | ||||||
|   metadata = reader->metadata(); |   metadata = reader->metadata(); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string ParquetTable::columnName(int i) { | std::string ParquetTable::columnName(int i) { | ||||||
|   if(i == -1) |   if (i == -1) | ||||||
|     return "rowid"; |     return "rowid"; | ||||||
|   return columnNames[i]; |   return columnNames[i]; | ||||||
| } | } | ||||||
|  |  | ||||||
| unsigned int ParquetTable::getNumColumns() { | unsigned int ParquetTable::getNumColumns() { return columnNames.size(); } | ||||||
|   return columnNames.size(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| std::string ParquetTable::CreateStatement() { | std::string ParquetTable::CreateStatement() { | ||||||
|   std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile( |   std::unique_ptr<parquet::ParquetFileReader> reader = | ||||||
|       file.data(), |       parquet::ParquetFileReader::OpenFile( | ||||||
|       true, |           file.data(), true, parquet::default_reader_properties(), metadata); | ||||||
|       parquet::default_reader_properties(), |  | ||||||
|       metadata); |  | ||||||
|   std::string text("CREATE TABLE x("); |   std::string text("CREATE TABLE x("); | ||||||
|   auto schema = reader->metadata()->schema(); |   auto schema = reader->metadata()->schema(); | ||||||
|  |  | ||||||
|   for(auto i = 0; i < schema->num_columns(); i++) { |   for (auto i = 0; i < schema->num_columns(); i++) { | ||||||
|     auto _col = schema->GetColumnRoot(i); |     auto _col = schema->GetColumnRoot(i); | ||||||
|     columnNames.push_back(_col->name()); |     columnNames.push_back(_col->name()); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   for(auto i = 0; i < schema->num_columns(); i++) { |   for (auto i = 0; i < schema->num_columns(); i++) { | ||||||
|     auto _col = schema->GetColumnRoot(i); |     auto _col = schema->GetColumnRoot(i); | ||||||
|  |  | ||||||
|     if(!_col->is_primitive()) { |     if (!_col->is_primitive()) { | ||||||
|       std::ostringstream ss; |       std::ostringstream ss; | ||||||
|       ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-primitive type"; |       ss << __FILE__ << ":" << __LINE__ << ": column " << i | ||||||
|  |          << " has non-primitive type"; | ||||||
|       throw std::invalid_argument(ss.str()); |       throw std::invalid_argument(ss.str()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(_col->is_repeated()) { |     if (_col->is_repeated()) { | ||||||
|       std::ostringstream ss; |       std::ostringstream ss; | ||||||
|       ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-scalar type"; |       ss << __FILE__ << ":" << __LINE__ << ": column " << i | ||||||
|  |          << " has non-scalar type"; | ||||||
|       throw std::invalid_argument(ss.str()); |       throw std::invalid_argument(ss.str()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     parquet::schema::PrimitiveNode* col = (parquet::schema::PrimitiveNode*)_col; |     parquet::schema::PrimitiveNode *col = | ||||||
|  |         (parquet::schema::PrimitiveNode *)_col; | ||||||
|  |  | ||||||
|     if(i > 0) |     if (i > 0) | ||||||
|       text += ", "; |       text += ", "; | ||||||
|  |  | ||||||
|     text += "\""; |     text += "\""; | ||||||
|     // Horrifically inefficient, but easy to understand. |     // Horrifically inefficient, but easy to understand. | ||||||
|     std::string colName = col->name(); |     std::string colName = col->name(); | ||||||
|     for(char& c : colName) { |     for (char &c : colName) { | ||||||
|       if(c == '"') |       if (c == '"') | ||||||
|         text += "\"\""; |         text += "\"\""; | ||||||
|       else |       else | ||||||
|         text += c; |         text += c; | ||||||
| @@ -71,7 +71,7 @@ std::string ParquetTable::CreateStatement() { | |||||||
|     // whose unsigned ints start getting interpreted as signed. (We could |     // whose unsigned ints start getting interpreted as signed. (We could | ||||||
|     // support this for UINT_8/16/32 -- and for UINT_64 we could throw if |     // support this for UINT_8/16/32 -- and for UINT_64 we could throw if | ||||||
|     // the high bit was set.) |     // the high bit was set.) | ||||||
|     if(logical == parquet::LogicalType::NONE || |     if (logical == parquet::LogicalType::NONE || | ||||||
|         logical == parquet::LogicalType::UTF8 || |         logical == parquet::LogicalType::UTF8 || | ||||||
|         logical == parquet::LogicalType::DATE || |         logical == parquet::LogicalType::DATE || | ||||||
|         logical == parquet::LogicalType::TIME_MILLIS || |         logical == parquet::LogicalType::TIME_MILLIS || | ||||||
| @@ -82,17 +82,17 @@ std::string ParquetTable::CreateStatement() { | |||||||
|         logical == parquet::LogicalType::INT_16 || |         logical == parquet::LogicalType::INT_16 || | ||||||
|         logical == parquet::LogicalType::INT_32 || |         logical == parquet::LogicalType::INT_32 || | ||||||
|         logical == parquet::LogicalType::INT_64) { |         logical == parquet::LogicalType::INT_64) { | ||||||
|       switch(physical) { |       switch (physical) { | ||||||
|       case parquet::Type::BOOLEAN: |       case parquet::Type::BOOLEAN: | ||||||
|         type = "TINYINT"; |         type = "TINYINT"; | ||||||
|         break; |         break; | ||||||
|       case parquet::Type::INT32: |       case parquet::Type::INT32: | ||||||
|           if(logical == parquet::LogicalType::NONE || |         if (logical == parquet::LogicalType::NONE || | ||||||
|             logical == parquet::LogicalType::INT_32) { |             logical == parquet::LogicalType::INT_32) { | ||||||
|           type = "INT"; |           type = "INT"; | ||||||
|           } else if(logical == parquet::LogicalType::INT_8) { |         } else if (logical == parquet::LogicalType::INT_8) { | ||||||
|           type = "TINYINT"; |           type = "TINYINT"; | ||||||
|           } else if(logical == parquet::LogicalType::INT_16) { |         } else if (logical == parquet::LogicalType::INT_16) { | ||||||
|           type = "SMALLINT"; |           type = "SMALLINT"; | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
| @@ -109,7 +109,7 @@ std::string ParquetTable::CreateStatement() { | |||||||
|         type = "DOUBLE"; |         type = "DOUBLE"; | ||||||
|         break; |         break; | ||||||
|       case parquet::Type::BYTE_ARRAY: |       case parquet::Type::BYTE_ARRAY: | ||||||
|           if(logical == parquet::LogicalType::UTF8) { |         if (logical == parquet::LogicalType::UTF8) { | ||||||
|           type = "TEXT"; |           type = "TEXT"; | ||||||
|         } else { |         } else { | ||||||
|           type = "BLOB"; |           type = "BLOB"; | ||||||
| @@ -123,33 +123,33 @@ std::string ParquetTable::CreateStatement() { | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(type.empty()) { |     if (type.empty()) { | ||||||
|       std::ostringstream ss; |       std::ostringstream ss; | ||||||
|       ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has unsupported type: " << |       ss << __FILE__ << ":" << __LINE__ << ": column " << i | ||||||
|         parquet::TypeToString(physical) << "/" << parquet::LogicalTypeToString(logical); |          << " has unsupported type: " << parquet::TypeToString(physical) << "/" | ||||||
|  |          << parquet::LogicalTypeToString(logical); | ||||||
|  |  | ||||||
|       throw std::invalid_argument(ss.str()); |       throw std::invalid_argument(ss.str()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
|     printf("col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n", |     printf( | ||||||
|         i, |         "col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n", i, col->name().data(), | ||||||
|         col->name().data(), |  | ||||||
|         col->physical_type(), |         col->physical_type(), | ||||||
|         parquet::TypeToString(col->physical_type()).data(), |         parquet::TypeToString(col->physical_type()).data(), col->logical_type(), | ||||||
|         col->logical_type(), |         parquet::LogicalTypeToString(col->logical_type()).data(), type.data()); | ||||||
|         parquet::LogicalTypeToString(col->logical_type()).data(), |  | ||||||
|         type.data()); |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     text += " "; |     text += " "; | ||||||
|     text += type; |     text += type; | ||||||
|   } |   } | ||||||
|   text +=");"; |   text += ");"; | ||||||
|   return text; |   return text; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; } | std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { | ||||||
|  |   return metadata; | ||||||
|  | } | ||||||
|  |  | ||||||
| const std::string& ParquetTable::getFile() { return file; } | const std::string &ParquetTable::getFile() { return file; } | ||||||
| const std::string& ParquetTable::getTableName() { return tableName; } | const std::string &ParquetTable::getTableName() { return tableName; } | ||||||
|   | |||||||
| @@ -1,9 +1,9 @@ | |||||||
| #ifndef PARQUET_TABLE_H | #ifndef PARQUET_TABLE_H | ||||||
| #define PARQUET_TABLE_H | #define PARQUET_TABLE_H | ||||||
|  |  | ||||||
| #include <vector> |  | ||||||
| #include <string> |  | ||||||
| #include "parquet/api/reader.h" | #include "parquet/api/reader.h" | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
| class ParquetTable { | class ParquetTable { | ||||||
|   std::string file; |   std::string file; | ||||||
| @@ -11,15 +11,14 @@ class ParquetTable { | |||||||
|   std::vector<std::string> columnNames; |   std::vector<std::string> columnNames; | ||||||
|   std::shared_ptr<parquet::FileMetaData> metadata; |   std::shared_ptr<parquet::FileMetaData> metadata; | ||||||
|  |  | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   ParquetTable(std::string file, std::string tableName); |   ParquetTable(std::string file, std::string tableName); | ||||||
|   std::string CreateStatement(); |   std::string CreateStatement(); | ||||||
|   std::string columnName(int idx); |   std::string columnName(int idx); | ||||||
|   unsigned int getNumColumns(); |   unsigned int getNumColumns(); | ||||||
|   std::shared_ptr<parquet::FileMetaData> getMetadata(); |   std::shared_ptr<parquet::FileMetaData> getMetadata(); | ||||||
|   const std::string& getFile(); |   const std::string &getFile(); | ||||||
|   const std::string& getTableName(); |   const std::string &getTableName(); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Addie Morrison
					Addie Morrison