mirror of
https://github.com/cldellow/sqlite-parquet-vtable.git
synced 2025-06-12 15:17:19 +00:00
Run a formatting pass with clang-format to minimize future git churn
This commit is contained in:
parent
ae194c69c5
commit
7bc6f91f6f
508
src/parquet.cc
508
src/parquet.cc
@ -1,66 +1,65 @@
|
|||||||
/*
|
/*
|
||||||
* This file contains the implementation of an SQLite virtual table for
|
* This file contains the implementation of an SQLite virtual table for
|
||||||
* reading Parquet files.
|
* reading Parquet files.
|
||||||
*
|
*
|
||||||
* Usage:
|
* Usage:
|
||||||
*
|
*
|
||||||
* .load ./parquet
|
* .load ./parquet
|
||||||
* CREATE VIRTUAL TABLE demo USING parquet(FILENAME);
|
* CREATE VIRTUAL TABLE demo USING parquet(FILENAME);
|
||||||
* SELECT * FROM demo;
|
* SELECT * FROM demo;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#include <sqlite3ext.h>
|
#include <sqlite3ext.h>
|
||||||
SQLITE_EXTENSION_INIT1
|
SQLITE_EXTENSION_INIT1
|
||||||
#include <string.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdarg.h>
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdio.h>
|
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <sys/time.h>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
#include "parquet_table.h"
|
|
||||||
#include "parquet_cursor.h"
|
#include "parquet_cursor.h"
|
||||||
#include "parquet_filter.h"
|
#include "parquet_filter.h"
|
||||||
|
#include "parquet_table.h"
|
||||||
|
|
||||||
//#define DEBUG
|
//#define DEBUG
|
||||||
|
|
||||||
/* Forward references to the various virtual table methods implemented
|
/* Forward references to the various virtual table methods implemented
|
||||||
* in this file. */
|
* in this file. */
|
||||||
static int parquetCreate(sqlite3*, void*, int, const char*const*,
|
static int parquetCreate(sqlite3 *, void *, int, const char *const *,
|
||||||
sqlite3_vtab**,char**);
|
sqlite3_vtab **, char **);
|
||||||
static int parquetConnect(sqlite3*, void*, int, const char*const*,
|
static int parquetConnect(sqlite3 *, void *, int, const char *const *,
|
||||||
sqlite3_vtab**,char**);
|
sqlite3_vtab **, char **);
|
||||||
static int parquetBestIndex(sqlite3_vtab*,sqlite3_index_info*);
|
static int parquetBestIndex(sqlite3_vtab *, sqlite3_index_info *);
|
||||||
static int parquetDisconnect(sqlite3_vtab*);
|
static int parquetDisconnect(sqlite3_vtab *);
|
||||||
static int parquetDestroy(sqlite3_vtab*);
|
static int parquetDestroy(sqlite3_vtab *);
|
||||||
static int parquetOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
|
static int parquetOpen(sqlite3_vtab *, sqlite3_vtab_cursor **);
|
||||||
static int parquetClose(sqlite3_vtab_cursor*);
|
static int parquetClose(sqlite3_vtab_cursor *);
|
||||||
static int parquetFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
|
static int parquetFilter(sqlite3_vtab_cursor *, int idxNum, const char *idxStr,
|
||||||
int argc, sqlite3_value **argv);
|
int argc, sqlite3_value **argv);
|
||||||
static int parquetNext(sqlite3_vtab_cursor*);
|
static int parquetNext(sqlite3_vtab_cursor *);
|
||||||
static int parquetEof(sqlite3_vtab_cursor*);
|
static int parquetEof(sqlite3_vtab_cursor *);
|
||||||
static int parquetColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
|
static int parquetColumn(sqlite3_vtab_cursor *, sqlite3_context *, int);
|
||||||
static int parquetRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
|
static int parquetRowid(sqlite3_vtab_cursor *, sqlite3_int64 *);
|
||||||
|
|
||||||
/* An instance of the Parquet virtual table */
|
/* An instance of the Parquet virtual table */
|
||||||
typedef struct sqlite3_vtab_parquet {
|
typedef struct sqlite3_vtab_parquet {
|
||||||
sqlite3_vtab base; /* Base class. Must be first */
|
sqlite3_vtab base; /* Base class. Must be first */
|
||||||
ParquetTable* table;
|
ParquetTable *table;
|
||||||
sqlite3* db;
|
sqlite3 *db;
|
||||||
} sqlite3_vtab_parquet;
|
} sqlite3_vtab_parquet;
|
||||||
|
|
||||||
|
|
||||||
/* A cursor for the Parquet virtual table */
|
/* A cursor for the Parquet virtual table */
|
||||||
typedef struct sqlite3_vtab_cursor_parquet {
|
typedef struct sqlite3_vtab_cursor_parquet {
|
||||||
sqlite3_vtab_cursor base; /* Base class. Must be first */
|
sqlite3_vtab_cursor base; /* Base class. Must be first */
|
||||||
ParquetCursor* cursor;
|
ParquetCursor *cursor;
|
||||||
} sqlite3_vtab_cursor_parquet;
|
} sqlite3_vtab_cursor_parquet;
|
||||||
|
|
||||||
static int parquetDestroy(sqlite3_vtab *pVtab) {
|
static int parquetDestroy(sqlite3_vtab *pVtab) {
|
||||||
sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet*)pVtab;
|
sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet *)pVtab;
|
||||||
|
|
||||||
// Clean up our shadow table. This is useful if the user has recreated
|
// Clean up our shadow table. This is useful if the user has recreated
|
||||||
// the parquet file, and our mappings would now be invalid.
|
// the parquet file, and our mappings would now be invalid.
|
||||||
@ -68,7 +67,7 @@ static int parquetDestroy(sqlite3_vtab *pVtab) {
|
|||||||
drop.append(p->table->getTableName());
|
drop.append(p->table->getTableName());
|
||||||
drop.append("_rowgroups");
|
drop.append("_rowgroups");
|
||||||
int rv = sqlite3_exec(p->db, drop.data(), 0, 0, 0);
|
int rv = sqlite3_exec(p->db, drop.data(), 0, 0, 0);
|
||||||
if(rv != 0)
|
if (rv != 0)
|
||||||
return rv;
|
return rv;
|
||||||
|
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
@ -77,24 +76,20 @@ static int parquetDestroy(sqlite3_vtab *pVtab) {
|
|||||||
/*
|
/*
|
||||||
** This method is the destructor for a sqlite3_vtab_parquet object.
|
** This method is the destructor for a sqlite3_vtab_parquet object.
|
||||||
*/
|
*/
|
||||||
static int parquetDisconnect(sqlite3_vtab *pVtab){
|
static int parquetDisconnect(sqlite3_vtab *pVtab) {
|
||||||
sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet*)pVtab;
|
sqlite3_vtab_parquet *p = (sqlite3_vtab_parquet *)pVtab;
|
||||||
delete p->table;
|
delete p->table;
|
||||||
sqlite3_free(p);
|
sqlite3_free(p);
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int parquetConnect(
|
static int parquetConnect(sqlite3 *db, void *pAux, int argc,
|
||||||
sqlite3 *db,
|
const char *const *argv, sqlite3_vtab **ppVtab,
|
||||||
void *pAux,
|
char **pzErr) {
|
||||||
int argc,
|
|
||||||
const char *const*argv,
|
|
||||||
sqlite3_vtab **ppVtab,
|
|
||||||
char **pzErr
|
|
||||||
){
|
|
||||||
try {
|
try {
|
||||||
if(argc != 4 || strlen(argv[3]) < 2) {
|
if (argc != 4 || strlen(argv[3]) < 2) {
|
||||||
*pzErr = sqlite3_mprintf("must provide exactly one argument, the path to a parquet file");
|
*pzErr = sqlite3_mprintf(
|
||||||
|
"must provide exactly one argument, the path to a parquet file");
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,8 +97,8 @@ static int parquetConnect(
|
|||||||
// Remove the delimiting single quotes
|
// Remove the delimiting single quotes
|
||||||
std::string fname = argv[3];
|
std::string fname = argv[3];
|
||||||
fname = fname.substr(1, fname.length() - 2);
|
fname = fname.substr(1, fname.length() - 2);
|
||||||
std::unique_ptr<sqlite3_vtab_parquet, void(*)(void*)> vtab(
|
std::unique_ptr<sqlite3_vtab_parquet, void (*)(void *)> vtab(
|
||||||
(sqlite3_vtab_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_parquet)),
|
(sqlite3_vtab_parquet *)sqlite3_malloc(sizeof(sqlite3_vtab_parquet)),
|
||||||
sqlite3_free);
|
sqlite3_free);
|
||||||
memset(vtab.get(), 0, sizeof(*vtab.get()));
|
memset(vtab.get(), 0, sizeof(*vtab.get()));
|
||||||
|
|
||||||
@ -112,20 +107,20 @@ static int parquetConnect(
|
|||||||
|
|
||||||
std::string create = table->CreateStatement();
|
std::string create = table->CreateStatement();
|
||||||
int rc = sqlite3_declare_vtab(db, create.data());
|
int rc = sqlite3_declare_vtab(db, create.data());
|
||||||
if(rc)
|
if (rc)
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
vtab->table = table.release();
|
vtab->table = table.release();
|
||||||
vtab->db = db;
|
vtab->db = db;
|
||||||
*ppVtab = (sqlite3_vtab*)vtab.release();
|
*ppVtab = (sqlite3_vtab *)vtab.release();
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
} catch (const std::exception& e) {
|
} catch (const std::exception &e) {
|
||||||
*pzErr = sqlite3_mprintf(e.what());
|
*pzErr = sqlite3_mprintf(e.what());
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
} catch(std::bad_alloc& ba) {
|
} catch (std::bad_alloc &ba) {
|
||||||
return SQLITE_NOMEM;
|
return SQLITE_NOMEM;
|
||||||
} catch(std::exception& e) {
|
} catch (std::exception &e) {
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -134,20 +129,16 @@ static int parquetConnect(
|
|||||||
** The xConnect and xCreate methods do the same thing, but they must be
|
** The xConnect and xCreate methods do the same thing, but they must be
|
||||||
** different so that the virtual table is not an eponymous virtual table.
|
** different so that the virtual table is not an eponymous virtual table.
|
||||||
*/
|
*/
|
||||||
static int parquetCreate(
|
static int parquetCreate(sqlite3 *db, void *pAux, int argc,
|
||||||
sqlite3 *db,
|
const char *const *argv, sqlite3_vtab **ppVtab,
|
||||||
void *pAux,
|
char **pzErr) {
|
||||||
int argc, const char *const*argv,
|
|
||||||
sqlite3_vtab **ppVtab,
|
|
||||||
char **pzErr
|
|
||||||
){
|
|
||||||
try {
|
try {
|
||||||
// Create shadow table for storing constraint -> rowid mappings
|
// Create shadow table for storing constraint -> rowid mappings
|
||||||
std::string create = "CREATE TABLE IF NOT EXISTS _";
|
std::string create = "CREATE TABLE IF NOT EXISTS _";
|
||||||
create.append(argv[2]);
|
create.append(argv[2]);
|
||||||
create.append("_rowgroups(clause TEXT, estimate BLOB, actual BLOB)");
|
create.append("_rowgroups(clause TEXT, estimate BLOB, actual BLOB)");
|
||||||
int rv = sqlite3_exec(db, create.data(), 0, 0, 0);
|
int rv = sqlite3_exec(db, create.data(), 0, 0, 0);
|
||||||
if(rv != 0)
|
if (rv != 0)
|
||||||
return rv;
|
return rv;
|
||||||
|
|
||||||
create = "CREATE UNIQUE INDEX IF NOT EXISTS _";
|
create = "CREATE UNIQUE INDEX IF NOT EXISTS _";
|
||||||
@ -158,28 +149,31 @@ static int parquetCreate(
|
|||||||
rv = sqlite3_exec(db, create.data(), 0, 0, 0);
|
rv = sqlite3_exec(db, create.data(), 0, 0, 0);
|
||||||
|
|
||||||
return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr);
|
return parquetConnect(db, pAux, argc, argv, ppVtab, pzErr);
|
||||||
} catch (std::bad_alloc& ba) {
|
} catch (std::bad_alloc &ba) {
|
||||||
return SQLITE_NOMEM;
|
return SQLITE_NOMEM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string quoteBlob(const std::vector<unsigned char>& bytes) {
|
std::string quoteBlob(const std::vector<unsigned char> &bytes) {
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << "X'" << std::hex;
|
ss << "X'" << std::hex;
|
||||||
for(unsigned int i = 0; i < bytes.size(); i++) {
|
for (unsigned int i = 0; i < bytes.size(); i++) {
|
||||||
ss << std::setfill('0') << std::setw(2) << (unsigned int)(unsigned char)bytes[i];
|
ss << std::setfill('0') << std::setw(2)
|
||||||
|
<< (unsigned int)(unsigned char)bytes[i];
|
||||||
}
|
}
|
||||||
ss << "'";
|
ss << "'";
|
||||||
|
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
void persistConstraints(sqlite3* db, ParquetCursor* cursor) {
|
void persistConstraints(sqlite3 *db, ParquetCursor *cursor) {
|
||||||
for(unsigned int i = 0; i < cursor->getNumConstraints(); i++) {
|
for (unsigned int i = 0; i < cursor->getNumConstraints(); i++) {
|
||||||
const Constraint& constraint = cursor->getConstraint(i);
|
const Constraint &constraint = cursor->getConstraint(i);
|
||||||
const std::vector<unsigned char>& estimated = constraint.bitmap.estimatedMembership;
|
const std::vector<unsigned char> &estimated =
|
||||||
const std::vector<unsigned char>& actual = constraint.bitmap.actualMembership;
|
constraint.bitmap.estimatedMembership;
|
||||||
if(estimated == actual) {
|
const std::vector<unsigned char> &actual =
|
||||||
|
constraint.bitmap.actualMembership;
|
||||||
|
if (estimated == actual) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
std::string desc = constraint.describe();
|
std::string desc = constraint.describe();
|
||||||
@ -188,15 +182,13 @@ void persistConstraints(sqlite3* db, ParquetCursor* cursor) {
|
|||||||
std::string actualStr = quoteBlob(actual);
|
std::string actualStr = quoteBlob(actual);
|
||||||
|
|
||||||
// This is only advisory, so ignore failures.
|
// This is only advisory, so ignore failures.
|
||||||
char* sql = sqlite3_mprintf(
|
char *sql =
|
||||||
"INSERT OR REPLACE INTO _%s_rowgroups(clause, estimate, actual) VALUES ('%q', %s, %s)",
|
sqlite3_mprintf("INSERT OR REPLACE INTO _%s_rowgroups(clause, "
|
||||||
|
"estimate, actual) VALUES ('%q', %s, %s)",
|
||||||
cursor->getTable()->getTableName().c_str(),
|
cursor->getTable()->getTableName().c_str(),
|
||||||
desc.c_str(),
|
desc.c_str(), estimatedStr.c_str(), actualStr.c_str());
|
||||||
estimatedStr.c_str(),
|
|
||||||
actualStr.c_str());
|
|
||||||
|
|
||||||
|
if (sql == NULL)
|
||||||
if(sql == NULL)
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
sqlite3_exec(db, sql, 0, 0, 0);
|
sqlite3_exec(db, sql, 0, 0, 0);
|
||||||
@ -204,12 +196,12 @@ void persistConstraints(sqlite3* db, ParquetCursor* cursor) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Destructor for a sqlite3_vtab_cursor_parquet.
|
** Destructor for a sqlite3_vtab_cursor_parquet.
|
||||||
*/
|
*/
|
||||||
static int parquetClose(sqlite3_vtab_cursor *cur){
|
static int parquetClose(sqlite3_vtab_cursor *cur) {
|
||||||
sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur;
|
sqlite3_vtab_cursor_parquet *vtab_cursor_parquet =
|
||||||
|
(sqlite3_vtab_cursor_parquet *)cur;
|
||||||
vtab_cursor_parquet->cursor->close();
|
vtab_cursor_parquet->cursor->close();
|
||||||
delete vtab_cursor_parquet->cursor;
|
delete vtab_cursor_parquet->cursor;
|
||||||
sqlite3_free(cur);
|
sqlite3_free(cur);
|
||||||
@ -219,39 +211,40 @@ static int parquetClose(sqlite3_vtab_cursor *cur){
|
|||||||
/*
|
/*
|
||||||
** Constructor for a new sqlite3_vtab_parquet cursor object.
|
** Constructor for a new sqlite3_vtab_parquet cursor object.
|
||||||
*/
|
*/
|
||||||
static int parquetOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
|
static int parquetOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) {
|
||||||
try {
|
try {
|
||||||
std::unique_ptr<sqlite3_vtab_cursor_parquet, void(*)(void*)> cursor(
|
std::unique_ptr<sqlite3_vtab_cursor_parquet, void (*)(void *)> cursor(
|
||||||
(sqlite3_vtab_cursor_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_cursor_parquet)),
|
(sqlite3_vtab_cursor_parquet *)sqlite3_malloc(
|
||||||
|
sizeof(sqlite3_vtab_cursor_parquet)),
|
||||||
sqlite3_free);
|
sqlite3_free);
|
||||||
memset(cursor.get(), 0, sizeof(*cursor.get()));
|
memset(cursor.get(), 0, sizeof(*cursor.get()));
|
||||||
|
|
||||||
sqlite3_vtab_parquet* pParquet = (sqlite3_vtab_parquet*)p;
|
sqlite3_vtab_parquet *pParquet = (sqlite3_vtab_parquet *)p;
|
||||||
cursor->cursor = new ParquetCursor(pParquet->table);
|
cursor->cursor = new ParquetCursor(pParquet->table);
|
||||||
|
|
||||||
*ppCursor = (sqlite3_vtab_cursor*)cursor.release();
|
*ppCursor = (sqlite3_vtab_cursor *)cursor.release();
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
} catch(std::bad_alloc& ba) {
|
} catch (std::bad_alloc &ba) {
|
||||||
return SQLITE_NOMEM;
|
return SQLITE_NOMEM;
|
||||||
} catch(std::exception& e) {
|
} catch (std::exception &e) {
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Advance a sqlite3_vtab_cursor_parquet to its next row of input.
|
** Advance a sqlite3_vtab_cursor_parquet to its next row of input.
|
||||||
** Set the EOF marker if we reach the end of input.
|
** Set the EOF marker if we reach the end of input.
|
||||||
*/
|
*/
|
||||||
static int parquetNext(sqlite3_vtab_cursor *cur){
|
static int parquetNext(sqlite3_vtab_cursor *cur) {
|
||||||
try {
|
try {
|
||||||
sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur;
|
sqlite3_vtab_cursor_parquet *vtab_cursor_parquet =
|
||||||
ParquetCursor* cursor = vtab_cursor_parquet->cursor;
|
(sqlite3_vtab_cursor_parquet *)cur;
|
||||||
|
ParquetCursor *cursor = vtab_cursor_parquet->cursor;
|
||||||
cursor->next();
|
cursor->next();
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
} catch(std::bad_alloc& ba) {
|
} catch (std::bad_alloc &ba) {
|
||||||
return SQLITE_NOMEM;
|
return SQLITE_NOMEM;
|
||||||
} catch(std::exception& e) {
|
} catch (std::exception &e) {
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -260,73 +253,70 @@ static int parquetNext(sqlite3_vtab_cursor *cur){
|
|||||||
** Return values of columns for the row at which the sqlite3_vtab_cursor_parquet
|
** Return values of columns for the row at which the sqlite3_vtab_cursor_parquet
|
||||||
** is currently pointing.
|
** is currently pointing.
|
||||||
*/
|
*/
|
||||||
static int parquetColumn(
|
static int
|
||||||
sqlite3_vtab_cursor *cur, /* The cursor */
|
parquetColumn(sqlite3_vtab_cursor *cur, /* The cursor */
|
||||||
sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
|
sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
|
||||||
int col /* Which column to return */
|
int col /* Which column to return */
|
||||||
){
|
) {
|
||||||
try {
|
try {
|
||||||
ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
|
ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet *)cur)->cursor;
|
||||||
cursor->ensureColumn(col);
|
cursor->ensureColumn(col);
|
||||||
|
|
||||||
if(cursor->isNull(col)) {
|
if (cursor->isNull(col)) {
|
||||||
sqlite3_result_null(ctx);
|
sqlite3_result_null(ctx);
|
||||||
} else {
|
} else {
|
||||||
switch(cursor->getPhysicalType(col)) {
|
switch (cursor->getPhysicalType(col)) {
|
||||||
case parquet::Type::BOOLEAN:
|
case parquet::Type::BOOLEAN:
|
||||||
case parquet::Type::INT32:
|
case parquet::Type::INT32: {
|
||||||
{
|
|
||||||
int rv = cursor->getInt32(col);
|
int rv = cursor->getInt32(col);
|
||||||
sqlite3_result_int(ctx, rv);
|
sqlite3_result_int(ctx, rv);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case parquet::Type::FLOAT:
|
case parquet::Type::FLOAT:
|
||||||
case parquet::Type::DOUBLE:
|
case parquet::Type::DOUBLE: {
|
||||||
{
|
|
||||||
double rv = cursor->getDouble(col);
|
double rv = cursor->getDouble(col);
|
||||||
sqlite3_result_double(ctx, rv);
|
sqlite3_result_double(ctx, rv);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case parquet::Type::BYTE_ARRAY:
|
case parquet::Type::BYTE_ARRAY: {
|
||||||
{
|
parquet::ByteArray *rv = cursor->getByteArray(col);
|
||||||
parquet::ByteArray* rv = cursor->getByteArray(col);
|
if (cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
|
||||||
if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
|
sqlite3_result_text(ctx, (const char *)rv->ptr, rv->len,
|
||||||
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
SQLITE_TRANSIENT);
|
||||||
} else {
|
} else {
|
||||||
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
sqlite3_result_blob(ctx, (void *)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case parquet::Type::INT96:
|
case parquet::Type::INT96:
|
||||||
// This type exists to store timestamps in nanoseconds due to legacy
|
// This type exists to store timestamps in nanoseconds due to legacy
|
||||||
// reasons. We just interpret it as a timestamp in milliseconds.
|
// reasons. We just interpret it as a timestamp in milliseconds.
|
||||||
case parquet::Type::INT64:
|
case parquet::Type::INT64: {
|
||||||
{
|
|
||||||
long rv = cursor->getInt64(col);
|
long rv = cursor->getInt64(col);
|
||||||
sqlite3_result_int64(ctx, rv);
|
sqlite3_result_int64(ctx, rv);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
case parquet::Type::FIXED_LEN_BYTE_ARRAY: {
|
||||||
{
|
parquet::ByteArray *rv = cursor->getByteArray(col);
|
||||||
parquet::ByteArray* rv = cursor->getByteArray(col);
|
sqlite3_result_blob(ctx, (void *)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
||||||
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
// Should be impossible to get here as we should have forbidden this at
|
// Should be impossible to get here as we should have forbidden this at
|
||||||
// CREATE time -- maybe file changed underneath us?
|
// CREATE time -- maybe file changed underneath us?
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << __FILE__ << ":" << __LINE__ << ": column " << col << " has unsupported type: " <<
|
ss << __FILE__ << ":" << __LINE__ << ": column " << col
|
||||||
parquet::TypeToString(cursor->getPhysicalType(col));
|
<< " has unsupported type: "
|
||||||
|
<< parquet::TypeToString(cursor->getPhysicalType(col));
|
||||||
|
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
} catch(std::bad_alloc& ba) {
|
} catch (std::bad_alloc &ba) {
|
||||||
return SQLITE_NOMEM;
|
return SQLITE_NOMEM;
|
||||||
} catch(std::exception& e) {
|
} catch (std::exception &e) {
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -334,8 +324,8 @@ static int parquetColumn(
|
|||||||
/*
|
/*
|
||||||
** Return the rowid for the current row.
|
** Return the rowid for the current row.
|
||||||
*/
|
*/
|
||||||
static int parquetRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
|
static int parquetRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) {
|
||||||
ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
|
ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet *)cur)->cursor;
|
||||||
*pRowid = cursor->getRowId();
|
*pRowid = cursor->getRowId();
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
@ -344,11 +334,13 @@ static int parquetRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
|
|||||||
** Return TRUE if the cursor has been moved off of the last
|
** Return TRUE if the cursor has been moved off of the last
|
||||||
** row of output.
|
** row of output.
|
||||||
*/
|
*/
|
||||||
static int parquetEof(sqlite3_vtab_cursor *cur){
|
static int parquetEof(sqlite3_vtab_cursor *cur) {
|
||||||
ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
|
ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet *)cur)->cursor;
|
||||||
if(cursor->eof()) {
|
if (cursor->eof()) {
|
||||||
sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur;
|
sqlite3_vtab_cursor_parquet *vtab_cursor_parquet =
|
||||||
sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab);
|
(sqlite3_vtab_cursor_parquet *)cur;
|
||||||
|
sqlite3_vtab_parquet *vtab_parquet =
|
||||||
|
(sqlite3_vtab_parquet *)(vtab_cursor_parquet->base.pVtab);
|
||||||
persistConstraints(vtab_parquet->db, cursor);
|
persistConstraints(vtab_parquet->db, cursor);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -356,8 +348,8 @@ static int parquetEof(sqlite3_vtab_cursor *cur){
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
const char* opName(int op) {
|
const char *opName(int op) {
|
||||||
switch(op) {
|
switch (op) {
|
||||||
case SQLITE_INDEX_CONSTRAINT_EQ:
|
case SQLITE_INDEX_CONSTRAINT_EQ:
|
||||||
return "=";
|
return "=";
|
||||||
case SQLITE_INDEX_CONSTRAINT_GT:
|
case SQLITE_INDEX_CONSTRAINT_GT:
|
||||||
@ -391,66 +383,60 @@ const char* opName(int op) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void debugConstraints(sqlite3_index_info *pIdxInfo, ParquetTable *table, int argc, sqlite3_value** argv) {
|
void debugConstraints(sqlite3_index_info *pIdxInfo, ParquetTable *table,
|
||||||
|
int argc, sqlite3_value **argv) {
|
||||||
printf("debugConstraints, argc=%d\n", argc);
|
printf("debugConstraints, argc=%d\n", argc);
|
||||||
int j = 0;
|
int j = 0;
|
||||||
for(int i = 0; i < pIdxInfo->nConstraint; i++) {
|
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
||||||
std::string valueStr = "?";
|
std::string valueStr = "?";
|
||||||
if(argv != NULL && pIdxInfo->aConstraint[i].usable) {
|
if (argv != NULL && pIdxInfo->aConstraint[i].usable) {
|
||||||
int type = sqlite3_value_type(argv[j]);
|
int type = sqlite3_value_type(argv[j]);
|
||||||
switch(type) {
|
switch (type) {
|
||||||
case SQLITE_INTEGER:
|
case SQLITE_INTEGER: {
|
||||||
{
|
|
||||||
sqlite3_int64 rv = sqlite3_value_int64(argv[j]);
|
sqlite3_int64 rv = sqlite3_value_int64(argv[j]);
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << rv;
|
ss << rv;
|
||||||
valueStr = ss.str();
|
valueStr = ss.str();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SQLITE_FLOAT:
|
case SQLITE_FLOAT: {
|
||||||
{
|
|
||||||
double rv = sqlite3_value_double(argv[j]);
|
double rv = sqlite3_value_double(argv[j]);
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << rv;
|
ss << rv;
|
||||||
valueStr = ss.str();
|
valueStr = ss.str();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SQLITE_TEXT:
|
case SQLITE_TEXT: {
|
||||||
{
|
const unsigned char *rv = sqlite3_value_text(argv[j]);
|
||||||
const unsigned char* rv = sqlite3_value_text(argv[j]);
|
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << "'" << rv << "'";
|
ss << "'" << rv << "'";
|
||||||
valueStr = ss.str();
|
valueStr = ss.str();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SQLITE_BLOB:
|
case SQLITE_BLOB: {
|
||||||
{
|
|
||||||
int sizeBytes = sqlite3_value_bytes(argv[j]);
|
int sizeBytes = sqlite3_value_bytes(argv[j]);
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << "'..." << sizeBytes << "-byte blob...'";
|
ss << "'..." << sizeBytes << "-byte blob...'";
|
||||||
valueStr = ss.str();
|
valueStr = ss.str();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SQLITE_NULL:
|
case SQLITE_NULL: {
|
||||||
{
|
|
||||||
valueStr = "NULL";
|
valueStr = "NULL";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
printf(" constraint %d: col %s %s %s, usable %d\n",
|
printf(" constraint %d: col %s %s %s, usable %d\n", i,
|
||||||
i,
|
|
||||||
table->columnName(pIdxInfo->aConstraint[i].iColumn).data(),
|
table->columnName(pIdxInfo->aConstraint[i].iColumn).data(),
|
||||||
opName(pIdxInfo->aConstraint[i].op),
|
opName(pIdxInfo->aConstraint[i].op), valueStr.data(),
|
||||||
valueStr.data(),
|
|
||||||
pIdxInfo->aConstraint[i].usable);
|
pIdxInfo->aConstraint[i].usable);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ConstraintOperator constraintOperatorFromSqlite(int op) {
|
ConstraintOperator constraintOperatorFromSqlite(int op) {
|
||||||
switch(op) {
|
switch (op) {
|
||||||
case SQLITE_INDEX_CONSTRAINT_EQ:
|
case SQLITE_INDEX_CONSTRAINT_EQ:
|
||||||
return Equal;
|
return Equal;
|
||||||
case SQLITE_INDEX_CONSTRAINT_GT:
|
case SQLITE_INDEX_CONSTRAINT_GT:
|
||||||
@ -482,29 +468,30 @@ ConstraintOperator constraintOperatorFromSqlite(int op) {
|
|||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<unsigned char> getRowGroupsForClause(sqlite3* db, std::string table, std::string clause) {
|
std::vector<unsigned char> getRowGroupsForClause(sqlite3 *db, std::string table,
|
||||||
|
std::string clause) {
|
||||||
std::vector<unsigned char> rv;
|
std::vector<unsigned char> rv;
|
||||||
|
|
||||||
std::unique_ptr<char, void(*)(void*)> sql(sqlite3_mprintf(
|
std::unique_ptr<char, void (*)(void *)> sql(
|
||||||
"SELECT actual FROM _%s_rowgroups WHERE clause = '%q'",
|
sqlite3_mprintf("SELECT actual FROM _%s_rowgroups WHERE clause = '%q'",
|
||||||
table.c_str(),
|
table.c_str(), clause.c_str()),
|
||||||
clause.c_str()), sqlite3_free);
|
sqlite3_free);
|
||||||
|
|
||||||
if(sql.get() == NULL)
|
if (sql.get() == NULL)
|
||||||
return rv;
|
return rv;
|
||||||
|
|
||||||
sqlite3_stmt* pStmt = NULL;
|
sqlite3_stmt *pStmt = NULL;
|
||||||
int rc = sqlite3_prepare_v2(db, sql.get(), -1, &pStmt, NULL);
|
int rc = sqlite3_prepare_v2(db, sql.get(), -1, &pStmt, NULL);
|
||||||
if(rc != 0)
|
if (rc != 0)
|
||||||
return rv;
|
return rv;
|
||||||
|
|
||||||
rc = sqlite3_step(pStmt);
|
rc = sqlite3_step(pStmt);
|
||||||
if(rc == SQLITE_ROW) {
|
if (rc == SQLITE_ROW) {
|
||||||
int size = sqlite3_column_bytes(pStmt, 0);
|
int size = sqlite3_column_bytes(pStmt, 0);
|
||||||
unsigned char* blob = (unsigned char*)sqlite3_column_blob(pStmt, 0);
|
unsigned char *blob = (unsigned char *)sqlite3_column_blob(pStmt, 0);
|
||||||
// TODO: there is a memory leak here if we get a std::bad_alloc while populating rv;
|
// TODO: there is a memory leak here if we get a std::bad_alloc while
|
||||||
// we fail to free pStmt
|
// populating rv; we fail to free pStmt
|
||||||
for(int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
rv.push_back(blob[i]);
|
rv.push_back(blob[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -513,24 +500,20 @@ std::vector<unsigned char> getRowGroupsForClause(sqlite3* db, std::string table,
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Only a full table scan is supported. So xFilter simply rewinds to
|
** Only a full table scan is supported. So xFilter simply rewinds to
|
||||||
** the beginning.
|
** the beginning.
|
||||||
*/
|
*/
|
||||||
static int parquetFilter(
|
static int parquetFilter(sqlite3_vtab_cursor *cur, int idxNum,
|
||||||
sqlite3_vtab_cursor *cur,
|
const char *idxStr, int argc, sqlite3_value **argv) {
|
||||||
int idxNum,
|
|
||||||
const char *idxStr,
|
|
||||||
int argc,
|
|
||||||
sqlite3_value **argv
|
|
||||||
){
|
|
||||||
try {
|
try {
|
||||||
sqlite3_vtab_cursor_parquet* vtab_cursor_parquet = (sqlite3_vtab_cursor_parquet*)cur;
|
sqlite3_vtab_cursor_parquet *vtab_cursor_parquet =
|
||||||
sqlite3_vtab_parquet* vtab_parquet = (sqlite3_vtab_parquet*)(vtab_cursor_parquet->base.pVtab);
|
(sqlite3_vtab_cursor_parquet *)cur;
|
||||||
sqlite3* db = vtab_parquet->db;
|
sqlite3_vtab_parquet *vtab_parquet =
|
||||||
ParquetCursor* cursor = vtab_cursor_parquet->cursor;
|
(sqlite3_vtab_parquet *)(vtab_cursor_parquet->base.pVtab);
|
||||||
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
|
sqlite3 *db = vtab_parquet->db;
|
||||||
|
ParquetCursor *cursor = vtab_cursor_parquet->cursor;
|
||||||
|
sqlite3_index_info *indexInfo = (sqlite3_index_info *)idxStr;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
struct timeval tv;
|
struct timeval tv;
|
||||||
@ -539,13 +522,14 @@ static int parquetFilter(
|
|||||||
(unsigned long long)(tv.tv_sec) * 1000 +
|
(unsigned long long)(tv.tv_sec) * 1000 +
|
||||||
(unsigned long long)(tv.tv_usec) / 1000;
|
(unsigned long long)(tv.tv_usec) / 1000;
|
||||||
|
|
||||||
printf("%llu xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", millisecondsSinceEpoch, idxNum, (long unsigned int)idxStr, argc);
|
printf("%llu xFilter: idxNum=%d, idxStr=%lu, argc=%d\n",
|
||||||
|
millisecondsSinceEpoch, idxNum, (long unsigned int)idxStr, argc);
|
||||||
debugConstraints(indexInfo, cursor->getTable(), argc, argv);
|
debugConstraints(indexInfo, cursor->getTable(), argc, argv);
|
||||||
#endif
|
#endif
|
||||||
std::vector<Constraint> constraints;
|
std::vector<Constraint> constraints;
|
||||||
int j = 0;
|
int j = 0;
|
||||||
for(int i = 0; i < indexInfo->nConstraint; i++) {
|
for (int i = 0; i < indexInfo->nConstraint; i++) {
|
||||||
if(!indexInfo->aConstraint[i].usable) {
|
if (!indexInfo->aConstraint[i].usable) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -555,86 +539,76 @@ static int parquetFilter(
|
|||||||
std::vector<unsigned char> blobValue;
|
std::vector<unsigned char> blobValue;
|
||||||
int sqliteType = sqlite3_value_type(argv[j]);
|
int sqliteType = sqlite3_value_type(argv[j]);
|
||||||
|
|
||||||
if(sqliteType == SQLITE_INTEGER) {
|
if (sqliteType == SQLITE_INTEGER) {
|
||||||
type = Integer;
|
type = Integer;
|
||||||
intValue = sqlite3_value_int64(argv[j]);
|
intValue = sqlite3_value_int64(argv[j]);
|
||||||
} else if(sqliteType == SQLITE_FLOAT) {
|
} else if (sqliteType == SQLITE_FLOAT) {
|
||||||
type = Double;
|
type = Double;
|
||||||
doubleValue = sqlite3_value_double(argv[j]);
|
doubleValue = sqlite3_value_double(argv[j]);
|
||||||
} else if(sqliteType == SQLITE_TEXT) {
|
} else if (sqliteType == SQLITE_TEXT) {
|
||||||
type = Text;
|
type = Text;
|
||||||
int len = sqlite3_value_bytes(argv[j]);
|
int len = sqlite3_value_bytes(argv[j]);
|
||||||
const unsigned char* ptr = sqlite3_value_text(argv[j]);
|
const unsigned char *ptr = sqlite3_value_text(argv[j]);
|
||||||
for(int k = 0; k < len; k++) {
|
for (int k = 0; k < len; k++) {
|
||||||
blobValue.push_back(ptr[k]);
|
blobValue.push_back(ptr[k]);
|
||||||
}
|
}
|
||||||
} else if(sqliteType == SQLITE_BLOB) {
|
} else if (sqliteType == SQLITE_BLOB) {
|
||||||
type = Blob;
|
type = Blob;
|
||||||
int len = sqlite3_value_bytes(argv[j]);
|
int len = sqlite3_value_bytes(argv[j]);
|
||||||
const unsigned char* ptr = (const unsigned char*)sqlite3_value_blob(argv[j]);
|
const unsigned char *ptr =
|
||||||
for(int k = 0; k < len; k++) {
|
(const unsigned char *)sqlite3_value_blob(argv[j]);
|
||||||
|
for (int k = 0; k < len; k++) {
|
||||||
blobValue.push_back(ptr[k]);
|
blobValue.push_back(ptr[k]);
|
||||||
}
|
}
|
||||||
} else if(sqliteType == SQLITE_NULL) {
|
} else if (sqliteType == SQLITE_NULL) {
|
||||||
type = Null;
|
type = Null;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string columnName = "rowid";
|
std::string columnName = "rowid";
|
||||||
if(indexInfo->aConstraint[i].iColumn >= 0) {
|
if (indexInfo->aConstraint[i].iColumn >= 0) {
|
||||||
columnName = cursor->getTable()->columnName(indexInfo->aConstraint[i].iColumn);
|
columnName =
|
||||||
|
cursor->getTable()->columnName(indexInfo->aConstraint[i].iColumn);
|
||||||
}
|
}
|
||||||
|
|
||||||
RowGroupBitmap bitmap = RowGroupBitmap(cursor->getNumRowGroups());
|
RowGroupBitmap bitmap = RowGroupBitmap(cursor->getNumRowGroups());
|
||||||
Constraint dummy(
|
Constraint dummy(
|
||||||
bitmap,
|
bitmap, indexInfo->aConstraint[i].iColumn, columnName,
|
||||||
indexInfo->aConstraint[i].iColumn,
|
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), type,
|
||||||
columnName,
|
intValue, doubleValue, blobValue);
|
||||||
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op),
|
|
||||||
type,
|
|
||||||
intValue,
|
|
||||||
doubleValue,
|
|
||||||
blobValue);
|
|
||||||
|
|
||||||
std::vector<unsigned char> actual = getRowGroupsForClause(db, cursor->getTable()->getTableName(), dummy.describe());
|
std::vector<unsigned char> actual = getRowGroupsForClause(
|
||||||
if(actual.size() > 0) {
|
db, cursor->getTable()->getTableName(), dummy.describe());
|
||||||
// Initialize the estimate to be the actual -- eventually they'll converge
|
if (actual.size() > 0) {
|
||||||
// and we'll stop writing back to the db.
|
// Initialize the estimate to be the actual -- eventually they'll
|
||||||
|
// converge and we'll stop writing back to the db.
|
||||||
std::vector<unsigned char> estimate = actual;
|
std::vector<unsigned char> estimate = actual;
|
||||||
bitmap = RowGroupBitmap(estimate, actual);
|
bitmap = RowGroupBitmap(estimate, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
Constraint constraint(
|
Constraint constraint(
|
||||||
bitmap,
|
bitmap, indexInfo->aConstraint[i].iColumn, columnName,
|
||||||
indexInfo->aConstraint[i].iColumn,
|
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), type,
|
||||||
columnName,
|
intValue, doubleValue, blobValue);
|
||||||
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op),
|
|
||||||
type,
|
|
||||||
intValue,
|
|
||||||
doubleValue,
|
|
||||||
blobValue);
|
|
||||||
|
|
||||||
constraints.push_back(constraint);
|
constraints.push_back(constraint);
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
cursor->reset(constraints);
|
cursor->reset(constraints);
|
||||||
return parquetNext(cur);
|
return parquetNext(cur);
|
||||||
} catch(std::bad_alloc& ba) {
|
} catch (std::bad_alloc &ba) {
|
||||||
return SQLITE_NOMEM;
|
return SQLITE_NOMEM;
|
||||||
} catch(std::exception& e) {
|
} catch (std::exception &e) {
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We'll always indicate to SQLite that we prefer it to use an index so that it will
|
* We'll always indicate to SQLite that we prefer it to use an index so that it
|
||||||
* pass additional context to xFilter, which we may or may not use.
|
* will pass additional context to xFilter, which we may or may not use.
|
||||||
*
|
*
|
||||||
* We copy the sqlite3_index_info structure, as is, into idxStr for later use.
|
* We copy the sqlite3_index_info structure, as is, into idxStr for later use.
|
||||||
*/
|
*/
|
||||||
static int parquetBestIndex(
|
static int parquetBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo) {
|
||||||
sqlite3_vtab *tab,
|
|
||||||
sqlite3_index_info *pIdxInfo
|
|
||||||
){
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
@ -644,18 +618,19 @@ static int parquetBestIndex(
|
|||||||
(unsigned long long)(tv.tv_sec) * 1000 +
|
(unsigned long long)(tv.tv_sec) * 1000 +
|
||||||
(unsigned long long)(tv.tv_usec) / 1000;
|
(unsigned long long)(tv.tv_usec) / 1000;
|
||||||
|
|
||||||
|
ParquetTable *table = ((sqlite3_vtab_parquet *)tab)->table;
|
||||||
ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table;
|
printf("%llu xBestIndex: nConstraint=%d, nOrderBy=%d\n",
|
||||||
printf("%llu xBestIndex: nConstraint=%d, nOrderBy=%d\n", millisecondsSinceEpoch, pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
|
millisecondsSinceEpoch, pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
|
||||||
debugConstraints(pIdxInfo, table, 0, NULL);
|
debugConstraints(pIdxInfo, table, 0, NULL);
|
||||||
#endif
|
#endif
|
||||||
// We traverse in rowid ascending order, so if they're asking for it to be ordered like that,
|
// We traverse in rowid ascending order, so if they're asking for it to be
|
||||||
// we can tell SQLite that it's guaranteed. This speeds up some DB viewer utilities that
|
// ordered like that, we can tell SQLite that it's guaranteed. This speeds
|
||||||
// use rowids for pagination.
|
// up some DB viewer utilities that use rowids for pagination.
|
||||||
if(pIdxInfo->nOrderBy == 1 && pIdxInfo->aOrderBy[0].iColumn == -1 && pIdxInfo->aOrderBy[0].desc == 0)
|
if (pIdxInfo->nOrderBy == 1 && pIdxInfo->aOrderBy[0].iColumn == -1 &&
|
||||||
|
pIdxInfo->aOrderBy[0].desc == 0)
|
||||||
pIdxInfo->orderByConsumed = 1;
|
pIdxInfo->orderByConsumed = 1;
|
||||||
|
|
||||||
if(pIdxInfo->nConstraint == 0) {
|
if (pIdxInfo->nConstraint == 0) {
|
||||||
pIdxInfo->estimatedCost = 1000000000000;
|
pIdxInfo->estimatedCost = 1000000000000;
|
||||||
pIdxInfo->idxNum = 0;
|
pIdxInfo->idxNum = 0;
|
||||||
} else {
|
} else {
|
||||||
@ -663,61 +638,69 @@ static int parquetBestIndex(
|
|||||||
pIdxInfo->idxNum = 1;
|
pIdxInfo->idxNum = 1;
|
||||||
int j = 0;
|
int j = 0;
|
||||||
|
|
||||||
for(int i = 0; i < pIdxInfo->nConstraint; i++) {
|
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
||||||
if(pIdxInfo->aConstraint[i].usable) {
|
if (pIdxInfo->aConstraint[i].usable) {
|
||||||
j++;
|
j++;
|
||||||
pIdxInfo->aConstraintUsage[i].argvIndex = j;
|
pIdxInfo->aConstraintUsage[i].argvIndex = j;
|
||||||
// pIdxInfo->aConstraintUsage[i].omit = 1;
|
// pIdxInfo->aConstraintUsage[i].omit = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t dupeSize = sizeof(sqlite3_index_info) +
|
size_t dupeSize =
|
||||||
//pIdxInfo->nConstraint * sizeof(sqlite3_index_constraint) +
|
sizeof(sqlite3_index_info) +
|
||||||
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) +
|
// pIdxInfo->nConstraint * sizeof(sqlite3_index_constraint) +
|
||||||
|
pIdxInfo->nConstraint *
|
||||||
|
sizeof(sqlite3_index_info::sqlite3_index_constraint) +
|
||||||
pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby) +
|
pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby) +
|
||||||
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint_usage);
|
pIdxInfo->nConstraint *
|
||||||
sqlite3_index_info* dupe = (sqlite3_index_info*)sqlite3_malloc(dupeSize);
|
sizeof(sqlite3_index_info::sqlite3_index_constraint_usage);
|
||||||
pIdxInfo->idxStr = (char*)dupe;
|
sqlite3_index_info *dupe = (sqlite3_index_info *)sqlite3_malloc(dupeSize);
|
||||||
|
pIdxInfo->idxStr = (char *)dupe;
|
||||||
pIdxInfo->needToFreeIdxStr = 1;
|
pIdxInfo->needToFreeIdxStr = 1;
|
||||||
|
|
||||||
memset(dupe, 0, dupeSize);
|
memset(dupe, 0, dupeSize);
|
||||||
memcpy(dupe, pIdxInfo, sizeof(sqlite3_index_info));
|
memcpy(dupe, pIdxInfo, sizeof(sqlite3_index_info));
|
||||||
|
|
||||||
dupe->aConstraint = (sqlite3_index_info::sqlite3_index_constraint*)((char*)dupe + sizeof(sqlite3_index_info));
|
dupe->aConstraint = (sqlite3_index_info::sqlite3_index_constraint
|
||||||
dupe->aOrderBy = (sqlite3_index_info::sqlite3_index_orderby*)((char*)dupe +
|
*)((char *)dupe + sizeof(sqlite3_index_info));
|
||||||
sizeof(sqlite3_index_info) +
|
dupe->aOrderBy =
|
||||||
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint));
|
(sqlite3_index_info::sqlite3_index_orderby
|
||||||
dupe->aConstraintUsage = (sqlite3_index_info::sqlite3_index_constraint_usage*)((char*)dupe +
|
*)((char *)dupe + sizeof(sqlite3_index_info) +
|
||||||
sizeof(sqlite3_index_info) +
|
pIdxInfo->nConstraint *
|
||||||
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) +
|
sizeof(sqlite3_index_info::sqlite3_index_constraint));
|
||||||
pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby));
|
dupe->aConstraintUsage =
|
||||||
|
(sqlite3_index_info::sqlite3_index_constraint_usage
|
||||||
|
*)((char *)dupe + sizeof(sqlite3_index_info) +
|
||||||
|
pIdxInfo->nConstraint *
|
||||||
|
sizeof(sqlite3_index_info::sqlite3_index_constraint) +
|
||||||
|
pIdxInfo->nOrderBy *
|
||||||
|
sizeof(sqlite3_index_info::sqlite3_index_orderby));
|
||||||
|
|
||||||
|
for (int i = 0; i < pIdxInfo->nConstraint; i++) {
|
||||||
for(int i = 0; i < pIdxInfo->nConstraint; i++) {
|
|
||||||
dupe->aConstraint[i].iColumn = pIdxInfo->aConstraint[i].iColumn;
|
dupe->aConstraint[i].iColumn = pIdxInfo->aConstraint[i].iColumn;
|
||||||
dupe->aConstraint[i].op = pIdxInfo->aConstraint[i].op;
|
dupe->aConstraint[i].op = pIdxInfo->aConstraint[i].op;
|
||||||
dupe->aConstraint[i].usable = pIdxInfo->aConstraint[i].usable;
|
dupe->aConstraint[i].usable = pIdxInfo->aConstraint[i].usable;
|
||||||
dupe->aConstraint[i].iTermOffset = pIdxInfo->aConstraint[i].iTermOffset;
|
dupe->aConstraint[i].iTermOffset = pIdxInfo->aConstraint[i].iTermOffset;
|
||||||
|
|
||||||
dupe->aConstraintUsage[i].argvIndex = pIdxInfo->aConstraintUsage[i].argvIndex;
|
dupe->aConstraintUsage[i].argvIndex =
|
||||||
|
pIdxInfo->aConstraintUsage[i].argvIndex;
|
||||||
dupe->aConstraintUsage[i].omit = pIdxInfo->aConstraintUsage[i].omit;
|
dupe->aConstraintUsage[i].omit = pIdxInfo->aConstraintUsage[i].omit;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i = 0; i < pIdxInfo->nOrderBy; i++) {
|
for (int i = 0; i < pIdxInfo->nOrderBy; i++) {
|
||||||
dupe->aOrderBy[i].iColumn = pIdxInfo->aOrderBy[i].iColumn;
|
dupe->aOrderBy[i].iColumn = pIdxInfo->aOrderBy[i].iColumn;
|
||||||
dupe->aOrderBy[i].desc = pIdxInfo->aOrderBy[i].desc;
|
dupe->aOrderBy[i].desc = pIdxInfo->aOrderBy[i].desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
} catch(std::bad_alloc& ba) {
|
} catch (std::bad_alloc &ba) {
|
||||||
return SQLITE_NOMEM;
|
return SQLITE_NOMEM;
|
||||||
} catch(std::exception& e) {
|
} catch (std::exception &e) {
|
||||||
return SQLITE_ERROR;
|
return SQLITE_ERROR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static sqlite3_module ParquetModule = {
|
static sqlite3_module ParquetModule = {
|
||||||
0, /* iVersion */
|
0, /* iVersion */
|
||||||
parquetCreate, /* xCreate */
|
parquetCreate, /* xCreate */
|
||||||
@ -742,19 +725,16 @@ static sqlite3_module ParquetModule = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This routine is called when the extension is loaded. The new
|
* This routine is called when the extension is loaded. The new
|
||||||
* Parquet virtual table module is registered with the calling database
|
* Parquet virtual table module is registered with the calling database
|
||||||
* connection.
|
* connection.
|
||||||
*/
|
*/
|
||||||
extern "C" {
|
extern "C" {
|
||||||
int sqlite3_parquet_init(
|
int sqlite3_parquet_init(sqlite3 *db, char **pzErrMsg,
|
||||||
sqlite3 *db,
|
const sqlite3_api_routines *pApi) {
|
||||||
char **pzErrMsg,
|
|
||||||
const sqlite3_api_routines *pApi
|
|
||||||
){
|
|
||||||
int rc;
|
int rc;
|
||||||
SQLITE_EXTENSION_INIT2(pApi);
|
SQLITE_EXTENSION_INIT2(pApi);
|
||||||
rc = sqlite3_create_module(db, "parquet", &ParquetModule, 0);
|
rc = sqlite3_create_module(db, "parquet", &ParquetModule, 0);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,13 +1,13 @@
|
|||||||
#ifndef PARQUET_CURSOR_H
|
#ifndef PARQUET_CURSOR_H
|
||||||
#define PARQUET_CURSOR_H
|
#define PARQUET_CURSOR_H
|
||||||
|
|
||||||
|
#include "parquet/api/reader.h"
|
||||||
#include "parquet_filter.h"
|
#include "parquet_filter.h"
|
||||||
#include "parquet_table.h"
|
#include "parquet_table.h"
|
||||||
#include "parquet/api/reader.h"
|
|
||||||
|
|
||||||
class ParquetCursor {
|
class ParquetCursor {
|
||||||
|
|
||||||
ParquetTable* table;
|
ParquetTable *table;
|
||||||
std::unique_ptr<parquet::ParquetFileReader> reader;
|
std::unique_ptr<parquet::ParquetFileReader> reader;
|
||||||
std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata;
|
std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata;
|
||||||
std::shared_ptr<parquet::RowGroupReader> rowGroup;
|
std::shared_ptr<parquet::RowGroupReader> rowGroup;
|
||||||
@ -35,19 +35,26 @@ class ParquetCursor {
|
|||||||
|
|
||||||
bool currentRowSatisfiesFilter();
|
bool currentRowSatisfiesFilter();
|
||||||
bool currentRowGroupSatisfiesFilter();
|
bool currentRowGroupSatisfiesFilter();
|
||||||
bool currentRowGroupSatisfiesRowIdFilter(Constraint& constraint);
|
bool currentRowGroupSatisfiesRowIdFilter(Constraint &constraint);
|
||||||
bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
bool currentRowGroupSatisfiesTextFilter(
|
||||||
bool currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
Constraint &constraint,
|
||||||
bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
bool currentRowGroupSatisfiesBlobFilter(
|
||||||
|
Constraint &constraint,
|
||||||
bool currentRowSatisfiesTextFilter(Constraint& constraint);
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
bool currentRowSatisfiesIntegerFilter(Constraint& constraint);
|
bool currentRowGroupSatisfiesIntegerFilter(
|
||||||
bool currentRowSatisfiesDoubleFilter(Constraint& constraint);
|
Constraint &constraint,
|
||||||
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
|
bool currentRowGroupSatisfiesDoubleFilter(
|
||||||
|
Constraint &constraint,
|
||||||
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
|
|
||||||
|
bool currentRowSatisfiesTextFilter(Constraint &constraint);
|
||||||
|
bool currentRowSatisfiesIntegerFilter(Constraint &constraint);
|
||||||
|
bool currentRowSatisfiesDoubleFilter(Constraint &constraint);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ParquetCursor(ParquetTable* table);
|
ParquetCursor(ParquetTable *table);
|
||||||
int getRowId();
|
int getRowId();
|
||||||
void next();
|
void next();
|
||||||
void close();
|
void close();
|
||||||
@ -58,16 +65,15 @@ public:
|
|||||||
bool isNull(int col);
|
bool isNull(int col);
|
||||||
unsigned int getNumRowGroups() const;
|
unsigned int getNumRowGroups() const;
|
||||||
unsigned int getNumConstraints() const;
|
unsigned int getNumConstraints() const;
|
||||||
const Constraint& getConstraint(unsigned int i) const;
|
const Constraint &getConstraint(unsigned int i) const;
|
||||||
parquet::Type::type getPhysicalType(int col);
|
parquet::Type::type getPhysicalType(int col);
|
||||||
parquet::LogicalType::type getLogicalType(int col);
|
parquet::LogicalType::type getLogicalType(int col);
|
||||||
ParquetTable* getTable() const;
|
ParquetTable *getTable() const;
|
||||||
|
|
||||||
int getInt32(int col);
|
int getInt32(int col);
|
||||||
long getInt64(int col);
|
long getInt64(int col);
|
||||||
double getDouble(int col);
|
double getDouble(int col);
|
||||||
parquet::ByteArray* getByteArray(int col);
|
parquet::ByteArray *getByteArray(int col);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,40 +1,29 @@
|
|||||||
#include "parquet_filter.h"
|
#include "parquet_filter.h"
|
||||||
|
|
||||||
Constraint::Constraint(
|
Constraint::Constraint(RowGroupBitmap bitmap, int column,
|
||||||
RowGroupBitmap bitmap,
|
std::string columnName, ConstraintOperator op,
|
||||||
int column,
|
ValueType type, int64_t intValue, double doubleValue,
|
||||||
std::string columnName,
|
std::vector<unsigned char> blobValue)
|
||||||
ConstraintOperator op,
|
: bitmap(bitmap), column(column), columnName(columnName), op(op),
|
||||||
ValueType type,
|
type(type), intValue(intValue), doubleValue(doubleValue),
|
||||||
int64_t intValue,
|
blobValue(blobValue), hadRows(false) {
|
||||||
double doubleValue,
|
|
||||||
std::vector<unsigned char> blobValue
|
|
||||||
): bitmap(bitmap),
|
|
||||||
column(column),
|
|
||||||
columnName(columnName),
|
|
||||||
op(op),
|
|
||||||
type(type),
|
|
||||||
intValue(intValue),
|
|
||||||
doubleValue(doubleValue),
|
|
||||||
blobValue(blobValue),
|
|
||||||
hadRows(false) {
|
|
||||||
RowGroupBitmap bm = bitmap;
|
RowGroupBitmap bm = bitmap;
|
||||||
this->bitmap = bm;
|
this->bitmap = bm;
|
||||||
|
|
||||||
if(type == Text) {
|
if (type == Text) {
|
||||||
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
stringValue = std::string((char *)&blobValue[0], blobValue.size());
|
||||||
|
|
||||||
if(op == Like) {
|
if (op == Like) {
|
||||||
// This permits more rowgroups than is strictly needed
|
// This permits more rowgroups than is strictly needed
|
||||||
// since it assumes an implicit wildcard. But it's
|
// since it assumes an implicit wildcard. But it's
|
||||||
// simple to implement, so we'll go with it.
|
// simple to implement, so we'll go with it.
|
||||||
likeStringValue = stringValue;
|
likeStringValue = stringValue;
|
||||||
size_t idx = likeStringValue.find_first_of("%");
|
size_t idx = likeStringValue.find_first_of("%");
|
||||||
if(idx != std::string::npos) {
|
if (idx != std::string::npos) {
|
||||||
likeStringValue = likeStringValue.substr(0, idx);
|
likeStringValue = likeStringValue.substr(0, idx);
|
||||||
}
|
}
|
||||||
idx = likeStringValue.find_first_of("_");
|
idx = likeStringValue.find_first_of("_");
|
||||||
if(idx != std::string::npos) {
|
if (idx != std::string::npos) {
|
||||||
likeStringValue = likeStringValue.substr(0, idx);
|
likeStringValue = likeStringValue.substr(0, idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -45,7 +34,7 @@ std::string Constraint::describe() const {
|
|||||||
std::string rv;
|
std::string rv;
|
||||||
rv.append(columnName);
|
rv.append(columnName);
|
||||||
rv.append(" ");
|
rv.append(" ");
|
||||||
switch(op) {
|
switch (op) {
|
||||||
case Equal:
|
case Equal:
|
||||||
rv.append("=");
|
rv.append("=");
|
||||||
break;
|
break;
|
||||||
@ -85,7 +74,7 @@ std::string Constraint::describe() const {
|
|||||||
}
|
}
|
||||||
rv.append(" ");
|
rv.append(" ");
|
||||||
|
|
||||||
switch(type) {
|
switch (type) {
|
||||||
case Null:
|
case Null:
|
||||||
rv.append("NULL");
|
rv.append("NULL");
|
||||||
break;
|
break;
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#ifndef PARQUET_FILTER_H
|
#ifndef PARQUET_FILTER_H
|
||||||
#define PARQUET_FILTER_H
|
#define PARQUET_FILTER_H
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
enum ConstraintOperator {
|
enum ConstraintOperator {
|
||||||
Equal,
|
Equal,
|
||||||
@ -20,43 +20,36 @@ enum ConstraintOperator {
|
|||||||
Is
|
Is
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ValueType {
|
enum ValueType { Null, Integer, Double, Blob, Text };
|
||||||
Null,
|
|
||||||
Integer,
|
|
||||||
Double,
|
|
||||||
Blob,
|
|
||||||
Text
|
|
||||||
};
|
|
||||||
|
|
||||||
class RowGroupBitmap {
|
class RowGroupBitmap {
|
||||||
void setBit(std::vector<unsigned char>& membership, unsigned int rowGroup, bool isSet) {
|
void setBit(std::vector<unsigned char> &membership, unsigned int rowGroup,
|
||||||
|
bool isSet) {
|
||||||
int byte = rowGroup / 8;
|
int byte = rowGroup / 8;
|
||||||
int offset = rowGroup % 8;
|
int offset = rowGroup % 8;
|
||||||
unsigned char c = membership[byte];
|
unsigned char c = membership[byte];
|
||||||
c &= ~(1UL << offset);
|
c &= ~(1UL << offset);
|
||||||
if(isSet) {
|
if (isSet) {
|
||||||
c |= 1UL << offset;
|
c |= 1UL << offset;
|
||||||
}
|
}
|
||||||
membership[byte] = c;
|
membership[byte] = c;
|
||||||
}
|
}
|
||||||
// Compares estimated rowGroupFilter results against observed results
|
// Compares estimated rowGroupFilter results against observed results
|
||||||
// when we explored the row group. This lets us cache
|
// when we explored the row group. This lets us cache
|
||||||
public:
|
public:
|
||||||
RowGroupBitmap(unsigned int totalRowGroups) {
|
RowGroupBitmap(unsigned int totalRowGroups) {
|
||||||
// Initialize everything to assume that all row groups match.
|
// Initialize everything to assume that all row groups match.
|
||||||
// As we discover otherwise, we'll update that assumption.
|
// As we discover otherwise, we'll update that assumption.
|
||||||
for(unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) {
|
for (unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) {
|
||||||
estimatedMembership.push_back(0xFF);
|
estimatedMembership.push_back(0xFF);
|
||||||
actualMembership.push_back(0xFF);
|
actualMembership.push_back(0xFF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RowGroupBitmap(
|
RowGroupBitmap(std::vector<unsigned char> estimatedMembership,
|
||||||
std::vector<unsigned char> estimatedMembership,
|
std::vector<unsigned char> actualMembership)
|
||||||
std::vector<unsigned char> actualMembership) :
|
: estimatedMembership(estimatedMembership),
|
||||||
estimatedMembership(estimatedMembership),
|
actualMembership(actualMembership) {}
|
||||||
actualMembership(actualMembership) {
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<unsigned char> estimatedMembership;
|
std::vector<unsigned char> estimatedMembership;
|
||||||
std::vector<unsigned char> actualMembership;
|
std::vector<unsigned char> actualMembership;
|
||||||
@ -80,17 +73,11 @@ public:
|
|||||||
|
|
||||||
class Constraint {
|
class Constraint {
|
||||||
public:
|
public:
|
||||||
// Kind of a messy constructor function, but it's just for internal use, so whatever.
|
// Kind of a messy constructor function, but it's just for internal use, so
|
||||||
Constraint(
|
// whatever.
|
||||||
RowGroupBitmap bitmap,
|
Constraint(RowGroupBitmap bitmap, int column, std::string columnName,
|
||||||
int column,
|
ConstraintOperator op, ValueType type, int64_t intValue,
|
||||||
std::string columnName,
|
double doubleValue, std::vector<unsigned char> blobValue);
|
||||||
ConstraintOperator op,
|
|
||||||
ValueType type,
|
|
||||||
int64_t intValue,
|
|
||||||
double doubleValue,
|
|
||||||
std::vector<unsigned char> blobValue
|
|
||||||
);
|
|
||||||
|
|
||||||
RowGroupBitmap bitmap;
|
RowGroupBitmap bitmap;
|
||||||
int column; // underlying column in the query
|
int column; // underlying column in the query
|
||||||
|
@ -2,61 +2,61 @@
|
|||||||
|
|
||||||
#include "parquet/api/reader.h"
|
#include "parquet/api/reader.h"
|
||||||
|
|
||||||
ParquetTable::ParquetTable(std::string file, std::string tableName): file(file), tableName(tableName) {
|
ParquetTable::ParquetTable(std::string file, std::string tableName)
|
||||||
std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data());
|
: file(file), tableName(tableName) {
|
||||||
|
std::unique_ptr<parquet::ParquetFileReader> reader =
|
||||||
|
parquet::ParquetFileReader::OpenFile(file.data());
|
||||||
metadata = reader->metadata();
|
metadata = reader->metadata();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string ParquetTable::columnName(int i) {
|
std::string ParquetTable::columnName(int i) {
|
||||||
if(i == -1)
|
if (i == -1)
|
||||||
return "rowid";
|
return "rowid";
|
||||||
return columnNames[i];
|
return columnNames[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int ParquetTable::getNumColumns() {
|
unsigned int ParquetTable::getNumColumns() { return columnNames.size(); }
|
||||||
return columnNames.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::string ParquetTable::CreateStatement() {
|
std::string ParquetTable::CreateStatement() {
|
||||||
std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(
|
std::unique_ptr<parquet::ParquetFileReader> reader =
|
||||||
file.data(),
|
parquet::ParquetFileReader::OpenFile(
|
||||||
true,
|
file.data(), true, parquet::default_reader_properties(), metadata);
|
||||||
parquet::default_reader_properties(),
|
|
||||||
metadata);
|
|
||||||
std::string text("CREATE TABLE x(");
|
std::string text("CREATE TABLE x(");
|
||||||
auto schema = reader->metadata()->schema();
|
auto schema = reader->metadata()->schema();
|
||||||
|
|
||||||
for(auto i = 0; i < schema->num_columns(); i++) {
|
for (auto i = 0; i < schema->num_columns(); i++) {
|
||||||
auto _col = schema->GetColumnRoot(i);
|
auto _col = schema->GetColumnRoot(i);
|
||||||
columnNames.push_back(_col->name());
|
columnNames.push_back(_col->name());
|
||||||
}
|
}
|
||||||
|
|
||||||
for(auto i = 0; i < schema->num_columns(); i++) {
|
for (auto i = 0; i < schema->num_columns(); i++) {
|
||||||
auto _col = schema->GetColumnRoot(i);
|
auto _col = schema->GetColumnRoot(i);
|
||||||
|
|
||||||
if(!_col->is_primitive()) {
|
if (!_col->is_primitive()) {
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-primitive type";
|
ss << __FILE__ << ":" << __LINE__ << ": column " << i
|
||||||
|
<< " has non-primitive type";
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
if(_col->is_repeated()) {
|
if (_col->is_repeated()) {
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-scalar type";
|
ss << __FILE__ << ":" << __LINE__ << ": column " << i
|
||||||
|
<< " has non-scalar type";
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
parquet::schema::PrimitiveNode* col = (parquet::schema::PrimitiveNode*)_col;
|
parquet::schema::PrimitiveNode *col =
|
||||||
|
(parquet::schema::PrimitiveNode *)_col;
|
||||||
|
|
||||||
if(i > 0)
|
if (i > 0)
|
||||||
text += ", ";
|
text += ", ";
|
||||||
|
|
||||||
text += "\"";
|
text += "\"";
|
||||||
// Horrifically inefficient, but easy to understand.
|
// Horrifically inefficient, but easy to understand.
|
||||||
std::string colName = col->name();
|
std::string colName = col->name();
|
||||||
for(char& c : colName) {
|
for (char &c : colName) {
|
||||||
if(c == '"')
|
if (c == '"')
|
||||||
text += "\"\"";
|
text += "\"\"";
|
||||||
else
|
else
|
||||||
text += c;
|
text += c;
|
||||||
@ -71,7 +71,7 @@ std::string ParquetTable::CreateStatement() {
|
|||||||
// whose unsigned ints start getting interpreted as signed. (We could
|
// whose unsigned ints start getting interpreted as signed. (We could
|
||||||
// support this for UINT_8/16/32 -- and for UINT_64 we could throw if
|
// support this for UINT_8/16/32 -- and for UINT_64 we could throw if
|
||||||
// the high bit was set.)
|
// the high bit was set.)
|
||||||
if(logical == parquet::LogicalType::NONE ||
|
if (logical == parquet::LogicalType::NONE ||
|
||||||
logical == parquet::LogicalType::UTF8 ||
|
logical == parquet::LogicalType::UTF8 ||
|
||||||
logical == parquet::LogicalType::DATE ||
|
logical == parquet::LogicalType::DATE ||
|
||||||
logical == parquet::LogicalType::TIME_MILLIS ||
|
logical == parquet::LogicalType::TIME_MILLIS ||
|
||||||
@ -82,17 +82,17 @@ std::string ParquetTable::CreateStatement() {
|
|||||||
logical == parquet::LogicalType::INT_16 ||
|
logical == parquet::LogicalType::INT_16 ||
|
||||||
logical == parquet::LogicalType::INT_32 ||
|
logical == parquet::LogicalType::INT_32 ||
|
||||||
logical == parquet::LogicalType::INT_64) {
|
logical == parquet::LogicalType::INT_64) {
|
||||||
switch(physical) {
|
switch (physical) {
|
||||||
case parquet::Type::BOOLEAN:
|
case parquet::Type::BOOLEAN:
|
||||||
type = "TINYINT";
|
type = "TINYINT";
|
||||||
break;
|
break;
|
||||||
case parquet::Type::INT32:
|
case parquet::Type::INT32:
|
||||||
if(logical == parquet::LogicalType::NONE ||
|
if (logical == parquet::LogicalType::NONE ||
|
||||||
logical == parquet::LogicalType::INT_32) {
|
logical == parquet::LogicalType::INT_32) {
|
||||||
type = "INT";
|
type = "INT";
|
||||||
} else if(logical == parquet::LogicalType::INT_8) {
|
} else if (logical == parquet::LogicalType::INT_8) {
|
||||||
type = "TINYINT";
|
type = "TINYINT";
|
||||||
} else if(logical == parquet::LogicalType::INT_16) {
|
} else if (logical == parquet::LogicalType::INT_16) {
|
||||||
type = "SMALLINT";
|
type = "SMALLINT";
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -109,7 +109,7 @@ std::string ParquetTable::CreateStatement() {
|
|||||||
type = "DOUBLE";
|
type = "DOUBLE";
|
||||||
break;
|
break;
|
||||||
case parquet::Type::BYTE_ARRAY:
|
case parquet::Type::BYTE_ARRAY:
|
||||||
if(logical == parquet::LogicalType::UTF8) {
|
if (logical == parquet::LogicalType::UTF8) {
|
||||||
type = "TEXT";
|
type = "TEXT";
|
||||||
} else {
|
} else {
|
||||||
type = "BLOB";
|
type = "BLOB";
|
||||||
@ -123,33 +123,33 @@ std::string ParquetTable::CreateStatement() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(type.empty()) {
|
if (type.empty()) {
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has unsupported type: " <<
|
ss << __FILE__ << ":" << __LINE__ << ": column " << i
|
||||||
parquet::TypeToString(physical) << "/" << parquet::LogicalTypeToString(logical);
|
<< " has unsupported type: " << parquet::TypeToString(physical) << "/"
|
||||||
|
<< parquet::LogicalTypeToString(logical);
|
||||||
|
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n",
|
printf(
|
||||||
i,
|
"col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n", i, col->name().data(),
|
||||||
col->name().data(),
|
|
||||||
col->physical_type(),
|
col->physical_type(),
|
||||||
parquet::TypeToString(col->physical_type()).data(),
|
parquet::TypeToString(col->physical_type()).data(), col->logical_type(),
|
||||||
col->logical_type(),
|
parquet::LogicalTypeToString(col->logical_type()).data(), type.data());
|
||||||
parquet::LogicalTypeToString(col->logical_type()).data(),
|
|
||||||
type.data());
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
text += " ";
|
text += " ";
|
||||||
text += type;
|
text += type;
|
||||||
}
|
}
|
||||||
text +=");";
|
text += ");";
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; }
|
std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() {
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
const std::string& ParquetTable::getFile() { return file; }
|
const std::string &ParquetTable::getFile() { return file; }
|
||||||
const std::string& ParquetTable::getTableName() { return tableName; }
|
const std::string &ParquetTable::getTableName() { return tableName; }
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#ifndef PARQUET_TABLE_H
|
#ifndef PARQUET_TABLE_H
|
||||||
#define PARQUET_TABLE_H
|
#define PARQUET_TABLE_H
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include "parquet/api/reader.h"
|
#include "parquet/api/reader.h"
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
class ParquetTable {
|
class ParquetTable {
|
||||||
std::string file;
|
std::string file;
|
||||||
@ -11,15 +11,14 @@ class ParquetTable {
|
|||||||
std::vector<std::string> columnNames;
|
std::vector<std::string> columnNames;
|
||||||
std::shared_ptr<parquet::FileMetaData> metadata;
|
std::shared_ptr<parquet::FileMetaData> metadata;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ParquetTable(std::string file, std::string tableName);
|
ParquetTable(std::string file, std::string tableName);
|
||||||
std::string CreateStatement();
|
std::string CreateStatement();
|
||||||
std::string columnName(int idx);
|
std::string columnName(int idx);
|
||||||
unsigned int getNumColumns();
|
unsigned int getNumColumns();
|
||||||
std::shared_ptr<parquet::FileMetaData> getMetadata();
|
std::shared_ptr<parquet::FileMetaData> getMetadata();
|
||||||
const std::string& getFile();
|
const std::string &getFile();
|
||||||
const std::string& getTableName();
|
const std::string &getTableName();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user