1
0
mirror of https://github.com/cldellow/sqlite-parquet-vtable.git synced 2025-10-27 01:59:56 +00:00

Support BLOBs

This commit is contained in:
Colin Dellow
2018-03-04 17:20:28 -05:00
parent f3e78408bf
commit 7edb5e472f
8 changed files with 88 additions and 6 deletions

View File

@@ -188,7 +188,11 @@ static int parquetColumn(
case parquet::Type::BYTE_ARRAY:
{
parquet::ByteArray* rv = cursor->getByteArray(col);
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
} else {
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
}
break;
}
case parquet::Type::INT96:
@@ -201,6 +205,11 @@ static int parquetColumn(
break;
}
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
{
parquet::ByteArray* rv = cursor->getByteArray(col);
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
break;
}
default:
// Should be impossible to get here as we should have forbidden this at
// CREATE time -- maybe file changed underneath us?

View File

@@ -32,8 +32,13 @@ void ParquetCursor::nextRowGroup() {
types.push_back(rowGroupMetadata->schema()->Column(0)->physical_type());
}
while(logicalTypes.size() < (unsigned int)rowGroupMetadata->num_columns()) {
logicalTypes.push_back(rowGroupMetadata->schema()->Column(0)->logical_type());
}
for(unsigned int i = 0; i < (unsigned int)rowGroupMetadata->num_columns(); i++) {
types[i] = rowGroupMetadata->schema()->Column(i)->physical_type();
logicalTypes[i] = rowGroupMetadata->schema()->Column(i)->logical_type();
}
}
@@ -169,6 +174,18 @@ void ParquetCursor::ensureColumn(int col) {
break;
}
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
{
parquet::FixedLenByteArrayScanner* s = (parquet::FixedLenByteArrayScanner*)scanners[col].get();
parquet::FixedLenByteArray flba;
if(s->NextValue(&flba, &wasNull)) {
colByteArrayValues[col].ptr = flba.ptr;
// TODO: cache this
colByteArrayValues[col].len = rowGroupMetadata->schema()->Column(col)->type_length();
} else {
throw std::invalid_argument("unexpectedly lacking a next value");
}
break;
}
default:
// Should be impossible to get here as we should have forbidden this at
// CREATE time -- maybe file changed underneath us?
@@ -203,9 +220,10 @@ parquet::ByteArray* ParquetCursor::getByteArray(int col) {
return &colByteArrayValues[col];
}
// Returns the physical Parquet type recorded for column `col`.
//
// The value is read from the cursor's `types` vector instead of walking
// rowGroupMetadata->schema()->Column(col)->physical_type() on each call.
// `col` is used as a raw index; no bounds check is performed here.
parquet::Type::type ParquetCursor::getPhysicalType(int col) {
  const parquet::Type::type physicalType = types[col];
  return physicalType;
}
// Returns the logical Parquet type recorded for column `col`.
//
// The value is read from the cursor's `logicalTypes` vector.
// `col` is used as a raw index; no bounds check is performed here.
parquet::LogicalType::type ParquetCursor::getLogicalType(int col) {
  const parquet::LogicalType::type logicalType = logicalTypes[col];
  return logicalType;
}

View File

@@ -12,6 +12,7 @@ class ParquetCursor {
std::shared_ptr<parquet::RowGroupReader> rowGroup;
std::vector<std::shared_ptr<parquet::Scanner>> scanners;
std::vector<parquet::Type::type> types;
std::vector<parquet::LogicalType::type> logicalTypes;
std::vector<int> colRows;
std::vector<bool> colNulls;
@@ -36,6 +37,7 @@ public:
void ensureColumn(int col);
bool isNull(int col);
parquet::Type::type getPhysicalType(int col);
parquet::LogicalType::type getLogicalType(int col);
int getInt32(int col);
long getInt64(int col);

View File

@@ -82,9 +82,13 @@ std::string ParquetTable::CreateStatement() {
case parquet::Type::BYTE_ARRAY:
if(logical == parquet::LogicalType::UTF8) {
type = "TEXT";
} else {
type = "BLOB";
}
break;
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
type = "BLOB";
break;
default:
break;
}