mirror of
https://github.com/cldellow/sqlite-parquet-vtable.git
synced 2025-10-27 01:59:56 +00:00
Support BLOBs
This commit is contained in:
@@ -188,7 +188,11 @@ static int parquetColumn(
|
||||
case parquet::Type::BYTE_ARRAY:
|
||||
{
|
||||
parquet::ByteArray* rv = cursor->getByteArray(col);
|
||||
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
||||
if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
|
||||
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
||||
} else {
|
||||
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case parquet::Type::INT96:
|
||||
@@ -201,6 +205,11 @@ static int parquetColumn(
|
||||
break;
|
||||
}
|
||||
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
||||
{
|
||||
parquet::ByteArray* rv = cursor->getByteArray(col);
|
||||
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Should be impossible to get here as we should have forbidden this at
|
||||
// CREATE time -- maybe file changed underneath us?
|
||||
|
||||
@@ -32,8 +32,13 @@ void ParquetCursor::nextRowGroup() {
|
||||
types.push_back(rowGroupMetadata->schema()->Column(0)->physical_type());
|
||||
}
|
||||
|
||||
while(logicalTypes.size() < (unsigned int)rowGroupMetadata->num_columns()) {
|
||||
logicalTypes.push_back(rowGroupMetadata->schema()->Column(0)->logical_type());
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i < (unsigned int)rowGroupMetadata->num_columns(); i++) {
|
||||
types[i] = rowGroupMetadata->schema()->Column(i)->physical_type();
|
||||
logicalTypes[i] = rowGroupMetadata->schema()->Column(i)->logical_type();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,6 +174,18 @@ void ParquetCursor::ensureColumn(int col) {
|
||||
break;
|
||||
}
|
||||
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
||||
{
|
||||
parquet::FixedLenByteArrayScanner* s = (parquet::FixedLenByteArrayScanner*)scanners[col].get();
|
||||
parquet::FixedLenByteArray flba;
|
||||
if(s->NextValue(&flba, &wasNull)) {
|
||||
colByteArrayValues[col].ptr = flba.ptr;
|
||||
// TODO: cache the column's type_length() instead of looking it up through the schema for every value
|
||||
colByteArrayValues[col].len = rowGroupMetadata->schema()->Column(col)->type_length();
|
||||
} else {
|
||||
throw std::invalid_argument("unexpectedly lacking a next value");
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Should be impossible to get here as we should have forbidden this at
|
||||
// CREATE time -- maybe file changed underneath us?
|
||||
@@ -203,9 +220,10 @@ parquet::ByteArray* ParquetCursor::getByteArray(int col) {
|
||||
return &colByteArrayValues[col];
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Returns the Parquet physical type stored for column `col`.
// The value is read from the `types` vector, which nextRowGroup() fills from
// the row group's schema. `col` is used as a plain index with no bounds check.
parquet::Type::type ParquetCursor::getPhysicalType(int col) {
|
||||
// Disabled alternative: ask the row group's schema directly on every call.
// return rowGroupMetadata->schema()->Column(col)->physical_type();
|
||||
return types[col];
|
||||
}
|
||||
|
||||
// Returns the Parquet logical type stored for column `col`.
// The value is read from the `logicalTypes` vector, which nextRowGroup() fills
// from the row group's schema. `col` is used as a plain index with no bounds
// check.
parquet::LogicalType::type ParquetCursor::getLogicalType(int col) {
|
||||
return logicalTypes[col];
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@ class ParquetCursor {
|
||||
std::shared_ptr<parquet::RowGroupReader> rowGroup;
|
||||
std::vector<std::shared_ptr<parquet::Scanner>> scanners;
|
||||
std::vector<parquet::Type::type> types;
|
||||
std::vector<parquet::LogicalType::type> logicalTypes;
|
||||
|
||||
std::vector<int> colRows;
|
||||
std::vector<bool> colNulls;
|
||||
@@ -36,6 +37,7 @@ public:
|
||||
void ensureColumn(int col);
|
||||
bool isNull(int col);
|
||||
parquet::Type::type getPhysicalType(int col);
|
||||
parquet::LogicalType::type getLogicalType(int col);
|
||||
|
||||
int getInt32(int col);
|
||||
long getInt64(int col);
|
||||
|
||||
@@ -82,9 +82,13 @@ std::string ParquetTable::CreateStatement() {
|
||||
case parquet::Type::BYTE_ARRAY:
|
||||
if(logical == parquet::LogicalType::UTF8) {
|
||||
type = "TEXT";
|
||||
} else {
|
||||
type = "BLOB";
|
||||
}
|
||||
break;
|
||||
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
||||
type = "BLOB";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user