Support BLOBs

parent f3e78408bf
commit 7edb5e472f
@@ -46,9 +46,9 @@ These types are supported:

 * BOOLEAN
 * FLOAT
 * DOUBLE
+* Variable- and fixed-length byte arrays

 These are not supported:

 * UINT8/UINT16/UINT32/UINT64
-* Fixed length byte arrays, including JSON and BSON subtypes
 * DECIMAL
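
As a usage illustration (not part of the commit), here is a minimal sketch of reading one of the newly supported BLOB columns through the SQLite C API. The extension path matches the test scripts below; the file name demo.parquet and column name data are assumptions:

#include <cstdio>
#include <sqlite3.h>

// Minimal sketch: load the extension, create a virtual table over a Parquet
// file, and read one BLOB value. 'demo.parquet' and 'data' are placeholders.
int main() {
  sqlite3* db = nullptr;
  if(sqlite3_open(":memory:", &db) != SQLITE_OK) return 1;
  sqlite3_enable_load_extension(db, 1);
  char* err = nullptr;
  if(sqlite3_load_extension(db, "parquet/libparquet", nullptr, &err) != SQLITE_OK ||
     sqlite3_exec(db, "CREATE VIRTUAL TABLE t USING parquet('demo.parquet');",
                  nullptr, nullptr, &err) != SQLITE_OK) {
    fprintf(stderr, "setup failed: %s\n", err ? err : "unknown");
    return 1;
  }
  sqlite3_stmt* stmt = nullptr;
  sqlite3_prepare_v2(db, "SELECT data FROM t LIMIT 1;", -1, &stmt, nullptr);
  if(sqlite3_step(stmt) == SQLITE_ROW) {
    // Non-UTF8 BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY columns now arrive as BLOBs.
    int len = sqlite3_column_bytes(stmt, 0);
    printf("read a %d-byte blob\n", len);
  }
  sqlite3_finalize(stmt);
  sqlite3_close(db);
}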
@@ -188,7 +188,11 @@ static int parquetColumn(
    case parquet::Type::BYTE_ARRAY:
    {
      parquet::ByteArray* rv = cursor->getByteArray(col);
      if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
        sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
      } else {
        sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
      }
      break;
    }
    case parquet::Type::INT96:
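
Worth noting about both result paths: rv->ptr points into the scanner's internal buffer, which is reused as the cursor advances, so SQLite must copy the bytes. A sketch of why the destructor argument matters (commentary added here, not code from the commit):

// SQLITE_TRANSIENT makes SQLite copy the bytes before the call returns,
// so the scanner is free to overwrite its buffer on the next row:
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);

// SQLITE_STATIC would promise the buffer outlives the statement -- untrue
// here, and a recipe for returning garbage (or crashing) on later rows:
// sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_STATIC);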
@@ -201,6 +205,11 @@ static int parquetColumn(
      break;
    }
    case parquet::Type::FIXED_LEN_BYTE_ARRAY:
    {
      parquet::ByteArray* rv = cursor->getByteArray(col);
      sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
      break;
    }
    default:
      // Should be impossible to get here as we should have forbidden this at
      // CREATE time -- maybe file changed underneath us?
@@ -32,8 +32,13 @@ void ParquetCursor::nextRowGroup() {
    types.push_back(rowGroupMetadata->schema()->Column(0)->physical_type());
  }

  while(logicalTypes.size() < (unsigned int)rowGroupMetadata->num_columns()) {
    logicalTypes.push_back(rowGroupMetadata->schema()->Column(0)->logical_type());
  }

  for(unsigned int i = 0; i < (unsigned int)rowGroupMetadata->num_columns(); i++) {
    types[i] = rowGroupMetadata->schema()->Column(i)->physical_type();
    logicalTypes[i] = rowGroupMetadata->schema()->Column(i)->logical_type();
  }
}
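
The while-loop only grows logicalTypes to the right size (note it seeds every slot from Column(0), matching the existing types loop); the for-loop that follows immediately overwrites each entry with the correct per-column value. A tighter equivalent, offered as a sketch rather than the commit's code, sizes and fills in one pass:

// Sketch: resize() replaces both sizing loops; the for-loop then fills in
// the real per-column values exactly as the commit does.
types.resize(rowGroupMetadata->num_columns());
logicalTypes.resize(rowGroupMetadata->num_columns());
for(int i = 0; i < rowGroupMetadata->num_columns(); i++) {
  types[i] = rowGroupMetadata->schema()->Column(i)->physical_type();
  logicalTypes[i] = rowGroupMetadata->schema()->Column(i)->logical_type();
}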
@@ -169,6 +174,18 @@ void ParquetCursor::ensureColumn(int col) {
      break;
    }
    case parquet::Type::FIXED_LEN_BYTE_ARRAY:
    {
      parquet::FixedLenByteArrayScanner* s = (parquet::FixedLenByteArrayScanner*)scanners[col].get();
      parquet::FixedLenByteArray flba;
      if(s->NextValue(&flba, &wasNull)) {
        colByteArrayValues[col].ptr = flba.ptr;
        // TODO: cache this
        colByteArrayValues[col].len = rowGroupMetadata->schema()->Column(col)->type_length();
      } else {
        throw std::invalid_argument("unexpectedly lacking a next value");
      }
      break;
    }
    default:
      // Should be impossible to get here as we should have forbidden this at
      // CREATE time -- maybe file changed underneath us?
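
A note on this path: unlike ByteArray, parquet-cpp's FixedLenByteArray carries only a pointer; the element length is a schema property, hence the per-value type_length() lookup flagged by the TODO. One way to discharge that TODO, sketched with a hypothetical colFixedLengths member (not part of this commit), is to cache the lengths when the row group changes:

// Hypothetical cache, filled once in nextRowGroup() alongside types/logicalTypes:
// std::vector<int> colFixedLengths;  // hypothetical new member
colFixedLengths.resize(rowGroupMetadata->num_columns());
for(int i = 0; i < rowGroupMetadata->num_columns(); i++) {
  colFixedLengths[i] = rowGroupMetadata->schema()->Column(i)->type_length();
}

// ensureColumn() then avoids touching the schema on every row:
// colByteArrayValues[col].len = colFixedLengths[col];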
@@ -203,9 +220,10 @@ parquet::ByteArray* ParquetCursor::getByteArray(int col) {
  return &colByteArrayValues[col];
}

parquet::Type::type ParquetCursor::getPhysicalType(int col) {
  // return rowGroupMetadata->schema()->Column(col)->physical_type();
  return types[col];
}

parquet::LogicalType::type ParquetCursor::getLogicalType(int col) {
  return logicalTypes[col];
}
@@ -12,6 +12,7 @@ class ParquetCursor {
  std::shared_ptr<parquet::RowGroupReader> rowGroup;
  std::vector<std::shared_ptr<parquet::Scanner>> scanners;
  std::vector<parquet::Type::type> types;
  std::vector<parquet::LogicalType::type> logicalTypes;

  std::vector<int> colRows;
  std::vector<bool> colNulls;

@@ -36,6 +37,7 @@ public:
  void ensureColumn(int col);
  bool isNull(int col);
  parquet::Type::type getPhysicalType(int col);
  parquet::LogicalType::type getLogicalType(int col);

  int getInt32(int col);
  long getInt64(int col);
@@ -82,9 +82,13 @@ std::string ParquetTable::CreateStatement() {
      case parquet::Type::BYTE_ARRAY:
        if(logical == parquet::LogicalType::UTF8) {
          type = "TEXT";
        } else {
          type = "BLOB";
        }
        break;
      case parquet::Type::FIXED_LEN_BYTE_ARRAY:
        type = "BLOB";
        break;
      default:
        break;
    }
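
To make the new mapping concrete: given a Parquet schema like the hypothetical one below, CreateStatement() would now declare UTF8 strings as TEXT and raw byte arrays, fixed-length or not, as BLOB:

// Hypothetical Parquet schema               ->  generated SQLite column type
//   required binary name (UTF8);            ->  "name" TEXT
//   required binary payload;                ->  "payload" BLOB
//   required fixed_len_byte_array(16) uuid; ->  "uuid" BLOB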
@@ -0,0 +1,8 @@
#!/bin/bash
set -euo pipefail

here=$(dirname "${BASH_SOURCE[0]}")

set -x
"$here"/test-unsupported
"$here"/test-supported
@@ -0,0 +1,41 @@
#!/bin/bash
set -euo pipefail

# Verify that all the non-unsupported.*parquet files can be loaded and
# 'SELECT * FROM x LIMIT 1'ed without segfaulting.

load_supported() {
  file=${1:?must provide file to load}
  basename=$(basename "$file")
  cat <<EOF
.echo on
.load parquet/libparquet
.testcase $basename
.bail on
CREATE VIRTUAL TABLE test USING parquet('$file');
SELECT * FROM test LIMIT 1;
SELECT 123;
EOF
}

main() {
  root=$(dirname "${BASH_SOURCE[0]}")/..
  root=$(readlink -f "$root")
  cd "$root"

  supported_files=$(find . -type f -name '*.parquet' -not -name 'unsupported*.parquet')
  while read -r supported; do
    echo "Testing: $supported"
    if ! "$root"/sqlite/sqlite3 -init <(load_supported "$supported") < /dev/null > /dev/null 2> testcase-err.txt; then
      echo "...FAILED; check testcase-{out,err}.txt" >&2
      exit 1
    fi
    # We expect the 'SELECT 123' command to have been run
    if ! grep -q 123 testcase-out.txt; then
      echo "...FAILED; check testcase-{out,err}.txt" >&2
      exit 1
    fi
  done < <(echo "$supported_files")
}

main "$@"
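
Both test scripts rely on the same sentinel trick: the generated script ends with SELECT 123 and runs under .bail on, so 123 only appears in testcase-out.txt (written by the .testcase dot-command) if every earlier statement succeeded. test-supported greps for the sentinel's presence; test-unsupported, updated below, greps for its absence.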
@@ -28,7 +28,7 @@ main() {
     "$root"/sqlite/sqlite3 -init <(load_unsupported "$unsupported") < /dev/null > /dev/null 2> testcase-err.txt
     # We expect the 'SELECT 123' command to NOT have been run
     if grep -q 123 testcase-out.txt; then
-      echo "...FAILED" >&2
+      echo "...FAILED; expected an error message. Check testcase-{out,err}.txt" >&2
       exit 1
     fi
   done < <(echo "$unsupported_files")