mirror of
https://github.com/cldellow/sqlite-parquet-vtable.git
synced 2025-09-16 22:49:59 +00:00
More defensive, add caveats
This commit is contained in:
@@ -35,45 +35,65 @@ std::string ParquetTable::CreateStatement() {
|
||||
text += col->name();
|
||||
|
||||
std::string type;
|
||||
switch(col->physical_type()) {
|
||||
case parquet::Type::BOOLEAN:
|
||||
type = "TINYINT";
|
||||
break;
|
||||
case parquet::Type::INT32:
|
||||
if(col->logical_type() == parquet::LogicalType::NONE) {
|
||||
type = "INT";
|
||||
} else if(col->logical_type() == parquet::LogicalType::INT_8) {
|
||||
|
||||
parquet::Type::type physical = col->physical_type();
|
||||
parquet::LogicalType::type logical = col->logical_type();
|
||||
// Be explicit about which types we understand so we don't mislead someone
|
||||
// whose unsigned ints start getting interpreted as signed. (We could
|
||||
// support this for UINT_8/16/32 -- and for UINT_64 we could throw if
|
||||
// the high bit was set.)
|
||||
if(logical == parquet::LogicalType::NONE ||
|
||||
logical == parquet::LogicalType::UTF8 ||
|
||||
logical == parquet::LogicalType::DATE ||
|
||||
logical == parquet::LogicalType::TIME_MILLIS ||
|
||||
logical == parquet::LogicalType::TIMESTAMP_MILLIS ||
|
||||
logical == parquet::LogicalType::TIME_MICROS ||
|
||||
logical == parquet::LogicalType::TIMESTAMP_MICROS ||
|
||||
logical == parquet::LogicalType::INT_8 ||
|
||||
logical == parquet::LogicalType::INT_16 ||
|
||||
logical == parquet::LogicalType::INT_32 ||
|
||||
logical == parquet::LogicalType::INT_64) {
|
||||
switch(physical) {
|
||||
case parquet::Type::BOOLEAN:
|
||||
type = "TINYINT";
|
||||
} else if(col->logical_type() == parquet::LogicalType::INT_16) {
|
||||
type = "SMALLINT";
|
||||
}
|
||||
break;
|
||||
case parquet::Type::INT96:
|
||||
// INT96 is used for nanosecond precision on timestamps; we truncate
|
||||
// to millisecond precision.
|
||||
case parquet::Type::INT64:
|
||||
type = "BIGINT";
|
||||
break;
|
||||
case parquet::Type::FLOAT:
|
||||
type = "REAL";
|
||||
break;
|
||||
case parquet::Type::DOUBLE:
|
||||
type = "DOUBLE";
|
||||
break;
|
||||
case parquet::Type::BYTE_ARRAY:
|
||||
if(col->logical_type() == parquet::LogicalType::UTF8) {
|
||||
type = "TEXT";
|
||||
}
|
||||
break;
|
||||
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
||||
default:
|
||||
break;
|
||||
break;
|
||||
case parquet::Type::INT32:
|
||||
if(logical == parquet::LogicalType::NONE ||
|
||||
logical == parquet::LogicalType::INT_32) {
|
||||
type = "INT";
|
||||
} else if(logical == parquet::LogicalType::INT_8) {
|
||||
type = "TINYINT";
|
||||
} else if(logical == parquet::LogicalType::INT_16) {
|
||||
type = "SMALLINT";
|
||||
}
|
||||
break;
|
||||
case parquet::Type::INT96:
|
||||
// INT96 is used for nanosecond precision on timestamps; we truncate
|
||||
// to millisecond precision.
|
||||
case parquet::Type::INT64:
|
||||
type = "BIGINT";
|
||||
break;
|
||||
case parquet::Type::FLOAT:
|
||||
type = "REAL";
|
||||
break;
|
||||
case parquet::Type::DOUBLE:
|
||||
type = "DOUBLE";
|
||||
break;
|
||||
case parquet::Type::BYTE_ARRAY:
|
||||
if(logical == parquet::LogicalType::UTF8) {
|
||||
type = "TEXT";
|
||||
}
|
||||
break;
|
||||
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(type.empty()) {
|
||||
std::ostringstream ss;
|
||||
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has unsupported type: " <<
|
||||
parquet::TypeToString(col->physical_type()) << "/" << parquet::LogicalTypeToString(col->logical_type());
|
||||
parquet::TypeToString(physical) << "/" << parquet::LogicalTypeToString(logical);
|
||||
|
||||
throw std::invalid_argument(ss.str());
|
||||
}
|
||||
|
Reference in New Issue
Block a user