mirror of
https://github.com/cldellow/sqlite-parquet-vtable.git
synced 2025-04-03 09:39:47 +00:00
Run a formatting pass with clang-format to minimize future git churn
This commit is contained in:
parent
ae194c69c5
commit
7bc6f91f6f
808
src/parquet.cc
808
src/parquet.cc
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,13 +1,13 @@
|
|||||||
#ifndef PARQUET_CURSOR_H
|
#ifndef PARQUET_CURSOR_H
|
||||||
#define PARQUET_CURSOR_H
|
#define PARQUET_CURSOR_H
|
||||||
|
|
||||||
|
#include "parquet/api/reader.h"
|
||||||
#include "parquet_filter.h"
|
#include "parquet_filter.h"
|
||||||
#include "parquet_table.h"
|
#include "parquet_table.h"
|
||||||
#include "parquet/api/reader.h"
|
|
||||||
|
|
||||||
class ParquetCursor {
|
class ParquetCursor {
|
||||||
|
|
||||||
ParquetTable* table;
|
ParquetTable *table;
|
||||||
std::unique_ptr<parquet::ParquetFileReader> reader;
|
std::unique_ptr<parquet::ParquetFileReader> reader;
|
||||||
std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata;
|
std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata;
|
||||||
std::shared_ptr<parquet::RowGroupReader> rowGroup;
|
std::shared_ptr<parquet::RowGroupReader> rowGroup;
|
||||||
@ -35,19 +35,26 @@ class ParquetCursor {
|
|||||||
|
|
||||||
bool currentRowSatisfiesFilter();
|
bool currentRowSatisfiesFilter();
|
||||||
bool currentRowGroupSatisfiesFilter();
|
bool currentRowGroupSatisfiesFilter();
|
||||||
bool currentRowGroupSatisfiesRowIdFilter(Constraint& constraint);
|
bool currentRowGroupSatisfiesRowIdFilter(Constraint &constraint);
|
||||||
bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
bool currentRowGroupSatisfiesTextFilter(
|
||||||
bool currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
Constraint &constraint,
|
||||||
bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
|
bool currentRowGroupSatisfiesBlobFilter(
|
||||||
|
Constraint &constraint,
|
||||||
bool currentRowSatisfiesTextFilter(Constraint& constraint);
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
bool currentRowSatisfiesIntegerFilter(Constraint& constraint);
|
bool currentRowGroupSatisfiesIntegerFilter(
|
||||||
bool currentRowSatisfiesDoubleFilter(Constraint& constraint);
|
Constraint &constraint,
|
||||||
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
|
bool currentRowGroupSatisfiesDoubleFilter(
|
||||||
|
Constraint &constraint,
|
||||||
|
std::shared_ptr<parquet::RowGroupStatistics> stats);
|
||||||
|
|
||||||
|
bool currentRowSatisfiesTextFilter(Constraint &constraint);
|
||||||
|
bool currentRowSatisfiesIntegerFilter(Constraint &constraint);
|
||||||
|
bool currentRowSatisfiesDoubleFilter(Constraint &constraint);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ParquetCursor(ParquetTable* table);
|
ParquetCursor(ParquetTable *table);
|
||||||
int getRowId();
|
int getRowId();
|
||||||
void next();
|
void next();
|
||||||
void close();
|
void close();
|
||||||
@ -58,16 +65,15 @@ public:
|
|||||||
bool isNull(int col);
|
bool isNull(int col);
|
||||||
unsigned int getNumRowGroups() const;
|
unsigned int getNumRowGroups() const;
|
||||||
unsigned int getNumConstraints() const;
|
unsigned int getNumConstraints() const;
|
||||||
const Constraint& getConstraint(unsigned int i) const;
|
const Constraint &getConstraint(unsigned int i) const;
|
||||||
parquet::Type::type getPhysicalType(int col);
|
parquet::Type::type getPhysicalType(int col);
|
||||||
parquet::LogicalType::type getLogicalType(int col);
|
parquet::LogicalType::type getLogicalType(int col);
|
||||||
ParquetTable* getTable() const;
|
ParquetTable *getTable() const;
|
||||||
|
|
||||||
int getInt32(int col);
|
int getInt32(int col);
|
||||||
long getInt64(int col);
|
long getInt64(int col);
|
||||||
double getDouble(int col);
|
double getDouble(int col);
|
||||||
parquet::ByteArray* getByteArray(int col);
|
parquet::ByteArray *getByteArray(int col);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,40 +1,29 @@
|
|||||||
#include "parquet_filter.h"
|
#include "parquet_filter.h"
|
||||||
|
|
||||||
Constraint::Constraint(
|
Constraint::Constraint(RowGroupBitmap bitmap, int column,
|
||||||
RowGroupBitmap bitmap,
|
std::string columnName, ConstraintOperator op,
|
||||||
int column,
|
ValueType type, int64_t intValue, double doubleValue,
|
||||||
std::string columnName,
|
std::vector<unsigned char> blobValue)
|
||||||
ConstraintOperator op,
|
: bitmap(bitmap), column(column), columnName(columnName), op(op),
|
||||||
ValueType type,
|
type(type), intValue(intValue), doubleValue(doubleValue),
|
||||||
int64_t intValue,
|
blobValue(blobValue), hadRows(false) {
|
||||||
double doubleValue,
|
RowGroupBitmap bm = bitmap;
|
||||||
std::vector<unsigned char> blobValue
|
this->bitmap = bm;
|
||||||
): bitmap(bitmap),
|
|
||||||
column(column),
|
|
||||||
columnName(columnName),
|
|
||||||
op(op),
|
|
||||||
type(type),
|
|
||||||
intValue(intValue),
|
|
||||||
doubleValue(doubleValue),
|
|
||||||
blobValue(blobValue),
|
|
||||||
hadRows(false) {
|
|
||||||
RowGroupBitmap bm = bitmap;
|
|
||||||
this->bitmap = bm;
|
|
||||||
|
|
||||||
if(type == Text) {
|
if (type == Text) {
|
||||||
stringValue = std::string((char*)&blobValue[0], blobValue.size());
|
stringValue = std::string((char *)&blobValue[0], blobValue.size());
|
||||||
|
|
||||||
if(op == Like) {
|
if (op == Like) {
|
||||||
// This permits more rowgroups than is strictly needed
|
// This permits more rowgroups than is strictly needed
|
||||||
// since it assumes an implicit wildcard. But it's
|
// since it assumes an implicit wildcard. But it's
|
||||||
// simple to implement, so we'll go with it.
|
// simple to implement, so we'll go with it.
|
||||||
likeStringValue = stringValue;
|
likeStringValue = stringValue;
|
||||||
size_t idx = likeStringValue.find_first_of("%");
|
size_t idx = likeStringValue.find_first_of("%");
|
||||||
if(idx != std::string::npos) {
|
if (idx != std::string::npos) {
|
||||||
likeStringValue = likeStringValue.substr(0, idx);
|
likeStringValue = likeStringValue.substr(0, idx);
|
||||||
}
|
}
|
||||||
idx = likeStringValue.find_first_of("_");
|
idx = likeStringValue.find_first_of("_");
|
||||||
if(idx != std::string::npos) {
|
if (idx != std::string::npos) {
|
||||||
likeStringValue = likeStringValue.substr(0, idx);
|
likeStringValue = likeStringValue.substr(0, idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -45,61 +34,61 @@ std::string Constraint::describe() const {
|
|||||||
std::string rv;
|
std::string rv;
|
||||||
rv.append(columnName);
|
rv.append(columnName);
|
||||||
rv.append(" ");
|
rv.append(" ");
|
||||||
switch(op) {
|
switch (op) {
|
||||||
case Equal:
|
case Equal:
|
||||||
rv.append("=");
|
rv.append("=");
|
||||||
break;
|
break;
|
||||||
case GreaterThan:
|
case GreaterThan:
|
||||||
rv.append(">");
|
rv.append(">");
|
||||||
break;
|
break;
|
||||||
case LessThanOrEqual:
|
case LessThanOrEqual:
|
||||||
rv.append("<=");
|
rv.append("<=");
|
||||||
break;
|
break;
|
||||||
case LessThan:
|
case LessThan:
|
||||||
rv.append("<");
|
rv.append("<");
|
||||||
break;
|
break;
|
||||||
case GreaterThanOrEqual:
|
case GreaterThanOrEqual:
|
||||||
rv.append(">=");
|
rv.append(">=");
|
||||||
break;
|
break;
|
||||||
case Like:
|
case Like:
|
||||||
rv.append("LIKE");
|
rv.append("LIKE");
|
||||||
break;
|
break;
|
||||||
case Glob:
|
case Glob:
|
||||||
rv.append("GLOB");
|
rv.append("GLOB");
|
||||||
break;
|
break;
|
||||||
case NotEqual:
|
case NotEqual:
|
||||||
rv.append("<>");
|
rv.append("<>");
|
||||||
break;
|
break;
|
||||||
case IsNot:
|
case IsNot:
|
||||||
rv.append("IS NOT");
|
rv.append("IS NOT");
|
||||||
break;
|
break;
|
||||||
case IsNotNull:
|
case IsNotNull:
|
||||||
rv.append("IS NOT NULL");
|
rv.append("IS NOT NULL");
|
||||||
break;
|
break;
|
||||||
case IsNull:
|
case IsNull:
|
||||||
rv.append("IS NULL");
|
rv.append("IS NULL");
|
||||||
break;
|
break;
|
||||||
case Is:
|
case Is:
|
||||||
rv.append("IS");
|
rv.append("IS");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
rv.append(" ");
|
rv.append(" ");
|
||||||
|
|
||||||
switch(type) {
|
switch (type) {
|
||||||
case Null:
|
case Null:
|
||||||
rv.append("NULL");
|
rv.append("NULL");
|
||||||
break;
|
break;
|
||||||
case Integer:
|
case Integer:
|
||||||
rv.append(std::to_string(intValue));
|
rv.append(std::to_string(intValue));
|
||||||
break;
|
break;
|
||||||
case Double:
|
case Double:
|
||||||
rv.append(std::to_string(doubleValue));
|
rv.append(std::to_string(doubleValue));
|
||||||
break;
|
break;
|
||||||
case Blob:
|
case Blob:
|
||||||
break;
|
break;
|
||||||
case Text:
|
case Text:
|
||||||
rv.append(stringValue);
|
rv.append(stringValue);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#ifndef PARQUET_FILTER_H
|
#ifndef PARQUET_FILTER_H
|
||||||
#define PARQUET_FILTER_H
|
#define PARQUET_FILTER_H
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
enum ConstraintOperator {
|
enum ConstraintOperator {
|
||||||
Equal,
|
Equal,
|
||||||
@ -20,43 +20,36 @@ enum ConstraintOperator {
|
|||||||
Is
|
Is
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ValueType {
|
enum ValueType { Null, Integer, Double, Blob, Text };
|
||||||
Null,
|
|
||||||
Integer,
|
|
||||||
Double,
|
|
||||||
Blob,
|
|
||||||
Text
|
|
||||||
};
|
|
||||||
|
|
||||||
class RowGroupBitmap {
|
class RowGroupBitmap {
|
||||||
void setBit(std::vector<unsigned char>& membership, unsigned int rowGroup, bool isSet) {
|
void setBit(std::vector<unsigned char> &membership, unsigned int rowGroup,
|
||||||
|
bool isSet) {
|
||||||
int byte = rowGroup / 8;
|
int byte = rowGroup / 8;
|
||||||
int offset = rowGroup % 8;
|
int offset = rowGroup % 8;
|
||||||
unsigned char c = membership[byte];
|
unsigned char c = membership[byte];
|
||||||
c &= ~(1UL << offset);
|
c &= ~(1UL << offset);
|
||||||
if(isSet) {
|
if (isSet) {
|
||||||
c |= 1UL << offset;
|
c |= 1UL << offset;
|
||||||
}
|
}
|
||||||
membership[byte] = c;
|
membership[byte] = c;
|
||||||
}
|
}
|
||||||
// Compares estimated rowGroupFilter results against observed results
|
// Compares estimated rowGroupFilter results against observed results
|
||||||
// when we explored the row group. This lets us cache
|
// when we explored the row group. This lets us cache
|
||||||
public:
|
public:
|
||||||
RowGroupBitmap(unsigned int totalRowGroups) {
|
RowGroupBitmap(unsigned int totalRowGroups) {
|
||||||
// Initialize everything to assume that all row groups match.
|
// Initialize everything to assume that all row groups match.
|
||||||
// As we discover otherwise, we'll update that assumption.
|
// As we discover otherwise, we'll update that assumption.
|
||||||
for(unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) {
|
for (unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) {
|
||||||
estimatedMembership.push_back(0xFF);
|
estimatedMembership.push_back(0xFF);
|
||||||
actualMembership.push_back(0xFF);
|
actualMembership.push_back(0xFF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RowGroupBitmap(
|
RowGroupBitmap(std::vector<unsigned char> estimatedMembership,
|
||||||
std::vector<unsigned char> estimatedMembership,
|
std::vector<unsigned char> actualMembership)
|
||||||
std::vector<unsigned char> actualMembership) :
|
: estimatedMembership(estimatedMembership),
|
||||||
estimatedMembership(estimatedMembership),
|
actualMembership(actualMembership) {}
|
||||||
actualMembership(actualMembership) {
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<unsigned char> estimatedMembership;
|
std::vector<unsigned char> estimatedMembership;
|
||||||
std::vector<unsigned char> actualMembership;
|
std::vector<unsigned char> actualMembership;
|
||||||
@ -80,17 +73,11 @@ public:
|
|||||||
|
|
||||||
class Constraint {
|
class Constraint {
|
||||||
public:
|
public:
|
||||||
// Kind of a messy constructor function, but it's just for internal use, so whatever.
|
// Kind of a messy constructor function, but it's just for internal use, so
|
||||||
Constraint(
|
// whatever.
|
||||||
RowGroupBitmap bitmap,
|
Constraint(RowGroupBitmap bitmap, int column, std::string columnName,
|
||||||
int column,
|
ConstraintOperator op, ValueType type, int64_t intValue,
|
||||||
std::string columnName,
|
double doubleValue, std::vector<unsigned char> blobValue);
|
||||||
ConstraintOperator op,
|
|
||||||
ValueType type,
|
|
||||||
int64_t intValue,
|
|
||||||
double doubleValue,
|
|
||||||
std::vector<unsigned char> blobValue
|
|
||||||
);
|
|
||||||
|
|
||||||
RowGroupBitmap bitmap;
|
RowGroupBitmap bitmap;
|
||||||
int column; // underlying column in the query
|
int column; // underlying column in the query
|
||||||
|
@ -2,61 +2,61 @@
|
|||||||
|
|
||||||
#include "parquet/api/reader.h"
|
#include "parquet/api/reader.h"
|
||||||
|
|
||||||
ParquetTable::ParquetTable(std::string file, std::string tableName): file(file), tableName(tableName) {
|
ParquetTable::ParquetTable(std::string file, std::string tableName)
|
||||||
std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data());
|
: file(file), tableName(tableName) {
|
||||||
|
std::unique_ptr<parquet::ParquetFileReader> reader =
|
||||||
|
parquet::ParquetFileReader::OpenFile(file.data());
|
||||||
metadata = reader->metadata();
|
metadata = reader->metadata();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string ParquetTable::columnName(int i) {
|
std::string ParquetTable::columnName(int i) {
|
||||||
if(i == -1)
|
if (i == -1)
|
||||||
return "rowid";
|
return "rowid";
|
||||||
return columnNames[i];
|
return columnNames[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int ParquetTable::getNumColumns() {
|
unsigned int ParquetTable::getNumColumns() { return columnNames.size(); }
|
||||||
return columnNames.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::string ParquetTable::CreateStatement() {
|
std::string ParquetTable::CreateStatement() {
|
||||||
std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(
|
std::unique_ptr<parquet::ParquetFileReader> reader =
|
||||||
file.data(),
|
parquet::ParquetFileReader::OpenFile(
|
||||||
true,
|
file.data(), true, parquet::default_reader_properties(), metadata);
|
||||||
parquet::default_reader_properties(),
|
|
||||||
metadata);
|
|
||||||
std::string text("CREATE TABLE x(");
|
std::string text("CREATE TABLE x(");
|
||||||
auto schema = reader->metadata()->schema();
|
auto schema = reader->metadata()->schema();
|
||||||
|
|
||||||
for(auto i = 0; i < schema->num_columns(); i++) {
|
for (auto i = 0; i < schema->num_columns(); i++) {
|
||||||
auto _col = schema->GetColumnRoot(i);
|
auto _col = schema->GetColumnRoot(i);
|
||||||
columnNames.push_back(_col->name());
|
columnNames.push_back(_col->name());
|
||||||
}
|
}
|
||||||
|
|
||||||
for(auto i = 0; i < schema->num_columns(); i++) {
|
for (auto i = 0; i < schema->num_columns(); i++) {
|
||||||
auto _col = schema->GetColumnRoot(i);
|
auto _col = schema->GetColumnRoot(i);
|
||||||
|
|
||||||
if(!_col->is_primitive()) {
|
if (!_col->is_primitive()) {
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-primitive type";
|
ss << __FILE__ << ":" << __LINE__ << ": column " << i
|
||||||
|
<< " has non-primitive type";
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
if(_col->is_repeated()) {
|
if (_col->is_repeated()) {
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-scalar type";
|
ss << __FILE__ << ":" << __LINE__ << ": column " << i
|
||||||
|
<< " has non-scalar type";
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
parquet::schema::PrimitiveNode* col = (parquet::schema::PrimitiveNode*)_col;
|
parquet::schema::PrimitiveNode *col =
|
||||||
|
(parquet::schema::PrimitiveNode *)_col;
|
||||||
|
|
||||||
if(i > 0)
|
if (i > 0)
|
||||||
text += ", ";
|
text += ", ";
|
||||||
|
|
||||||
text += "\"";
|
text += "\"";
|
||||||
// Horrifically inefficient, but easy to understand.
|
// Horrifically inefficient, but easy to understand.
|
||||||
std::string colName = col->name();
|
std::string colName = col->name();
|
||||||
for(char& c : colName) {
|
for (char &c : colName) {
|
||||||
if(c == '"')
|
if (c == '"')
|
||||||
text += "\"\"";
|
text += "\"\"";
|
||||||
else
|
else
|
||||||
text += c;
|
text += c;
|
||||||
@ -71,7 +71,7 @@ std::string ParquetTable::CreateStatement() {
|
|||||||
// whose unsigned ints start getting interpreted as signed. (We could
|
// whose unsigned ints start getting interpreted as signed. (We could
|
||||||
// support this for UINT_8/16/32 -- and for UINT_64 we could throw if
|
// support this for UINT_8/16/32 -- and for UINT_64 we could throw if
|
||||||
// the high bit was set.)
|
// the high bit was set.)
|
||||||
if(logical == parquet::LogicalType::NONE ||
|
if (logical == parquet::LogicalType::NONE ||
|
||||||
logical == parquet::LogicalType::UTF8 ||
|
logical == parquet::LogicalType::UTF8 ||
|
||||||
logical == parquet::LogicalType::DATE ||
|
logical == parquet::LogicalType::DATE ||
|
||||||
logical == parquet::LogicalType::TIME_MILLIS ||
|
logical == parquet::LogicalType::TIME_MILLIS ||
|
||||||
@ -82,74 +82,74 @@ std::string ParquetTable::CreateStatement() {
|
|||||||
logical == parquet::LogicalType::INT_16 ||
|
logical == parquet::LogicalType::INT_16 ||
|
||||||
logical == parquet::LogicalType::INT_32 ||
|
logical == parquet::LogicalType::INT_32 ||
|
||||||
logical == parquet::LogicalType::INT_64) {
|
logical == parquet::LogicalType::INT_64) {
|
||||||
switch(physical) {
|
switch (physical) {
|
||||||
case parquet::Type::BOOLEAN:
|
case parquet::Type::BOOLEAN:
|
||||||
|
type = "TINYINT";
|
||||||
|
break;
|
||||||
|
case parquet::Type::INT32:
|
||||||
|
if (logical == parquet::LogicalType::NONE ||
|
||||||
|
logical == parquet::LogicalType::INT_32) {
|
||||||
|
type = "INT";
|
||||||
|
} else if (logical == parquet::LogicalType::INT_8) {
|
||||||
type = "TINYINT";
|
type = "TINYINT";
|
||||||
break;
|
} else if (logical == parquet::LogicalType::INT_16) {
|
||||||
case parquet::Type::INT32:
|
type = "SMALLINT";
|
||||||
if(logical == parquet::LogicalType::NONE ||
|
}
|
||||||
logical == parquet::LogicalType::INT_32) {
|
break;
|
||||||
type = "INT";
|
case parquet::Type::INT96:
|
||||||
} else if(logical == parquet::LogicalType::INT_8) {
|
// INT96 is used for nanosecond precision on timestamps; we truncate
|
||||||
type = "TINYINT";
|
// to millisecond precision.
|
||||||
} else if(logical == parquet::LogicalType::INT_16) {
|
case parquet::Type::INT64:
|
||||||
type = "SMALLINT";
|
type = "BIGINT";
|
||||||
}
|
break;
|
||||||
break;
|
case parquet::Type::FLOAT:
|
||||||
case parquet::Type::INT96:
|
type = "REAL";
|
||||||
// INT96 is used for nanosecond precision on timestamps; we truncate
|
break;
|
||||||
// to millisecond precision.
|
case parquet::Type::DOUBLE:
|
||||||
case parquet::Type::INT64:
|
type = "DOUBLE";
|
||||||
type = "BIGINT";
|
break;
|
||||||
break;
|
case parquet::Type::BYTE_ARRAY:
|
||||||
case parquet::Type::FLOAT:
|
if (logical == parquet::LogicalType::UTF8) {
|
||||||
type = "REAL";
|
type = "TEXT";
|
||||||
break;
|
} else {
|
||||||
case parquet::Type::DOUBLE:
|
|
||||||
type = "DOUBLE";
|
|
||||||
break;
|
|
||||||
case parquet::Type::BYTE_ARRAY:
|
|
||||||
if(logical == parquet::LogicalType::UTF8) {
|
|
||||||
type = "TEXT";
|
|
||||||
} else {
|
|
||||||
type = "BLOB";
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
|
||||||
type = "BLOB";
|
type = "BLOB";
|
||||||
break;
|
}
|
||||||
default:
|
break;
|
||||||
break;
|
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
|
||||||
|
type = "BLOB";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(type.empty()) {
|
if (type.empty()) {
|
||||||
std::ostringstream ss;
|
std::ostringstream ss;
|
||||||
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has unsupported type: " <<
|
ss << __FILE__ << ":" << __LINE__ << ": column " << i
|
||||||
parquet::TypeToString(physical) << "/" << parquet::LogicalTypeToString(logical);
|
<< " has unsupported type: " << parquet::TypeToString(physical) << "/"
|
||||||
|
<< parquet::LogicalTypeToString(logical);
|
||||||
|
|
||||||
throw std::invalid_argument(ss.str());
|
throw std::invalid_argument(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
printf("col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n",
|
printf(
|
||||||
i,
|
"col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n", i, col->name().data(),
|
||||||
col->name().data(),
|
|
||||||
col->physical_type(),
|
col->physical_type(),
|
||||||
parquet::TypeToString(col->physical_type()).data(),
|
parquet::TypeToString(col->physical_type()).data(), col->logical_type(),
|
||||||
col->logical_type(),
|
parquet::LogicalTypeToString(col->logical_type()).data(), type.data());
|
||||||
parquet::LogicalTypeToString(col->logical_type()).data(),
|
|
||||||
type.data());
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
text += " ";
|
text += " ";
|
||||||
text += type;
|
text += type;
|
||||||
}
|
}
|
||||||
text +=");";
|
text += ");";
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; }
|
std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() {
|
||||||
|
return metadata;
|
||||||
|
}
|
||||||
|
|
||||||
const std::string& ParquetTable::getFile() { return file; }
|
const std::string &ParquetTable::getFile() { return file; }
|
||||||
const std::string& ParquetTable::getTableName() { return tableName; }
|
const std::string &ParquetTable::getTableName() { return tableName; }
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#ifndef PARQUET_TABLE_H
|
#ifndef PARQUET_TABLE_H
|
||||||
#define PARQUET_TABLE_H
|
#define PARQUET_TABLE_H
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include "parquet/api/reader.h"
|
#include "parquet/api/reader.h"
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
class ParquetTable {
|
class ParquetTable {
|
||||||
std::string file;
|
std::string file;
|
||||||
@ -11,15 +11,14 @@ class ParquetTable {
|
|||||||
std::vector<std::string> columnNames;
|
std::vector<std::string> columnNames;
|
||||||
std::shared_ptr<parquet::FileMetaData> metadata;
|
std::shared_ptr<parquet::FileMetaData> metadata;
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ParquetTable(std::string file, std::string tableName);
|
ParquetTable(std::string file, std::string tableName);
|
||||||
std::string CreateStatement();
|
std::string CreateStatement();
|
||||||
std::string columnName(int idx);
|
std::string columnName(int idx);
|
||||||
unsigned int getNumColumns();
|
unsigned int getNumColumns();
|
||||||
std::shared_ptr<parquet::FileMetaData> getMetadata();
|
std::shared_ptr<parquet::FileMetaData> getMetadata();
|
||||||
const std::string& getFile();
|
const std::string &getFile();
|
||||||
const std::string& getTableName();
|
const std::string &getTableName();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user