mirror of
				https://github.com/cldellow/sqlite-parquet-vtable.git
				synced 2025-10-31 02:19:56 +00:00 
			
		
		
		
	Run a formatting pass with clang-format to minimize future git churn
This commit is contained in:
		
							
								
								
									
										804
									
								
								src/parquet.cc
									
									
									
									
									
								
							
							
						
						
									
										804
									
								
								src/parquet.cc
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,13 +1,13 @@ | |||||||
| #ifndef PARQUET_CURSOR_H | #ifndef PARQUET_CURSOR_H | ||||||
| #define PARQUET_CURSOR_H | #define PARQUET_CURSOR_H | ||||||
|  |  | ||||||
|  | #include "parquet/api/reader.h" | ||||||
| #include "parquet_filter.h" | #include "parquet_filter.h" | ||||||
| #include "parquet_table.h" | #include "parquet_table.h" | ||||||
| #include "parquet/api/reader.h" |  | ||||||
|  |  | ||||||
| class ParquetCursor { | class ParquetCursor { | ||||||
|  |  | ||||||
|   ParquetTable* table; |   ParquetTable *table; | ||||||
|   std::unique_ptr<parquet::ParquetFileReader> reader; |   std::unique_ptr<parquet::ParquetFileReader> reader; | ||||||
|   std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata; |   std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata; | ||||||
|   std::shared_ptr<parquet::RowGroupReader> rowGroup; |   std::shared_ptr<parquet::RowGroupReader> rowGroup; | ||||||
| @@ -35,19 +35,26 @@ class ParquetCursor { | |||||||
|  |  | ||||||
|   bool currentRowSatisfiesFilter(); |   bool currentRowSatisfiesFilter(); | ||||||
|   bool currentRowGroupSatisfiesFilter(); |   bool currentRowGroupSatisfiesFilter(); | ||||||
|   bool currentRowGroupSatisfiesRowIdFilter(Constraint& constraint); |   bool currentRowGroupSatisfiesRowIdFilter(Constraint &constraint); | ||||||
|   bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |   bool currentRowGroupSatisfiesTextFilter( | ||||||
|   bool currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |       Constraint &constraint, | ||||||
|   bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|   bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats); |   bool currentRowGroupSatisfiesBlobFilter( | ||||||
|  |       Constraint &constraint, | ||||||
|   bool currentRowSatisfiesTextFilter(Constraint& constraint); |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|   bool currentRowSatisfiesIntegerFilter(Constraint& constraint); |   bool currentRowGroupSatisfiesIntegerFilter( | ||||||
|   bool currentRowSatisfiesDoubleFilter(Constraint& constraint); |       Constraint &constraint, | ||||||
|  |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|  |   bool currentRowGroupSatisfiesDoubleFilter( | ||||||
|  |       Constraint &constraint, | ||||||
|  |       std::shared_ptr<parquet::RowGroupStatistics> stats); | ||||||
|  |  | ||||||
|  |   bool currentRowSatisfiesTextFilter(Constraint &constraint); | ||||||
|  |   bool currentRowSatisfiesIntegerFilter(Constraint &constraint); | ||||||
|  |   bool currentRowSatisfiesDoubleFilter(Constraint &constraint); | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   ParquetCursor(ParquetTable* table); |   ParquetCursor(ParquetTable *table); | ||||||
|   int getRowId(); |   int getRowId(); | ||||||
|   void next(); |   void next(); | ||||||
|   void close(); |   void close(); | ||||||
| @@ -58,16 +65,15 @@ public: | |||||||
|   bool isNull(int col); |   bool isNull(int col); | ||||||
|   unsigned int getNumRowGroups() const; |   unsigned int getNumRowGroups() const; | ||||||
|   unsigned int getNumConstraints() const; |   unsigned int getNumConstraints() const; | ||||||
|   const Constraint& getConstraint(unsigned int i) const; |   const Constraint &getConstraint(unsigned int i) const; | ||||||
|   parquet::Type::type getPhysicalType(int col); |   parquet::Type::type getPhysicalType(int col); | ||||||
|   parquet::LogicalType::type getLogicalType(int col); |   parquet::LogicalType::type getLogicalType(int col); | ||||||
|   ParquetTable* getTable() const; |   ParquetTable *getTable() const; | ||||||
|  |  | ||||||
|   int getInt32(int col); |   int getInt32(int col); | ||||||
|   long getInt64(int col); |   long getInt64(int col); | ||||||
|   double getDouble(int col); |   double getDouble(int col); | ||||||
|   parquet::ByteArray* getByteArray(int col); |   parquet::ByteArray *getByteArray(int col); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,40 +1,29 @@ | |||||||
| #include "parquet_filter.h" | #include "parquet_filter.h" | ||||||
|  |  | ||||||
| Constraint::Constraint( | Constraint::Constraint(RowGroupBitmap bitmap, int column, | ||||||
|   RowGroupBitmap bitmap, |                        std::string columnName, ConstraintOperator op, | ||||||
|   int column, |                        ValueType type, int64_t intValue, double doubleValue, | ||||||
|   std::string columnName, |                        std::vector<unsigned char> blobValue) | ||||||
|   ConstraintOperator op, |     : bitmap(bitmap), column(column), columnName(columnName), op(op), | ||||||
|   ValueType type, |       type(type), intValue(intValue), doubleValue(doubleValue), | ||||||
|   int64_t intValue, |       blobValue(blobValue), hadRows(false) { | ||||||
|   double doubleValue, |   RowGroupBitmap bm = bitmap; | ||||||
|   std::vector<unsigned char> blobValue |   this->bitmap = bm; | ||||||
| ): bitmap(bitmap), |  | ||||||
|    column(column), |  | ||||||
|    columnName(columnName), |  | ||||||
|    op(op), |  | ||||||
|    type(type), |  | ||||||
|    intValue(intValue), |  | ||||||
|    doubleValue(doubleValue), |  | ||||||
|    blobValue(blobValue), |  | ||||||
|    hadRows(false) { |  | ||||||
|      RowGroupBitmap bm = bitmap; |  | ||||||
|      this->bitmap = bm; |  | ||||||
|  |  | ||||||
|   if(type == Text) { |   if (type == Text) { | ||||||
|     stringValue = std::string((char*)&blobValue[0], blobValue.size()); |     stringValue = std::string((char *)&blobValue[0], blobValue.size()); | ||||||
|  |  | ||||||
|     if(op == Like) { |     if (op == Like) { | ||||||
|       // This permits more rowgroups than is strictly needed |       // This permits more rowgroups than is strictly needed | ||||||
|       // since it assumes an implicit wildcard. But it's |       // since it assumes an implicit wildcard. But it's | ||||||
|       // simple to implement, so we'll go with it. |       // simple to implement, so we'll go with it. | ||||||
|       likeStringValue = stringValue; |       likeStringValue = stringValue; | ||||||
|       size_t idx = likeStringValue.find_first_of("%"); |       size_t idx = likeStringValue.find_first_of("%"); | ||||||
|       if(idx != std::string::npos) { |       if (idx != std::string::npos) { | ||||||
|         likeStringValue = likeStringValue.substr(0, idx); |         likeStringValue = likeStringValue.substr(0, idx); | ||||||
|       } |       } | ||||||
|       idx = likeStringValue.find_first_of("_"); |       idx = likeStringValue.find_first_of("_"); | ||||||
|       if(idx != std::string::npos) { |       if (idx != std::string::npos) { | ||||||
|         likeStringValue = likeStringValue.substr(0, idx); |         likeStringValue = likeStringValue.substr(0, idx); | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
| @@ -45,61 +34,61 @@ std::string Constraint::describe() const { | |||||||
|   std::string rv; |   std::string rv; | ||||||
|   rv.append(columnName); |   rv.append(columnName); | ||||||
|   rv.append(" "); |   rv.append(" "); | ||||||
|   switch(op) { |   switch (op) { | ||||||
|     case Equal: |   case Equal: | ||||||
|       rv.append("="); |     rv.append("="); | ||||||
|       break; |     break; | ||||||
|     case GreaterThan: |   case GreaterThan: | ||||||
|       rv.append(">"); |     rv.append(">"); | ||||||
|       break; |     break; | ||||||
|     case LessThanOrEqual: |   case LessThanOrEqual: | ||||||
|       rv.append("<="); |     rv.append("<="); | ||||||
|       break; |     break; | ||||||
|     case LessThan: |   case LessThan: | ||||||
|       rv.append("<"); |     rv.append("<"); | ||||||
|       break; |     break; | ||||||
|     case GreaterThanOrEqual: |   case GreaterThanOrEqual: | ||||||
|       rv.append(">="); |     rv.append(">="); | ||||||
|       break; |     break; | ||||||
|     case Like: |   case Like: | ||||||
|       rv.append("LIKE"); |     rv.append("LIKE"); | ||||||
|       break; |     break; | ||||||
|     case Glob: |   case Glob: | ||||||
|       rv.append("GLOB"); |     rv.append("GLOB"); | ||||||
|       break; |     break; | ||||||
|     case NotEqual: |   case NotEqual: | ||||||
|       rv.append("<>"); |     rv.append("<>"); | ||||||
|       break; |     break; | ||||||
|     case IsNot: |   case IsNot: | ||||||
|       rv.append("IS NOT"); |     rv.append("IS NOT"); | ||||||
|       break; |     break; | ||||||
|     case IsNotNull: |   case IsNotNull: | ||||||
|       rv.append("IS NOT NULL"); |     rv.append("IS NOT NULL"); | ||||||
|       break; |     break; | ||||||
|     case IsNull: |   case IsNull: | ||||||
|       rv.append("IS NULL"); |     rv.append("IS NULL"); | ||||||
|       break; |     break; | ||||||
|     case Is: |   case Is: | ||||||
|       rv.append("IS"); |     rv.append("IS"); | ||||||
|       break; |     break; | ||||||
|   } |   } | ||||||
|   rv.append(" "); |   rv.append(" "); | ||||||
|  |  | ||||||
|   switch(type) { |   switch (type) { | ||||||
|     case Null: |   case Null: | ||||||
|       rv.append("NULL"); |     rv.append("NULL"); | ||||||
|       break; |     break; | ||||||
|     case Integer: |   case Integer: | ||||||
|       rv.append(std::to_string(intValue)); |     rv.append(std::to_string(intValue)); | ||||||
|       break; |     break; | ||||||
|     case Double: |   case Double: | ||||||
|       rv.append(std::to_string(doubleValue)); |     rv.append(std::to_string(doubleValue)); | ||||||
|       break; |     break; | ||||||
|     case Blob: |   case Blob: | ||||||
|       break; |     break; | ||||||
|     case Text: |   case Text: | ||||||
|       rv.append(stringValue); |     rv.append(stringValue); | ||||||
|       break; |     break; | ||||||
|   } |   } | ||||||
|   return rv; |   return rv; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,9 +1,9 @@ | |||||||
| #ifndef PARQUET_FILTER_H | #ifndef PARQUET_FILTER_H | ||||||
| #define PARQUET_FILTER_H | #define PARQUET_FILTER_H | ||||||
|  |  | ||||||
| #include <vector> |  | ||||||
| #include <string> |  | ||||||
| #include <cstdint> | #include <cstdint> | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
| enum ConstraintOperator { | enum ConstraintOperator { | ||||||
|   Equal, |   Equal, | ||||||
| @@ -20,43 +20,36 @@ enum ConstraintOperator { | |||||||
|   Is |   Is | ||||||
| }; | }; | ||||||
|  |  | ||||||
| enum ValueType { | enum ValueType { Null, Integer, Double, Blob, Text }; | ||||||
|   Null, |  | ||||||
|   Integer, |  | ||||||
|   Double, |  | ||||||
|   Blob, |  | ||||||
|   Text |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| class RowGroupBitmap { | class RowGroupBitmap { | ||||||
|   void setBit(std::vector<unsigned char>& membership, unsigned int rowGroup, bool isSet) { |   void setBit(std::vector<unsigned char> &membership, unsigned int rowGroup, | ||||||
|  |               bool isSet) { | ||||||
|     int byte = rowGroup / 8; |     int byte = rowGroup / 8; | ||||||
|     int offset = rowGroup % 8; |     int offset = rowGroup % 8; | ||||||
|     unsigned char c = membership[byte]; |     unsigned char c = membership[byte]; | ||||||
|     c &= ~(1UL << offset); |     c &= ~(1UL << offset); | ||||||
|     if(isSet) { |     if (isSet) { | ||||||
|       c |= 1UL << offset; |       c |= 1UL << offset; | ||||||
|     } |     } | ||||||
|     membership[byte] = c; |     membership[byte] = c; | ||||||
|   } |   } | ||||||
| // Compares estimated rowGroupFilter results against observed results |   // Compares estimated rowGroupFilter results against observed results | ||||||
| // when we explored the row group. This lets us cache  |   // when we explored the row group. This lets us cache | ||||||
| public: | public: | ||||||
|   RowGroupBitmap(unsigned int totalRowGroups) { |   RowGroupBitmap(unsigned int totalRowGroups) { | ||||||
|     // Initialize everything to assume that all row groups match. |     // Initialize everything to assume that all row groups match. | ||||||
|     // As we discover otherwise, we'll update that assumption. |     // As we discover otherwise, we'll update that assumption. | ||||||
|     for(unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) { |     for (unsigned int i = 0; i < (totalRowGroups + 7) / 8; i++) { | ||||||
|       estimatedMembership.push_back(0xFF); |       estimatedMembership.push_back(0xFF); | ||||||
|       actualMembership.push_back(0xFF); |       actualMembership.push_back(0xFF); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   RowGroupBitmap( |   RowGroupBitmap(std::vector<unsigned char> estimatedMembership, | ||||||
|       std::vector<unsigned char> estimatedMembership, |                  std::vector<unsigned char> actualMembership) | ||||||
|       std::vector<unsigned char> actualMembership) : |       : estimatedMembership(estimatedMembership), | ||||||
|     estimatedMembership(estimatedMembership), |         actualMembership(actualMembership) {} | ||||||
|     actualMembership(actualMembership) { |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   std::vector<unsigned char> estimatedMembership; |   std::vector<unsigned char> estimatedMembership; | ||||||
|   std::vector<unsigned char> actualMembership; |   std::vector<unsigned char> actualMembership; | ||||||
| @@ -80,17 +73,11 @@ public: | |||||||
|  |  | ||||||
| class Constraint { | class Constraint { | ||||||
| public: | public: | ||||||
|   // Kind of a messy constructor function, but it's just for internal use, so whatever. |   // Kind of a messy constructor function, but it's just for internal use, so | ||||||
|   Constraint( |   // whatever. | ||||||
|     RowGroupBitmap bitmap, |   Constraint(RowGroupBitmap bitmap, int column, std::string columnName, | ||||||
|     int column, |              ConstraintOperator op, ValueType type, int64_t intValue, | ||||||
|     std::string columnName, |              double doubleValue, std::vector<unsigned char> blobValue); | ||||||
|     ConstraintOperator op, |  | ||||||
|     ValueType type, |  | ||||||
|     int64_t intValue, |  | ||||||
|     double doubleValue, |  | ||||||
|     std::vector<unsigned char> blobValue |  | ||||||
|   ); |  | ||||||
|  |  | ||||||
|   RowGroupBitmap bitmap; |   RowGroupBitmap bitmap; | ||||||
|   int column; // underlying column in the query |   int column; // underlying column in the query | ||||||
|   | |||||||
| @@ -2,61 +2,61 @@ | |||||||
|  |  | ||||||
| #include "parquet/api/reader.h" | #include "parquet/api/reader.h" | ||||||
|  |  | ||||||
| ParquetTable::ParquetTable(std::string file, std::string tableName): file(file), tableName(tableName) { | ParquetTable::ParquetTable(std::string file, std::string tableName) | ||||||
|   std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(file.data()); |     : file(file), tableName(tableName) { | ||||||
|  |   std::unique_ptr<parquet::ParquetFileReader> reader = | ||||||
|  |       parquet::ParquetFileReader::OpenFile(file.data()); | ||||||
|   metadata = reader->metadata(); |   metadata = reader->metadata(); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string ParquetTable::columnName(int i) { | std::string ParquetTable::columnName(int i) { | ||||||
|   if(i == -1) |   if (i == -1) | ||||||
|     return "rowid"; |     return "rowid"; | ||||||
|   return columnNames[i]; |   return columnNames[i]; | ||||||
| } | } | ||||||
|  |  | ||||||
| unsigned int ParquetTable::getNumColumns() { | unsigned int ParquetTable::getNumColumns() { return columnNames.size(); } | ||||||
|   return columnNames.size(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| std::string ParquetTable::CreateStatement() { | std::string ParquetTable::CreateStatement() { | ||||||
|   std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile( |   std::unique_ptr<parquet::ParquetFileReader> reader = | ||||||
|       file.data(), |       parquet::ParquetFileReader::OpenFile( | ||||||
|       true, |           file.data(), true, parquet::default_reader_properties(), metadata); | ||||||
|       parquet::default_reader_properties(), |  | ||||||
|       metadata); |  | ||||||
|   std::string text("CREATE TABLE x("); |   std::string text("CREATE TABLE x("); | ||||||
|   auto schema = reader->metadata()->schema(); |   auto schema = reader->metadata()->schema(); | ||||||
|  |  | ||||||
|   for(auto i = 0; i < schema->num_columns(); i++) { |   for (auto i = 0; i < schema->num_columns(); i++) { | ||||||
|     auto _col = schema->GetColumnRoot(i); |     auto _col = schema->GetColumnRoot(i); | ||||||
|     columnNames.push_back(_col->name()); |     columnNames.push_back(_col->name()); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   for(auto i = 0; i < schema->num_columns(); i++) { |   for (auto i = 0; i < schema->num_columns(); i++) { | ||||||
|     auto _col = schema->GetColumnRoot(i); |     auto _col = schema->GetColumnRoot(i); | ||||||
|  |  | ||||||
|     if(!_col->is_primitive()) { |     if (!_col->is_primitive()) { | ||||||
|       std::ostringstream ss; |       std::ostringstream ss; | ||||||
|       ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-primitive type"; |       ss << __FILE__ << ":" << __LINE__ << ": column " << i | ||||||
|  |          << " has non-primitive type"; | ||||||
|       throw std::invalid_argument(ss.str()); |       throw std::invalid_argument(ss.str()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(_col->is_repeated()) { |     if (_col->is_repeated()) { | ||||||
|       std::ostringstream ss; |       std::ostringstream ss; | ||||||
|       ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has non-scalar type"; |       ss << __FILE__ << ":" << __LINE__ << ": column " << i | ||||||
|  |          << " has non-scalar type"; | ||||||
|       throw std::invalid_argument(ss.str()); |       throw std::invalid_argument(ss.str()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     parquet::schema::PrimitiveNode* col = (parquet::schema::PrimitiveNode*)_col; |     parquet::schema::PrimitiveNode *col = | ||||||
|  |         (parquet::schema::PrimitiveNode *)_col; | ||||||
|  |  | ||||||
|     if(i > 0) |     if (i > 0) | ||||||
|       text += ", "; |       text += ", "; | ||||||
|  |  | ||||||
|     text += "\""; |     text += "\""; | ||||||
|     // Horrifically inefficient, but easy to understand. |     // Horrifically inefficient, but easy to understand. | ||||||
|     std::string colName = col->name(); |     std::string colName = col->name(); | ||||||
|     for(char& c : colName) { |     for (char &c : colName) { | ||||||
|       if(c == '"') |       if (c == '"') | ||||||
|         text += "\"\""; |         text += "\"\""; | ||||||
|       else |       else | ||||||
|         text += c; |         text += c; | ||||||
| @@ -71,7 +71,7 @@ std::string ParquetTable::CreateStatement() { | |||||||
|     // whose unsigned ints start getting interpreted as signed. (We could |     // whose unsigned ints start getting interpreted as signed. (We could | ||||||
|     // support this for UINT_8/16/32 -- and for UINT_64 we could throw if |     // support this for UINT_8/16/32 -- and for UINT_64 we could throw if | ||||||
|     // the high bit was set.) |     // the high bit was set.) | ||||||
|     if(logical == parquet::LogicalType::NONE || |     if (logical == parquet::LogicalType::NONE || | ||||||
|         logical == parquet::LogicalType::UTF8 || |         logical == parquet::LogicalType::UTF8 || | ||||||
|         logical == parquet::LogicalType::DATE || |         logical == parquet::LogicalType::DATE || | ||||||
|         logical == parquet::LogicalType::TIME_MILLIS || |         logical == parquet::LogicalType::TIME_MILLIS || | ||||||
| @@ -82,74 +82,74 @@ std::string ParquetTable::CreateStatement() { | |||||||
|         logical == parquet::LogicalType::INT_16 || |         logical == parquet::LogicalType::INT_16 || | ||||||
|         logical == parquet::LogicalType::INT_32 || |         logical == parquet::LogicalType::INT_32 || | ||||||
|         logical == parquet::LogicalType::INT_64) { |         logical == parquet::LogicalType::INT_64) { | ||||||
|       switch(physical) { |       switch (physical) { | ||||||
|         case parquet::Type::BOOLEAN: |       case parquet::Type::BOOLEAN: | ||||||
|  |         type = "TINYINT"; | ||||||
|  |         break; | ||||||
|  |       case parquet::Type::INT32: | ||||||
|  |         if (logical == parquet::LogicalType::NONE || | ||||||
|  |             logical == parquet::LogicalType::INT_32) { | ||||||
|  |           type = "INT"; | ||||||
|  |         } else if (logical == parquet::LogicalType::INT_8) { | ||||||
|           type = "TINYINT"; |           type = "TINYINT"; | ||||||
|           break; |         } else if (logical == parquet::LogicalType::INT_16) { | ||||||
|         case parquet::Type::INT32: |           type = "SMALLINT"; | ||||||
|           if(logical == parquet::LogicalType::NONE || |         } | ||||||
|               logical == parquet::LogicalType::INT_32) { |         break; | ||||||
|             type = "INT"; |       case parquet::Type::INT96: | ||||||
|           } else if(logical == parquet::LogicalType::INT_8) { |         // INT96 is used for nanosecond precision on timestamps; we truncate | ||||||
|             type = "TINYINT"; |         // to millisecond precision. | ||||||
|           } else if(logical == parquet::LogicalType::INT_16) { |       case parquet::Type::INT64: | ||||||
|             type = "SMALLINT"; |         type = "BIGINT"; | ||||||
|           } |         break; | ||||||
|           break; |       case parquet::Type::FLOAT: | ||||||
|         case parquet::Type::INT96: |         type = "REAL"; | ||||||
|           // INT96 is used for nanosecond precision on timestamps; we truncate |         break; | ||||||
|           // to millisecond precision. |       case parquet::Type::DOUBLE: | ||||||
|         case parquet::Type::INT64: |         type = "DOUBLE"; | ||||||
|           type = "BIGINT"; |         break; | ||||||
|           break; |       case parquet::Type::BYTE_ARRAY: | ||||||
|         case parquet::Type::FLOAT: |         if (logical == parquet::LogicalType::UTF8) { | ||||||
|           type = "REAL"; |           type = "TEXT"; | ||||||
|           break; |         } else { | ||||||
|         case parquet::Type::DOUBLE: |  | ||||||
|           type = "DOUBLE"; |  | ||||||
|           break; |  | ||||||
|         case parquet::Type::BYTE_ARRAY: |  | ||||||
|           if(logical == parquet::LogicalType::UTF8) { |  | ||||||
|             type = "TEXT"; |  | ||||||
|           } else { |  | ||||||
|             type = "BLOB"; |  | ||||||
|           } |  | ||||||
|           break; |  | ||||||
|         case parquet::Type::FIXED_LEN_BYTE_ARRAY: |  | ||||||
|           type = "BLOB"; |           type = "BLOB"; | ||||||
|           break; |         } | ||||||
|         default: |         break; | ||||||
|           break; |       case parquet::Type::FIXED_LEN_BYTE_ARRAY: | ||||||
|  |         type = "BLOB"; | ||||||
|  |         break; | ||||||
|  |       default: | ||||||
|  |         break; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if(type.empty()) { |     if (type.empty()) { | ||||||
|       std::ostringstream ss; |       std::ostringstream ss; | ||||||
|       ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has unsupported type: " << |       ss << __FILE__ << ":" << __LINE__ << ": column " << i | ||||||
|         parquet::TypeToString(physical) << "/" << parquet::LogicalTypeToString(logical); |          << " has unsupported type: " << parquet::TypeToString(physical) << "/" | ||||||
|  |          << parquet::LogicalTypeToString(logical); | ||||||
|  |  | ||||||
|       throw std::invalid_argument(ss.str()); |       throw std::invalid_argument(ss.str()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| #ifdef DEBUG | #ifdef DEBUG | ||||||
|     printf("col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n", |     printf( | ||||||
|         i, |         "col %d[name=%s, p=%d:%s, l=%d:%s] is %s\n", i, col->name().data(), | ||||||
|         col->name().data(), |  | ||||||
|         col->physical_type(), |         col->physical_type(), | ||||||
|         parquet::TypeToString(col->physical_type()).data(), |         parquet::TypeToString(col->physical_type()).data(), col->logical_type(), | ||||||
|         col->logical_type(), |         parquet::LogicalTypeToString(col->logical_type()).data(), type.data()); | ||||||
|         parquet::LogicalTypeToString(col->logical_type()).data(), |  | ||||||
|         type.data()); |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|     text += " "; |     text += " "; | ||||||
|     text += type; |     text += type; | ||||||
|   } |   } | ||||||
|   text +=");"; |   text += ");"; | ||||||
|   return text; |   return text; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { return metadata; } | std::shared_ptr<parquet::FileMetaData> ParquetTable::getMetadata() { | ||||||
|  |   return metadata; | ||||||
|  | } | ||||||
|  |  | ||||||
| const std::string& ParquetTable::getFile() { return file; } | const std::string &ParquetTable::getFile() { return file; } | ||||||
| const std::string& ParquetTable::getTableName() { return tableName; } | const std::string &ParquetTable::getTableName() { return tableName; } | ||||||
|   | |||||||
| @@ -1,9 +1,9 @@ | |||||||
| #ifndef PARQUET_TABLE_H | #ifndef PARQUET_TABLE_H | ||||||
| #define PARQUET_TABLE_H | #define PARQUET_TABLE_H | ||||||
|  |  | ||||||
| #include <vector> |  | ||||||
| #include <string> |  | ||||||
| #include "parquet/api/reader.h" | #include "parquet/api/reader.h" | ||||||
|  | #include <string> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
| class ParquetTable { | class ParquetTable { | ||||||
|   std::string file; |   std::string file; | ||||||
| @@ -11,15 +11,14 @@ class ParquetTable { | |||||||
|   std::vector<std::string> columnNames; |   std::vector<std::string> columnNames; | ||||||
|   std::shared_ptr<parquet::FileMetaData> metadata; |   std::shared_ptr<parquet::FileMetaData> metadata; | ||||||
|  |  | ||||||
|  |  | ||||||
| public: | public: | ||||||
|   ParquetTable(std::string file, std::string tableName); |   ParquetTable(std::string file, std::string tableName); | ||||||
|   std::string CreateStatement(); |   std::string CreateStatement(); | ||||||
|   std::string columnName(int idx); |   std::string columnName(int idx); | ||||||
|   unsigned int getNumColumns(); |   unsigned int getNumColumns(); | ||||||
|   std::shared_ptr<parquet::FileMetaData> getMetadata(); |   std::shared_ptr<parquet::FileMetaData> getMetadata(); | ||||||
|   const std::string& getFile(); |   const std::string &getFile(); | ||||||
|   const std::string& getTableName(); |   const std::string &getTableName(); | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Addie Morrison
					Addie Morrison