2018-03-03 20:44:01 +00:00
|
|
|
#ifndef PARQUET_CURSOR_H
|
|
|
|
#define PARQUET_CURSOR_H
|
|
|
|
|
2018-03-11 17:58:10 +00:00
|
|
|
#include "parquet_filter.h"
|
2018-03-03 20:44:01 +00:00
|
|
|
#include "parquet_table.h"
|
|
|
|
#include "parquet/api/reader.h"
|
|
|
|
|
|
|
|
class ParquetCursor {
|
|
|
|
|
|
|
|
ParquetTable* table;
|
|
|
|
std::unique_ptr<parquet::ParquetFileReader> reader;
|
|
|
|
std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata;
|
|
|
|
std::shared_ptr<parquet::RowGroupReader> rowGroup;
|
|
|
|
std::vector<std::shared_ptr<parquet::Scanner>> scanners;
|
|
|
|
std::vector<parquet::Type::type> types;
|
2019-11-13 13:17:29 +00:00
|
|
|
std::vector<parquet::ConvertedType::type> logicalTypes;
|
2018-03-03 20:44:01 +00:00
|
|
|
|
|
|
|
std::vector<int> colRows;
|
|
|
|
std::vector<bool> colNulls;
|
2018-03-13 00:42:50 +00:00
|
|
|
std::vector<int64_t> colIntValues;
|
2018-03-03 20:44:01 +00:00
|
|
|
std::vector<double> colDoubleValues;
|
|
|
|
std::vector<parquet::ByteArray> colByteArrayValues;
|
|
|
|
|
|
|
|
int rowId;
|
|
|
|
int rowGroupId;
|
2018-03-05 03:29:35 +00:00
|
|
|
int rowGroupStartRowId;
|
2018-03-13 00:42:50 +00:00
|
|
|
int rowGroupSize;
|
2018-03-03 20:44:01 +00:00
|
|
|
int numRows;
|
|
|
|
int numRowGroups;
|
|
|
|
int rowsLeftInRowGroup;
|
|
|
|
|
2018-03-04 22:49:19 +00:00
|
|
|
bool nextRowGroup();
|
2018-03-03 20:44:01 +00:00
|
|
|
|
2018-03-11 17:58:10 +00:00
|
|
|
std::vector<Constraint> constraints;
|
|
|
|
|
2018-03-13 00:42:50 +00:00
|
|
|
bool currentRowSatisfiesFilter();
|
|
|
|
bool currentRowGroupSatisfiesFilter();
|
2018-03-16 01:37:52 +00:00
|
|
|
bool currentRowGroupSatisfiesRowIdFilter(Constraint& constraint);
|
2019-11-13 13:17:29 +00:00
|
|
|
bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
|
|
|
|
bool currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
|
|
|
|
bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
|
|
|
|
bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
|
2018-03-16 01:37:52 +00:00
|
|
|
|
|
|
|
bool currentRowSatisfiesTextFilter(Constraint& constraint);
|
|
|
|
bool currentRowSatisfiesIntegerFilter(Constraint& constraint);
|
|
|
|
bool currentRowSatisfiesDoubleFilter(Constraint& constraint);
|
|
|
|
|
2018-03-13 00:42:50 +00:00
|
|
|
|
2018-03-03 20:44:01 +00:00
|
|
|
public:
|
|
|
|
ParquetCursor(ParquetTable* table);
|
|
|
|
int getRowId();
|
|
|
|
void next();
|
2018-03-05 02:05:26 +00:00
|
|
|
void close();
|
2018-03-11 17:58:10 +00:00
|
|
|
void reset(std::vector<Constraint> constraints);
|
2018-03-03 20:44:01 +00:00
|
|
|
bool eof();
|
|
|
|
|
|
|
|
void ensureColumn(int col);
|
|
|
|
bool isNull(int col);
|
2018-03-25 03:51:15 +00:00
|
|
|
unsigned int getNumRowGroups() const;
|
|
|
|
unsigned int getNumConstraints() const;
|
|
|
|
const Constraint& getConstraint(unsigned int i) const;
|
2018-03-04 01:00:50 +00:00
|
|
|
parquet::Type::type getPhysicalType(int col);
|
2019-11-13 13:17:29 +00:00
|
|
|
parquet::ConvertedType::type getLogicalType(int col);
|
2018-03-25 03:51:15 +00:00
|
|
|
ParquetTable* getTable() const;
|
2018-03-04 01:00:50 +00:00
|
|
|
|
|
|
|
int getInt32(int col);
|
|
|
|
long getInt64(int col);
|
2018-03-03 20:44:01 +00:00
|
|
|
double getDouble(int col);
|
|
|
|
parquet::ByteArray* getByteArray(int col);
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|