Move some code out of ensureColumn

Saves ~4% on the cold census needle query (~425ms -> ~405ms)
This commit is contained in:
Colin Dellow 2018-06-23 19:10:23 -04:00
parent b9c58bd97e
commit d7c5002cee
3 changed files with 17 additions and 10 deletions

View File

@ -586,6 +586,17 @@ start:
return false; return false;
} }
while(table->getNumColumns() >= scanners.size()) {
scanners.push_back(std::shared_ptr<parquet::Scanner>());
// If it doesn't exist, it's the rowId as of the last nextRowGroup call
colRows.push_back(rowGroupStartRowId);
colNulls.push_back(false);
colIntValues.push_back(0);
colDoubleValues.push_back(0);
colByteArrayValues.push_back(parquet::ByteArray());
}
rowGroupStartRowId = rowId; rowGroupStartRowId = rowId;
rowGroupId++; rowGroupId++;
rowGroupMetadata = reader->metadata()->RowGroup(rowGroupId); rowGroupMetadata = reader->metadata()->RowGroup(rowGroupId);
@ -712,16 +723,6 @@ void ParquetCursor::ensureColumn(int col) {
return; return;
// need to ensure a scanner exists (and skip the # of rows in the rowgroup) // need to ensure a scanner exists (and skip the # of rows in the rowgroup)
while((unsigned int)col >= scanners.size()) {
scanners.push_back(std::shared_ptr<parquet::Scanner>());
// If it doesn't exist, it's the rowId as of the last nextRowGroup call
colRows.push_back(rowGroupStartRowId);
colNulls.push_back(false);
colIntValues.push_back(0);
colDoubleValues.push_back(0);
colByteArrayValues.push_back(parquet::ByteArray());
}
if(scanners[col].get() == NULL) { if(scanners[col].get() == NULL) {
std::shared_ptr<parquet::ColumnReader> colReader = rowGroup->Column(col); std::shared_ptr<parquet::ColumnReader> colReader = rowGroup->Column(col);
scanners[col] = parquet::Scanner::Make(colReader); scanners[col] = parquet::Scanner::Make(colReader);

View File

@ -13,6 +13,11 @@ std::string ParquetTable::columnName(int i) {
return columnNames[i]; return columnNames[i];
} }
// Reports how many columns this table has, i.e. the number of entries held
// in columnNames.
unsigned int ParquetTable::getNumColumns() {
  // Convert the container's size to the declared return type explicitly
  // instead of relying on the implicit conversion at the return statement.
  const auto count = static_cast<unsigned int>(columnNames.size());
  return count;
}
std::string ParquetTable::CreateStatement() { std::string ParquetTable::CreateStatement() {
std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile( std::unique_ptr<parquet::ParquetFileReader> reader = parquet::ParquetFileReader::OpenFile(
file.data(), file.data(),

View File

@ -16,6 +16,7 @@ public:
ParquetTable(std::string file, std::string tableName); ParquetTable(std::string file, std::string tableName);
std::string CreateStatement(); std::string CreateStatement();
std::string columnName(int idx); std::string columnName(int idx);
unsigned int getNumColumns();
std::shared_ptr<parquet::FileMetaData> getMetadata(); std::shared_ptr<parquet::FileMetaData> getMetadata();
const std::string& getFile(); const std::string& getFile();
const std::string& getTableName(); const std::string& getTableName();