diff --git a/build-sqlite b/build-sqlite
index 062a8c0..e73406a 100755
--- a/build-sqlite
+++ b/build-sqlite
@@ -1,11 +1,11 @@
#!/bin/bash
set -euo pipefail
-VERSION=3240000
+VERSION=3352000
fetch_if_needed() {
if [ ! -e sqlite ]; then
- curl --fail "https://sqlite.org/2018/sqlite-autoconf-${VERSION}.tar.gz" > sqlite.tar.gz
+ curl --fail "https://sqlite.org/2020/sqlite-autoconf-${VERSION}.tar.gz" > sqlite.tar.gz
tar xf sqlite.tar.gz
rm sqlite.tar.gz
mv sqlite-autoconf-${VERSION} sqlite
diff --git a/build/Makefile.linux b/build/Makefile.linux
index 656864a..829a9c7 100644
--- a/build/Makefile.linux
+++ b/build/Makefile.linux
@@ -3,119 +3,39 @@ ROOT:=$(HERE)/../..
VTABLE:=$(ROOT)/parquet
SQLITE:=$(ROOT)/sqlite
-# Directories
-ARROW=$(HERE)/arrow
-ARROW_RELEASE=$(ARROW)/cpp/release
-BOOST_ROOT=$(ARROW_RELEASE)/boost_ep-prefix/src/boost_ep
-BOOST=$(BOOST_ROOT)/stage/lib
-BROTLI=$(ARROW_RELEASE)/brotli_ep/src/brotli_ep-install/lib/x86_64-linux-gnu
-ICU=$(HERE)/icu
-LZ4=$(ARROW_RELEASE)/lz4_ep-prefix/src/lz4_ep/lib
-PARQUET_CPP=$(HERE)/parquet-cpp
-SNAPPY=$(ARROW_RELEASE)/snappy_ep/src/snappy_ep-install/lib
-ZLIB=$(ARROW_RELEASE)/zlib_ep/src/zlib_ep-install/lib
-ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib
-
-# Libraries
-# profile_gen, profile_build for PGO
-APACHE_BUILD=release
-
-ARROW_LIB = $(ARROW_RELEASE)/$(APACHE_BUILD)/libarrow.a
-BOOST_FILESYSTEM_LIB = $(BOOST)/libboost_filesystem.a
-BOOST_REGEX_LIB = $(BOOST)/libboost_regex.a
-BOOST_SYSTEM_LIB = $(BOOST)/libboost_system.a
-BROTLI_COMMON_LIB = $(BROTLI)/libbrotlicommon.a
-BROTLI_DEC_LIB = $(BROTLI)/libbrotlidec.a
-BROTLI_ENC_LIB = $(BROTLI)/libbrotlienc.a
-ICU_I18N_LIB=$(ICU)/source/lib/libicui18n.a
-ICU_UC_LIB=$(ICU)/source/lib/libicuuc.a
-ICU_DATA_LIB=$(ICU)/source/lib/libicudata.a
-LZ4_LIB = $(LZ4)/liblz4.a
-PARQUET_CPP_LIB = $(PARQUET_CPP)/build/$(APACHE_BUILD)/libparquet.a
-SNAPPY_LIB = $(SNAPPY)/libsnappy.a
-THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
-ZLIB_LIB = $(ZLIB)/libz.a
-ZSTD_LIB = $(ZSTD)/libzstd.a
-
# Flags
-CC = gcc
CXX = g++
OPTIMIZATIONS = -O3
-CPUS:=$(shell nproc)
-CFLAGS = -I $(SQLITE) -I $(PARQUET_CPP)/src -I $(ARROW)/cpp/src $(OPTIMIZATIONS) -std=c++11 -Wall -fPIC -g
+CFLAGS = -I $(SQLITE) $(OPTIMIZATIONS) -std=c++11 -Wall -fPIC -g
+LIBS = -lparquet -lboost_regex -lboost_system -lboost_filesystem \
+ -lbrotlienc -lbrotlicommon -lbrotlidec -licui18n -licuuc -licudata \
+ -llz4 -lsnappy -lthrift -lz -lzstd -lcrypto -lssl
-ALL_LIBS = $(PARQUET_CPP_LIB) $(LZ4_LIB) $(ZSTD_LIB) $(THRIFT_LIB) $(SNAPPY_LIB) $(ARROW_LIB) \
- $(ICU_I18N_LIB) $(ICU_UC_LIB) $(ICU_DATA_LIB) \
- $(BROTLI_ENC_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) $(BOOST_REGEX_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)
+LDFLAGS = $(OPTIMIZATIONS) -Wl,--no-whole-archive $(LIBS) -lz -lcrypto -lssl
-LDFLAGS = $(OPTIMIZATIONS) \
- -Wl,--whole-archive $(ALL_LIBS) \
- -Wl,--no-whole-archive -lz -lcrypto -lssl
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
-LIBS = $(ARROW_LIB) $(PARQUET_CPP_LIB) $(ICU_I18N_LIB)
PROF =
-libparquet.so: $(LIBS) $(OBJ)
+libparquet.so: $(OBJ)
$(CXX) $(PROF) -shared -o $@ $(OBJ) $(LDFLAGS)
-parquet_filter.o: $(VTABLE)/parquet_filter.cc $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
+parquet_filter.o: $(VTABLE)/parquet_filter.cc $(VTABLE)/parquet_filter.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
-parquet_cursor.o: $(VTABLE)/parquet_cursor.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
+parquet_cursor.o: $(VTABLE)/parquet_cursor.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
-parquet_table.o: $(VTABLE)/parquet_table.cc $(VTABLE)/parquet_table.h $(ARROW) $(PARQUET_CPP)
+parquet_table.o: $(VTABLE)/parquet_table.cc $(VTABLE)/parquet_table.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
-parquet.o: $(VTABLE)/parquet.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h $(ARROW) $(PARQUET_CPP)
+parquet.o: $(VTABLE)/parquet.cc $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
$(CXX) $(PROF) -c -o $@ $< $(CFLAGS)
-$(ARROW):
- rm -rf $(ARROW)
- git clone https://github.com/apache/arrow.git $(ARROW)
- cd $(ARROW) && git checkout apache-arrow-0.9.0
- mkdir $(ARROW)/cpp/release
- cd $(ARROW)/cpp/release && cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DARROW_BOOST_VENDORED=ON -DARROW_BOOST_USE_SHARED=OFF -DPARQUET_BUILD_SHARED=OFF ..
- touch -d @0 $(ARROW)
-
-$(ARROW_LIB): $(ARROW)
- cd $(ARROW)/cpp/release && make -j$(CPUS)
-
-# This is pretty gross. I'm sure someone who knows what they're doing could do this more cleanly.
-$(ICU_I18N_LIB):
- rm -rf $(ICU)
- mkdir $(ICU)
- cd $(ICU) && wget https://github.com/unicode-org/icu/releases/download/release-$(ICU_VERSION)/icu4c-$(ICU_VERSION_U)-src.tgz
- cd $(ICU) && tar xf icu4c-$(ICU_VERSION_U)-src.tgz --strip-components=1
- cd $(ICU)/source && ./configure --enable-static
- cd $(ICU)/source && make -j$(CPUS) LIBCFLAGS='-fPIC' LIBCXXFLAGS='-fPIC'
-
-$(PARQUET_CPP):
- rm -rf $(PARQUET_CPP)
- git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
- cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
- cd $(PARQUET_CPP) && BOOST_ROOT=$(BOOST_ROOT) BOOST_STATIC_REGEX_LIBRARY=$(BOOST_REGEX_LIB) SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static -DPARQUET_BOOST_USE_SHARED=OFF -DPARQUET_BUILD_SHARED=OFF .
- touch -d @0 $(PARQUET_CPP)
-
-$(PARQUET_CPP_LIB): $(PARQUET_CPP) $(ARROW_LIB)
- cd $(PARQUET_CPP) && make -j$(CPUS)
-
-.PHONY: clean arrow icu parquet publish_libs
+.PHONY: clean parquet
clean:
rm -f *.o *.so
distclean:
rm -rf $(SQLITE) $(HERE)
-
-
-arrow: $(ARROW_LIB)
-
-icu: $(ICU_I18N_LIB)
-
-parquet: $(PARQUET_CPP_LIB)
-
-publish_libs:
- tar -cJf libs.tar.xz $(ALL_LIBS) $(SQLITE)/sqlite3
- s3cmd put libs.tar.xz s3://cldellow/public/libparquet/$$(lsb_release -s -r)/libs.tar.xz
diff --git a/make-linux b/make-linux
index 4ff0af4..1268102 100755
--- a/make-linux
+++ b/make-linux
@@ -1,31 +1,46 @@
#!/bin/bash
set -euo pipefail
+apt install -y sudo lsb-release wget
+
here=$(dirname "${BASH_SOURCE[0]}")
here=$(readlink -f "$here")
-prebuilt="$here"/build/linux/prebuilt
-ubuntu="$(lsb_release -s -r)"
-libs=(libarrow.a libboost_filesystem.a libboost_regex.a libboost_system.a libbrotlicommon.a libbrotlidec.a \
- libbrotlienc.a libicudata.a libicui18n.a libicuuc.a liblz4.a libparquet.a libsnappy.a libthrift.a libzstd.a)
-lib_locs=()
+distro="$(lsb_release -s -r)"
setup_directories() {
cd "$here"
mkdir -p build/linux
- mkdir -p "$prebuilt"
cp -f build/Makefile.linux build/linux/Makefile
cd build/linux
}
-install_prerequisites() {
+install_prerequisites_amazon_linux() {
+ # Install Apache Arrow and dependencies.
+ sudo yum update -y
+ sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+ sudo yum install -y https://apache.bintray.com/arrow/centos/7/apache-arrow-release-latest.rpm
+ sudo yum install -y --enablerepo=epel parquet-devel
+ sudo yum install -y lz4-devel thrift-devel libzstd-devel snappy-devel brotli-devel boost-devel boost-static libicu-devel openssl-devel
+ export CFLAGS=-D_GLIBCXX_USE_CXX11_ABI=0
+}
+
+install_prerequisites_ubuntu() {
+ # install Apache Arrow libs
+ # NOTE: Pinned to Ubuntu Focal
+ wget https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-focal.deb
+ sudo apt install -y -V ./apache-arrow-archive-keyring-latest-focal.deb
+ sudo apt update -y
+ sudo apt install -y -V libparquet-dev liblz4-dev libzstd-dev libthrift-dev \
+ libsnappy-dev libbrotli-dev libz-dev
+
# Install prereqs based on https://github.com/apache/parquet-cpp#linux
- sudo apt-get install libboost-dev g++ libboost-filesystem-dev \
+ sudo apt install -y libboost-dev g++ libboost-filesystem-dev \
libboost-program-options-dev libboost-regex-dev \
libboost-system-dev libboost-test-dev \
libssl-dev libtool bison flex pkg-config libreadline-dev libncurses-dev
# Install prereqs based on https://github.com/apache/arrow/tree/master/cpp
- sudo apt-get install cmake \
+ sudo apt install -y cmake \
libboost-dev \
libboost-filesystem-dev \
libboost-system-dev
@@ -38,7 +53,7 @@ build_sqlite() {
}
set_icu_version() {
- case "$ubuntu" in
+ case "$distro" in
14.04)
export ICU_VERSION=52-1
;;
@@ -48,6 +63,9 @@ set_icu_version() {
18.04)
export ICU_VERSION=60-2
;;
+ 20.10)
+ export ICU_VERSION=67-1
+ ;;
*)
echo "unsure what libicu version to use" >&2
exit 1
@@ -56,47 +74,11 @@ set_icu_version() {
export ICU_VERSION_U=${ICU_VERSION//-/_}
}
-add_prebuilt_lib() {
- lib_locs+=("$1=$prebuilt/$2.a")
-}
-
-fetch_prebuilt_libs() {
- if [ ! -e "$prebuilt"/complete ]; then
- (
- cd "$prebuilt"
- curl "https://s3.amazonaws.com/cldellow/public/libparquet/$ubuntu/libs.tar.xz" > libs.tar.xz
- tar xf libs.tar.xz --xform 's#.*/##'
- touch "$prebuilt"/complete
- )
- fi
-
- if [ ! -e "$here"/sqlite/sqlite3 ]; then
- ln -s "$prebuilt"/sqlite3 "$here"/sqlite/sqlite3
- fi
-
- add_prebuilt_lib "PARQUET_CPP_LIB" libparquet
- add_prebuilt_lib "LZ4_LIB" liblz4
- add_prebuilt_lib "ZSTD_LIB" libzstd
- add_prebuilt_lib "THRIFT_LIB" libthrift
- add_prebuilt_lib "SNAPPY_LIB" libsnappy
- add_prebuilt_lib "ARROW_LIB" libarrow
- add_prebuilt_lib "ICU_I18N_LIB" libicui18n
- add_prebuilt_lib "ICU_UC_LIB" libicuuc
- add_prebuilt_lib "ICU_DATA_LIB" libicudata
- add_prebuilt_lib "BROTLI_ENC_LIB" libbrotlienc
- add_prebuilt_lib "BROTLI_COMMON_LIB" libbrotlicommon
- add_prebuilt_lib "BROTLI_DEC_LIB" libbrotlidec
- add_prebuilt_lib "BOOST_REGEX_LIB" libboost_regex
- add_prebuilt_lib "BOOST_SYSTEM_LIB" libboost_system
- add_prebuilt_lib "BOOST_FILESYSTEM_LIB" libboost_filesystem
-
-}
-
main() {
+ set_icu_version
setup_directories
install_prerequisites
build_sqlite
- set_icu_version
if [ -v PREBUILT ]; then
fetch_prebuilt_libs
diff --git a/parquet/parquet.cc b/parquet/parquet.cc
index 9d5857e..e1df8ad 100644
--- a/parquet/parquet.cc
+++ b/parquet/parquet.cc
@@ -290,7 +290,7 @@ static int parquetColumn(
case parquet::Type::BYTE_ARRAY:
{
parquet::ByteArray* rv = cursor->getByteArray(col);
- if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
+ if(cursor->getConvertedType(col) == parquet::ConvertedType::UTF8) {
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
} else {
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
diff --git a/parquet/parquet_cursor.cc b/parquet/parquet_cursor.cc
index e0ab8b6..e1fe8d3 100644
--- a/parquet/parquet_cursor.cc
+++ b/parquet/parquet_cursor.cc
@@ -31,7 +31,7 @@ bool ParquetCursor::currentRowGroupSatisfiesRowIdFilter(Constraint& constraint)
}
}
-bool ParquetCursor::currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) {
+bool ParquetCursor::currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> _stats) {
if(!_stats->HasMinMax()) {
return true;
}
@@ -48,8 +48,8 @@ bool ParquetCursor::currentRowGroupSatisfiesBlobFilter(Constraint& constraint, s
parquet::Type::type pqType = types[constraint.column];
if(pqType == parquet::Type::BYTE_ARRAY) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>*)_stats.get();
+ parquet::TypedStatistics<parquet::ByteArrayType>* stats =
+ (parquet::TypedStatistics<parquet::ByteArrayType>*)_stats.get();
minPtr = stats->min().ptr;
minLen = stats->min().len;
@@ -137,9 +137,9 @@ bool ParquetCursor::currentRowGroupSatisfiesBlobFilter(Constraint& constraint, s
}
}
-bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BYTE_ARRAY>>*)_stats.get();
+bool ParquetCursor::currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> _stats) {
+ parquet::TypedStatistics<parquet::ByteArrayType>* stats =
+ (parquet::TypedStatistics<parquet::ByteArrayType>*)_stats.get();
if(!stats->HasMinMax()) {
return true;
@@ -195,7 +195,7 @@ int64_t int96toMsSinceEpoch(const parquet::Int96& rv) {
return nsSinceEpoch;
}
-bool ParquetCursor::currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) {
+bool ParquetCursor::currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> _stats) {
if(!_stats->HasMinMax()) {
return true;
}
@@ -211,27 +211,27 @@ bool ParquetCursor::currentRowGroupSatisfiesIntegerFilter(Constraint& constraint
parquet::Type::type pqType = types[column];
if(pqType == parquet::Type::INT32) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::INT32>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::INT32>>*)_stats.get();
+ parquet::TypedStatistics<parquet::Int32Type>* stats =
+ (parquet::TypedStatistics<parquet::Int32Type>*)_stats.get();
min = stats->min();
max = stats->max();
} else if(pqType == parquet::Type::INT64) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::INT64>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::INT64>>*)_stats.get();
+ parquet::TypedStatistics<parquet::Int64Type>* stats =
+ (parquet::TypedStatistics<parquet::Int64Type>*)_stats.get();
min = stats->min();
max = stats->max();
} else if(pqType == parquet::Type::INT96) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::INT96>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::INT96>>*)_stats.get();
+ parquet::TypedStatistics<parquet::Int96Type>* stats =
+ (parquet::TypedStatistics<parquet::Int96Type>*)_stats.get();
min = int96toMsSinceEpoch(stats->min());
max = int96toMsSinceEpoch(stats->max());
} else if(pqType == parquet::Type::BOOLEAN) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BOOLEAN>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::BOOLEAN>>*)_stats.get();
+ parquet::TypedStatistics<parquet::BooleanType>* stats =
+ (parquet::TypedStatistics<parquet::BooleanType>*)_stats.get();
min = stats->min();
max = stats->max();
@@ -272,7 +272,7 @@ bool ParquetCursor::currentRowGroupSatisfiesIntegerFilter(Constraint& constraint
return true;
}
-bool ParquetCursor::currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> _stats) {
+bool ParquetCursor::currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> _stats) {
if(!_stats->HasMinMax()) {
return true;
}
@@ -288,14 +288,14 @@ bool ParquetCursor::currentRowGroupSatisfiesDoubleFilter(Constraint& constraint,
parquet::Type::type pqType = types[column];
if(pqType == parquet::Type::DOUBLE) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::DOUBLE>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::DOUBLE>>*)_stats.get();
+ parquet::TypedStatistics<parquet::DoubleType>* stats =
+ (parquet::TypedStatistics<parquet::DoubleType>*)_stats.get();
min = stats->min();
max = stats->max();
} else if(pqType == parquet::Type::FLOAT) {
- parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::FLOAT>>* stats =
- (parquet::TypedRowGroupStatistics<parquet::DataType<parquet::Type::FLOAT>>*)_stats.get();
+ parquet::TypedStatistics<parquet::FloatType>* stats =
+ (parquet::TypedStatistics<parquet::FloatType>*)_stats.get();
min = stats->min();
max = stats->max();
@@ -527,7 +527,7 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() {
} else {
std::unique_ptr<parquet::ColumnChunkMetaData> md = rowGroupMetadata->ColumnChunk(column);
if(md->is_stats_set()) {
- std::shared_ptr<parquet::RowGroupStatistics> stats = md->statistics();
+ std::shared_ptr<parquet::Statistics> stats = md->statistics();
// SQLite is much looser with types than you might expect if you
// come from a Postgres background. The constraint '30.0' (that is,
@@ -545,7 +545,7 @@ bool ParquetCursor::currentRowGroupSatisfiesFilter() {
} else {
parquet::Type::type pqType = types[column];
- if(pqType == parquet::Type::BYTE_ARRAY && logicalTypes[column] == parquet::LogicalType::UTF8) {
+ if(pqType == parquet::Type::BYTE_ARRAY && ConvertedTypes[column] == parquet::ConvertedType::UTF8) {
rv = currentRowGroupSatisfiesTextFilter(constraints[i], stats);
} else if(pqType == parquet::Type::BYTE_ARRAY) {
rv = currentRowGroupSatisfiesBlobFilter(constraints[i], stats);
@@ -608,13 +608,13 @@ start:
types.push_back(rowGroupMetadata->schema()->Column(0)->physical_type());
}
- while(logicalTypes.size() < (unsigned int)rowGroupMetadata->num_columns()) {
- logicalTypes.push_back(rowGroupMetadata->schema()->Column(0)->logical_type());
+ while(ConvertedTypes.size() < (unsigned int)rowGroupMetadata->num_columns()) {
+ ConvertedTypes.push_back(rowGroupMetadata->schema()->Column(0)->converted_type());
}
for(unsigned int i = 0; i < (unsigned int)rowGroupMetadata->num_columns(); i++) {
types[i] = rowGroupMetadata->schema()->Column(i)->physical_type();
- logicalTypes[i] = rowGroupMetadata->schema()->Column(i)->logical_type();
+ ConvertedTypes[i] = rowGroupMetadata->schema()->Column(i)->converted_type();
}
for(unsigned int i = 0; i < colRows.size(); i++) {
@@ -664,7 +664,7 @@ bool ParquetCursor::currentRowSatisfiesFilter() {
rv = !isNull(column);
} else {
- if(logicalTypes[column] == parquet::LogicalType::UTF8) {
+ if(ConvertedTypes[column] == parquet::ConvertedType::UTF8) {
rv = currentRowSatisfiesTextFilter(constraints[i]);
} else {
parquet::Type::type pqType = types[column];
@@ -928,8 +928,8 @@ parquet::Type::type ParquetCursor::getPhysicalType(int col) {
return types[col];
}
-parquet::LogicalType::type ParquetCursor::getLogicalType(int col) {
- return logicalTypes[col];
+parquet::ConvertedType::type ParquetCursor::getConvertedType(int col) {
+ return ConvertedTypes[col];
}
void ParquetCursor::close() {
diff --git a/parquet/parquet_cursor.h b/parquet/parquet_cursor.h
index f7d8c2a..f6afc40 100644
--- a/parquet/parquet_cursor.h
+++ b/parquet/parquet_cursor.h
@@ -13,7 +13,7 @@ class ParquetCursor {
std::shared_ptr<parquet::RowGroupReader> rowGroup;
std::vector<std::shared_ptr<parquet::Scanner>> scanners;
std::vector<parquet::Type::type> types;
- std::vector<parquet::LogicalType::type> logicalTypes;
+ std::vector<parquet::ConvertedType::type> ConvertedTypes;
std::vector<int64_t> colRows;
std::vector<bool> colNulls;
@@ -36,10 +36,10 @@ class ParquetCursor {
bool currentRowSatisfiesFilter();
bool currentRowGroupSatisfiesFilter();
bool currentRowGroupSatisfiesRowIdFilter(Constraint& constraint);
- bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
- bool currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
- bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
- bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::RowGroupStatistics> stats);
+ bool currentRowGroupSatisfiesTextFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
+ bool currentRowGroupSatisfiesBlobFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
+ bool currentRowGroupSatisfiesIntegerFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
+ bool currentRowGroupSatisfiesDoubleFilter(Constraint& constraint, std::shared_ptr<parquet::Statistics> stats);
bool currentRowSatisfiesTextFilter(Constraint& constraint);
bool currentRowSatisfiesIntegerFilter(Constraint& constraint);
@@ -60,7 +60,7 @@ public:
unsigned int getNumConstraints() const;
const Constraint& getConstraint(unsigned int i) const;
parquet::Type::type getPhysicalType(int col);
- parquet::LogicalType::type getLogicalType(int col);
+ parquet::ConvertedType::type getConvertedType(int col);
ParquetTable* getTable() const;
int getInt32(int col);
diff --git a/parquet/parquet_table.cc b/parquet/parquet_table.cc
index d796b8a..a65f02b 100644
--- a/parquet/parquet_table.cc
+++ b/parquet/parquet_table.cc
@@ -66,33 +66,33 @@ std::string ParquetTable::CreateStatement() {
std::string type;
parquet::Type::type physical = col->physical_type();
- parquet::LogicalType::type logical = col->logical_type();
+ parquet::ConvertedType::type converted = col->converted_type();
// Be explicit about which types we understand so we don't mislead someone
// whose unsigned ints start getting interpreted as signed. (We could
// support this for UINT_8/16/32 -- and for UINT_64 we could throw if
// the high bit was set.)
- if(logical == parquet::LogicalType::NONE ||
- logical == parquet::LogicalType::UTF8 ||
- logical == parquet::LogicalType::DATE ||
- logical == parquet::LogicalType::TIME_MILLIS ||
- logical == parquet::LogicalType::TIMESTAMP_MILLIS ||
- logical == parquet::LogicalType::TIME_MICROS ||
- logical == parquet::LogicalType::TIMESTAMP_MICROS ||
- logical == parquet::LogicalType::INT_8 ||
- logical == parquet::LogicalType::INT_16 ||
- logical == parquet::LogicalType::INT_32 ||
- logical == parquet::LogicalType::INT_64) {
+ if(converted == parquet::ConvertedType::NONE ||
+ converted == parquet::ConvertedType::UTF8 ||
+ converted == parquet::ConvertedType::DATE ||
+ converted == parquet::ConvertedType::TIME_MILLIS ||
+ converted == parquet::ConvertedType::TIMESTAMP_MILLIS ||
+ converted == parquet::ConvertedType::TIME_MICROS ||
+ converted == parquet::ConvertedType::TIMESTAMP_MICROS ||
+ converted == parquet::ConvertedType::INT_8 ||
+ converted == parquet::ConvertedType::INT_16 ||
+ converted == parquet::ConvertedType::INT_32 ||
+ converted == parquet::ConvertedType::INT_64) {
switch(physical) {
case parquet::Type::BOOLEAN:
type = "TINYINT";
break;
case parquet::Type::INT32:
- if(logical == parquet::LogicalType::NONE ||
- logical == parquet::LogicalType::INT_32) {
+ if(converted == parquet::ConvertedType::NONE ||
+ converted == parquet::ConvertedType::INT_32) {
type = "INT";
- } else if(logical == parquet::LogicalType::INT_8) {
+ } else if(converted == parquet::ConvertedType::INT_8) {
type = "TINYINT";
- } else if(logical == parquet::LogicalType::INT_16) {
+ } else if(converted == parquet::ConvertedType::INT_16) {
type = "SMALLINT";
}
break;
@@ -109,7 +109,7 @@ std::string ParquetTable::CreateStatement() {
type = "DOUBLE";
break;
case parquet::Type::BYTE_ARRAY:
- if(logical == parquet::LogicalType::UTF8) {
+ if(converted == parquet::ConvertedType::UTF8) {
type = "TEXT";
} else {
type = "BLOB";
@@ -126,7 +126,7 @@ std::string ParquetTable::CreateStatement() {
if(type.empty()) {
std::ostringstream ss;
ss << __FILE__ << ":" << __LINE__ << ": column " << i << " has unsupported type: " <<
- parquet::TypeToString(physical) << "/" << parquet::LogicalTypeToString(logical);
+ parquet::TypeToString(physical) << "/" << parquet::ConvertedTypeToString(converted);
throw std::invalid_argument(ss.str());
}
@@ -137,8 +137,8 @@ std::string ParquetTable::CreateStatement() {
col->name().data(),
col->physical_type(),
parquet::TypeToString(col->physical_type()).data(),
- col->logical_type(),
- parquet::LogicalTypeToString(col->logical_type()).data(),
+ col->converted_type(),
+ parquet::ConvertedTypeToString(col->converted_type()).data(),
type.data());
#endif