Use Arrow's compression libraries

Fixes #27
This commit is contained in:
Colin Dellow 2018-06-26 08:17:18 -04:00
parent 129ff4e694
commit 263a6af7ec
1 changed file with 16 additions and 66 deletions

View File

@@ -5,28 +5,29 @@ SQLITE:=$(ROOT)/sqlite
# Directories # Directories
ARROW=$(HERE)/arrow ARROW=$(HERE)/arrow
BROTLI=$(HERE)/brotli ARROW_RELEASE=$(ARROW)/cpp/release
LZ4=$(HERE)/lz4 BROTLI=$(ARROW_RELEASE)/brotli_ep/src/brotli_ep-install/lib/x86_64-linux-gnu
LZ4=$(ARROW_RELEASE)/lz4_ep-prefix/src/lz4_ep/lib
PARQUET_CPP=$(HERE)/parquet-cpp PARQUET_CPP=$(HERE)/parquet-cpp
SNAPPY=$(HERE)/snappy SNAPPY=$(ARROW_RELEASE)/snappy_ep/src/snappy_ep-install/lib
ZLIB=$(HERE)/zlib ZLIB=$(ARROW_RELEASE)/zlib_ep/src/zlib_ep-install/lib
ZSTD=$(HERE)/zstd ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib
# Libraries # Libraries
ARROW_LIB = $(ARROW)/cpp/release/release/libarrow.a ARROW_LIB = $(ARROW_RELEASE)/release/libarrow.a
BOOST_FILESYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_filesystem.so BOOST_FILESYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_filesystem.so
BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so
BOOST_SYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_system.so BOOST_SYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_system.so
BROTLI_COMMON_LIB = $(BROTLI)/out/libbrotlicommon-static.a BROTLI_COMMON_LIB = $(BROTLI)/libbrotlicommon.a
BROTLI_DEC_LIB = $(BROTLI)/out/libbrotlidec-static.a BROTLI_DEC_LIB = $(BROTLI)/libbrotlidec.a
BROTLI_ENC_LIB = $(BROTLI)/out/libbrotlienc-static.a BROTLI_ENC_LIB = $(BROTLI)/libbrotlienc.a
LZ4_LIB = $(LZ4)/lib/liblz4.a LZ4_LIB = $(LZ4)/liblz4.a
PARQUET_CPP_LIB = $(PARQUET_CPP)/build/release/libparquet.a PARQUET_CPP_LIB = $(PARQUET_CPP)/build/release/libparquet.a
SNAPPY_LIB = $(SNAPPY)/build/libsnappy.a SNAPPY_LIB = $(SNAPPY)/libsnappy.a
SQLITE3_LIB = $(SQLITE)/libsqlite3.a SQLITE3_LIB = $(SQLITE)/libsqlite3.a
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
ZLIB_LIB = $(ZLIB)/libz.a ZLIB_LIB = $(ZLIB)/libz.a
ZSTD_LIB = $(ZSTD)/lib/libzstd.a ZSTD_LIB = $(ZSTD)/libzstd.a
# Flags # Flags
CC = g++ CC = g++
@@ -39,14 +40,7 @@ LDFLAGS = -O3 \
-Wl,--no-whole-archive -lz -lcrypto -lssl $(BOOST_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB) -Wl,--no-whole-archive -lz -lcrypto -lssl $(BOOST_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
LIBS = $(ARROW_LIB) \ LIBS = $(ARROW_LIB) \
$(BROTLI_COMMON_LIB) \ $(PARQUET_CPP_LIB)
$(BROTLI_DEC_LIB) \
$(BROTLI_ENC_LIB) \
$(LZ4_LIB) \
$(PARQUET_CPP_LIB) \
$(SNAPPY_LIB) \
$(ZLIB_LIB) \
$(ZSTD_LIB)
libparquet.so: $(LIBS) $(OBJ) libparquet.so: $(LIBS) $(OBJ)
$(CC) -shared -o $@ $(OBJ) $(LDFLAGS) $(CC) -shared -o $@ $(OBJ) $(LDFLAGS)
@@ -71,50 +65,17 @@ $(ARROW_LIB):
cd $(ARROW)/cpp/release && cmake .. -DCMAKE_BUILD_TYPE=Release cd $(ARROW)/cpp/release && cmake .. -DCMAKE_BUILD_TYPE=Release
cd $(ARROW)/cpp/release && make -j$(CPUS) unittest cd $(ARROW)/cpp/release && make -j$(CPUS) unittest
$(BROTLI_COMMON_LIB): $(PARQUET_CPP_LIB): $(ARROW_LIB)
rm -rf $(BROTLI)
git clone https://github.com/google/brotli.git $(BROTLI)
mkdir $(BROTLI)/out
cd $(BROTLI)/out && ../configure-cmake && make -j$(CPUS)
$(LZ4_LIB):
rm -rf $(LZ4)
git clone https://github.com/lz4/lz4.git $(LZ4)
sed -i 's/^CFLAGS *+=/CFLAGS += -fPIC /' $(LZ4)/lib/Makefile
cd $(LZ4) && make -j$(CPUS)
$(PARQUET_CPP_LIB): $(SNAPPY_LIB) $(BROTLI_COMMON_LIB) $(ZLIB_LIB) $(LZ4_LIB) $(ZSTD_LIB)
rm -rf $(PARQUET_CPP) rm -rf $(PARQUET_CPP)
git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP) git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0 cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
cd $(PARQUET_CPP) && SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=Release -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static . cd $(PARQUET_CPP) && SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=Release -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static .
cd $(PARQUET_CPP) && make -j$(CPUS) cd $(PARQUET_CPP) && make -j$(CPUS)
$(SNAPPY_LIB):
rm -rf $(SNAPPY)
git clone https://github.com/google/snappy.git $(SNAPPY)
mkdir $(SNAPPY)/build
cd $(SNAPPY)/build && cmake ..
sed -i '3iset(CMAKE_POSITION_INDEPENDENT_CODE ON)' $(SNAPPY)/CMakeLists.txt
cd $(SNAPPY)/build && make -j$(CPUS)
$(SQLITE3_LIB): $(SQLITE3_LIB):
cd $(ROOT) && ./build-sqlite cd $(ROOT) && ./build-sqlite
$(ZLIB_LIB): .PHONY: clean arrow parquet sqlite
rm -rf $(ZLIB)
git clone https://github.com/madler/zlib.git $(ZLIB)
cd $(ZLIB) && ./configure
sed -i 's/^CFLAGS=-O3/CFLAGS=-fPIC -O3/' $(ZLIB)/Makefile
cd $(ZLIB) && make -j$(CPUS)
$(ZSTD_LIB):
rm -rf $(ZSTD)
git clone https://github.com/facebook/zstd.git $(ZSTD)
sed -i 's/^CFLAGS *+=/CFLAGS += -fPIC /' $(ZSTD)/lib/Makefile
cd $(ZSTD) && make -j$(CPUS)
.PHONY: clean parquet snappy brotli zlib lz4 zstd arrow
clean: clean:
rm -f *.o *.so rm -f *.o *.so
@@ -125,17 +86,6 @@ distclean:
arrow: $(ARROW_LIB) arrow: $(ARROW_LIB)
brotli: $(BROTLI_COMMON_LIB)
lz4: $(LZ4_LIB)
parquet: $(PARQUET_CPP_LIB) parquet: $(PARQUET_CPP_LIB)
snappy: $(SNAPPY_LIB)
sqlite: $(SQLITE3_LIB) sqlite: $(SQLITE3_LIB)
zlib: $(ZLIB_LIB)
zstd: $(ZSTD_LIB)