Add `make-linux-pgo`
Fixes #23, though it is still an open question why applying PGO to arrow/parquet-cpp regressed performance.
This commit is contained in:
parent
1a4f540e18
commit
2167d102b4
|
@ -20,6 +20,10 @@ The first run will git clone a bunch of libraries, patch them to be statically l
|
||||||
|
|
||||||
Subsequent builds will only build the parquet virtual table extension.
|
Subsequent builds will only build the parquet virtual table extension.
|
||||||
|
|
||||||
|
### Building (release)
|
||||||
|
|
||||||
|
Run `./make-linux-pgo` to build an instrumented binary, run the tests to collect real-life usage samples, and then build an optimized binary. PGO seems to give a 5-10% reduction in query times.
|
||||||
|
|
||||||
#### Tests
|
#### Tests
|
||||||
|
|
||||||
Run:
|
Run:
|
||||||
|
|
|
@ -17,7 +17,10 @@ ZLIB=$(ARROW_RELEASE)/zlib_ep/src/zlib_ep-install/lib
|
||||||
ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib
|
ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib
|
||||||
|
|
||||||
# Libraries
|
# Libraries
|
||||||
ARROW_LIB = $(ARROW_RELEASE)/release/libarrow.a
|
# profile_gen, profile_build for PGO
|
||||||
|
APACHE_BUILD=release
|
||||||
|
|
||||||
|
ARROW_LIB = $(ARROW_RELEASE)/$(APACHE_BUILD)/libarrow.a
|
||||||
BOOST_FILESYSTEM_LIB = $(BOOST)/libboost_filesystem.a
|
BOOST_FILESYSTEM_LIB = $(BOOST)/libboost_filesystem.a
|
||||||
BOOST_REGEX_LIB = $(BOOST)/libboost_regex.a
|
BOOST_REGEX_LIB = $(BOOST)/libboost_regex.a
|
||||||
BOOST_SYSTEM_LIB = $(BOOST)/libboost_system.a
|
BOOST_SYSTEM_LIB = $(BOOST)/libboost_system.a
|
||||||
|
@ -28,7 +31,7 @@ ICU_I18N_LIB=$(ICU)/source/lib/libicui18n.a
|
||||||
ICU_UC_LIB=$(ICU)/source/lib/libicuuc.a
|
ICU_UC_LIB=$(ICU)/source/lib/libicuuc.a
|
||||||
ICU_DATA_LIB=$(ICU)/source/lib/libicudata.a
|
ICU_DATA_LIB=$(ICU)/source/lib/libicudata.a
|
||||||
LZ4_LIB = $(LZ4)/liblz4.a
|
LZ4_LIB = $(LZ4)/liblz4.a
|
||||||
PARQUET_CPP_LIB = $(PARQUET_CPP)/build/release/libparquet.a
|
PARQUET_CPP_LIB = $(PARQUET_CPP)/build/$(APACHE_BUILD)/libparquet.a
|
||||||
SNAPPY_LIB = $(SNAPPY)/libsnappy.a
|
SNAPPY_LIB = $(SNAPPY)/libsnappy.a
|
||||||
SQLITE3_LIB = $(SQLITE)/libsqlite3.a
|
SQLITE3_LIB = $(SQLITE)/libsqlite3.a
|
||||||
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
|
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
|
||||||
|
@ -49,8 +52,6 @@ OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
|
||||||
LIBS = $(ARROW_LIB) $(PARQUET_CPP_LIB) $(ICU_I18N_LIB)
|
LIBS = $(ARROW_LIB) $(PARQUET_CPP_LIB) $(ICU_I18N_LIB)
|
||||||
|
|
||||||
PROF =
|
PROF =
|
||||||
#PROF = -fprofile-generate
|
|
||||||
#PROF = -fprofile-use
|
|
||||||
|
|
||||||
libparquet.so: $(LIBS) $(OBJ)
|
libparquet.so: $(LIBS) $(OBJ)
|
||||||
$(CC) $(PROF) -shared -o $@ $(OBJ) $(LDFLAGS)
|
$(CC) $(PROF) -shared -o $@ $(OBJ) $(LDFLAGS)
|
||||||
|
@ -72,7 +73,7 @@ $(ARROW_LIB):
|
||||||
git clone https://github.com/apache/arrow.git $(ARROW)
|
git clone https://github.com/apache/arrow.git $(ARROW)
|
||||||
cd $(ARROW) && git checkout apache-arrow-0.9.0
|
cd $(ARROW) && git checkout apache-arrow-0.9.0
|
||||||
mkdir $(ARROW)/cpp/release
|
mkdir $(ARROW)/cpp/release
|
||||||
cd $(ARROW)/cpp/release && cmake -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_VENDORED=ON -DARROW_BOOST_USE_SHARED=OFF ..
|
cd $(ARROW)/cpp/release && cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DARROW_BOOST_VENDORED=ON -DARROW_BOOST_USE_SHARED=OFF ..
|
||||||
cd $(ARROW)/cpp/release && make -j$(CPUS) unittest
|
cd $(ARROW)/cpp/release && make -j$(CPUS) unittest
|
||||||
|
|
||||||
# This is pretty gross. I'm sure someone who knows what they're doing could do this more cleanly.
|
# This is pretty gross. I'm sure someone who knows what they're doing could do this more cleanly.
|
||||||
|
@ -88,7 +89,7 @@ $(PARQUET_CPP_LIB): $(ARROW_LIB)
|
||||||
rm -rf $(PARQUET_CPP)
|
rm -rf $(PARQUET_CPP)
|
||||||
git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
|
git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
|
||||||
cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
|
cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
|
||||||
cd $(PARQUET_CPP) && BOOST_ROOT=$(BOOST_ROOT) BOOST_STATIC_REGEX_LIBRARY=$(BOOST_REGEX_LIB) SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=Release -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static -DPARQUET_BOOST_USE_SHARED=OFF .
|
cd $(PARQUET_CPP) && BOOST_ROOT=$(BOOST_ROOT) BOOST_STATIC_REGEX_LIBRARY=$(BOOST_REGEX_LIB) SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static -DPARQUET_BOOST_USE_SHARED=OFF .
|
||||||
cd $(PARQUET_CPP) && make -j$(CPUS)
|
cd $(PARQUET_CPP) && make -j$(CPUS)
|
||||||
|
|
||||||
$(SQLITE3_LIB):
|
$(SQLITE3_LIB):
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd "$(dirname "${BASH_SOURCE[0]}")"
|
||||||
|
./make-linux distclean
|
||||||
|
./make-linux PROF=-fprofile-generate
|
||||||
|
./tests/test-all
|
||||||
|
./make-linux clean
|
||||||
|
./make-linux PROF=-fprofile-use
|
Loading…
Reference in New Issue