Add `make-linux-pgo`

fixes #23, with perhaps some open questions about why PGO on
arrow/parquet-cpp regressed things.
This commit is contained in:
Colin Dellow 2018-06-27 22:23:22 -04:00
parent 1a4f540e18
commit 2167d102b4
3 changed files with 20 additions and 6 deletions

View File

@ -20,6 +20,10 @@ The first run will git clone a bunch of libraries, patch them to be statically l
Subsequent builds will only build the parquet virtual table extension. Subsequent builds will only build the parquet virtual table extension.
### Building (release)
Run `./make-linux-pgo` to build an instrumented binary, run tests to collect real-life usage samples, then build an optimized binary. PGO seems to give a 5-10% reduction in query times.
#### Tests #### Tests
Run: Run:

View File

@ -17,7 +17,10 @@ ZLIB=$(ARROW_RELEASE)/zlib_ep/src/zlib_ep-install/lib
ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib ZSTD=$(ARROW_RELEASE)/zstd_ep-prefix/src/zstd_ep/lib
# Libraries # Libraries
ARROW_LIB = $(ARROW_RELEASE)/release/libarrow.a # profile_gen, profile_build for PGO
APACHE_BUILD=release
ARROW_LIB = $(ARROW_RELEASE)/$(APACHE_BUILD)/libarrow.a
BOOST_FILESYSTEM_LIB = $(BOOST)/libboost_filesystem.a BOOST_FILESYSTEM_LIB = $(BOOST)/libboost_filesystem.a
BOOST_REGEX_LIB = $(BOOST)/libboost_regex.a BOOST_REGEX_LIB = $(BOOST)/libboost_regex.a
BOOST_SYSTEM_LIB = $(BOOST)/libboost_system.a BOOST_SYSTEM_LIB = $(BOOST)/libboost_system.a
@ -28,7 +31,7 @@ ICU_I18N_LIB=$(ICU)/source/lib/libicui18n.a
ICU_UC_LIB=$(ICU)/source/lib/libicuuc.a ICU_UC_LIB=$(ICU)/source/lib/libicuuc.a
ICU_DATA_LIB=$(ICU)/source/lib/libicudata.a ICU_DATA_LIB=$(ICU)/source/lib/libicudata.a
LZ4_LIB = $(LZ4)/liblz4.a LZ4_LIB = $(LZ4)/liblz4.a
PARQUET_CPP_LIB = $(PARQUET_CPP)/build/release/libparquet.a PARQUET_CPP_LIB = $(PARQUET_CPP)/build/$(APACHE_BUILD)/libparquet.a
SNAPPY_LIB = $(SNAPPY)/libsnappy.a SNAPPY_LIB = $(SNAPPY)/libsnappy.a
SQLITE3_LIB = $(SQLITE)/libsqlite3.a SQLITE3_LIB = $(SQLITE)/libsqlite3.a
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
@ -49,8 +52,6 @@ OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
LIBS = $(ARROW_LIB) $(PARQUET_CPP_LIB) $(ICU_I18N_LIB) LIBS = $(ARROW_LIB) $(PARQUET_CPP_LIB) $(ICU_I18N_LIB)
PROF = PROF =
#PROF = -fprofile-generate
#PROF = -fprofile-use
libparquet.so: $(LIBS) $(OBJ) libparquet.so: $(LIBS) $(OBJ)
$(CC) $(PROF) -shared -o $@ $(OBJ) $(LDFLAGS) $(CC) $(PROF) -shared -o $@ $(OBJ) $(LDFLAGS)
@ -72,7 +73,7 @@ $(ARROW_LIB):
git clone https://github.com/apache/arrow.git $(ARROW) git clone https://github.com/apache/arrow.git $(ARROW)
cd $(ARROW) && git checkout apache-arrow-0.9.0 cd $(ARROW) && git checkout apache-arrow-0.9.0
mkdir $(ARROW)/cpp/release mkdir $(ARROW)/cpp/release
cd $(ARROW)/cpp/release && cmake -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_VENDORED=ON -DARROW_BOOST_USE_SHARED=OFF .. cd $(ARROW)/cpp/release && cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DARROW_BOOST_VENDORED=ON -DARROW_BOOST_USE_SHARED=OFF ..
cd $(ARROW)/cpp/release && make -j$(CPUS) unittest cd $(ARROW)/cpp/release && make -j$(CPUS) unittest
# This is pretty gross. I'm sure someone who knows what they're doing could do this more cleanly. # This is pretty gross. I'm sure someone who knows what they're doing could do this more cleanly.
@ -88,7 +89,7 @@ $(PARQUET_CPP_LIB): $(ARROW_LIB)
rm -rf $(PARQUET_CPP) rm -rf $(PARQUET_CPP)
git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP) git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0 cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
cd $(PARQUET_CPP) && BOOST_ROOT=$(BOOST_ROOT) BOOST_STATIC_REGEX_LIBRARY=$(BOOST_REGEX_LIB) SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=Release -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static -DPARQUET_BOOST_USE_SHARED=OFF . cd $(PARQUET_CPP) && BOOST_ROOT=$(BOOST_ROOT) BOOST_STATIC_REGEX_LIBRARY=$(BOOST_REGEX_LIB) SNAPPY_STATIC_LIB=$(SNAPPY_LIB) BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) ZLIB_STATIC_LIB=$(ZLIB_LIB) LZ4_STATIC_LIB=$(LZ4_LIB) ZSTD_STATIC_LIB=$(ZSTD_LIB) cmake -DCMAKE_BUILD_TYPE=$(APACHE_BUILD) -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static -DPARQUET_BOOST_USE_SHARED=OFF .
cd $(PARQUET_CPP) && make -j$(CPUS) cd $(PARQUET_CPP) && make -j$(CPUS)
$(SQLITE3_LIB): $(SQLITE3_LIB):

9
make-linux-pgo Executable file
View File

@ -0,0 +1,9 @@
#!/bin/bash
# Profile-guided optimization (PGO) build driver.
#
# Builds an instrumented binary, runs the test suite against it to collect
# profile samples, then rebuilds using the collected profile.
#
# Every step delegates to the sibling ./make-linux wrapper; the PROF=...
# argument is handed to it unchanged (presumably forwarded to make, where
# PROF is added to the compiler command line -- confirm in make-linux).

# Stop at the first failing step, on any unset variable, or on a failure
# anywhere inside a pipeline.
set -euo pipefail

# Work from the directory that holds this script so the relative paths below
# resolve no matter where the script is invoked from.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
cd "${script_dir}"

# Thin wrapper so each phase reads as a single verb.
build() {
  ./make-linux "$@"
}

# Phase 0: start from a fully cleaned tree.
build distclean

# Phase 1: instrumented build.
build PROF=-fprofile-generate

# Phase 2: exercise the instrumented build with the test suite.
./tests/test-all

# Phase 3: clean, then rebuild with the profile.
# NOTE(review): this relies on `clean` leaving the profile data produced in
# phase 2 in place -- verify against the Makefile's clean target.
build clean
build PROF=-fprofile-use