All-in-one build command

`./make-linux` clones and builds:

- arrow
- brotli
- lz4
- parquet
- snappy
- zlib
- zstd
- this project

as a statically linked binary. Two Boost libs are still pulled in as
shared libs; that should probably be fixed too, for ultimate portability.
This commit is contained in:
Colin Dellow 2018-06-24 21:11:07 -04:00
parent ec6e970bbc
commit 0bdcc9895e
11 changed files with 199 additions and 77 deletions

1
.gitignore vendored
View File

@ -51,3 +51,4 @@ tests/queries
/tests/test.db /tests/test.db
/tests/results.bad_alloc /tests/results.bad_alloc
/tests/libfailmalloc /tests/libfailmalloc
/build/linux

View File

@ -4,15 +4,37 @@ A SQLite [virtual table](https://sqlite.org/vtab.html) extension to expose Parqu
This [blog post](https://cldellow.com/2018/06/22/sqlite-parquet-vtable.html) provides some context on why you might use this. This [blog post](https://cldellow.com/2018/06/22/sqlite-parquet-vtable.html) provides some context on why you might use this.
## Download ## Installing
### Download
You can fetch a version built for Ubuntu 16.04 at https://s3.amazonaws.com/cldellow/public/libparquet/libparquet.so.xz You can fetch a version built for Ubuntu 16.04 at https://s3.amazonaws.com/cldellow/public/libparquet/libparquet.so.xz
### Building
```
./make-linux
```
The first run will git clone a bunch of libraries, patch them to be statically linkable and build them.
Subsequent builds will only build the parquet virtual table extension.
#### Tests
Run:
```
tests/create-queries-from-templates
tests/test-all
```
## Use ## Use
``` ```
$ sqlite/sqlite3 $ sqlite/sqlite3
sqlite> .load parquet/libparquet sqlite> .load build/linux/libparquet
sqlite> CREATE VIRTUAL TABLE demo USING parquet('parquet-generator/99-rows-1.parquet'); sqlite> CREATE VIRTUAL TABLE demo USING parquet('parquet-generator/99-rows-1.parquet');
sqlite> SELECT * FROM demo; sqlite> SELECT * FROM demo;
...if all goes well, you'll see data here!... ...if all goes well, you'll see data here!...
@ -21,7 +43,7 @@ sqlite> SELECT * FROM demo;
Note: if you get an error like: Note: if you get an error like:
``` ```
sqlite> .load parquet/libparquet sqlite> .load build/linux/libparquet
Error: parquet/libparquet.so: wrong ELF class: ELFCLASS64 Error: parquet/libparquet.so: wrong ELF class: ELFCLASS64
``` ```
@ -89,26 +111,3 @@ These are not currently supported:
* UINT8/UINT16/UINT32/UINT64 * UINT8/UINT16/UINT32/UINT64
* DECIMAL * DECIMAL
## Building
If you're a masochist, you can try to build this yourself:
1. Install [`parquet-cpp`](https://github.com/apache/parquet-cpp)
1. Master appears to be broken for text row group stats; see https://github.com/cldellow/sqlite-parquet-vtable/issues/5 for which versions to use
2. Run `./build-sqlite` to fetch and build the SQLite dev bits
3. Run `./parquet/make` to build the module
1. You will need to fixup the paths in this file to point at your local parquet-cpp folder.
You're almost certainly going to regret your life. https://stackoverflow.com/questions/48157198/how-can-i-statically-link-arrow-when-building-parquet-cpp may be useful.
## Tests
Run:
```
tests/create-queries-from-templates
tests/test-all
```

View File

@ -8,7 +8,7 @@ fetch_if_needed() {
curl --fail "https://sqlite.org/2018/sqlite-autoconf-${VERSION}.tar.gz" > sqlite.tar.gz curl --fail "https://sqlite.org/2018/sqlite-autoconf-${VERSION}.tar.gz" > sqlite.tar.gz
tar xf sqlite.tar.gz tar xf sqlite.tar.gz
rm sqlite.tar.gz rm sqlite.tar.gz
ln -s sqlite-autoconf-${VERSION} sqlite mv sqlite-autoconf-${VERSION} sqlite
fi fi
} }

141
build/Makefile.linux Normal file
View File

@ -0,0 +1,141 @@
# Absolute directory containing this Makefile, with symlinks resolved.
HERE := $(abspath $(dir $(realpath $(lastword $(MAKEFILE_LIST)))))
# Two levels above HERE; make-linux installs this file as build/linux/Makefile.
ROOT := $(HERE)/../..
# Extension sources and the SQLite tree, both located under ROOT.
VTABLE := $(ROOT)/parquet
SQLITE := $(ROOT)/sqlite
# Directories
# Checkout location of each third-party dependency, all directly under $(HERE).
ARROW       := $(HERE)/arrow
BROTLI      := $(HERE)/brotli
LZ4         := $(HERE)/lz4
PARQUET_CPP := $(HERE)/parquet-cpp
SNAPPY      := $(HERE)/snappy
ZLIB        := $(HERE)/zlib
ZSTD        := $(HERE)/zstd
# Libraries
# Static archives produced inside the dependency checkouts.
ARROW_LIB         := $(ARROW)/cpp/release/release/libarrow.a
BROTLI_COMMON_LIB := $(BROTLI)/out/libbrotlicommon-static.a
BROTLI_DEC_LIB    := $(BROTLI)/out/libbrotlidec-static.a
BROTLI_ENC_LIB    := $(BROTLI)/out/libbrotlienc-static.a
LZ4_LIB           := $(LZ4)/lib/liblz4.a
PARQUET_CPP_LIB   := $(PARQUET_CPP)/build/release/libparquet.a
SNAPPY_LIB        := $(SNAPPY)/build/libsnappy.a
THRIFT_LIB        := $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
ZLIB_LIB          := $(ZLIB)/libz.a
ZSTD_LIB          := $(ZSTD)/lib/libzstd.a
# Archive path inside the SQLite tree.
SQLITE3_LIB       := $(SQLITE)/libsqlite3.a
# Boost is taken from the system as shared objects (fixed x86_64 Debian/Ubuntu paths).
BOOST_LIB            := /usr/lib/x86_64-linux-gnu/libboost_regex.so
BOOST_SYSTEM_LIB     := /usr/lib/x86_64-linux-gnu/libboost_system.so
BOOST_FILESYSTEM_LIB := /usr/lib/x86_64-linux-gnu/libboost_filesystem.so
# Flags
# C++ compiler driver, used for both compiling and linking.
CC = g++
# Job count handed to the third-party builds through -j.
CPUS := $(shell nproc)
# -fPIC because the objects are linked into a shared library.
CFLAGS = -I $(SQLITE) -O3 -std=c++11 -Wall -fPIC -g
# Archives listed between --whole-archive and --no-whole-archive are pulled in
# completely. zlib is linked from the locally built $(ZLIB_LIB), which LIBS
# already requires, instead of the system -lz, so the extension does not depend
# on the host's libz. OpenSSL and Boost still come from the system.
LDFLAGS = -O3 \
  -Wl,--whole-archive $(PARQUET_CPP_LIB) $(LZ4_LIB) $(ZSTD_LIB) $(THRIFT_LIB) $(SNAPPY_LIB) $(ARROW_LIB) \
  $(BROTLI_ENC_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) \
  -Wl,--no-whole-archive $(ZLIB_LIB) -lcrypto -lssl $(BOOST_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)
# Object files that make up the extension.
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o
# Third-party archives that must exist before the extension can be linked.
LIBS = $(ARROW_LIB) \
  $(BROTLI_COMMON_LIB) \
  $(BROTLI_DEC_LIB) \
  $(BROTLI_ENC_LIB) \
  $(LZ4_LIB) \
  $(PARQUET_CPP_LIB) \
  $(SNAPPY_LIB) \
  $(ZLIB_LIB) \
  $(ZSTD_LIB)
# Link the extension. $(LIBS) is a prerequisite so the third-party archives are
# built first; the archives reach the linker through $(LDFLAGS).
libparquet.so: $(OBJ) $(LIBS)
	$(CC) -shared -o $@ $(OBJ) $(LDFLAGS)

# Each object is compiled from the same-named .cc file in $(VTABLE).
$(OBJ): %.o: $(VTABLE)/%.cc
	$(CC) -c -o $@ $< $(CFLAGS)

# Header prerequisites of each object.
parquet_filter.o: $(VTABLE)/parquet_filter.h
parquet_cursor.o: $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
parquet_table.o: $(VTABLE)/parquet_table.h
parquet.o: $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
# Fresh clone of Apache Arrow at the apache-arrow-0.9.0 tag, configured as a
# Release build in cpp/release.
# NOTE(review): the `unittest` target presumably also runs Arrow's tests --
# confirm whether a plain library build would be enough.
$(ARROW_LIB):
	rm -rf $(ARROW)
	git clone https://github.com/apache/arrow.git $(ARROW)
	cd $(ARROW) && git checkout apache-arrow-0.9.0 && mkdir cpp/release
	cd $(ARROW)/cpp/release && cmake .. -DCMAKE_BUILD_TYPE=Release && make -j$(CPUS) unittest
# Fresh clone of brotli, built out-of-tree in $(BROTLI)/out.
$(BROTLI_COMMON_LIB):
	rm -rf $(BROTLI)
	git clone https://github.com/google/brotli.git $(BROTLI)
	mkdir $(BROTLI)/out
	cd $(BROTLI)/out && ../configure-cmake && make -j$(CPUS)
# The decoder and encoder archives are expected from the same build as the
# common archive. libparquet.so lists them as prerequisites, so they need a
# rule; without one, a parallel build of a fresh tree stops with "No rule to
# make target". The empty recipe just orders them after the common archive.
$(BROTLI_DEC_LIB) $(BROTLI_ENC_LIB): $(BROTLI_COMMON_LIB) ;
# Fresh clone of lz4; -fPIC is injected into the CFLAGS of lib/Makefile before
# building so the static archive can go into a shared library.
$(LZ4_LIB):
	rm -rf $(LZ4)
	git clone https://github.com/lz4/lz4.git $(LZ4)
	cd $(LZ4) && sed -i 's/^CFLAGS *+=/CFLAGS += -fPIC /' lib/Makefile && make -j$(CPUS)
# Fresh clone of parquet-cpp at the apache-parquet-cpp-1.4.0 tag. The locally
# built compression archives are handed to cmake through *_STATIC_LIB
# environment variables, which is why they are prerequisites here.
$(PARQUET_CPP_LIB): $(SNAPPY_LIB) $(BROTLI_COMMON_LIB) $(ZLIB_LIB) $(LZ4_LIB) $(ZSTD_LIB)
	rm -rf $(PARQUET_CPP)
	git clone https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
	cd $(PARQUET_CPP) && git checkout apache-parquet-cpp-1.4.0
	cd $(PARQUET_CPP) && \
	SNAPPY_STATIC_LIB=$(SNAPPY_LIB) \
	BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) \
	BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) \
	BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) \
	ZLIB_STATIC_LIB=$(ZLIB_LIB) \
	LZ4_STATIC_LIB=$(LZ4_LIB) \
	ZSTD_STATIC_LIB=$(ZSTD_LIB) \
	cmake -DCMAKE_BUILD_TYPE=Release -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static .
	cd $(PARQUET_CPP) && make -j$(CPUS)
# Fresh clone of snappy, built in $(SNAPPY)/build as position-independent code
# so the static archive can be linked into libparquet.so. PIC is requested on
# the cmake command line; the previous recipe edited CMakeLists.txt by line
# number after the build tree had already been configured.
$(SNAPPY_LIB):
	rm -rf $(SNAPPY)
	git clone https://github.com/google/snappy.git $(SNAPPY)
	mkdir $(SNAPPY)/build
	cd $(SNAPPY)/build && cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON ..
	cd $(SNAPPY)/build && make -j$(CPUS)
# SQLite is fetched and built by the repository's build-sqlite script, which
# has to be run from the repository root.
$(SQLITE3_LIB):
	cd $(ROOT) && ./build-sqlite
# Fresh clone of zlib. The sed runs after ./configure and adds -fPIC to the
# CFLAGS line of the checkout's Makefile before the build.
$(ZLIB_LIB):
	rm -rf $(ZLIB)
	git clone https://github.com/madler/zlib.git $(ZLIB)
	cd $(ZLIB) && ./configure && sed -i 's/^CFLAGS=-O3/CFLAGS=-fPIC -O3/' Makefile && make -j$(CPUS)
# Fresh clone of zstd; -fPIC is injected into the CFLAGS of lib/Makefile before
# building so the static archive can go into a shared library.
$(ZSTD_LIB):
	rm -rf $(ZSTD)
	git clone https://github.com/facebook/zstd.git $(ZSTD)
	cd $(ZSTD) && sed -i 's/^CFLAGS *+=/CFLAGS += -fPIC /' lib/Makefile && make -j$(CPUS)
# None of these names is a file this Makefile creates. distclean and sqlite
# are declared as well, so a stray file with either name cannot mask the rule.
.PHONY: clean distclean arrow brotli lz4 parquet snappy sqlite zlib zstd
# Remove only the extension's own objects and shared library.
clean:
	rm -f *.o *.so
# Remove the SQLite tree and the whole build directory, this Makefile included.
distclean:
	rm -rf $(SQLITE) $(HERE)
# Short names for building a single dependency.
arrow: $(ARROW_LIB)
brotli: $(BROTLI_COMMON_LIB)
lz4: $(LZ4_LIB)
parquet: $(PARQUET_CPP_LIB)
snappy: $(SNAPPY_LIB)
sqlite: $(SQLITE3_LIB)
zlib: $(ZLIB_LIB)
zstd: $(ZSTD_LIB)

24
make-linux Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
# All-in-one Linux build: installs the apt prerequisites, stages the Makefile
# into build/linux, builds SQLite if its shell binary is missing, then runs
# make there. Any arguments are forwarded to that final make invocation.
set -euo pipefail

# Every path below is relative to the repository root, so switch to the
# directory holding this script instead of relying on the caller's cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

mkdir -p build/linux
cp -f build/Makefile.linux build/linux/Makefile
cd build/linux

# Install prereqs based on https://github.com/apache/parquet-cpp#linux
sudo apt-get install libboost-dev libboost-filesystem-dev \
                     libboost-program-options-dev libboost-regex-dev \
                     libboost-system-dev libboost-test-dev \
                     libssl-dev libtool bison flex pkg-config

# Install prereqs based on https://github.com/apache/arrow/tree/master/cpp
sudo apt-get install cmake \
     libboost-dev \
     libboost-filesystem-dev \
     libboost-system-dev

# Only fetch and build SQLite when its shell binary is not there yet.
if [ ! -e ../../sqlite/sqlite3 ]; then
  make sqlite
fi

make "$@"

View File

@ -1,43 +0,0 @@
# Legacy build file for the extension. It expects parquet-cpp and the
# compression libraries to be prebuilt at the fixed locations below.
PARQUET_CPP=~/src/parquet-cpp
PARQUET_DEPS = /home/cldellow/src/parquet-deps

CC = g++
CFLAGS = -I ../sqlite -O3 -std=c++11 -Wall -fPIC -g

# Archives built inside the parquet-cpp tree.
PARQUET_LIB = $(PARQUET_CPP)/build/release/libparquet.a
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
ARROW_LIB = $(PARQUET_CPP)/arrow_ep-prefix/src/arrow_ep-build/release/libarrow.a

# Archives built under $(PARQUET_DEPS).
LZ4_LIB = $(PARQUET_DEPS)/lz4/lib/liblz4.a
SNAPPY_LIB = $(PARQUET_DEPS)/snappy/build/libsnappy.a
ZSTD_LIB = $(PARQUET_DEPS)/zstd/lib/libzstd.a
BROTLI_COMMON_LIB = $(PARQUET_DEPS)/brotli/out/libbrotlicommon-static.a
BROTLI_ENC_LIB = $(PARQUET_DEPS)/brotli/out/libbrotlienc-static.a
BROTLI_DEC_LIB = $(PARQUET_DEPS)/brotli/out/libbrotlidec-static.a

# System Boost shared objects.
BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so
BOOST_SYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_system.so
BOOST_FILESYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_filesystem.so

LDFLAGS = -O3 \
  -Wl,--whole-archive $(PARQUET_LIB) $(LZ4_LIB) $(ZSTD_LIB) $(THRIFT_LIB) $(SNAPPY_LIB) $(ARROW_LIB) \
  $(BROTLI_ENC_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) \
  -Wl,--no-whole-archive -lz -lcrypto -lssl $(BOOST_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)

OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o

# Link the extension from its objects.
libparquet.so: $(OBJ)
	$(CC) -shared -o $@ $(OBJ) $(LDFLAGS)

# Each object is compiled from the same-named .cc file in this directory.
$(OBJ): %.o: %.cc
	$(CC) -c -o $@ $< $(CFLAGS)

# Header prerequisites of each object.
parquet_filter.o: parquet_filter.h
parquet_cursor.o: parquet_cursor.h parquet_table.h parquet_filter.h
parquet_table.o: parquet_table.h
parquet.o: parquet_cursor.h parquet_table.h parquet_filter.h

.PHONY: clean
clean:
	rm -f *.o *.so

View File

@ -6,7 +6,7 @@ set -euo pipefail
load_nonexistent() { load_nonexistent() {
cat <<EOF cat <<EOF
.echo on .echo on
.load parquet/libparquet .load build/linux/libparquet
.testcase notfound .testcase notfound
.bail on .bail on
CREATE VIRTUAL TABLE test USING parquet('$root/doesnotexist.parquet'); CREATE VIRTUAL TABLE test USING parquet('$root/doesnotexist.parquet');

View File

@ -9,7 +9,7 @@ run_query() {
query=${2:?must provide query to run} query=${2:?must provide query to run}
basename=$(basename "$file") basename=$(basename "$file")
cat <<EOF cat <<EOF
.load parquet/libparquet .load build/linux/libparquet
.testcase $basename .testcase $basename
.bail on .bail on
CREATE VIRTUAL TABLE IF NOT EXISTS nulls1 USING parquet('$root/parquet-generator/99-rows-nulls-1.parquet'); CREATE VIRTUAL TABLE IF NOT EXISTS nulls1 USING parquet('$root/parquet-generator/99-rows-nulls-1.parquet');

View File

@ -69,7 +69,7 @@ def test_statement(conn, table, column_values, all_values):
rv = [row for row in conn.execute(new_query)] rv = [row for row in conn.execute(new_query)]
if gold != rv: if gold != rv:
with open('testcase-cmds.txt', 'w') as f: with open('testcase-cmds.txt', 'w') as f:
f.write('.load parquet/libparquet\n.testcase query\n.bail on\n{};\n.output\n'.format(new_query)) f.write('.load build/linux/libparquet\n.testcase query\n.bail on\n{};\n.output\n'.format(new_query))
with open('testcase-expected.txt', 'w') as f: with open('testcase-expected.txt', 'w') as f:
for row in gold: for row in gold:
f.write('{}\n'.format(row)) f.write('{}\n'.format(row))
@ -105,5 +105,5 @@ def test_db(db_file, extension_file, tables):
if __name__ == '__main__': if __name__ == '__main__':
db_file = os.path.abspath(os.path.join(__file__, '..', '..', 'test.db')) db_file = os.path.abspath(os.path.join(__file__, '..', '..', 'test.db'))
extension_file = os.path.abspath(os.path.join(__file__, '..', '..', 'parquet', 'libparquet.so')) extension_file = os.path.abspath(os.path.join(__file__, '..', '..', 'build', 'linux', 'libparquet.so'))
test_db(db_file, extension_file, ['nulls', 'no_nulls']) test_db(db_file, extension_file, ['nulls', 'no_nulls'])

View File

@ -9,7 +9,7 @@ load_supported() {
basename=$(basename "$file") basename=$(basename "$file")
cat <<EOF cat <<EOF
.echo on .echo on
.load parquet/libparquet .load build/linux/libparquet
.testcase $basename .testcase $basename
.bail on .bail on
CREATE VIRTUAL TABLE test USING parquet('$file'); CREATE VIRTUAL TABLE test USING parquet('$file');
@ -23,7 +23,7 @@ main() {
root=$(readlink -f "$root") root=$(readlink -f "$root")
cd "$root" cd "$root"
supported_files=$(find . -type f -name '*.parquet' -not -name 'unsupported*.parquet') supported_files=$(find ./parquet-generator/ -type f -name '*.parquet' -not -name 'unsupported*.parquet')
while read -r supported; do while read -r supported; do
echo "Testing: $supported" echo "Testing: $supported"
if ! "$root"/sqlite/sqlite3 -init <(load_supported "$supported") < /dev/null > /dev/null 2> testcase-stderr.txt; then if ! "$root"/sqlite/sqlite3 -init <(load_supported "$supported") < /dev/null > /dev/null 2> testcase-stderr.txt; then

View File

@ -9,7 +9,7 @@ load_unsupported() {
basename=$(basename "$file") basename=$(basename "$file")
cat <<EOF cat <<EOF
.echo on .echo on
.load parquet/libparquet .load build/linux/libparquet
.testcase $basename .testcase $basename
.bail on .bail on
CREATE VIRTUAL TABLE test USING parquet('$file'); CREATE VIRTUAL TABLE test USING parquet('$file');