All-in-one build command
`./make-linux` clones and builds arrow, brotli, lz4, parquet, snappy, zlib and zstd, and then builds this project as a statically linked binary. Two Boost libs are still pulled in as shared libs; that should probably be fixed too, for ultimate portability.
This commit is contained in:
parent
ec6e970bbc
commit
0bdcc9895e
|
@ -51,3 +51,4 @@ tests/queries
|
|||
/tests/test.db
|
||||
/tests/results.bad_alloc
|
||||
/tests/libfailmalloc
|
||||
/build/linux
|
||||
|
|
51
README.md
51
README.md
|
@ -4,15 +4,37 @@ A SQLite [virtual table](https://sqlite.org/vtab.html) extension to expose Parqu
|
|||
|
||||
This [blog post](https://cldellow.com/2018/06/22/sqlite-parquet-vtable.html) provides some context on why you might use this.
|
||||
|
||||
## Download
|
||||
## Installing
|
||||
|
||||
### Download
|
||||
|
||||
You can fetch a version built for Ubuntu 16.04 at https://s3.amazonaws.com/cldellow/public/libparquet/libparquet.so.xz
|
||||
|
||||
### Building
|
||||
|
||||
```
|
||||
./make-linux
|
||||
```
|
||||
|
||||
The first run will git clone a bunch of libraries, patch them to be statically linkable and build them.
|
||||
|
||||
Subsequent builds will only build the parquet virtual table extension.
|
||||
|
||||
#### Tests
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
tests/create-queries-from-templates
|
||||
tests/test-all
|
||||
```
|
||||
|
||||
|
||||
## Use
|
||||
|
||||
```
|
||||
$ sqlite/sqlite3
|
||||
sqlite> .load parquet/libparquet
|
||||
sqlite> .load build/linux/libparquet
|
||||
sqlite> CREATE VIRTUAL TABLE demo USING parquet('parquet-generator/99-rows-1.parquet');
|
||||
sqlite> SELECT * FROM demo;
|
||||
...if all goes well, you'll see data here!...
|
||||
|
@ -21,7 +43,7 @@ sqlite> SELECT * FROM demo;
|
|||
Note: if you get an error like:
|
||||
|
||||
```
|
||||
sqlite> .load parquet/libparquet
|
||||
sqlite> .load build/linux/libparquet
|
||||
Error: build/linux/libparquet.so: wrong ELF class: ELFCLASS64
|
||||
```
|
||||
|
||||
|
@ -89,26 +111,3 @@ These are not currently supported:
|
|||
|
||||
* UINT8/UINT16/UINT32/UINT64
|
||||
* DECIMAL
|
||||
|
||||
## Building
|
||||
|
||||
If you're a masochist, you can try to build this yourself:
|
||||
|
||||
1. Install [`parquet-cpp`](https://github.com/apache/parquet-cpp)
|
||||
1. Master appears to be broken for text row group stats; see https://github.com/cldellow/sqlite-parquet-vtable/issues/5 for which versions to use
|
||||
2. Run `./build-sqlite` to fetch and build the SQLite dev bits
|
||||
3. Run `./parquet/make` to build the module
|
||||
1. You will need to fixup the paths in this file to point at your local parquet-cpp folder.
|
||||
|
||||
You're almost certainly going to regret your life. https://stackoverflow.com/questions/48157198/how-can-i-statically-link-arrow-when-building-parquet-cpp may be useful.
|
||||
|
||||
## Tests
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
tests/create-queries-from-templates
|
||||
tests/test-all
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ fetch_if_needed() {
|
|||
curl --fail "https://sqlite.org/2018/sqlite-autoconf-${VERSION}.tar.gz" > sqlite.tar.gz
|
||||
tar xf sqlite.tar.gz
|
||||
rm sqlite.tar.gz
|
||||
ln -s sqlite-autoconf-${VERSION} sqlite
|
||||
mv sqlite-autoconf-${VERSION} sqlite
|
||||
fi
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,141 @@
|
|||
# Directory that holds this Makefile (./make-linux copies it to build/linux/Makefile).
HERE := $(patsubst %/,%,$(dir $(realpath $(lastword $(MAKEFILE_LIST)))))
# Repository root, two levels above HERE.
ROOT := $(HERE)/../..
# Sources of the parquet virtual table extension.
VTABLE := $(ROOT)/parquet
# SQLite tree; also used as the include path when compiling the extension.
SQLITE := $(ROOT)/sqlite
|
||||
|
||||
# Checkout directories, one per third-party dependency, all directly under HERE.
ARROW       := $(HERE)/arrow
BROTLI      := $(HERE)/brotli
LZ4         := $(HERE)/lz4
PARQUET_CPP := $(HERE)/parquet-cpp
SNAPPY      := $(HERE)/snappy
ZLIB        := $(HERE)/zlib
ZSTD        := $(HERE)/zstd
|
||||
|
||||
# Libraries

# Directory containing the system Boost shared objects. The default matches the
# previously hard-coded location; override it on the command line for other
# distributions or architectures, e.g. `make BOOST_LIBDIR=/usr/lib64`.
BOOST_LIBDIR ?= /usr/lib/x86_64-linux-gnu

# Static archives produced by the dependency rules in this Makefile, plus the
# Boost libraries, which are still linked as shared objects.
ARROW_LIB = $(ARROW)/cpp/release/release/libarrow.a
BOOST_FILESYSTEM_LIB = $(BOOST_LIBDIR)/libboost_filesystem.so
BOOST_LIB = $(BOOST_LIBDIR)/libboost_regex.so
BOOST_SYSTEM_LIB = $(BOOST_LIBDIR)/libboost_system.so
BROTLI_COMMON_LIB = $(BROTLI)/out/libbrotlicommon-static.a
BROTLI_DEC_LIB = $(BROTLI)/out/libbrotlidec-static.a
BROTLI_ENC_LIB = $(BROTLI)/out/libbrotlienc-static.a
LZ4_LIB = $(LZ4)/lib/liblz4.a
PARQUET_CPP_LIB = $(PARQUET_CPP)/build/release/libparquet.a
SNAPPY_LIB = $(SNAPPY)/build/libsnappy.a
SQLITE3_LIB = $(SQLITE)/libsqlite3.a
# NOTE(review): presumably libthrift.a is produced as a side effect of the parquet-cpp build; confirm.
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
ZLIB_LIB = $(ZLIB)/libz.a
ZSTD_LIB = $(ZSTD)/lib/libzstd.a
|
||||
|
||||
# Toolchain and flags. CC is a C++ compiler here: it compiles the .cc sources
# and links the shared object.
CC = g++
# Job count handed to the dependency builds via -j.
CPUS := $(shell nproc)
CFLAGS = -I $(SQLITE) -O3 -std=c++11 -Wall -fPIC -g

# Every static archive is pulled in whole; zlib, OpenSSL and Boost are resolved
# from the system after --no-whole-archive.
# NOTE(review): zlib is linked with -lz rather than $(ZLIB_LIB), although a
# static libz.a is built and listed in LIBS -- confirm this is intended.
LDFLAGS = -O3 \
          -Wl,--whole-archive \
          $(PARQUET_CPP_LIB) $(LZ4_LIB) $(ZSTD_LIB) $(THRIFT_LIB) $(SNAPPY_LIB) $(ARROW_LIB) \
          $(BROTLI_ENC_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) \
          -Wl,--no-whole-archive \
          -lz -lcrypto -lssl $(BOOST_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)

# Object files that make up the extension.
OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o

# Dependency archives that must exist before the extension is linked.
LIBS = $(ARROW_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) $(BROTLI_ENC_LIB) \
       $(LZ4_LIB) $(PARQUET_CPP_LIB) $(SNAPPY_LIB) $(ZLIB_LIB) $(ZSTD_LIB)
|
||||
|
||||
# Default goal: link the extension. The archives in LIBS are prerequisites so
# that they get built first, but only the object files are named explicitly on
# the link line; the archives reach the linker through LDFLAGS.
libparquet.so: $(OBJ) $(LIBS)
	$(CC) -shared -o $@ $(filter %.o,$^) $(LDFLAGS)
|
||||
|
||||
# Compile each extension object from the matching source file in VTABLE.
# This rule comes first so that $< is always the .cc file.
$(OBJ): %.o: $(VTABLE)/%.cc
	$(CC) -c -o $@ $< $(CFLAGS)

# Header dependencies of the individual objects.
parquet_filter.o: $(VTABLE)/parquet_filter.h
parquet_cursor.o: $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
parquet_table.o: $(VTABLE)/parquet_table.h
parquet.o: $(VTABLE)/parquet_cursor.h $(VTABLE)/parquet_table.h $(VTABLE)/parquet_filter.h
|
||||
|
||||
# Fetch Arrow at the pinned apache-arrow-0.9.0 tag and build it in Release mode.
# Any previous checkout is discarded first, so a failed build restarts cleanly.
$(ARROW_LIB):
	rm -rf $(ARROW)
	# Only the tagged commit is needed, so skip the rest of the history.
	git clone --depth 1 --branch apache-arrow-0.9.0 https://github.com/apache/arrow.git $(ARROW)
	mkdir -p $(ARROW)/cpp/release
	cd $(ARROW)/cpp/release && cmake .. -DCMAKE_BUILD_TYPE=Release
	$(MAKE) -C $(ARROW)/cpp/release -j$(CPUS) unittest
|
||||
|
||||
# Fetch and build Brotli; the static archives are written to $(BROTLI)/out.
$(BROTLI_COMMON_LIB):
	rm -rf $(BROTLI)
	git clone --depth 1 https://github.com/google/brotli.git $(BROTLI)
	mkdir -p $(BROTLI)/out
	cd $(BROTLI)/out && ../configure-cmake
	$(MAKE) -C $(BROTLI)/out -j$(CPUS)

# The decoder and encoder archives are listed in LIBS but are produced by the
# same build as the common archive. Without this rule a parallel build can
# reach them before that build finishes and stop with "No rule to make target".
# Order-only: they just have to wait for the Brotli build.
$(BROTLI_DEC_LIB) $(BROTLI_ENC_LIB): | $(BROTLI_COMMON_LIB) ;
|
||||
|
||||
# Fetch and build LZ4. Its lib/Makefile is patched to add -fPIC so that the
# static archive can be linked into the shared extension.
$(LZ4_LIB):
	rm -rf $(LZ4)
	git clone --depth 1 https://github.com/lz4/lz4.git $(LZ4)
	sed -i 's/^CFLAGS *+=/CFLAGS += -fPIC /' $(LZ4)/lib/Makefile
	$(MAKE) -C $(LZ4) -j$(CPUS)
|
||||
|
||||
# Fetch parquet-cpp at the pinned apache-parquet-cpp-1.4.0 tag and build it
# against the static compression libraries built by this Makefile, whose
# locations are passed to cmake through the *_STATIC_LIB environment variables.
$(PARQUET_CPP_LIB): $(SNAPPY_LIB) $(BROTLI_COMMON_LIB) $(ZLIB_LIB) $(LZ4_LIB) $(ZSTD_LIB)
	rm -rf $(PARQUET_CPP)
	# Only the tagged commit is needed, so skip the rest of the history.
	git clone --depth 1 --branch apache-parquet-cpp-1.4.0 https://github.com/apache/parquet-cpp.git $(PARQUET_CPP)
	cd $(PARQUET_CPP) && \
	  SNAPPY_STATIC_LIB=$(SNAPPY_LIB) \
	  BROTLI_STATIC_LIB_ENC=$(BROTLI_ENC_LIB) \
	  BROTLI_STATIC_LIB_DEC=$(BROTLI_DEC_LIB) \
	  BROTLI_STATIC_LIB_COMMON=$(BROTLI_COMMON_LIB) \
	  ZLIB_STATIC_LIB=$(ZLIB_LIB) \
	  LZ4_STATIC_LIB=$(LZ4_LIB) \
	  ZSTD_STATIC_LIB=$(ZSTD_LIB) \
	  cmake -DCMAKE_BUILD_TYPE=Release -DPARQUET_MINIMAL_DEPENDENCY=ON -DPARQUET_ARROW_LINKAGE=static .
	$(MAKE) -C $(PARQUET_CPP) -j$(CPUS)
|
||||
|
||||
# Fetch and build Snappy as position-independent code so that the static
# archive can be linked into the shared extension.
$(SNAPPY_LIB):
	rm -rf $(SNAPPY)
	git clone --depth 1 https://github.com/google/snappy.git $(SNAPPY)
	# Patch CMakeLists.txt before configuring, so that the one cmake run below
	# already sees the setting.
	sed -i '3iset(CMAKE_POSITION_INDEPENDENT_CODE ON)' $(SNAPPY)/CMakeLists.txt
	mkdir -p $(SNAPPY)/build
	cd $(SNAPPY)/build && cmake ..
	$(MAKE) -C $(SNAPPY)/build -j$(CPUS)
|
||||
|
||||
# SQLite is fetched and built by the build-sqlite script, run from the
# repository root.
$(SQLITE3_LIB):
	cd $(ROOT) && ./build-sqlite
|
||||
|
||||
# Fetch and build zlib. The Makefile generated by ./configure is patched to add
# -fPIC so that the static archive can be linked into a shared object.
$(ZLIB_LIB):
	rm -rf $(ZLIB)
	git clone --depth 1 https://github.com/madler/zlib.git $(ZLIB)
	cd $(ZLIB) && ./configure
	sed -i 's/^CFLAGS=-O3/CFLAGS=-fPIC -O3/' $(ZLIB)/Makefile
	$(MAKE) -C $(ZLIB) -j$(CPUS)
|
||||
|
||||
# Fetch and build Zstandard. Its lib/Makefile is patched to add -fPIC so that
# the static archive can be linked into the shared extension.
$(ZSTD_LIB):
	rm -rf $(ZSTD)
	git clone --depth 1 https://github.com/facebook/zstd.git $(ZSTD)
	sed -i 's/^CFLAGS *+=/CFLAGS += -fPIC /' $(ZSTD)/lib/Makefile
	$(MAKE) -C $(ZSTD) -j$(CPUS)
|
||||
|
||||
# Command-style targets. distclean and sqlite are declared as well, so that a
# stray file with one of those names can never make the target look up to date.
.PHONY: clean distclean arrow brotli lz4 parquet snappy sqlite zlib zstd

# Remove the extension's objects and shared library from the current directory.
clean:
	rm -f *.o *.so

# Remove the SQLite tree and the whole build directory, including every
# dependency checkout under it and this Makefile copy.
distclean:
	rm -rf $(SQLITE) $(HERE)

# Short aliases for building a single dependency, e.g. `make snappy`.
arrow: $(ARROW_LIB)

brotli: $(BROTLI_COMMON_LIB)

lz4: $(LZ4_LIB)

parquet: $(PARQUET_CPP_LIB)

snappy: $(SNAPPY_LIB)

sqlite: $(SQLITE3_LIB)

zlib: $(ZLIB_LIB)

zstd: $(ZSTD_LIB)
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# All-in-one Linux build: installs the apt prerequisites, builds SQLite if it
# is missing, then runs make in build/linux. Any arguments are passed through
# to that final make invocation.
set -euo pipefail

# The paths below are relative to the repository root, so switch to the
# directory containing this script instead of relying on the caller's cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

mkdir -p build/linux
cp -f build/Makefile.linux build/linux/Makefile

cd build/linux

# Install prereqs based on https://github.com/apache/parquet-cpp#linux
# and https://github.com/apache/arrow/tree/master/cpp
sudo apt-get install cmake \
  libboost-dev libboost-filesystem-dev \
  libboost-program-options-dev libboost-regex-dev \
  libboost-system-dev libboost-test-dev \
  libssl-dev libtool bison flex pkg-config

# Build SQLite only when the sqlite3 binary is not there yet.
if [ ! -e ../../sqlite/sqlite3 ]; then
  make sqlite
fi

make "$@"
|
|
@ -1,43 +0,0 @@
|
|||
# Locations of the manually built dependencies on the author's machine.
PARQUET_CPP = ~/src/parquet-cpp
PARQUET_DEPS = /home/cldellow/src/parquet-deps

# Compiler and compile flags; the SQLite headers are expected in ../sqlite.
CC = g++
CFLAGS = -I ../sqlite -O3 -std=c++11 -Wall -fPIC -g

# Static archives taken from the parquet-cpp build tree.
PARQUET_LIB = $(PARQUET_CPP)/build/release/libparquet.a
THRIFT_LIB = $(PARQUET_CPP)/thrift_ep/src/thrift_ep-install/lib/libthrift.a
ARROW_LIB = $(PARQUET_CPP)/arrow_ep-prefix/src/arrow_ep-build/release/libarrow.a

# Static archives taken from the separate dependency checkouts.
LZ4_LIB = $(PARQUET_DEPS)/lz4/lib/liblz4.a
SNAPPY_LIB = $(PARQUET_DEPS)/snappy/build/libsnappy.a
ZSTD_LIB = $(PARQUET_DEPS)/zstd/lib/libzstd.a
BROTLI_COMMON_LIB = $(PARQUET_DEPS)/brotli/out/libbrotlicommon-static.a
BROTLI_ENC_LIB = $(PARQUET_DEPS)/brotli/out/libbrotlienc-static.a
BROTLI_DEC_LIB = $(PARQUET_DEPS)/brotli/out/libbrotlidec-static.a

# System Boost shared objects.
BOOST_LIB = /usr/lib/x86_64-linux-gnu/libboost_regex.so
BOOST_SYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_system.so
BOOST_FILESYSTEM_LIB = /usr/lib/x86_64-linux-gnu/libboost_filesystem.so

# Static archives are pulled in whole; everything after --no-whole-archive is
# linked normally.
LDFLAGS = -O3 \
          -Wl,--whole-archive \
          $(PARQUET_LIB) $(LZ4_LIB) $(ZSTD_LIB) $(THRIFT_LIB) $(SNAPPY_LIB) $(ARROW_LIB) \
          $(BROTLI_ENC_LIB) $(BROTLI_COMMON_LIB) $(BROTLI_DEC_LIB) \
          -Wl,--no-whole-archive \
          -lz -lcrypto -lssl $(BOOST_LIB) $(BOOST_SYSTEM_LIB) $(BOOST_FILESYSTEM_LIB)

OBJ = parquet.o parquet_filter.o parquet_table.o parquet_cursor.o

# Default goal: link the extension from its object files.
libparquet.so: $(OBJ)
	$(CC) -shared -o $@ $^ $(LDFLAGS)

# Compile each object from the source file of the same name.
# This rule comes first so that $< is always the .cc file.
$(OBJ): %.o: %.cc
	$(CC) -c -o $@ $< $(CFLAGS)

# Header dependencies of the individual objects.
parquet_filter.o: parquet_filter.h
parquet_cursor.o: parquet_cursor.h parquet_table.h parquet_filter.h
parquet_table.o: parquet_table.h
parquet.o: parquet_cursor.h parquet_table.h parquet_filter.h

.PHONY: clean

# Remove the objects and the shared library.
clean:
	rm -f *.o *.so
|
|
@ -6,7 +6,7 @@ set -euo pipefail
|
|||
load_nonexistent() {
|
||||
cat <<EOF
|
||||
.echo on
|
||||
.load parquet/libparquet
|
||||
.load build/linux/libparquet
|
||||
.testcase notfound
|
||||
.bail on
|
||||
CREATE VIRTUAL TABLE test USING parquet('$root/doesnotexist.parquet');
|
||||
|
|
|
@ -9,7 +9,7 @@ run_query() {
|
|||
query=${2:?must provide query to run}
|
||||
basename=$(basename "$file")
|
||||
cat <<EOF
|
||||
.load parquet/libparquet
|
||||
.load build/linux/libparquet
|
||||
.testcase $basename
|
||||
.bail on
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS nulls1 USING parquet('$root/parquet-generator/99-rows-nulls-1.parquet');
|
||||
|
|
|
@ -69,7 +69,7 @@ def test_statement(conn, table, column_values, all_values):
|
|||
rv = [row for row in conn.execute(new_query)]
|
||||
if gold != rv:
|
||||
with open('testcase-cmds.txt', 'w') as f:
|
||||
f.write('.load parquet/libparquet\n.testcase query\n.bail on\n{};\n.output\n'.format(new_query))
|
||||
f.write('.load build/linux/libparquet\n.testcase query\n.bail on\n{};\n.output\n'.format(new_query))
|
||||
with open('testcase-expected.txt', 'w') as f:
|
||||
for row in gold:
|
||||
f.write('{}\n'.format(row))
|
||||
|
@ -105,5 +105,5 @@ def test_db(db_file, extension_file, tables):
|
|||
|
||||
if __name__ == '__main__':
|
||||
db_file = os.path.abspath(os.path.join(__file__, '..', '..', 'test.db'))
|
||||
extension_file = os.path.abspath(os.path.join(__file__, '..', '..', 'parquet', 'libparquet.so'))
|
||||
extension_file = os.path.abspath(os.path.join(__file__, '..', '..', 'build', 'linux', 'libparquet.so'))
|
||||
test_db(db_file, extension_file, ['nulls', 'no_nulls'])
|
||||
|
|
|
@ -9,7 +9,7 @@ load_supported() {
|
|||
basename=$(basename "$file")
|
||||
cat <<EOF
|
||||
.echo on
|
||||
.load parquet/libparquet
|
||||
.load build/linux/libparquet
|
||||
.testcase $basename
|
||||
.bail on
|
||||
CREATE VIRTUAL TABLE test USING parquet('$file');
|
||||
|
@ -23,7 +23,7 @@ main() {
|
|||
root=$(readlink -f "$root")
|
||||
cd "$root"
|
||||
|
||||
supported_files=$(find . -type f -name '*.parquet' -not -name 'unsupported*.parquet')
|
||||
supported_files=$(find ./parquet-generator/ -type f -name '*.parquet' -not -name 'unsupported*.parquet')
|
||||
while read -r supported; do
|
||||
echo "Testing: $supported"
|
||||
if ! "$root"/sqlite/sqlite3 -init <(load_supported "$supported") < /dev/null > /dev/null 2> testcase-stderr.txt; then
|
||||
|
|
|
@ -9,7 +9,7 @@ load_unsupported() {
|
|||
basename=$(basename "$file")
|
||||
cat <<EOF
|
||||
.echo on
|
||||
.load parquet/libparquet
|
||||
.load build/linux/libparquet
|
||||
.testcase $basename
|
||||
.bail on
|
||||
CREATE VIRTUAL TABLE test USING parquet('$file');
|
||||
|
|
Loading…
Reference in New Issue