1
0
mirror of https://github.com/cldellow/sqlite-parquet-vtable.git synced 2025-04-03 09:39:47 +00:00
2019-12-11 11:18:03 -06:00

78 lines
3.2 KiB
Python

#!/usr/bin/python3
import argparse
import os
import subprocess
DESCRIPTION = '''sqlite-parquet-vtable test runner
Run a query against several data files with varying encodings,
and verify that the output matches an expected value
'''
COMMON_QUERY_LINES = [
'.echo off',
'.load ./libparquet',
]
TEST_ROOT = os.path.dirname(os.path.abspath(__file__))
def read(filename):
'''Take a filename, read it into a variable -- critically, closing it after we're done'''
with open(filename) as file:
return file.read()
def run_and_verify(query, expected_results, expected_exit_code):
'''Run a given query, compare the results and error code'''
proc = subprocess.run(
'sqlite3',
stdout=subprocess.PIPE,
input=query,
encoding='UTF-8',
check=False
)
assert proc.returncode == expected_exit_code, f'Expected exit code {expected_exit_code}, found {proc.returncode}'
assert proc.stdout == expected_results, f'Expected stdout to be {expected_results}, it was actually {proc.stdout}'
def dispatch(query_file, results_file, expected_exit_code, datasets):
'''Run a query against assorted formats of the datasets, verifying the result each time'''
query = read(query_file)
expected_results = read(results_file)
# Some of the error test cases don't need multiple datasets
if len(datasets) == 0:
run_and_verify(query, expected_results, expected_exit_code)
# For most tests, run against all members of a dataset
for dataset in datasets:
for file in os.listdir(TEST_ROOT+'/datasets/'+dataset):
filepath = TEST_ROOT+'/datasets/'+dataset+'/'+file
if not filepath.endswith('.parquet'):
print(f'Ignoring {file} -- does not end in .parquet')
continue
vtable_statement = f'CREATE VIRTUAL TABLE dataset USING parquet(\'{filepath}\');'
# Append test-specified query to common lines, insert \n between lines
full_query = '\n'.join(COMMON_QUERY_LINES+[vtable_statement, query])
run_and_verify(full_query, expected_results, expected_exit_code)
if __name__ == '__main__':
PARSER = argparse.ArgumentParser(description=DESCRIPTION)
PARSER.add_argument('query', help='the .sql file containing the query to run')
PARSER.add_argument('results', help='the file containing the expected result of the query')
PARSER.add_argument('exit_code', help='the exit code you expect to recieve')
PARSER.add_argument('dataset', nargs='*', help='a dataset to run the query against')
ARGS = PARSER.parse_args()
# Verify that each query and result file exist
assert os.path.isfile(ARGS.query), f'Expected to find query file at {ARGS.query}'
assert os.path.isfile(ARGS.results), f'Expected to find results file at {ARGS.results}'
# Verify that each dataset argument is a folder (ideally with .parquet files inside)
for dataset_dir in ARGS.dataset:
assert os.path.isdir(TEST_ROOT+'/datasets/'+dataset_dir), f'Expected dataset to be a directory'
dispatch(
query_file=ARGS.query,
results_file=ARGS.results,
expected_exit_code=int(ARGS.exit_code),
datasets=ARGS.dataset
)