#!/usr/bin/python3
import argparse
import os
import subprocess

DESCRIPTION = '''sqlite-parquet-vtable test runner
Run a query against several data files with varying encodings,
and verify that the output matches an expected value
'''

# Statements sent to sqlite3 ahead of every test-specific query.
COMMON_QUERY_LINES = [
    '.echo off',
    '.load ./libparquet',
]


def read(filename):
    '''Return the full text of filename, closing the file once it has been read.

    The file is decoded as UTF-8, the same encoding used for the text piped
    to and from the sqlite3 subprocess in dispatch().
    '''
    with open(filename, encoding='UTF-8') as file:
        return file.read()


def dispatch(query_file, results_file, expected_exit_code, datasets):
    '''Run a query against assorted formats of the datasets, verifying the result each time.

    query_file -- path of the file holding the SQL to run
    results_file -- path of the file holding the exact stdout expected from sqlite3
    expected_exit_code -- exit status sqlite3 should return (int, or a string holding one)
    datasets -- iterable of directories; every entry ending in .parquet is tested

    Raises AssertionError when sqlite3's exit status or stdout differs from
    what was expected for any file.
    '''
    query = read(query_file)
    expected_results = read(results_file)
    # argparse hands over strings unless told otherwise, while
    # subprocess reports an int; normalise so the comparison is meaningful.
    expected_exit_code = int(expected_exit_code)
    for dataset in datasets:
        # Sorted so that runs (and the first reported failure) are reproducible.
        for entry in sorted(os.listdir(dataset)):
            if not entry.endswith('.parquet'):
                print(f'Ignoring {entry} -- does not end in .parquet')
                continue
            # os.listdir() yields bare names; prefix the directory so the
            # path still resolves when the dataset is not the working directory.
            parquet_path = os.path.join(dataset, entry)
            vtable_statement = f"CREATE VIRTUAL TABLE dataset USING parquet('{parquet_path}');"
            # Append test-specified query to common lines, insert \n between lines
            full_query = '\n'.join(COMMON_QUERY_LINES + [vtable_statement, query])
            proc = subprocess.run(
                'sqlite3',
                stdout=subprocess.PIPE,
                input=full_query,
                encoding='UTF-8',
                check=False
            )
            # Raised explicitly rather than via `assert`, which is removed
            # under `python -O` and would make every test pass silently.
            if proc.returncode != expected_exit_code:
                raise AssertionError(
                    f'{parquet_path}: sqlite3 exited with {proc.returncode}, '
                    f'expected {expected_exit_code}'
                )
            if proc.stdout != expected_results:
                raise AssertionError(
                    f'{parquet_path}: unexpected output\n'
                    f'expected: {expected_results!r}\n'
                    f'actual:   {proc.stdout!r}'
                )


if __name__ == '__main__':
    PARSER = argparse.ArgumentParser(description=DESCRIPTION)
    PARSER.add_argument('query', metavar='query', help='the .sql file containing the query')
    PARSER.add_argument('results', help='the file containing .output format expected results')
    PARSER.add_argument('exit_code', type=int, help='the exit code you expect to recieve')
    PARSER.add_argument('dataset', nargs='+', help='a dataset to run the query against')

    ARGS = PARSER.parse_args()

    # Verify that each query and result file exist
    if not os.path.isfile(ARGS.query):
        PARSER.error(f'query file not found: {ARGS.query}')
    if not os.path.isfile(ARGS.results):
        PARSER.error(f'results file not found: {ARGS.results}')
    # Verify that each dataset argument is a folder (ideally with .parquet files inside)
    for dataset_dir in ARGS.dataset:
        if not os.path.isdir(dataset_dir):
            PARSER.error(f'dataset is not a directory: {dataset_dir}')

    dispatch(
        query_file=ARGS.query,
        results_file=ARGS.results,
        expected_exit_code=ARGS.exit_code,
        datasets=ARGS.dataset
    )