mirror of
				https://github.com/cldellow/sqlite-parquet-vtable.git
				synced 2025-11-04 02:39:56 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			66 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/python3
 | 
						|
 | 
						|
import os.path
 | 
						|
from os import makedirs
 | 
						|
from glob import glob
 | 
						|
import re
 | 
						|
import itertools
 | 
						|
 | 
						|
# Placeholder tokens that may appear on the first line of a template.
# They come in two families; main() rejects a template that uses both.
NULL_TOKENS = ['nulls1', 'nulls2', 'nulls3']
NO_NULL_TOKENS = ['no_nulls1', 'no_nulls2', 'no_nulls3']

# Every placeholder recognised by extract_tokens().
TOKEN_SET = NULL_TOKENS + NO_NULL_TOKENS
 | 
						|
 | 
						|
def extract_tokens(line, token_set=None):
    """Return the distinct placeholder tokens found in *line*.

    The line is split into words made of ASCII letters, digits and
    underscores; a word counts as a token only if it equals an entry of the
    token set exactly (so ``no_nulls1`` does not also yield ``nulls1``).

    Tokens are returned in order of first appearance. The order matters to
    callers that assign substitutions by position, and building the result
    through a ``set`` would make it vary between interpreter runs because
    string hashing is randomised.

    Args:
        line: Text to scan.
        token_set: Optional collection of recognised tokens. Defaults to
            the module-level ``TOKEN_SET``.

    Returns:
        A list of the recognised tokens, each appearing once.
    """
    allowed = TOKEN_SET if token_set is None else token_set

    found = []
    for word in re.findall(r'[a-zA-Z0-9_]+', line):
        if word in allowed and word not in found:
            found.append(word)
    return found
 | 
						|
 | 
						|
def main(template_dir, query_dir):
    """Expand every ``*.sql`` template into its concrete query variants.

    The first line of each template must mention one or more placeholder
    tokens, all from the same family (``NULL_TOKENS`` or
    ``NO_NULL_TOKENS``). For each result of
    ``itertools.combinations_with_replacement`` over that family, sized to
    the number of distinct placeholders, a file ``<name>-<variant>.sql`` is
    written to *query_dir*. Only the first line has its placeholders
    substituted; the remaining lines are copied unchanged.

    Args:
        template_dir: Directory searched (non-recursively) for ``*.sql``.
        query_dir: Output directory. It is created if missing, and every
            entry matched by ``<query_dir>/*`` is removed before writing.

    Raises:
        ValueError: If a template is empty, its first line contains no
            placeholder token, or it mixes the two token families.
        OSError: If the output directory cannot be created or cleared, or
            a file cannot be read or written.
    """
    # Tolerate only "directory already exists"; anything else (permissions,
    # a regular file in the way) should surface instead of being swallowed.
    makedirs(query_dir, exist_ok=True)

    # Remove whatever a previous run left in the output directory.
    for stale in glob(os.path.join(query_dir, '*')):
        os.remove(stale)

    for path in glob(os.path.join(template_dir, '*.sql')):
        root, ext = os.path.splitext(os.path.basename(path))

        with open(path, 'r') as fptr:
            content = fptr.readlines()

        if not content:
            raise ValueError('{} is empty'.format(path))
        first_line, rest = content[0], content[1:]

        # Longest first (so no_nullsX would win over nullsX if mixing were
        # ever allowed), then alphabetical so that the token order -- and
        # with it the variant numbering -- is the same on every run.
        tokens = sorted(extract_tokens(first_line), key=lambda x: (-len(x), x))
        if not tokens:
            raise ValueError('{} had no tokens'.format(first_line))

        # For simplicity, we don't support mixing nulls and no_nulls.
        # The two families differ in token length, which is what this tests.
        if not all(len(token) == len(tokens[0]) for token in tokens):
            raise ValueError('cannot mix nulls and no_nulls: {}'.format(first_line))

        replacements = NO_NULL_TOKENS if tokens[0].startswith('no') else NULL_TOKENS

        # Match placeholders only as whole words, using the same notion of
        # "word character" as extract_tokens. A single-pass substitution
        # also means a replacement value is never itself re-substituted.
        placeholder = re.compile(
            r'(?<![a-zA-Z0-9_])(?:{})(?![a-zA-Z0-9_])'.format(
                '|'.join(re.escape(token) for token in tokens)))

        combos = itertools.combinations_with_replacement(replacements, len(tokens))
        for variant, choices in enumerate(combos):
            mapping = dict(zip(tokens, choices))
            query = placeholder.sub(lambda match: mapping[match.group(0)], first_line)

            # splitext touches only the real extension, unlike a blanket
            # str.replace('.sql', ...) on the whole file name.
            fname = '{}-{}{}'.format(root, variant, ext)
            with open(os.path.join(query_dir, fname), 'w') as fptr:
                fptr.write(query)
                fptr.writelines(rest)
 | 
						|
 | 
						|
if __name__ == '__main__':
    # Templates are read from, and queries written to, directories that sit
    # next to this script, regardless of the current working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    template_dir = os.path.join(script_dir, 'templates')
    query_dir = os.path.join(script_dir, 'queries')
    main(template_dir, query_dir)
 |