1
0
mirror of https://github.com/cldellow/sqlite-parquet-vtable.git synced 2025-09-08 22:24:56 +00:00

Rejig parquet generation

- "fixed_size_binary" -> "binary_10"
- make null parquet use rowgroups of size 10: first rowgroup
  has no nulls, 2nd has all null, 3rd-10th have alternating
  nulls

This is prep for making a Postgres layer to use as an oracle
for generating test cases so that we have good coverage before
implementing advanced `xBestIndex` and `xFilter` modes.
This commit is contained in:
Colin Dellow
2018-03-06 21:02:26 -05:00
parent 56245c1d3d
commit 0d4806ca6f
7 changed files with 79 additions and 17 deletions

Binary file not shown.

View File

@@ -62,6 +62,7 @@ def write_parquet(file_name, rows, types, row_group_size):
def name_of(i):
name = '{}_{}'.format(types[i], i)
name = name.replace('timestamp[ns]', 'ts')
name = name.replace('fixed_size_binary[1]', 'binary')
return name
cols = [pa.Column.from_array(name_of(i), fields[i]) for i in range(len(fields))]
@@ -104,9 +105,9 @@ def main():
for i in range(len(rows)):
for j in range(len(rows[i])):
if (i + j) % 2 == 0:
if (i >= 10 and i <= 19) or (i >= 20 and (i + j) % 2 == 0):
rows[i][j] = None
write_parquet('100-rows-nulls.parquet', rows, types,row_group_size=100)
write_parquet('100-rows-nulls.parquet', rows, types,row_group_size=10)
write_unsupported_parquets()