Colin Dellow 
							
						 
					 
					
						
						
							
						
						599430b2f4 
					 
					
						
						
							
							Add #ifdefs around printfs  
						
						 
						
						
						
						
					 
					
						2018-03-20 19:57:12 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						5480de7fb6 
					 
					
						
						
							
							Compile w/static linkages for parquet  
						
						 
						
						... 
						
						
						
						Fixes #4. A stock Ubuntu 14.04 can now install sqlite3:amd64 and
libboost-all-dev, then use this module to read the test parquet file. 
						
						
					 
					
						2018-03-20 19:06:39 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						8bf890ab66 
					 
					
						
						
							
							Fix incorrect row pruning for non-text BYTE_ARRAY  
						
						 
						
						
						
						
					 
					
						2018-03-18 19:43:09 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						e2af2a07a4 
					 
					
						
						
							
							Make rowid start from 1, not 0  
						
						 
						
						... 
						
						
						
						Unclear whether this is strictly required, but I'm going to start using
SQLite as an oracle, and it'll be simpler if our rowids match theirs. 
						
						
					 
					
						2018-03-18 17:03:46 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						1f3ffce560 
					 
					
						
						
							
							Row group filtering for BYTE_ARRAY  
						
						 
						
						
						
						
					 
					
						2018-03-18 15:03:08 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						7b302a0eb2 
					 
					
						
						
							
							Bail on rowId constraint when non-int  
						
						 
						
						
						
						
					 
					
						2018-03-18 14:31:23 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						3b557f7fb0 
					 
					
						
						
							
							Add explicit test for file not found  
						
						 
						
						... 
						
						
						
						...caching the metadata moved where ParquetTable did I/O,
which introduced a segfault on not found 
						
						
					 
					
						2018-03-18 11:58:23 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						4cbde9fc09 
					 
					
						
						
							
							Row filtering for doubles  
						
						 
						
						
						
						
					 
					
						2018-03-17 16:09:57 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						86e09b111e 
					 
					
						
						
							
							Add row filtering for int32/64/96/boolean  
						
						 
						
						
						
						
					 
					
						2018-03-17 16:05:38 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						a3af16eb54 
					 
					
						
						
							
							Row-filtering for other string ops  
						
						 
						
						
						
						
					 
					
						2018-03-17 15:28:51 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						03a20a9432 
					 
					
						
						
							
							LIKE row group filtering  
						
						 
						
						... 
						
						
						
						~1.7s -> ~1.0s for the census data set on `LIKE 'Dawson %'` 
						
						
					 
					
						2018-03-17 00:11:38 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						01e8ffaba7 
					 
					
						
						
							
							Row group filtering for double/float  
						
						 
						
						
						
						
					 
					
						2018-03-16 16:30:05 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						9c22fd1f57 
					 
					
						
						
							
							Row group filters for strings, int32/64/96, bools  
						
						 
						
						
						
						
					 
					
						2018-03-16 16:07:41 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						1f4cebe2a6 
					 
					
						
						
							
							Don't use accessors  
						
						 
						
						... 
						
						
						
						This drops the `= 'Dawson Creek'` query from 210ms to 145ms.
Maybe inlining would have been an option here? I'm not familiar enough
with g++ to know. :( 
						
						
					 
					
						2018-03-15 23:04:11 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						8ba13f44d5 
					 
					
						
						
							
							Remove unnecessary copy  
						
						 
						
						... 
						
						
						
						Now the `== 'Dawson Creek'` query is ~210ms, which is approx the
same as a `count(*)` query. This seems maybe OK, since the row group
filter is only excluding 30% of records. 
						
						
					 
					
						2018-03-15 22:10:45 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						f7f1ed03d1 
					 
					
						
						
							
							add row filter for string ==  
						
						 
						
						... 
						
						
						
						This gets the census `== 'Dawson Creek'` query down to ~410ms from
~650ms.
That still seems much slower than it should be. Am I accidentally
doing a copy? Now to go learn how to profile C++ code... 
						
						
					 
					
						2018-03-15 21:37:52 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						6648ff5968 
					 
					
						
						
							
							add string == row group filter  
						
						 
						
						... 
						
						
						
						For the statscan census set filtering on `== 'Dawson Creek'`, the query
goes from 980ms to 660ms.
This is expected, since the data isn't sorted by that column.
I'll try adding some scaffolding to do filtering at the row level, too.
We could also try unpacking the dictionary and testing the individual
values, although we may want some heuristics to decide whether it's
worth doing -- eg if < 10% of the rows have a unique value.
Ideally, this should be like a ~1ms query. 
						
						
					 
					
						2018-03-15 20:40:21 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						dc431aee20 
					 
					
						
						
							
							Dispatch row group filtering based on parquet type  
						
						 
						
						
						
						
					 
					
						2018-03-15 20:25:02 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						92ba5f94e0 
					 
					
						
						
							
							reuse FileMetaData  
						
						 
						
						... 
						
						
						
						For the statscan dataset, parsing the file metadata takes ~30-40ms,
so stash it away for future re-use. 
						
						
					 
					
						2018-03-15 19:57:38 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						769060dbcb 
					 
					
						
						
							
							Add stub row group filters for text/int/dbl  
						
						 
						
						... 
						
						
						
						Checkpointing to investigate why min/max stats for text aren't
present 
						
						
					 
					
						2018-03-12 23:07:41 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						110e3e3668 
					 
					
						
						
							
							row group skipping for is [not] null queries  
						
						 
						
						
						
						
					 
					
						2018-03-12 21:09:00 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						95748a5192 
					 
					
						
						
							
							Remove bool from Constraint  
						
						 
						
						
						
						
					 
					
						2018-03-12 20:50:30 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						acc15256ec 
					 
					
						
						
							
							Add rowgroup filtering for rowid  
						
						 
						
						
						
						
					 
					
						2018-03-12 20:42:50 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						1f938a005d 
					 
					
						
						
							
							More test cases to deal with affinity  
						
						 
						
						... 
						
						
						
						I'm not sure how these manifest - whether SQLite retypes them based on
column affinity before we see them, or whether they're provided as is. 
						
						
					 
					
						2018-03-11 19:18:44 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						095b576cc2 
					 
					
						
						
							
							Scaffolding for row group filters, tests  
						
						 
						
						... 
						
						
						
						rowid is special since its column index is -1, so add
explicit tests around it 
						
						
					 
					
						2018-03-11 15:44:51 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						5559a7b563 
					 
					
						
						
							
							Fix when last rowgroup is not same size as first  
						
						 
						
						... 
						
						
						
						...change test data to use 99 rows, so that when we have
rowgroup size 10 we exercise this code. 
						
						
					 
					
						2018-03-11 15:15:27 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						830053c1fc 
					 
					
						
						
							
							Scaffolding for in-extension filtering  
						
						 
						
						... 
						
						
						
						Supports IS NULL and IS NOT NULL checks 
						
						
					 
					
						2018-03-11 13:58:10 -04:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						d28ae86d15 
					 
					
						
						
							
							Test unusable constraints  
						
						 
						
						
						
						
					 
					
						2018-03-10 13:38:34 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						96fcafcd2f 
					 
					
						
						
							
							Add test cases  
						
						 
						
						
						
						
					 
					
						2018-03-10 13:25:13 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						210f322a1c 
					 
					
						
						
							
							Code to pretty print constraints  
						
						 
						
						
						
						
					 
					
						2018-03-10 10:59:53 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						2bc054a2cf 
					 
					
						
						
							
							Add crappy Makefile  
						
						 
						
						
						
						
					 
					
						2018-03-10 10:46:10 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						824a416f51 
					 
					
						
						
							
							better debug logs for xBestIndex  
						
						 
						
						
						
						
					 
					
						2018-03-08 13:21:33 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						0d4806ca6f 
					 
					
						
						
							
							Rejig parquet generation  
						
						 
						
						... 
						
						
						
						- "fixed_size_binary" -> "binary_10"
- make null parquet use rowgroups of size 10: first rowgroup
  has no nulls, 2nd has all null, 3rd-10th have alternating
  nulls
This is prep for making a Postgres layer to use as an oracle
for generating test cases so that we have good coverage before
implementing advanced `xBestIndex` and `xFilter` modes. 
						
						
					 
					
						2018-03-06 21:02:26 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						67005623df 
					 
					
						
						
							
							ensureColumn catches up when rows are skipped 
						
						 
						
						
						
						
					 
					
						2018-03-04 22:29:35 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						bb3a9440f7 
					 
					
						
						
							
							Add query test framework, fix xFilter  
						
						 
						
						
						
						
					 
					
						2018-03-04 21:05:26 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						4c54ab89ae 
					 
					
						
						
							
							Don't segfault on full table scan  
						
						 
						
						
						
						
					 
					
						2018-03-04 17:49:19 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						7edb5e472f 
					 
					
						
						
							
							Support BLOBs  
						
						 
						
						
						
						
					 
					
						2018-03-04 17:20:59 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						67b0d96967 
					 
					
						
						
							
							float support  
						
						 
						
						
						
						
					 
					
						2018-03-03 20:57:09 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						18f07f4c43 
					 
					
						
						
							
							More defensive, add caveats  
						
						 
						
						
						
						
					 
					
						2018-03-03 20:30:46 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						eb0b48f867 
					 
					
						
						
							
							Boolean, INT96, INT64  
						
						 
						
						
						
						
					 
					
						2018-03-03 20:00:50 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						1de843fca8 
					 
					
						
						
							
							Very rough first cut  
						
						 
						
						... 
						
						
						
						supports int32, double, strings. 
						
						
					 
					
						2018-03-03 15:44:01 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						f8599f8d3e 
					 
					
						
						
							
							Rename some references to CSVs  
						
						 
						
						... 
						
						
						
						...some nonsensical things, like "first row of Parquet",
but we'll tidy them up later. 
						
						
					 
					
						2018-03-02 19:18:36 -05:00  
					
					
						 
						
						
							
							
							 
							
							
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Colin Dellow 
							
						 
					 
					
						
						
							
						
						552da5a647 
					 
					
						
						
							
							Initial checkin of CSV table  
						
						 
						
						... 
						
						
						
						parquet.cc is a fork of the sample CSV virtual table at
https://www.sqlite.org/src/artifact?ci=trunk&filename=ext/misc/csv.c 
So far the only changes are those needed to make it compile cleanly in
C++11 mode. 
						
						
					 
					
						2018-03-02 18:59:34 -05:00