From 552da5a6475972db2e8194ac6f204b44204c4dc8 Mon Sep 17 00:00:00 2001
From: Colin Dellow <cldellow@gmail.com>
Date: Fri, 2 Mar 2018 18:59:34 -0500
Subject: [PATCH] Initial checkin of CSV table

parquet.cc is a fork of the sample CSV virtual table at
https://www.sqlite.org/src/artifact?ci=trunk&filename=ext/misc/csv.c

So far the only changes are those needed to make it compile cleanly in
C++11 mode.
---
 README.md          |  20 +-
 parquet/.gitignore |   2 +
 parquet/cmds.txt   |   3 +
 parquet/go         |   1 +
 parquet/make       |  25 ++
 parquet/parquet.cc | 909 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 959 insertions(+), 1 deletion(-)
 create mode 100644 parquet/.gitignore
 create mode 100644 parquet/cmds.txt
 create mode 100755 parquet/go
 create mode 100755 parquet/make
 create mode 100644 parquet/parquet.cc

diff --git a/README.md b/README.md
index de7b1a1..99c7555 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,20 @@
 # parquet-vtable
-A SQLite vtable extension to read Parquet files
+
+A SQLite [virtual table](https://sqlite.org/vtab.html) extension to expose Parquet files as SQL tables.
+
+## Building
+
+1. Install [`parquet-cpp`](https://github.com/apache/parquet-cpp)
+2. Run `./build-sqlite` to fetch and build the SQLite dev bits
+3. Run `./parquet/make` to build the module
+  1. You will need to fixup the paths in this file to point at your local parquet-cpp folder.
+
+## Use
+
+```
+$ sqlite/sqlite3
+sqlite> .load parquet/libparquet
+sqlite> create virtual table demo USING parquet('demo.parquet');
+sqlite> select * from demo limit 1;
+...if all goes well, you'll see data here!...
+```
diff --git a/parquet/.gitignore b/parquet/.gitignore
new file mode 100644
index 0000000..deb64be
--- /dev/null
+++ b/parquet/.gitignore
@@ -0,0 +1,2 @@
+cmds.txt
+go
diff --git a/parquet/cmds.txt b/parquet/cmds.txt
new file mode 100644
index 0000000..4c77a73
--- /dev/null
+++ b/parquet/cmds.txt
@@ -0,0 +1,3 @@
+.load ./libparquet
+create virtual table parquet using parquet(filename='../csv.csv');
+select * from parquet;
diff --git a/parquet/go b/parquet/go
new file mode 100755
index 0000000..ec520d8
--- /dev/null
+++ b/parquet/go
@@ -0,0 +1 @@
+./make && ../sqlite/sqlite3 -init ./cmds.txt < /dev/null
diff --git a/parquet/make b/parquet/make
new file mode 100755
index 0000000..1ab3a82
--- /dev/null
+++ b/parquet/make
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+set -x
+
+PARQUET_CPP=${PARQUET_CPP:-~/src/parquet-cpp}
+
+# make obj file
+clean() {
+  rm -f *.o *.so
+}
+
+build() {
+  clean
+  g++ -std=c++11 -Wall -fPIC -c -g parquet.cc -I ../sqlite -lz -ldl -lpthread 
+
+  g++ -shared -o libparquet.so parquet.o \
+    ${PARQUET_CPP}/build/release/libparquet.a \
+    ${PARQUET_CPP}/thrift_ep/src/thrift_ep-install/lib/libthrift.a \
+    ${PARQUET_CPP}/build/release/libarrow.so \
+    /usr/lib/x86_64-linux-gnu/libboost_regex.so
+}
+
+fn=${1:-build}
+
+"$fn"
diff --git a/parquet/parquet.cc b/parquet/parquet.cc
new file mode 100644
index 0000000..bed9088
--- /dev/null
+++ b/parquet/parquet.cc
@@ -0,0 +1,909 @@
+/*
+** 2016-05-28
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains the implementation of an SQLite virtual table for
+** reading CSV files.
+**
+** Usage:
+**
+**    .load ./csv
+**    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
+**    SELECT * FROM csv;
+**
+** The columns are named "c1", "c2", "c3", ... by default.  But the
+** application can define its own CREATE TABLE statement as an additional
+** parameter.  For example:
+**
+**    CREATE VIRTUAL TABLE temp.csv2 USING csv(
+**       filename = "../http.log",
+**       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
+**    );
+**
+** Instead of specifying a file, the text of the CSV can be loaded using
+** the data= parameter.
+**
+** If the columns=N parameter is supplied, then the CSV file is assumed to have
+** N columns.  If the columns parameter is omitted, the CSV file is opened
+** as soon as the virtual table is constructed and the first row of the CSV
+** is read in order to count the tables.
+**
+** Some extra debugging features (used for testing virtual tables) are available
+** if this module is compiled with -DSQLITE_TEST.
+*/
+#include <sqlite3ext.h>
+SQLITE_EXTENSION_INIT1
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#include "parquet/api/reader.h"
+
+char const *EMPTY_STRING = "";
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+
+/*
+** A macro to hint to the compiler that a function should not be
+** inlined.
+*/
+#if defined(__GNUC__)
+#  define CSV_NOINLINE  __attribute__((noinline))
+#elif defined(_MSC_VER) && _MSC_VER>=1310
+#  define CSV_NOINLINE  __declspec(noinline)
+#else
+#  define CSV_NOINLINE
+#endif
+
+void gogo() {
+  printf("ok");
+  try {
+    std::unique_ptr<parquet::ParquetFileReader> reader =
+        parquet::ParquetFileReader::OpenFile("/home/cldellow/src/csv2parquet/12m.parquet.snappy");
+    printf("%d\n", reader->metadata()->size());
+    printf("%ld\n", reader->metadata()->num_rows());
+  } catch (const std::exception& e) {
+    std::cerr << "Parquet error: " << e.what() << std::endl;
+  }
+}
+
+/* Max size of the error message in a CsvReader */
+#define CSV_MXERR 200
+
+/* Size of the CsvReader input buffer */
+#define CSV_INBUFSZ 1024
+
+/* A context object used when read a CSV file. */
+typedef struct CsvReader CsvReader;
+struct CsvReader {
+  FILE *in;              /* Read the CSV text from this input stream */
+  char *z;               /* Accumulated text for a field */
+  int n;                 /* Number of bytes in z */
+  int nAlloc;            /* Space allocated for z[] */
+  int nLine;             /* Current line number */
+  int bNotFirst;         /* True if prior text has been seen */
+  int cTerm;             /* Character that terminated the most recent field */
+  size_t iIn;            /* Next unread character in the input buffer */
+  size_t nIn;            /* Number of characters in the input buffer */
+  char *zIn;             /* The input buffer */
+  char zErr[CSV_MXERR];  /* Error message */
+};
+
+/* Initialize a CsvReader object */
+static void csv_reader_init(CsvReader *p){
+  p->in = 0;
+  p->z = 0;
+  p->n = 0;
+  p->nAlloc = 0;
+  p->nLine = 0;
+  p->bNotFirst = 0;
+  p->nIn = 0;
+  p->zIn = 0;
+  p->zErr[0] = 0;
+}
+
+/* Close and reset a CsvReader object */
+static void csv_reader_reset(CsvReader *p){
+  if( p->in ){
+    fclose(p->in);
+    sqlite3_free(p->zIn);
+  }
+  sqlite3_free(p->z);
+  csv_reader_init(p);
+}
+
+/* Report an error on a CsvReader */
+static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
+  va_list ap;
+  va_start(ap, zFormat);
+  sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
+  va_end(ap);
+}
+
+/* Open the file associated with a CsvReader
+** Return the number of errors.
+*/
+static int csv_reader_open(
+  CsvReader *p,               /* The reader to open */
+  const char *zFilename,      /* Read from this filename */
+  const char *zData           /*  ... or use this data */
+){
+  if( zFilename ){
+    p->zIn = (char*)sqlite3_malloc( CSV_INBUFSZ );
+    if( p->zIn==0 ){
+      csv_errmsg(p, "out of memory");
+      return 1;
+    }
+    p->in = fopen(zFilename, "rb");
+    if( p->in==0 ){
+      csv_reader_reset(p);
+      csv_errmsg(p, "cannot open '%s' for reading", zFilename);
+      return 1;
+    }
+  }else{
+    assert( p->in==0 );
+    p->zIn = (char*)zData;
+    p->nIn = strlen(zData);
+  }
+  return 0;
+}
+
+/* The input buffer has overflowed.  Refill the input buffer, then
+** return the next character
+*/
+static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
+  size_t got;
+
+  assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
+  assert( p->in!=0 );        /* Only called if reading froma file */
+
+  got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
+  if( got==0 ) return EOF;
+  p->nIn = got;
+  p->iIn = 1;
+  return p->zIn[0];
+}
+
+/* Return the next character of input.  Return EOF at end of input. */
+static int csv_getc(CsvReader *p){
+  if( p->iIn >= p->nIn ){
+    if( p->in!=0 ) return csv_getc_refill(p);
+    return EOF;
+  }
+  return ((unsigned char*)p->zIn)[p->iIn++];
+}
+
+/* Increase the size of p->z and append character c to the end. 
+** Return 0 on success and non-zero if there is an OOM error */
+static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
+  char *zNew;
+  int nNew = p->nAlloc*2 + 100;
+  zNew = (char*)sqlite3_realloc64(p->z, nNew);
+  if( zNew ){
+    p->z = zNew;
+    p->nAlloc = nNew;
+    p->z[p->n++] = c;
+    return 0;
+  }else{
+    csv_errmsg(p, "out of memory");
+    return 1;
+  }
+}
+
+/* Append a single character to the CsvReader.z[] array.
+** Return 0 on success and non-zero if there is an OOM error */
+static int csv_append(CsvReader *p, char c){
+  if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
+  p->z[p->n++] = c;
+  return 0;
+}
+
+/* Read a single field of CSV text.  Compatible with rfc4180 and extended
+** with the option of having a separator other than ",".
+**
+**   +  Input comes from p->in.
+**   +  Store results in p->z of length p->n.  Space to hold p->z comes
+**      from sqlite3_malloc64().
+**   +  Keep track of the line number in p->nLine.
+**   +  Store the character that terminates the field in p->cTerm.  Store
+**      EOF on end-of-file.
+**
+** Return "" at EOF.  Return 0 on an OOM error.
+*/
+static const char *csv_read_one_field(CsvReader *p){
+  int c;
+  p->n = 0;
+  c = csv_getc(p);
+  if( c==EOF ){
+    p->cTerm = EOF;
+    return EMPTY_STRING;
+  }
+  if( c=='"' ){
+    int pc, ppc;
+    int startLine = p->nLine;
+    pc = ppc = 0;
+    while( 1 ){
+      c = csv_getc(p);
+      if( c<='"' || pc=='"' ){
+        if( c=='\n' ) p->nLine++;
+        if( c=='"' ){
+          if( pc=='"' ){
+            pc = 0;
+            continue;
+          }
+        }
+        if( (c==',' && pc=='"')
+         || (c=='\n' && pc=='"')
+         || (c=='\n' && pc=='\r' && ppc=='"')
+         || (c==EOF && pc=='"')
+        ){
+          do{ p->n--; }while( p->z[p->n]!='"' );
+          p->cTerm = (char)c;
+          break;
+        }
+        if( pc=='"' && c!='\r' ){
+          csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
+          break;
+        }
+        if( c==EOF ){
+          csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
+                     startLine, '"');
+          p->cTerm = (char)c;
+          break;
+        }
+      }
+      if( csv_append(p, (char)c) ) return 0;
+      ppc = pc;
+      pc = c;
+    }
+  }else{
+    /* If this is the first field being parsed and it begins with the
+    ** UTF-8 BOM  (0xEF BB BF) then skip the BOM */
+    if( (c&0xff)==0xef && p->bNotFirst==0 ){
+      csv_append(p, (char)c);
+      c = csv_getc(p);
+      if( (c&0xff)==0xbb ){
+        csv_append(p, (char)c);
+        c = csv_getc(p);
+        if( (c&0xff)==0xbf ){
+          p->bNotFirst = 1;
+          p->n = 0;
+          return csv_read_one_field(p);
+        }
+      }
+    }
+    while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
+      if( csv_append(p, (char)c) ) return 0;
+      c = csv_getc(p);
+    }
+    if( c=='\n' ){
+      p->nLine++;
+      if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
+    }
+    p->cTerm = (char)c;
+  }
+  if( p->z ) p->z[p->n] = 0;
+  p->bNotFirst = 1;
+  return p->z;
+}
+
+
+/* Forward references to the various virtual table methods implemented
+** in this file. */
+static int csvtabCreate(sqlite3*, void*, int, const char*const*, 
+                           sqlite3_vtab**,char**);
+static int csvtabConnect(sqlite3*, void*, int, const char*const*, 
+                           sqlite3_vtab**,char**);
+static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
+static int csvtabDisconnect(sqlite3_vtab*);
+static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
+static int csvtabClose(sqlite3_vtab_cursor*);
+static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
+                          int argc, sqlite3_value **argv);
+static int csvtabNext(sqlite3_vtab_cursor*);
+static int csvtabEof(sqlite3_vtab_cursor*);
+static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
+static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
+
+/* An instance of the CSV virtual table */
+typedef struct CsvTable {
+  sqlite3_vtab base;              /* Base class.  Must be first */
+  char *zFilename;                /* Name of the CSV file */
+  char *zData;                    /* Raw CSV data in lieu of zFilename */
+  long iStart;                    /* Offset to start of data in zFilename */
+  unsigned int nCol;                       /* Number of columns in the CSV file */
+  unsigned int tstFlags;          /* Bit values used for testing */
+} CsvTable;
+
+/* Allowed values for tstFlags */
+#define CSVTEST_FIDX  0x0001      /* Pretend that constrained searchs cost less*/
+
+/* A cursor for the CSV virtual table */
+typedef struct CsvCursor {
+  sqlite3_vtab_cursor base;       /* Base class.  Must be first */
+  CsvReader rdr;                  /* The CsvReader object */
+  char **azVal;                   /* Value of the current row */
+  int *aLen;                      /* Length of each entry */
+  sqlite3_int64 iRowid;           /* The current rowid.  Negative for EOF */
+} CsvCursor;
+
+/* Transfer error message text from a reader into a CsvTable */
+static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
+  sqlite3_free(pTab->base.zErrMsg);
+  pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
+}
+
+/*
+** This method is the destructor fo a CsvTable object.
+*/
+static int csvtabDisconnect(sqlite3_vtab *pVtab){
+  CsvTable *p = (CsvTable*)pVtab;
+  sqlite3_free(p->zFilename);
+  sqlite3_free(p->zData);
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/* Skip leading whitespace.  Return a pointer to the first non-whitespace
+** character, or to the zero terminator if the string has only whitespace */
+static const char *csv_skip_whitespace(const char *z){
+  while( isspace((unsigned char)z[0]) ) z++;
+  return z;
+}
+
+/* Remove trailing whitespace from the end of string z[] */
+static void csv_trim_whitespace(char *z){
+  size_t n = strlen(z);
+  while( n>0 && isspace((unsigned char)z[n]) ) n--;
+  z[n] = 0;
+}
+
+/* Dequote the string */
+static void csv_dequote(char *z){
+  int j;
+  char cQuote = z[0];
+  size_t i, n;
+
+  if( cQuote!='\'' && cQuote!='"' ) return;
+  n = strlen(z);
+  if( n<2 || z[n-1]!=z[0] ) return;
+  for(i=1, j=0; i<n-1; i++){
+    if( z[i]==cQuote && z[i+1]==cQuote ) i++;
+    z[j++] = z[i];
+  }
+  z[j] = 0;
+}
+
+/* Check to see if the string is of the form:  "TAG = VALUE" with optional
+** whitespace before and around tokens.  If it is, return a pointer to the
+** first character of VALUE.  If it is not, return NULL.
+*/
+static const char *csv_parameter(const char *zTag, int nTag, const char *z){
+  z = csv_skip_whitespace(z);
+  if( strncmp(zTag, z, nTag)!=0 ) return 0;
+  z = csv_skip_whitespace(z+nTag);
+  if( z[0]!='=' ) return 0;
+  return csv_skip_whitespace(z+1);
+}
+
+/* Decode a parameter that requires a dequoted string.
+**
+** Return 1 if the parameter is seen, or 0 if not.  1 is returned
+** even if there is an error.  If an error occurs, then an error message
+** is left in p->zErr.  If there are no errors, p->zErr[0]==0.
+*/
+static int csv_string_parameter(
+  CsvReader *p,            /* Leave the error message here, if there is one */
+  const char *zParam,      /* Parameter we are checking for */
+  const char *zArg,        /* Raw text of the virtual table argment */
+  char **pzVal             /* Write the dequoted string value here */
+){
+  const char *zValue;
+  zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
+  if( zValue==0 ) return 0;
+  p->zErr[0] = 0;
+  if( *pzVal ){
+    csv_errmsg(p, "more than one '%s' parameter", zParam);
+    return 1;
+  }
+  *pzVal = sqlite3_mprintf("%s", zValue);
+  if( *pzVal==0 ){
+    csv_errmsg(p, "out of memory");
+    return 1;
+  }
+  csv_trim_whitespace(*pzVal);
+  csv_dequote(*pzVal);
+  return 1;
+}
+
+
+/* Return 0 if the argument is false and 1 if it is true.  Return -1 if
+** we cannot really tell.
+*/
+static int csv_boolean(const char *z){
+  if( sqlite3_stricmp("yes",z)==0
+   || sqlite3_stricmp("on",z)==0
+   || sqlite3_stricmp("true",z)==0
+   || (z[0]=='1' && z[1]==0)
+  ){
+    return 1;
+  }
+  if( sqlite3_stricmp("no",z)==0
+   || sqlite3_stricmp("off",z)==0
+   || sqlite3_stricmp("false",z)==0
+   || (z[0]=='0' && z[1]==0)
+  ){
+    return 0;
+  }
+  return -1;
+}
+
+
+/*
+** Parameters:
+**    filename=FILENAME          Name of file containing CSV content
+**    data=TEXT                  Direct CSV content.
+**    schema=SCHEMA              Alternative CSV schema.
+**    header=YES|NO              First row of CSV defines the names of
+**                               columns if "yes".  Default "no".
+**    columns=N                  Assume the CSV file contains N columns.
+**
+** Only available if compiled with SQLITE_TEST:
+**    
+**    testflags=N                Bitmask of test flags.  Optional
+**
+** If schema= is omitted, then the columns are named "c0", "c1", "c2",
+** and so forth.  If columns=N is omitted, then the file is opened and
+** the number of columns in the first row is counted to determine the
+** column count.  If header=YES, then the first row is skipped.
+*/
+static int csvtabConnect(
+  sqlite3 *db,
+  void *pAux,
+  int argcOrig, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+  unsigned int argc = argcOrig;
+  CsvTable *pNew = 0;        /* The CsvTable object to construct */
+  int bHeader = -1;          /* header= flags.  -1 means not seen yet */
+  int rc = SQLITE_OK;        /* Result code from this routine */
+  unsigned int i, j;                  /* Loop counters */
+#ifdef SQLITE_TEST
+  int tstFlags = 0;          /* Value for testflags=N parameter */
+#endif
+  int nCol = -99;            /* Value of the columns= parameter */
+  CsvReader sRdr;            /* A CSV file reader used to store an error
+                             ** message and/or to count the number of columns */
+  static const char *azParam[] = {
+     "filename", "data", "schema", 
+  };
+  char *azPValue[3];         /* Parameter values */
+# define CSV_FILENAME (azPValue[0])
+# define CSV_DATA     (azPValue[1])
+# define CSV_SCHEMA   (azPValue[2])
+
+
+  assert( sizeof(azPValue)==sizeof(azParam) );
+  memset(&sRdr, 0, sizeof(sRdr));
+  memset(azPValue, 0, sizeof(azPValue));
+  for(i=3; i<argc; i++){
+    const char *z = argv[i];
+    const char *zValue;
+    for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
+      if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
+    }
+    if( j<sizeof(azParam)/sizeof(azParam[0]) ){
+      if( sRdr.zErr[0] ) goto csvtab_connect_error;
+    }else
+    if( (zValue = csv_parameter("header",6,z))!=0 ){
+      int x;
+      if( bHeader>=0 ){
+        csv_errmsg(&sRdr, "more than one 'header' parameter");
+        goto csvtab_connect_error;
+      }
+      x = csv_boolean(zValue);
+      if( x==1 ){
+        bHeader = 1;
+      }else if( x==0 ){
+        bHeader = 0;
+      }else{
+        csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
+        goto csvtab_connect_error;
+      }
+    }else
+#ifdef SQLITE_TEST
+    if( (zValue = csv_parameter("testflags",9,z))!=0 ){
+      tstFlags = (unsigned int)atoi(zValue);
+    }else
+#endif
+    if( (zValue = csv_parameter("columns",7,z))!=0 ){
+      if( nCol>0 ){
+        csv_errmsg(&sRdr, "more than one 'columns' parameter");
+        goto csvtab_connect_error;
+      }
+      nCol = atoi(zValue);
+      if( nCol<=0 ){
+        csv_errmsg(&sRdr, "must have at least one column");
+        goto csvtab_connect_error;
+      }
+    }else
+    {
+      csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
+      goto csvtab_connect_error;
+    }
+  }
+  if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
+    csv_errmsg(&sRdr, "must either filename= or data= but not both");
+    goto csvtab_connect_error;
+  }
+  if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
+    goto csvtab_connect_error;
+  }
+  pNew = (CsvTable*)sqlite3_malloc( sizeof(*pNew) );
+  *ppVtab = (sqlite3_vtab*)pNew;
+  if( pNew==0 ) goto csvtab_connect_oom;
+  memset(pNew, 0, sizeof(*pNew));
+  if( nCol>0 ){
+    pNew->nCol = nCol;
+  }else{
+    do{
+      const char *z = csv_read_one_field(&sRdr);
+      if( z==0 ) goto csvtab_connect_oom;
+      pNew->nCol++;
+    }while( sRdr.cTerm==',' );
+  }
+  pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
+  pNew->zData = CSV_DATA;          CSV_DATA = 0;
+#ifdef SQLITE_TEST
+  pNew->tstFlags = tstFlags;
+#endif
+  pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
+  csv_reader_reset(&sRdr);
+  if( CSV_SCHEMA==0 ){
+    const char *zSep = EMPTY_STRING;
+    CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
+    if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
+    for(i=0; i<pNew->nCol; i++){
+      CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
+      zSep = ",";
+    }
+    CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
+  }
+  rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
+  if( rc ) goto csvtab_connect_error;
+  for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
+    sqlite3_free(azPValue[i]);
+  }
+  return SQLITE_OK;
+
+csvtab_connect_oom:
+  rc = SQLITE_NOMEM;
+  csv_errmsg(&sRdr, "out of memory");
+
+csvtab_connect_error:
+  if( pNew ) csvtabDisconnect(&pNew->base);
+  for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
+    sqlite3_free(azPValue[i]);
+  }
+  if( sRdr.zErr[0] ){
+    sqlite3_free(*pzErr);
+    *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
+  }
+  csv_reader_reset(&sRdr);
+  if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
+  return rc;
+}
+
+/*
+** Reset the current row content held by a CsvCursor.
+*/
+static void csvtabCursorRowReset(CsvCursor *pCur){
+  CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
+  unsigned int i;
+  for(i=0; i<pTab->nCol; i++){
+    sqlite3_free(pCur->azVal[i]);
+    pCur->azVal[i] = 0;
+    pCur->aLen[i] = 0;
+  }
+}
+
+/*
+** The xConnect and xCreate methods do the same thing, but they must be
+** different so that the virtual table is not an eponymous virtual table.
+*/
+static int csvtabCreate(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+ return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
+}
+
+/*
+** Destructor for a CsvCursor.
+*/
+static int csvtabClose(sqlite3_vtab_cursor *cur){
+  CsvCursor *pCur = (CsvCursor*)cur;
+  csvtabCursorRowReset(pCur);
+  csv_reader_reset(&pCur->rdr);
+  sqlite3_free(cur);
+  return SQLITE_OK;
+}
+
+/*
+** Constructor for a new CsvTable cursor object.
+*/
+static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
+  CsvTable *pTab = (CsvTable*)p;
+  CsvCursor *pCur;
+  size_t nByte;
+  nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
+  pCur = (CsvCursor*)sqlite3_malloc64( nByte );
+  if( pCur==0 ) return SQLITE_NOMEM;
+  memset(pCur, 0, nByte);
+  pCur->azVal = (char**)&pCur[1];
+  pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
+  *ppCursor = &pCur->base;
+  if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
+    csv_xfer_error(pTab, &pCur->rdr);
+    return SQLITE_ERROR;
+  }
+  return SQLITE_OK;
+}
+
+
+/*
+** Advance a CsvCursor to its next row of input.
+** Set the EOF marker if we reach the end of input.
+*/
+static int csvtabNext(sqlite3_vtab_cursor *cur){
+  CsvCursor *pCur = (CsvCursor*)cur;
+  CsvTable *pTab = (CsvTable*)cur->pVtab;
+  unsigned int i = 0;
+  const char *z;
+  do{
+    z = csv_read_one_field(&pCur->rdr);
+    if( z==0 ){
+      csv_xfer_error(pTab, &pCur->rdr);
+      break;
+    }
+    if( i<pTab->nCol ){
+      if( pCur->aLen[i] < pCur->rdr.n+1 ){
+        char *zNew = (char*)sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
+        if( zNew==0 ){
+          csv_errmsg(&pCur->rdr, "out of memory");
+          csv_xfer_error(pTab, &pCur->rdr);
+          break;
+        }
+        pCur->azVal[i] = zNew;
+        pCur->aLen[i] = pCur->rdr.n+1;
+      }
+      memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
+      i++;
+    }
+  }while( pCur->rdr.cTerm==',' );
+  if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){
+    pCur->iRowid = -1;
+  }else{
+    pCur->iRowid++;
+    while( i<pTab->nCol ){
+      sqlite3_free(pCur->azVal[i]);
+      pCur->azVal[i] = 0;
+      pCur->aLen[i] = 0;
+      i++;
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Return values of columns for the row at which the CsvCursor
+** is currently pointing.
+*/
+static int csvtabColumn(
+  sqlite3_vtab_cursor *cur,   /* The cursor */
+  sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
+  int iOrig                       /* Which column to return */
+){
+  unsigned int i = iOrig;
+  CsvCursor *pCur = (CsvCursor*)cur;
+  CsvTable *pTab = (CsvTable*)cur->pVtab;
+  if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
+    sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Return the rowid for the current row.
+*/
+static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
+  CsvCursor *pCur = (CsvCursor*)cur;
+  *pRowid = pCur->iRowid;
+  return SQLITE_OK;
+}
+
+/*
+** Return TRUE if the cursor has been moved off of the last
+** row of output.
+*/
+static int csvtabEof(sqlite3_vtab_cursor *cur){
+  CsvCursor *pCur = (CsvCursor*)cur;
+  return pCur->iRowid<0;
+}
+
+/*
+** Only a full table scan is supported.  So xFilter simply rewinds to
+** the beginning.
+*/
+static int csvtabFilter(
+  sqlite3_vtab_cursor *pVtabCursor, 
+  int idxNum, const char *idxStr,
+  int argc, sqlite3_value **argv
+){
+  CsvCursor *pCur = (CsvCursor*)pVtabCursor;
+  CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
+  pCur->iRowid = 0;
+  if( pCur->rdr.in==0 ){
+    assert( pCur->rdr.zIn==pTab->zData );
+    assert( pTab->iStart>=0 );
+    assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
+    pCur->rdr.iIn = pTab->iStart;
+  }else{
+    fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
+    pCur->rdr.iIn = 0;
+    pCur->rdr.nIn = 0;
+  }
+  return csvtabNext(pVtabCursor);
+}
+
+/*
+** Only a forward full table scan is supported.  xBestIndex is mostly
+** a no-op.  If CSVTEST_FIDX is set, then the presence of equality
+** constraints lowers the estimated cost, which is fiction, but is useful
+** for testing certain kinds of virtual table behavior.
+*/
+static int csvtabBestIndex(
+  sqlite3_vtab *tab,
+  sqlite3_index_info *pIdxInfo
+){
+  pIdxInfo->estimatedCost = 1000000;
+#ifdef SQLITE_TEST
+  if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
+    /* The usual (and sensible) case is to always do a full table scan.
+    ** The code in this branch only runs when testflags=1.  This code
+    ** generates an artifical and unrealistic plan which is useful
+    ** for testing virtual table logic but is not helpful to real applications.
+    **
+    ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
+    ** table (even though it is not) and the cost of running the virtual table
+    ** is reduced from 1 million to just 10.  The constraints are *not* marked
+    ** as omittable, however, so the query planner should still generate a
+    ** plan that gives a correct answer, even if they plan is not optimal.
+    */
+    int i;
+    int nConst = 0;
+    for(i=0; i<pIdxInfo->nConstraint; i++){
+      unsigned char op;
+      if( pIdxInfo->aConstraint[i].usable==0 ) continue;
+      op = pIdxInfo->aConstraint[i].op;
+      if( op==SQLITE_INDEX_CONSTRAINT_EQ 
+       || op==SQLITE_INDEX_CONSTRAINT_LIKE
+       || op==SQLITE_INDEX_CONSTRAINT_GLOB
+      ){
+        pIdxInfo->estimatedCost = 10;
+        pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
+        nConst++;
+      }
+    }
+  }
+#endif
+  return SQLITE_OK;
+}
+
+
+static sqlite3_module CsvModule = {
+  0,                       /* iVersion */
+  csvtabCreate,            /* xCreate */
+  csvtabConnect,           /* xConnect */
+  csvtabBestIndex,         /* xBestIndex */
+  csvtabDisconnect,        /* xDisconnect */
+  csvtabDisconnect,        /* xDestroy */
+  csvtabOpen,              /* xOpen - open a cursor */
+  csvtabClose,             /* xClose - close a cursor */
+  csvtabFilter,            /* xFilter - configure scan constraints */
+  csvtabNext,              /* xNext - advance a cursor */
+  csvtabEof,               /* xEof - check for end of scan */
+  csvtabColumn,            /* xColumn - read data */
+  csvtabRowid,             /* xRowid - read data */
+  0,                       /* xUpdate */
+  0,                       /* xBegin */
+  0,                       /* xSync */
+  0,                       /* xCommit */
+  0,                       /* xRollback */
+  0,                       /* xFindMethod */
+  0,                       /* xRename */
+};
+
+#ifdef SQLITE_TEST
+/*
+** For virtual table testing, make a version of the CSV virtual table
+** available that has an xUpdate function.  But the xUpdate always returns
+** SQLITE_READONLY since the CSV file is not really writable.
+*/
+static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
+  return SQLITE_READONLY;
+}
+static sqlite3_module CsvModuleFauxWrite = {
+  0,                       /* iVersion */
+  csvtabCreate,            /* xCreate */
+  csvtabConnect,           /* xConnect */
+  csvtabBestIndex,         /* xBestIndex */
+  csvtabDisconnect,        /* xDisconnect */
+  csvtabDisconnect,        /* xDestroy */
+  csvtabOpen,              /* xOpen - open a cursor */
+  csvtabClose,             /* xClose - close a cursor */
+  csvtabFilter,            /* xFilter - configure scan constraints */
+  csvtabNext,              /* xNext - advance a cursor */
+  csvtabEof,               /* xEof - check for end of scan */
+  csvtabColumn,            /* xColumn - read data */
+  csvtabRowid,             /* xRowid - read data */
+  csvtabUpdate,            /* xUpdate */
+  0,                       /* xBegin */
+  0,                       /* xSync */
+  0,                       /* xCommit */
+  0,                       /* xRollback */
+  0,                       /* xFindMethod */
+  0,                       /* xRename */
+};
+#endif /* SQLITE_TEST */
+
+#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
+
+
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+/* 
+** This routine is called when the extension is loaded.  The new
+** CSV virtual table module is registered with the calling database
+** connection.
+*/
+extern "C" {
+  int sqlite3_parquet_init(
+    sqlite3 *db, 
+    char **pzErrMsg, 
+    const sqlite3_api_routines *pApi
+  ){
+#ifndef SQLITE_OMIT_VIRTUALTABLE	
+    int rc;
+    SQLITE_EXTENSION_INIT2(pApi);
+    printf("foo!!!\n");
+    gogo();
+    rc = sqlite3_create_module(db, "parquet", &CsvModule, 0);
+#ifdef SQLITE_TEST
+    if( rc==SQLITE_OK ){
+      rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
+    }
+#endif
+    return rc;
+#else
+    return SQLITE_OK;
+#endif
+  }
+}
+
+