From 0d4806ca6f24dc7b11b9d694b5d22216fd2ff051 Mon Sep 17 00:00:00 2001 From: Colin Dellow Date: Tue, 6 Mar 2018 21:02:26 -0500 Subject: [PATCH] Rejig parquet generation - "fixed_size_binary" -> "binary_10" - make null parquet use rowgroups of sie 10: first rowgroup has no nulls, 2nd has all null, 3rd-10th have alternating nulls This is prep for making a Postgres layer to use as an oracle for generating test cases so that we have good coverage before implementing advanced `xBestIndex` and `xFilter` modes. --- parquet-generator/100-rows-1.parquet | Bin 8166 -> 8124 bytes parquet-generator/100-rows-10.parquet | Bin 30818 -> 30522 bytes parquet-generator/100-rows-nulls.parquet | Bin 5508 -> 26918 bytes parquet-generator/parquets.py | 5 +- parquet/cmds.txt | 26 +++++----- parquet/parquet.cc | 63 ++++++++++++++++++++++- tests/queries/008-nulls.sql | 2 +- 7 files changed, 79 insertions(+), 17 deletions(-) diff --git a/parquet-generator/100-rows-1.parquet b/parquet-generator/100-rows-1.parquet index 922b8889eeed0bb934b167e45dd68c7f0010d169..fe4fa702307c996981bfc63af18b67b492b0d786 100644 GIT binary patch delta 53 wcmaE6zsG(4z}(<}?{mW*BGk1bIIW0~Q8`0LLIh00`?5G5`Po delta 99 zcmdmE|IB_vw6v&rT4qIRN_=r;*27+@R*=7Hc9QN!>MB^sYmNwtW!u}ht*sL@0T)JN3B)bI*b zKt#781O)+!F_w3goOB;+;K6s|xZ*`ocr0p0sIU#3nCE zx>oKW&xIMwG>|5X=59gMGPSg26o#GNpdOq;N(3m5P)V+h#^?@QA6&xSg}5PV$pb$O zD!_Sp02Dsw4$VV$a0L+M^vPbr840bMOfw+z-Hl0iuTHkcMC= zo}iY*1!Ix~zQYoUF%(SO)RLNsm|TVH!^%iw7%WnnZ=FigJ`+L0PzOI1e6Ci(IeAoI z*YbW46fVbAt;^^tPNm@7Ab!DEQu2E?2hDn<5l329G7`xvu`?6quF}A%kuI=Rp(j^Y zQzFvFD6F)tg0xj?R`T`Gb0;9)LoKaZixu-4xQUf2kNw2_8l^J?=`^gzO4&NtsXRwA zqbU<<#zu^~UWcWg=Sf{G19CS8qXH<73V@AXG33r>2Bm8grU}sPl}HS4GAP%!Vp6>Y zK2fz0^IHf-Eos}1Q7)WQXZh8>(+`C9o!DEs6ZU9=NYXC0_p^UwlhCQrlM8W>AE=Vv zeixgXcfq^b6q1_=ch{+9XA(I2+;|A|P9;gnuz!PEmir#f3`y|)8V&HH4v_La;62XM zselNk-L!P@PcUS1Zd8&pO1>p&0&Xu>3JEWPC_#$36h# zt82k5cY&j0CWln!a{T~l&Y{c7ayT{@feOx#&IR9jghMz=?g+%Ma|ffd3+(aJld}bs zkIbA;*EHtAg>@PjH#VB&ek4LFq_9y^aFMYos-mqGU%;3R8u&UenzUbLY%(s> zJns_3VxAQgP3~M}eA=(jJoyTY4>ppt8a6<9Xfm(UxaF_Vy&(X$jdO?akR8N)lVTcH zQ_CK4gKY)&hIqoDiMb^EOEykrL?|@Vdebe)-xvT-C&rLtx9Lc^R)0nFYjtcL2@O+_ zPzV^Ngwvrb_^x^p;o*APll8aKX_J<4eCJ(;5LQJ?WQ7g1()M?ViB5x;!@~XB8~ej{ zxREYt{s&wM3+9bYBDSWlf5bNZ1wDDx%&_flX3za6RKAc(Zhga$wLG9%=f9xk4T{nQ z3a9AFnMVxUt%tOBzXklJ7Ly0xTF+fuD~+rF4cvuBqHj*x_5K;O4SRIVBb1 zw<>v43Sl?X+q3ngdM~(aQpxV8(FKoF;fL9&d{G9$OIa=5#~z#x#+Opb0lYmT)Uu)j zG^^MT7dO$XX#~KWc)lu=&^wvlPc`q;dci>$wi)l+xqJDvY(j5lS@MUp(DnhS=2epV z9Jn`6BWpZNv)n^)5}PN5$CBnexVKp&OUa{o+7Y-Bj_<6an7P7?r9)*61vIbEFWvd_ z5st4t!6-q@7K!Zm$Fx#j2=8nuga6JC_s>4rA0m^1J;VUp7X%Ztk%37uieMftz++d+ zz!a6xwu)l-!>?2M_EQv$N|teohIwV+6M^saGmK38X__UUh7E6O;Jb+Zd|HJ_g&w+d zwAS(|7#3FW=FdbTH5b?~pND;sm89ScMx>~cW=$q|dr=NCT*94uIn{GQ?L``!t6;|B zM1n`!3Z$@_=6RP(TNa<^_*=CM$nacOd7VT^xK3-GSHOFFHDtdM?qBs~e@Gsf>5_6Y zD3@Hbie2f`B}QWXf|lq1jN!TQLT|m(+NhnH7kay$h6ugnPS;!NeC-Kc056C}rx1-U zh|V_Y72=iuNVh;pYCjaw7exASRup||v5WMh5EsBi64+9;o|jqV{C(f1Uh?=50vf{;uQ+q$TJN{jl5WU$KX z#Pa{wz|@t>=|5WoDWf#)6q5~=|4k*&WDz6jEy?rophA>beq`$?DQE+OCqr!rtrywE@8#}~l=&gsv zYqWOsP-|Zeb+%gQ!wYu35a&h|Lr<$1if{1{WD_EuDbW#CR!3xYUWX0%!8-c%#aCzl z5>KS065PeX>8XoyJDRAqqlkFEY#mW+#}9Guwu0zuM-H)3v_qxE4n;;2kP3$bS`Qf|i{wRX%=YsVT9qg`#(^{I;z z_W$kaqRygj6>q(Wh;;xN5R%05ss}wYWq+}ojq;D8_d=lbrxq-TAh&@FPV(I zCNo|?>7&Z3kF15)VGn+=ran~i)U1JPq-fep66!VE*0VT{vH k>Z!7-rv)bV@H(-nPYu2Da};#cj>)ntZwH6SnTrGe2R|356aWAK delta 5450 zcmbuBeNa@_8Hc&&GRp!YP+@o3U3OR0D{9OxAh-%@z^KH}MQhp!0va$y0l%VSGgeWM zA$$dSDIrF2MMDW8%9n^_MMQ!GDM7}fr9_QmlvqNPQil>td(OGbgkdj}f&8;G{C@8} zz{7hkPfaiL@EKETuyK`ci#ZS-6|pzwz37OoG4WgW{4vyPGyKkG1?o`2t(|TtT?Gk_ z)8W2N7|NOoXFQdnVvWQ~QUejcC{l@@`r_m~K2@B!deQwUy zk_N;*)8p7QpPW4f&Pb-f{ZL2f*E_+j$$HH(zTjk(*4ALxh*k6-itq$&4hE z;oAN2S^1!I^g%BdQ8wj^Fq;Ha^%~{UwgmSNV7gNp%2`Udh|UM$q!w<3sUgSF30UVa zrXM1mRW_N+$WgC@!)6tnbv};r-=KJmh${-l%@Bla)3{68iwj3DUQwvJoFTf zM5tk*EEeUzO;@FQD^8oXK#MFD4MmexvB`@jr$xa*w+pB!hJdicAjBJaF<|IPBhoirynsCJot0s#vb+051CAxGK-@+@Hq9r1>CzGWQ^a zsJ&7DA2l66-E*N?`EB-NUe4lgef#F$9OWIFW=kN8y{TYLHHiMK6+} z%v&YS!YhY2{1|*TwG0)dLb!*@w)}mPC8sc;^^n3zk8|i*1_W+bI6O_KC#Ny=3OP(u zz_DosZ0&IdcCS#0o3coI^aC(VKZTleATUZT?g5fL%Ld7eL{#u0Tzf|?uFEBv>CYAQ zGwd0ZdJ+QlZqOPf5+6N5io87Nii(0wGlTV&rC7hw6y`(AOdphb1{hB{TW4hG z!UTDSp{BDG(8B^U)^`TFqSf%oGnOqWpR6G|#2c*H~JN!0T>d^@KOwN`+^S7~33xVz;f&MkxHS5BeM zD$3>MMUo|4gsi!VsQfbJ(pF8fmP-`@ug+pnM=j-&pcRP=O{6HQflO_kzNmhDOj@sz zhfiFC13Dixbe%G(yv{S})rBEbBW2R@B^i6%0K2`?SmRebkD4YDHs655G5m{VgLfE8 zZlPR^H%V9840pUsQPtOYaTNCWHr2I~IQOq`!N-J#+9{WTTlDNU$eLG+8agl+wYc?f zB+I%DW!tq-=o^pP{!YR4-6i>pJFv(v5fyb&Fpd8pS$${4_kM3OsPG{MgAce*!u)UG z(2jKcq74o*J{}*K#_z}h+27G+W|qmA0PvE|LNA_BL?usnL|qHQP$hmX;Ct2jJ$c|> zH$*Hil2?#EC^rp_ z>vow^$tfwRh}?c=os3ydf%T+A+fF%KkdEj*vt1g-7Jq=qn_1kDNuIc!0qINA*vxE1 z-$X^nNm!i)yLYJ}{PkGWp95ZNRpQJXlIH_l#`z{Nx8u6 z*23Vj47T@Ud|I4b+QboFTlVmAJkq>VWpJwu;N#Tu~`>+SjClOuBHN>uTEq8FY&N4FOe|63dHa7 z>velgDC(@C!1^zfF6lBDaJPT$aW=Jh2)NRdzte#ui~?tcZNG6bV-#eQWjz znKVYbT8N~YFDwdPI`R~+ zlyY7v=aG`t;TC_;`WO7tY0ZSZ(w|=6DKf?m{|J(XDH0beHxu`>XcBI`XnOLZxmYQ) zR^eHhwJUkT%5%h5uZEo9V# zGq0qv8uZhNNN&V_B*V*-`;Z)v_F~sq$e>r7nQx;r*IN+3?1y95z;3vpU@4 z4>}-huhE8ynkz3OUNXj$;5+KduNkkhsLlwdqa^|ss}V9-jLK4BHJ18{DOma|zKVhZ zi-&oGl@A*%kIWY%^QA<1nN<$UtQi^ZSsgzhEVc(ixQUuA&&*%OcoSyjW@fzVqRQ)( znWg2btvLbB)mZ55jTnuP-a2mA3Fs}xX6d&&v)jst-Ihn@hmra5H;`(sa#(ZC$Z*f< z_yJ4h%`p6W-6ga#CH#i*){9D>P+FG4i)jfMuSURhH5Qh8C{8C}xfqqD=IYFfs~A>X z9@Tt|YCc6}wq5lI+co3DGpvpu8pC{zhF@>IgxMoAvk~Hp-~K|V1%uSURlHA1GV z6S7>)Z4AwoT61R4RgLgmYGz1}3F;|9v+^p3m6!Ic4iDfD{sxbszZWO2zU%f%MyfBt zC7`_+mw@+b1bkOBhU;prIRV+l$SiFakD7H?F|4~hss$L;0t&)xzRF?qHKW1o7yo`XnoPc3=g6HhFzaH}>)cSo^fB2^qD%ecm+Uklf$vRbN^l>n&RjS$zC95|N^ z$qi{Zq+yVTLdt@47o;JO215c4{nbn)BQr8GUa85Lr&G!p#VE=4jGVHkH^FIImE=@y zJ@aLq+@dAR3oEj9awjFnS&449If$c1a%v_d7k7l!wxU~gJ0Y2njsW>tkea&^vH_AC z(uoX09)%=BveLyXH5v1w(Y<7k&`kll*`A6Vj<4SieAmgqmxpq3M*_b7#ul4z&xba$ z_=4@o;9uMPAN<1R`{5oNng5#YNdI*VT`A05_;xB@Qhxj;{A)2rF+erNC=<=7#!gOE4&5`@2%fx(Jl&`o zn`4`ewxoekr&65&6ynK4x&&vDkE}LI>T<)zZxYg3Qhq*lQ0Fv-kVAda$uA%shV)BF zMCHarqu!c22+h(zweW9?Vd;FX7Cr*(sCQY-ZB8{gWT3o!Sw+5X-MS>lCL7(| z1+3fL+36v!Fc#KpMus}UB51Z}4U0+%u}ks{pFJgmOz{1zU89s2W{>k)3YpoOAoN8Lg7-hi|U!S*l zVTDIGXs^w&)<(~E1%nP}U~}moGbS{b?9jf4y0U2d64oY z`zQ})7T{MQeid1ak%~0Q9gY5R)v2Z%^p`DPx@gH)D++Y_XC=pJi8giz`pvgvd+0b& z1CU!Isb4YJ6>CXw((QJqrjTq%IgoN8UK=Pg zCPib2gr4db>BN&cjVyDN(+&NA-;KV`IA8<)Bd{hdL)p>8>-UYyKy5a#oIfMf?J>>|zeh2_J4UPxcpB44t zMdaGM0D0$KA!JLSOt4!>gQrh%Vy|njwh_8xsPN&%Ncnu09xAh|ToV4pCs$ToYF3Gw zra#EtOZvh&C!}7GG)Sl_@N{dcGxBt6GM3V0%+m>^XXyl{Nl8gn#f0t|9(jKF$9=2h z*eTId!@#PlVGy$MC@$^@t87K945GC)U~4V(**8H^^B`GiVJS_NRwl~8+h?BF78Oc%m3D|B;x(CNB2>uZc^U3nlV|1? z?rs>bJ-NW^QVxc=9E^)QV!TS#Nk%ievdG1%Bgt@{Oqx0YLfeCVPTQr7LVCH}jwjdG zyeGpS3sJ?m3__Vu8J@7=$Occ8ehdPNa}xD?fbhnAjE6!jcsPiMEN796tbS81F$@@` zN3XvLMX$ewNcDFRslLNSs{h3zRr7rrjwdlU5>En->4q>IA1D&_n``xO!(RkOY$|km z(jrl+6_pV$Ri_xlU75oVUf%xFux@&Qx*)?h`J>uNAJujV1*hq78b4`1tM82qHN_|+ z^8KYz(Yj@D{soCPj|ASUM}qT3crL+NX2U48(6FJe9tq^Q?a3s^p6t#;70>bL--Q<1 zwwyxEmgDBB8ty&h<+@E%M_(;DswH}9G*DeX8eJShVN{CM(FT=)Eb`33s1qX2!hAiv z%wfT14n~>g8z9M@gHdKqHqXbnZ7+G?kLxY0#V((UPN*9==C)xQM8`UrG1IqtG9)!Q-si6b@b4B z_Uf&57v=Is8BShxFK3iVo9dGs2W|A+Jz&&{v1n8rH8S(l)Gmhg_6)U0&)$73)<}Ic zN#*$&PCt5IREb;ec#bv|1HI$LNKc2r3fm-fH^xU5y4~a3B6Qe@7`oyMLl;QccUa7l zv6Lob9&eoCeEYAY=) zrOBAb(=vR%=Ya?9}*aYq7PWN5bJ-_=bbnN7t1+B<~w{+WOmmeOR* z<9QkGs+(-))k^66lY!TPNx+MTadAhC7cX>f5_oPgY8m<3eb_VfnP8GUB}FaWp19)mQbR zBrQk@jkhHG&Xvx-=b6rSAC!lW>=Sri&q)oByU0F~=l4AvL*^jHIQkHS( z#$5QbP%XILB6GPWnakB>E>|FPxgc}7LYZ4Vlk0&6GPkcBsMgOw2iq75qhgi0T(ivO zGMUR2$y_eVT&_^&svoiP;brdBEZ}ovCh`FYhQk=$w5gRaNXlLX4og)SpNn+JP$_4h z{*?oW(zoA$)tN-;e@EteN|XTu>1G#^GY46kp2o)N+pqr}0|sUe()z|O$KNhS&$50l zXR}EK8)QebjUK81n>Wu!n-j!>dCa7Y{q7hz=uV5((w&;rkLPT3E-mw8D&wyCKshiE zDeDkeVVjh(dSO&$tY6R;Wz=6Op*nnjz^+x;gUVP+lQEAs&S;Q*UyXG2R%JZ<6|imX zSD>foQC!>+z_wP#mC&Ofg8G`^y)_Hc0xZ)y0F<$mCSx8?%kcTdMG;y8Wn8%kcpZ8a zc=0eU?nuDPzi|^p63-I?`1frQi(?@_1f`yVfr@^J=Ax`yK;cAAJpY z@i6cT7zSm$`2N{VdOf8Nz>!rWUzb1`ui1xyGR{6#17!(0zIi16u{cmME(0kGmEj2+ zj%->IQyEt+0m54z$L=}Af`^0d*>V=S2$b=?kTU)Wl<@;l#y@k)_+dt4uOAcfxjhz~Bo zkI(9S<4W)r`}jo8Z&)5f9P8i(d^r$bw+y^5!gC4ELS-Ux%rl8&o-U4g0&&a(am*8n zW9>J%j#(g%4Nn8r%}=9`Z48A`v5I4!Sse42IOd7OF%Ly8PbiM>KV#*?i{t8NfY07< zA|HTYIE+z!lDaW0j*Ze@7dDXb3qfk+B`_k6hM>_qhg4?~#WgsqW*AY1+%@c=nXcOq_f5D*q20CKL;K;^DKI# z1DP=&o8)xxT|X*QfEq)0t(NVC@HYw`KN`Q_s z|6pY$z`_iKFBn$En*WTA5)fO~yQ)nc@3&FLKNTx>^H2MZrtP z3rLMea&bojYRKFF4}T1jPu~psm+lCXh8G0XuoPGfc|0}4X-B`^idrjayrVhzR-`q8F)$*MQ0( z^c8S~`da&4FvU6(fQ_*pU5j?qeMf&J!xC!+HvA7WdJKtW!>R%A?dR`*bolGHyzICC zo8De?|NTc@H-7GAL#QoY%blzkR~o~tEnZO*>{}1K#XtJPyFX~`(eLSPunyZm1nCl- zWp-FzLGw*2o3E>EzCdO3L1puWD!c9#U@QKOpAEGh+<18`9h-PVEZ+OmarnMsOQ zr8pXs=#B5A%XV+lUG~3Bm^F+^pmXyJibh%uoH>NT{k zZmVvcj)HZzNubxg4$x5q`sg-*{bGT>0sVCoq-&5GAvHj<*d}8sO~yRlLZbn<{u9uM zF3?A|qk+x9RWkv(xFhCf(=W37J`V;zf|wx_(yY&d!3_`}SeaodO~yQ)nc@4>KjfGR z1iER5erN4KYCMvQI}%XyFZybb{Aqm9KW9jgT<8?^KlE|HN)1cFn=9n;)C{Lp?P^7> zl|Wbj2&rA%iPU%`c>4|GGHSd)yUCOpPof5s!vjCN32(nc8lH!@-;D>-AcP4L+lrUL zUsg9N#$_O!p)x#Y!<~)Qz&VCMS3<7~zaIWGih~Fd1Uf`7QV+AiBG8|P1o|@&=+8l* zZ*c-0j49BV8xd&05eoFGJzz>Sfv)-&v}4+Fun&fAjZZj&mOZp zdzd_XtcsT8_MoPEg!0_dQoHQxiLgAMu{HYO=lVGG+0f4s zBS*LRh%)MKU&B~pAFGV^j%^r6I*(I)W5xxFiM?1Jdyl188P|Tzjnp^l?$KV~n6dY0 zqy8Ec{>C(VvmHO@{c6(i^@`)5OwYn^&G0I;U-#>mzzEi&CSAYVJLaByEe@CN)ug+5 zla0=B^)JEX7`lGoFj~C!ux_!Af_1h@*N+?l=qS2=n{@rer@^2MX%qw%%dZE6yCM9r zGQ(1ujCnjW!}pc{!7&r)`i@$pcHu3g#v{47BLOw?yORO`q`d)h{Cffay~_gRjWU6* zV<~v?hCH5{;j{y9x1!cc*UukCYU__8H696Gyu-MRnpxMEJm5s>Cg*$numyCz_##Z* zFRaH7^Y{g3jem45SCxejp#by2z7n`ufddPx_;mU+HvYX^+z%Wu;G91 z#hXp6zu~<$qhQm3*RFfn55CvE`QumQjbG8^g-hVmBv5waN^o7MjHn5Ao&w%cl^tHZ z5Bx@NgOk_>B1o6uEYv3&-YzgHdx5U(1p;L+0A()_DtqPcfNd;g-+da{p8KutX&VD# zY^=&&U{>}5rtAeGWiLQwFAysGu6G2Kcx69%1}QZGbA6CI1jR_LNmeh1m3>uGtN8Y@xN;~e}@qvj%ov0MwH!)B``jjZ+CzJ#xk(^0HnRx%CKq(nfO5~CUEVuon z#MEAV-}ng=?=N{kD^3g^f9o1O-j;WH&zrP=YpNq4)1Bwg^IOmAp8t{>#_j`CJaFHH z`yZHWX&7m;Cf&!o!RSP*{{U`?5f7ZJLoe*9)4iahV4ZD>2kPqqI!ZjSu^wQ*I3BnN z@=y<{4$=ik=OI~ald+U0V;*mz(a;SSfkyOrVBJMD@Yo+w*?1rqcf{OmiU;2RBp7r; zDuW@2HCKbdS{R0~GQ(1ujCnjW!}r_!`Z1GVoXwVUrDH6F>u9SNxUM|lF|K#zbw z_p@8%C+BbZha9?PrG};89Uby`YKGI!eb9l(Vgw%K>c+U^xGHQH0kVRIH zIfg%&kQ<#Ign{^ZXMg`G48)(@HV@*aAQ|!4hd5+o;i?#yff$9#@SF{I9=QUXW5ffe z|BRe3Uq&qwAwoP5q8F)$*=HkHM5^ z@xZx{(2m9rC6t74JYWSj{LjS$D?Z6C?6=|*FZ-qW6Yr094}R?U=4)Q|+xIo^Qs>)G zIp4l2hzE=-HN_|+YJ#JUz&lnvaPBX98#G`Wh#*~pv&;_5n|`4w9w^k~fkHt%Pzdor zp)elU{t2*+6%U-ahHRU!>YlbSAjZZT4-}f?fzT2=!Ps?@8wR1_OBfFvyDp%_#{&&b zNNGcpPALqYb;@{~XsnOH`Y(g(`|)5(rac@eC-cgQJpCcdQ~YpZv^>J3^uZ}p%cf0#NGnY=GJk6uJ>t#J`Ia%o z3)N|k!**KNj4gBchTbxNL&mau#}q?6ICa`XGb{}#&CsL=dDj>nZ;js;Glr-SqG$F8 zb+`S<{flFWb08^aA)SGA8qz697TaVjrOBAbTWB=?jTJy6 zdJM5?1#TR0s?r8($OE~!Bj#pP3~}=>!C)Sw#qblwj*lV2f$<0{Gc2Xan8!0Sd|$na zVM(Tmi>Y_P--YAA;2 z0Wm~Rh#`7&F+`u3F$CsDVhF$y#t?OD!IWq*ME!Trj^;HsQw(7RHvAu1GBu9PkGtxv zeLrp9Qu|f-@m`X~HG0)UTPk0j+87#AfaBJ4ffVCH@QnUx^om;G#P@)9)DQyJOx3N^ zo8Y_H1R_Y6;4Cx45<(Q2LWm+ggeVe(5JeC|6bVC!T^oRHtPtY#%g8pcUU#&O0Wmh# z5TeK&LKLwfg(A_ALJ@`#MZyr`yO{!v{hb^wcG~=XY`5B1^>+IkcL2NFOhL!Y zM=Ivdw=}dgUz29??lOAnE!)9eF$OMb( zM0jc~)Nek8osV%Ud<(#dE~-tnY220(Fk}9h)N^;wL+#ydDG^&B(M0S^L=&-D5bcP~ zd1yj5#GxIs6Ag`HgBTj;ZY?za4O3`5JDoUAcXMx z$TCUVA?p@tTuTgToC^Wzmu9&fjcdW1<5K7`iDOM0P0Ru`nxNHUG(k(dXa_B~qKR7j zL_2CR5{-A=5RHFz5B;*Qa-ngpQ*mtX%H$`Gr6@Ep>qcmT7J1MFt;nDqv`T^|YRLla zs8s|s-U<8tU(i(`5?P0UAXbe4*aZGQbiJ6N;6Ety%CTNxq?Vy=AG*Q!FXh%&9>3plE8hxbI+fG6iJed_BAmk*u)O0&mNXLENAG{ VNn=W$IwJXDarBp+kZnEXVT(CIjj$u zL`umdb&65lIB#BS&@QSByE(&bL!FfwD5v|^w(wQ(1CbY($B3(9%ETs-Pa@jQbXa(6nQ+#CaHzylTFVmgD8EX&uyRd5+x z1n0q7&8DHHzW9s@jwI_aDZ(G*X^5U!wn_9ozIbUfI zS+}Z9qAJDBhlOsLyU-hSM1=0-4X1c(NH7p&HyLlK&=wWpB6pB}m!&UJE%aO2hiDHp z5}$MJ=@7 z84K;BYqUj0-=KY{^Qwwk52->Y-cr%bmsFv{Ar;j$szUols%T=SDzrzfqLPzb=-u}? z^1sf7wm-{}XD%1|-2{$u9bD+OB#tcSDfH4ljLJ7D^xP_pg+&zl#dIRg{S;cN3>c!& zk3N;@(mQgfZJkV~pOix_56bjei5#lWmg!KO9Gdc#L?7;zLgkw!>iCru8nr;8x2vU4 zp+}-^{iIO-l`i`2!LCsDjxJjNLRVbisWO4E$IQW&lM-FN%7 zNw;dB?+oTgYkxWs+P|TP_R%%kqN0fQFRR7XLid!jJjPBNUY>;>jVa`zGg^+Z#qLD{ zW2a@6z;rbo&uBO!`^(B?1kT_Kj7wxMW?2Ph)1KdiE>HC6&H@x2eduacC?syY|gZ+MU73HIk<|5d#@r^48(tqGb@brk?ml0fyn~0!s|HqdQ z?0vZr!O(}kyog}Edm(}(>bij7iH;`)=KW%2JjSL>lY`GIb7yb!S+}@G*8QdEPH^`F zaAT@TSk4TR+IrH6POz!33H9BuHM(Zo9kTEcli_BKxUFX$I)KSb z&#^yLW$Lw<+ulmk(I3Iv;9c-%unW8g{tEsE{to^DJ_P%~Kf%Y~6YwwaZ}2HN20jBP zz<DI3lHP7k3`{uHUVf@QUhgHH-bh-#U8e~=^>bV*R$pRP z2M$!!jeJZm=*?z6@UC5lPmF%5SrssBS^i{B6Q?gh2eE-!7RlfD;J8^sUg`2{Ub9^H zr#Afl)jrQ%^J@-RuKM@-POMq5_ljTBX}RJbe)-6fgyEO{nx|}+{BO?rT=Xv! z4p=Vw&rT~?mGROAzh;%?g5O=T#8tB7yk8TrocAv|=kN|#a!$GSIsdC0vmWYLeb%4F zENA^~rd_u7b{ODjid07D~u zbo#&BpxkdWiP{@Rv!;2EG`2PeJw2ncU2R=47-p}?P)23=xV37G%3fI{VEkTQT;DS) zUsWYy6G3)PASZAE4<{JC%8t%q7u4hgF5nT=LOP;b;GJI1LitGEjtjZlaU*v-9^?S; zadEB6Ntgosza1}fx8p+&xO^_#FwqwD{yzujJ-&G5w&*+Vn>hKhgC*w-^gxvXksZJxIuv?miNVQgUF$| z1?C86oybPPoZ)OfvQe=7aMpz^;9YKws2Rju;iMbMD408(^&lGs^Mtbn$VR~m!dWk} zQ7~^f>q8b4`rP3*`3fTce7*ILvHN_+7_>Utg1cX)uDW_#;Eh<%U?&oZRq1fJssHB-f?^^EZX6SYhot4-4}rqMa$iD~2Fhb9u! z=@W~Rh{Y!)jZG$&*eAKtKrE@Rp~groSu{4L5;OEmooym!Ofj{k6HD!%zQ|0>lxAL> zK`ebh#?uyJ=7E+KR$>{0tgCIrESa{o*~F}avp3`rvt{LM$s?A1U*2{*v7B7{J5FMG zL!2EhVs?k?0}nB0zUQEqn9J=wI+U2FVCWZx1S1O14ktEr*zl_XVukkyP8X3-#D@Ee zP8XBDm<2`@OQT5`&5A~jmP$xp!iqtRSYbNWg2aq zP9L9;*e9uPvS{dMOiAr;N=qML9+)x6l4%`m%gVklCpT}1-Qmo4xjhA5-_T)&_Ye06 zMih-K9yNMQN$J>eW#h{yR7|X_s-84?%G7B!(`#qc1?wA(-*x@*mnXy%6*r>8@DS~pFMkvaKYgbyPO4;Kkqk6&2Si&%{~vXerfMR0SqMM9?}dggo7 ze``uvZMsuEy#a}MQ6;L3-`HwA7wzU!Oes6=`t{B|c~N1#Cy(6Ld-CvT{a&6vtoLT0 zZ2f+AVb*W8o3MV{1<<-h=7EaJT^Dh-?}28`>Uzq= 10 and i <= 19) or (i >= 20 and (i + j) % 2 == 0): rows[i][j] = None - write_parquet('100-rows-nulls.parquet', rows, types,row_group_size=100) + write_parquet('100-rows-nulls.parquet', rows, types,row_group_size=10) write_unsupported_parquets() diff --git a/parquet/cmds.txt b/parquet/cmds.txt index 122e932..aa33e47 100644 --- a/parquet/cmds.txt +++ b/parquet/cmds.txt @@ -1,24 +1,24 @@ .load ./libparquet .headers on -select 'creating without enough args'; -create virtual table noargs using parquet; +--select 'creating without enough args'; +--create virtual table noargs using parquet; -select 'creating with invalid file'; -create virtual table nonexistent using parquet('nonexistent'); +--select 'creating with invalid file'; +--create virtual table nonexistent using parquet('nonexistent'); -select 'creating others'; -create virtual table others using parquet('/home/cldellow/src/parquet-vtable/parquet/others.parquet'); -select * from others limit 1; +--select 'creating others'; +--create virtual table others using parquet('/home/cldellow/src/parquet-vtable/parquet/others.parquet'); +--select * from others limit 1; --select 'creating with valid file'; ---create virtual table parquet using parquet('/home/cldellow/src/csv2parquet/12m.parquet.snappy'); ---.tables ---.schema parquet ---.fullschema ---.timer on +create virtual table parquet using parquet('/home/cldellow/src/csv2parquet/12m.parquet.snappy'); +.tables +.timer on +.echo on --select count(*) from (select * from parquet limit 1); ---select rowid,col0 from parquet where rowid > 5 limit 5; +--select rowid,col0,col3,col9 from parquet where rowid > 5 limit 5; --select count(*) from parquet limit 1; --select sum(col0) from parquet limit 1; --select * from parquet limit 10; --select sum(length(col3)) from parquet; +select * from parquet where (col3 = 'Dawson Creeks') or col9 LIKE '%Bicycqq%' limit 20000; diff --git a/parquet/parquet.cc b/parquet/parquet.cc index 7f2cb9e..159dd3b 100644 --- a/parquet/parquet.cc +++ b/parquet/parquet.cc @@ -252,6 +252,7 @@ static int parquetFilter( int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ + printf("xFilter: idxNum=%d\n", idxNum); ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; cursor->reset(); return parquetNext(cur); @@ -265,7 +266,67 @@ static int parquetBestIndex( sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo ){ - pIdxInfo->estimatedCost = 1000000; + printf("xBestIndex: nConstraint=%d, nOrderBy=%d\n", pIdxInfo->nConstraint, pIdxInfo->nOrderBy); + // Duplicate pIdxInfo and stash it in pIdxInfo->idxStr. + for(int i = 0; i < pIdxInfo->nConstraint; i++) { + printf(" constraint %d: col %d, op %d, usable %d\n", + i, + pIdxInfo->aConstraint[i].iColumn, + pIdxInfo->aConstraint[i].op, + pIdxInfo->aConstraint[i].usable); + } + + if(true || (pIdxInfo->nConstraint == 0 && pIdxInfo->nOrderBy == 0)) { + pIdxInfo->estimatedCost = 1000000000000; + pIdxInfo->idxNum = 0; + pIdxInfo->estimatedRows = 10000; + } else { + pIdxInfo->estimatedCost = 1; + pIdxInfo->idxNum = 1; + pIdxInfo->estimatedRows = 100000; + pIdxInfo->aConstraintUsage[0].argvIndex = 1; +// pIdxInfo->idxFlags = SQLITE_INDEX_SCAN_UNIQUE; + } + printf("idx %d has cost %f\n", pIdxInfo->idxNum, pIdxInfo->estimatedCost); + + size_t dupeSize = sizeof(sqlite3_index_info) + + //pIdxInfo->nConstraint * sizeof(sqlite3_index_constraint) + + pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) + + pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby) + + pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint_usage); + sqlite3_index_info* dupe = (sqlite3_index_info*)sqlite3_malloc(dupeSize); + pIdxInfo->idxStr = (char*)dupe; + pIdxInfo->needToFreeIdxStr = 1; + + // TODO: populate argvIndex. + memset(dupe, 0, dupeSize); + memcpy(dupe, pIdxInfo, sizeof(sqlite3_index_info)); + + dupe->aConstraint = (sqlite3_index_info::sqlite3_index_constraint*)((char*)dupe + sizeof(sqlite3_index_info)); + dupe->aOrderBy = (sqlite3_index_info::sqlite3_index_orderby*)((char*)dupe + + sizeof(sqlite3_index_info) + + pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint)); + dupe->aConstraintUsage = (sqlite3_index_info::sqlite3_index_constraint_usage*)((char*)dupe + + sizeof(sqlite3_index_info) + + pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) + + pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby)); + + + for(int i = 0; i < pIdxInfo->nConstraint; i++) { + dupe->aConstraint[i].iColumn = pIdxInfo->aConstraint[i].iColumn; + dupe->aConstraint[i].op = pIdxInfo->aConstraint[i].op; + dupe->aConstraint[i].usable = pIdxInfo->aConstraint[i].usable; + dupe->aConstraint[i].iTermOffset = pIdxInfo->aConstraint[i].iTermOffset; + + dupe->aConstraintUsage[i].argvIndex = pIdxInfo->aConstraintUsage[i].argvIndex; + dupe->aConstraintUsage[i].omit = pIdxInfo->aConstraintUsage[i].omit; + } + + for(int i = 0; i < pIdxInfo->nOrderBy; i++) { + dupe->aOrderBy[i].iColumn = pIdxInfo->aOrderBy[i].iColumn; + dupe->aOrderBy[i].desc = pIdxInfo->aOrderBy[i].desc; + } + return SQLITE_OK; } diff --git a/tests/queries/008-nulls.sql b/tests/queries/008-nulls.sql index 7f58a56..a113c7d 100644 --- a/tests/queries/008-nulls.sql +++ b/tests/queries/008-nulls.sql @@ -1,3 +1,3 @@ 100-rows-nulls.parquet -SELECT SUM(CASE WHEN bool_0 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int8_1 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int16_2 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int32_3 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int64_4 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN ts_5 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN double_6 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN string_7 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN string_8 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN binary_9 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN fixed_size_binary IS NULL THEN 1 ELSE 0 END) from test; +SELECT SUM(CASE WHEN bool_0 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int8_1 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int16_2 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int32_3 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN int64_4 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN ts_5 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN double_6 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN string_7 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN string_8 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN binary_9 IS NULL THEN 1 ELSE 0 END), SUM(CASE WHEN binary_10 IS NULL THEN 1 ELSE 0 END) from test; 50|50|50|50|50|50|50|50|50|50|50