From 3675c8c21eda719a3068042a4c2450a20cc064c5 Mon Sep 17 00:00:00 2001
From: setop
Date: Wed, 4 Jan 2023 00:05:32 +0100
Subject: [PATCH] benchmark duckdb

---
Notes (not part of the commit message; text between the "---" line and the
first "diff --git" line is ignored when the patch is applied):

  * Adds bench.sh, which feeds d1.sql .. d8.sql to both `sqlite3 noahs.sqlite`
    and `./duckdb noahs.duckdb` under /usr/bin/time, and load_duckdb.sh, which
    exports the four SQLite tables to CSV and imports them into DuckDB.
  * d2/d5/d7 quote the "desc" column; d5, d6 and d7 are also reworked
    (see their hunks below).
  * bench.sh writes its tab-separated header and row labels with printf:
    `echo -n` and backslash escapes in echo operands are implementation-defined
    for /bin/sh.
  * load_duckdb.sh creates its tables in noahs.duckdb, the database file that
    bench.sh passes to duckdb (previously noahs.duck, which bench.sh never reads).
  * The blob ids on the "index" lines and the commit id on the first line are
    those of the original commit and do not account for the two edits above.

 bench.sh       | 16 ++++++++++++++++
 d1.sql         |  1 +
 d2.sql         |  2 +-
 d5.sql         |  4 ++--
 d6.sql         |  5 +++--
 d7.sql         | 15 ++++++---------
 load_duckdb.sh | 10 ++++++++++
 perf.tsv       | 18 ++++++++++++++++++
 8 files changed, 57 insertions(+), 14 deletions(-)
 create mode 100755 bench.sh
 create mode 100644 d1.sql
 create mode 100755 load_duckdb.sh
 create mode 100644 perf.tsv

diff --git a/bench.sh b/bench.sh
new file mode 100755
index 0000000..8a44012
--- /dev/null
+++ b/bench.sh
@@ -0,0 +1,16 @@
+#!/bin/sh -eu
+
+TIME="/usr/bin/time -f %M\t%P\t%e\t%x"
+
+while read e d;
+do
+  printf 'day\tram\tcpu\telapsed\texit\n' >&2
+  for i in 1 2 3 4 5 6 7 8;
+  do
+    printf 'd%s\t' "$i" >&2
+    $TIME $e $d < "d${i}.sql" > /dev/null
+  done
+done << EEE
+sqlite3 noahs.sqlite
+./duckdb noahs.duckdb
+EEE
diff --git a/d1.sql b/d1.sql
new file mode 100644
index 0000000..bac4187
--- /dev/null
+++ b/d1.sql
@@ -0,0 +1 @@
+select name, phone from customers
diff --git a/d2.sql b/d2.sql
index 40ceaba..86f0f61 100644
--- a/d2.sql
+++ b/d2.sql
@@ -1,7 +1,7 @@
 select *, substr(c.name,1,1) f, SUBSTR(c.name, INSTR(c.name, ' ')+1,1) l from orders as o
 left join orders_items as oi on (oi.orderid = o.orderid)
 left join customers as c on (o.customerid = c.customerid)
-where oi.sku in (select sku from products where desc like '%bagel%')
+where oi.sku in (select sku from products where "desc" like '%bagel%')
 and f == 'J' and l == 'D';
 -- call them all !
 
diff --git a/d5.sql b/d5.sql
index 32cac59..cbf48c6 100644
--- a/d5.sql
+++ b/d5.sql
@@ -1,7 +1,7 @@
-select *, substr(c.name,1,1) f, SUBSTR(c.name, INSTR(c.name, ' ')+1,1) l from orders as o
+select * from orders as o
 left join orders_items as oi on (oi.orderid = o.orderid)
 left join customers as c on (o.customerid = c.customerid)
-where oi.sku in (select sku from products where desc like '%cat food%')
+where oi.sku in (select sku from products where "desc" like '%Cat Food%')
 and citystatezip like 'Queens Village%'
 order by phone;
 -- there is only one woman!
diff --git a/d6.sql b/d6.sql
index b716927..d8c151c 100644
--- a/d6.sql
+++ b/d6.sql
@@ -2,9 +2,10 @@
 with sales as (select * from orders_items as oi
 left join products as p on (oi.sku = p.sku)
 where oi.unit_price < p.wholesale_cost)
-select c.*, count(o.customerid) c from sales as s
+select c.phone, count(o.customerid) c from sales as s
 left join orders as o on (o.orderid = s.orderid)
 left join customers as c on (c.customerid = o.customerid)
-group by o.customerid
+group by o.customerid, c.phone
 order by c desc
+limit 10
 -- pick first one
diff --git a/d7.sql b/d7.sql
index a1f9805..2ae1907 100644
--- a/d7.sql
+++ b/d7.sql
@@ -1,23 +1,20 @@
 with clue as (select substr(ordered,1,INSTR(ordered,' ')-1) date,
- substr(desc,1,INSTR(desc,'(')-2) title,
- substr(desc,INSTR(desc,'(')+1, length(desc)-INSTR(desc,'(')-1) color
+ substr("desc",1,INSTR("desc",'(')-2) title,
+ substr("desc",INSTR("desc",'(')+1, length("desc")-INSTR("desc",'(')-1) color
 from orders_items as oi
 left join orders as o on (o.orderid = oi.orderid)
 left join products as p on (p.sku = oi.sku)
 where o.customerid = 8342
 and title != '')
-select *,
- substr(ordered,1,INSTR(ordered,' ')-1) zdate,
- substr(desc,1,INSTR(desc,'(')-2) ztitle,
- substr(desc,INSTR(desc,'(')+1, length(desc)-INSTR(desc,'(')-1) zcolor
+select *
 from orders_items as oi
 left join orders as o on (o.orderid = oi.orderid)
 left join customers as c on (c.customerid = o.customerid)
 
 left join products as p on (p.sku = oi.sku)
-left join clue as cl on (cl.title = ztitle
- and cl.color != zcolor
- and cl.date = zdate
+left join clue as cl on (cl.title = substr("desc",1,INSTR("desc",'(')-2)
+ and cl.color != substr("desc",INSTR("desc",'(')+1, length("desc")-INSTR("desc",'(')-1)
+ and cl.date = substr(ordered,1,INSTR(ordered,' ')-1)
 )
 where o.customerid != 8342
 and title is not null
diff --git a/load_duckdb.sh b/load_duckdb.sh
new file mode 100755
index 0000000..53e1fc5
--- /dev/null
+++ b/load_duckdb.sh
@@ -0,0 +1,10 @@
+sqlite3 -csv -header noahs.sqlite 'select * from customers' > 1_customers.csv
+sqlite3 -csv -header noahs.sqlite 'select * from products' > 2_products.csv
+sqlite3 -csv -header noahs.sqlite 'select * from orders' > 3_orders.csv
+sqlite3 -csv -header noahs.sqlite 'select * from orders_items' > 4_orders_items.csv
+
+duckdb noahs.duckdb "CREATE TABLE customers AS SELECT * FROM read_csv_auto('1_customers.csv');"
+duckdb noahs.duckdb "CREATE TABLE products AS SELECT * FROM read_csv_auto('2_products.csv');"
+duckdb noahs.duckdb "CREATE TABLE orders AS SELECT * FROM read_csv_auto('3_orders.csv');"
+duckdb noahs.duckdb "CREATE TABLE orders_items AS SELECT * FROM read_csv_auto('4_orders_items.csv');"
+
diff --git a/perf.tsv b/perf.tsv
new file mode 100644
index 0000000..8acd630
--- /dev/null
+++ b/perf.tsv
@@ -0,0 +1,18 @@
+day	ram	cpu	elapsed	exit
+d1	4936	95%	0.02	0
+d2	8536	99%	0.37	0
+d3	4916	100%	0.00	0
+d4	6016	98%	0.05	0
+d5	8532	99%	0.39	0
+d6	6004	99%	0.12	0
+d7	6120	99%	1.06	0
+d8	7412	99%	1.13	0
+day	ram	cpu	elapsed	exit
+d1	28568	100%	0.01	0
+d2	72668	192%	0.06	0
+d3	27436	100%	0.01	0
+d4	37196	166%	0.05	0
+d5	82588	200%	0.08	0
+d6	42108	214%	0.02	0
+d7	87504	303%	0.15	0
+d8	52948	230%	0.05	0