don't segfault on low memory

Fixes #8
This commit is contained in:
Colin Dellow 2018-03-24 12:48:29 -04:00
parent 6fa7bc3d0b
commit 51d0f27a68
2 changed files with 233 additions and 189 deletions

View File

@ -72,32 +72,38 @@ static int parquetConnect(
sqlite3_vtab **ppVtab, sqlite3_vtab **ppVtab,
char **pzErr char **pzErr
){ ){
if(argc != 4 || strlen(argv[3]) < 2) {
*pzErr = sqlite3_mprintf("must provide exactly one argument, the path to a parquet file");
return SQLITE_ERROR;
}
// Remove the delimiting single quotes
std::string fname = argv[3];
fname = fname.substr(1, fname.length() - 2);
std::unique_ptr<sqlite3_vtab_parquet, void(*)(void*)> vtab(
(sqlite3_vtab_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_parquet)),
sqlite3_free);
memset(vtab.get(), 0, sizeof(*vtab.get()));
try { try {
std::unique_ptr<ParquetTable> table(new ParquetTable(fname)); if(argc != 4 || strlen(argv[3]) < 2) {
*pzErr = sqlite3_mprintf("must provide exactly one argument, the path to a parquet file");
return SQLITE_ERROR;
}
std::string create = table->CreateStatement(); // Remove the delimiting single quotes
int rc = sqlite3_declare_vtab(db, create.data()); std::string fname = argv[3];
if(rc) fname = fname.substr(1, fname.length() - 2);
return rc; std::unique_ptr<sqlite3_vtab_parquet, void(*)(void*)> vtab(
(sqlite3_vtab_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_parquet)),
sqlite3_free);
memset(vtab.get(), 0, sizeof(*vtab.get()));
vtab->table = table.release(); try {
*ppVtab = (sqlite3_vtab*)vtab.release(); std::unique_ptr<ParquetTable> table(new ParquetTable(fname));
return SQLITE_OK;
} catch (const std::exception& e) { std::string create = table->CreateStatement();
*pzErr = sqlite3_mprintf(e.what()); int rc = sqlite3_declare_vtab(db, create.data());
if(rc)
return rc;
vtab->table = table.release();
*ppVtab = (sqlite3_vtab*)vtab.release();
return SQLITE_OK;
} catch (const std::exception& e) {
*pzErr = sqlite3_mprintf(e.what());
return SQLITE_ERROR;
}
} catch(std::bad_alloc& ba) {
return SQLITE_NOMEM;
} catch(std::exception& e) {
return SQLITE_ERROR; return SQLITE_ERROR;
} }
} }
@ -131,16 +137,22 @@ static int parquetClose(sqlite3_vtab_cursor *cur){
** Constructor for a new sqlite3_vtab_parquet cursor object. ** Constructor for a new sqlite3_vtab_parquet cursor object.
*/ */
static int parquetOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ static int parquetOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
std::unique_ptr<sqlite3_vtab_cursor_parquet, void(*)(void*)> cursor( try {
(sqlite3_vtab_cursor_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_cursor_parquet)), std::unique_ptr<sqlite3_vtab_cursor_parquet, void(*)(void*)> cursor(
sqlite3_free); (sqlite3_vtab_cursor_parquet*)sqlite3_malloc(sizeof(sqlite3_vtab_cursor_parquet)),
memset(cursor.get(), 0, sizeof(*cursor.get())); sqlite3_free);
memset(cursor.get(), 0, sizeof(*cursor.get()));
sqlite3_vtab_parquet* pParquet = (sqlite3_vtab_parquet*)p; sqlite3_vtab_parquet* pParquet = (sqlite3_vtab_parquet*)p;
cursor->cursor = new ParquetCursor(pParquet->table); cursor->cursor = new ParquetCursor(pParquet->table);
*ppCursor = (sqlite3_vtab_cursor*)cursor.release(); *ppCursor = (sqlite3_vtab_cursor*)cursor.release();
return SQLITE_OK; return SQLITE_OK;
} catch(std::bad_alloc& ba) {
return SQLITE_NOMEM;
} catch(std::exception& e) {
return SQLITE_ERROR;
}
} }
const char* opName(int op) { const char* opName(int op) {
@ -183,9 +195,15 @@ const char* opName(int op) {
** Set the EOF marker if we reach the end of input. ** Set the EOF marker if we reach the end of input.
*/ */
static int parquetNext(sqlite3_vtab_cursor *cur){ static int parquetNext(sqlite3_vtab_cursor *cur){
ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; try {
cursor->next(); ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
return SQLITE_OK; cursor->next();
return SQLITE_OK;
} catch(std::bad_alloc& ba) {
return SQLITE_NOMEM;
} catch(std::exception& e) {
return SQLITE_ERROR;
}
} }
/* /*
@ -197,64 +215,70 @@ static int parquetColumn(
sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
int col /* Which column to return */ int col /* Which column to return */
){ ){
ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; try {
cursor->ensureColumn(col); ParquetCursor *cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
cursor->ensureColumn(col);
if(cursor->isNull(col)) { if(cursor->isNull(col)) {
sqlite3_result_null(ctx); sqlite3_result_null(ctx);
} else { } else {
switch(cursor->getPhysicalType(col)) { switch(cursor->getPhysicalType(col)) {
case parquet::Type::BOOLEAN: case parquet::Type::BOOLEAN:
case parquet::Type::INT32: case parquet::Type::INT32:
{ {
int rv = cursor->getInt32(col); int rv = cursor->getInt32(col);
sqlite3_result_int(ctx, rv); sqlite3_result_int(ctx, rv);
break; break;
}
case parquet::Type::FLOAT:
case parquet::Type::DOUBLE:
{
double rv = cursor->getDouble(col);
sqlite3_result_double(ctx, rv);
break;
}
case parquet::Type::BYTE_ARRAY:
{
parquet::ByteArray* rv = cursor->getByteArray(col);
if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
} else {
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
} }
break; case parquet::Type::FLOAT:
} case parquet::Type::DOUBLE:
case parquet::Type::INT96: {
// This type exists to store timestamps in nanoseconds due to legacy double rv = cursor->getDouble(col);
// reasons. We just interpret it as a timestamp in milliseconds. sqlite3_result_double(ctx, rv);
case parquet::Type::INT64: break;
{ }
long rv = cursor->getInt64(col); case parquet::Type::BYTE_ARRAY:
sqlite3_result_int64(ctx, rv); {
break; parquet::ByteArray* rv = cursor->getByteArray(col);
} if(cursor->getLogicalType(col) == parquet::LogicalType::UTF8) {
case parquet::Type::FIXED_LEN_BYTE_ARRAY: sqlite3_result_text(ctx, (const char*)rv->ptr, rv->len, SQLITE_TRANSIENT);
{ } else {
parquet::ByteArray* rv = cursor->getByteArray(col); sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT); }
break; break;
} }
default: case parquet::Type::INT96:
// Should be impossible to get here as we should have forbidden this at // This type exists to store timestamps in nanoseconds due to legacy
// CREATE time -- maybe file changed underneath us? // reasons. We just interpret it as a timestamp in milliseconds.
std::ostringstream ss; case parquet::Type::INT64:
ss << __FILE__ << ":" << __LINE__ << ": column " << col << " has unsupported type: " << {
parquet::TypeToString(cursor->getPhysicalType(col)); long rv = cursor->getInt64(col);
sqlite3_result_int64(ctx, rv);
break;
}
case parquet::Type::FIXED_LEN_BYTE_ARRAY:
{
parquet::ByteArray* rv = cursor->getByteArray(col);
sqlite3_result_blob(ctx, (void*)rv->ptr, rv->len, SQLITE_TRANSIENT);
break;
}
default:
// Should be impossible to get here as we should have forbidden this at
// CREATE time -- maybe file changed underneath us?
std::ostringstream ss;
ss << __FILE__ << ":" << __LINE__ << ": column " << col << " has unsupported type: " <<
parquet::TypeToString(cursor->getPhysicalType(col));
throw std::invalid_argument(ss.str()); throw std::invalid_argument(ss.str());
break; break;
}
} }
return SQLITE_OK;
} catch(std::bad_alloc& ba) {
return SQLITE_NOMEM;
} catch(std::exception& e) {
return SQLITE_ERROR;
} }
return SQLITE_OK;
} }
/* /*
@ -382,62 +406,68 @@ static int parquetFilter(
int argc, int argc,
sqlite3_value **argv sqlite3_value **argv
){ ){
ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor; try {
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr; ParquetCursor* cursor = ((sqlite3_vtab_cursor_parquet*)cur)->cursor;
sqlite3_index_info* indexInfo = (sqlite3_index_info*)idxStr;
#ifdef DEBUG #ifdef DEBUG
printf("xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", idxNum, (long unsigned int)idxStr, argc); printf("xFilter: idxNum=%d, idxStr=%lu, argc=%d\n", idxNum, (long unsigned int)idxStr, argc);
debugConstraints(indexInfo, cursor->getTable(), argc, argv); debugConstraints(indexInfo, cursor->getTable(), argc, argv);
#endif #endif
std::vector<Constraint> constraints; std::vector<Constraint> constraints;
int j = 0; int j = 0;
for(int i = 0; i < indexInfo->nConstraint; i++) { for(int i = 0; i < indexInfo->nConstraint; i++) {
if(!indexInfo->aConstraint[i].usable) { if(!indexInfo->aConstraint[i].usable) {
continue; continue;
}
ValueType type = Null;
int64_t intValue = 0;
double doubleValue = 0;
std::vector<unsigned char> blobValue;
int sqliteType = sqlite3_value_type(argv[j]);
if(sqliteType == SQLITE_INTEGER) {
type = Integer;
intValue = sqlite3_value_int64(argv[j]);
} else if(sqliteType == SQLITE_FLOAT) {
type = Double;
doubleValue = sqlite3_value_double(argv[j]);
} else if(sqliteType == SQLITE_TEXT) {
type = Text;
int len = sqlite3_value_bytes(argv[j]);
const unsigned char* ptr = sqlite3_value_text(argv[j]);
for(int k = 0; k < len; k++) {
blobValue.push_back(ptr[k]);
} }
} else if(sqliteType == SQLITE_BLOB) {
type = Blob;
int len = sqlite3_value_bytes(argv[j]);
const unsigned char* ptr = (const unsigned char*)sqlite3_value_blob(argv[j]);
for(int k = 0; k < len; k++) {
blobValue.push_back(ptr[k]);
}
} else if(sqliteType == SQLITE_NULL) {
type = Null;
}
Constraint constraint( ValueType type = Null;
indexInfo->aConstraint[i].iColumn, int64_t intValue = 0;
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op), double doubleValue = 0;
type, std::vector<unsigned char> blobValue;
intValue, int sqliteType = sqlite3_value_type(argv[j]);
doubleValue,
blobValue); if(sqliteType == SQLITE_INTEGER) {
constraints.push_back(constraint); type = Integer;
j++; intValue = sqlite3_value_int64(argv[j]);
} else if(sqliteType == SQLITE_FLOAT) {
type = Double;
doubleValue = sqlite3_value_double(argv[j]);
} else if(sqliteType == SQLITE_TEXT) {
type = Text;
int len = sqlite3_value_bytes(argv[j]);
const unsigned char* ptr = sqlite3_value_text(argv[j]);
for(int k = 0; k < len; k++) {
blobValue.push_back(ptr[k]);
}
} else if(sqliteType == SQLITE_BLOB) {
type = Blob;
int len = sqlite3_value_bytes(argv[j]);
const unsigned char* ptr = (const unsigned char*)sqlite3_value_blob(argv[j]);
for(int k = 0; k < len; k++) {
blobValue.push_back(ptr[k]);
}
} else if(sqliteType == SQLITE_NULL) {
type = Null;
}
Constraint constraint(
indexInfo->aConstraint[i].iColumn,
constraintOperatorFromSqlite(indexInfo->aConstraint[i].op),
type,
intValue,
doubleValue,
blobValue);
constraints.push_back(constraint);
j++;
}
cursor->reset(constraints);
return parquetNext(cur);
} catch(std::bad_alloc& ba) {
return SQLITE_NOMEM;
} catch(std::exception& e) {
return SQLITE_ERROR;
} }
cursor->reset(constraints);
return parquetNext(cur);
} }
/* /*
@ -450,66 +480,72 @@ static int parquetBestIndex(
sqlite3_vtab *tab, sqlite3_vtab *tab,
sqlite3_index_info *pIdxInfo sqlite3_index_info *pIdxInfo
){ ){
try {
#ifdef DEBUG #ifdef DEBUG
ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table; ParquetTable* table = ((sqlite3_vtab_parquet*)tab)->table;
printf("xBestIndex: nConstraint=%d, nOrderBy=%d\n", pIdxInfo->nConstraint, pIdxInfo->nOrderBy); printf("xBestIndex: nConstraint=%d, nOrderBy=%d\n", pIdxInfo->nConstraint, pIdxInfo->nOrderBy);
debugConstraints(pIdxInfo, table, 0, NULL); debugConstraints(pIdxInfo, table, 0, NULL);
#endif #endif
if(pIdxInfo->nConstraint == 0) { if(pIdxInfo->nConstraint == 0) {
pIdxInfo->estimatedCost = 1000000000000; pIdxInfo->estimatedCost = 1000000000000;
pIdxInfo->idxNum = 0; pIdxInfo->idxNum = 0;
} else { } else {
pIdxInfo->estimatedCost = 1; pIdxInfo->estimatedCost = 1;
pIdxInfo->idxNum = 1; pIdxInfo->idxNum = 1;
int j = 0; int j = 0;
for(int i = 0; i < pIdxInfo->nConstraint; i++) { for(int i = 0; i < pIdxInfo->nConstraint; i++) {
if(pIdxInfo->aConstraint[i].usable) { if(pIdxInfo->aConstraint[i].usable) {
j++; j++;
pIdxInfo->aConstraintUsage[i].argvIndex = j; pIdxInfo->aConstraintUsage[i].argvIndex = j;
}
} }
} }
}
size_t dupeSize = sizeof(sqlite3_index_info) + size_t dupeSize = sizeof(sqlite3_index_info) +
//pIdxInfo->nConstraint * sizeof(sqlite3_index_constraint) + //pIdxInfo->nConstraint * sizeof(sqlite3_index_constraint) +
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) +
pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby) +
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint_usage);
sqlite3_index_info* dupe = (sqlite3_index_info*)sqlite3_malloc(dupeSize);
pIdxInfo->idxStr = (char*)dupe;
pIdxInfo->needToFreeIdxStr = 1;
memset(dupe, 0, dupeSize);
memcpy(dupe, pIdxInfo, sizeof(sqlite3_index_info));
dupe->aConstraint = (sqlite3_index_info::sqlite3_index_constraint*)((char*)dupe + sizeof(sqlite3_index_info));
dupe->aOrderBy = (sqlite3_index_info::sqlite3_index_orderby*)((char*)dupe +
sizeof(sqlite3_index_info) +
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint));
dupe->aConstraintUsage = (sqlite3_index_info::sqlite3_index_constraint_usage*)((char*)dupe +
sizeof(sqlite3_index_info) +
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) + pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) +
pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby)); pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby) +
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint_usage);
sqlite3_index_info* dupe = (sqlite3_index_info*)sqlite3_malloc(dupeSize);
pIdxInfo->idxStr = (char*)dupe;
pIdxInfo->needToFreeIdxStr = 1;
memset(dupe, 0, dupeSize);
memcpy(dupe, pIdxInfo, sizeof(sqlite3_index_info));
dupe->aConstraint = (sqlite3_index_info::sqlite3_index_constraint*)((char*)dupe + sizeof(sqlite3_index_info));
dupe->aOrderBy = (sqlite3_index_info::sqlite3_index_orderby*)((char*)dupe +
sizeof(sqlite3_index_info) +
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint));
dupe->aConstraintUsage = (sqlite3_index_info::sqlite3_index_constraint_usage*)((char*)dupe +
sizeof(sqlite3_index_info) +
pIdxInfo->nConstraint * sizeof(sqlite3_index_info::sqlite3_index_constraint) +
pIdxInfo->nOrderBy * sizeof(sqlite3_index_info::sqlite3_index_orderby));
for(int i = 0; i < pIdxInfo->nConstraint; i++) { for(int i = 0; i < pIdxInfo->nConstraint; i++) {
dupe->aConstraint[i].iColumn = pIdxInfo->aConstraint[i].iColumn; dupe->aConstraint[i].iColumn = pIdxInfo->aConstraint[i].iColumn;
dupe->aConstraint[i].op = pIdxInfo->aConstraint[i].op; dupe->aConstraint[i].op = pIdxInfo->aConstraint[i].op;
dupe->aConstraint[i].usable = pIdxInfo->aConstraint[i].usable; dupe->aConstraint[i].usable = pIdxInfo->aConstraint[i].usable;
dupe->aConstraint[i].iTermOffset = pIdxInfo->aConstraint[i].iTermOffset; dupe->aConstraint[i].iTermOffset = pIdxInfo->aConstraint[i].iTermOffset;
dupe->aConstraintUsage[i].argvIndex = pIdxInfo->aConstraintUsage[i].argvIndex; dupe->aConstraintUsage[i].argvIndex = pIdxInfo->aConstraintUsage[i].argvIndex;
dupe->aConstraintUsage[i].omit = pIdxInfo->aConstraintUsage[i].omit; dupe->aConstraintUsage[i].omit = pIdxInfo->aConstraintUsage[i].omit;
}
for(int i = 0; i < pIdxInfo->nOrderBy; i++) {
dupe->aOrderBy[i].iColumn = pIdxInfo->aOrderBy[i].iColumn;
dupe->aOrderBy[i].desc = pIdxInfo->aOrderBy[i].desc;
}
return SQLITE_OK;
} catch(std::bad_alloc& ba) {
return SQLITE_NOMEM;
} catch(std::exception& e) {
return SQLITE_ERROR;
} }
for(int i = 0; i < pIdxInfo->nOrderBy; i++) {
dupe->aOrderBy[i].iColumn = pIdxInfo->aOrderBy[i].iColumn;
dupe->aOrderBy[i].desc = pIdxInfo->aOrderBy[i].desc;
}
return SQLITE_OK;
} }

View File

@ -3,6 +3,9 @@ set -euo pipefail
# A harness that runs SQLite with the parquet extension in an environment where malloc randomly # A harness that runs SQLite with the parquet extension in an environment where malloc randomly
# fails. "Success" is if the logs don't have any C++ exceptions that talk about std::bad_alloc # fails. "Success" is if the logs don't have any C++ exceptions that talk about std::bad_alloc
#
# The results may need some interpretation: read the log and judge whether
# the segfault came from Python or from SQLite.
ensure_failmalloc() { ensure_failmalloc() {
if [ ! -d libfailmalloc ]; then if [ ! -d libfailmalloc ]; then
@ -19,11 +22,16 @@ ensure_failmalloc() {
run_under_low_memory() { run_under_low_memory() {
start=$(date +%s%3N) start=$(date +%s%3N)
set +e set +e
env LD_PRELOAD="$here"/libfailmalloc/.libs/libfailmalloc.so FAILMALLOC_PROBABILITY=0.00001 ./test-random &> results.bad_alloc env LD_PRELOAD="$here"/libfailmalloc/.libs/libfailmalloc.so FAILMALLOC_PROBABILITY=0.00001 ./test-random >results.bad_alloc 2>&1
set -e rv=$?
now=$(date +%s%3N) now=$(date +%s%3N)
echo "Bailed after $((now-start)) ms" echo "Bailed after $((now-start)) ms"
! grep std::bad_alloc results.bad_alloc set -e
if [ "$rv" -gt 127 ]; then
cat results.bad_alloc
echo "Segfaulted with exit code: $rv"
exit 1
fi
} }
main() { main() {
@ -33,7 +41,7 @@ main() {
ensure_failmalloc ensure_failmalloc
# Sometimes we'll exit due to a Python memory issue, so try a few times. # Sometimes we'll exit due to a Python memory issue, so try a few times.
for i in {0..10}; do for i in {0..100}; do
run_under_low_memory run_under_low_memory
done done
} }