Skip to content

Commit

Permalink
hyperscan: add caching mechanism for hyperscan contexts
Browse files Browse the repository at this point in the history
Cache Hyperscan serialized databases to disk to prevent compilation
of the same databases when Suricata is run again with the same
ruleset.
The current work operates in /tmp/ folder and caches individual
signature group heads - potentially the ruleset might be even
slightly changed and it still can reuse part of the unchanged
signature groups.
Loading *fresh* ET Open ruleset:  19 seconds
Loading *cached* ET Open ruleset: 07 seconds
  • Loading branch information
Lukas Sismis authored and lukashino committed Jul 14, 2024
1 parent 223a419 commit 790ef22
Showing 1 changed file with 175 additions and 0 deletions.
175 changes: 175 additions & 0 deletions src/util-mpm-hs.c
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,134 @@ static PatternDatabase *PatternDatabaseAlloc(uint32_t pattern_cnt)
return pd;
}

static const char *HSCacheConstructFPath(uint32_t hs_db_hash)
{
static char hash_filepath[2048];
char hash_file_path_prefix_path[] = "/tmp/";
char hash_file_path_suffix[] = "_v1.hs.bin";
uint16_t hash_file_bytes_written = 0;
snprintf(hash_filepath, sizeof(hash_filepath), "%s", hash_file_path_prefix_path);
hash_file_bytes_written += sizeof(hash_file_path_prefix_path) - 1;

snprintf(hash_filepath + hash_file_bytes_written,
sizeof(hash_filepath) - hash_file_bytes_written, "%010u", hs_db_hash);
hash_file_bytes_written += 10;

snprintf(hash_filepath + hash_file_bytes_written,
sizeof(hash_filepath) - hash_file_bytes_written, "%s", hash_file_path_suffix);
return hash_filepath;
}

static char *HSReadStream(const char *filePath, size_t *bufferSize)
{
FILE *file = fopen(filePath, "rb");
if (!file) {
perror("Failed to open file");
return NULL;
}

// Seek to the end of the file to determine its size
fseek(file, 0, SEEK_END);
long fileSize = ftell(file);
if (fileSize < 0) {
perror("Failed to determine file size");
fclose(file);
return NULL;
}

// Allocate a buffer to hold the entire file
char *buffer = (char *)malloc(fileSize);
if (!buffer) {
perror("Failed to allocate memory");
fclose(file);
return NULL;
}

// Rewind file pointer and read the file into the buffer
rewind(file);
size_t bytesRead = fread(buffer, 1, fileSize, file);
if (bytesRead != (size_t)fileSize) {
perror("Failed to read the entire file");
free(buffer);
fclose(file);
return NULL;
}

*bufferSize = fileSize;
fclose(file);
return buffer;
}

static int HSLoadCache(hs_database_t **hs_db, uint32_t hs_db_hash)
{
int ret = -1;
const char *hash_file_static = HSCacheConstructFPath(hs_db_hash);
SCLogDebug("Loading the cached HS DB from %s", hash_file_static);

FILE *db_cache = fopen(hash_file_static, "r");
char *buffer = NULL;
if (db_cache) {
size_t bufferSize;
buffer = HSReadStream(hash_file_static, &bufferSize);
if (!buffer) {
SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static);
ret = -1;
goto freeup;
}

hs_error_t error = hs_deserialize_database(buffer, bufferSize, hs_db);
if (error != HS_SUCCESS) {
SCLogWarning("Failed to deserialize Hyperscan database: %d", error);
ret = -1;
goto freeup;
}

ret = 0;
goto freeup;
}

freeup:
if (db_cache)
fclose(db_cache);
if (buffer)
free(buffer);
return ret;
}

static int HSSaveCache(hs_database_t *hs_db, uint32_t hs_db_hash)
{
char *db_stream;
size_t db_size;

hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size);
if (err != HS_SUCCESS) {
SCLogWarning("Failed to serialize Hyperscan database: %d", err);
return -1;
}

const char *hash_file_static = HSCacheConstructFPath(hs_db_hash);
SCLogDebug("Caching the compiled HS at %s", hash_file_static);

FILE *db_cache_out = fopen(hash_file_static, "w");
if (!db_cache_out) {
SCLogWarning("Failed to open file: %s", hash_file_static);
return -1;
}
int ret = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out);
if (ret > 0 && (size_t)ret != db_size) {
SCLogWarning("Failed to write to file: %s", hash_file_static);
return -1;
}
ret = fclose(db_cache_out);
if (ret != 0) {
SCLogWarning("Failed to close file: %s", hash_file_static);
return -1;
}
free(db_stream);

return 0;
}

/**
* \brief Process the patterns added to the mpm, and create the internal tables.
*
Expand Down Expand Up @@ -633,6 +761,7 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx)
* before, and reuse the Hyperscan database if so. */
PatternDatabase *pd_cached = HashTableLookup(g_db_table, pd, 1);

uint32_t cached_hash = 0;
if (pd_cached != NULL) {
SCLogDebug("Reusing cached database %p with %" PRIu32
" patterns (ref_cnt=%" PRIu32 ")",
Expand All @@ -644,6 +773,48 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx)
PatternDatabaseFree(pd);
SCHSFreeCompileData(cd);
return 0;
} else {
uint32_t hash = 0;
hash = hashword(&pd->pattern_cnt, 1, hash);

for (uint32_t i = 0; i < pd->pattern_cnt; i++) {
hash = SCHSPatternHash(pd->parray[i], hash);
}

cached_hash = hash;

int r = HSLoadCache(&pd->hs_db, cached_hash);
if (r == 0) {
pd->ref_cnt = 1;
ctx->pattern_db = pd;

SCMutexLock(&g_scratch_proto_mutex);
err = hs_alloc_scratch(pd->hs_db, &g_scratch_proto);
SCMutexUnlock(&g_scratch_proto_mutex);
if (err != HS_SUCCESS) {
SCLogError("failed to allocate scratch");
SCMutexUnlock(&g_db_table_mutex);
goto error;
}

err = hs_database_size(pd->hs_db, &ctx->hs_db_size);
if (err != HS_SUCCESS) {
SCLogError("failed to query database size");
SCMutexUnlock(&g_db_table_mutex);
goto error;
}

mpm_ctx->memory_cnt++;
mpm_ctx->memory_size += ctx->hs_db_size;

r = HashTableAdd(g_db_table, pd, 1);
SCMutexUnlock(&g_db_table_mutex);
if (r < 0)
goto error;

SCMutexUnlock(&g_db_table_mutex);
return 0;
}
}

BUG_ON(ctx->pattern_db != NULL); /* already built? */
Expand Down Expand Up @@ -725,6 +896,10 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx)
if (r < 0)
goto error;

r = HSSaveCache(pd->hs_db, cached_hash);
if (r < 0)
goto error;

SCHSFreeCompileData(cd);
return 0;

Expand Down

0 comments on commit 790ef22

Please sign in to comment.