From 790ef221cdd2b766dc9066f728bed1c173a04186 Mon Sep 17 00:00:00 2001 From: Lukas Sismis Date: Thu, 11 Jul 2024 21:04:26 +0200 Subject: [PATCH] hyperscan: add caching mechanism for hyperscan contexts Cache Hyperscan serialized databases to disk to prevent compilation of the same databases when Suricata is run again with the same ruleset. The current work operates in /tmp/ folder and caches individual signature group heads - potentially the ruleset might be even slightly changed and it still can reuse part of the unchanged signature groups. Loading *fresh* ET Open ruleset: 19 seconds Loading *cached* ET Open ruleset: 07 seconds --- src/util-mpm-hs.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/src/util-mpm-hs.c b/src/util-mpm-hs.c index 82b91b4ec0e8..aaf2090297f4 100644 --- a/src/util-mpm-hs.c +++ b/src/util-mpm-hs.c @@ -570,6 +570,134 @@ static PatternDatabase *PatternDatabaseAlloc(uint32_t pattern_cnt) return pd; } +static const char *HSCacheConstructFPath(uint32_t hs_db_hash) +{ + static char hash_filepath[2048]; + char hash_file_path_prefix_path[] = "/tmp/"; + char hash_file_path_suffix[] = "_v1.hs.bin"; + uint16_t hash_file_bytes_written = 0; + snprintf(hash_filepath, sizeof(hash_filepath), "%s", hash_file_path_prefix_path); + hash_file_bytes_written += sizeof(hash_file_path_prefix_path) - 1; + + snprintf(hash_filepath + hash_file_bytes_written, + sizeof(hash_filepath) - hash_file_bytes_written, "%010u", hs_db_hash); + hash_file_bytes_written += 10; + + snprintf(hash_filepath + hash_file_bytes_written, + sizeof(hash_filepath) - hash_file_bytes_written, "%s", hash_file_path_suffix); + return hash_filepath; +} + +static char *HSReadStream(const char *filePath, size_t *bufferSize) +{ + FILE *file = fopen(filePath, "rb"); + if (!file) { + perror("Failed to open file"); + return NULL; + } + + // Seek to the end of the file to determine its size + fseek(file, 0, SEEK_END); + long fileSize = ftell(file); + if (fileSize < 0) { + perror("Failed to determine file size"); + fclose(file); + return NULL; + } + + // Allocate a buffer to hold the entire file + char *buffer = (char *)malloc(fileSize); + if (!buffer) { + perror("Failed to allocate memory"); + fclose(file); + return NULL; + } + + // Rewind file pointer and read the file into the buffer + rewind(file); + size_t bytesRead = fread(buffer, 1, fileSize, file); + if (bytesRead != (size_t)fileSize) { + perror("Failed to read the entire file"); + free(buffer); + fclose(file); + return NULL; + } + + *bufferSize = fileSize; + fclose(file); + return buffer; +} + +static int HSLoadCache(hs_database_t **hs_db, uint32_t hs_db_hash) +{ + int ret = -1; + const char *hash_file_static = HSCacheConstructFPath(hs_db_hash); + SCLogDebug("Loading the cached HS DB from %s", hash_file_static); + + FILE *db_cache = fopen(hash_file_static, "r"); + char *buffer = NULL; + if (db_cache) { + size_t bufferSize; + buffer = HSReadStream(hash_file_static, &bufferSize); + if (!buffer) { + SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static); + ret = -1; + goto freeup; + } + + hs_error_t error = hs_deserialize_database(buffer, bufferSize, hs_db); + if (error != HS_SUCCESS) { + SCLogWarning("Failed to deserialize Hyperscan database: %d", error); + ret = -1; + goto freeup; + } + + ret = 0; + goto freeup; + } + +freeup: + if (db_cache) + fclose(db_cache); + if (buffer) + free(buffer); + return ret; +} + +static int HSSaveCache(hs_database_t *hs_db, uint32_t hs_db_hash) +{ + char *db_stream; + size_t db_size; + + hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size); + if (err != HS_SUCCESS) { + SCLogWarning("Failed to serialize Hyperscan database: %d", err); + return -1; + } + + const char *hash_file_static = HSCacheConstructFPath(hs_db_hash); + SCLogDebug("Caching the compiled HS at %s", hash_file_static); + + FILE *db_cache_out = fopen(hash_file_static, "w"); + if (!db_cache_out) { + SCLogWarning("Failed to open file: %s", hash_file_static); + return -1; + } + int ret = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out); + if (ret > 0 && (size_t)ret != db_size) { + SCLogWarning("Failed to write to file: %s", hash_file_static); + return -1; + } + ret = fclose(db_cache_out); + if (ret != 0) { + SCLogWarning("Failed to close file: %s", hash_file_static); + return -1; + } + free(db_stream); + + return 0; +} + /** * \brief Process the patterns added to the mpm, and create the internal tables. * @@ -633,6 +761,7 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx) * before, and reuse the Hyperscan database if so. */ PatternDatabase *pd_cached = HashTableLookup(g_db_table, pd, 1); + uint32_t cached_hash = 0; if (pd_cached != NULL) { SCLogDebug("Reusing cached database %p with %" PRIu32 " patterns (ref_cnt=%" PRIu32 ")", @@ -644,6 +773,48 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx) PatternDatabaseFree(pd); SCHSFreeCompileData(cd); return 0; + } else { + uint32_t hash = 0; + hash = hashword(&pd->pattern_cnt, 1, hash); + + for (uint32_t i = 0; i < pd->pattern_cnt; i++) { + hash = SCHSPatternHash(pd->parray[i], hash); + } + + cached_hash = hash; + + int r = HSLoadCache(&pd->hs_db, cached_hash); + if (r == 0) { + pd->ref_cnt = 1; + ctx->pattern_db = pd; + + SCMutexLock(&g_scratch_proto_mutex); + err = hs_alloc_scratch(pd->hs_db, &g_scratch_proto); + SCMutexUnlock(&g_scratch_proto_mutex); + if (err != HS_SUCCESS) { + SCLogError("failed to allocate scratch"); + SCMutexUnlock(&g_db_table_mutex); + goto error; + } + + err = hs_database_size(pd->hs_db, &ctx->hs_db_size); + if (err != HS_SUCCESS) { + SCLogError("failed to query database size"); + SCMutexUnlock(&g_db_table_mutex); + goto error; + } + + mpm_ctx->memory_cnt++; + mpm_ctx->memory_size += ctx->hs_db_size; + + r = HashTableAdd(g_db_table, pd, 1); + SCMutexUnlock(&g_db_table_mutex); + if (r < 0) + goto error; + + SCMutexUnlock(&g_db_table_mutex); + return 0; + } } BUG_ON(ctx->pattern_db != NULL); /* already built? */ @@ -725,6 +896,10 @@ int SCHSPreparePatterns(MpmCtx *mpm_ctx) if (r < 0) goto error; + r = HSSaveCache(pd->hs_db, cached_hash); + if (r < 0) + goto error; + SCHSFreeCompileData(cd); return 0;