Skip to content

Commit

Permalink
hyperscan: add caching mechanism for hyperscan contexts
Browse files Browse the repository at this point in the history
Cache Hyperscan serialized databases to disk to prevent compilation
of the same databases when Suricata is run again with the same
ruleset.
Hyperscan binary files are stored per rulegroup in the designated
folder, by default in the cached library folder.
Since caching is per signature group heads,
some chunk of the ruleset can change and it still can reuse part of
the unchanged signature groups.

Loading *fresh* ET Open ruleset:  19 seconds
Loading *cached* ET Open ruleset: 07 seconds

Ticket: 7170
  • Loading branch information
Lukas Sismis authored and lukashino committed Jan 13, 2025
1 parent 9b1c453 commit 8f01d2e
Show file tree
Hide file tree
Showing 6 changed files with 384 additions and 17 deletions.
26 changes: 25 additions & 1 deletion doc/userguide/performance/hyperscan.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,28 @@ if it is present on the system in case of the "auto" setting.


If the current suricata installation does not have hyperscan
support, refer to :ref:`installation`
support, refer to :ref:`installation`

Hyperscan caching
~~~~~~~~~~~~~~~~~

Upon startup, Hyperscan compiles and optimizes the ruleset into its own
internal structure. Suricata optimizes the startup process by saving
the Hyperscan internal structures to disk and loading them on the next start.
This prevents the recompilation of the ruleset and results in faster
initialization. If the ruleset is changed, new necessary cache files are
automatically created.

To enable this function, in `suricata.yaml` configure:

::

  # Cache MPM contexts to the disk to avoid rule compilation at the startup.
  # Cache files are created in the standard library directory.
  sgh-mpm-caching: yes
  sgh-mpm-caching-path: /var/lib/suricata/cache/hs


**Note**:
You might need to create the default caching folder and adjust its
permissions, especially if you are running Suricata as a non-root user.
2 changes: 2 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ noinst_HEADERS = \
util-mpm-ac-ks.h \
util-mpm.h \
util-mpm-hs.h \
util-mpm-hs-cache.h \
util-mpm-hs-core.h \
util-optimize.h \
util-pages.h \
Expand Down Expand Up @@ -1077,6 +1078,7 @@ libsuricata_c_a_SOURCES = \
util-mpm-ac-ks-small.c \
util-mpm.c \
util-mpm-hs.c \
util-mpm-hs-cache.c \
util-mpm-hs-core.c \
util-pages.c \
util-path.c \
Expand Down
255 changes: 255 additions & 0 deletions src/util-mpm-hs-cache.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
/* Copyright (C) 2007-2024 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

/**
* \file
*
* \author Lukas Sismis <[email protected]>
*
* Hyperscan database caching: serializes compiled Hyperscan databases to disk
* and loads them back on later runs.
*/

#include "suricata-common.h"
#include "suricata.h"
#include "detect-engine.h"
#include "util-debug.h"
#include "util-hash-lookup3.h"
#include "util-mpm-hs-core.h"
#include "util-mpm-hs-cache.h"
#include "util-path.h"

#include <inttypes.h>

#ifdef BUILD_HYPERSCAN

#include <hs.h>

/**
 * \brief Build the path of the cache file that belongs to a database hash.
 *
 * The file name is the hash printed as a zero-padded 20-digit decimal number
 * followed by "_v1.hs". It is merged with DetectEngineMpmCachingGetPath().
 *
 * \param hs_db_hash hash identifying the pattern database
 *
 * \retval path pointer to a function-static buffer; it is overwritten by the
 *         next call, so the result must be consumed before calling again
 * \retval NULL the file name or the merged path could not be built
 */
static const char *HSCacheConstructFPath(uint64_t hs_db_hash)
{
    static char hash_file_path[PATH_MAX];
    static const char hash_file_path_suffix[] = "_v1.hs";

    char filename[PATH_MAX];
    /* PRIu64 rather than %lu: unsigned long is not 64 bits wide everywhere */
    int r = snprintf(filename, sizeof(filename), "%020" PRIu64 "%s", hs_db_hash,
            hash_file_path_suffix);
    if (r < 0 || (size_t)r != 20 + strlen(hash_file_path_suffix))
        return NULL;

    if (PathMerge(hash_file_path, sizeof(hash_file_path), DetectEngineMpmCachingGetPath(),
                filename) != 0)
        return NULL;

    return hash_file_path;
}

/**
 * \brief Read a whole file into a freshly allocated buffer.
 *
 * \param file_path path of the file to read
 * \param buffer_sz set to the number of bytes read; only written on success
 *
 * \retval buffer heap buffer holding the file content, to be released by the
 *         caller with SCFree()
 * \retval NULL the file could not be opened, sized, or fully read, it is
 *         empty, or the allocation failed
 */
static char *HSReadStream(const char *file_path, size_t *buffer_sz)
{
    char *buffer = NULL;
    long file_sz = 0;

    FILE *file = fopen(file_path, "rb");
    if (!file) {
        SCLogDebug("Failed to open file %s: %s", file_path, strerror(errno));
        return NULL;
    }

    // Seek to the end of the file to determine its size
    if (fseek(file, 0, SEEK_END) != 0) {
        SCLogDebug("Failed to determine file size of %s: %s", file_path, strerror(errno));
        goto error;
    }
    file_sz = ftell(file);
    if (file_sz < 0) {
        SCLogDebug("Failed to determine file size of %s: %s", file_path, strerror(errno));
        goto error;
    }
    if (file_sz == 0) {
        // an empty file cannot hold a serialized database; bail out here
        // instead of depending on what a zero-sized allocation returns
        SCLogDebug("File %s is empty", file_path);
        goto error;
    }

    buffer = (char *)SCCalloc((size_t)file_sz, sizeof(char));
    if (!buffer) {
        SCLogWarning("Failed to allocate memory");
        goto error;
    }

    // Go back to the start (fseek instead of rewind so a failure is visible)
    // and read the file into the buffer
    if (fseek(file, 0, SEEK_SET) != 0) {
        SCLogDebug("Failed to read the entire file %s: %s", file_path, strerror(errno));
        goto error;
    }
    size_t bytes_read = fread(buffer, 1, (size_t)file_sz, file);
    if (bytes_read != (size_t)file_sz) {
        SCLogDebug("Failed to read the entire file %s: %s", file_path, strerror(errno));
        goto error;
    }

    *buffer_sz = (size_t)file_sz;
    fclose(file);
    return buffer;

error:
    if (buffer)
        SCFree(buffer);
    fclose(file);
    return NULL;
}

/**
* Fold one search pattern into a running hash. Only the fields relevant to
* Hyperscan compilation are hashed.
*
* Hashed, in this order: len, flags, the len bytes at original_pat, offset
* and depth. The same h1/h2 pair is handed to every hashlittle2_safe() call.
*
* \param p pattern to hash; original_pat must not be NULL (guarded by BUG_ON)
* \param h1 first hash word, passed through to hashlittle2_safe()
* \param h2 second hash word, passed through to hashlittle2_safe()
*
* NOTE(review): presumably h1/h2 serve as seed on input and as result on
* output, so the calls chain - confirm against hashlittle2_safe().
*/
static void SCHSCachePatternHash(const SCHSPattern *p, uint32_t *h1, uint32_t *h2)
{
BUG_ON(p->original_pat == NULL);
hashlittle2_safe(&p->len, sizeof(p->len), h1, h2);
hashlittle2_safe(&p->flags, sizeof(p->flags), h1, h2);
// the pattern bytes themselves, p->len of them
hashlittle2_safe(p->original_pat, p->len, h1, h2);
hashlittle2_safe(&p->offset, sizeof(p->offset), h1, h2);
hashlittle2_safe(&p->depth, sizeof(p->depth), h1, h2);
}

/**
 * \brief Load a previously cached Hyperscan database from disk.
 *
 * The cache file path is derived from hs_db_hash. The file content is read
 * into memory and handed to hs_deserialize_database().
 *
 * \param hs_db receives the deserialized database on success
 * \param hs_db_hash hash identifying the pattern database
 *
 * \retval 0 the database was loaded into *hs_db
 * \retval -1 the path could not be built, the cache file does not exist,
 *         could not be read, or could not be deserialized
 */
int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash)
{
    const char *hash_file_static = HSCacheConstructFPath(hs_db_hash);
    if (hash_file_static == NULL)
        return -1;

    SCLogDebug("Loading the cached HS DB from %s", hash_file_static);
    if (!SCPathExists(hash_file_static))
        return -1;

    // HSReadStream opens the file itself; a file that exists but cannot be
    // opened or read must be reported as a failure, never as a loaded cache
    size_t buffer_size = 0;
    char *buffer = HSReadStream(hash_file_static, &buffer_size);
    if (buffer == NULL) {
        SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static);
        return -1;
    }

    int ret = 0;
    hs_error_t error = hs_deserialize_database(buffer, buffer_size, hs_db);
    if (error != HS_SUCCESS) {
        SCLogWarning("Failed to deserialize Hyperscan database of %s: %s", hash_file_static,
                HSErrorToStr(error));
        ret = -1;
    }

    SCFree(buffer);
    return ret;
}

/**
 * \brief Serialize a compiled Hyperscan database and store it in the cache.
 *
 * The cache directory is created when missing. A failure to create the
 * directory or the cache file is reported only once per process (tracked by
 * the function-static flag) to avoid one warning per database.
 *
 * \param hs_db compiled database to serialize
 * \param hs_db_hash hash identifying the pattern database; determines the
 *        cache file name
 *
 * \retval 0 the complete serialized database was written and the file closed
 * \retval -1 serialization, path construction, directory or file creation,
 *         writing or closing failed; an incompletely written cache file is
 *         removed
 */
static int HSSaveCache(hs_database_t *hs_db, uint64_t hs_db_hash)
{
    static bool notified = false;
    const char *hash_file_static = NULL;
    char *db_stream = NULL;
    size_t db_size = 0;
    int ret = -1;

    hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size);
    if (err != HS_SUCCESS) {
        SCLogWarning("Failed to serialize Hyperscan database: %s", HSErrorToStr(err));
        goto cleanup;
    }

    hash_file_static = HSCacheConstructFPath(hs_db_hash);
    if (hash_file_static == NULL) {
        SCLogDebug("Failed to construct the Hyperscan cache file path");
        goto cleanup;
    }

    SCLogDebug("Caching the compiled HS at %s", hash_file_static);
    if (SCPathExists(hash_file_static)) {
        // potentially signs that it might not work as expected as we got into
        // hash collision. If this happens with older and not used caches it is
        // fine.
        // It is problematic when one ruleset yields two colliding MPM groups.
        SCLogWarning("Overwriting cache file %s. If the problem persists consider switching off "
                     "the caching",
                hash_file_static);
    }

    if (SCCreateDirectoryTree(DetectEngineMpmCachingGetPath(), true) != 0) {
        if (!notified) {
            SCLogWarning("Failed to create Hyperscan cache folder, make sure "
                         "the parent folder is writeable "
                         "or adjust sgh-mpm-caching-path setting (%s)",
                    DetectEngineMpmCachingGetPath());
            notified = true;
        }
        // leave through cleanup so the serialized stream is released
        goto cleanup;
    }

    // the serialized database is binary data, hence "wb"
    FILE *db_cache_out = fopen(hash_file_static, "wb");
    if (!db_cache_out) {
        if (!notified) {
            SCLogWarning("Failed to create Hyperscan cache file, make sure the folder exist and is "
                         "writable or adjust sgh-mpm-caching-path setting (%s)",
                    hash_file_static);
            notified = true;
        }
        goto cleanup;
    }

    // anything short of db_size items is a failure, including 0 items
    size_t written = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out);
    bool stored = (written == db_size);
    if (!stored) {
        SCLogWarning("Failed to write to file: %s", hash_file_static);
    }

    // close before a possible remove(); fclose also flushes buffered data,
    // so its failure means the file content cannot be trusted either
    if (fclose(db_cache_out) != 0) {
        SCLogWarning("Failed to close file: %s", hash_file_static);
        stored = false;
    }

    if (!stored) {
        // possibly a corrupted DB cache was created
        if (remove(hash_file_static) != 0) {
            SCLogWarning("Failed to remove corrupted cache file: %s", hash_file_static);
        }
        goto cleanup;
    }

    ret = 0;
cleanup:
    if (db_stream)
        SCFree(db_stream);
    return ret;
}

/**
 * \brief Compute the 64-bit hash that identifies a pattern database.
 *
 * The pattern count is hashed first with hashword2(), then every pattern in
 * pd->parray is folded in, in array order, via SCHSCachePatternHash().
 *
 * \param pd pattern database to hash
 *
 * \retval hash the two 32-bit hash words laid out back to back in memory,
 *         read as one uint64_t
 */
uint64_t HSHashDb(const PatternDatabase *pd)
{
    // two separate 32-bit words instead of a uint32_t pointer into a
    // uint64_t, which would break the strict aliasing rule
    uint32_t hash[2] = { 0, 0 };

    hashword2(&pd->pattern_cnt, 1, &hash[0], &hash[1]);
    for (uint32_t i = 0; i < pd->pattern_cnt; i++) {
        SCHSCachePatternHash(pd->parray[i], &hash[0], &hash[1]);
    }

    // memcpy keeps the byte layout of the two words, so the value equals
    // what overlaying a uint64_t on them would give
    uint64_t cached_hash = 0;
    memcpy(&cached_hash, hash, sizeof(cached_hash));
    return cached_hash;
}

/**
 * \brief Iterator callback that stores one pattern database in the cache.
 *
 * Databases flagged no_cache are ignored entirely. Every other database is
 * counted as cacheable; it is then either counted as loaded from the cache
 * (when already marked cached) or saved with HSSaveCache() and, on success,
 * marked cached and counted as saved.
 *
 * \param data the PatternDatabase to process
 * \param aux the PatternDatabaseCache whose counters are updated
 */
void HSSaveCacheIterator(void *data, void *aux)
{
    PatternDatabase *db = (PatternDatabase *)data;
    PatternDatabaseCache *stats = (PatternDatabaseCache *)aux;

    if (!db->no_cache) {
        // only cacheable DBs are counted
        stats->hs_cacheable_dbs_cnt++;

        if (db->cached) {
            stats->hs_dbs_cache_loaded_cnt++;
        } else if (HSSaveCache(db->hs_db, HSHashDb(db)) == 0) {
            // remember it so a rule reload does not save it again
            db->cached = true;
            stats->hs_dbs_cache_saved_cnt++;
        }
    }
}

#endif /* BUILD_HYPERSCAN */
37 changes: 37 additions & 0 deletions src/util-mpm-hs-cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/* Copyright (C) 2024 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

/**
* \file
*
* \author Lukas Sismis <[email protected]>
*
* Hyperscan caching logic for faster database compilation.
*/

#ifndef SURICATA_UTIL_MPM_HS_CACHE__H
#define SURICATA_UTIL_MPM_HS_CACHE__H

#include "util-mpm-hs-core.h"

#ifdef BUILD_HYPERSCAN
/* Load the cached database identified by hs_db_hash into *hs_db. Returns -1
 * when the cache file is missing, cannot be read or cannot be deserialized,
 * 0 otherwise. */
int HSLoadCache(hs_database_t **hs_db, uint64_t hs_db_hash);
/* Compute the 64-bit hash of a pattern database from its pattern count and
 * its patterns; the hash selects the cache file. */
uint64_t HSHashDb(const PatternDatabase *pd);
/* Iterator callback: data is a PatternDatabase, aux a PatternDatabaseCache.
 * Saves a cacheable, not yet cached database to disk and updates the
 * counters in aux. */
void HSSaveCacheIterator(void *data, void *aux);
#endif /* BUILD_HYPERSCAN */

#endif /* SURICATA_UTIL_MPM_HS_CACHE__H */
Loading

0 comments on commit 8f01d2e

Please sign in to comment.