From 71e245ea8f8860475b27dac9741fe98a581762c1 Mon Sep 17 00:00:00 2001
From: Shivani Bhardwaj
Date: Tue, 3 Dec 2024 18:25:18 +0530
Subject: [PATCH] datasets: move initial file reading to rust

---
Review notes (text between the "---" line and the first "diff --git" is not
part of the commit):

- ProcessDatasets() never checks `fname` for NULL and calls
  `.to_str().unwrap()` on it, so a path that is not valid UTF-8 panics inside
  an `extern "C"` function.
- A base64 decode failure is only marked with a "STODO" comment; execution
  falls through and DatasetAdd()/DatasetAddwRep() is still called with the
  contents of `decoded`.
- `v.is_empty()` cannot be true, because `str::split` always yields at least
  one item; an empty line therefore reaches DatasetAdd() with length 0.
- `decoded.len() as u32` truncates silently for buffers longer than u32::MAX.
- A failure to open the file is ignored and ProcessDatasets() returns nothing,
  so DatasetLoadString() returns 0 once the call has been made; the
  "loaded %u records" message is now inside the `#if 0` region.
- `rs_read_datasets(set->load);` is added inside the `#if 0` region and has no
  definition in this patch.
- SCLogNotice!() is emitted for the path and for every input line; this looks
  like debug output -- confirm before merging.

 rust/cbindgen.toml          |  2 +
 rust/src/detect/datasets.rs | 87 +++++++++++++++++++++++++++++++++++++
 rust/src/detect/mod.rs      |  1 +
 src/datasets-reputation.h   |  4 +-
 src/datasets.c              |  8 +++-
 src/datasets.h              |  1 +
 6 files changed, 98 insertions(+), 5 deletions(-)
 create mode 100644 rust/src/detect/datasets.rs

diff --git a/rust/cbindgen.toml b/rust/cbindgen.toml
index eac6aa737760..c9e3b022c60e 100644
--- a/rust/cbindgen.toml
+++ b/rust/cbindgen.toml
@@ -84,6 +84,7 @@ include = [
     "FtpEvent",
     "SCSigTableElmt",
     "SCTransformTableElmt",
+    "DataRepType",
 ]
 
 # A list of items to not include in the generated bindings
@@ -94,6 +95,7 @@ exclude = [
     "CLuaState",
     "DetectEngineState",
     "Flow",
+    "Dataset",
     "StreamingBufferConfig",
     "HttpRangeContainerBlock",
     "FileContainer",
diff --git a/rust/src/detect/datasets.rs b/rust/src/detect/datasets.rs
new file mode 100644
index 000000000000..9446ab0a2e3c
--- /dev/null
+++ b/rust/src/detect/datasets.rs
@@ -0,0 +1,87 @@
+/* Copyright (C) 2024 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+// Author: Shivani Bhardwaj
+
+//! This module exposes items from the datasets C code to Rust.
+
+use std::fs::File;
+use std::io::{self, BufRead};
+use std::path::Path;
+use std::ffi::{c_char, CStr};
+use base64::{Engine, engine::general_purpose::STANDARD};
+
+/// Opaque Dataset type defined in C
+#[derive(Copy, Clone)]
+pub enum Dataset {}
+
+// Simple C type converted to Rust
+#[derive(Debug, PartialEq)]
+#[repr(C)]
+pub struct DataRepType {
+    pub value: u16,
+}
+
+// Extern fns operating on the opaque Dataset type above
+/// cbindgen:ignore
+extern {
+    pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32;
+    pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType) -> i32;
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn ProcessDatasets(set: &Dataset, fname: *const c_char) {
+    let file_string = CStr::from_ptr(fname).to_str().unwrap();
+    let filename = Path::new(file_string);
+    SCLogNotice!("Path: {:?}", filename);
+    if let Ok(lines) = read_lines(filename) {
+        for line in lines.flatten() {
+            SCLogNotice!("{}", line);
+            let v: Vec<&str> = line.split(',').collect();
+            // Ignore empty and invalid lines in dataset/rep file
+            if v.is_empty() || v.len() > 2 {
+                continue;
+            }
+            if v.len() == 1 {
+                // Dataset
+                let mut decoded: Vec<u8> = vec![];
+                if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
+                    // FatalErrorOnInit STODO
+                }
+                DatasetAdd(&set, decoded.as_ptr(), decoded.len() as u32);
+            } else {
+                // Datarep
+                let mut decoded: Vec<u8> = vec![];
+                if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
+                    // FatalErrorOnInit STODO
+                }
+                if let Ok(val) = v[1].to_string().parse::<u16>() {
+                    let rep: DataRepType = DataRepType { value: val };
+                    DatasetAddwRep(&set, decoded.as_ptr(), decoded.len() as u32, &rep);
+                } else {
+                    // FatalErrorOnInit STODO
+                }
+            }
+        }
+    }
+}
+
+fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
+where P: AsRef<Path>, {
+    let file = File::open(filename)?;
+    Ok(io::BufReader::new(file).lines())
+}
diff --git a/rust/src/detect/mod.rs b/rust/src/detect/mod.rs
index 1857c22ee2b2..5ade8bf5d9b1 100644
--- a/rust/src/detect/mod.rs
+++ b/rust/src/detect/mod.rs
@@ -29,6 +29,7 @@ pub mod transforms;
 pub mod uint;
 pub mod uri;
 pub mod tojson;
+pub mod datasets;
 
 use crate::core::AppProto;
 use std::os::raw::{c_int, c_void};
diff --git a/src/datasets-reputation.h b/src/datasets-reputation.h
index 3483d823cd6e..18ced6803705 100644
--- a/src/datasets-reputation.h
+++ b/src/datasets-reputation.h
@@ -24,9 +24,7 @@
 #ifndef SURICATA_DATASETS_REPUTATION_H
 #define SURICATA_DATASETS_REPUTATION_H
 
-typedef struct DataRepType {
-    uint16_t value;
-} DataRepType;
+#include "rust-bindings.h"
 
 typedef struct DataRepResultType {
     bool found;
diff --git a/src/datasets.c b/src/datasets.c
index 402c7d34fe99..709198ad0d92 100644
--- a/src/datasets.c
+++ b/src/datasets.c
@@ -496,6 +496,8 @@ static int DatasetLoadString(Dataset *set)
         return 0;
 
     SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
+    ProcessDatasets(set, set->load);
+#if 0
     const char *fopen_mode = "r";
     if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
         fopen_mode = "a+";
@@ -507,6 +509,7 @@ static int DatasetLoadString(Dataset *set)
         return -1;
     }
 
+    rs_read_datasets(set->load);
     uint32_t cnt = 0;
     char line[1024];
     while (fgets(line, (int)sizeof(line), fp) != NULL) {
@@ -566,10 +569,11 @@ static int DatasetLoadString(Dataset *set)
             SCLogDebug("line with rep %s, %s", line, r);
         }
     }
-    THashConsolidateMemcap(set->hash);
-
     fclose(fp);
     SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
+#endif
+    THashConsolidateMemcap(set->hash);
+
     return 0;
 }
 
diff --git a/src/datasets.h b/src/datasets.h
index 86bfed02b22f..44e2b0f8d4b2 100644
--- a/src/datasets.h
+++ b/src/datasets.h
@@ -19,6 +19,7 @@
 #define SURICATA_DATASETS_H
 
 #include "util-thash.h"
+#include "rust-bindings.h"
 #include "datasets-reputation.h"
 
 int DatasetsInit(void);