Skip to content

Commit

Permalink
datasets: move initial file reading to rust
Browse files Browse the repository at this point in the history
  • Loading branch information
inashivb committed Dec 3, 2024
1 parent fa9a80d commit 71e245e
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 5 deletions.
2 changes: 2 additions & 0 deletions rust/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ include = [
"FtpEvent",
"SCSigTableElmt",
"SCTransformTableElmt",
"DataRepType",
]

# A list of items to not include in the generated bindings
Expand All @@ -94,6 +95,7 @@ exclude = [
"CLuaState",
"DetectEngineState",
"Flow",
"Dataset",
"StreamingBufferConfig",
"HttpRangeContainerBlock",
"FileContainer",
Expand Down
87 changes: 87 additions & 0 deletions rust/src/detect/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/* Copyright (C) 2024 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

// Author: Shivani Bhardwaj <[email protected]>

//! This module exposes items from the datasets C code to Rust and reads
//! dataset files on behalf of the C code.
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::ffi::{c_char, CStr};
use base64::{Engine, engine::general_purpose::STANDARD};

/// Opaque handle for the `Dataset` type that is defined in C.
///
/// The enum has no variants, so Rust code can never construct a value of
/// this type; it is only ever seen behind a reference.
#[derive(Clone, Copy)]
pub enum Dataset {}

// Plain C struct re-declared in Rust. `#[repr(C)]` keeps the C field layout:
// a single 16-bit `value`.
#[repr(C)]
#[derive(PartialEq, Debug)]
pub struct DataRepType {
    pub value: u16,
}

// Extern fns operating on the opaque Dataset type above
/// cbindgen:ignore
extern {
pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32;
pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType) -> i32;
}

#[no_mangle]
pub unsafe extern "C" fn ProcessDatasets(set: &Dataset, fname: *const c_char) {
let file_string = CStr::from_ptr(fname).to_str().unwrap();
let filename = Path::new(file_string);
SCLogNotice!("Path: {:?}", filename);
if let Ok(lines) = read_lines(filename) {
for line in lines.flatten() {
SCLogNotice!("{}", line);
let v: Vec<&str> = line.split(',').collect();
// Ignore empty and invalid lines in dataset/rep file
if v.is_empty() || v.len() > 2 {
continue;
}
if v.len() == 1 {
// Dataset
let mut decoded: Vec<u8> = vec![];
if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
// FatalErrorOnInit STODO
}
DatasetAdd(&set, decoded.as_ptr(), decoded.len() as u32);
} else {
// Datarep
let mut decoded: Vec<u8> = vec![];
if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
// FatalErrorOnInit STODO
}
if let Ok(val) = v[1].to_string().parse::<u16>() {
let rep: DataRepType = DataRepType { value: val };
DatasetAddwRep(&set, decoded.as_ptr(), decoded.len() as u32, &rep);
} else {
// FatalErrorOnInit STODO
}
}
}
}
}

/// Opens `filename` and returns an iterator over its lines, read through a
/// `BufReader`.
///
/// # Errors
///
/// Returns the `io::Error` from `File::open` when the file cannot be opened.
/// Errors hit while reading surface later as `Err` items of the iterator.
fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
where
    P: AsRef<Path>,
{
    File::open(filename).map(|handle| io::BufReader::new(handle).lines())
}
1 change: 1 addition & 0 deletions rust/src/detect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub mod transforms;
pub mod uint;
pub mod uri;
pub mod tojson;
pub mod datasets;

use crate::core::AppProto;
use std::os::raw::{c_int, c_void};
Expand Down
4 changes: 1 addition & 3 deletions src/datasets-reputation.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#ifndef SURICATA_DATASETS_REPUTATION_H
#define SURICATA_DATASETS_REPUTATION_H

typedef struct DataRepType {
uint16_t value;
} DataRepType;
#include "rust-bindings.h"

typedef struct DataRepResultType {
bool found;
Expand Down
8 changes: 6 additions & 2 deletions src/datasets.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,8 @@ static int DatasetLoadString(Dataset *set)
return 0;

SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);
ProcessDatasets(set, set->load);
#if 0
const char *fopen_mode = "r";
if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
fopen_mode = "a+";
Expand All @@ -507,6 +509,7 @@ static int DatasetLoadString(Dataset *set)
return -1;
}

rs_read_datasets(set->load);
uint32_t cnt = 0;
char line[1024];
while (fgets(line, (int)sizeof(line), fp) != NULL) {
Expand Down Expand Up @@ -566,10 +569,11 @@ static int DatasetLoadString(Dataset *set)
SCLogDebug("line with rep %s, %s", line, r);
}
}
THashConsolidateMemcap(set->hash);

fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
#endif
THashConsolidateMemcap(set->hash);

return 0;
}

Expand Down
1 change: 1 addition & 0 deletions src/datasets.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define SURICATA_DATASETS_H

#include "util-thash.h"
#include "rust-bindings.h"
#include "datasets-reputation.h"

int DatasetsInit(void);
Expand Down

0 comments on commit 71e245e

Please sign in to comment.