From d205e5917cc6a6ea9b0655a8add8139098a7b5a2 Mon Sep 17 00:00:00 2001 From: Philippe Antoine Date: Sun, 24 Nov 2024 22:32:05 +0100 Subject: [PATCH] detect/transforms: in place modifications of buffers As is the case when chaining multiple transforms. Avoids using memcpy in these cases. Add tests for these cases. Ticket: 7409 --- .../detect/transforms/compress_whitespace.rs | 37 ++++++++++--------- rust/src/detect/transforms/dotprefix.rs | 28 ++++++++++---- rust/src/detect/transforms/hash.rs | 10 ++++- rust/src/detect/transforms/http_headers.rs | 20 +++++++++- .../src/detect/transforms/strip_whitespace.rs | 24 ++++++++---- rust/src/detect/transforms/urldecode.rs | 9 ++++- rust/src/detect/transforms/xor.rs | 9 ++++- 7 files changed, 99 insertions(+), 38 deletions(-) diff --git a/rust/src/detect/transforms/compress_whitespace.rs b/rust/src/detect/transforms/compress_whitespace.rs index 5e96be1f10d0..ada86a7516ab 100644 --- a/rust/src/detect/transforms/compress_whitespace.rs +++ b/rust/src/detect/transforms/compress_whitespace.rs @@ -35,21 +35,16 @@ unsafe extern "C" fn compress_whitespace_setup( fn compress_whitespace_transform_do(input: &[u8], output: &mut [u8]) -> u32 { let mut nb = 0; - // seems faster than writing one byte at a time via - // for (i, o) in input.iter().filter(|c| !matches!(*c, b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ')).zip(output) - for subslice in - input.split_inclusive(|c| matches!(*c, b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ')) - { - // a subslice of length 1 not at the beginning is a space following another space - if nb == 0 - || subslice.len() > 1 - || !matches!( - subslice[0], - b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ' - ) - { - output[nb..nb + subslice.len()].copy_from_slice(subslice); - nb += subslice.len(); + let mut space = false; + for c in input { + if !matches!(*c, b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ') { + output[nb] = *c; + nb += 1; + space = false; + } else if !space { + output[nb] = *c; + nb += 1; + space = true; } } return nb as u32; @@ -142,14 +137,22 @@ mod tests { exp.len() as u32 ); assert_eq!(&out[..exp.len()], exp); - let buf = b"I \t J"; + let mut buf = Vec::new(); + buf.extend_from_slice(b"I \t J"); let mut out = vec![0; buf.len()]; let exp = b"I J"; assert_eq!( - compress_whitespace_transform_do(buf, &mut out), + compress_whitespace_transform_do(&buf, &mut out), exp.len() as u32 ); assert_eq!(&out[..exp.len()], exp); + // test in place + let still_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }; + assert_eq!( + compress_whitespace_transform_do(still_buf, &mut buf), + exp.len() as u32 + ); + assert_eq!(&still_buf[..exp.len()], b"I J"); } #[test] diff --git a/rust/src/detect/transforms/dotprefix.rs b/rust/src/detect/transforms/dotprefix.rs index eef6d048bd08..56e927226ec6 100644 --- a/rust/src/detect/transforms/dotprefix.rs +++ b/rust/src/detect/transforms/dotprefix.rs @@ -34,24 +34,32 @@ unsafe extern "C" fn dot_prefix_setup( } fn dot_prefix_transform_do(input: &[u8], output: &mut [u8]) { + if std::ptr::eq(output.as_ptr(), input.as_ptr()) { + output.copy_within(0..input.len(), 1); + } else { + output[1..].copy_from_slice(input); + } output[0] = b'.'; - output[1..].copy_from_slice(input); } #[no_mangle] unsafe extern "C" fn dot_prefix_transform(buffer: *mut c_void, _ctx: *mut c_void) { - let input = InspectionBufferPtr(buffer); let input_len = InspectionBufferLength(buffer); - if input.is_null() || input_len == 0 { + if input_len == 0 { return; } - let input = build_slice!(input, input_len as usize); - let output = InspectionBufferCheckAndExpand(buffer, input_len + 1); if output.is_null() { // allocation failure return; } + // get input after possible realloc + let input = InspectionBufferPtr(buffer); + if input.is_null() { + // allocation failure + return; + } + let input = build_slice!(input, input_len as usize); let output = std::slice::from_raw_parts_mut(output, (input_len + 1) as usize); dot_prefix_transform_do(input, output); @@ -89,9 +97,15 @@ mod tests { let mut out = vec![0; b"example.com".len() + 1]; dot_prefix_transform_do(buf, &mut out); assert_eq!(out, b".example.com"); - let buf = b"hello.example.com"; + let mut buf = Vec::with_capacity(b"hello.example.com".len() + 1); + buf.extend_from_slice(b"hello.example.com"); let mut out = vec![0; b"hello.example.com".len() + 1]; - dot_prefix_transform_do(buf, &mut out); + dot_prefix_transform_do(&buf, &mut out); assert_eq!(out, b".hello.example.com"); + // test in place + let still_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }; + buf.push(b'.'); + dot_prefix_transform_do(still_buf, &mut buf); + assert_eq!(&buf, b".hello.example.com"); } } diff --git a/rust/src/detect/transforms/hash.rs b/rust/src/detect/transforms/hash.rs index 76922f678b91..087427efee03 100644 --- a/rust/src/detect/transforms/hash.rs +++ b/rust/src/detect/transforms/hash.rs @@ -231,9 +231,15 @@ mod tests { #[test] fn test_sha256_transform() { - let buf = b" A B C D "; + let mut buf = Vec::with_capacity(SC_SHA256_LEN); + buf.extend_from_slice(b" A B C D "); let mut out = vec![0; SC_SHA256_LEN]; - sha256_transform_do(buf, &mut out); + sha256_transform_do(&buf, &mut out); assert_eq!(out, b"\xd6\xbf\x7d\x8d\x69\x53\x02\x4d\x0d\x84\x5c\x99\x9b\xae\x93\xcc\xac\x68\xea\xab\x9a\xc9\x77\xd0\xfd\x30\x6a\xf5\x9a\x3d\xe4\x3a"); + // test in place + let still_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }; + buf.resize(SC_SHA256_LEN, 0); + sha256_transform_do(still_buf, &mut buf); + assert_eq!(&buf, b"\xd6\xbf\x7d\x8d\x69\x53\x02\x4d\x0d\x84\x5c\x99\x9b\xae\x93\xcc\xac\x68\xea\xab\x9a\xc9\x77\xd0\xfd\x30\x6a\xf5\x9a\x3d\xe4\x3a"); } } diff --git a/rust/src/detect/transforms/http_headers.rs b/rust/src/detect/transforms/http_headers.rs index 939cbb3d338a..652765375941 100644 --- a/rust/src/detect/transforms/http_headers.rs +++ b/rust/src/detect/transforms/http_headers.rs @@ -103,11 +103,18 @@ unsafe extern "C" fn strip_pseudo_setup( fn strip_pseudo_transform_do(input: &[u8], output: &mut [u8]) -> u32 { let mut nb = 0; + let mut inb = 0; + let same = std::ptr::eq(output.as_ptr(), input.as_ptr()); for subslice in input.split_inclusive(|c| *c == b'\n') { if !subslice.is_empty() && subslice[0] != b':' { - output[nb..nb + subslice.len()].copy_from_slice(subslice); + if same { + output.copy_within(inb..inb + subslice.len(), nb); + } else { + output[nb..nb + subslice.len()].copy_from_slice(subslice); + } nb += subslice.len(); } + inb += subslice.len(); } return nb as u32; } @@ -183,5 +190,16 @@ mod tests { let mut out = vec![0; buf.len()]; let nb = strip_pseudo_transform_do(buf, &mut out); assert_eq!(&out[..nb as usize], b"header2:Value2"); + let mut buf = Vec::new(); + buf.extend_from_slice( + b"Header1: Value1\n:method:get\nheader2:Value2\n:scheme:https\nheader3:Value3\n", + ); + // test in place + let still_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }; + let nb = strip_pseudo_transform_do(still_buf, &mut buf); + assert_eq!( + &still_buf[..nb as usize], + b"Header1: Value1\nheader2:Value2\nheader3:Value3\n" + ); } } diff --git a/rust/src/detect/transforms/strip_whitespace.rs b/rust/src/detect/transforms/strip_whitespace.rs index 2fb8599a5365..bb8c095cae4f 100644 --- a/rust/src/detect/transforms/strip_whitespace.rs +++ b/rust/src/detect/transforms/strip_whitespace.rs @@ -35,13 +35,15 @@ unsafe extern "C" fn strip_whitespace_setup( fn strip_whitespace_transform_do(input: &[u8], output: &mut [u8]) -> u32 { let mut nb = 0; - // seems faster than writing one byte at a time via - // for (i, o) in input.iter().filter(|c| !matches!(*c, b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ')).zip(output) - for subslice in input.split(|c| matches!(*c, b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ')) + for (i, o) in input + .iter() + .filter(|c| !matches!(*c, b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ')) + .zip(output) { - output[nb..nb + subslice.len()].copy_from_slice(subslice); - nb += subslice.len(); + *o = *i; + nb += 1; } + // do not use faster copy_from_slice because input and output may overlap (point to the same data) return nb as u32; } @@ -124,13 +126,21 @@ mod tests { ); assert_eq!(&out[..exp.len()], exp); - let buf = b"I \t J"; + let mut buf = Vec::new(); + buf.extend_from_slice(b"I \t J"); let mut out = vec![0; buf.len()]; let exp = b"IJ"; assert_eq!( - strip_whitespace_transform_do(buf, &mut out), + strip_whitespace_transform_do(&buf, &mut out), exp.len() as u32 ); assert_eq!(&out[..exp.len()], exp); + // test in place + let still_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }; + assert_eq!( + strip_whitespace_transform_do(still_buf, &mut buf), + exp.len() as u32 + ); + assert_eq!(&still_buf[..exp.len()], b"IJ"); } } diff --git a/rust/src/detect/transforms/urldecode.rs b/rust/src/detect/transforms/urldecode.rs index 59620ad42d2d..deceb3be044e 100644 --- a/rust/src/detect/transforms/urldecode.rs +++ b/rust/src/detect/transforms/urldecode.rs @@ -135,9 +135,14 @@ mod tests { #[test] fn test_url_decode_transform() { - let buf = b"Suricata%20is+%27%61wesome%21%27%25%30%30%ZZ%4"; + let mut buf = Vec::new(); + buf.extend_from_slice(b"Suricata%20is+%27%61wesome%21%27%25%30%30%ZZ%4"); let mut out = vec![0; buf.len()]; - let nb = url_decode_transform_do(buf, &mut out); + let nb = url_decode_transform_do(&buf, &mut out); assert_eq!(&out[..nb as usize], b"Suricata is 'awesome!'%00%ZZ%4"); + // test in place + let still_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }; + let nb = url_decode_transform_do(still_buf, &mut buf); + assert_eq!(&still_buf[..nb as usize], b"Suricata is 'awesome!'%00%ZZ%4"); } } diff --git a/rust/src/detect/transforms/xor.rs b/rust/src/detect/transforms/xor.rs index b8b40400ac72..6fae23e8f65c 100644 --- a/rust/src/detect/transforms/xor.rs +++ b/rust/src/detect/transforms/xor.rs @@ -143,10 +143,15 @@ mod tests { #[test] fn test_xor_transform() { - let buf = b"example.com"; + let mut buf = Vec::new(); + buf.extend_from_slice(b"example.com"); let mut out = vec![0; buf.len()]; let ctx = xor_parse_do("0a0DC8ff").unwrap(); - xor_transform_do(buf, &mut out, &ctx); + xor_transform_do(&buf, &mut out, &ctx); assert_eq!(out, b"ou\xa9\x92za\xad\xd1ib\xa5"); + // test in place + let still_buf = unsafe { std::slice::from_raw_parts(buf.as_ptr(), buf.len()) }; + xor_transform_do(still_buf, &mut buf, &ctx); + assert_eq!(&still_buf, b"ou\xa9\x92za\xad\xd1ib\xa5"); } }