diff --git a/Cargo.toml b/Cargo.toml index 0f1cba2..e9709d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "nt_hive2" -version = "4.1.0" +version = "4.2.0" edition = "2021" authors = ["Jan Starke ", "Muteb Alqahtani "] license = "GPL-3.0" @@ -30,7 +30,6 @@ memoverlay = ">=0.1.3" num-traits = "0.2" num-derive = "0.4" byteorder = "1.4" - - +getset = "0.1" [dev-dependencies] simplelog = "0.12" \ No newline at end of file diff --git a/src/cell.rs b/src/cell.rs index ee8df20..cb8ce1b 100644 --- a/src/cell.rs +++ b/src/cell.rs @@ -1,6 +1,6 @@ +use std::any::Any; -use binread::{BinRead, derive_binread}; -use std::{any::Any}; +use binread::{derive_binread, BinRead}; #[allow(unused_imports)] use crate::*; @@ -19,12 +19,12 @@ pub struct CellHeader { size: usize, #[br(calc(raw_size > 0))] - is_deleted: bool + is_deleted: bool, } impl CellHeader { - /// Returns the size of the header. - /// + /// Returns the size of this cell, including the header. + /// /// This is *not* the stored size value, but the *absolute* value of it. pub fn size(&self) -> usize { self.size @@ -38,7 +38,7 @@ impl CellHeader { } /// returns [true] iff the [Cell] is considered as being *deleted* - /// + /// pub fn is_deleted(&self) -> bool { self.is_deleted } @@ -46,23 +46,23 @@ impl CellHeader { /// A [Cell] represents the most basic data structure of hive files. /// Nearly every other data is stored as content of a [Cell]. -/// +/// /// As [Cell] is a generic, it receives two generic arguments: /// - `T` denotes the type contained in the [Cell] /// - `A` specifies the arguments required by [binread] to correctly parse an object of type `T` -/// +/// /// # Usage /// If you know what kind of data should be stored in a certain [Cell], /// you can simply read it. Assume you have [Cell] which should contain /// a [`KeyNode`](struct@KeyNode), you can read it as follows: -/// +/// /// ``` /// # use std::error::Error; /// # use std::fs::File; /// use nt_hive2::*; /// use std::io::{Seek, SeekFrom}; /// use binread::BinReaderExt; -/// +/// /// # fn main() -> Result<(), Box> { /// # let hive_file = File::open("tests/data/testhive")?; /// # let mut hive = Hive::new(hive_file, HiveParseMode::NormalWithBaseBlock)?; @@ -76,25 +76,29 @@ impl CellHeader { /// # Ok(()) /// # } /// ``` -/// +/// /// For conveniance reasons, [Hive] already presents the method [read_structure](Hive::read_structure), /// which does basically the same. -/// +/// #[derive(BinRead, Eq, PartialEq)] #[br(import_tuple(data_args: A))] pub struct Cell where - T: BinRead, { - + T: BinRead, +{ header: CellHeader, #[br(args_tuple(data_args))] data: T, } -impl Cell where T: BinRead, A: Any + Copy { +impl Cell +where + T: BinRead, + A: Any + Copy, +{ /// returns [true] iff the [Cell] is considered as being *deleted* - /// + /// pub fn is_deleted(&self) -> bool { self.header.is_deleted } @@ -102,9 +106,9 @@ impl Cell where T: BinRead, A: Any + Copy { /// returns [true] iff the [Cell] is considered as being *allocated*. /// This is a conveniance function which simply calls [is_deleted](Self::is_deleted) /// and negates the result. - /// + /// pub fn is_allocated(&self) -> bool { - ! self.is_deleted() + !self.is_deleted() } /// returns a reference to the contained data structure @@ -116,4 +120,4 @@ impl Cell where T: BinRead, A: Any + Copy { pub(crate) fn into_data(self) -> T { self.data } -} \ No newline at end of file +} diff --git a/src/cell_iterator.rs b/src/cell_iterator.rs deleted file mode 100644 index b6b1c81..0000000 --- a/src/cell_iterator.rs +++ /dev/null @@ -1,246 +0,0 @@ -use std::io::{Seek, SeekFrom, ErrorKind}; - -use binread::{BinReaderExt, BinRead, derive_binread, BinResult}; -use thiserror::Error; - -use crate::*; -use crate::hive::CleanHive; -use crate::hivebin::HiveBin; -use crate::subkeys_list::*; - -pub enum CellFilter { - DeletedOnly, - AllocatedOnly, - DeletedAndAllocated -} - -impl Default for CellFilter { - fn default() -> Self { - Self::DeletedAndAllocated - } -} - -pub struct CellIterator where B: BinReaderExt, C: Fn(u64) { - hive: Hive, - hivebin: Option, - read_from_hivebin: usize, - callback: C, - filter: CellFilter -} - -impl CellIterator where B: BinReaderExt, C: Fn(u64) { - pub fn new(mut hive: Hive, callback: C) -> Self { - hive.seek(SeekFrom::Start(0)).unwrap(); - Self { - hive, - hivebin: None, - read_from_hivebin: 0, - callback, - filter: CellFilter::default() - } - } - - pub fn with_filter(mut self, filter: CellFilter) -> Self { - self.filter = filter; - self - } - - fn read_hivebin_header(&mut self) -> BinResult<()> { - match self.hive.read_le::() { - Err(why) => { - if let binread::Error::Io(kind) = &why { - if kind.kind() == ErrorKind::UnexpectedEof { - log::warn!("unexpected EOF while trying to read hivebin header"); - return Err(why); - } - } - log::warn!("parser error: {}", why); - Err(why) - } - Ok(hivebin) => { - self.hivebin = Some(hivebin); - self.read_from_hivebin = 0; - Ok(()) - } - } - } -} - -impl Iterator for CellIterator where B: BinReaderExt, C: Fn(u64) { - type Item = CellSelector; - - fn next(&mut self) -> Option { - loop { - if self.hivebin.is_none() && self.read_hivebin_header().is_err() { - return None; - } - - let start_position = self.hive.stream_position().unwrap(); - - // there might be the start of a new hive bin at this position - if start_position & (! 0xfff) == start_position { - log::debug!("trying to read hivebin header at {:08x}", start_position + 0x1000); - - match self.hive.read_le::() { - Ok(hivebin) => { - log::debug!("found a new hivebin here"); - self.hivebin = Some(hivebin); - self.read_from_hivebin = 0; - } - Err(why) => { - log::debug!("this does not seem to be a hivebin header (cause was: {})", why); - } - } - - (self.callback)(self.hive.stream_position().unwrap()); - } - - let start_position = self.hive.stream_position().unwrap(); - log::trace!("reading a cell at {:08x}", start_position + 4096); - - let result: BinResult = self.hive.read_le(); - - match result { - Err(why) => { - if let binread::Error::Io(kind) = &why { - if kind.kind() == ErrorKind::UnexpectedEof { - return None; - } - } - log::warn!("parser error: {}", why); - (self.callback)(self.hive.stream_position().unwrap()); - return None; - } - - Ok(header) => { - let handle_this_cell = match self.filter { - CellFilter::DeletedOnly => header.is_deleted(), - CellFilter::AllocatedOnly => ! header.is_deleted(), - CellFilter::DeletedAndAllocated => true, - }; - - if ! handle_this_cell { - self.hive.seek(SeekFrom::Start(header.size() as u64 + start_position)).unwrap(); - continue; - } - - - let result: BinResult = self.hive.read_le(); - match result { - Err(why) => { - if let binread::Error::Io(kind) = &why { - if kind.kind() == ErrorKind::UnexpectedEof { - return None; - } - } - log::warn!("parser error: {}", why); - (self.callback)(self.hive.stream_position().unwrap()); - return None - } - - Ok(content) => { - - if self.read_from_hivebin + header.size() >= self.hivebin.as_ref().unwrap().size().try_into().unwrap() { - // the hivebin has been completely read, the next to be read should be - // the next hivebin header - log::trace!("the current hivebin has been completely read"); - self.hivebin = None; - } - - log::trace!("skipping {} bytes to {:08x}", header.size(), start_position as usize + header.size()); - - self.hive.seek(SeekFrom::Start(header.size() as u64 + start_position)).unwrap(); - (self.callback)(self.hive.stream_position().unwrap()); - return Some(CellSelector{ - offset: Offset(start_position.try_into().unwrap()), - header, - content - }); - } - } - } - } - } - } -} - -#[derive(BinRead)] -pub struct CellSelector { - offset: Offset, - header: CellHeader, - content: CellLookAhead -} - -impl CellSelector { - pub fn offset(&self) -> &Offset { - &self.offset - } - pub fn header(&self) -> &CellHeader { - &self.header - } - pub fn content(&self) -> &CellLookAhead { - &self.content - } -} - -#[derive_binread] -pub enum CellLookAhead { - #[br(magic=b"nk")] NK(KeyNode), - #[br(magic=b"vk")] VK(KeyValue), - #[br(magic=b"sk")] SK, - #[br(magic=b"db")] DB, - - #[br(magic=b"li")] LI{ - #[br(temp)] - count: u16, - - #[br(count=count)] - items: Vec - }, - #[br(magic=b"lf")] LF{ - #[br(temp)] - count: u16, - - #[br(count=count)] - items: Vec - }, - - #[br(magic=b"lh")] LH{ - #[br(temp)] - count: u16, - - #[br(count=count)] - items: Vec - }, - #[br(magic=b"ri")] RI{ - #[br(temp)] - count: u16, - - #[br(count=count)] - items: Vec - }, - UNKNOWN -} - -#[derive(Error, Debug)] -pub enum CellLookAheadConversionError { - #[error("tried to extract some type from this cell, which is not actually stored in this cell.")] - DifferentCellTypeExpected, -} - -impl CellLookAhead { - pub fn is_nk(&self) -> bool {matches!(self, Self::NK(_))} -} - -impl TryInto for CellSelector { - type Error = CellLookAheadConversionError; - - fn try_into(self) -> Result { - match self.content { - CellLookAhead::NK(nk) => Ok(nk), - _ => Err(CellLookAheadConversionError::DifferentCellTypeExpected), - } - } - -} - diff --git a/src/hive/base_block.rs b/src/hive/base_block.rs index a7b83c1..656605c 100644 --- a/src/hive/base_block.rs +++ b/src/hive/base_block.rs @@ -28,12 +28,11 @@ impl BinRead for CalculatedChecksum { options: &binread::ReadOptions, _: Self::Args, ) -> binread::prelude::BinResult { - reader.seek(std::io::SeekFrom::End(0))?; reader.seek(std::io::SeekFrom::Start(0))?; let data: Vec = count(127)(reader, options, ())?; - + let checksum = match data.into_iter().fold(0, |acc, x| acc ^ x) { 0xffff_ffff => 0xffff_fffe, 0 => 1, diff --git a/src/hive/hive_bin_iterator.rs b/src/hive/hive_bin_iterator.rs new file mode 100644 index 0000000..8cd5f5c --- /dev/null +++ b/src/hive/hive_bin_iterator.rs @@ -0,0 +1,82 @@ +use std::{ + cell::RefCell, + io::{ErrorKind, Seek, SeekFrom}, + rc::Rc, +}; + +use binread::BinReaderExt; + +use crate::{hivebin::HiveBin, CleanHive, Hive}; + +pub(crate) struct HiveBinIterator +where + B: BinReaderExt, +{ + hive: Rc>>, + expected_end: u64, + end_of_file: u64, +} + +impl From> for HiveBinIterator +where + B: BinReaderExt, +{ + fn from(hive: Hive) -> Self { + let hive = Rc::new(RefCell::new(hive)); + let end_of_file = hive.borrow_mut().seek(SeekFrom::End(0)).unwrap(); + Self { + hive, + + // this is where we start reading. + // we explicitely seek to this position in next() + expected_end: 0, + end_of_file + } + } +} + +impl Iterator for HiveBinIterator +where + B: BinReaderExt, +{ + type Item = HiveBin; + + fn next(&mut self) -> Option { + + loop { + if self.expected_end >= self.end_of_file { + return None; + } + + /* we could continuously read the HiveBin, because there is no gap + between them. But, the HiveBin struct only consumes the bytes of the + HiveBin header. Because we do not know if all the cells in the hive + have been read, we explicitely seek to the beginning of the next hivebin + */ + let current_start = self + .hive + .borrow_mut() + .seek(SeekFrom::Start(self.expected_end)) + .unwrap(); + + match HiveBin::new(Rc::clone(&self.hive)) { + Ok(hivebin) => { + self.expected_end = current_start + *hivebin.size() as u64; + assert_eq!(self.expected_end & 0xfff, 0, "hivebins must be alligned at 4k boundaries"); + + return Some(hivebin) + } + Err(why) => { + if let binread::Error::Io(kind) = &why { + if kind.kind() == ErrorKind::UnexpectedEof { + return None; + } + } + log::warn!("scanner error: {}", why); + } + } + + self.expected_end += 0x1000; + } + } +} diff --git a/src/hive/hive_struct.rs b/src/hive/hive_struct.rs deleted file mode 100644 index 74a378e..0000000 --- a/src/hive/hive_struct.rs +++ /dev/null @@ -1,350 +0,0 @@ -use crate::nk::{KeyNodeFlags, KeyNodeWithMagic}; -use crate::transactionlog::{ApplicationResult, TransactionLogsEntry}; -use crate::{nk::KeyNode, CellIterator}; -use crate::{Cell, CellFilter, CellLookAhead, HiveParseMode, Offset}; -use anyhow::{anyhow, bail}; -use binread::{BinRead, BinReaderExt, BinResult}; -use binwrite::BinWrite; -use memoverlay::MemOverlay; -use std::collections::BTreeMap; -use std::io::{Cursor, ErrorKind, Read, Seek, SeekFrom, Write}; -use std::marker::PhantomData; - -use super::base_block::HiveBaseBlock; -use super::{CleanHive, ContainsHive, DirtyHive, Dissolve, FileType, HiveStatus, BASEBLOCK_SIZE}; - -pub trait BaseBlock { - fn base_block(&self) -> Option<&HiveBaseBlock>; -} - -impl BaseBlock for Hive -where - B: BinReaderExt, - S: HiveStatus, -{ - fn base_block(&self) -> Option<&HiveBaseBlock> { - self.base_block.as_ref() - } -} - -impl ContainsHive for Hive where B: BinReaderExt {} - -impl Dissolve for Hive -where - B: BinReaderExt, -{ - fn dissolve(self) -> (Hive, BTreeMap) { - (self, Default::default()) - } -} - -/// Represents a registry hive file. -/// -/// Because most offsets in a registry hive file are relative to the start of the hive bins data, -/// this struct provides a own [Seek] and [Read] implementation, which can work directly -/// with those kinds of offsets. You don't know where the hive bins data starts, because [Hive] knows -/// it (this information is stored in the hive base block). To parse data from within the hive bins data, -/// use [Hive] as reader and use offsets read from the hive data structures. -/// -/// The structure of hive files is documented at -#[derive(Debug)] -pub struct Hive -where - B: BinReaderExt, - S: HiveStatus, -{ - pub data: MemOverlay, - pub(crate) base_block: Option, - root_cell_offset: Option, - sequence_number: u32, - status: PhantomData, -} - -impl Hive -where - B: BinReaderExt, - S: HiveStatus, -{ - /// creates a new [Hive] object. This includes parsing the HiveBaseBlock and determining - /// the start of the hive bins data. - pub fn new(mut data: B, parse_mode: HiveParseMode) -> BinResult { - data.seek(SeekFrom::Start(0))?; - let mut data = MemOverlay::from(data); - let me = match parse_mode { - HiveParseMode::Raw => Self { - data, - base_block: None, - root_cell_offset: None, - sequence_number: 0, - status: PhantomData, - }, - HiveParseMode::Normal(offset) => Self { - data, - base_block: None, - root_cell_offset: Some(offset), - sequence_number: 0, - status: PhantomData, - }, - HiveParseMode::NormalWithBaseBlock => { - /* preread the baseblock data to prevent seeking */ - let mut baseblock_data = [0; BASEBLOCK_SIZE]; - data.read_exact(&mut baseblock_data)?; - - /* read baseblock */ - let mut baseblock_cursor = Cursor::new(baseblock_data); - let base_block: HiveBaseBlock = baseblock_cursor - .read_le_args((FileType::HiveFile,)) - .unwrap(); - let data_offset = data.stream_position()? as usize; - if data_offset != BASEBLOCK_SIZE { - panic!("we assume a base block size of {BASEBLOCK_SIZE} bytes, but the current has a size of {data_offset} bytes"); - } - - let root_cell_offset = *base_block.root_cell_offset(); - let sequence_number = *base_block.primary_sequence_number(); - Self { - data, - base_block: Some(base_block), - root_cell_offset: Some(root_cell_offset), - sequence_number, - status: PhantomData, - } - } - }; - - Ok(me) - } - - /// write the baseblock to some writer - /// - /// This method ignores any patches to the base block which might - /// be introduced by log files, because the `apply_transaction_log()` method - /// takes care of the base block and handles all necessary changes - pub fn write_baseblock(&self, writer: &mut W) -> anyhow::Result<()> { - match self.base_block() { - Some(base_block) => base_block.write(writer).map_err(|why| anyhow!(why)), - None => { - bail!("this hive has no base block"); - } - } - } - - pub fn is_checksum_valid(&self) -> Option { - if self.base_block().is_some() { - let mut buffer = Cursor::new([0; BASEBLOCK_SIZE]); - - if self.write_baseblock(&mut buffer).is_err() { - return Some(false); - } - buffer.seek(SeekFrom::Start(0)).unwrap(); - - match buffer.read_le_args::((FileType::HiveFile,)) { - Ok(_) => Some(true), - Err(why) => { - println!("{why}"); - Some(false) - } - } - } else { - None - } - } -} - -impl Hive -where - B: BinReaderExt, -{ - pub fn treat_hive_as_clean(self) -> Hive { - Hive:: { - data: self.data, - base_block: self.base_block, - root_cell_offset: self.root_cell_offset, - sequence_number: self.sequence_number, - status: PhantomData, - } - } - - pub fn apply_transaction_log(&mut self, log: TransactionLogsEntry) -> ApplicationResult { - let base_block = self.base_block.as_ref().unwrap(); - if *base_block.secondary_sequence_number() != 0 - && *log.sequence_number() != base_block.secondary_sequence_number() + 1 - { - log::warn!( - "abort applying transaction logs at sequence number {}", - base_block.secondary_sequence_number() - ); - log::warn!( - "next log entry had transaction number: {}", - log.sequence_number() - ); - return ApplicationResult::SequenceNumberDoesNotMatch; - } - log::info!( - "applying entry with sequence number {}", - log.sequence_number() - ); - - for (reference, page) in log.dirty_pages_references().iter().zip(log.dirty_pages()) { - log::info!( - "placing patch of size {} at 0x{:08x}", - page.len(), - BASEBLOCK_SIZE as u32 + reference.offset().0 - ); - - if let Err(why) = self - .data - .add_bytes_at((BASEBLOCK_SIZE as u32 + reference.offset().0).into(), page) - { - panic!("unable to apply memory patch: {why}"); - } - } - - if let Some(ref mut base_block) = self.base_block { - base_block.set_sequence_number(*log.sequence_number()); - } - ApplicationResult::Applied - } -} - -impl Hive -where - B: BinReaderExt, -{ - pub fn is_primary_file(&self) -> bool { - if let Some(base_block) = &self.base_block { - *base_block.file_type() == FileType::HiveFile - } else { - false - } - } - - /// Is this really needed??? - pub fn enum_subkeys( - &mut self, - callback: fn(&mut Self, &KeyNode) -> BinResult<()>, - ) -> BinResult<()> { - let root_key_node = self.root_key_node()?; - callback(self, &root_key_node)?; - Ok(()) - } - - /// returns the root key of this registry hive file - pub fn root_key_node(&mut self) -> BinResult { - let mkn: KeyNodeWithMagic = self.read_structure(self.root_cell_offset())?; - Ok(mkn.into()) - } - - /// reads a data structure from the given offset. Read the documentation of [Cell] - /// for a detailled discussion - /// - /// # Usage - /// - /// ``` - /// # use std::error::Error; - /// # use std::fs::File; - /// use nt_hive2::*; - /// - /// # fn main() -> Result<(), Box> { - /// # let hive_file = File::open("tests/data/testhive")?; - /// # let mut hive = Hive::new(hive_file, HiveParseMode::NormalWithBaseBlock)?; - /// # let offset = hive.root_cell_offset(); - /// let my_node: KeyNodeWithMagic = hive.read_structure(offset)?; - /// # Ok(()) - /// # } - /// ``` - pub fn read_structure(&mut self, offset: Offset) -> BinResult - where - T: BinRead + std::convert::From>, - { - log::trace!( - "reading cell of type {} from offset {:08x} (was: {:08x})", - std::any::type_name::(), - offset.0 + BASEBLOCK_SIZE as u32, - offset.0 - ); - - self.seek(SeekFrom::Start(offset.0.into()))?; - let cell: Cell = self.read_le().unwrap(); - assert!(cell.is_allocated()); - Ok(cell.into()) - } - - /// returns the start of the hive bins data - pub fn data_offset(&self) -> u32 { - BASEBLOCK_SIZE as u32 - } - - /// returns the offset of the root cell - pub fn root_cell_offset(&self) -> Offset { - match &self.base_block { - None => self.root_cell_offset.unwrap(), - Some(base_block) => *base_block.root_cell_offset(), - } - } - - pub fn find_root_celloffset(self) -> Option { - let iterator = self - .into_cell_iterator(|_| ()) - .with_filter(CellFilter::AllocatedOnly); - for cell in iterator { - if let CellLookAhead::NK(nk) = cell.content() { - if nk.flags.contains(KeyNodeFlags::KEY_HIVE_ENTRY) { - return Some(*cell.offset()); - } - } - } - None - } - - pub fn into_cell_iterator(self, callback: C) -> CellIterator - where - C: Fn(u64), - { - CellIterator::new(self, callback) - } - - pub fn data_size(&self) -> u32 { - match &self.base_block { - None => todo!(), - Some(base_block) => *base_block.data_size(), - } - } -} - -impl Read for Hive -where - B: BinReaderExt, -{ - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - self.data.read(buf) - } -} - -/// This [`Seek`] implementation hides the base block, -/// because the offsets used in hive files are relative -/// to the end of the base block. -/// -/// If you want to read the base block, don't use `seek()` and `read()`, -/// but `write_baseblock()` instead -impl Seek for Hive -where - B: BinReaderExt, -{ - fn seek(&mut self, pos: SeekFrom) -> std::io::Result { - let new_offset = match pos { - SeekFrom::Start(dst) => self - .data - .seek(SeekFrom::Start(dst + BASEBLOCK_SIZE as u64))?, - SeekFrom::End(_) => self.data.seek(pos)?, - SeekFrom::Current(_) => self.data.seek(pos)?, - }; - if new_offset < BASEBLOCK_SIZE as u64 { - return Err(std::io::Error::new( - ErrorKind::InvalidData, - format!("tried seek to invalid offset: {:?}", pos), - )); - } - Ok(new_offset - BASEBLOCK_SIZE as u64) - } -} diff --git a/src/hive/mod.rs b/src/hive/mod.rs index 7b909ca..7bf6b26 100644 --- a/src/hive/mod.rs +++ b/src/hive/mod.rs @@ -1,15 +1,371 @@ -mod hive_struct; -mod hive_parse_mode; -mod offset; mod base_block; mod file_type; -mod hive_with_logs; +mod hive_bin_iterator; +mod hive_parse_mode; mod hive_status; +mod hive_with_logs; +mod offset; -pub use hive_struct::*; -pub use hive_parse_mode::*; -pub use offset::*; pub use base_block::*; pub use file_type::*; +pub(crate) use hive_bin_iterator::*; +pub use hive_parse_mode::*; +pub use hive_status::*; pub use hive_with_logs::*; -pub use hive_status::*; \ No newline at end of file +pub use offset::*; + +use crate::hivebin::{CellLookAhead, HiveBin}; +use crate::nk::KeyNode; +use crate::nk::{KeyNodeFlags, KeyNodeWithMagic}; +use crate::transactionlog::{ApplicationResult, TransactionLogsEntry}; +use crate::Cell; +use anyhow::{anyhow, bail}; +use binread::{BinRead, BinReaderExt, BinResult}; +use binwrite::BinWrite; +use memoverlay::MemOverlay; +use std::collections::BTreeMap; +use std::io::{self, Cursor, ErrorKind, Read, Seek, SeekFrom, Write}; +use std::marker::PhantomData; + +pub use super::{CleanHive, ContainsHive, DirtyHive, BASEBLOCK_SIZE}; +pub use base_block::HiveBaseBlock; + +pub trait BaseBlock { + fn base_block(&self) -> Option<&HiveBaseBlock>; +} + +impl BaseBlock for Hive +where + B: BinReaderExt, + S: HiveStatus, +{ + fn base_block(&self) -> Option<&HiveBaseBlock> { + self.base_block.as_ref() + } +} + +impl ContainsHive for Hive where B: BinReaderExt {} + +impl Dissolve for Hive +where + B: BinReaderExt, +{ + fn dissolve(self) -> (Hive, BTreeMap) { + (self, Default::default()) + } +} + +/// Represents a registry hive file. +/// +/// Because most offsets in a registry hive file are relative to the start of the hive bins data, +/// this struct provides a own [Seek] and [Read] implementation, which can work directly +/// with those kinds of offsets. You don't know where the hive bins data starts, because [Hive] knows +/// it (this information is stored in the hive base block). To parse data from within the hive bins data, +/// use [Hive] as reader and use offsets read from the hive data structures. +/// +/// The structure of hive files is documented at +#[derive(Debug)] +pub struct Hive +where + B: BinReaderExt, + S: HiveStatus, +{ + pub data: MemOverlay, + pub(crate) base_block: Option, + root_cell_offset: Option, + sequence_number: u32, + status: PhantomData, +} + +impl Hive +where + B: BinReaderExt, + S: HiveStatus, +{ + /// creates a new [Hive] object. This includes parsing the HiveBaseBlock and determining + /// the start of the hive bins data. + pub fn new(mut data: B, parse_mode: HiveParseMode) -> BinResult { + data.seek(SeekFrom::Start(0))?; + let mut data = MemOverlay::from(data); + let me = match parse_mode { + HiveParseMode::Raw => Self { + data, + base_block: None, + root_cell_offset: None, + sequence_number: 0, + status: PhantomData, + }, + HiveParseMode::Normal(offset) => Self { + data, + base_block: None, + root_cell_offset: Some(offset), + sequence_number: 0, + status: PhantomData, + }, + HiveParseMode::NormalWithBaseBlock => { + /* preread the baseblock data to prevent seeking */ + let mut baseblock_data = [0; BASEBLOCK_SIZE]; + data.read_exact(&mut baseblock_data)?; + + /* read baseblock */ + let mut baseblock_cursor = Cursor::new(baseblock_data); + let base_block: HiveBaseBlock = baseblock_cursor + .read_le_args((FileType::HiveFile,)) + .unwrap(); + let data_offset = data.stream_position()? as usize; + if data_offset != BASEBLOCK_SIZE { + panic!("we assume a base block size of {BASEBLOCK_SIZE} bytes, but the current has a size of {data_offset} bytes"); + } + + let root_cell_offset = *base_block.root_cell_offset(); + let sequence_number = *base_block.primary_sequence_number(); + Self { + data, + base_block: Some(base_block), + root_cell_offset: Some(root_cell_offset), + sequence_number, + status: PhantomData, + } + } + }; + + Ok(me) + } + + /// write the baseblock to some writer + /// + /// This method ignores any patches to the base block which might + /// be introduced by log files, because the `apply_transaction_log()` method + /// takes care of the base block and handles all necessary changes + pub fn write_baseblock(&self, writer: &mut W) -> anyhow::Result<()> { + match self.base_block() { + Some(base_block) => base_block.write(writer).map_err(|why| anyhow!(why)), + None => { + bail!("this hive has no base block"); + } + } + } + + pub fn is_checksum_valid(&self) -> Option { + if self.base_block().is_some() { + let mut buffer = Cursor::new([0; BASEBLOCK_SIZE]); + + if self.write_baseblock(&mut buffer).is_err() { + return Some(false); + } + buffer.seek(SeekFrom::Start(0)).unwrap(); + + match buffer.read_le_args::((FileType::HiveFile,)) { + Ok(_) => Some(true), + Err(why) => { + println!("{why}"); + Some(false) + } + } + } else { + None + } + } +} + +impl Hive +where + B: BinReaderExt, +{ + pub fn treat_hive_as_clean(self) -> Hive { + Hive:: { + data: self.data, + base_block: self.base_block, + root_cell_offset: self.root_cell_offset, + sequence_number: self.sequence_number, + status: PhantomData, + } + } + + pub fn apply_transaction_log(&mut self, log: TransactionLogsEntry) -> ApplicationResult { + let base_block = self.base_block.as_ref().unwrap(); + if *base_block.secondary_sequence_number() != 0 + && *log.sequence_number() != base_block.secondary_sequence_number() + 1 + { + log::warn!( + "abort applying transaction logs at sequence number {}", + base_block.secondary_sequence_number() + ); + log::warn!( + "next log entry had transaction number: {}", + log.sequence_number() + ); + return ApplicationResult::SequenceNumberDoesNotMatch; + } + log::info!( + "applying entry with sequence number {}", + log.sequence_number() + ); + + for (reference, page) in log.dirty_pages_references().iter().zip(log.dirty_pages()) { + log::info!( + "placing patch of size {} at 0x{:08x}", + page.len(), + BASEBLOCK_SIZE as u32 + reference.offset().0 + ); + + if let Err(why) = self + .data + .add_bytes_at((BASEBLOCK_SIZE as u32 + reference.offset().0).into(), page) + { + panic!("unable to apply memory patch: {why}"); + } + } + + if let Some(ref mut base_block) = self.base_block { + base_block.set_sequence_number(*log.sequence_number()); + } + ApplicationResult::Applied + } +} + +impl Hive +where + B: BinReaderExt, +{ + pub fn is_primary_file(&self) -> bool { + if let Some(base_block) = &self.base_block { + *base_block.file_type() == FileType::HiveFile + } else { + false + } + } + + /// Is this really needed??? + pub fn enum_subkeys( + &mut self, + callback: fn(&mut Self, &KeyNode) -> BinResult<()>, + ) -> BinResult<()> { + let root_key_node = self.root_key_node()?; + callback(self, &root_key_node)?; + Ok(()) + } + + /// returns the root key of this registry hive file + pub fn root_key_node(&mut self) -> BinResult { + let mkn: KeyNodeWithMagic = self.read_structure(self.root_cell_offset())?; + Ok(mkn.into()) + } + + /// reads a data structure from the given offset. Read the documentation of [Cell] + /// for a detailled discussion + /// + /// # Usage + /// + /// ``` + /// # use std::error::Error; + /// # use std::fs::File; + /// use nt_hive2::*; + /// + /// # fn main() -> Result<(), Box> { + /// # let hive_file = File::open("tests/data/testhive")?; + /// # let mut hive = Hive::new(hive_file, HiveParseMode::NormalWithBaseBlock)?; + /// # let offset = hive.root_cell_offset(); + /// let my_node: KeyNodeWithMagic = hive.read_structure(offset)?; + /// # Ok(()) + /// # } + /// ``` + pub fn read_structure(&mut self, offset: Offset) -> BinResult + where + T: BinRead + std::convert::From>, + { + log::trace!( + "reading cell of type {} from offset {:08x} (was: {:08x})", + std::any::type_name::(), + offset.0 + BASEBLOCK_SIZE as u32, + offset.0 + ); + + self.seek(SeekFrom::Start(offset.0.into()))?; + let cell: Cell = self.read_le().unwrap(); + assert!(cell.is_allocated()); + Ok(cell.into()) + } + + /// returns the start of the hive bins data + pub fn data_offset(&self) -> u32 { + BASEBLOCK_SIZE as u32 + } + + /// returns the offset of the root cell + pub fn root_cell_offset(&self) -> Offset { + match &self.base_block { + None => self.root_cell_offset.unwrap(), + Some(base_block) => *base_block.root_cell_offset(), + } + } + + pub fn find_root_celloffset(self) -> Option { + for cell in self + .hivebins() + .flat_map(|hb| hb.cells()) + .filter(|selector| !selector.header().is_deleted()) + { + if let CellLookAhead::NK(nk) = cell.content() { + if nk.flags.contains(KeyNodeFlags::KEY_HIVE_ENTRY) { + return Some(*cell.offset()); + } + } + } + None + } + + pub fn reset_cursor(&mut self) -> io::Result<()> { + self.data + .seek(SeekFrom::Start(BASEBLOCK_SIZE.try_into().unwrap()))?; + Ok(()) + } + + pub fn hivebins(self) -> impl Iterator> { + HiveBinIterator::from(self) + } + + pub fn data_size(&self) -> u32 { + match &self.base_block { + None => panic!("this hive file has no base block"), + Some(base_block) => *base_block.data_size() - BASEBLOCK_SIZE as u32, + } + } +} + +impl Read for Hive +where + B: BinReaderExt, +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.data.read(buf) + } +} + +/// This [`Seek`] implementation hides the base block, +/// because the offsets used in hive files are relative +/// to the end of the base block. +/// +/// If you want to read the base block, don't use `seek()` and `read()`, +/// but `write_baseblock()` instead +impl Seek for Hive +where + B: BinReaderExt, +{ + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + let new_offset = match pos { + SeekFrom::Start(dst) => self + .data + .seek(SeekFrom::Start(dst + BASEBLOCK_SIZE as u64))?, + SeekFrom::End(_) => self.data.seek(pos)?, + SeekFrom::Current(_) => self.data.seek(pos)?, + }; + if new_offset < BASEBLOCK_SIZE as u64 { + return Err(std::io::Error::new( + ErrorKind::InvalidData, + format!("tried seek to invalid offset: {:?}", pos), + )); + } + Ok(new_offset - BASEBLOCK_SIZE as u64) + } +} diff --git a/src/hivebin.rs b/src/hivebin.rs deleted file mode 100644 index 4ef91f3..0000000 --- a/src/hivebin.rs +++ /dev/null @@ -1,20 +0,0 @@ -use binread::derive_binread; - -use crate::Offset; - -#[derive_binread] -#[br(magic=b"hbin")] -#[allow(dead_code)] -pub (crate) struct HiveBin { - offset: Offset, - size: u32, - reserved: u64, - timestamp: u64, - spare: u32 -} - -impl HiveBin { - pub fn size(&self) -> u32 { - self.size - } -} \ No newline at end of file diff --git a/src/hivebin/cell_iterator.rs b/src/hivebin/cell_iterator.rs new file mode 100644 index 0000000..dbe17e5 --- /dev/null +++ b/src/hivebin/cell_iterator.rs @@ -0,0 +1,161 @@ +use std::cell::RefCell; +use std::io::{ErrorKind, Seek}; +use std::rc::Rc; + +use binread::{derive_binread, BinRead, BinReaderExt, BinResult}; +use derive_getters::Getters; +use thiserror::Error; + +use crate::hivebin::HiveBin; +use crate::subkeys_list::*; +use crate::*; + +pub struct CellIterator +where + B: BinReaderExt, +{ + hive: Rc>>, + hivebin_size: usize, + consumed_bytes: usize, +} + +impl CellIterator +where + B: BinReaderExt, +{ + pub fn new(hivebin: &HiveBin, hive: Rc>>) -> Self { + Self { + hive, + hivebin_size: (*hivebin.size()).try_into().unwrap(), + consumed_bytes: hivebin.header_size().into(), + } + } + + fn parse(&self) -> Option { + let r: BinResult = self.hive.borrow_mut().read_le(); + match r { + Ok(t) => Some(t), + Err(why) => { + if let binread::Error::Io(kind) = &why { + if kind.kind() != ErrorKind::UnexpectedEof { + log::warn!("parser error: {}", why); + } + } + None + } + } + } +} + +impl Iterator for CellIterator +where + B: BinReaderExt, +{ + type Item = CellSelector; + + fn next(&mut self) -> Option { + const CELL_HEADER_SIZE: usize = 4; + + // if there is not enough space in this hivebin, give up + if self.consumed_bytes + CELL_HEADER_SIZE >= self.hivebin_size { + return None; + } + + let cell_offset = self.hive.borrow_mut().stream_position().unwrap(); + + if let Some(header) = self.parse::() { + if let Some(lookahead) = self.parse::() { + self.consumed_bytes += header.size(); + return Some(CellSelector { + offset: Offset(cell_offset.try_into().unwrap()), + header, + content: lookahead, + }); + } + } + + None + } +} + +#[derive(BinRead, Getters)] +#[getter(get = "pub")] +pub struct CellSelector { + offset: Offset, + header: CellHeader, + content: CellLookAhead, +} + +#[derive_binread] +pub enum CellLookAhead { + #[br(magic = b"nk")] + NK(KeyNode), + #[br(magic = b"vk")] + VK(KeyValue), + #[br(magic = b"sk")] + SK, + #[br(magic = b"db")] + DB, + + #[br(magic = b"li")] + LI { + #[br(temp)] + count: u16, + + #[br(count=count)] + items: Vec, + }, + #[br(magic = b"lf")] + LF { + #[br(temp)] + count: u16, + + #[br(count=count)] + items: Vec, + }, + + #[br(magic = b"lh")] + LH { + #[br(temp)] + count: u16, + + #[br(count=count)] + items: Vec, + }, + #[br(magic = b"ri")] + RI { + #[br(temp)] + count: u16, + + #[br(count=count)] + items: Vec, + }, + + #[allow(clippy::upper_case_acronyms)] + UNKNOWN, +} + +#[derive(Error, Debug)] +pub enum CellLookAheadConversionError { + #[error( + "tried to extract some type from this cell, which is not actually stored in this cell." + )] + DifferentCellTypeExpected, +} + +impl CellLookAhead { + pub fn is_nk(&self) -> bool { + matches!(self, Self::NK(_)) + } +} + +impl TryInto for CellSelector { + type Error = CellLookAheadConversionError; + + fn try_into(self) -> Result { + match self.content { + CellLookAhead::NK(nk) => Ok(nk), + _ => Err(CellLookAheadConversionError::DifferentCellTypeExpected), + } + } +} diff --git a/src/hivebin/mod.rs b/src/hivebin/mod.rs new file mode 100644 index 0000000..3d8a5ec --- /dev/null +++ b/src/hivebin/mod.rs @@ -0,0 +1,76 @@ +mod cell_iterator; + +use std::{cell::RefCell, ops::Deref, rc::Rc}; +pub use cell_iterator::*; + +use binread::{derive_binread, BinReaderExt, BinResult}; +use getset::Getters; + +use crate::{CleanHive, Hive, Offset}; + +#[derive_binread] +#[derive(Getters)] +#[br(magic = b"hbin")] +#[allow(dead_code)] +#[getset(get = "pub")] +pub struct _HiveBin { + // Offset of a current hive bin in bytes, relative from the start of the + // hive bins data + offset: Offset, + + // Size of a current hive bin in bytes + #[br(assert(size & 0xfff == 0, "hivebins must be alligned at 4k boundaries"))] + size: u32, + + reserved: u64, + + // FILETIME (UTC), defined for the first hive bin only (see below) + // + // A Timestamp in the header of the first hive bin acts as a backup copy of + // a Last written timestamp in the base block. + timestamp: u64, + + // This field has no meaning on a disk (see below) + // + // The Spare field is used when shifting hive bins and cells in memory. In + // Windows 2000, the same field is called MemAlloc, it is used to track + // memory allocations for hive bins. + spare: u32, +} + +pub struct HiveBin +where + B: BinReaderExt, +{ + hive: Rc>>, + hivebin: _HiveBin, +} + +impl HiveBin +where + B: BinReaderExt, +{ + pub fn new(hive: Rc>>) -> BinResult { + let hivebin = hive.borrow_mut().read_le()?; + Ok(Self { hive, hivebin }) + } + + pub fn cells(&self) -> impl Iterator { + CellIterator::new(self, Rc::clone(&self.hive)) + } + + pub fn header_size(&self) -> u8 { + 32 + } +} + +impl Deref for HiveBin +where + B: BinReaderExt, +{ + type Target = _HiveBin; + + fn deref(&self) -> &Self::Target { + &self.hivebin + } +} diff --git a/src/lib.rs b/src/lib.rs index 5e08865..681b5f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,11 +36,9 @@ mod vk; mod db; mod subkeys_list; mod cell_with_u8_list; -mod cell_iterator; pub mod transactionlog; pub use cell::*; -pub use cell_iterator::{CellIterator, CellLookAhead, CellSelector, CellFilter}; pub use hive::{Hive, Offset, HiveParseMode, ContainsHive, BaseBlock, CleanHive, DirtyHive, BASEBLOCK_SIZE, HiveWithLogs}; pub use nk::{KeyNode, KeyNodeWithMagic, SubPath}; pub use vk::{KeyValue, KeyValueWithMagic, RegistryValue}; \ No newline at end of file diff --git a/src/regdump.rs b/src/regdump.rs deleted file mode 100644 index 489e66f..0000000 --- a/src/regdump.rs +++ /dev/null @@ -1,154 +0,0 @@ -use bodyfile::Bodyfile3Line; -use nt_hive2::*; -use simplelog::{SimpleLogger, Config}; -use std::fs::File; -use std::io::{Read, Seek}; -use std::path::PathBuf; -use anyhow::{Result, bail}; -use clap::Parser; - -#[derive(Parser)] -#[clap(name="regdump", author, version, about, long_about = None)] -struct Args { - /// name of the file to dump - #[arg(value_parser = validate_file)] - pub (crate) hive_file: PathBuf, - - /// transaction LOG file(s). This argument can be specified one or two times. - #[clap(short('L'), long("log"))] - #[arg(value_parser = validate_file)] - logfiles: Vec, - - /// print as bodyfile format - #[clap(short('b'),long("bodyfile"))] - display_bodyfile: bool, - - /// ignore the base block (e.g. if it was encrypted by some ransomware) - #[clap(short('I'), long)] - ignore_base_block: bool, - - /// hide timestamps, if output is in reg format - #[clap(short('T'), long)] - hide_timestamps: bool, - - #[clap(flatten)] - pub (crate) verbose: clap_verbosity_flag::Verbosity, -} - -impl Args { - pub fn parse_mode(&self) -> HiveParseMode { - if self.ignore_base_block { - match File::open(&self.hive_file) { - Ok(data) => { - let hive = Hive::new(data, HiveParseMode::Raw).unwrap(); - let offset = match hive.find_root_celloffset() { - Some(offset) => offset, - None => { - log::error!("scan found no root cell offset, aborting..."); - std::process::exit(-1); - } - }; - println!("found offset at {}", offset.0); - HiveParseMode::Normal(offset) - } - Err(why) => { - log::error!("unable to open '{}': {}", self.hive_file.to_string_lossy(), why); - std::process::exit(-1); - }, - } - } else { - HiveParseMode::NormalWithBaseBlock - } - } -} - -fn validate_file(s: &str) -> Result { - let pb = PathBuf::from(s); - if pb.is_file() && pb.exists() { - Ok(pb) - } else { - Err(format!("unable to read file: '{s}'")) - } -} - -fn main() -> Result<()> { - let mut cli = Args::parse(); - let _ = SimpleLogger::init(cli.verbose.log_level_filter(), Config::default()); - - fn do_print_key(hive: &mut Hive, root_key: &KeyNode, cli: &Args) -> Result<()> where RS: Read + Seek { - let mut path = Vec::new(); - print_key(hive, root_key, &mut path, cli) - } - - match File::open(&cli.hive_file) { - Ok(data) => { - let hive = Hive::new(data, cli.parse_mode()).unwrap(); - - let mut clean_hive = - match cli.logfiles.len() { - 0 => { - log::warn!("no log files provided, treating hive as if it was clean"); - hive.treat_hive_as_clean() - } - 1 => { - hive.with_transaction_log(File::open(cli.logfiles.pop().unwrap())?.try_into()?)? - .apply_logs() - } - 2 => { - hive.with_transaction_log(File::open(cli.logfiles.pop().unwrap())?.try_into()?)? - .with_transaction_log(File::open(cli.logfiles.pop().unwrap())?.try_into()?)? - .apply_logs() - } - _ => { - bail!("more than two transaction log files are not supported") - } - }; - - let root_key = &clean_hive.root_key_node().unwrap(); - do_print_key(&mut clean_hive, root_key, &cli).unwrap(); - } - Err(why) => { - eprintln!("unable to open '{}': {}", cli.hive_file.to_string_lossy(), why); - std::process::exit(-1); - }, - } - Ok(()) -} - -fn print_key(hive: &mut Hive, keynode: &KeyNode, path: &mut Vec, cli: &Args) -> Result<()> where RS: Read + Seek { - path.push(keynode.name().to_string()); - - let current_path = path.join("\\"); - if cli.display_bodyfile { - let bf_line = Bodyfile3Line::new() - .with_name(¤t_path) - .with_ctime(keynode.timestamp().timestamp()); - println!("{}", bf_line); - } else { - if cli.hide_timestamps { - println!("\n[{}]", ¤t_path); - } else { - println!("\n[{}]; {}", ¤t_path, keynode.timestamp()); - } - - print_values(keynode); - } - - for sk in keynode.subkeys(hive).unwrap().iter() { - print_key(hive, &sk.borrow(), path, cli)?; - } - path.pop(); - - Ok(()) -} - -fn print_values(keynode: &KeyNode) { - for value in keynode.values() { - let data_type = match value.data_type() { - Some(dt) => format!("{dt}:"), - None => "".into() - }; - - println!("\"{}\" = {data_type}{}", value.name(), value.value()); - } -} \ No newline at end of file