diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f1f0ba439dac..e0bf26470dfd 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -24,7 +24,7 @@ Describe changes: ### Provide values to any of the below to override the defaults. -- To use an LibHTP, Suricata-Verify or Suricata-Update pull request, +- To use a Suricata-Verify or Suricata-Update pull request, link to the pull request in the respective `_BRANCH` variable. - Leave unused overrides blank or remove. @@ -32,5 +32,3 @@ SV_REPO= SV_BRANCH= SU_REPO= SU_BRANCH= -LIBHTP_REPO= -LIBHTP_BRANCH= diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 04a2f1d5102c..cea33b724dc0 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -9,8 +9,6 @@ on: pull_request: workflow_dispatch: inputs: - LIBHTP_REPO: - LIBHTP_BRANCH: SU_REPO: SU_BRANCH: SV_REPO: @@ -148,7 +146,6 @@ jobs: with: name: prep path: prep - - run: tar xvf prep/libhtp.tar.gz - run: tar xvf prep/suricata-update.tar.gz - run: tar xvf prep/suricata-verify.tar.gz - name: Configuring @@ -296,7 +293,6 @@ jobs: with: name: prep path: prep - - run: tar xvf prep/libhtp.tar.gz - run: tar xvf prep/suricata-update.tar.gz - run: tar xvf prep/suricata-verify.tar.gz - name: Build @@ -314,81 +310,6 @@ jobs: run: cargo clippy --all-features working-directory: rust - almalinux-9-non-bundled-libhtp: - name: AlmaLinux 9 Non-Bundled LibHTP - runs-on: ubuntu-latest - container: almalinux:9 - needs: [prepare-deps, ubuntu-22-04-dist] - steps: - # Cache Rust stuff. 
- - name: Cache cargo registry - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 - with: - path: ~/.cargo/registry - key: cargo-registry - - - name: Cache RPMs - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 - with: - path: /var/cache/dnf - key: ${{ github.job }}-dnf - - run: echo "keepcache=1" >> /etc/dnf/dnf.conf - - - name: Determine number of CPUs - run: echo CPUS=$(nproc --all) >> $GITHUB_ENV - - - name: Install system packages - run: | - dnf -y install dnf-plugins-core epel-release - dnf config-manager --set-enabled crb - dnf -y install \ - autoconf \ - automake \ - cargo-vendor \ - cbindgen \ - diffutils \ - numactl-devel \ - dpdk-devel \ - file-devel \ - gcc \ - gcc-c++ \ - git \ - jansson-devel \ - jq \ - libtool \ - libyaml-devel \ - libnfnetlink-devel \ - libnetfilter_queue-devel \ - libnet-devel \ - libcap-ng-devel \ - libevent-devel \ - libmaxminddb-devel \ - libpcap-devel \ - libtool \ - lz4-devel \ - make \ - pcre2-devel \ - pkgconfig \ - python3-devel \ - python3-sphinx \ - python3-yaml \ - rust-toolset \ - sudo \ - which \ - zlib-devel - - - name: Download suricata.tar.gz - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 - with: - name: dist - - - run: tar xf suricata-*.tar.gz --strip-components=1 - - run: cd libhtp && ./configure --prefix=/usr/local - - run: cd libhtp && make -j ${{ env.CPUS }} - - run: cd libhtp && make install - - - run: PKG_CONFIG_PATH=/usr/local/lib/pkgconfig ./configure --enable-non-bundled-htp --with-libhtp-includes=/usr/local/include --with-libhtp-libraries=/usr/local/lib - rpms: name: Build RPMs runs-on: ubuntu-latest @@ -547,7 +468,6 @@ jobs: with: name: prep path: prep - - run: tar xvf prep/libhtp.tar.gz - run: tar xvf prep/suricata-update.tar.gz - run: tar xvf prep/suricata-verify.tar.gz - uses: ./.github/actions/install-cbindgen @@ -727,7 +647,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: 
./autogen.sh - run: ./configure --enable-warnings --disable-shared @@ -823,7 +742,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: ./autogen.sh - run: CC="clang" CFLAGS="$DEFAULT_CFLAGS -Wshadow" ./configure --disable-shared @@ -917,7 +835,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: ./autogen.sh - run: ./configure --enable-warnings --enable-debug --enable-unittests --disable-shared --enable-rust-strict --enable-hiredis --enable-nfqueue @@ -1017,7 +934,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: ./autogen.sh - run: CC="clang" CFLAGS="$DEFAULT_CFLAGS -Wshadow -fsanitize=address -fno-omit-frame-pointer" ./configure --enable-debug --enable-unittests --disable-shared --enable-rust-strict --enable-hiredis --enable-nfqueue @@ -1108,7 +1024,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: ./autogen.sh - run: ./configure --enable-debug --enable-unittests --disable-shared --enable-rust-strict --enable-hiredis --enable-nfqueue @@ -1194,7 +1109,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: tar xf prep/suricata-verify.tar.gz - run: mkdir /home/suricata/suricata @@ -1286,7 +1200,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: ./autogen.sh - run: | if ./configure; then @@ -1348,7 +1261,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: ./autogen.sh - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure - run: make -j ${{ env.CPUS }} @@ -1416,7 +1328,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: tar xf prep/suricata-verify.tar.gz - run: ./autogen.sh @@ -1493,7 +1404,6 @@ jobs: 
with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh - run: ./configure --enable-warnings --disable-shared --enable-unittests @@ -1549,14 +1459,6 @@ jobs: CARGO_INCREMENTAL: 0 - run: llvm-profdata-14 merge -o ct.profdata /tmp/ct.profraw - run: llvm-cov-14 show $(find rust/target/debug/deps/ -type f -regex 'rust/target/debug/deps/suricata\-[a-z0-9]+$') -instr-profile=ct.profdata --show-instantiations --ignore-filename-regex="^/root/.*" >> coverage.txt - - run: | - cd libhtp - make test - cd .. - env: - LLVM_PROFILE_FILE: "/tmp/htp-test.profraw" - - run: llvm-profdata-14 merge -o htp-test.profdata /tmp/htp-test.profraw - - run: llvm-cov-14 show libhtp/test/test_all -instr-profile=htp-test.profdata --show-instantiations --ignore-filename-regex="^/root/.*" >> coverage.txt - name: Upload coverage to Codecov uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303 with: @@ -1634,7 +1536,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-verify.tar.gz - uses: ./.github/actions/install-cbindgen - name: Fix kernel mmap rnd bits @@ -1740,7 +1641,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - name: Fix kernel mmap rnd bits # Asan in llvm 14 provided in ubuntu 22.04 is incompatible with @@ -1875,7 +1775,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - name: Extracting suricata-verify run: tar xf prep/suricata-verify.tar.gz - name: Fix kernel mmap rnd bits @@ -1963,7 +1862,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - name: Fix kernel mmap rnd bits run: sudo sysctl vm.mmap_rnd_bits=28 - run: ./autogen.sh @@ -2066,7 +1964,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - name: Fix kernel mmap rnd bits # Asan in llvm 14 provided in ubuntu 22.04 is 
incompatible with @@ -2157,7 +2054,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh @@ -2307,7 +2203,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - name: Fix kernel mmap rnd bits # Asan in llvm 14 provided in ubuntu 22.04 is incompatible with @@ -2385,7 +2280,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh - run: AFL_HARDEN=1 ac_cv_func_realloc_0_nonnull=yes ac_cv_func_malloc_0_nonnull=yes CFLAGS="-fsanitize=address -fno-omit-frame-pointer" CXXFLAGS=$CFLAGS CC=afl-clang-fast CXX=afl-clang-fast++ LDFLAGS="-fsanitize=address" ./configure --enable-warnings --enable-fuzztargets --disable-shared @@ -2485,7 +2379,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-netmap @@ -2520,7 +2413,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: tar xf prep/suricata-verify.tar.gz - run: ./autogen.sh @@ -2621,7 +2513,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-dpdk @@ -2702,7 +2593,6 @@ jobs: run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain $(grep rust-version rust/Cargo.toml.in|sed 's/\"//g'|awk '{print $3}') -y - run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH - uses: ./.github/actions/install-cbindgen - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: tar xf prep/suricata-verify.tar.gz - run: ./autogen.sh @@ -2800,7 +2690,6 @@ jobs: with: name: prep path: 
prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh @@ -2886,7 +2775,6 @@ jobs: run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain $(grep rust-version rust/Cargo.toml.in|sed 's/\"//g'|awk '{print $3}') -y - run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH - uses: ./.github/actions/install-cbindgen - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: tar xf prep/suricata-verify.tar.gz - run: ./autogen.sh @@ -2962,7 +2850,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh @@ -3033,7 +2920,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - uses: ./.github/actions/install-cbindgen - run: ./autogen.sh @@ -3084,7 +2970,6 @@ jobs: with: name: prep path: prep - - run: tar xvf prep/libhtp.tar.gz - run: tar xvf prep/suricata-update.tar.gz - name: Create Python virtual environment run: python3 -m venv ./testenv @@ -3095,8 +2980,8 @@ jobs: - run: ./autogen.sh - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" ./configure --enable-warnings --enable-unittests --prefix="$HOME/.local/" - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" make -j2 - # somehow it gets included by some C++ stdlib header (case unsensitive) - - run: rm libhtp/VERSION && make check + - run: CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" ./src/suricata -u -U DetectEngineHttpServerBodyFileDataTest21 + - run: 
CPATH="$HOMEBREW_PREFIX/include:$CPATH" LIBRARY_PATH="$HOMEBREW_PREFIX/lib:$LIBRARY_PATH" PATH="/opt/homebrew/opt/libtool/libexec/gnubin:$PATH" CFLAGS="${DEFAULT_CFLAGS}" make check - run: tar xf prep/suricata-verify.tar.gz - name: Running suricata-verify run: | @@ -3138,7 +3023,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - name: Npcap DLL run: | @@ -3194,7 +3078,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: tar xf prep/suricata-verify.tar.gz - name: Build @@ -3238,7 +3121,6 @@ jobs: with: name: prep path: prep - - run: tar xf prep/libhtp.tar.gz - name: WinDivert run: | curl -sL -O https://github.com/basil00/Divert/releases/download/v1.4.3/WinDivert-1.4.3-A.zip diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index f0f17c8e3cd8..db44813c8487 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -54,7 +54,6 @@ jobs: sudo apt-get install libjansson-dev sudo apt-get install libpcap-dev sudo apt-get install libnuma-dev - git clone --depth 1 https://github.com/OISF/libhtp.git cargo install cbindgen export PATH=/opt/work/.cargo/bin:$PATH chmod +x autogen.sh diff --git a/.github/workflows/commits.yml b/.github/workflows/commits.yml index cbcb118d8f21..1825cfb28d64 100644 --- a/.github/workflows/commits.yml +++ b/.github/workflows/commits.yml @@ -80,7 +80,6 @@ jobs: # The action above is supposed to do this for us, but it doesn't appear to stick. - run: /usr/bin/git config --global --add safe.directory /__w/suricata/suricata - run: git fetch - - run: git clone https://github.com/OISF/libhtp -b 0.5.x - name: Building all commits run: | echo "Building commits from ${GITHUB_BASE_REF}." 
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0b2bce92a387..7950485fa94a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,8 +8,6 @@ on: pull_request: workflow_dispatch: inputs: - LIBHTP_REPO: - LIBHTP_BRANCH: SU_REPO: SU_BRANCH: SV_REPO: @@ -130,7 +128,6 @@ jobs: cp prep/cbindgen $HOME/.cargo/bin chmod 755 $HOME/.cargo/bin/cbindgen echo "$HOME/.cargo/bin" >> $GITHUB_PATH - - run: tar xf prep/libhtp.tar.gz - run: tar xf prep/suricata-update.tar.gz - run: ./autogen.sh - run: CFLAGS="${DEFAULT_CFLAGS}" ./configure diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index 0366f104ec89..d70eeea3f64d 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -128,7 +128,6 @@ jobs: echo "No github merge commit found" fi shell: bash {0} - - run: git clone https://github.com/OISF/libhtp -b 0.5.x - run: ./autogen.sh - run: ./configure --enable-warnings --enable-unittests - name: Check formatting diff --git a/.github/workflows/prepare-deps.yml b/.github/workflows/prepare-deps.yml index 2f8ba57b146b..5255b8f38523 100644 --- a/.github/workflows/prepare-deps.yml +++ b/.github/workflows/prepare-deps.yml @@ -29,9 +29,6 @@ jobs: echo "Parsing branch and PR info from:" echo "${body}" - LIBHTP_REPO=$(echo "${body}" | awk -F = '/^LIBHTP_REPO=/ { print $2 }') - LIBHTP_BRANCH=$(echo "${body}" | awk -F = '/^LIBHTP_BRANCH=/ { print $2 }') - SU_REPO=$(echo "${body}" | awk -F = '/^SU_REPO=/ { print $2 }') SU_BRANCH=$(echo "${body}" | awk -F = '/^SU_BRANCH=/ { print $2 }') @@ -39,8 +36,6 @@ jobs: SV_BRANCH=$(echo "${body}" | awk -F = '/^SV_BRANCH=/ { print $2 }') else echo "No pull request body, will use inputs or defaults." 
- LIBHTP_REPO=${{ inputs.LIBHTP_REPO }} - LIBHTP_BRANCH=${{ inputs.LIBHTP_BRANCH }} SU_REPO=${{ inputs.SU_REPO }} SU_BRANCH=${{ inputs.SU_BRANCH }} SV_REPO=${{ inputs.SV_REPO }} @@ -48,9 +43,6 @@ jobs: fi # If the _REPO variables don't contain a full URL, add GitHub. - if [ "${LIBHTP_REPO}" ] && ! echo "${LIBHTP_REPO}" | grep -q '^https://'; then - LIBHTP_REPO="https://github.com/${LIBHTP_REPO}" - fi if [ "${SU_REPO}" ] && ! echo "${SU_REPO}" | grep -q '^https://'; then SU_REPO="https://github.com/${SU_REPO}" fi @@ -58,9 +50,6 @@ jobs: SV_REPO="https://github.com/${SV_REPO}" fi - echo LIBHTP_REPO=${LIBHTP_REPO} | tee -a ${GITHUB_ENV} - echo LIBHTP_BRANCH=${LIBHTP_BRANCH} | tee -a ${GITHUB_ENV} - echo SU_REPO=${SU_REPO} | tee -a ${GITHUB_ENV} echo SU_BRANCH=${SU_BRANCH} | tee -a ${GITHUB_ENV} @@ -69,8 +58,6 @@ jobs: - name: Annotate output run: | - echo "::notice:: LIBHTP_REPO=${LIBHTP_REPO}" - echo "::notice:: LIBHTP_BRANCH=${LIBHTP_BRANCH}" echo "::notice:: SU_REPO=${SU_REPO}" echo "::notice:: SU_BRANCH=${SU_BRANCH}" echo "::notice:: SV_REPO=${SV_REPO}" @@ -81,10 +68,6 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - run: git config --global --add safe.directory /__w/suricata/suricata - - name: Fetching libhtp - run: | - DESTDIR=./bundle ./scripts/bundle.sh libhtp - tar zcf libhtp.tar.gz -C bundle libhtp - name: Fetching suricata-update run: | DESTDIR=./bundle ./scripts/bundle.sh suricata-update @@ -116,6 +99,5 @@ jobs: with: name: prep path: | - libhtp.tar.gz suricata-update.tar.gz suricata-verify.tar.gz diff --git a/.github/workflows/rust-checks.yml b/.github/workflows/rust-checks.yml index 0a701ac6d847..2cab65011aa9 100644 --- a/.github/workflows/rust-checks.yml +++ b/.github/workflows/rust-checks.yml @@ -83,7 +83,6 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - name: Configure Suricata run: | - ./scripts/bundle.sh libhtp ./autogen.sh ./configure --enable-warnings - name: Run Cargo Audit @@ -165,7 +164,6 
@@ jobs: echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Configure Suricata run: | - ./scripts/bundle.sh libhtp ./autogen.sh ./configure --enable-warnings - name: Cargo Update and Build diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 28d8b3ee7218..949fba74d355 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -90,3 +90,6 @@ jobs: # especially without debug feature - run: cargo clippy working-directory: rust + # does not work in other subdirectories for now + - run: cargo fmt + working-directory: rust/htp diff --git a/.github/workflows/scan-build.yml b/.github/workflows/scan-build.yml index 366f5233ccd3..f1ad46ab1f3d 100644 --- a/.github/workflows/scan-build.yml +++ b/.github/workflows/scan-build.yml @@ -74,11 +74,10 @@ jobs: - run: scan-build-18 ./configure --enable-warnings --enable-dpdk --enable-nfqueue --enable-nflog env: CC: clang-18 - # exclude libhtp from the analysis # disable security.insecureAPI.DeprecatedOrUnsafeBufferHandling explicitly as # this will require significant effort to address. 
- run: | - scan-build-18 --status-bugs --exclude libhtp/ --exclude rust \ + scan-build-18 --status-bugs --exclude rust \ -enable-checker valist.Uninitialized \ -enable-checker valist.CopyToSelf \ -enable-checker valist.Unterminated \ diff --git a/.gitignore b/.gitignore index 66416e27d14e..e214bbd78e69 100644 --- a/.gitignore +++ b/.gitignore @@ -32,19 +32,6 @@ doc/userguide/suricata.1 etc/suricata.logrotate etc/suricata.service install-sh -libhtp/TAGS -libhtp/aclocal.m4 -libhtp/autom4te.cache/ -libhtp/config.h -libhtp/config.log -libhtp/config.status -libhtp/configure -libhtp/htp.pc -libhtp/htp/TAGS -libhtp/htp/libhtp.la -libhtp/libtool -libhtp/stamp-h1 -libhtp/test/TAGS libtool ltmain.sh missing diff --git a/Makefile.am b/Makefile.am index 20e50bdc4a03..62ed549874bd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -12,7 +12,7 @@ EXTRA_DIST = ChangeLog COPYING LICENSE suricata.yaml.in \ scripts/docs-ubuntu-debian-minimal-build.sh \ scripts/evedoc.py \ examples/plugins -SUBDIRS = $(HTP_DIR) rust src plugins qa rules doc contrib etc python ebpf \ +SUBDIRS = rust src plugins qa rules doc contrib etc python ebpf \ $(SURICATA_UPDATE_DIR) DIST_SUBDIRS = $(SUBDIRS) examples/lib/simple @@ -53,7 +53,7 @@ endif @echo "You can now start suricata by running as root something like:" @echo " $(DESTDIR)$(bindir)/suricata -c $(DESTDIR)$(e_sysconfdir)suricata.yaml -i eth0" @echo "" - @echo "If a library like libhtp.so is not found, you can run suricata with:" + @echo "If a shared library is not found, you can add library paths with:" @echo " LD_LIBRARY_PATH="$(DESTDIR)$(prefix)/lib" "$(DESTDIR)$(bindir)/suricata" -c "$(DESTDIR)$(e_sysconfdir)suricata.yaml" -i eth0" @echo "" @echo "The Emerging Threats Open rules are now installed. 
Rules can be" diff --git a/configure.ac b/configure.ac index ca964d9039a0..32cbff33b165 100644 --- a/configure.ac +++ b/configure.ac @@ -1565,108 +1565,6 @@ [test "x$install_suricata_update" = "xyes"]) AC_SUBST([install_suricata_update_reason]) - # libhtp - AC_ARG_ENABLE(non-bundled-htp, - AS_HELP_STRING([--enable-non-bundled-htp], [Enable the use of an already installed version of htp]),[enable_non_bundled_htp=$enableval],[enable_non_bundled_htp=no]) - AS_IF([test "x$enable_non_bundled_htp" = "xyes"], [ - PKG_CHECK_MODULES([libhtp], htp,, [with_pkgconfig_htp=no]) - if test "$with_pkgconfig_htp" != "no"; then - CPPFLAGS="${CPPFLAGS} ${libhtp_CFLAGS}" - LIBS="${LIBS} ${libhtp_LIBS}" - fi - - AC_ARG_WITH(libhtp_includes, - [ --with-libhtp-includes=DIR libhtp include directory], - [with_libhtp_includes="$withval"],[with_libhtp_includes=no]) - AC_ARG_WITH(libhtp_libraries, - [ --with-libhtp-libraries=DIR libhtp library directory], - [with_libhtp_libraries="$withval"],[with_libhtp_libraries="no"]) - - if test "$with_libhtp_includes" != "no"; then - CPPFLAGS="-I${with_libhtp_includes} ${CPPFLAGS}" - fi - - if test "$with_libhtp_libraries" != "no"; then - LDFLAGS="${LDFLAGS} -L${with_libhtp_libraries}" - fi - - AC_CHECK_HEADER(htp/htp.h,,[AC_MSG_ERROR(htp/htp.h not found ...)]) - - LIBHTP="" - AC_CHECK_LIB(htp, htp_conn_create,, LIBHTP="no") - if test "$LIBHTP" = "no"; then - echo - echo " ERROR! libhtp library not found" - echo - exit 1 - fi - PKG_CHECK_MODULES(LIBHTPMINVERSION, [htp >= 0.5.45],[libhtp_minver_found="yes"],[libhtp_minver_found="no"]) - if test "$libhtp_minver_found" = "no"; then - PKG_CHECK_MODULES(LIBHTPDEVVERSION, [htp = 0.5.X],[libhtp_devver_found="yes"],[libhtp_devver_found="no"]) - if test "$libhtp_devver_found" = "no"; then - echo - echo " ERROR! 
libhtp was found but it is neither >= 0.5.45, nor the dev 0.5.X" - echo - exit 1 - fi - fi - - AC_CHECK_LIB([htp], [htp_config_register_request_uri_normalize],AC_DEFINE_UNQUOTED([HAVE_HTP_URI_NORMALIZE_HOOK],[1],[Found htp_config_register_request_uri_normalize function in libhtp]) ,,[-lhtp]) - # check for htp_tx_get_response_headers_raw - AC_CHECK_LIB([htp], [htp_tx_get_response_headers_raw],AC_DEFINE_UNQUOTED([HAVE_HTP_TX_GET_RESPONSE_HEADERS_RAW],[1],[Found htp_tx_get_response_headers_raw in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], [htp_decode_query_inplace],AC_DEFINE_UNQUOTED([HAVE_HTP_DECODE_QUERY_INPLACE],[1],[Found htp_decode_query_inplace function in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], [htp_config_set_response_decompression_layer_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT],[1],[Found htp_config_set_response_decompression_layer_limit function in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], [htp_config_set_allow_space_uri],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI],[1],[Found htp_config_set_allow_space_uri function in libhtp]) ,,[-lhtp]) - AC_EGREP_HEADER(htp_config_set_path_decode_u_encoding, htp/htp.h, AC_DEFINE_UNQUOTED([HAVE_HTP_SET_PATH_DECODE_U_ENCODING],[1],[Found usable htp_config_set_path_decode_u_encoding function in libhtp]) ) - AC_CHECK_LIB([htp], [htp_config_set_lzma_memlimit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT],[1],[Found htp_config_set_lzma_memlimit function in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], [htp_config_set_lzma_layers],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_LAYERS],[1],[Found htp_config_set_lzma_layers function in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], [htp_config_set_compression_bomb_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT],[1],[Found htp_config_set_compression_bomb_limit function in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], 
[htp_config_set_compression_time_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT],[1],[Found htp_config_set_compression_time_limit function in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], [htp_config_set_max_tx],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_MAX_TX],[1],[Found htp_config_set_max_tx function in libhtp]) ,,[-lhtp]) - AC_CHECK_LIB([htp], [htp_config_set_number_headers_limit],AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_HEADERS_LIMIT],[1],[Found htp_config_set_number_headers_limit function in libhtp]) ,,[-lhtp]) - ]) - - if test "x$enable_non_bundled_htp" = "xno"; then - # test if we have a bundled htp - if test -d "$srcdir/libhtp"; then - AC_CONFIG_SUBDIRS([libhtp]) - HTP_DIR="libhtp" - AC_SUBST(HTP_DIR) - HTP_LDADD="../libhtp/htp/libhtp.la" - AC_SUBST(HTP_LDADD) - # make sure libhtp is added to the includes - CPPFLAGS="-I\${srcdir}/../libhtp/ ${CPPFLAGS}" - - AC_CHECK_HEADER(iconv.h,,[AC_MSG_ERROR(iconv.h not found ...)]) - AC_CHECK_LIB(iconv, libiconv_close) - AC_DEFINE_UNQUOTED([HAVE_HTP_URI_NORMALIZE_HOOK],[1],[Assuming htp_config_register_request_uri_normalize function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_TX_GET_RESPONSE_HEADERS_RAW],[1],[Assuming htp_tx_get_response_headers_raw function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_DECODE_QUERY_INPLACE],[1],[Assuming htp_decode_query_inplace function in bundled libhtp]) - # enable when libhtp has been updated - AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT],[1],[Assuming htp_config_set_response_decompression_layer_limit function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI],[1],[Assuming htp_config_set_allow_space_uri function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT],[1],[Assuming htp_config_set_lzma_memlimit function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_LZMA_LAYERS],[1],[Assuming htp_config_set_lzma_layers function in bundled libhtp]) - 
AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT],[1],[Assuming htp_config_set_compression_bomb_limit function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT],[1],[Assuming htp_config_set_compression_time_limit function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_MAX_TX],[1],[Assuming htp_config_set_max_tx function in bundled libhtp]) - AC_DEFINE_UNQUOTED([HAVE_HTP_CONFIG_SET_HEADERS_LIMIT],[1],[Assuming htp_config_set_number_headers_limit function in bundled libhtp]) - else - echo - echo " ERROR: Libhtp is not bundled. Get libhtp by doing:" - echo " git clone https://github.com/OISF/libhtp" - echo " Then re-run Suricata's autogen.sh and configure script." - echo " Or, if libhtp is installed in a different location," - echo " pass --enable-non-bundled-htp to Suricata's configure script." - echo " Add --with-libhtp-includes= and --with-libhtp-libraries= if" - echo " libhtp is not installed in the include and library paths." 
- echo - exit 1 - fi - fi - - AM_CONDITIONAL([HTP_LDADD], [test "x${HTP_LDADD}" != "x"]) - # Check for libcap-ng case $host in *-*-linux*) @@ -2511,7 +2409,6 @@ AC_SUBST(MAJOR_MINOR) AC_SUBST(RUST_FEATURES) AC_SUBST(RUST_SURICATA_LIBDIR) AC_SUBST(RUST_SURICATA_LIBNAME) -AC_SUBST(enable_non_bundled_htp) AM_CONDITIONAL([BUILD_SHARED_LIBRARY], [test "x$enable_shared" = "xyes"] && [test "x$can_build_shared_library" = "xyes"]) @@ -2559,7 +2456,6 @@ SURICATA_BUILD_CONF="Suricata Configuration: GeoIP2 support: ${enable_geoip} JA3 support: ${enable_ja3} JA4 support: ${enable_ja4} - Non-bundled htp: ${enable_non_bundled_htp} Hyperscan support: ${enable_hyperscan} Libnet support: ${enable_libnet} liblz4 support: ${enable_liblz4} diff --git a/doc/userguide/devguide/codebase/installation-from-git.rst b/doc/userguide/devguide/codebase/installation-from-git.rst index 9d7a45a54392..373a6e4fe968 100644 --- a/doc/userguide/devguide/codebase/installation-from-git.rst +++ b/doc/userguide/devguide/codebase/installation-from-git.rst @@ -72,7 +72,7 @@ Next, enter the following line in the terminal: git clone https://github.com/OISF/suricata.git cd suricata -Libhtp and suricata-update are not bundled. Get them by doing: +Suricata-update is not bundled. Get it by doing: .. code-block:: bash diff --git a/doc/userguide/devguide/codebase/testing.rst b/doc/userguide/devguide/codebase/testing.rst index c712e90a99b8..41cd88c81047 100644 --- a/doc/userguide/devguide/codebase/testing.rst +++ b/doc/userguide/devguide/codebase/testing.rst @@ -30,7 +30,7 @@ Use these to check that specific functions behave as expected, in success and in during development, for nom parsers in the Rust codebase, for instance, or for checking that messages or message parts of a protocol/stream are processed as they should. 
-To execute all unit tests (both from C and Rust code), as well as ``libhtp`` ones, from the Suricata main directory, run:: +To execute all unit tests (both from C and Rust code) from the Suricata main directory, run:: make check diff --git a/doc/userguide/lua/lua-functions.rst b/doc/userguide/lua/lua-functions.rst index 92473d52c35e..f74d845b6c13 100644 --- a/doc/userguide/lua/lua-functions.rst +++ b/doc/userguide/lua/lua-functions.rst @@ -231,7 +231,7 @@ Example: HttpGetRequestHost ~~~~~~~~~~~~~~~~~~ -Get the host from libhtp's tx->request_hostname, which can either be +Get the host from libhtp's htp_tx_request_hostname(tx), which can either be the host portion of the url or the host portion of the Host header. Example: diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst index 4bf74b65284d..c9d81b21935f 100644 --- a/doc/userguide/upgrade.rst +++ b/doc/userguide/upgrade.rst @@ -107,6 +107,10 @@ Logging changes live transaction, to reduce the chances of logging unrelated data.** This may lead to what looks like a regression in behavior, but it is a considered choice. +Other Changes +~~~~~~~~~~~~~ +- libhtp has been replaced with a Rust version. This means libhtp is no longer built and linked as a shared library, and the libhtp dependency is now built directly into Suricata. + Upgrading 6.0 to 7.0 -------------------- diff --git a/doxygen.cfg b/doxygen.cfg index 22fc4543a34d..c110daa439a5 100644 --- a/doxygen.cfg +++ b/doxygen.cfg @@ -829,7 +829,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = src/ libhtp/htp/ examples/ +INPUT = src/ examples/ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. 
Doxygen uses diff --git a/examples/lib/simple/Makefile.am b/examples/lib/simple/Makefile.am index c4004b9446c9..32821827392a 100644 --- a/examples/lib/simple/Makefile.am +++ b/examples/lib/simple/Makefile.am @@ -6,7 +6,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/src simple_LDFLAGS = $(all_libraries) $(SECLDFLAGS) simple_LDADD = "-Wl,--start-group,$(top_builddir)/src/libsuricata_c.a,../../$(RUST_SURICATA_LIB),--end-group" $(RUST_LDADD) -if HTP_LDADD -simple_LDADD += ../../$(HTP_LDADD) -endif simple_DEPENDENCIES = $(top_builddir)/src/libsuricata_c.a ../../$(RUST_SURICATA_LIB) diff --git a/libsuricata-config.in b/libsuricata-config.in index 1fabe0765268..94bbf9a81b98 100644 --- a/libsuricata-config.in +++ b/libsuricata-config.in @@ -9,8 +9,6 @@ LIBS="@LIBS@ @RUST_LDADD@" shared_lib="-lsuricata" static_lib="-lsuricata_c -lsuricata_rust" -enable_non_bundled_htp="@enable_non_bundled_htp@" - lib="$shared_lib" show_libs="no" @@ -47,12 +45,6 @@ if [ "$use_static" = "no" ]; then fi fi -# If we're using a bundled htp, add it to the libs as well. It will -# already be present if we're use a non-bundled libhtp. -if [ "$enable_non_bundled_htp" = "no" ]; then - lib="${lib} -lhtp" -fi - output="" if [ "$show_cflags" = "yes" ]; then diff --git a/requirements.txt b/requirements.txt index 6df1358f075f..537f896bfd79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ -# Specify libhtp and suricata-update requirements. +# Specify suricata-update requirements. 
# # Format: # # name {repo} {branch|tag} -libhtp https://github.com/OISF/libhtp 0.5.x suricata-update https://github.com/OISF/suricata-update master diff --git a/rules/http-events.rules b/rules/http-events.rules index e08d84eb46cd..7e4d1fd258e0 100644 --- a/rules/http-events.rules +++ b/rules/http-events.rules @@ -96,4 +96,5 @@ alert http any any -> any any (msg:"SURICATA HTTP request missing protocol"; flo alert http any any -> any any (msg:"SURICATA HTTP request too many headers"; flow:established,to_server; app-layer-event:http.request_too_many_headers; classtype:protocol-command-decode; sid:2221056; rev:1;) alert http any any -> any any (msg:"SURICATA HTTP response too many headers"; flow:established,to_client; app-layer-event:http.response_too_many_headers; classtype:protocol-command-decode; sid:2221057; rev:1;) -# next sid 2221058 +#alert http any any -> any any (msg:"SURICATA HTTP response chunk extension"; flow:established; app-layer-event:http.response_chunk_extension; classtype:protocol-command-decode; sid:2221058; rev:1;) +# next sid 2221059 diff --git a/rust/Cargo.toml.in b/rust/Cargo.toml.in index eaeacff8356e..80a3d78662e2 100644 --- a/rust/Cargo.toml.in +++ b/rust/Cargo.toml.in @@ -7,7 +7,7 @@ edition = "2021" rust-version = "1.67.1" [workspace] -members = [".", "./derive"] +members = [".", "./derive", "./htp"] [lib] crate-type = ["staticlib", "rlib"] @@ -71,5 +71,7 @@ suricata-derive = { path = "./derive", version = "@PACKAGE_VERSION@" } suricata-lua-sys = { version = "0.1.0-alpha.5" } +htp = { path = "./htp", version = "2.0.0" } + [dev-dependencies] test-case = "~3.3.1" diff --git a/rust/Makefile.am b/rust/Makefile.am index d53eb97090e1..2482d8f31ac6 100644 --- a/rust/Makefile.am +++ b/rust/Makefile.am @@ -1,10 +1,12 @@ -EXTRA_DIST = src derive \ +EXTRA_DIST = src derive htp \ .cargo/config.toml.in \ cbindgen.toml \ dist/rust-bindings.h \ + dist/htp/htp_rs.h \ vendor \ Cargo.toml Cargo.lock \ - derive/Cargo.toml + derive/Cargo.toml \ + 
htp/Cargo.toml if !DEBUG RELEASE = --release @@ -61,6 +63,7 @@ all-local: Cargo.toml $(RUST_SURICATA_LIBDIR)/${RUST_SURICATA_LIBNAME}; \ fi $(MAKE) gen/rust-bindings.h + $(MAKE) gen/htp/htp_rs.h install-library: $(MKDIR_P) "$(DESTDIR)$(libdir)" @@ -92,6 +95,15 @@ else gen/rust-bindings.h: endif +if HAVE_CBINDGEN +gen/htp/htp_rs.h: $(RUST_SURICATA_LIB) + cd $(abs_top_srcdir)/rust/htp && \ + cbindgen --config $(abs_top_srcdir)/rust/htp/cbindgen.toml \ + --quiet --verify --output $(abs_top_builddir)/rust/gen/htp/htp_rs.h || true +else +gen/htp/htp_rs.h: +endif + doc: CARGO_HOME=$(CARGO_HOME) $(CARGO) doc --all-features --no-deps @@ -103,6 +115,15 @@ else dist/rust-bindings.h: endif +if HAVE_CBINDGEN +dist/htp/htp_rs.h: + cd $(abs_top_srcdir)/rust/htp && \ + cbindgen --config cbindgen.toml \ + --quiet --output $(abs_top_builddir)/rust/dist/htp/htp_rs.h +else +dist/htp/htp_rs.h: +endif + Cargo.toml: Cargo.toml.in update-lock: Cargo.toml diff --git a/rust/htp/.gitignore b/rust/htp/.gitignore new file mode 100644 index 000000000000..01c356623241 --- /dev/null +++ b/rust/htp/.gitignore @@ -0,0 +1 @@ +!Cargo.toml diff --git a/rust/htp/Cargo.toml b/rust/htp/Cargo.toml new file mode 100644 index 000000000000..10b4d0eca043 --- /dev/null +++ b/rust/htp/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "htp" +authors = ["ivanr = Ivan Ristic ", "cccs = Canadian Centre for Cyber Security"] +version = "2.0.0" +publish = false +edition = "2018" +autobins = false +license-file = "LICENSE" +description = "Security Aware HTP Protocol parsing library" +readme = "README.md" +repository = "https://github.com/CybercentreCanada/libhtp-rs-internal" +homepage = "https://github.com/CybercentreCanada/libhtp-rs-internal" +keywords = ["parser", "HTTP", "protocol", "network", "api"] +categories = ["parsing", "network-programming"] +include = [ + "Cargo.toml", + "LICENSE", + "README.md", + "src/**/*.rs", + "cbindgen.toml", +] + +[lib] +crate-type = ["staticlib", "rlib", "cdylib"] + +[features] 
+default = ["cbindgen"] + +[dependencies] +base64 = "0.12.3" +bstr = "0.2" +libc = "0.2" +nom = "7.1.1" +lzma-rs = { version = "0.2.0", features = ["stream"] } +flate2 = { version = "~1.0.19", features = ["zlib"], default-features = false } +lazy_static = "1.4.0" +time = "=0.3.36" + +[dev-dependencies] +rstest = "0.12.0" + +[build-dependencies] +cbindgen = { version = "0.14.1", optional = true } +cdylib-link-lines = "0.1.1" diff --git a/rust/htp/LICENSE b/rust/htp/LICENSE new file mode 100644 index 000000000000..3d4227e3a2da --- /dev/null +++ b/rust/htp/LICENSE @@ -0,0 +1,31 @@ +Copyright (c) 2009-2010 Open Information Security Foundation +Copyright (c) 2010-2013 Qualys, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +- Neither the name of the Qualys, Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/rust/htp/README.md b/rust/htp/README.md new file mode 100644 index 000000000000..b7ceea73f33b --- /dev/null +++ b/rust/htp/README.md @@ -0,0 +1,67 @@ +# LibHTP + +--- + +Copyright 2009-2010 Open Information Security Foundation +Copyright 2010-2013 Qualys, Inc. + +--- + +LibHTP is a security-aware parser for the HTTP protocol and the related bits +and pieces. The goals of the project, in the order of importance, are as +follows: + + 1. Completeness of coverage; LibHTP must be able to parse virtually all + traffic that is found in practice. + + 2. Permissive parsing; LibHTP must never fail to parse a stream that would + be parsed by some other web server. + + 3. Awareness of evasion techniques; LibHTP must be able to detect and + effectively deal with various evasion techniques, producing, where + practical, identical or practically identical results as the web + server processing the same traffic stream. + + 4. Performance; The performance must be adequate for the desired tasks. + Completeness and security are often detrimental to performance. Our + idea of handling the conflicting requirements is to put the library + user in control, allowing him to choose the most desired library + characteristic. + + | STATUS LIBHTP IS VERY YOUNG AT THIS POINT. IT WILL BE SOME TIME BEFORE + | IT CAN BE CONSIDERED COMPLETE. AT THE MOMENT, THE FOCUS OF DEVELOPMENT + | IS ON ACHIEVING THE FIRST TWO GOALS. 
+ +See the LICENSE file distributed with this work for information +regarding licensing, copying and copyright ownership. + + +# Usage +Start using libHTP by including it in your project's `Cargo.toml` +dependencies. The base library will also be required for using common +types. + +**The minimum supported version of `rustc` is `1.58.1`.** + +## Example +``` +[dependencies] +htp = "2.0.0" +``` + +## FFI Support +LibHTP has a foreign function interface for use in C/C++ projects. +FFI Support can be enabled by building with the `cbindgen` feature. + +``` +# Install cbindgen which is required to generate headers +cargo install --force cbindgen + +# Build headers and shared objects +make +``` + +## LICENSE + +LibHTP is licensed under the BSD 3-Clause license (also known as "BSD New" and +"BSD Simplified".) The complete text of the license is enclosed in the file LICENSE. diff --git a/rust/htp/cbindgen.toml b/rust/htp/cbindgen.toml new file mode 100644 index 000000000000..8d132e07383a --- /dev/null +++ b/rust/htp/cbindgen.toml @@ -0,0 +1,95 @@ +language = "C" + +# Header wrapping options +#header = "LICENSE here" +#trailer = "" +include_guard = "_HTP_H" +autogen_warning = "/* Warning, this file is autogenerated by cbindgen. 
Do NOT modify manually */" +#include_version = true +#sys_includes = [] # Sys headers +includes = [] +no_includes = false +cpp_compat = true +#after_includes = "" + +# Code style +#braces = "SameLine" +#line_length = 100 +#tab_width = 2 +#documentation_style = auto + +# Codegen +style = "both" + +after_includes = """ +#define htp_status_t HtpStatus +#define htp_server_personality_t HtpServerPersonality +#define htp_protocol_t HtpProtocol +#define htp_unwanted_t HtpUnwanted +#define htp_url_encoding_handling_t HtpUrlEncodingHandling +#define htp_stream_state_t HtpStreamState +#define htp_content_encoding_t HtpContentEncoding +#define htp_log_code_t HtpLogCode +#define htp_log_level_t HtpLogLevel +#define htp_method_t HtpMethod +#define htp_data_source_t HtpDataSource +#define htp_parser_id_t HtpParserId +#define htp_transfer_coding_t HtpTransferCoding +#define htp_res_progress_t HtpResponseProgress +#define htp_req_progress_t HtpRequestProgress +""" + +[export.rename] +"ConnectionFlags" = "HTP_CONNECTION_FLAGS" +"HeaderFlags" = "HTP_HEADER_FLAGS" +"HtpFlags" = "HTP_FLAGS" +"Config" = "htp_cfg_t" +"Connection" = "htp_conn_t" +"ConnectionParser" = "htp_connp_t" +"Header" = "htp_header_t" +"Headers" = "htp_headers_t" +"Param" = "htp_param_t" +"Data" = "htp_tx_data_t" +"Transaction" = "htp_tx_t" +"Transactions" = "htp_txs_t" +"Uri" = "htp_uri_t" +"Bstr" = "bstr" +"Table" = "htp_table_t" +"Log" = "htp_log_t" +"timeval" = "struct timeval" +"Logs" = "htp_logs_t" + +[export] +include = ["HtpStatus", +"HtpServerPersonality", +"HtpProtocol", +"HtpUnwanted", +"HtpUrlEncodingHandling", +"HtpStreamState", +"HtpContentEncoding", +"HtpLogCode", +"HtpLogLevel", +"HtpMethod", +"HtpDataSource", +"HtpParserId", +"HtpTransferCoding", +"HtpResponseProgress", +"HtpRequestProgress", +"HtpFlags", +"HeaderFlags", +"ConnectionFlags"] + +[enum] +rename_variants = "QualifiedScreamingSnakeCase" +prefix_with_name = false + +[macro_expansion] +bitflags = true + +# Rust parsing options +[parse] 
+parse_deps = false +clean = false + +[parse.expand] +features = ["cbindgen"] diff --git a/rust/htp/fuzz/Cargo.toml b/rust/htp/fuzz/Cargo.toml new file mode 100644 index 000000000000..4ef6ca68c330 --- /dev/null +++ b/rust/htp/fuzz/Cargo.toml @@ -0,0 +1,25 @@ + +[package] +name = "htp-fuzz" +version = "0.0.1" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies.htp] +path = ".." +[dependencies.libfuzzer-sys] +git = "https://github.com/rust-fuzz/libfuzzer-sys.git" + +[dependencies] + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "fuzz_htp_rs" +path = "fuzz_targets/fuzz_htp.rs" diff --git a/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs b/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs new file mode 100644 index 000000000000..fb8ff542e488 --- /dev/null +++ b/rust/htp/fuzz/fuzz_targets/fuzz_htp.rs @@ -0,0 +1,14 @@ +#![allow(non_snake_case)] +#![no_main] +#[macro_use] extern crate libfuzzer_sys; + +extern crate htp; + +use htp::test::{Test, TestConfig}; +use std::env; + + +fuzz_target!(|data: &[u8]| { + let mut t = Test::new(TestConfig()); + t.run_slice(data); +}); diff --git a/rust/htp/src/bstr.rs b/rust/htp/src/bstr.rs new file mode 100644 index 000000000000..2d85f3f125e0 --- /dev/null +++ b/rust/htp/src/bstr.rs @@ -0,0 +1,482 @@ +use bstr::{BString, ByteSlice}; +use core::cmp::Ordering; +use std::ops::{Deref, DerefMut}; + +/// Bstr is a convenience wrapper around binary data that adds string-like functions. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Bstr { + // Wrap a BString under the hood. We want to be able to + // implement behaviours on top of this if needed, so we wrap + // it instead of exposing it directly in our public API. 
+ s: BString, +} + +impl Default for Bstr { + fn default() -> Self { + Self { + s: BString::from(Vec::new()), + } + } +} + +impl Bstr { + /// Make a new owned Bstr + pub fn new() -> Self { + Bstr { + s: BString::from(Vec::new()), + } + } + + /// Make a new owned Bstr with given capacity + pub fn with_capacity(len: usize) -> Self { + Bstr { + s: BString::from(Vec::with_capacity(len)), + } + } + + /// Split the Bstr into a collection of substrings, separated by the given byte string. + /// Each element yielded is guaranteed not to include the splitter substring. + /// Returns a Vector of the substrings. + pub fn split_str_collect<'b, B: ?Sized + AsRef<[u8]>>( + &'b self, splitter: &'b B, + ) -> Vec<&'b [u8]> { + self.s.as_bstr().split_str(splitter.as_ref()).collect() + } + + /// Compare this bstr with the given slice + pub fn cmp_slice<B: AsRef<[u8]>>(&self, other: B) -> Ordering { + self.as_slice().cmp(other.as_ref()) + } + + /// Return true if self is equal to other + pub fn eq_slice<B: AsRef<[u8]>>(&self, other: B) -> bool { + self.cmp_slice(other) == Ordering::Equal + } + + /// Compare bstr with the given slice, ignoring ascii case. + pub fn cmp_nocase<B: AsRef<[u8]>>(&self, other: B) -> Ordering { + let lefts = &self.as_slice(); + let rights = &other.as_ref(); + let left = LowercaseIterator::new(lefts); + let right = LowercaseIterator::new(rights); + left.cmp(right) + } + + /// Compare trimmed bstr with the given slice, ignoring ascii case. 
+ pub fn cmp_nocase_trimmed<B: AsRef<[u8]>>(&self, other: B) -> Ordering { + let lefts = &self.trim_with(|c| c.is_ascii_whitespace()); + let rights = &other.as_ref(); + let left = LowercaseIterator::new(lefts); + let right = LowercaseIterator::new(rights); + left.cmp(right) + } + + /// Return true if self is equal to other ignoring ascii case + pub fn eq_nocase<B: AsRef<[u8]>>(&self, other: B) -> bool { + self.cmp_nocase(other) == Ordering::Equal + } + + /// Case insensitive comparison between self and other, ignoring any zeros in self + pub fn cmp_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> Ordering { + let lefts = &self.as_slice(); + let rights = &other.as_ref(); + let left = LowercaseNoZeroIterator::new(lefts); + let right = LowercaseIterator::new(rights); + left.cmp(right) + } + + /// Case insensitive comparison between trimmed self and other, ignoring any zeros in self + pub fn cmp_nocase_nozero_trimmed<B: AsRef<[u8]>>(&self, other: B) -> Ordering { + let lefts = &self.trim(); + let rights = &other.as_ref(); + let left = LowercaseNoZeroIterator::new(lefts); + let right = LowercaseIterator::new(rights); + left.cmp(right) + } + + /// Return true if self is equal to other, ignoring ascii case and zeros in self + pub fn eq_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> bool { + self.cmp_nocase_nozero(other) == Ordering::Equal + } + + /// Extend this bstr with the given slice + pub fn add<B: AsRef<[u8]>>(&mut self, other: B) { + self.extend_from_slice(other.as_ref()) + } + + /// Extend the bstr as much as possible without growing + pub fn add_noex<B: AsRef<[u8]>>(&mut self, other: B) { + let len = std::cmp::min(self.capacity() - self.len(), other.as_ref().len()); + self.add(&other.as_ref()[..len]); + } + + /// Return true if this bstr starts with other + pub fn starts_with<B: AsRef<[u8]>>(&self, other: B) -> bool { + self.as_slice().starts_with(other.as_ref()) + } + + /// Return true if this bstr starts with other, ignoring ascii case + pub fn starts_with_nocase<B: AsRef<[u8]>>(&self, other: B) -> bool { + if self.len() < other.as_ref().len() { + return false; + } + let len: usize = 
std::cmp::min(self.len(), other.as_ref().len()); + self.as_slice()[..len].eq_ignore_ascii_case(&other.as_ref()[..len]) + } + + /// Find the index of the given slice + pub fn index_of<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> { + self.find(other.as_ref()) + } + + /// Find the index of the given slice ignoring ascii case + pub fn index_of_nocase<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> { + let src = self.as_slice(); + let mut haystack = LowercaseIterator::new(&src); + let needle = other.as_ref().to_ascii_lowercase(); + haystack.index_of(&needle) + } + + /// Find the index of the given slice ignoring ascii case and any zeros in self + pub fn index_of_nocase_nozero<B: AsRef<[u8]>>(&self, other: B) -> Option<usize> { + let src = self.as_slice(); + let mut haystack = LowercaseNoZeroIterator::new(&src); + let needle = other.as_ref().to_ascii_lowercase(); + haystack.index_of(&needle) + } +} + +// Trait Implementations for Bstr + +/// Let callers access BString functions +impl Deref for Bstr { + type Target = BString; + + fn deref(&self) -> &Self::Target { + &self.s + } +} + +/// Let callers access mutable BString functions +impl DerefMut for Bstr { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.s + } +} + +impl From<&[u8]> for Bstr { + fn from(src: &[u8]) -> Self { + Bstr { + s: BString::from(src), + } + } +} + +impl From<&str> for Bstr { + fn from(src: &str) -> Self { + src.as_bytes().into() + } +} + +impl From<Vec<u8>> for Bstr { + fn from(src: Vec<u8>) -> Self { + Bstr { + s: BString::from(src), + } + } +} + +/// Compare a Bstr to a &str byte for byte +impl PartialEq<&str> for Bstr { + fn eq(&self, rhs: &&str) -> bool { + self.as_bytes() == rhs.as_bytes() + } +} + +/// A trait that lets us find the byte index of slices in a generic way. +/// +/// This layer of abstraction is motivated by the need to find needle in +/// haystack when we want to perform case sensitive, case insensitive, and +/// case insensitive + zero skipping. 
All of these algorithms are identical +/// except we compare the needle bytes with the src bytes in different ways, +/// and in the case of zero skipping we want to pretend that zero bytes in +/// the haystack do not exist. So we define iterators for each of lowercase +/// and lowercase + zero skipping, and then implement this trait for both of +/// those, and then define the search function in terms of this trait. +trait SubIterator: Iterator<Item = u8> { + /// Return a new iterator of the same type starting at the current byte index + fn subiter(&self) -> Self; + /// Return the current byte index into the iterator + fn index(&self) -> usize; + /// Find the given needle in self and return the byte index + fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize>; +} + +/// Find the byte index of the given slice in the source. +/// +/// Someday an enterprising soul can implement this function inside SubIterator +/// directly (where it arguably belongs), but this involves handling dyn Self, +/// and implementing it this way lets monomorphization emit concrete +/// implementations for each of the two types we actually have. 
+fn index_of<T: SubIterator, S: AsRef<[u8]>>(haystack: &mut T, needle: &S) -> Option<usize> { + let first = needle.as_ref().first()?; + while let Some(s) = haystack.next() { + if s == *first { + let mut test = haystack.subiter(); + let mut equal = false; + for cmp_byte in needle.as_ref().as_bytes() { + equal = Some(*cmp_byte) == test.next(); + if !equal { + break; + } + } + if equal { + return Some(haystack.index()); + } + } + } + None +} + +/// A convenience iterator for anything that satisfies AsRef<[u8]> +/// that yields lowercase ascii bytes and skips null bytes +struct LowercaseNoZeroIterator<'a, T: AsRef<[u8]>> { + src: &'a T, + idx: usize, + first: bool, +} + +impl<'a, T: AsRef<[u8]>> LowercaseNoZeroIterator<'a, T> { + fn new(src: &'a T) -> Self { + LowercaseNoZeroIterator { + src, + idx: 0, + first: true, + } + } +} + +impl<T: AsRef<[u8]>> Iterator for LowercaseNoZeroIterator<'_, T> { + type Item = u8; + + fn next(&mut self) -> Option<Self::Item> { + loop { + if self.first { + self.first = false; + } else { + self.idx += 1; + } + let next = self + .src + .as_ref() + .get(self.idx) + .map(|c| c.to_ascii_lowercase()); + if next != Some(0) { + break next; + } + } + } +} + +impl<T: AsRef<[u8]>> SubIterator for LowercaseNoZeroIterator<'_, T> { + fn subiter(&self) -> Self { + LowercaseNoZeroIterator { + src: self.src, + idx: self.idx, + first: true, + } + } + + fn index(&self) -> usize { + self.idx + } + + fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize> { + index_of(self, &needle) + } +} + +/// A convenience iterator for anything that satisfies AsRef<[u8]> +/// that yields lowercase ascii bytes +struct LowercaseIterator<'a, T: AsRef<[u8]>> { + src: &'a T, + idx: usize, + first: bool, +} + +impl<'a, T: AsRef<[u8]>> LowercaseIterator<'a, T> { + fn new(src: &'a T) -> Self { + LowercaseIterator { + src, + idx: 0, + first: true, + } + } +} + +impl<T: AsRef<[u8]>> Iterator for LowercaseIterator<'_, T> { + type Item = u8; + fn next(&mut self) -> Option<Self::Item> { + if self.first { + self.first = false; + } else { + self.idx += 1; + } + self.src + .as_ref() + 
.get(self.idx) + .map(|c| c.to_ascii_lowercase()) + } +} + +impl<T: AsRef<[u8]>> SubIterator for LowercaseIterator<'_, T> { + fn subiter(&self) -> Self { + LowercaseIterator { + src: self.src, + idx: self.idx, + first: true, + } + } + + fn index(&self) -> usize { + self.idx + } + + fn index_of(&mut self, needle: impl AsRef<[u8]>) -> Option<usize> { + index_of(self, &needle) + } +} + +#[cfg(test)] +mod tests { + use crate::bstr::*; + use core::cmp::Ordering; + use rstest::rstest; + + #[test] + fn Compare() { + let b = Bstr::from("ABCDefgh"); + // direct equality + assert_eq!(Ordering::Equal, b.cmp_slice("ABCDefgh")); + // case sensitive + assert_ne!(Ordering::Equal, b.cmp_slice("abcdefgh")); + // src shorter than dst + assert_eq!(Ordering::Less, b.cmp_slice("ABCDefghi")); + // src longer than dst + assert_eq!(Ordering::Greater, b.cmp_slice("ABCDefg")); + // case less + assert_eq!(Ordering::Less, b.cmp_slice("abcdefgh")); + // case greater + assert_eq!(Ordering::Greater, b.cmp_slice("ABCDEFGH")); + } + + #[test] + fn CompareNocase() { + let b = Bstr::from("ABCDefgh"); + assert_eq!(Ordering::Equal, b.cmp_nocase("ABCDefgh")); + assert_eq!(Ordering::Equal, b.cmp_nocase("abcdefgh")); + assert_eq!(Ordering::Equal, b.cmp_nocase("ABCDEFGH")); + assert_eq!(Ordering::Less, b.cmp_nocase("ABCDefghi")); + assert_eq!(Ordering::Greater, b.cmp_nocase("ABCDefg")); + } + + #[test] + fn CompareNocaseNozero() { + // nocase_nozero only applies to the source string. The caller + // is not expected to pass in a search string with nulls in it. 
+ let b = Bstr::from("A\x00B\x00\x00C\x00Defg\x00h"); + assert_eq!(Ordering::Equal, b.cmp_nocase_nozero("ABCDefgh")); + assert_eq!(Ordering::Equal, b.cmp_nocase_nozero("abcdefgh")); + assert_eq!(Ordering::Equal, b.cmp_nocase_nozero("ABCDEFGH")); + assert_eq!(Ordering::Less, b.cmp_nocase_nozero("ABCDefghi")); + assert_eq!(Ordering::Greater, b.cmp_nocase_nozero("ABCDefg")); + } + + #[rstest] + #[case("abc", "defgh", "abcdefgh")] + #[case("ABC", "DEFGH", "ABCDEFGH")] + #[case("aBc", "Defgh", "aBcDefgh")] + #[case( + "TestLongerDataBc", + "Defghikjlmnopqrstuvwxyz", + "TestLongerDataBcDefghikjlmnopqrstuvwxyz" + )] + fn test_add(#[case] input: &str, #[case] input_add: &str, #[case] expected: &str) { + let mut b = Bstr::from(input); + b.add(input_add); + assert_eq!(b.cmp_slice(expected), Ordering::Equal); + } + + #[rstest] + #[case(10, "abcd", "efghij", "abcdefghij")] + #[case(5, "ABcd", "efgh", "ABcde")] + #[case(4, "AbCd", "EFGH", "AbCd")] + #[case(20, "abcd", "efGHij", "abcdefGHij")] + fn test_add_no_ex( + #[case] capacity: usize, #[case] input: &str, #[case] input_add: &str, + #[case] expected: &str, + ) { + let mut b = Bstr::with_capacity(capacity); + b.add_noex(input); + b.add_noex(input_add); + assert_eq!(b.cmp_slice(expected), Ordering::Equal); + } + + #[test] + fn StartsWith() { + let b = Bstr::from("ABCD"); + assert!(b.starts_with("AB")); + assert!(!b.starts_with("ab")); + assert!(!b.starts_with("Ab")); + assert!(!b.starts_with("aB")); + assert!(!b.starts_with("CD")); + } + + #[test] + fn StartsWithNocase() { + let b = Bstr::from("ABCD"); + assert!(b.starts_with_nocase("AB")); + assert!(b.starts_with_nocase("ab")); + assert!(b.starts_with_nocase("Ab")); + assert!(b.starts_with_nocase("aB")); + assert!(!b.starts_with_nocase("CD")); + } + + #[test] + fn IndexOf() { + let b = Bstr::from("ABCDefgh"); + assert_eq!(Some(4), b.index_of("e")); + assert_eq!(Some(0), b.index_of("A")); + assert_eq!(Some(7), b.index_of("h")); + assert_eq!(Some(3), b.index_of("De")); + 
assert_eq!(None, b.index_of("z")); + assert_eq!(None, b.index_of("a")); + assert_eq!(None, b.index_of("hi")); + } + + #[test] + fn IndexOfNocase() { + let b = Bstr::from("ABCDefgh"); + assert_eq!(Some(4), b.index_of_nocase("E")); + assert_eq!(Some(0), b.index_of_nocase("a")); + assert_eq!(Some(0), b.index_of_nocase("A")); + assert_eq!(Some(7), b.index_of_nocase("H")); + assert_eq!(Some(3), b.index_of_nocase("dE")); + assert_eq!(None, b.index_of_nocase("z")); + assert_eq!(None, b.index_of_nocase("Hi")); + } + + #[test] + fn IndexOfNocaseNozero() { + let b = Bstr::from("A\x00B\x00\x00C\x00Defg\x00h"); + assert_eq!(Some(8), b.index_of_nocase_nozero("E")); + assert_eq!(Some(0), b.index_of_nocase_nozero("a")); + assert_eq!(Some(0), b.index_of_nocase_nozero("A")); + assert_eq!(Some(12), b.index_of_nocase_nozero("H")); + assert_eq!(Some(7), b.index_of_nocase_nozero("dE")); + assert_eq!(Some(2), b.index_of_nocase_nozero("bc")); + assert_eq!(None, b.index_of_nocase_nozero("z")); + assert_eq!(None, b.index_of_nocase_nozero("Hi")); + assert_eq!(None, b.index_of_nocase_nozero("ghi")); + } +} diff --git a/rust/htp/src/c_api/bstr.rs b/rust/htp/src/c_api/bstr.rs new file mode 100644 index 000000000000..d1f55288adeb --- /dev/null +++ b/rust/htp/src/c_api/bstr.rs @@ -0,0 +1,201 @@ +use crate::bstr::Bstr; +use core::cmp::Ordering; +use std::{boxed::Box, ffi::CStr}; + +/// Allocate a zero-length bstring, reserving space for at least size bytes. +#[no_mangle] +pub extern "C" fn bstr_alloc(len: libc::size_t) -> *mut Bstr { + let b = Bstr::with_capacity(len); + let boxed = Box::new(b); + Box::into_raw(boxed) +} + +/// Deallocate the supplied bstring instance. Allows NULL on input. +/// # Safety +/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer. 
+#[no_mangle] +pub unsafe extern "C" fn bstr_free(b: *mut Bstr) { + if !b.is_null() { + drop(Box::from_raw(b)); + } +} + +/// Return the length of the string +/// # Safety +/// x must be properly initialized: not NULL, dangling, or misaligned +#[no_mangle] +pub unsafe extern "C" fn bstr_len(x: *const Bstr) -> libc::size_t { + (*x).len() +} + +/// Return a pointer to the bstr payload +/// # Safety +/// x must be properly initialized: not NULL, dangling, or misaligned +#[no_mangle] +pub unsafe extern "C" fn bstr_ptr(x: *const Bstr) -> *mut libc::c_uchar { + (*x).as_ptr() as *mut u8 +} + +/// Return the capacity of the string +/// # Safety +/// x must be properly initialized: not NULL, dangling, or misaligned +#[no_mangle] +pub unsafe extern "C" fn bstr_size(x: *const Bstr) -> libc::size_t { + (*x).capacity() +} + +/// Case-sensitive comparison of a bstring and a NUL-terminated string. +/// returns -1 if b is less than c +/// 0 if b is equal to c +/// 1 if b is greater than c +/// # Safety +/// b and c must be properly initialized: not NULL, dangling, or misaligned. +/// c must point to memory that contains a valid nul terminator byte at the end of the string +#[no_mangle] +pub unsafe extern "C" fn bstr_cmp_c(b: *const Bstr, c: *const libc::c_char) -> libc::c_int { + let cs = CStr::from_ptr(c); + match (*b).cmp_slice(cs.to_bytes()) { + Ordering::Less => -1, + Ordering::Equal => 0, + Ordering::Greater => 1, + } +} + +/// Case-insensitive comparison of a bstring and a NUL-terminated string. +/// returns -1 if b is less than c +/// 0 if b is equal to c +/// 1 if b is greater than c +/// # Safety +/// b and c must be properly initialized: not NULL, dangling, or misaligned. 
+/// c must point to memory that contains a valid nul terminator byte at the end of the string +#[no_mangle] +pub unsafe extern "C" fn bstr_cmp_c_nocase(b: *const Bstr, c: *const libc::c_char) -> libc::c_int { + let cs = CStr::from_ptr(c); + match (*b).cmp_nocase(cs.to_bytes()) { + Ordering::Less => -1, + Ordering::Equal => 0, + Ordering::Greater => 1, + } +} + +/// Create a new bstring by copying the provided NUL-terminated string +/// # Safety +/// cstr must be properly intialized: not NULL, dangling, or misaligned. +/// cstr must point to memory that contains a valid nul terminator byte at the end of the string +#[no_mangle] +pub unsafe extern "C" fn bstr_dup_c(cstr: *const libc::c_char) -> *mut Bstr { + let cs = CStr::from_ptr(cstr).to_bytes(); + let new = bstr_alloc(cs.len()); + (*new).add(cs); + new +} + +/// Create a new NUL-terminated string out of the provided bstring. If NUL bytes +/// are contained in the bstring, each will be replaced with "\0" (two characters). +/// The caller is responsible to keep track of the allocated memory area and free +/// it once it is no longer needed. +/// returns The newly created NUL-terminated string, or NULL in case of memory +/// allocation failure. +/// # Safety +/// b must be properly intialized and not dangling nor misaligned. +#[no_mangle] +pub unsafe extern "C" fn bstr_util_strdup_to_c(b: *const Bstr) -> *mut libc::c_char { + if b.is_null() { + return std::ptr::null_mut(); + } + let src = std::slice::from_raw_parts(bstr_ptr(b), bstr_len(b)); + + // Since the memory returned here is just a char* and the caller will + // free() it we have to use malloc() here. + // So we allocate enough space for doubled NULL bytes plus the trailing NULL. 
+ let mut null_count = 1; + for byte in src { + if *byte == 0 { + null_count += 1; + } + } + let newlen = bstr_len(b) + null_count; + let mem = libc::malloc(newlen) as *mut i8; + if mem.is_null() { + return std::ptr::null_mut(); + } + let dst: &mut [i8] = std::slice::from_raw_parts_mut(mem, newlen); + let mut dst_idx = 0; + for byte in src { + if *byte == 0 { + dst[dst_idx] = '\\' as i8; + dst_idx += 1; + dst[dst_idx] = '0' as i8; + } else { + dst[dst_idx] = *byte as i8; + } + dst_idx += 1; + } + dst[dst_idx] = 0; + + mem +} + +#[cfg(test)] +mod test { + use super::*; + use std::ffi::CString; + + macro_rules! cstr { + ( $x:expr ) => {{ + CString::new($x).unwrap() + }}; + } + + #[test] + fn Bstr_Alloc() { + unsafe { + let p1 = bstr_alloc(10); + assert_eq!(10, bstr_size(p1)); + assert_eq!(0, bstr_len(p1)); + bstr_free(p1); + } + } + + #[test] + fn Bstr_DupC() { + unsafe { + let p1 = bstr_dup_c(cstr!("arfarf").as_ptr()); + + assert_eq!(6, bstr_size(p1)); + assert_eq!(6, bstr_len(p1)); + assert_eq!( + 0, + libc::memcmp( + cstr!("arfarf").as_ptr() as *const core::ffi::c_void, + bstr_ptr(p1) as *const core::ffi::c_void, + 6 + ) + ); + bstr_free(p1); + } + } + + #[test] + fn Bstr_UtilDupToC() { + unsafe { + let s = Bstr::from(b"ABCDEFGHIJKL\x00NOPQRST" as &[u8]); + let c = bstr_util_strdup_to_c(&s); + let e = CString::new("ABCDEFGHIJKL\\0NOPQRST").unwrap(); + assert_eq!(0, libc::strcmp(e.as_ptr(), c)); + + libc::free(c as *mut core::ffi::c_void); + } + } + + #[test] + fn Bstr_CmpC() { + unsafe { + let p1 = Bstr::from("arfarf"); + assert_eq!(0, bstr_cmp_c(&p1, cstr!("arfarf").as_ptr())); + assert_eq!(-1, bstr_cmp_c(&p1, cstr!("arfarf2").as_ptr())); + assert_eq!(1, bstr_cmp_c(&p1, cstr!("arf").as_ptr())); + assert_eq!(-1, bstr_cmp_c(&p1, cstr!("not equal").as_ptr())); + } + } +} diff --git a/rust/htp/src/c_api/config.rs b/rust/htp/src/c_api/config.rs new file mode 100644 index 000000000000..a9d1af3ed6cd --- /dev/null +++ b/rust/htp/src/c_api/config.rs @@ -0,0 +1,596 @@ 
+#![deny(missing_docs)] +use crate::{ + config::{Config, HtpServerPersonality, HtpUrlEncodingHandling}, + hook::{DataExternalCallbackFn, LogExternalCallbackFn, TxExternalCallbackFn}, + HtpStatus, +}; +use std::convert::TryInto; + +/// Creates a new configuration structure. Configuration structures created at +/// configuration time must not be changed afterwards in order to support lock-less +/// copying. +#[no_mangle] +pub extern "C" fn htp_config_create() -> *mut Config { + let cfg: Config = Config::default(); + let b = Box::new(cfg); + Box::into_raw(b) +} + +/// Destroy a configuration structure. +/// # Safety +/// This function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer. +#[no_mangle] +pub unsafe extern "C" fn htp_config_destroy(cfg: *mut Config) { + if !cfg.is_null() { + drop(Box::from_raw(cfg)); + } +} + +/// Registers a callback that is invoked every time there is a log message with +/// severity equal and higher than the configured log level. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_log(cfg: *mut Config, cbk_fn: LogExternalCallbackFn) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_log.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_BODY_DATA callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_body_data( + cfg: *mut Config, cbk_fn: DataExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_body_data.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_COMPLETE callback. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_complete( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_complete.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_HEADERS callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_headers( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_headers.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_HEADER_DATA callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_header_data( + cfg: *mut Config, cbk_fn: DataExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_header_data.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_LINE callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_line( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_line.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_START callback, which is invoked every time a new +/// request begins and before any parsing is done. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_start( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_start.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_TRAILER callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_trailer( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_trailer.register_extern(cbk_fn) + } +} + +/// Registers a REQUEST_TRAILER_DATA callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_request_trailer_data( + cfg: *mut Config, cbk_fn: DataExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_request_trailer_data.register_extern(cbk_fn) + } +} + +/// Registers a RESPONSE_BODY_DATA callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_response_body_data( + cfg: *mut Config, cbk_fn: DataExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_response_body_data.register_extern(cbk_fn) + } +} + +/// Registers a RESPONSE_COMPLETE callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_response_complete( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_response_complete.register_extern(cbk_fn) + } +} + +/// Registers a RESPONSE_HEADERS callback. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_response_headers( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_response_headers.register_extern(cbk_fn) + } +} + +/// Registers a RESPONSE_HEADER_DATA callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_response_header_data( + cfg: *mut Config, cbk_fn: DataExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_response_header_data.register_extern(cbk_fn) + } +} + +/// Registers a RESPONSE_START callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_response_start( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_response_start.register_extern(cbk_fn) + } +} + +/// Registers a RESPONSE_TRAILER callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_response_trailer( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_response_trailer.register_extern(cbk_fn) + } +} + +/// Registers a RESPONSE_TRAILER_DATA callback. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_response_trailer_data( + cfg: *mut Config, cbk_fn: DataExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_response_trailer_data.register_extern(cbk_fn) + } +} + +/// Registers a TRANSACTION_COMPLETE callback. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_register_transaction_complete( + cfg: *mut Config, cbk_fn: TxExternalCallbackFn, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.hook_transaction_complete.register_extern(cbk_fn) + } +} + +/// Configures whether backslash characters are treated as path segment separators. They +/// are not on Unix systems, but are on Windows systems. If this setting is enabled, a path +/// such as "/one\two/three" will be converted to "/one/two/three". +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_backslash_convert_slashes( + cfg: *mut Config, enabled: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_backslash_convert_slashes(enabled == 1) + } +} + +/// Sets the replacement character that will be used in the lossy best-fit +/// mapping from multi-byte to single-byte streams. The question mark character +/// is used as the default replacement byte. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_bestfit_replacement_byte(cfg: *mut Config, b: libc::c_int) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_bestfit_replacement_byte(b as u8) + } +} + +/// Configures the maximum compression bomb size LibHTP will decompress. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_compression_bomb_limit( + cfg: *mut Config, bomblimit: libc::size_t, +) { + if let Ok(bomblimit) = bomblimit.try_into() { + if let Some(cfg) = cfg.as_mut() { + cfg.compression_options.set_bomb_limit(bomblimit) + } + } +} + +/// Configures the maximum compression time LibHTP will allow. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_compression_time_limit( + cfg: *mut Config, timelimit: libc::c_uint, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.compression_options.set_time_limit(timelimit) + } +} + +/// Configures whether input data will be converted to lowercase. Useful for handling servers with +/// case-insensitive filesystems. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_convert_lowercase(cfg: *mut Config, enabled: libc::c_int) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_convert_lowercase(enabled == 1) + } +} + +/// Configures the maximum size of the buffer LibHTP will use when all data is not available +/// in the current buffer (e.g., a very long header line that might span several packets). This +/// limit is controlled by the field_limit parameter. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_field_limit(cfg: *mut Config, field_limit: libc::size_t) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_field_limit(field_limit) + } +} + +/// Configures the maximum memlimit LibHTP will pass to liblzma. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_lzma_memlimit(cfg: *mut Config, memlimit: libc::size_t) { + if let Some(cfg) = cfg.as_mut() { + cfg.compression_options.set_lzma_memlimit(memlimit) + } +} + +/// Configures the maximum number of lzma layers to pass to the decompressor. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_lzma_layers(cfg: *mut Config, limit: libc::c_int) { + if let Some(cfg) = cfg.as_mut() { + cfg.compression_options.set_lzma_layers(if limit <= 0 { + None + } else { + limit.try_into().ok() + }) + } +} + +/// Configures the maximum number of live transactions per connection +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_max_tx(cfg: *mut Config, limit: u32) { + if let Some(cfg) = cfg.as_mut() { + cfg.max_tx = limit; + } +} + +/// Configures the maximum number of headers in one transaction +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_number_headers_limit(cfg: *mut Config, limit: u32) { + if let Some(cfg) = cfg.as_mut() { + cfg.number_headers_limit = limit; + } +} + +/// Configures how the server reacts to encoded NUL bytes. Some servers will stop +/// at NUL, while some will respond with 400 or 404. When the termination option is not +/// used, the NUL byte will remain in the path. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_nul_encoded_terminates( + cfg: *mut Config, enabled: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_nul_encoded_terminates(enabled == 1) + } +} + +/// Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_nul_raw_terminates(cfg: *mut Config, enabled: libc::c_int) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_nul_raw_terminates(enabled == 1) + } +} + +/// Enable or disable request cookie parsing. Enabled by default. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_parse_request_cookies( + _cfg: *mut Config, _parse_request_cookies: libc::c_int, +) { + // do nothing, but keep API +} + +/// Configures whether consecutive path segment separators will be compressed. When enabled, a path +/// such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator +/// decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four" +/// will be converted to "/one/two/three/four" (assuming all 3 options are enabled). +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_path_separators_compress( + cfg: *mut Config, enabled: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_path_separators_compress(enabled == 1) + } +} + +/// Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This +/// is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding +/// is taking place. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_plusspace_decode(cfg: *mut Config, enabled: libc::c_int) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_plusspace_decode(enabled == 1) + } +} + +/// Configures whether encoded path segment separators will be decoded. Apache does not do +/// this by default, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized +/// to "/one/two". If the backslash_separators option is also enabled, encoded backslash +/// characters will be converted too (and subsequently normalized to forward slashes). +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_path_separators_decode( + cfg: *mut Config, enabled: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_path_separators_decode(enabled == 1) + } +} + +/// Configures whether request data is decompressed +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_request_decompression( + cfg: *mut Config, enabled: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_request_decompression(enabled == 1) + } +} + +/// Configures how many layers of compression we try to decompress. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_decompression_layer_limit( + cfg: *mut Config, limit: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_decompression_layer_limit(if limit <= 0 { + None + } else { + limit.try_into().ok() + }) + } +} + +/// Enable or disable allowing spaces in URIs. Disabled by default. +/// # Safety +/// When calling this method the given cfg must be initialized or NULL. 
+#[no_mangle] +pub unsafe extern "C" fn htp_config_set_allow_space_uri(cfg: *mut Config, allow_space: bool) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_allow_space_uri(allow_space) + } +} + +/// Configure desired server personality. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_server_personality( + cfg: *mut Config, personality: HtpServerPersonality, +) -> HtpStatus { + cfg.as_mut() + .map(|cfg| cfg.set_server_personality(personality).into()) + .unwrap_or(HtpStatus::ERROR) +} + +/// Configures whether %u-encoded sequences are decoded. Such sequences +/// will be treated as invalid URL encoding if decoding is not desirable. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_u_encoding_decode(cfg: *mut Config, enabled: libc::c_int) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_u_encoding_decode(enabled == 1) + } +} + +/// Configures how the server handles invalid URL encoding. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_url_encoding_invalid_handling( + cfg: *mut Config, handling: HtpUrlEncodingHandling, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_url_encoding_invalid_handling(handling) + } +} + +/// Controls whether the data should be treated as UTF-8 and converted to a single-byte +/// stream using best-fit mapping. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_utf8_convert_bestfit( + cfg: *mut Config, enabled: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_utf8_convert_bestfit(enabled == 1) + } +} + +/// Configures whether to attempt to decode a double encoded query in the normalized uri +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_double_decode_normalized_query( + cfg: *mut Config, set: bool, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_double_decode_normalized_query(set) + } +} + +/// Configures whether to attempt to decode a double encoded path in the normalized uri +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_double_decode_normalized_path(cfg: *mut Config, set: bool) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_double_decode_normalized_path(set) + } +} + +/// Configures whether to normalize URIs into a complete or partial form. +/// Pass `true` to use complete normalized URI or `false` to use partials. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_normalized_uri_include_all(cfg: *mut Config, set: bool) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_normalized_uri_include_all(set) + } +} + +/// Configures whether transactions will be automatically destroyed once they +/// are processed and all callbacks invoked. This option is appropriate for +/// programs that process transactions as they are processed. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_tx_auto_destroy( + cfg: *mut Config, tx_auto_destroy: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_tx_auto_destroy(tx_auto_destroy == 1) + } +} + +/// Configures whether incomplete transactions will be flushed when a connection is closed. +/// +/// This will invoke the transaction complete callback for each incomplete transaction. The +/// transactions passed to the callback will not have their request and response state set +/// to complete - they will simply be passed with the state they have within the parser at +/// the time of the call. +/// +/// This option is intended to be used when a connection is closing and we want to process +/// any incomplete transactions that were in flight, or which never completed due to packet +/// loss or parsing errors. +/// +/// These transactions will also be removed from the parser when auto destroy is enabled. +/// +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_flush_incomplete( + cfg: *mut Config, flush_incomplete: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_flush_incomplete(flush_incomplete == 1) + } +} + +/// Enable or disable the built-in Urlencoded parser. Disabled by default. +/// The parser will parse query strings and request bodies with the appropriate MIME type. 
+/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_config_set_parse_urlencoded( + cfg: *mut Config, parse_urlencoded: libc::c_int, +) { + if let Some(cfg) = cfg.as_mut() { + cfg.set_parse_urlencoded(parse_urlencoded == 1) + } +} diff --git a/rust/htp/src/c_api/connection.rs b/rust/htp/src/c_api/connection.rs new file mode 100644 index 000000000000..d2544101e3ca --- /dev/null +++ b/rust/htp/src/c_api/connection.rs @@ -0,0 +1,36 @@ +#![deny(missing_docs)] +use crate::{connection::Connection, log::Log}; + +/// Returns the request_data_counter +/// # Safety +/// When calling this method, you have to ensure that conn is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_conn_request_data_counter(conn: *const Connection) -> u64 { + conn.as_ref() + .map(|conn| conn.request_data_counter) + .unwrap_or(0) +} + +/// Returns the response_data_counter +/// # Safety +/// When calling this method, you have to ensure that conn is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_conn_response_data_counter(conn: *const Connection) -> u64 { + conn.as_ref() + .map(|conn| conn.response_data_counter) + .unwrap_or(0) +} + +/// Get the next logged message from the connection +/// +/// Returns the next log or NULL on error. 
+/// The caller must free this result with htp_log_free +/// # Safety +/// When calling this method, you have to ensure that conn is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_conn_next_log(conn: *const Connection) -> *mut Log { + conn.as_ref() + .and_then(|conn| conn.get_next_log()) + .map(|log| Box::into_raw(Box::new(log))) + .unwrap_or(std::ptr::null_mut()) +} diff --git a/rust/htp/src/c_api/connection_parser.rs b/rust/htp/src/c_api/connection_parser.rs new file mode 100644 index 000000000000..98ef752d4d12 --- /dev/null +++ b/rust/htp/src/c_api/connection_parser.rs @@ -0,0 +1,310 @@ +#![deny(missing_docs)] +use crate::{ + config::Config, + connection::Connection, + connection_parser::{ConnectionParser, HtpStreamState, ParserData}, + transaction::Transaction, +}; +use std::{ + convert::{TryFrom, TryInto}, + ffi::CStr, +}; +use time::{Duration, OffsetDateTime}; + +/// Take seconds and microseconds and return an OffsetDateTime, or None if sec is not a valid unix timestamp +fn datetime_from_sec_usec(sec: i64, usec: i64) -> Option<OffsetDateTime> { + match OffsetDateTime::from_unix_timestamp(sec) { + Ok(date) => Some(date + Duration::microseconds(usec)), + Err(_) => None, + } +} + +/// Closes the connection associated with the supplied parser. +/// +/// timestamp is optional +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +#[allow(clippy::useless_conversion)] +pub unsafe extern "C" fn htp_connp_close( + connp: *mut ConnectionParser, timestamp: *const libc::timeval, +) { + if let Some(connp) = connp.as_mut() { + connp.close( + timestamp + .as_ref() + .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into())) + .unwrap_or(None), + ) + } +} + +/// Creates a new connection parser using the provided configuration or a default configuration if NULL provided. +/// Note the provided config will be copied into the created connection parser. 
Therefore, subsequent modification +/// to the original config will have no effect. +/// +/// Returns a new connection parser instance, or NULL on error. +/// # Safety +/// When calling this method, you have to ensure that cfg is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_create(cfg: *mut Config) -> *mut ConnectionParser { + Box::into_raw(Box::new(ConnectionParser::new( + cfg.as_ref().cloned().unwrap_or_default(), + ))) +} + +/// Destroys the connection parser, its data structures, as well +/// as the connection and its transactions. Does nothing if connp is NULL. +/// # Safety +/// When calling this method, you have to ensure that connp is either NULL or a pointer returned by +/// htp_connp_create that has not been destroyed yet. Calling this twice on the same pointer is a double-free. +#[no_mangle] +pub unsafe extern "C" fn htp_connp_destroy_all(connp: *mut ConnectionParser) { + if !connp.is_null() { + drop(Box::from_raw(connp)); + } +} + +/// Returns the connection associated with the connection parser. +/// +/// Returns Connection instance, or NULL if one is not available. +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_connection(connp: *const ConnectionParser) -> *const Connection { + connp + .as_ref() + .map(|val| &val.conn as *const Connection) + .unwrap_or(std::ptr::null()) +} + +/// Retrieve the user data associated with this connection parser. +/// Returns user data, or NULL if there isn't any. +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_user_data(connp: *const ConnectionParser) -> *mut libc::c_void { + connp + .as_ref() + .and_then(|val| val.user_data::<*mut libc::c_void>()) + .copied() + .unwrap_or(std::ptr::null_mut()) +} + +/// Associate user data with the supplied parser. 
+/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_set_user_data( + connp: *mut ConnectionParser, user_data: *mut libc::c_void, +) { + if let Some(connp) = connp.as_mut() { + connp.set_user_data(Box::new(user_data)) + } +} + +/// Opens connection. +/// +/// timestamp is optional +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +#[allow(clippy::useless_conversion)] +pub unsafe extern "C" fn htp_connp_open( + connp: *mut ConnectionParser, client_addr: *const libc::c_char, client_port: libc::c_int, + server_addr: *const libc::c_char, server_port: libc::c_int, timestamp: *const libc::timeval, +) { + if let Some(connp) = connp.as_mut() { + connp.open( + client_addr.as_ref().and_then(|client_addr| { + CStr::from_ptr(client_addr) + .to_str() + .ok() + .and_then(|val| val.parse().ok()) + }), + client_port.try_into().ok(), + server_addr.as_ref().and_then(|server_addr| { + CStr::from_ptr(server_addr) + .to_str() + .ok() + .and_then(|val| val.parse().ok()) + }), + server_port.try_into().ok(), + timestamp + .as_ref() + .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into())) + .unwrap_or(None), + ) + } +} + +/// Closes the connection associated with the supplied parser. 
+/// +/// timestamp is optional +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +#[allow(clippy::useless_conversion)] +pub unsafe extern "C" fn htp_connp_request_close( + connp: *mut ConnectionParser, timestamp: *const libc::timeval, +) { + if let Some(connp) = connp.as_mut() { + connp.request_close( + timestamp + .as_ref() + .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into())) + .unwrap_or(None), + ) + } +} + +/// Process a chunk of inbound client request data +/// +/// timestamp is optional +/// Returns HTP_STREAM_STATE_DATA, HTP_STREAM_STATE_ERROR or HTP_STREAM_STATE_DATA_OTHER (see QUICK_START). +/// HTP_STREAM_STATE_CLOSED and HTP_STREAM_STATE_TUNNEL are also possible. +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +#[allow(clippy::useless_conversion)] +pub unsafe extern "C" fn htp_connp_request_data( + connp: *mut ConnectionParser, timestamp: *const libc::timeval, data: *const libc::c_void, + len: libc::size_t, +) -> HtpStreamState { + connp + .as_mut() + .map(|connp| { + connp.request_data( + ParserData::from((data as *const u8, len)), + timestamp + .as_ref() + .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into())) + .unwrap_or(None), + ) + }) + .unwrap_or(HtpStreamState::ERROR) +} + +/// Process a chunk of outbound (server or response) data. +/// +/// timestamp is optional. 
+/// Returns HTP_STREAM_STATE_OK on state change, HTP_STREAM_STATE_ERROR on error, or HTP_STREAM_STATE_DATA when more data is needed +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +#[allow(clippy::useless_conversion)] +pub unsafe extern "C" fn htp_connp_response_data( + connp: *mut ConnectionParser, timestamp: *const libc::timeval, data: *const libc::c_void, + len: libc::size_t, +) -> HtpStreamState { + connp + .as_mut() + .map(|connp| { + connp.response_data( + ParserData::from((data as *const u8, len)), + timestamp + .as_ref() + .map(|val| datetime_from_sec_usec(val.tv_sec.into(), val.tv_usec.into())) + .unwrap_or(None), + ) + }) + .unwrap_or(HtpStreamState::ERROR) +} + +/// Get the number of transactions processed on this connection. +/// +/// Returns the number of transactions or -1 on error. +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_tx_size(connp: *const ConnectionParser) -> isize { + connp + .as_ref() + .map(|connp| isize::try_from(connp.tx_size()).unwrap_or(-1)) + .unwrap_or(-1) +} + +/// Get a transaction. +/// +/// Returns the transaction or NULL on error. +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_tx( + connp: *mut ConnectionParser, tx_id: usize, +) -> *const Transaction { + connp + .as_ref() + .map(|connp| { + connp + .tx(tx_id) + .map(|tx| { + if tx.is_started() { + tx as *const Transaction + } else { + std::ptr::null() + } + }) + .unwrap_or(std::ptr::null()) + }) + .unwrap_or(std::ptr::null()) +} + +/// Retrieves the pointer to the active response transaction. In connection +/// parsing mode there can be many open transactions, and up to 2 active +/// transactions at any one time. This is due to HTTP pipelining. Can be NULL. 
+/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_get_response_tx( + connp: *mut ConnectionParser, +) -> *const Transaction { + if let Some(connp) = connp.as_mut() { + if let Some(req) = connp.response() { + return req; + } + } + std::ptr::null() +} + +/// Retrieves the pointer to the active request transaction. In connection +/// parsing mode there can be many open transactions, and up to 2 active +/// transactions at any one time. This is due to HTTP pipelining. Can be NULL. +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_get_request_tx( + connp: *mut ConnectionParser, +) -> *const Transaction { + if let Some(connp) = connp.as_mut() { + if let Some(req) = connp.request() { + return req; + } + } + std::ptr::null() +} + +/// Returns the number of bytes consumed from the current data chunks so far or -1 on error. +/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_request_data_consumed(connp: *const ConnectionParser) -> i64 { + connp + .as_ref() + .map(|connp| connp.request_data_consumed().try_into().ok().unwrap_or(-1)) + .unwrap_or(-1) +} + +/// Returns the number of bytes consumed from the most recent outbound data chunk. Normally, an invocation +/// of htp_connp_response_data() will consume all data from the supplied buffer, but there are circumstances +/// where only partial consumption is possible. In such cases HTP_STREAM_STATE_DATA_OTHER will be returned. +/// Consumed bytes are no longer necessary, but the remainder of the buffer will need to be saved +/// for later. +/// Returns the number of bytes consumed from the last data chunk sent for outbound processing +/// or -1 on error. 
+/// # Safety +/// When calling this method, you have to ensure that connp is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_connp_response_data_consumed(connp: *const ConnectionParser) -> i64 { + connp + .as_ref() + .map(|connp| connp.response_data_consumed().try_into().ok().unwrap_or(-1)) + .unwrap_or(-1) +} diff --git a/rust/htp/src/c_api/header.rs b/rust/htp/src/c_api/header.rs new file mode 100644 index 000000000000..2b93eb54fea4 --- /dev/null +++ b/rust/htp/src/c_api/header.rs @@ -0,0 +1,189 @@ +#![deny(missing_docs)] +use crate::{ + bstr::Bstr, + c_api::bstr::bstr_ptr, + transaction::{Header, Headers}, +}; +use std::convert::TryFrom; + +/// Get the first header value matching the key. +/// +/// headers: Header table. +/// ckey: Header name to match. +/// +/// Returns the header or NULL when not found or on error +/// # Safety +/// When calling this method, you have to ensure that headers is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_headers_get( + headers: *const Headers, ckey: *const libc::c_char, +) -> *const Header { + if let (Some(headers), Some(ckey)) = (headers.as_ref(), ckey.as_ref()) { + headers + .get_nocase_nozero(std::ffi::CStr::from_ptr(ckey).to_bytes()) + .map(|value| value as *const Header) + .unwrap_or(std::ptr::null()) + } else { + std::ptr::null() + } +} + +/// Get all headers flags +/// +/// headers: Header table. +/// +/// Returns the accumulated header flags or 0 on error. +/// # Safety +/// When calling this method, you have to ensure that headers is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_headers_flags(headers: *const Headers) -> u64 { + headers + .as_ref() + .map(|headers| { + headers + .into_iter() + .fold(0, |flags, header| flags | header.flags) + }) + .unwrap_or(0) +} + +/// Get the header at a given index. +/// +/// headers: Header table. +/// index: Index into the table. 
+/// +/// Returns the header or NULL when not found or on error +/// # Safety +/// When calling this method, you have to ensure that headers is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_headers_get_index( + headers: *const Headers, index: usize, +) -> *const Header { + headers + .as_ref() + .map(|headers| { + headers + .elements + .get(index) + .map(|value| value as *const Header) + .unwrap_or(std::ptr::null()) + }) + .unwrap_or(std::ptr::null()) +} + +/// Get the size of the headers table. +/// +/// headers: Headers table. +/// +/// Returns the size or -1 on error +/// # Safety +/// When calling this method, you have to ensure that headers is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_headers_size(headers: *const Headers) -> isize { + headers + .as_ref() + .map(|headers| isize::try_from(headers.size()).unwrap_or(-1)) + .unwrap_or(-1) +} + +/// Get the name of a header. +/// +/// header: Header pointer. +/// +/// Returns the name or NULL on error. +/// # Safety +/// When calling this method, you have to ensure that header is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_header_name(header: *const Header) -> *const Bstr { + header + .as_ref() + .map(|header| &header.name as *const Bstr) + .unwrap_or(std::ptr::null()) +} + +/// Get the name of a header as a ptr. +/// +/// header: Header pointer. +/// +/// Returns the pointer or NULL on error. +/// # Safety +/// When calling this method, you have to ensure that header is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_header_name_ptr(header: *const Header) -> *const u8 { + header + .as_ref() + .map(|header| bstr_ptr(&header.name) as *const u8) + .unwrap_or(std::ptr::null()) +} + +/// Get the header flags +/// +/// header: Header pointer. +/// +/// Returns the header flags or 0 on error. 
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_flags(header: *const Header) -> u64 {
+    header.as_ref().map(|header| header.flags).unwrap_or(0)
+}
+
+/// Get the length of a header name.
+///
+/// header: Header pointer.
+///
+/// Returns the length or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_name_len(header: *const Header) -> isize {
+    header
+        .as_ref()
+        .map(|header| isize::try_from(header.name.len()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get the value of a header.
+///
+/// header: Header pointer.
+///
+/// Returns the value or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value(header: *const Header) -> *const Bstr {
+    header
+        .as_ref()
+        .map(|header| &header.value as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the value of a header as a ptr.
+///
+/// header: Header pointer.
+///
+/// Returns the pointer or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value_ptr(header: *const Header) -> *const u8 {
+    header
+        .as_ref()
+        .map(|header| bstr_ptr(&header.value) as *const u8)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the length of a header value.
+///
+/// header: Header pointer.
+///
+/// Returns the length or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that header is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_header_value_len(header: *const Header) -> isize {
+    header
+        .as_ref()
+        .map(|header| isize::try_from(header.value.len()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
diff --git a/rust/htp/src/c_api/log.rs b/rust/htp/src/c_api/log.rs
new file mode 100644
index 000000000000..801131918718
--- /dev/null
+++ b/rust/htp/src/c_api/log.rs
@@ -0,0 +1,53 @@
+#![deny(missing_docs)]
+use crate::log::{HtpLogCode, Log};
+use std::{ffi::CString, os::raw::c_char};
+
+/// Get a newly allocated copy of the log's message string
+///
+/// Returns the log message as a cstring or NULL if log is NULL or the message contains a NUL byte
+/// The caller must free this result with htp_free_cstring
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_message(log: *const Log) -> *mut c_char {
+    log.as_ref()
+        .and_then(|log| CString::new(log.msg.msg.clone()).ok())
+        .map(|msg| msg.into_raw())
+        .unwrap_or(std::ptr::null_mut())
+}
+
+/// Get a newly allocated copy of a log's message file
+///
+/// Returns the file as a cstring or NULL if log is NULL or the file name contains a NUL byte
+/// The caller must free this result with htp_free_cstring
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_file(log: *const Log) -> *mut c_char {
+    log.as_ref()
+        .and_then(|log| CString::new(log.msg.file.clone()).ok())
+        .map(|msg| msg.into_raw())
+        .unwrap_or(std::ptr::null_mut())
+}
+
+/// Get a log's message code
+///
+/// Returns a code or HTP_LOG_CODE_ERROR on error
+/// # Safety
+/// When calling this method, you have to ensure that log is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_code(log: *const Log) -> HtpLogCode {
+    log.as_ref()
+        .map(|log| log.msg.code)
+        .unwrap_or(HtpLogCode::ERROR)
+}
+
+/// Free log
+/// # Safety
+/// This
function is unsafe because improper use may lead to memory problems. For example, a double-free may occur if the function is called twice on the same raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn htp_log_free(log: *mut Log) {
+    if !log.is_null() {
+        drop(Box::from_raw(log));
+    }
+}
diff --git a/rust/htp/src/c_api/mod.rs b/rust/htp/src/c_api/mod.rs
new file mode 100644
index 000000000000..bc5c982a2304
--- /dev/null
+++ b/rust/htp/src/c_api/mod.rs
@@ -0,0 +1,35 @@
+#![deny(missing_docs)]
+use crate::util::get_version;
+use std::ffi::CString;
+
+/// Functions for working with Bstr.
+pub mod bstr;
+/// Functions for working with config.
+pub mod config;
+/// Functions for working with connection.
+pub mod connection;
+/// Functions for working with connection parser.
+pub mod connection_parser;
+/// Functions for working with headers.
+pub mod header;
+/// Functions for working with logs.
+pub mod log;
+/// Functions for working with transactions.
+pub mod transaction;
+/// Functions for working with request uri.
+pub mod uri;
+
+/// Returns the LibHTP version string (NOTE(review): assumes get_version() is NUL-terminated - confirm).
+#[no_mangle]
+pub extern "C" fn htp_get_version() -> *const libc::c_char {
+    get_version().as_ptr() as *const libc::c_char
+}
+
+/// Free a rust allocated cstring; a NULL input is ignored.
+///
+/// # Safety
+/// This should only ever be called with a pointer that was earlier obtained by calling [CString::into_raw].
+#[no_mangle]
+pub unsafe extern "C" fn htp_free_cstring(input: *mut libc::c_char) {
+    input.as_mut().map(|input| CString::from_raw(input));
+}
diff --git a/rust/htp/src/c_api/transaction.rs b/rust/htp/src/c_api/transaction.rs
new file mode 100644
index 000000000000..c5578890db0b
--- /dev/null
+++ b/rust/htp/src/c_api/transaction.rs
@@ -0,0 +1,757 @@
+use crate::{
+    bstr::Bstr, c_api::header::htp_headers_get, config::Config,
+    connection_parser::ConnectionParser, decompressors::HtpContentEncoding,
+    hook::DataExternalCallbackFn, request::HtpMethod, transaction::*, uri::Uri,
+};
+use std::{
+    convert::{TryFrom, TryInto},
+    rc::Rc,
+};
+
+/// Destroys the supplied transaction by removing it from connp; does nothing if connp or tx is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_destroy(connp: *mut ConnectionParser, tx: *const Transaction) {
+    if let (Some(connp), Some(tx)) = (connp.as_mut(), tx.as_ref()) {
+        connp.remove_tx(tx.index)
+    }
+}
+
+/// Get a transaction's normalized parsed uri, or NULL if tx is NULL or that uri is not set.
+///
+/// tx: Transaction pointer.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_normalized_uri(tx: *const Transaction) -> *const Bstr {
+    // Null-check tx via as_ref() before reading its cfg, so a NULL tx returns NULL.
+    tx.as_ref()
+        .and_then(|tx| {
+            if tx.cfg.decoder_cfg.normalized_uri_include_all {
+                tx.complete_normalized_uri.as_ref()
+            } else {
+                tx.partial_normalized_uri.as_ref()
+            }
+        })
+        .map(|uri| uri as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's configuration.
+///
+/// tx: Transaction pointer.
+///
+/// Returns a pointer to the configuration or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_cfg(tx: *const Transaction) -> *const Config {
+    tx.as_ref()
+        .map(|tx| Rc::as_ptr(&tx.cfg))
+        .unwrap_or(std::ptr::null())
+}
+
+/// Returns the user data associated with this transaction or NULL if none is available or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_get_user_data(tx: *const Transaction) -> *mut libc::c_void {
+    tx.as_ref()
+        .and_then(|val| val.user_data::<*mut libc::c_void>())
+        .copied()
+        .unwrap_or(std::ptr::null_mut())
+}
+
+/// Associates user data with this transaction; a NULL tx is ignored.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_set_user_data(tx: *mut Transaction, user_data: *mut libc::c_void) {
+    if let Some(tx) = tx.as_mut() {
+        tx.set_user_data(Box::new(user_data))
+    }
+}
+
+/// Get a transaction's request line.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request line or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_line(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_line.as_ref())
+        .map(|line| line as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request method.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request method or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_method(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_method.as_ref())
+        .map(|method| method as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's request method number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request method number or HtpMethod::ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_method_number(tx: *const Transaction) -> HtpMethod {
+    tx.as_ref()
+        .map(|tx| tx.request_method_number)
+        .unwrap_or(HtpMethod::ERROR)
+}
+
+/// Get a transaction's request uri.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_uri(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_uri.as_ref())
+        .map(|uri| uri as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request protocol.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_protocol(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_protocol.as_ref())
+        .map(|protocol| protocol as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request protocol number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol number or HtpProtocol::ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_protocol_number(tx: *const Transaction) -> HtpProtocol {
+    tx.as_ref()
+        .map(|tx| tx.request_protocol_number)
+        .unwrap_or(HtpProtocol::ERROR)
+}
+
+/// Get whether a transaction's protocol is version 0.9.
+///
+/// tx: Transaction pointer.
+///
+/// Returns 1 if the version is 0.9 or 0 otherwise. A NULL argument will
+/// also result in a return value of 0.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_is_protocol_0_9(tx: *const Transaction) -> i32 {
+    tx.as_ref().map(|tx| tx.is_protocol_0_9 as i32).unwrap_or(0)
+}
+
+/// Get whether a transaction contains a successful 101 Switching Protocols response to HTTP/2.0
+///
+/// tx: Transaction pointer.
+///
+/// Returns 1 if the transaction is an HTTP/2.0 upgrade or 0 otherwise. A NULL argument will
+/// also result in a return value of 0.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_is_http_2_upgrade(tx: *const Transaction) -> i32 {
+    tx.as_ref()
+        .map(|tx| tx.is_http_2_upgrade as i32)
+        .unwrap_or(0)
+}
+
+/// Get a transaction's parsed uri.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the parsed uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_parsed_uri(tx: *const Transaction) -> *const Uri {
+    tx.as_ref()
+        .and_then(|tx| tx.parsed_uri.as_ref())
+        .map(|uri| uri as *const Uri)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request headers.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request headers or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_headers(tx: *const Transaction) -> *const Headers {
+    tx.as_ref()
+        .map(|tx| &tx.request_headers as *const Headers)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request headers size.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the size or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_headers_size(tx: *const Transaction) -> isize {
+    tx.as_ref()
+        .map(|tx| isize::try_from(tx.request_headers.size()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get the first request header matching the key from a transaction.
+///
+/// tx: Transaction pointer.
+/// ckey: Header name to match, as a NUL-terminated C string.
+///
+/// Returns the header or NULL when not found or on error
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL and that ckey is either a valid NUL-terminated string or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_header(
+    tx: *const Transaction, ckey: *const libc::c_char,
+) -> *const Header {
+    tx.as_ref()
+        .map(|tx| htp_headers_get(&tx.request_headers, ckey))
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the request header at the given index.
+///
+/// tx: Transaction pointer.
+/// index: request header table index.
+///
+/// Returns the header or NULL when index is out of range or on error
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_header_index(
+    tx: *const Transaction, index: usize,
+) -> *const Header {
+    tx.as_ref()
+        .map(|tx| {
+            tx.request_headers
+                .elements
+                .get(index)
+                .map(|value| value as *const Header)
+                .unwrap_or(std::ptr::null())
+        })
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request transfer coding.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the transfer coding or HtpTransferCoding::ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_transfer_coding(
+    tx: *const Transaction,
+) -> HtpTransferCoding {
+    tx.as_ref()
+        .map(|tx| tx.request_transfer_coding)
+        .unwrap_or(HtpTransferCoding::ERROR)
+}
+
+/// Get a transaction's request content encoding.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the content encoding or HtpContentEncoding::ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_content_encoding(
+    tx: *const Transaction,
+) -> HtpContentEncoding {
+    tx.as_ref()
+        .map(|tx| tx.request_content_encoding)
+        .unwrap_or(HtpContentEncoding::ERROR)
+}
+
+/// Get a transaction's request content type.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the content type or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_content_type(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_content_type.as_ref())
+        .map(|content_type| content_type as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's request content length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the content length or -1 if it is not set, does not fit in an i64, or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_content_length(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| {
+            tx.request_content_length
+                .map(|len| len.try_into().ok().unwrap_or(-1))
+                .unwrap_or(-1)
+        })
+        .unwrap_or(-1)
+}
+
+/// Get the transaction's request authentication type.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the auth type or HTP_AUTH_ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_auth_type(tx: *const Transaction) -> HtpAuthType {
+    tx.as_ref()
+        .map(|tx| tx.request_auth_type)
+        .unwrap_or(HtpAuthType::ERROR)
+}
+
+/// Get a transaction's request hostname.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request hostname or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_hostname(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.request_hostname.as_ref())
+        .map(|hostname| hostname as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's request port number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request port number or -1 if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_port_number(tx: *const Transaction) -> i32 {
+    tx.as_ref()
+        .and_then(|tx| tx.request_port_number.as_ref())
+        .map(|port| *port as i32)
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's request message length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request message length or -1 if it does not fit in an i64 or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_message_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| tx.request_message_len.try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's request entity length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the request entity length or -1 if it does not fit in an i64 or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_entity_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| tx.request_entity_len.try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's response line.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response line or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_line(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_line.as_ref())
+        .map(|response_line| response_line as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response protocol.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response protocol or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_protocol(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_protocol.as_ref())
+        .map(|response_protocol| response_protocol as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response protocol number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the protocol number or HtpProtocol::ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_protocol_number(tx: *const Transaction) -> HtpProtocol {
+    tx.as_ref()
+        .map(|tx| tx.response_protocol_number)
+        .unwrap_or(HtpProtocol::ERROR)
+}
+
+/// Get the transaction's response status.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response status or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_status.as_ref())
+        .map(|response_status| response_status as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's response status number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response status number, 0 if it is unknown, or -1 if it is invalid or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status_number(tx: *const Transaction) -> i32 {
+    tx.as_ref()
+        .map(|tx| match tx.response_status_number {
+            HtpResponseNumber::UNKNOWN => 0,
+            HtpResponseNumber::INVALID => -1,
+            HtpResponseNumber::VALID(status) => status as i32,
+        })
+        .unwrap_or(-1)
+}
+/// Get the transaction's response status expected number.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the expected number or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_status_expected_number(tx: *const Transaction) -> i32 {
+    tx.as_ref()
+        .map(|tx| tx.response_status_expected_number as i32)
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's response message.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response message or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_message(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_message.as_ref())
+        .map(|response_message| response_message as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response headers.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response headers or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_headers(tx: *const Transaction) -> *const Headers {
+    tx.as_ref()
+        .map(|tx| &tx.response_headers as *const Headers)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response headers size.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the size or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_headers_size(tx: *const Transaction) -> isize {
+    tx.as_ref()
+        .map(|tx| isize::try_from(tx.response_headers.size()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get the first response header matching the key from a transaction.
+///
+/// tx: Transaction pointer.
+/// ckey: Header name to match, as a NUL-terminated C string.
+///
+/// Returns the header or NULL when not found or on error
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL and that ckey is either a valid NUL-terminated string or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_header(
+    tx: *const Transaction, ckey: *const libc::c_char,
+) -> *const Header {
+    tx.as_ref()
+        .map(|tx| htp_headers_get(&tx.response_headers, ckey))
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the response header at the given index.
+///
+/// tx: Transaction pointer.
+/// index: response header table index.
+///
+/// Returns the header or NULL when index is out of range or on error
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_header_index(
+    tx: *const Transaction, index: usize,
+) -> *const Header {
+    tx.as_ref()
+        .map(|tx| {
+            tx.response_headers
+                .elements
+                .get(index)
+                .map(|value| value as *const Header)
+                .unwrap_or(std::ptr::null())
+        })
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get a transaction's response message length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response message length or -1 if it does not fit in an i64 or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_message_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| tx.response_message_len.try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's response entity length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response entity length or -1 if it does not fit in an i64 or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_entity_len(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| tx.response_entity_len.try_into().ok().unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's response content length.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response content length or -1 if it is not set, does not fit in an i64, or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_content_length(tx: *const Transaction) -> i64 {
+    tx.as_ref()
+        .map(|tx| {
+            tx.response_content_length
+                .map(|len| len.try_into().ok().unwrap_or(-1))
+                .unwrap_or(-1)
+        })
+        .unwrap_or(-1)
+}
+
+/// Get a transaction's response content type.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the response content type or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_content_type(tx: *const Transaction) -> *const Bstr {
+    tx.as_ref()
+        .and_then(|tx| tx.response_content_type.as_ref())
+        .map(|response_content_type| response_content_type as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the transaction's bit flags.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the flags represented as an integer or 0 if the flags are empty
+/// or a NULL ptr is passed as an argument.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_flags(tx: *const Transaction) -> u64 {
+    tx.as_ref().map(|tx| tx.flags).unwrap_or(0)
+}
+
+/// Get the transaction's request progress.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the progress or HTP_REQUEST_ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_request_progress(tx: *const Transaction) -> HtpRequestProgress {
+    tx.as_ref()
+        .map(|tx| tx.request_progress)
+        .unwrap_or(HtpRequestProgress::ERROR)
+}
+
+/// Get the transaction's response progress.
+///
+/// tx: Transaction pointer.
+///
+/// Returns the progress or HtpResponseProgress::ERROR on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_response_progress(tx: *const Transaction) -> HtpResponseProgress {
+    tx.as_ref()
+        .map(|tx| tx.response_progress)
+        .unwrap_or(HtpResponseProgress::ERROR)
+}
+
+/// Get the transaction's index.
+///
+/// tx: Transaction pointer.
+///
+/// Returns an index or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_index(tx: *const Transaction) -> isize {
+    tx.as_ref()
+        .map(|tx| isize::try_from(tx.index).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Register callback for the transaction-specific RESPONSE_BODY_DATA hook; a NULL tx is ignored.
+/// # Safety
+/// When calling this method, you have to ensure that tx is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_register_response_body_data(
+    tx: *mut Transaction, cbk_fn: DataExternalCallbackFn,
+) {
+    if let Some(tx) = tx.as_mut() {
+        tx.hook_response_body_data.register_extern(cbk_fn)
+    }
+}
+
+/// Get the data's transaction.
+///
+/// Returns the transaction or NULL if data is NULL.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_tx(data: *const Data) -> *const Transaction {
+    data.as_ref()
+        .map(|data| data.tx() as *const Transaction)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the data pointer.
+///
+/// Returns the data or NULL on error.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_data(data: *const Data) -> *const u8 {
+    data.as_ref()
+        .map(|data| data.data())
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the length of the data.
+///
+/// Returns the length or -1 on error.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_len(data: *const Data) -> isize {
+    data.as_ref()
+        .map(|data| isize::try_from(data.len()).unwrap_or(-1))
+        .unwrap_or(-1)
+}
+
+/// Get whether this data is empty.
+///
+/// Returns true if data is NULL or zero-length.
+/// # Safety
+/// When calling this method, you have to ensure that data is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_tx_data_is_empty(data: *const Data) -> bool {
+    data.as_ref().map(|data| data.is_empty()).unwrap_or(true)
+}
diff --git a/rust/htp/src/c_api/uri.rs b/rust/htp/src/c_api/uri.rs
new file mode 100644
index 000000000000..a51f00ca916e
--- /dev/null
+++ b/rust/htp/src/c_api/uri.rs
@@ -0,0 +1,118 @@
+use crate::{bstr::Bstr, uri::Uri};
+
+/// Get the scheme of a uri.
+///
+/// Returns the scheme for uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_scheme(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.scheme.as_ref())
+        .map(|scheme| scheme as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the username of a uri.
+///
+/// Returns the username for uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_username(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.username.as_ref())
+        .map(|username| username as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the password of a uri.
+///
+/// Returns the password for uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_password(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.password.as_ref())
+        .map(|password| password as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the hostname of a uri.
+///
+/// Returns the hostname for uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_hostname(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.hostname.as_ref())
+        .map(|hostname| hostname as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the port of a uri.
+///
+/// Returns the port for uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_port(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.port.as_ref())
+        .map(|port| port as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the port_number of a uri.
+///
+/// Returns the port_number for uri or -1 if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_port_number(uri: *const Uri) -> i32 {
+    uri.as_ref()
+        .and_then(|uri| uri.port_number)
+        .map(|port| port as i32)
+        .unwrap_or(-1)
+}
+
+/// Get the path of a uri.
+///
+/// Returns the path for uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_path(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.path.as_ref())
+        .map(|path| path as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the query of a uri.
+///
+/// Returns the query for uri or NULL if it is not set or on error.
+/// # Safety
+/// When calling this method, you have to ensure that uri is either properly initialized or NULL
+#[no_mangle]
+pub unsafe extern "C" fn htp_uri_query(uri: *const Uri) -> *const Bstr {
+    uri.as_ref()
+        .and_then(|uri| uri.query.as_ref())
+        .map(|query| query as *const Bstr)
+        .unwrap_or(std::ptr::null())
+}
+
+/// Get the fragment of a uri.
+/// +/// Returns the fragment for uri or NULL on error. +/// # Safety +/// When calling this method, you have to ensure that uri is either properly initialized or NULL +#[no_mangle] +pub unsafe extern "C" fn htp_uri_fragment(uri: *const Uri) -> *const Bstr { + uri.as_ref() + .and_then(|uri| uri.fragment.as_ref()) + .map(|fragment| fragment as *const Bstr) + .unwrap_or(std::ptr::null()) +} diff --git a/rust/htp/src/config.rs b/rust/htp/src/config.rs new file mode 100644 index 000000000000..ceb103d13165 --- /dev/null +++ b/rust/htp/src/config.rs @@ -0,0 +1,636 @@ +use crate::decompressors::Options; +use crate::{ + error::Result, + hook::{ + DataHook, DataNativeCallbackFn, LogHook, LogNativeCallbackFn, TxHook, TxNativeCallbackFn, + }, + log::HtpLogLevel, + transaction::Param, + unicode_bestfit_map::UnicodeBestfitMap, + HtpStatus, +}; + +/// Configuration for libhtp parsing. +#[derive(Clone)] +pub struct Config { + /// The maximum size of the buffer that is used when the current + /// input chunk does not contain all the necessary data (e.g., a header + /// line that spans several packets). + pub field_limit: usize, + /// Log level, which will be used when deciding whether to store or + /// ignore the messages issued by the parser. + pub log_level: HtpLogLevel, + /// Whether to delete each transaction after the last hook is invoked. This + /// feature should be used when parsing traffic streams in real time. + pub tx_auto_destroy: bool, + /// Server personality identifier. + pub server_personality: HtpServerPersonality, + /// The function to use to transform parameters after parsing. + pub parameter_processor: Option Result<()>>, + /// Decoder configuration for url path. + pub decoder_cfg: DecoderConfig, + /// Whether to decompress compressed response bodies. + pub response_decompression_enabled: bool, + /// Whether to parse urlencoded data. + pub parse_urlencoded: bool, + /// Whether to parse HTTP Authentication headers. 
+ pub parse_request_auth: bool, + /// Request start hook, invoked when the parser receives the first byte of a new + /// request. Because an HTTP transaction always starts with a request, this hook + /// doubles as a transaction start hook. + pub hook_request_start: TxHook, + /// Request line hook, invoked after a request line has been parsed. + pub hook_request_line: TxHook, + /// Request URI normalization hook, for overriding default normalization of URI. + pub hook_request_uri_normalize: TxHook, + /// Receives raw request header data, starting immediately after the request line, + /// including all headers as they are seen on the TCP connection, and including the + /// terminating empty line. Not available on genuine HTTP/0.9 requests (because + /// they don't use headers). + pub hook_request_header_data: DataHook, + /// Request headers hook, invoked after all request headers are seen. + pub hook_request_headers: TxHook, + /// Request body data hook, invoked every time body data is available. Each + /// invocation will provide a Data instance. Chunked data + /// will be dechunked before the data is passed to this hook. Decompression + /// is not currently implemented. At the end of the request body + /// there will be a call with the data set to None. + pub hook_request_body_data: DataHook, + /// Receives raw request trailer data, which can be available on requests that have + /// chunked bodies. The data starts immediately after the zero-length chunk + /// and includes the terminating empty line. + pub hook_request_trailer_data: DataHook, + /// Request trailer hook, invoked after all trailer headers are seen, + /// and if they are seen (not invoked otherwise). + pub hook_request_trailer: TxHook, + /// Request hook, invoked after a complete request is seen. + pub hook_request_complete: TxHook, + /// Response startup hook, invoked when a response transaction is found and + /// processing started. 
+ pub hook_response_start: TxHook, + /// Response line hook, invoked after a response line has been parsed. + pub hook_response_line: TxHook, + /// Receives raw response header data, starting immediately after the status line + /// and including all headers as they are seen on the TCP connection, and including the + /// terminating empty line. Not available on genuine HTTP/0.9 responses (because + /// they don't have response headers). + pub hook_response_header_data: DataHook, + /// Response headers hook, invoked after all response headers have been seen. + pub hook_response_headers: TxHook, + /// Response body data hook, invoked every time body data is available. Each + /// invocation will provide a Data instance. Chunked data + /// will be dechunked before the data is passed to this hook. By default, + /// compressed data will be decompressed, but decompression can be disabled + /// in configuration. At the end of the response body there will be a call + /// with the data pointer set to NULL. + pub hook_response_body_data: DataHook, + /// Receives raw response trailer data, which can be available on responses that have + /// chunked bodies. The data starts immediately after the zero-length chunk + /// and includes the terminating empty line. + pub hook_response_trailer_data: DataHook, + /// Response trailer hook, invoked after all trailer headers have been processed, + /// and only if the trailer exists. + pub hook_response_trailer: TxHook, + /// Response hook, invoked after a response has been seen. Because sometimes servers + /// respond before receiving complete requests, a response_complete callback may be + /// invoked prior to a request_complete callback. + pub hook_response_complete: TxHook, + /// Transaction complete hook, which is invoked once the entire transaction is + /// considered complete (request and response are both complete). This is always + /// the last hook to be invoked.
+ pub hook_transaction_complete: TxHook, + /// Log hook, invoked every time the library wants to log. + pub hook_log: LogHook, + /// Reaction to leading whitespace on the request line + pub requestline_leading_whitespace_unwanted: HtpUnwanted, + /// Whether to decompress compressed request bodies. + pub request_decompression_enabled: bool, + /// Configuration options for decompression. + pub compression_options: Options, + /// Flush incomplete transactions + pub flush_incomplete: bool, + /// Maximum number of transactions + pub max_tx: u32, + /// Maximum number of headers + pub number_headers_limit: u32, +} + +impl Default for Config { + fn default() -> Self { + Self { + field_limit: 18000, + log_level: HtpLogLevel::NOTICE, + tx_auto_destroy: false, + server_personality: HtpServerPersonality::MINIMAL, + parameter_processor: None, + decoder_cfg: Default::default(), + response_decompression_enabled: true, + parse_urlencoded: false, + parse_request_auth: true, + hook_request_start: TxHook::default(), + hook_request_line: TxHook::default(), + hook_request_uri_normalize: TxHook::default(), + hook_request_header_data: DataHook::default(), + hook_request_headers: TxHook::default(), + hook_request_body_data: DataHook::default(), + hook_request_trailer_data: DataHook::default(), + hook_request_trailer: TxHook::default(), + hook_request_complete: TxHook::default(), + hook_response_start: TxHook::default(), + hook_response_line: TxHook::default(), + hook_response_header_data: DataHook::default(), + hook_response_headers: TxHook::default(), + hook_response_body_data: DataHook::default(), + hook_response_trailer_data: DataHook::default(), + hook_response_trailer: TxHook::default(), + hook_response_complete: TxHook::default(), + hook_transaction_complete: TxHook::default(), + hook_log: LogHook::default(), + requestline_leading_whitespace_unwanted: HtpUnwanted::IGNORE, + request_decompression_enabled: false, + compression_options: Options::default(), + flush_incomplete: false, + 
max_tx: 512, + number_headers_limit: 1024, + } + } +} + +/// Configuration options for decoding. +#[derive(Copy, Clone)] +pub struct DecoderConfig { + ///Whether to double decode the path in normalized uri + pub double_decode_normalized_path: bool, + /// Whether to double decode the query in the normalized uri + pub double_decode_normalized_query: bool, + // Path-specific decoding options. + /// Convert backslash characters to slashes. + pub backslash_convert_slashes: bool, + /// Convert to lowercase. + pub convert_lowercase: bool, + /// Compress slash characters. + pub path_separators_compress: bool, + /// Should we URL-decode encoded path segment separators? + pub path_separators_decode: bool, + /// Should we decode '+' characters to spaces? + pub plusspace_decode: bool, + /// Reaction to encoded path separators. + pub path_separators_encoded_unwanted: HtpUnwanted, + // Special characters options. + /// Controls how raw NUL bytes are handled. + pub nul_raw_terminates: bool, + /// Determines server response to a raw NUL byte in the path. + pub nul_raw_unwanted: HtpUnwanted, + /// Reaction to control characters. + pub control_chars_unwanted: HtpUnwanted, + /// Allow whitespace characters in request uri path + pub allow_space_uri: bool, + // URL encoding options. + /// Should we decode %u-encoded characters? + pub u_encoding_decode: bool, + /// Reaction to %u encoding. + pub u_encoding_unwanted: HtpUnwanted, + /// Handling of invalid URL encodings. + pub url_encoding_invalid_handling: HtpUrlEncodingHandling, + /// Reaction to invalid URL encoding. + pub url_encoding_invalid_unwanted: HtpUnwanted, + /// Controls how encoded NUL bytes are handled. + pub nul_encoded_terminates: bool, + /// How are we expected to react to an encoded NUL byte? + pub nul_encoded_unwanted: HtpUnwanted, + // Normalized URI preference + /// Controls whether the client wants the complete or partial normalized URI. + pub normalized_uri_include_all: bool, + // UTF-8 options. 
+ /// Controls how invalid UTF-8 characters are handled. + pub utf8_invalid_unwanted: HtpUnwanted, + /// Convert UTF-8 characters into bytes using best-fit mapping. + pub utf8_convert_bestfit: bool, + /// Best-fit map for UTF-8 decoding. + pub bestfit_map: UnicodeBestfitMap, +} + +impl Default for DecoderConfig { + fn default() -> Self { + Self { + double_decode_normalized_path: false, + double_decode_normalized_query: false, + backslash_convert_slashes: false, + convert_lowercase: false, + path_separators_compress: false, + path_separators_decode: false, + plusspace_decode: true, + path_separators_encoded_unwanted: HtpUnwanted::IGNORE, + nul_raw_terminates: false, + nul_raw_unwanted: HtpUnwanted::IGNORE, + control_chars_unwanted: HtpUnwanted::IGNORE, + allow_space_uri: false, + u_encoding_decode: false, + u_encoding_unwanted: HtpUnwanted::IGNORE, + url_encoding_invalid_handling: HtpUrlEncodingHandling::PRESERVE_PERCENT, + url_encoding_invalid_unwanted: HtpUnwanted::IGNORE, + nul_encoded_terminates: false, + nul_encoded_unwanted: HtpUnwanted::IGNORE, + normalized_uri_include_all: false, + utf8_invalid_unwanted: HtpUnwanted::IGNORE, + utf8_convert_bestfit: false, + bestfit_map: UnicodeBestfitMap::default(), + } + } +} + +/// Enumerates the possible server personalities. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpServerPersonality { + /// Minimal personality that performs as little work as possible. All optional + /// features are disabled. This personality is a good starting point for customization. + MINIMAL, + /// A generic personality that aims to work reasonably well for all server types. + GENERIC, + /// The IDS personality tries to perform as much decoding as possible. + IDS, + /// Mimics the behavior of IIS 4.0, as shipped with Windows NT 4.0. + IIS_4_0, + /// Mimics the behavior of IIS 5.0, as shipped with Windows 2000. + IIS_5_0, + /// Mimics the behavior of IIS 5.1, as shipped with Windows XP Professional. 
+ IIS_5_1, + /// Mimics the behavior of IIS 6.0, as shipped with Windows 2003. + IIS_6_0, + /// Mimics the behavior of IIS 7.0, as shipped with Windows 2008. + IIS_7_0, + /// Mimics the behavior of IIS 7.5, as shipped with Windows 7. + IIS_7_5, + /// Mimics the behavior of Apache 2.x. + APACHE_2, +} + +/// Enumerates the ways in which servers respond to malformed data. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpUnwanted { + /// Ignores problem. + IGNORE, + /// Responds with HTTP 400 status code. + CODE_400 = 400, + /// Responds with HTTP 404 status code. + CODE_404 = 404, +} + +/// Enumerates the possible approaches to handling invalid URL-encodings. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpUrlEncodingHandling { + /// Ignore invalid URL encodings and leave the % in the data. + PRESERVE_PERCENT, + /// Ignore invalid URL encodings, but remove the % from the data. + REMOVE_PERCENT, + /// Decode invalid URL encodings. + PROCESS_INVALID, +} + +impl Config { + /// Registers a callback that is invoked every time there is a log message with + /// severity equal to or higher than the configured log level. + pub fn register_log(&mut self, cbk_fn: LogNativeCallbackFn) { + self.hook_log.register(cbk_fn); + } + + /// Registers a request_complete callback, which is invoked after a + /// complete request is seen. + pub fn register_request_complete(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_request_complete.register(cbk_fn); + } + + /// Registers a request_body_data callback, which is invoked whenever we see + /// bytes of request body data. + pub fn register_request_body_data(&mut self, cbk_fn: DataNativeCallbackFn) { + self.hook_request_body_data.register(cbk_fn); + } + + /// Registers a request_header_data callback, which is invoked when we see header + /// data.
This callback receives raw header data as seen on the connection, including + /// the terminating line and anything seen after the request line. + pub fn register_request_header_data(&mut self, cbk_fn: DataNativeCallbackFn) { + self.hook_request_header_data.register(cbk_fn); + } + + /// Registers a request_headers callback, which is invoked after we see all the + /// request headers. + pub fn register_request_headers(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_request_headers.register(cbk_fn); + } + + /// Registers a request_line callback, which is invoked after we parse the entire + /// request line. + pub fn register_request_line(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_request_line.register(cbk_fn); + } + + /// Registers a request_start callback, which is invoked every time a new + /// request begins and before any parsing is done. + pub fn register_request_start(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_request_start.register(cbk_fn); + } + + /// Registers a request_trailer callback, which is invoked when all trailer headers + /// are seen, if present. + pub fn register_request_trailer(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_request_trailer.register(cbk_fn); + } + + /// Registers a request_trailer_data callback, which may be invoked on requests with + /// chunked bodies. This callback receives the raw request trailer data after the zero-length + /// chunk, including the terminating line. + pub fn register_request_trailer_data(&mut self, cbk_fn: DataNativeCallbackFn) { + self.hook_request_trailer_data.register(cbk_fn); + } + + /// Registers a response_body_data callback, which is invoked whenever we see + /// bytes of response body data. + pub fn register_response_body_data(&mut self, cbk_fn: DataNativeCallbackFn) { + self.hook_response_body_data.register(cbk_fn); + } + + /// Registers a response_complete callback, which is invoked after a + /// response has been seen.
+ pub fn register_response_complete(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_response_complete.register(cbk_fn); + } + + /// Registers a response_header_data callback, which is invoked when we see header + /// data. This callback receives raw header data as seen on the connection, including + /// the terminating line and anything seen after the response line. + pub fn register_response_header_data(&mut self, cbk_fn: DataNativeCallbackFn) { + self.hook_response_header_data.register(cbk_fn); + } + + /// Registers a response_headers callback, which is invoked after we see all the + /// response headers. + #[allow(dead_code)] + pub fn register_response_headers(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_response_headers.register(cbk_fn); + } + + /// Registers a response_line callback, which is invoked after we parse the entire + /// response line. + #[allow(dead_code)] + pub fn register_response_line(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_response_line.register(cbk_fn); + } + + /// Registers a response_start callback, which is invoked when we see the + /// first bytes of data from a response. + pub fn register_response_start(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_response_start.register(cbk_fn); + } + + /// Registers a response_trailer callback, which is invoked when all + /// trailer headers are seen, if present. + pub fn register_response_trailer(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_response_trailer.register(cbk_fn); + } + + /// Registers a response_trailer_data callback, which may be invoked on responses with + /// chunked bodies. This callback receives the raw response trailer data after the zero-length + /// chunk and including the terminating line.
+ pub fn register_response_trailer_data(&mut self, cbk_fn: DataNativeCallbackFn) { + self.hook_response_trailer_data.register(cbk_fn); + } + + /// Registers a transaction_complete callback, which is invoked once the request and response + /// are both complete. + pub fn register_transaction_complete(&mut self, cbk_fn: TxNativeCallbackFn) { + self.hook_transaction_complete.register(cbk_fn); + } + + /// Enable or disable the double decoding of the path in the normalized uri + pub fn set_double_decode_normalized_path(&mut self, double_decode_normalized_path: bool) { + self.decoder_cfg.double_decode_normalized_path = double_decode_normalized_path; + } + + /// Enable or disable the double decoding of the query in the normalized uri + pub fn set_double_decode_normalized_query(&mut self, double_decode_normalized_query: bool) { + self.decoder_cfg.double_decode_normalized_query = double_decode_normalized_query; + } + + /// Enable or disable the built-in Urlencoded parser. Disabled by default. + /// The parser will parse query strings and request bodies with the appropriate MIME type. + pub fn set_parse_urlencoded(&mut self, parse_urlencoded: bool) { + self.parse_urlencoded = parse_urlencoded; + } + + /// Configures the maximum size of the buffer LibHTP will use when all data is not available + /// in the current buffer (e.g., a very long header line that might span several packets). This + /// limit is controlled by the field_limit parameter. + pub fn set_field_limit(&mut self, field_limit: usize) { + self.field_limit = field_limit; + } + + /// Enable or disable spaces in URIs. Disabled by default. + pub fn set_allow_space_uri(&mut self, allow_space: bool) { + self.decoder_cfg.allow_space_uri = allow_space; + } + + /// Configure desired server personality. + /// Returns an Error if the personality is not supported. 
+ pub fn set_server_personality(&mut self, personality: HtpServerPersonality) -> Result<()> { + match personality { + HtpServerPersonality::MINIMAL => {} + HtpServerPersonality::GENERIC => { + self.set_backslash_convert_slashes(true); + self.set_path_separators_decode(true); + self.set_path_separators_compress(true); + } + HtpServerPersonality::IDS => { + self.set_backslash_convert_slashes(true); + self.set_path_separators_decode(true); + self.set_path_separators_compress(true); + self.set_convert_lowercase(true); + self.set_utf8_convert_bestfit(true); + self.set_u_encoding_decode(true); + self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE); + } + HtpServerPersonality::APACHE_2 => { + self.set_backslash_convert_slashes(false); + self.set_path_separators_decode(false); + self.set_path_separators_compress(true); + self.set_u_encoding_decode(false); + self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + self.set_url_encoding_invalid_unwanted(HtpUnwanted::CODE_400); + self.set_control_chars_unwanted(HtpUnwanted::IGNORE); + self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::CODE_400); + } + HtpServerPersonality::IIS_5_1 => { + self.set_backslash_convert_slashes(true); + self.set_path_separators_decode(true); + self.set_path_separators_compress(true); + self.set_u_encoding_decode(false); + self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + self.set_control_chars_unwanted(HtpUnwanted::IGNORE); + self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE); + } + HtpServerPersonality::IIS_6_0 => { + self.set_backslash_convert_slashes(true); + self.set_path_separators_decode(true); + self.set_path_separators_compress(true); + self.set_u_encoding_decode(true); + self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + self.set_u_encoding_unwanted(HtpUnwanted::CODE_400); + self.set_control_chars_unwanted(HtpUnwanted::CODE_400); + 
self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE); + } + HtpServerPersonality::IIS_7_0 | HtpServerPersonality::IIS_7_5 => { + self.set_backslash_convert_slashes(true); + self.set_path_separators_decode(true); + self.set_path_separators_compress(true); + self.set_u_encoding_decode(true); + self.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + self.set_url_encoding_invalid_unwanted(HtpUnwanted::CODE_400); + self.set_control_chars_unwanted(HtpUnwanted::CODE_400); + self.set_requestline_leading_whitespace_unwanted(HtpUnwanted::IGNORE); + } + _ => return Err(HtpStatus::ERROR), + } + // Remember the personality + self.server_personality = personality; + Ok(()) + } + + /// Configures whether transactions will be automatically destroyed once they + /// are processed and all callbacks invoked. This option is appropriate for + /// programs that process transactions as they are processed. + pub fn set_tx_auto_destroy(&mut self, tx_auto_destroy: bool) { + self.tx_auto_destroy = tx_auto_destroy; + } + + /// Configures whether incomplete transactions will be flushed when a connection is closed. + /// + /// This will invoke the transaction complete callback for each incomplete transaction. The + /// transactions passed to the callback will not have their request and response state set + /// to complete - they will simply be passed with the state they have within the parser at + /// the time of the call. + /// + /// This option is intended to be used when a connection is closing and we want to process + /// any incomplete transactions that were in flight, or which never completed due to packet + /// loss or parsing errors. + /// + /// These transactions will also be removed from the parser when auto destroy is enabled. 
+ pub fn set_flush_incomplete(&mut self, flush_incomplete: bool) { + self.flush_incomplete = flush_incomplete; + } + + /// Configures a best-fit map, which is used whenever characters longer than one byte + /// need to be converted to a single byte. By default a Windows 1252 best-fit map is used. + pub fn set_bestfit_map(&mut self, map: UnicodeBestfitMap) { + self.decoder_cfg.bestfit_map = map; + } + + /// Sets the replacement character that will be used in the lossy best-fit + /// mapping from multi-byte to single-byte streams. The question mark character + /// is used as the default replacement byte. + pub fn set_bestfit_replacement_byte(&mut self, b: u8) { + self.decoder_cfg.bestfit_map.replacement_byte = b; + } + + /// Configures how the server handles invalid URL encoding. + pub fn set_url_encoding_invalid_handling(&mut self, handling: HtpUrlEncodingHandling) { + self.decoder_cfg.url_encoding_invalid_handling = handling; + } + + /// Configures the handling of raw NUL bytes. If enabled, raw NUL terminates strings. + pub fn set_nul_raw_terminates(&mut self, enabled: bool) { + self.decoder_cfg.nul_raw_terminates = enabled; + } + + /// Configures how the server reacts to encoded NUL bytes. Some servers will stop + /// at NUL, while some will respond with 400 or 404. When the termination option is not + /// used, the NUL byte will remain in the path. + pub fn set_nul_encoded_terminates(&mut self, enabled: bool) { + self.decoder_cfg.nul_encoded_terminates = enabled; + } + + /// Configures whether %u-encoded sequences are decoded. Such sequences + /// will be treated as invalid URL encoding if decoding is not desirable. + pub fn set_u_encoding_decode(&mut self, enabled: bool) { + self.decoder_cfg.u_encoding_decode = enabled; + } + + /// Configures whether backslash characters are treated as path segment separators. They + /// are not on Unix systems, but are on Windows systems.
If this setting is enabled, a path + /// such as "/one\two/three" will be converted to "/one/two/three". + pub fn set_backslash_convert_slashes(&mut self, enabled: bool) { + self.decoder_cfg.backslash_convert_slashes = enabled; + } + + /// Configures whether encoded path segment separators will be decoded. Apache does not do + /// this by default, but IIS does. If enabled, a path such as "/one%2ftwo" will be normalized + /// to "/one/two". If the backslash_separators option is also enabled, encoded backslash + /// characters will be converted too (and subsequently normalized to forward slashes). + pub fn set_path_separators_decode(&mut self, enabled: bool) { + self.decoder_cfg.path_separators_decode = enabled; + } + + /// Configures whether consecutive path segment separators will be compressed. When enabled, a path + /// such as "/one//two" will be normalized to "/one/two". Backslash conversion and path segment separator + /// decoding are carried out before compression. For example, the path "/one\\/two\/%5cthree/%2f//four" + /// will be converted to "/one/two/three/four" (assuming all 3 options are enabled). + pub fn set_path_separators_compress(&mut self, enabled: bool) { + self.decoder_cfg.path_separators_compress = enabled; + } + + /// Configures whether plus characters are converted to spaces when decoding URL-encoded strings. This + /// is appropriate to do for parameters, but not for URLs. Only applies to contexts where decoding + /// is taking place. + pub fn set_plusspace_decode(&mut self, enabled: bool) { + self.decoder_cfg.plusspace_decode = enabled; + } + + /// Configures whether input data will be converted to lowercase. Useful for handling servers with + /// case-insensitive filesystems. + pub fn set_convert_lowercase(&mut self, enabled: bool) { + self.decoder_cfg.convert_lowercase = enabled; + } + + /// Controls whether the data should be treated as UTF-8 and converted to a single-byte + /// stream using best-fit mapping. 
+ pub fn set_utf8_convert_bestfit(&mut self, enabled: bool) { + self.decoder_cfg.utf8_convert_bestfit = enabled; + } + + /// Configures reaction to %u-encoded sequences in input data. + pub fn set_u_encoding_unwanted(&mut self, unwanted: HtpUnwanted) { + self.decoder_cfg.u_encoding_unwanted = unwanted; + } + + /// Controls reaction to raw control characters in the data. + pub fn set_control_chars_unwanted(&mut self, unwanted: HtpUnwanted) { + self.decoder_cfg.control_chars_unwanted = unwanted; + } + + /// Controls whether to use complete or partial URI normalization + pub fn set_normalized_uri_include_all(&mut self, set: bool) { + self.decoder_cfg.normalized_uri_include_all = set; + } + + /// Configures how the server reacts to invalid URL encoding. + pub fn set_url_encoding_invalid_unwanted(&mut self, unwanted: HtpUnwanted) { + self.decoder_cfg.url_encoding_invalid_unwanted = unwanted; + } + + /// Configures how the server reacts to leading whitespace on the request line. + pub fn set_requestline_leading_whitespace_unwanted(&mut self, unwanted: HtpUnwanted) { + self.requestline_leading_whitespace_unwanted = unwanted; + } + + /// Configures whether request data is decompressed. + pub fn set_request_decompression(&mut self, set: bool) { + self.request_decompression_enabled = set; + } + + /// Configures how many layers of compression we try to decompress.
+ pub fn set_decompression_layer_limit(&mut self, limit: Option) { + self.compression_options.set_layer_limit(limit); + } +} diff --git a/rust/htp/src/connection.rs b/rust/htp/src/connection.rs new file mode 100644 index 000000000000..0c81e10b8a34 --- /dev/null +++ b/rust/htp/src/connection.rs @@ -0,0 +1,135 @@ +use crate::log::{Log, Message}; +use std::{ + net::IpAddr, + sync::mpsc::{channel, Receiver, Sender}, + time::SystemTime, +}; +use time::OffsetDateTime; + +/// Export Connection ConnectionFlags +#[repr(C)] +pub struct ConnectionFlags; + +/// `Connection` Flags +impl ConnectionFlags { + /// Default, no flags raised. + pub const UNKNOWN: u8 = 0x00; + /// Seen pipelined requests. + pub const PIPELINED: u8 = 0x01; + /// Seen extra data after a HTTP 0.9 communication. + pub const HTTP_0_9_EXTRA: u8 = 0x02; +} + +/// Stores information about the session. +pub struct Connection { + /// Client IP address. + pub client_addr: Option, + /// Client port. + pub client_port: Option, + /// Server IP address. + pub server_addr: Option, + /// Server port. + pub server_port: Option, + + /// Messages channel associated with this connection. + log_channel: (Sender, Receiver), + + /// Parsing flags. + pub flags: u8, + /// When was this connection opened? + pub open_timestamp: OffsetDateTime, + /// When was this connection closed? + pub close_timestamp: OffsetDateTime, + /// Inbound data counter. + pub request_data_counter: u64, + /// Outbound data counter. + pub response_data_counter: u64, +} + +impl Default for Connection { + /// Returns a new Connection instance with default values. + fn default() -> Self { + Self { + client_addr: None, + client_port: None, + server_addr: None, + server_port: None, + log_channel: channel(), + flags: 0, + open_timestamp: OffsetDateTime::from(SystemTime::now()), + close_timestamp: OffsetDateTime::from(SystemTime::now()), + request_data_counter: 0, + response_data_counter: 0, + } + } +} + +impl Connection { + /// Opens a connection. 
This function will essentially only store the provided data + /// for future reference. + pub fn open( + &mut self, client_addr: Option, client_port: Option, + server_addr: Option, server_port: Option, timestamp: Option, + ) { + self.client_addr = client_addr; + self.client_port = client_port; + self.server_addr = server_addr; + self.server_port = server_port; + + // Remember when the connection was opened. + if let Some(timestamp) = timestamp { + self.open_timestamp = timestamp; + } + } + + /// Closes the connection. + pub fn close(&mut self, timestamp: Option) { + // Update timestamp. + if let Some(timestamp) = timestamp { + self.close_timestamp = timestamp; + } + } + + /// Keeps track of inbound packets and data. + pub fn track_inbound_data(&mut self, len: usize) { + self.request_data_counter = (self.request_data_counter).wrapping_add(len as u64); + } + + /// Keeps track of outbound packets and data. + pub fn track_outbound_data(&mut self, len: usize) { + self.response_data_counter = (self.response_data_counter).wrapping_add(len as u64); + } + + /// Return the log channel sender + pub fn get_sender(&self) -> &Sender { + &self.log_channel.0 + } + + /// Drains and returns a vector of all current logs received by the log channel + pub fn get_logs(&self) -> Vec { + let mut logs = Vec::with_capacity(8); + while let Ok(message) = self.log_channel.1.try_recv() { + logs.push(Log::new(self, message)) + } + logs + } + + /// Returns the next logged message received by the log channel + pub fn get_next_log(&self) -> Option { + self.log_channel + .1 + .try_recv() + .map(|message| Log::new(self, message)) + .ok() + } +} + +impl PartialEq for Connection { + /// Returns true if connections are the same, false otherwise. 
+ fn eq(&self, rhs: &Self) -> bool { + self.client_addr == rhs.client_addr + && self.client_port == rhs.client_port + && self.server_addr == rhs.server_addr + && self.server_port == rhs.server_port + } +} diff --git a/rust/htp/src/connection_parser.rs b/rust/htp/src/connection_parser.rs new file mode 100644 index 000000000000..2f8908c34847 --- /dev/null +++ b/rust/htp/src/connection_parser.rs @@ -0,0 +1,981 @@ +use crate::{ + bstr::Bstr, + config::Config, + connection::{Connection, ConnectionFlags}, + decompressors::HtpContentEncoding, + error::Result, + hook::DataHook, + log::Logger, + transaction::{HtpRequestProgress, HtpResponseProgress, HtpTransferCoding, Transaction}, + transactions::Transactions, + util::{FlagOperations, HtpFlags}, + HtpStatus, +}; +use std::{any::Any, borrow::Cow, cell::Cell, net::IpAddr, rc::Rc, time::SystemTime}; +use time::OffsetDateTime; + +/// Enumerates parsing state. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum State { + /// Default state. + NONE, + /// State once a transaction is processed or about to be processed. + IDLE, + /// State for request/response line parsing. + LINE, + /// State for header parsing. + HEADERS, + /// State for finalizing chunked body data parsing. + BODY_CHUNKED_DATA_END, + /// State for chunked body data. + BODY_CHUNKED_DATA, + /// Parse the chunked length state. + BODY_CHUNKED_LENGTH, + /// State to determine encoding of body data. + BODY_DETERMINE, + /// State for finalizing transaction side. + FINALIZE, + // Used by request_state only + /// State for determining the request protocol. + PROTOCOL, + /// State to determine if there is a CONNECT request. + CONNECT_CHECK, + /// State to determine if inbound parsing needs to be suspended. + CONNECT_PROBE_DATA, + /// State to determine if inbound parsing can continue if it was suspended. + CONNECT_WAIT_RESPONSE, + /// State to process request body data. + BODY_IDENTITY, + /// State to consume remaining data in request buffer for the HTTP 0.9 case. 
+ IGNORE_DATA_AFTER_HTTP_0_9, + // Used by response_state only + /// State to consume response remaining body data when content-length is unknown. + BODY_IDENTITY_STREAM_CLOSE, + /// State to consume response body data when content-length is known. + BODY_IDENTITY_CL_KNOWN, +} + +/// Enumerates all stream states. Each connection has two streams, one +/// inbound and one outbound. Their states are tracked separately. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpStreamState { + /// Default stream state. + NEW, + /// State when connection is open. + OPEN, + /// State when connection is closed. + CLOSED, + /// State when stream produces a fatal error. + ERROR, + /// State for a tunnelled stream. + TUNNEL, + /// State when parsing is suspended and not consumed in order. This is to + /// allow processing on another stream. + DATA_OTHER, + /// State when we should stop parsing the associated connection. + STOP, + /// State when all current data in the stream has been processed. + DATA, +} + +#[derive(Debug, Default, Clone)] +/// This structure is used to pass data (for example +/// request and response body buffers or gaps) to parsers. +pub struct ParserData<'a> { + /// Ref to the data buffer. + data: Option>, + // Length of data gap. Only set if is a gap. + gap_len: Option, + // Current position offset of the data to parse + position: Cell, + // Current callback data position + callback_position: usize, +} + +impl<'a> ParserData<'a> { + /// Returns a pointer to the raw data associated with Data. + /// This returns a pointer to the entire data chunk. + pub fn data_ptr(&self) -> *const u8 { + self.data() + .as_ref() + .map(|data| data.as_ptr()) + .unwrap_or(std::ptr::null()) + } + + /// Returns the unconsumed data + pub fn data(&self) -> Option<&[u8]> { + let data = self.data.as_ref()?; + if self.position.get() <= data.len() { + Some(&data[self.position.get()..]) + } else { + None + } + } + + /// Returns the length of the unconsumed data. 
+    pub fn len(&self) -> usize {
+        let offset = self.position.get();
+        match self.gap_len {
+            // A gap has a length but no bytes: report what is left of it,
+            // bottoming out at zero once the position reaches its end.
+            Some(gap_len) => gap_len.saturating_sub(offset),
+            None => self.as_slice().len(),
+        }
+    }
+
+    /// Number of bytes consumed so far, i.e. the current parse offset.
+    fn consumed_len(&self) -> usize {
+        self.position.get()
+    }
+
+    /// Borrows the bytes that have not been consumed yet.
+    ///
+    /// Yields an empty slice when there is no data buffer or when the parse
+    /// offset lies beyond the end of the buffer.
+    pub fn as_slice(&self) -> &[u8] {
+        let offset = self.position.get();
+        match self.data.as_ref() {
+            Some(bytes) if offset <= bytes.len() => &bytes[offset..],
+            _ => b"",
+        }
+    }
+
+    /// Tells whether this chunk represents a gap rather than real bytes.
+    pub fn is_gap(&self) -> bool {
+        self.gap_len.is_some()
+    }
+
+    /// True once nothing is left to consume (see `len`).
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Moves the parse offset to an absolute `position`.
+    fn set_position(&self, position: usize) {
+        self.position.set(position);
+    }
+
+    /// Moves the parse offset forward by `consumed` bytes.
+    pub fn consume(&self, consumed: usize) {
+        let advanced = self.position.get() + consumed;
+        self.position.set(advanced);
+    }
+
+    /// Moves the parse offset back by `unconsume` bytes, stopping at the
+    /// start of the data when asked to rewind further than was consumed.
+    fn unconsume(&self, unconsume: usize) {
+        let rewound = self.position.get().saturating_sub(unconsume);
+        self.position.set(rewound);
+    }
+
+    /// Converts into a `ParserData` that owns its buffer, keeping the gap
+    /// length and both offsets unchanged.
+    pub fn into_owned(self) -> ParserData<'static> {
+        let ParserData {
+            data,
+            gap_len,
+            position,
+            callback_position,
+        } = self;
+        ParserData {
+            data: data.map(|borrowed| Cow::Owned(borrowed.into_owned())),
+            gap_len,
+            position,
+            callback_position,
+        }
+    }
+
+    /// Callback data is raw data buffer content that is passed to the
+    /// application via the header and trailer data hooks.
+    ///
+    /// This function will return any data that has been consumed but not
+    /// yet returned from this function.
+    pub fn callback_data(&mut self) -> &[u8] {
+        if let Some(data) = self.data.as_ref() {
+            // Only hand out the window `callback_position..position` when both
+            // offsets are consistent with each other and with the buffer.
+            if self.position.get() <= data.len() && self.callback_position <= self.position.get() {
+                let d = &data[self.callback_position..self.position.get()];
+                // Remember how far we have reported so the next call starts here.
+                self.callback_position = self.position.get();
+                return d;
+            }
+        }
+        // No buffer (or inconsistent offsets): nothing to report.
+        b""
+    }
+
+    /// Sets the callback start location to the current parsing location,
+    /// so that a later `callback_data` call skips everything consumed so far.
+    pub fn reset_callback_start(&mut self) {
+        self.callback_position = self.position.get();
+    }
+}
+
+/// Wraps an optional borrowed buffer; `None` yields a `ParserData` with
+/// neither data nor gap length. Offsets start at zero.
+impl<'a> From> for ParserData<'a> {
+    fn from(data: Option<&'a [u8]>) -> Self {
+        ParserData {
+            data: data.map(Cow::Borrowed),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+/// Wraps a borrowed byte slice without copying it. Offsets start at zero.
+impl<'a> From<&'a [u8]> for ParserData<'a> {
+    fn from(data: &'a [u8]) -> Self {
+        ParserData {
+            data: Some(Cow::Borrowed(data)),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+/// Takes ownership of a byte vector. Offsets start at zero.
+impl From> for ParserData<'static> {
+    fn from(data: Vec) -> Self {
+        ParserData {
+            data: Some(Cow::Owned(data)),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+/// Borrows the contents of a byte vector without copying. Offsets start at zero.
+impl<'a> From<&'a Vec> for ParserData<'a> {
+    fn from(data: &'a Vec) -> Self {
+        ParserData {
+            data: Some(Cow::Borrowed(data.as_slice())),
+            gap_len: None,
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+/// Builds a gap of `gap_len` bytes: no data buffer, only a length.
+impl<'a> From for ParserData<'a> {
+    fn from(gap_len: usize) -> Self {
+        ParserData {
+            data: None,
+            gap_len: Some(gap_len),
+            position: Cell::new(0),
+            callback_position: 0,
+        }
+    }
+}
+
+/// Builds a `ParserData` from a raw pointer/length pair:
+/// a null pointer with a non-zero length becomes a gap of that length,
+/// a null pointer with zero length becomes an empty buffer, and a
+/// non-null pointer is viewed as a borrowed slice of `len` bytes.
+impl<'a> From<(*const u8, usize)> for ParserData<'a> {
+    fn from((data, len): (*const u8, usize)) -> Self {
+        if data.is_null() {
+            if len > 0 {
+                ParserData::from(len)
+            } else {
+                ParserData::from(b"".as_ref())
+            }
+        } else {
+            // SAFETY: nothing here validates the pointer. `from_raw_parts`
+            // requires `data` to be valid for reads of `len` bytes for as long
+            // as the returned value is used; the caller must guarantee that.
+            unsafe { ParserData::from(std::slice::from_raw_parts(data, len)) }
+        }
+    }
+}
+
+/// Stores information about the parsing process and associated transactions.
+pub struct ConnectionParser { + // General fields + /// The logger structure associated with this parser + pub logger: Logger, + /// A reference to the current parser configuration structure. + pub cfg: Rc, + /// The connection structure associated with this parser. + pub conn: Connection, + /// Opaque user data associated with this parser. + pub user_data: Option>, + // Request parser fields + /// Parser inbound status. Starts as OK, but may turn into ERROR. + pub request_status: HtpStreamState, + /// Parser outbound status. Starts as OK, but may turn into ERROR. + pub response_status: HtpStreamState, + /// When true, this field indicates that there is unprocessed inbound data, and + /// that the response parsing code should stop at the end of the current request + /// in order to allow more requests to be produced. + pub response_data_other_at_tx_end: bool, + /// The time when the last request data chunk was received. + pub request_timestamp: OffsetDateTime, + /// How many bytes from the last input chunk have we consumed + /// This is mostly used from callbacks, where the caller + /// wants to know how far into the last chunk the parser is. + pub request_bytes_consumed: usize, + /// How many data chunks does the inbound connection stream consist of? + pub request_chunk_count: usize, + /// The index of the first chunk used in the current request. + pub request_chunk_request_index: usize, + /// Used to buffer a line of inbound data when buffering cannot be avoided. + pub request_buf: Bstr, + /// Stores the current value of a folded request header. Such headers span + /// multiple lines, and are processed only when all data is available. + pub request_header: Option, + /// The request body length declared in a valid request header. The key here + /// is "valid". This field will not be populated if the request contains both + /// a Transfer-Encoding header and a Content-Length header. 
+ pub request_content_length: Option, + /// Holds the remaining request body length that we expect to read. This + /// field will be available only when the length of a request body is known + /// in advance, i.e. when request headers contain a Content-Length header. + pub request_body_data_left: Option, + /// Holds the amount of data that needs to be read from the + /// current data chunk. Only used with chunked request bodies. + pub request_chunked_length: Option, + /// Current request parser state. + pub request_state: State, + /// Previous request parser state. Used to detect state changes. + pub request_state_previous: State, + /// The hook that should be receiving raw connection data. + pub request_data_receiver_hook: Option, + + // Response parser fields + /// The time when the last response data chunk was received. + pub response_timestamp: OffsetDateTime, + /// How many bytes from the last input chunk have we consumed + /// This is mostly used from callbacks, where the caller + /// wants to know how far into the last chunk the parser is. + pub response_bytes_consumed: usize, + /// Used to buffer a line of outbound data when buffering cannot be avoided. + pub response_buf: Bstr, + /// Stores the current value of a folded response header. Such headers span + /// multiple lines, and are processed only when all data is available. + pub response_header: Option, + /// The length of the current response body as presented in the + /// Content-Length response header. + pub response_content_length: Option, + /// The remaining length of the current response body, if known. Set to None otherwise. + pub response_body_data_left: Option, + /// Holds the amount of data that needs to be read from the + /// current response data chunk. Only used with chunked response bodies. + pub response_chunked_length: Option, + /// Current response parser state. + pub response_state: State, + /// Previous response parser state. 
+ pub response_state_previous: State, + /// The hook that should be receiving raw connection data. + pub response_data_receiver_hook: Option, + + /// Transactions processed by this parser + transactions: Transactions, +} + +impl std::fmt::Debug for ConnectionParser { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("ConnectionParser") + .field("request_status", &self.request_status) + .field("response_status", &self.response_status) + .field("request_index", &self.request_index()) + .field("response_index", &self.response_index()) + .finish() + } +} + +impl ConnectionParser { + /// Creates a new ConnectionParser with a preconfigured `Config` struct. + pub fn new(cfg: Config) -> Self { + let cfg = Rc::new(cfg); + let conn = Connection::default(); + let logger = Logger::new(conn.get_sender(), cfg.log_level); + Self { + logger: logger.clone(), + cfg: Rc::clone(&cfg), + conn, + user_data: None, + request_status: HtpStreamState::NEW, + response_status: HtpStreamState::NEW, + response_data_other_at_tx_end: false, + request_timestamp: OffsetDateTime::from(SystemTime::now()), + request_bytes_consumed: 0, + request_chunk_count: 0, + request_chunk_request_index: 0, + request_buf: Bstr::new(), + request_header: None, + request_content_length: None, + request_body_data_left: None, + request_chunked_length: None, + request_state: State::IDLE, + request_state_previous: State::NONE, + request_data_receiver_hook: None, + response_timestamp: OffsetDateTime::from(SystemTime::now()), + response_bytes_consumed: 0, + response_buf: Bstr::new(), + response_header: None, + response_content_length: None, + response_body_data_left: None, + response_chunked_length: None, + response_state: State::IDLE, + response_state_previous: State::NONE, + response_data_receiver_hook: None, + transactions: Transactions::new(&cfg, &logger), + } + } + + /// Get the current request transaction + pub fn request(&mut self) -> Option<&Transaction> { + 
self.transactions.request() + } + + /// Get the current request transaction + pub fn request_mut(&mut self) -> Option<&mut Transaction> { + self.transactions.request_mut() + } + + /// Get the current response transaction + pub fn response(&mut self) -> Option<&Transaction> { + self.transactions.response() + } + + /// Get the current response transaction + pub fn response_mut(&mut self) -> Option<&mut Transaction> { + self.transactions.response_mut() + } + + /// Advance to the next request + /// Returns the next request transaction id + pub fn request_next(&mut self) -> usize { + // Detect pipelining. + if self.transactions.request_index() > self.transactions.response_index() { + self.conn.flags.set(ConnectionFlags::PIPELINED) + } + self.transactions.request_next() + } + + /// Advance to the next response + /// Returns the next response transaction id + pub fn response_next(&mut self) -> usize { + self.transactions.response_next() + } + + /// Get the index of the request transaction + pub fn request_index(&self) -> usize { + self.transactions.request_index() + } + + /// Get the index of the response transaction + pub fn response_index(&self) -> usize { + self.transactions.response_index() + } + + /// Get the number of transactions processed up to now + pub fn tx_size(&self) -> usize { + self.transactions.size() + } + + /// Get a specific transaction + pub fn tx(&self, index: usize) -> Option<&Transaction> { + self.transactions.get(index) + } + + /// Get a specific transaction + pub fn tx_mut(&mut self, index: usize) -> Option<&mut Transaction> { + self.transactions.get_mut(index) + } + + /// Handle the current state to be processed. 
+ pub fn handle_request_state(&mut self, data: &mut ParserData) -> Result<()> { + match self.request_state { + State::NONE => Err(HtpStatus::ERROR), + State::IDLE => self.request_idle(data), + State::IGNORE_DATA_AFTER_HTTP_0_9 => self.request_ignore_data_after_http_0_9(data), + State::LINE => self.request_line(data), + State::PROTOCOL => self.request_protocol(data), + State::HEADERS => self.request_headers(data), + State::CONNECT_WAIT_RESPONSE => self.request_connect_wait_response(), + State::CONNECT_CHECK => self.request_connect_check(), + State::CONNECT_PROBE_DATA => self.request_connect_probe_data(data), + State::BODY_DETERMINE => self.request_body_determine(), + State::BODY_CHUNKED_DATA => self.request_body_chunked_data(data), + State::BODY_CHUNKED_LENGTH => self.request_body_chunked_length(data), + State::BODY_CHUNKED_DATA_END => self.request_body_chunked_data_end(data), + State::BODY_IDENTITY => self.request_body_identity(data), + State::FINALIZE => self.request_finalize(data), + // These are only used by response_state + _ => Err(HtpStatus::ERROR), + } + } + + /// Handle the current state to be processed. 
+    pub fn handle_response_state(&mut self, data: &mut ParserData) -> Result<()> {
+        // Dispatch to the handler for the current response parser state.
+        match self.response_state {
+            // NONE is not a state that can be processed.
+            State::NONE => Err(HtpStatus::ERROR),
+            State::IDLE => self.response_idle(data),
+            State::LINE => self.response_line(data),
+            State::HEADERS => self.response_headers(data),
+            State::BODY_DETERMINE => self.response_body_determine(data),
+            State::BODY_CHUNKED_DATA => self.response_body_chunked_data(data),
+            State::BODY_CHUNKED_LENGTH => self.response_body_chunked_length(data),
+            State::BODY_CHUNKED_DATA_END => self.response_body_chunked_data_end(data),
+            State::FINALIZE => self.response_finalize(data),
+            State::BODY_IDENTITY_STREAM_CLOSE => self.response_body_identity_stream_close(data),
+            State::BODY_IDENTITY_CL_KNOWN => self.response_body_identity_cl_known(data),
+            // These are only used by request_state
+            _ => Err(HtpStatus::ERROR),
+        }
+    }
+
+    /// Closes only the request (inbound) side of the connection.
+    ///
+    /// Marks the request stream as CLOSED unless it is already in ERROR, then
+    /// runs the request parser once more with empty data. The response stream
+    /// and the underlying connection are not touched; see `close` for that.
+    pub fn request_close(&mut self, timestamp: Option) {
+        // Update internal flags
+        if self.request_status != HtpStreamState::ERROR {
+            self.request_status = HtpStreamState::CLOSED
+        }
+        // Call the parsers one last time, which will allow them
+        // to process the events that depend on stream closure
+        self.request_data(ParserData::default(), timestamp);
+    }
+
+    /// Closes the connection associated with this parser.
+    ///
+    /// Closes the underlying connection, marks both streams as CLOSED (a
+    /// stream already in ERROR keeps that status), runs both parsers once more
+    /// with empty data and, if `cfg.flush_incomplete` is set, flushes the
+    /// transactions that never completed.
+    pub fn close(&mut self, timestamp: Option) {
+        // Close the underlying connection.
+        self.conn.close(timestamp);
+        // Update internal flags
+        if self.request_status != HtpStreamState::ERROR {
+            self.request_status = HtpStreamState::CLOSED
+        }
+        if self.response_status != HtpStreamState::ERROR {
+            self.response_status = HtpStreamState::CLOSED
+        }
+        // Call the parsers one last time, which will allow them
+        // to process the events that depend on stream closure
+        self.request_data(ParserData::default(), timestamp);
+        self.response_data(ParserData::default(), timestamp);
+
+        if self.cfg.flush_incomplete {
+            self.flush_incomplete_transactions()
+        }
+    }
+
+    /// Clears the request body length bookkeeping and makes the current chunk
+    /// count the first chunk of the current request.
+    ///
+    /// This function is most likely not used and/or not needed.
+    pub fn request_reset(&mut self) {
+        self.request_content_length = None;
+        self.request_body_data_left = None;
+        self.request_chunk_request_index = self.request_chunk_count;
+    }
+
+    /// Returns the number of bytes consumed from the current data chunks so far.
+    pub fn request_data_consumed(&self) -> usize {
+        self.request_bytes_consumed
+    }
+
+    /// Consume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes consumed so far.
+    pub fn request_data_consume(&mut self, input: &ParserData, consumed: usize) {
+        input.consume(consumed);
+        // The counter mirrors the absolute offset into `input`, not a running sum.
+        self.request_bytes_consumed = input.consumed_len();
+    }
+
+    /// Unconsume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes are consumed.
+    /// If the requested number of bytes is larger than the number of bytes
+    /// already consumed then the parser will be unwound to the beginning.
+    pub fn request_data_unconsume(&mut self, input: &mut ParserData, unconsume: usize) {
+        input.unconsume(unconsume);
+        self.request_bytes_consumed = input.consumed_len();
+    }
+
+    /// Consume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes consumed so far.
+    pub fn response_data_consume(&mut self, input: &ParserData, consumed: usize) {
+        input.consume(consumed);
+        // The counter mirrors the absolute offset into `input`, not a running sum.
+        self.response_bytes_consumed = input.consumed_len();
+    }
+
+    /// Unconsume the given number of bytes from the ParserData and update
+    /// the internal counter for how many bytes are consumed.
+    /// If the requested number of bytes is larger than the number of bytes
+    /// already consumed then the parser will be unwound to the beginning.
+    pub fn response_data_unconsume(&mut self, input: &mut ParserData, unconsume: usize) {
+        input.unconsume(unconsume);
+        self.response_bytes_consumed = input.consumed_len();
+    }
+
+    /// Returns the number of bytes consumed from the most recent outbound data chunk. Normally, an invocation
+    /// of response_data() will consume all data from the supplied buffer, but there are circumstances
+    /// where only partial consumption is possible. In such cases DATA_OTHER will be returned.
+    /// Consumed bytes are no longer necessary, but the remainder of the buffer will be saved
+    /// for later.
+    pub fn response_data_consumed(&self) -> usize {
+        self.response_bytes_consumed
+    }
+
+    /// Opens the connection.
+    ///
+    /// Only valid while both streams are still in the NEW state; otherwise a
+    /// CONNECTION_ALREADY_OPEN error is logged and nothing else changes.
+    /// On success the endpoint details and timestamp are handed to the
+    /// underlying connection and both streams move to OPEN.
+    pub fn open(
+        &mut self, client_addr: Option, client_port: Option,
+        server_addr: Option, server_port: Option, timestamp: Option,
+    ) {
+        // Check connection parser state first.
+        if self.request_status != HtpStreamState::NEW || self.response_status != HtpStreamState::NEW
+        {
+            htp_error!(
+                self.logger,
+                HtpLogCode::CONNECTION_ALREADY_OPEN,
+                "Connection is already open"
+            );
+            return;
+        }
+        self.conn.open(
+            client_addr,
+            client_port,
+            server_addr,
+            server_port,
+            timestamp,
+        );
+        self.request_status = HtpStreamState::OPEN;
+        self.response_status = HtpStreamState::OPEN;
+    }
+
+    /// Set the user data, replacing any value stored previously.
+    pub fn set_user_data(&mut self, data: Box) {
+        self.user_data = Some(data);
+    }
+
+    /// Get a reference to the user data.
+ pub fn user_data(&self) -> Option<&T> { + self.user_data + .as_ref() + .and_then(|ud| ud.downcast_ref::()) + } + + /// Get a mutable reference to the user data. + pub fn user_data_mut(&mut self) -> Option<&mut T> { + self.user_data + .as_mut() + .and_then(|ud| ud.downcast_mut::()) + } + + /// Initialize request parsing, change state to LINE, + /// and invoke all registered callbacks. + /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP if one of the + /// callbacks does not want to follow the transaction any more. + pub fn state_request_start(&mut self) -> Result<()> { + // Change state into request line parsing. + self.request_state = State::LINE; + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + req.unwrap().request_progress = HtpRequestProgress::LINE; + // Run hook REQUEST_START. + self.cfg + .hook_request_start + .clone() + .run_all(self, self.request_index())?; + Ok(()) + } + + /// Change transaction state to HEADERS and invoke all + /// registered callbacks. + /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP if one of the + /// callbacks does not want to follow the transaction any more. + pub fn state_request_headers(&mut self, input: &mut ParserData) -> Result<()> { + // Finalize sending raw header data + self.request_receiver_finalize_clear(input)?; + // If we're in HTP_REQ_HEADERS that means that this is the + // first time we're processing headers in a request. Otherwise, + // we're dealing with trailing headers. + let req = self.request(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let request_progress = req.unwrap().request_progress; + if request_progress > HtpRequestProgress::HEADERS { + // Request trailers. + // Run hook HTP_REQUEST_TRAILER. + self.cfg + .hook_request_trailer + .clone() + .run_all(self, self.request_index())?; + // Completed parsing this request; finalize it now. 
+ self.request_state = State::FINALIZE; + } else if request_progress >= HtpRequestProgress::LINE { + // Request headers. + // Did this request arrive in multiple data chunks? + let req = self.transactions.request_mut().unwrap(); + if self.request_chunk_count != self.request_chunk_request_index { + req.flags.set(HtpFlags::MULTI_PACKET_HEAD) + } + req.process_request_headers()?; + // Run hook REQUEST_HEADERS. + self.cfg + .hook_request_headers + .clone() + .run_all(self, self.request_index())?; + self.request_initialize_decompressors()?; + + // We still proceed if the request is invalid. + self.request_state = State::CONNECT_CHECK; + } else { + htp_warn!( + self.logger, + HtpLogCode::RESPONSE_BODY_INTERNAL_ERROR, + format!( + "[Internal Error] Invalid tx progress: {:?}", + request_progress + ) + ); + return Err(HtpStatus::ERROR); + } + Ok(()) + } + + /// Change transaction state to PROTOCOL and invoke all + /// registered callbacks. + /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP if one of the + /// callbacks does not want to follow the transaction any more. + pub fn state_request_line(&mut self) -> Result<()> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + req.unwrap().parse_request_line()?; + // Run hook REQUEST_URI_NORMALIZE. + self.cfg + .hook_request_uri_normalize + .clone() + .run_all(self, self.request_index())?; + // Run hook REQUEST_LINE. + self.cfg + .hook_request_line + .clone() + .run_all(self, self.request_index())?; + let logger = self.logger.clone(); + let req = self.request_mut().unwrap(); + if let Some(parsed_uri) = req.parsed_uri.as_mut() { + let (partial_normalized_uri, complete_normalized_uri) = + parsed_uri.generate_normalized_uri(Some(logger)); + req.partial_normalized_uri = partial_normalized_uri; + req.complete_normalized_uri = complete_normalized_uri; + } + // Move on to the next phase. 
+ self.request_state = State::PROTOCOL; + Ok(()) + } + + /// Advance state after processing request headers. + /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP + /// if one of the callbacks does not want to follow the transaction any more. + pub fn state_request_complete(&mut self, input: &mut ParserData) -> Result<()> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + if req.request_progress != HtpRequestProgress::COMPLETE { + // Finalize request body. + if req.request_has_body() { + self.request_body_data(None)?; + } + self.request_mut().unwrap().request_progress = HtpRequestProgress::COMPLETE; + // Run hook REQUEST_COMPLETE. + self.cfg + .hook_request_complete + .clone() + .run_all(self, self.request_index())?; + + // Clear request data + self.request_receiver_finalize_clear(input)?; + } + // Determine what happens next, and remove this transaction from the parser. + self.request_state = if self.request().unwrap().is_protocol_0_9 { + State::IGNORE_DATA_AFTER_HTTP_0_9 + } else { + State::IDLE + }; + // Check if the entire transaction is complete. + self.finalize(self.request_index())?; + self.request_next(); + Ok(()) + } + + /// Determine if the transaction is complete and run any hooks. + fn finalize(&mut self, tx_index: usize) -> Result<()> { + if let Some(tx) = self.tx(tx_index) { + if !tx.is_complete() { + return Ok(()); + } + // Disconnect transaction from the parser. + // Run hook TRANSACTION_COMPLETE. + self.cfg + .hook_transaction_complete + .clone() + .run_all(self, tx_index)?; + } + Ok(()) + } + + /// Advance state to LINE, or BODY if http version is 0.9. + /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP + /// if one of the callbacks does not want to follow the transaction any more. 
+ pub fn state_response_start(&mut self) -> Result<()> { + // Change state into response line parsing, except if we're following + // a HTTP/0.9 request (no status line or response headers). + let tx = self.response_mut(); + if tx.is_none() { + return Err(HtpStatus::ERROR); + } + let tx = tx.unwrap(); + + if tx.is_protocol_0_9 { + tx.response_transfer_coding = HtpTransferCoding::IDENTITY; + tx.response_content_encoding_processing = HtpContentEncoding::NONE; + tx.response_progress = HtpResponseProgress::BODY; + self.response_state = State::BODY_IDENTITY_STREAM_CLOSE; + self.response_body_data_left = None + } else { + tx.response_progress = HtpResponseProgress::LINE; + self.response_state = State::LINE + } + // Run hook RESPONSE_START. + self.cfg + .hook_response_start + .clone() + .run_all(self, self.response_index())?; + // If at this point we have no method and no uri and our status + // is still REQ_LINE, we likely have timed out request + // or a overly long request + let tx = self.response_mut().unwrap(); + if tx.request_method.is_none() + && tx.request_uri.is_none() + && self.request_state == State::LINE + { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_INCOMPLETE, + "Request line incomplete" + ); + } + Ok(()) + } + + /// Advance state after processing response headers. + /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP + /// if one of the callbacks does not want to follow the transaction any more. + pub fn state_response_headers(&mut self, input: &mut ParserData) -> Result<()> { + // Finalize sending raw header data. + self.response_receiver_finalize_clear(input)?; + // Run hook RESPONSE_HEADERS. + self.cfg + .hook_response_headers + .clone() + .run_all(self, self.response_index())?; + self.response_initialize_decompressors() + } + + /// Change transaction state to RESPONSE_LINE and invoke registered callbacks. 
+ /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP + /// if one of the callbacks does not want to follow the transaction any more. + pub fn state_response_line(&mut self) -> Result<()> { + // Is the response line valid? + let tx = self.response_mut(); + if tx.is_none() { + return Err(HtpStatus::ERROR); + } + let tx = tx.unwrap(); + + tx.validate_response_line(); + let index = tx.index; + // Run hook HTP_RESPONSE_LINE + self.cfg.hook_response_line.clone().run_all(self, index) + } + + /// Change transaction state to COMPLETE and invoke registered callbacks. + /// + /// Returns HtpStatus::OK on success; HtpStatus::ERROR on error, HtpStatus::STOP + /// if one of the callbacks does not want to follow the transaction any more. + pub fn state_response_complete(&mut self, input: &mut ParserData) -> Result<()> { + let response_index = self.response_index(); + let tx = self.response_mut(); + if tx.is_none() { + return Err(HtpStatus::ERROR); + } + let tx = tx.unwrap(); + if tx.response_progress != HtpResponseProgress::COMPLETE { + tx.response_progress = HtpResponseProgress::COMPLETE; + // Run the last RESPONSE_BODY_DATA HOOK, but only if there was a response body present. + if tx.response_transfer_coding != HtpTransferCoding::NO_BODY { + let _ = self.response_body_data(None); + } + // Run hook RESPONSE_COMPLETE. 
+ self.cfg + .hook_response_complete + .clone() + .run_all(self, response_index)?; + + // Clear the data receivers hook if any + self.response_receiver_finalize_clear(input)?; + } + // Check if we want to signal the caller to send request data + self.request_parser_check_waiting()?; + // Otherwise finalize the transaction + self.finalize(response_index)?; + self.response_next(); + self.response_state = State::IDLE; + Ok(()) + } + + /// Check if we had previously signalled the caller to give us response + /// data, and now we are ready to receive it + fn request_parser_check_waiting(&mut self) -> Result<()> { + // Check if the inbound parser is waiting on us. If it is, that means that + // there might be request data that the inbound parser hasn't consumed yet. + // If we don't stop parsing we might encounter a response without a request, + // which is why we want to return straight away before processing any data. + // + // This situation will occur any time the parser needs to see the server + // respond to a particular situation before it can decide how to proceed. For + // example, when a CONNECT is sent, different paths are used when it is accepted + // and when it is not accepted. + // + // It is not enough to check only in_status here. Because of pipelining, it's possible + // that many inbound transactions have been processed, and that the parser is + // waiting on a response that we have not seen yet. + if self.response_status == HtpStreamState::DATA_OTHER + && self.response_index() == self.request_index() + { + return Err(HtpStatus::DATA_OTHER); + } + + // Do we have a signal to yield to inbound processing at + // the end of the next transaction? + if self.response_data_other_at_tx_end { + // We do. Let's yield then. 
+ self.response_data_other_at_tx_end = false; + return Err(HtpStatus::DATA_OTHER); + } + Ok(()) + } + + /// Remove the given transaction from the parser + pub fn remove_tx(&mut self, tx_id: usize) { + self.transactions.remove(tx_id); + } + + /// For each transaction that is started but not completed, invoke the + /// transaction complete callback and remove it from the transactions list. + /// + /// This function is meant to be used before dropping the ConnectionParser + /// so any incomplete transactions can be processed by the caller. + /// + /// Safety: must only be called after the current transaction is closed + fn flush_incomplete_transactions(&mut self) { + let mut to_remove = Vec::::new(); + for tx in &mut self.transactions { + if tx.is_started() && !tx.is_complete() { + to_remove.push(tx.index); + } + } + for index in to_remove { + self.cfg + .hook_transaction_complete + .clone() + .run_all(self, index) + .ok(); + if self.cfg.tx_auto_destroy { + self.transactions.remove(index); + } + } + } +} diff --git a/rust/htp/src/decompressors.rs b/rust/htp/src/decompressors.rs new file mode 100644 index 000000000000..f2f132176954 --- /dev/null +++ b/rust/htp/src/decompressors.rs @@ -0,0 +1,1064 @@ +use std::{ + io::{Cursor, Write}, + time::Instant, +}; + +/// Buffer compression output to this chunk size. +const ENCODING_CHUNK_SIZE: usize = 8192; + +/// Default LZMA dictionary memory limit in bytes. +const DEFAULT_LZMA_MEMLIMIT: usize = 1_048_576; +/// Default number of LZMA layers to pass to the decompressor. +const DEFAULT_LZMA_LAYERS: u32 = 1; +/// Default max output size for a compression bomb in bytes (1 MB default). +const DEFAULT_BOMB_LIMIT: u64 = 1_048_576; +/// Default compressed-to-decrompressed ratio that should not be exceeded during decompression. +const DEFAULT_BOMB_RATIO: u64 = 2048; +/// Default time limit for a decompression bomb in microseconds. 
+const DEFAULT_TIME_LIMIT: u32 = 100_000; +/// Default number of iterations before checking the time limit. +const DEFAULT_TIME_FREQ_TEST: u32 = 256; +/// Default number of layers that will be decompressed +const DEFAULT_LAYER_LIMIT: u32 = 2; + +#[derive(Copy, Clone)] +/// Decompression options +pub struct Options { + /// lzma options or None to disable lzma. + lzma: Option, + /// Max number of LZMA layers to pass to the decompressor. + lzma_layers: Option, + /// max output size for a compression bomb. + bomb_limit: u64, + /// max compressed-to-decompressed ratio that should not be exceeded during decompression. + bomb_ratio: u64, + /// max time for a decompression bomb in microseconds. + time_limit: u32, + /// number of iterations before checking the time_limit. + time_test_freq: u32, + /// Max number of layers of compression we will decompress + layer_limit: Option, +} + +impl Options { + /// Get the lzma memlimit. + /// + /// A value of 0 indicates that lzma is disabled. + pub fn get_lzma_memlimit(&self) -> usize { + if let Some(options) = self.lzma { + options.memlimit.unwrap_or(0) + } else { + 0 + } + } + + /// Set the lzma memlimit. + /// + /// A value of 0 will disable lzma. + pub fn set_lzma_memlimit(&mut self, memlimit: usize) { + self.lzma = if memlimit == 0 { + None + } else { + Some(lzma_rs::decompress::Options { + memlimit: Some(memlimit), + ..Default::default() + }) + } + } + + /// Configures the maximum layers passed to lzma-rs. + pub fn set_lzma_layers(&mut self, layers: Option) { + self.lzma_layers = layers; + } + + /// Gets the maximum layers passed to lzma-rs. + pub fn get_lzma_layers(&self) -> Option { + self.lzma_layers + } + + /// Get the compression bomb limit. + pub fn get_bomb_limit(&self) -> u64 { + self.bomb_limit + } + + /// Set the compression bomb limit. + pub fn set_bomb_limit(&mut self, bomblimit: u64) { + self.bomb_limit = bomblimit; + } + + /// Get the bomb ratio.
+ pub fn get_bomb_ratio(&self) -> u64 { + self.bomb_ratio + } + + /// Set the bomb ratio. + pub fn set_bomb_ratio(&mut self, bomb_ratio: u64) { + self.bomb_ratio = bomb_ratio; + } + + /// Get the compression time limit in microseconds. + pub fn get_time_limit(&self) -> u32 { + self.time_limit + } + + /// Set the compression time limit in microseconds. + pub fn set_time_limit(&mut self, time_limit: u32) { + self.time_limit = time_limit + } + + /// Get the time test frequency. + pub fn get_time_test_freq(&self) -> u32 { + self.time_test_freq + } + + /// Set the time test frequency. + pub fn set_time_test_freq(&mut self, time_test_freq: u32) { + self.time_test_freq = time_test_freq; + } + + /// Get the decompression layer limit. + pub fn get_layer_limit(&self) -> Option { + self.layer_limit + } + + /// Set the decompression layer limit. + pub fn set_layer_limit(&mut self, layer_limit: Option) { + self.layer_limit = layer_limit; + } +} + +impl Default for Options { + fn default() -> Self { + Self { + lzma: Some(lzma_rs::decompress::Options { + memlimit: Some(DEFAULT_LZMA_MEMLIMIT), + ..Default::default() + }), + lzma_layers: Some(DEFAULT_LZMA_LAYERS), + bomb_limit: DEFAULT_BOMB_LIMIT, + bomb_ratio: DEFAULT_BOMB_RATIO, + time_limit: DEFAULT_TIME_LIMIT, + time_test_freq: DEFAULT_TIME_FREQ_TEST, + layer_limit: Some(DEFAULT_LAYER_LIMIT), + } + } +} + +/// Describes a decompressor that is able to restart and passthrough data. +/// Actual decompression is done using the `Write` trait. +pub trait Decompress: Write { + /// Restarts the decompressor to try the same one again or a different one. + fn restart(&mut self) -> std::io::Result<()>; + + /// Tells all decompressors to passthrough their data instead of + /// decompressing to directly call the callback + fn set_passthrough(&mut self, passthrough: bool); + + /// Indicates that we have reached the end of data. This would be equivalent + /// to sending a NULL pointer in C and may be used by the hooks. 
+ fn finish(&mut self) -> std::io::Result<()>; +} + +/// Type alias for callback function. +pub type CallbackFn = Box) -> Result>; + +/// Simple wrapper around a closure to chain it to the other decompressors +pub struct CallbackWriter(CallbackFn); + +impl CallbackWriter { + /// Create a new CallbackWriter. + pub fn new(cbk: CallbackFn) -> Self { + CallbackWriter(cbk) + } +} + +impl Write for CallbackWriter { + fn write(&mut self, data: &[u8]) -> std::result::Result { + (self.0)(Some(data)) + } + + fn flush(&mut self) -> std::result::Result<(), std::io::Error> { + Ok(()) + } +} + +impl Decompress for CallbackWriter { + fn restart(&mut self) -> std::io::Result<()> { + Ok(()) + } + + fn set_passthrough(&mut self, _passthrough: bool) {} + + fn finish(&mut self) -> std::io::Result<()> { + (self.0)(None)?; + Ok(()) + } +} + +/// Type of compression. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpContentEncoding { + /// No compression. + NONE, + /// Gzip compression. + GZIP, + /// Deflate compression (RFC 1951). + DEFLATE, + /// Deflate compression with zlib header (RFC 1950) + ZLIB, + /// LZMA compression. + LZMA, + /// Error retrieving the content encoding. + ERROR, +} + +/// The outer decompressor tracks the number of callbacks and time spent +/// decompressing. +pub struct Decompressor { + /// First decompressor to call + inner: Box, + /// Time we started decompression + time_before: Option, + /// Time spent decompressing so far in microseconds (usec) + time_spent: u64, + /// Number of times the callback was called + nb_callbacks: u32, +} + +impl Decompressor { + /// Creates a new decompressor from a struct implementing the Decompress trait. + fn new(inner: Box) -> Self { + Self { + inner, + time_before: None, + time_spent: 0, + nb_callbacks: 0, + } + } + + /// Creates a new decompressor from a callback to call when decompressed + /// data is ready. 
+ fn callback(callback: CallbackFn) -> Self { + Self::new(Box::new(CallbackWriter::new(callback))) + } + + /// Prepends a decompressor to this chain by consuming `self.inner` + /// and creating a new Decompressor. + /// + /// Note that decompressors should be added in the same order the data was + /// compressed, starting with the callback. + /// + /// ``` + /// use htp::decompressors::{HtpContentEncoding, Decompressor}; + /// + /// // Example for "Content-Encoding: gzip, deflate" + /// let mut decompressor = Decompressor::new_with_callback(HtpContentEncoding::GZIP, + /// Box::new(|data: Option<&[u8]>| -> Result { + /// if let Some(data) = data { + /// println!("CALLBACK: {}", data.len()); + /// Ok(data.len()) + /// } else { + /// println!("CALLBACK: end of data"); + /// Ok(0) + /// } + /// }), Default::default()).unwrap(); + /// + /// decompressor = decompressor.prepend(HtpContentEncoding::DEFLATE, Default::default()).unwrap(); + /// + /// // Decompressors will be called in this order: + /// // 1. deflate + /// // 2. gzip + /// // 3. callback + /// decompressor.decompress(&[]).unwrap(); + /// ``` + pub fn prepend(self, encoding: HtpContentEncoding, options: Options) -> std::io::Result { + match encoding { + HtpContentEncoding::NONE => Ok(Decompressor::new(self.inner)), + HtpContentEncoding::GZIP + | HtpContentEncoding::DEFLATE + | HtpContentEncoding::ZLIB + | HtpContentEncoding::LZMA => Ok(Decompressor::new(Box::new(InnerDecompressor::new( + encoding, self.inner, options, + )?))), + HtpContentEncoding::ERROR => Err(std::io::Error::new( + std::io::ErrorKind::Other, + "expected a valid encoding", + )), + } + } + + /// Creates a new decompressor with `encoding` and adds a callback to be called + /// when data is ready. + pub fn new_with_callback( + encoding: HtpContentEncoding, callback: CallbackFn, options: Options, + ) -> std::io::Result { + Self::callback(callback).prepend(encoding, options) + } + + /// Starts the decompression timer. 
+ fn timer_start(&mut self) { + self.time_before.replace(Instant::now()); + } + + /// Restarts the decompression timer at the current instant, then updates and returns the + /// total time spent decompressing in microseconds (usec); returns None if it was not started. + pub fn timer_reset(&mut self) -> Option { + let now = Instant::now(); + if let Some(time_before) = self.time_before.replace(now) { + // it is unlikely that more than 2^64 usec will be spent on a single stream + self.time_spent = self + .time_spent + .wrapping_add(now.duration_since(time_before).as_micros() as u64); + Some(self.time_spent) + } else { + None + } + } + + /// Increments the number of times the callback was called. + pub fn callback_inc(&mut self) -> u32 { + self.nb_callbacks = self.nb_callbacks.wrapping_add(1); + self.nb_callbacks + } + + /// Returns the time spent decompressing in microseconds (usec). + pub fn time_spent(&self) -> u64 { + self.time_spent + } + + /// Decompress the input `data` by calling the chain of decompressors and + /// the data callback. + /// + /// This will reset the number of callbacks called and restart the + /// decompression timer. + pub fn decompress(&mut self, data: &[u8]) -> std::io::Result<()> { + self.nb_callbacks = 0; + self.timer_start(); + + let result = self.inner.write_all(data).and_then(|_| self.inner.flush()); + + self.timer_reset(); + result + } + + /// Notify decompressors that the end of stream was reached. This is equivalent + /// to sending a NULL data pointer. + pub fn finish(&mut self) -> std::io::Result<()> { + self.inner.finish() + } + + /// Set this decompressor to passthrough + pub fn set_passthrough(&mut self, passthrough: bool) { + self.inner.set_passthrough(passthrough) + } +} + +impl std::fmt::Debug for Decompressor { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("Decompressor") + .field("time_spent", &self.time_spent) + .field("nb_callbacks", &self.nb_callbacks) + .finish() + } +} + +/// Trait that represents the decompression writers (gzip, deflate, etc.)
and +/// methods needed to write to a temporary buffer. +pub trait BufWriter: Write { + /// Get a mutable reference to the buffer. + fn get_mut(&mut self) -> Option<&mut Cursor>>; + /// Notify end of data. + fn finish(self: Box) -> std::io::Result>>; + /// Attempt to finish this output stream, writing out final chunks of data. + fn try_finish(&mut self) -> std::io::Result<()>; +} + +/// A BufWriter that doesn't consume any data. +/// +/// This should be used exclusively with passthrough mode. +struct NullBufWriter(Cursor>); + +impl Write for NullBufWriter { + fn write(&mut self, _: &[u8]) -> std::io::Result { + Ok(0) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl BufWriter for NullBufWriter { + fn get_mut(&mut self) -> Option<&mut Cursor>> { + Some(&mut self.0) + } + + fn finish(self: Box) -> std::io::Result>> { + Ok(self.0) + } + + fn try_finish(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +#[derive(Debug, PartialEq)] +enum GzState { + Start, + Xlen, + Extra, + Filename, + Comment, + Crc, + AfterHeader, +} + +/// Wrapper around a gzip header parser and a deflate decoder. +/// We parse the header separately because we want to be tolerant of +/// checksum or other gzip errors that do not affect our ability +/// to decompress the data stream but would cause 'correct' gzip decoders +/// to fail. 
We want to be tolerant of gzip errors because browsers +/// are apparently tolerant of gzip errors +/// +/// https://noxxi.de/research/http-evader-explained-5-gzip.html +struct GzipBufWriter { + buffer: Vec, + flags: u8, + xlen: u16, + inner: flate2::write::DeflateDecoder>>, + state: GzState, +} + +impl GzipBufWriter { + fn new(buf: Cursor>) -> Self { + GzipBufWriter { + buffer: Vec::with_capacity(10), + flags: 0, + xlen: 0, + inner: flate2::write::DeflateDecoder::new(buf), + state: GzState::Start, + } + } + + fn parse_start(data: &[u8]) -> nom::IResult<&[u8], u8> { + use nom::bytes::streaming::tag; + use nom::number::streaming::{le_i32, le_u8}; + use nom::sequence::tuple; + + let (rest, (_, flags, _mtime, _xfl, _operating_system)) = + tuple((tag(b"\x1f\x8b\x08"), le_u8, le_i32, le_u8, le_u8))(data)?; + Ok((rest, flags)) + } +} + +impl Write for GzipBufWriter { + fn write(&mut self, data: &[u8]) -> std::io::Result { + use nom::bytes::streaming::{tag, take_until}; + use nom::number::streaming::le_u16; + use nom::sequence::tuple; + + const FHCRC: u8 = 1 << 1; + const FEXTRA: u8 = 1 << 2; + const FNAME: u8 = 1 << 3; + const FCOMMENT: u8 = 1 << 4; + + let (mut parse, direct) = if !self.buffer.is_empty() && self.state == GzState::Start { + self.buffer.extend_from_slice(data); + (self.buffer.as_ref(), false) + } else { + (data, true) + }; + + loop { + match self.state { + GzState::Start => match GzipBufWriter::parse_start(parse) { + Ok((rest, flags)) => { + parse = rest; + self.flags = flags; + self.state = GzState::Xlen; + } + Err(nom::Err::Incomplete(_)) => { + if direct { + self.buffer.extend_from_slice(data); + } + return Ok(data.len()); + } + Err(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Could not parse gzip header", + )); + } + }, + GzState::Xlen => { + if self.flags & FEXTRA != 0 { + match le_u16::<&[u8], nom::error::Error<&[u8]>>(parse) { + Ok((rest, xlen)) => { + parse = rest; + self.xlen = xlen; + } + 
Err(nom::Err::Incomplete(_)) => { + return Ok(data.len() - parse.len()); + } + Err(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Could not parse gzip header", + )); // this one is unreachable + } + } + } + self.state = GzState::Extra; + } + GzState::Extra => { + if self.xlen > 0 { + if parse.len() < self.xlen as usize { + self.xlen -= parse.len() as u16; + return Ok(data.len()); + } + parse = &parse[self.xlen as usize..]; + } + self.state = GzState::Filename; + } + GzState::Filename => { + if self.flags & FNAME != 0 { + match tuple(( + take_until::<&[u8], &[u8], nom::error::Error<&[u8]>>(b"\0" as &[u8]), + tag(b"\0"), + ))(parse) + { + Ok((rest, _)) => { + parse = rest; + } + Err(nom::Err::Incomplete(_)) => { + return Ok(data.len()); + } + Err(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Could not parse gzip header", + )); // this one is unreachable + } + } + } + self.state = GzState::Comment; + } + GzState::Comment => { + if self.flags & FCOMMENT != 0 { + match tuple(( + take_until::<&[u8], &[u8], nom::error::Error<&[u8]>>(b"\0" as &[u8]), + tag(b"\0"), + ))(parse) + { + Ok((rest, _)) => { + parse = rest; + } + Err(nom::Err::Incomplete(_)) => { + return Ok(data.len()); + } + Err(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Could not parse gzip header", + )); // this one is unreachable + } + } + } + self.state = GzState::Crc; + } + GzState::Crc => { + if self.flags & FHCRC != 0 { + match le_u16::<&[u8], nom::error::Error<&[u8]>>(parse) { + Ok((rest, _)) => { + parse = rest; + } + Err(nom::Err::Incomplete(_)) => { + return Ok(data.len() - parse.len()); + } + Err(_) => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Could not parse gzip header", + )); // this one is unreachable + } + } + } + self.state = GzState::AfterHeader; + return Ok(data.len() - parse.len()); + } + GzState::AfterHeader => { + return self.inner.write(parse); + } + 
} + } + } + + fn flush(&mut self) -> std::io::Result<()> { + self.inner.flush() + } +} + +impl BufWriter for GzipBufWriter { + fn get_mut(&mut self) -> Option<&mut Cursor>> { + Some(self.inner.get_mut()) + } + + fn finish(self: Box) -> std::io::Result>> { + self.inner.finish() + } + + fn try_finish(&mut self) -> std::io::Result<()> { + self.inner.try_finish() + } +} + +/// Simple wrapper around a deflate implementation +struct DeflateBufWriter(flate2::write::DeflateDecoder>>); + +impl Write for DeflateBufWriter { + fn write(&mut self, data: &[u8]) -> std::io::Result { + self.0.write(data) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.0.flush() + } +} + +impl BufWriter for DeflateBufWriter { + fn get_mut(&mut self) -> Option<&mut Cursor>> { + Some(self.0.get_mut()) + } + + fn finish(self: Box) -> std::io::Result>> { + self.0.finish() + } + + fn try_finish(&mut self) -> std::io::Result<()> { + self.0.try_finish() + } +} + +/// Simple wrapper around a zlib implementation +struct ZlibBufWriter(flate2::write::ZlibDecoder>>); + +impl Write for ZlibBufWriter { + fn write(&mut self, data: &[u8]) -> std::io::Result { + self.0.write(data) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.0.flush() + } +} + +impl BufWriter for ZlibBufWriter { + fn get_mut(&mut self) -> Option<&mut Cursor>> { + Some(self.0.get_mut()) + } + + fn finish(self: Box) -> std::io::Result>> { + self.0.finish() + } + + fn try_finish(&mut self) -> std::io::Result<()> { + self.0.try_finish() + } +} + +/// Simple wrapper around an lzma implementation +struct LzmaBufWriter(lzma_rs::decompress::Stream>>); + +impl Write for LzmaBufWriter { + fn write(&mut self, data: &[u8]) -> std::io::Result { + self.0.write(data) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.0.flush() + } +} + +impl BufWriter for LzmaBufWriter { + fn get_mut(&mut self) -> Option<&mut Cursor>> { + self.0.get_output_mut() + } + + fn finish(self: Box) -> std::io::Result>> { + self.0.finish().map_err(|e| 
match e { + lzma_rs::error::Error::IoError(e) => e, + lzma_rs::error::Error::HeaderTooShort(e) => { + std::io::Error::new(std::io::ErrorKind::Other, format!("{}", e)) + } + lzma_rs::error::Error::LzmaError(e) | lzma_rs::error::Error::XzError(e) => { + std::io::Error::new(std::io::ErrorKind::Other, e) + } + }) + } + + fn try_finish(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +/// Structure that represents each decompressor in the chain. +struct InnerDecompressor { + /// Decoder implementation that will write to a temporary buffer. + writer: Option>, + /// Next decompressor to call. + inner: Option>, + /// Encoding type of the decompressor. + encoding: HtpContentEncoding, + /// Indicates whether to pass through the data without calling the writer. + passthrough: bool, + /// Tracks the number of restarts + restarts: u8, + /// Options for decompression + options: Options, +} + +impl InnerDecompressor { + /// Returns a new writer according to the content encoding type and whether to passthrough. 
+ fn writer( + encoding: HtpContentEncoding, options: &Options, + ) -> std::io::Result<(Box, bool)> { + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + + match encoding { + HtpContentEncoding::GZIP => Ok((Box::new(GzipBufWriter::new(buf)), false)), + HtpContentEncoding::DEFLATE => Ok(( + Box::new(DeflateBufWriter(flate2::write::DeflateDecoder::new(buf))), + false, + )), + HtpContentEncoding::ZLIB => Ok(( + Box::new(ZlibBufWriter(flate2::write::ZlibDecoder::new(buf))), + false, + )), + HtpContentEncoding::LZMA => { + if let Some(options) = options.lzma { + Ok(( + Box::new(LzmaBufWriter( + lzma_rs::decompress::Stream::new_with_options(&options, buf), + )), + false, + )) + } else { + Ok((Box::new(NullBufWriter(buf)), true)) + } + } + HtpContentEncoding::NONE | HtpContentEncoding::ERROR => Err(std::io::Error::new( + std::io::ErrorKind::Other, + "expected a valid encoding", + )), + } + } + + /// Create a new `InnerDecompressor` given a content encoding type and the + /// next (`inner`) decompressor to call. + fn new( + encoding: HtpContentEncoding, inner: Box, options: Options, + ) -> std::io::Result { + let (writer, passthrough) = Self::writer(encoding, &options)?; + Ok(Self { + inner: Some(inner), + encoding, + writer: Some(writer), + passthrough, + restarts: 0, + options, + }) + } + + /// Tries to pass data to the callback instead of calling the writers. + /// + /// This will set passthrough mode on success or revert on error. + fn try_passthrough(&mut self, data: &[u8]) -> std::io::Result { + self.set_passthrough(true); + if let Some(inner) = &mut self.inner { + let result = inner.write(data); + if result.is_err() { + self.set_passthrough(false); + } + result + } else { + Ok(data.len()) + } + } + + /// Flushes the writer and the temporary buffer it writes to. + /// + /// The writer should be taken out of its slot and passed directly instead of + /// `self.writer` to avoid holding multiple mutable references. 
+ fn flush_writer(&mut self, writer: &mut Box) -> std::io::Result<()> { + if let Some(mut inner) = self.inner.take() { + loop { + let result = writer.flush(); + + // Flush all of the bytes the writer has written to our temporary + // buffer of fixed size. + if let Some(cursor) = writer.get_mut() { + inner.write_all(&cursor.get_ref()[0..cursor.position() as usize])?; + cursor.set_position(0); + } + + // Continue flushing if the flush resulted in a `WriteZero`. This + // error indicates that the writer was unable to write all bytes + // to our temporary buffer, likely because it was full. + if let Err(e) = result { + match e.kind() { + std::io::ErrorKind::WriteZero => {} + _ => { + self.restart()?; + break; + } + } + } else { + break; + } + } + self.inner.replace(inner); + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "nothing to flush to", + )) + } + } + + fn try_finish(&mut self, writer: &mut Box) -> bool { + _ = writer.try_finish(); + if let Some(cursor) = writer.get_mut() { + if cursor.position() > 0 { + if let Some(mut inner) = self.inner.take() { + _ = inner.write_all(&cursor.get_ref()[0..cursor.position() as usize]); + cursor.set_position(0); + self.inner.replace(inner); + return true; + } + } + } + return false; + } +} + +impl Write for InnerDecompressor { + fn write(&mut self, data: &[u8]) -> std::io::Result { + // Passthrough mode + if self.passthrough { + if let Some(inner) = &mut self.inner { + inner.write(data) + } else { + Ok(data.len()) + } + + // Take the writer out of its slot to avoid holding multiple mutable + // references. Any calls using `self.writer` should be avoided while the + // writer is in this state. + } else if let Some(mut writer) = self.writer.take() { + match writer.write(data) { + Ok(consumed) => { + let result = if consumed == 0 { + // This could indicate that we have reached the end + // of the stream. 
Any data after the first end of + // stream (such as in multipart gzip) is ignored and + // we pretend to have consumed this data. + Ok(data.len()) + } else { + Ok(consumed) + }; + self.writer.replace(writer); + result + } + Err(e) => { + match e.kind() { + std::io::ErrorKind::WriteZero => { + self.flush_writer(&mut writer)?; + // Recursion: the buffer was flushed until `WriteZero` + // stopped occurring. + self.writer.replace(writer); + self.write(data) + } + _ => { + let written = self.try_finish(&mut writer); + if written && self.restarts == 0 { + // error, but some data has been written, stop here + return Err(e); + } + // try to restart, any data in the temp buffer will be + // discarded + if self.restart().is_err() { + self.try_passthrough(data) + } else { + // Recursion: restart will fail after a small + // number of attempts + self.write(data) + } + } + } + } + } + } else { + Err(std::io::Error::new( + std::io::ErrorKind::WriteZero, + "writer was not initialized", + )) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + if let Some(mut writer) = self.writer.take() { + self.flush_writer(&mut writer)?; + self.writer.replace(writer); + } + if let Some(inner) = &mut self.inner { + inner.flush() + } else { + Ok(()) + } + } +} + +impl Decompress for InnerDecompressor { + fn restart(&mut self) -> std::io::Result<()> { + if self.restarts < 3 { + // switch to the next candidate encoding: gzip -> deflate -> zlib -> gzip, lzma -> deflate + self.encoding = match self.encoding { + HtpContentEncoding::GZIP => HtpContentEncoding::DEFLATE, + HtpContentEncoding::DEFLATE => HtpContentEncoding::ZLIB, + HtpContentEncoding::ZLIB => HtpContentEncoding::GZIP, + HtpContentEncoding::LZMA => HtpContentEncoding::DEFLATE, + HtpContentEncoding::NONE | HtpContentEncoding::ERROR => { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "expected a valid encoding", + )) + } + }; + let (writer, passthrough) = Self::writer(self.encoding, &self.options)?; + self.writer = Some(writer); + if passthrough { + self.passthrough =
passthrough; + } + self.restarts += 1; + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "too many restart attempts", + )) + } + } + + // Tell all the decompressors to pass through the data instead of calling + // the writer. + fn set_passthrough(&mut self, passthrough: bool) { + self.passthrough = passthrough; + if let Some(inner) = &mut self.inner { + inner.set_passthrough(passthrough); + } + } + + // Tell all decompressors that there is no more data to receive. + fn finish(&mut self) -> std::io::Result<()> { + let output = if let Some(mut writer) = self.writer.take() { + self.flush_writer(&mut writer)?; + Some(writer.finish()?) + } else { + None + }; + + if let Some(mut inner) = self.inner.take() { + if let Some(output) = output { + inner.write_all(&output.get_ref()[..output.position() as usize])?; + } + inner.finish() + } else { + Ok(()) + } + } +} + +#[test] +fn test_gz_header() { + // No flags or other bits + let input = b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x00"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Just CRC + let input = b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\x00\x11\x22"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Just extra + let input = b"\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\x00\x04\x00abcd"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Just filename + let input = b"\x1f\x8b\x08\x08\x00\x00\x00\x00\x00\x00variable\x00"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as 
Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Just comment + let input = b"\x1f\x8b\x08\x10\x00\x00\x00\x00\x00\x00also variable\x00"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Extra and Filename + let input = b"\x1f\x8b\x08\x0c\x00\x00\x00\x00\x00\x00\x05\x00extrafilename\x00"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Extra and Comment and CRC + let input = b"\x1f\x8b\x08\x16\x00\x00\x00\x00\x00\x00\x05\x00extracomment\x00\x34\x12"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Filename and Comment + let input = b"\x1f\x8b\x08\x18\x00\x00\x00\x00\x00\x00filename\x00comment\x00"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Extra Filename and Comment and CRC + let input = + b"\x1f\x8b\x08\x1e\x00\x00\x00\x00\x00\x00\x05\x00extrafilename\x00comment\x00\x34\x12"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + + // Too short + let input = b"\x1f\x8b\x08\x1e\x00\x00\x00\x00\x00\x00\x05\x00extrafilename\x00comment\x00\x34"; + let buf = Cursor::new(Box::new([0u8; 
ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len() - 1); + assert_eq!(gzw.state, GzState::Crc); + // final missing CRC in header + let input = b"\x34\xee"; + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::AfterHeader); + let input = b"\x1f\x8b\x08\x01\x00\x00\x00\x00\x00"; + let buf = Cursor::new(Box::new([0u8; ENCODING_CHUNK_SIZE]) as Box<[u8]>); + let mut gzw = GzipBufWriter::new(buf); + assert_eq!(gzw.write(input).unwrap(), input.len()); + assert_eq!(gzw.state, GzState::Start); +} diff --git a/rust/htp/src/error.rs b/rust/htp/src/error.rs new file mode 100644 index 000000000000..2dc0157f54d9 --- /dev/null +++ b/rust/htp/src/error.rs @@ -0,0 +1,47 @@ +use crate::HtpStatus; +use nom::error::ErrorKind as NomErrorKind; + +/// Helper for nom's default error type +pub type NomError = nom::error::Error; + +/// Alias for libhtp Result type. Result types are classified by `HtpStatus`. +pub type Result = std::result::Result; + +impl From> for HtpStatus { + /// Returns HtpStatus from result. 
+ fn from(res: Result) -> HtpStatus { + match res { + Ok(_) => HtpStatus::OK, + Err(e) => e, + } + } +} + +impl From for Result<()> { + /// Returns Result from `HtpStatus` + fn from(status: HtpStatus) -> Result<()> { + if status == HtpStatus::OK { + Ok(()) + } else { + Err(status) + } + } +} + +impl From for HtpStatus { + fn from(_: std::io::Error) -> Self { + HtpStatus::ERROR + } +} + +impl From>> for HtpStatus { + fn from(_: nom::Err>) -> Self { + HtpStatus::ERROR + } +} + +impl From for HtpStatus { + fn from(_: NomErrorKind) -> Self { + HtpStatus::ERROR + } +} diff --git a/rust/htp/src/headers.rs b/rust/htp/src/headers.rs new file mode 100644 index 000000000000..1f97341860aa --- /dev/null +++ b/rust/htp/src/headers.rs @@ -0,0 +1,989 @@ +use crate::util::{is_token, trimmed, FlagOperations}; +use nom::{ + branch::alt, + bytes::complete::tag as complete_tag, + bytes::streaming::{tag, take_till, take_while, take_while1}, + character::{complete::one_of as complete_one_of, is_space, streaming::space0}, + combinator::{complete, map, not, opt, peek}, + sequence::tuple, + Err::Incomplete, + IResult, Needed, +}; + +/// Helper for Parsed bytes and corresponding HeaderFlags +pub type ParsedBytes<'a> = (&'a [u8], u64); +// Helper for Parsed Headers and corresponding termination +pub type ParsedHeaders = (Vec
, bool); +// Helper for matched eol+ folding bytes + flags +pub type FoldingBytes<'a> = (&'a [u8], &'a [u8], u64); +// Helper for folding or terminator bytes +pub type FoldingOrTerminator<'a> = (ParsedBytes<'a>, Option<&'a [u8]>); +// Helper for value bytes and the value terminator +pub type ValueBytes<'a> = (&'a [u8], FoldingOrTerminator<'a>); + +#[repr(C)] +#[derive(Debug, PartialEq, Eq)] +pub struct HeaderFlags; + +impl HeaderFlags { + pub const FOLDING: u64 = 0x0001; + pub const FOLDING_SPECIAL_CASE: u64 = (0x0002 | Self::FOLDING); + pub const NAME_EMPTY: u64 = 0x0004; + pub const VALUE_EMPTY: u64 = 0x0008; + pub const NAME_NON_TOKEN_CHARS: u64 = 0x0010; + pub const FIELD_REPEATED: u64 = 0x0020; + pub const NAME_TRAILING_WHITESPACE: u64 = 0x0040; + pub const NAME_LEADING_WHITESPACE: u64 = 0x0080; + pub const NULL_TERMINATED: u64 = 0x0100; + pub const MISSING_COLON: u64 = (0x0200 | Self::NAME_EMPTY); + pub const DEFORMED_EOL: u64 = 0x0400; +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Name { + pub name: Vec, + pub flags: u64, +} + +impl Name { + pub fn new(name: &[u8], flags: u64) -> Self { + Self { + name: trimmed(name).to_vec(), + flags, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Value { + pub value: Vec, + pub flags: u64, +} + +impl Value { + pub fn new(value: &[u8], flags: u64) -> Self { + Self { + value: trimmed(value).to_vec(), + flags, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Header { + pub name: Name, + pub value: Value, +} + +impl Header { + pub fn new(name: Name, value: Value) -> Self { + Self { name, value } + } + + pub fn new_with_flags( + name_bytes: &[u8], name_flags: u64, value_bytes: &[u8], value_flags: u64, + ) -> Self { + Self::new( + Name::new(name_bytes, name_flags), + Value::new(value_bytes, value_flags), + ) + } +} + +/// Enumerates possible parser types +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +pub enum Side { + /// Request Parser: null terminates + Request, + /// Response 
Parser: accepts CR as a line ending + Response, +} + +pub struct Parser { + side: Side, + complete: bool, +} + +impl Parser { + pub fn new(side: Side) -> Self { + Self { + side, + complete: false, + } + } + + /// Sets the parser complete state. + /// + /// If set to true, parser operates under the assumption that no more data is incoming + pub fn set_complete(&mut self, complete: bool) { + self.complete = complete; + } + + /// Returns true if c is a line feed, or a carriage return when parsing a response + fn is_eol(&self) -> impl Fn(u8) -> bool + '_ { + move |c| c == b'\n' || (self.side == Side::Response && c == b'\r') + } + + /// Parse one complete end of line character or character set + fn complete_eol_regular(&self) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> + '_ { + move |input| { + if self.side == Side::Response { + alt(( + complete_tag("\r\n"), + complete_tag("\n\r"), + complete_tag("\n"), + complete_tag("\r"), + ))(input) + } else { + alt((complete_tag("\r\n"), complete_tag("\n")))(input) + } + } + } + + /// Parse one complete deformed end of line character set + fn complete_eol_deformed(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ { + move |input| { + if self.side == Side::Response { + alt(( + map( + tuple(( + complete_tag("\n\r\r\n"), + peek(alt((complete_tag("\n"), complete_tag("\r\n")))), + )), + |(eol, _)| (eol, HeaderFlags::DEFORMED_EOL), + ), + // Treat EOL + empty folding + EOL as just EOL + self.folding_empty(), + map( + tuple(( + complete_tag("\r\n\r"), + take_while1(|c| c == b'\r' || c == b' ' || c == b'\t'), + opt(complete_tag("\n")), + not(alt((complete_tag("\n"), complete_tag("\r\n")))), + )), + |(eol1, eol2, eol3, _): (&[u8], &[u8], Option<&[u8]>, _)| { + ( + &input[..(eol1.len() + eol2.len() + eol3.unwrap_or(b"").len())], + HeaderFlags::DEFORMED_EOL, + ) + }, + ), + ))(input) + } else { + map( + alt(( + tuple(( + complete_tag("\n\r\r\n"), + peek(alt((complete_tag("\n"), complete_tag("\r\n")))), + )), + tuple((complete_tag("\n\r"), peek(complete_tag("\r\n")))), 
+ )), + |(eol, _)| (eol, HeaderFlags::DEFORMED_EOL), + )(input) + } + } + } + + /// Parse one complete end of line character or character set + fn complete_eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ { + move |input| { + alt(( + self.complete_eol_deformed(), + map(self.complete_eol_regular(), |eol| (eol, 0)), + ))(input) + } + } + + /// Parse one header end of line, and guarantee that it is not folding + fn eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ { + move |input| { + map( + tuple((self.complete_eol(), not(folding_lws))), + |(end, _)| end, + )(input) + } + } + + /// Parse one null byte or one end of line, and guarantee that it is not folding + fn null_or_eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ { + move |input| alt((null, self.eol()))(input) + } + + /// Parse one null byte or complete end of line + fn complete_null_or_eol(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ { + move |input| alt((null, self.complete_eol()))(input) + } + + /// Parse empty header folding as a single EOL (eol + whitespace + eol = eol) + fn folding_empty(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedBytes> + '_ { + move |input| { + map( + tuple(( + self.complete_eol_regular(), + complete_one_of("\t "), + self.complete_eol_regular(), + )), + |(eol1, _spaces, eol2)| (&input[..eol1.len() + 1 + eol2.len()], 0), + )(input) + } + } + /// Parse header folding bytes (eol + whitespace or eol + special cases) + fn folding(&self) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingBytes> + '_ { + move |input| { + if self.side == Side::Response { + map( + tuple(( + map(self.complete_eol_regular(), |eol| (eol, 0)), + folding_lws, + )), + |((eol, flags), (lws, other_flags))| (eol, lws, flags | other_flags), + )(input) + } else { + map( + tuple((self.complete_eol(), folding_lws)), + |((eol, flags), (lws, other_flags))| (eol, lws, flags | other_flags), + )(input) + } + } + } + + /// Parse complete folding bytes or a value terminator 
(eol or null) + fn complete_folding_or_terminator( + &self, + ) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingOrTerminator> + '_ { + move |input| { + alt(( + complete(map(self.folding(), |(end, fold, flags)| { + ((end, flags), Some(fold)) + })), + map(self.complete_null_or_eol(), |end| (end, None)), + ))(input) + } + } + + /// Parse streaming folding bytes or a value terminator (eol or null) + fn streaming_folding_or_terminator( + &self, + ) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingOrTerminator> + '_ { + move |input| { + alt(( + map(self.folding(), |(end, fold, flags)| { + ((end, flags), Some(fold)) + }), + map(self.null_or_eol(), |end| (end, None)), + ))(input) + } + } + + /// Parse folding bytes or a value terminator (eol or null) + fn folding_or_terminator(&self) -> impl Fn(&[u8]) -> IResult<&[u8], FoldingOrTerminator> + '_ { + move |input| { + if self.complete { + self.complete_folding_or_terminator()(input) + } else { + self.streaming_folding_or_terminator()(input) + } + } + } + + /// Parse a header value. + /// Returns the bytes and the value terminator; null, eol or folding + /// eg. (bytes, (eol_bytes, Option)) + fn value_bytes(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ValueBytes> + '_ { + move |input| { + let (mut remaining, mut value) = take_till(self.is_eol())(input)?; + if value.last() == Some(&b'\r') { + value = &value[..value.len() - 1]; + remaining = &input[value.len()..]; + } + let (remaining, result) = self.folding_or_terminator()(remaining)?; + Ok((remaining, (value, result))) + } + } + + /// Parse a complete header value, including any folded headers + fn value(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Value> + '_ { + move |input| { + let (mut rest, (val_bytes, ((_eol, mut flags), fold))) = self.value_bytes()(input)?; + let mut value = val_bytes.to_vec(); + if let Some(fold) = fold { + let mut i = rest; + let mut ofold = fold; + loop { + if self.side == Side::Response { + // Peek ahead for ambiguous name with lws vs. 
value with folding + match tuple((token_chars, separator_regular))(i) { + Ok((_, ((_, tokens, _), (_, _)))) if !tokens.is_empty() => { + flags.unset(HeaderFlags::FOLDING_SPECIAL_CASE); + if value.is_empty() { + flags.set(HeaderFlags::VALUE_EMPTY); + } + // i is now the latest rest + return Ok((i, Value::new(&value, flags))); + } + Err(Incomplete(_)) => { + return Err(Incomplete(Needed::new(1))); + } + _ => {} + } + } + let (rest2, (val_bytes, ((eol, other_flags), fold))) = self.value_bytes()(i)?; + i = rest2; + flags.set(other_flags); + // If the value is empty, the value started with a fold and we don't want to push back a space + if !value.is_empty() { + if !ofold.is_empty() { + value.push(ofold[0]); + } else { + value.push(b' '); + } + } + if !val_bytes.is_empty() || eol.len() > 1 { + // we keep empty folding as a future new eol + rest = rest2; + value.extend(val_bytes); + } else if val_bytes.is_empty() + && eol.len() == 1 + && !rest2.is_empty() + && rest2[0] == b'\n' + { + // eol empty fold double eol is end of headers + rest = rest2; + } + if let Some(fold) = fold { + ofold = fold; + } else { + return Ok((rest, Value::new(&value, flags))); + } + } + } else { + if value.is_empty() { + flags.set(HeaderFlags::VALUE_EMPTY); + } + Ok((rest, Value::new(&value, flags))) + } + } + } + + /// Parse one header name + fn name(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Name> + '_ { + move |input| { + let mut terminated = 0; + let mut offset = 0; + for i in 0..input.len() { + if terminated == 0 { + if input[i] == b':' { + offset = i; + break; + } else if input[i] == b'\n' + || (self.side == Side::Response && input[i] == b'\r') + { + terminated = input[i]; + } + } else { + if input[i] == b' ' { + terminated = 0; + } else if input[i] == b'\n' && terminated == b'\r' { + terminated = input[i]; + } else { + offset = i - 1; + break; + } + } + } + let (name, rem) = input.split_at(offset); + let mut flags = 0; + if !name.is_empty() { + if is_space(name[0]) { + 
flags.set(HeaderFlags::NAME_LEADING_WHITESPACE) + } + if let Some(end) = name.last() { + if is_space(*end) { + flags.set(HeaderFlags::NAME_TRAILING_WHITESPACE); + } + } + match token_chars(name) { + Ok((rem, _)) => { + if !rem.is_empty() { + flags.set(HeaderFlags::NAME_NON_TOKEN_CHARS); + } + } + _ => {} + } + } else { + flags.set(HeaderFlags::NAME_EMPTY) + } + return Ok((rem, Name::new(name, flags))); + } + } + + /// Parse a separator between header name and value + fn separator(&self) -> impl Fn(&[u8]) -> IResult<&[u8], u64> + '_ { + move |input| map(separator_regular, |_| 0)(input) + } + + /// Parse data before an eol with no colon as an empty name with the data as the value + fn header_sans_colon(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Header> + '_ { + move |input| { + let (remaining, (_, value)) = tuple((not(complete_tag("\r\n")), self.value()))(input)?; + + let flags = value.flags | HeaderFlags::MISSING_COLON; + Ok(( + remaining, + Header::new_with_flags(b"", flags, &value.value, flags), + )) + } + } + + /// Parse a header name separator value + fn header_with_colon(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Header> + '_ { + move |input| { + map( + tuple((self.name(), self.separator(), self.value())), + |(mut name, flag, mut value)| { + name.flags |= flag; + value.flags |= flag; + Header::new(name, value) + }, + )(input) + } + } + + /// Parses a header name and value with, or without a colon separator + fn header(&self) -> impl Fn(&[u8]) -> IResult<&[u8], Header> + '_ { + move |input| alt((complete(self.header_with_colon()), self.header_sans_colon()))(input) + } + + /// Parse multiple headers and indicate if end of headers or null was found + pub fn headers(&self) -> impl Fn(&[u8]) -> IResult<&[u8], ParsedHeaders> + '_ { + move |input| { + let (rest, head) = self.header()(input)?; + let is_null_terminated = head.value.flags.is_set(HeaderFlags::NULL_TERMINATED); + let mut out = Vec::with_capacity(16); + out.push(head); + if is_null_terminated { + return 
Ok((rest, (out, true))); + } + if let Ok((rest, _eoh)) = self.complete_eol()(rest) { + return Ok((rest, (out, true))); + } + let mut i = rest; + loop { + match self.header()(i) { + Ok((rest, head)) => { + i = rest; + let is_null_terminated = + head.value.flags.is_set(HeaderFlags::NULL_TERMINATED); + out.push(head); + if is_null_terminated { + return Ok((rest, (out, true))); + } + if let Ok((rest, _eoh)) = self.complete_eol()(rest) { + return Ok((rest, (out, true))); + } + } + Err(Incomplete(_)) => { + return Ok((i, (out, false))); + } + Err(e) => return Err(e), + } + } + } + } +} + +/// Parse one null character and return it and the NULL_TERMINATED flag +fn null(input: &[u8]) -> IResult<&[u8], ParsedBytes> { + map(complete_tag("\0"), |null| { + (null, HeaderFlags::NULL_TERMINATED) + })(input) +} + +/// Extracts one folding lws byte (space, tab, or null) and returns it with the FOLDING flag +fn folding_lws(input: &[u8]) -> IResult<&[u8], ParsedBytes> { + map(alt((tag(" "), tag("\t"), tag("\0"))), |fold| { + (fold, HeaderFlags::FOLDING) + })(input) +} + +/// Parse a regular separator (colon followed by optional spaces) between header name and value +fn separator_regular(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + tuple((complete_tag(":"), space0))(input) +} + +type leading_token_trailing<'a> = (&'a [u8], &'a [u8], &'a [u8]); +/// Parse token characters with leading and trailing whitespace +fn token_chars(input: &[u8]) -> IResult<&[u8], leading_token_trailing> { + tuple((space0, take_while(is_token), space0))(input) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::error::NomError; + use nom::{ + error::ErrorKind::{Not, Tag}, + Err::{Error, Incomplete}, + Needed, + }; + use rstest::rstest; + macro_rules! 
b { + ($b: literal) => { + $b.as_bytes() + }; + } + // Helper for matched leading whitespace, byes, and trailing whitespace + pub type SurroundedBytes<'a> = (&'a [u8], &'a [u8], &'a [u8]); + + #[rstest] + #[case::null_does_not_terminate(b"k1:v1\r\nk2:v2 before\0v2 after\r\n\r\n",Ok((b!(""), (vec![Header::new_with_flags(b"k1", 0, b"v1", 0), Header::new_with_flags(b"k2", 0, b"v2 before\0v2 after", 0)], true))), None)] + #[case::flags(b"k1:v1\r\n:v2\r\n v2+\r\nk3: v3\r\nk4 v4\r\nk\r5:v\r5\n\rmore\r\n\r\n", Ok((b!(""), ( + vec![ + Header::new_with_flags(b"k1", 0, b"v1", 0), + Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"v2 v2+", HeaderFlags::FOLDING), + Header::new_with_flags(b"k3", 0, b"v3", 0), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"k4 v4", HeaderFlags::MISSING_COLON), + Header::new_with_flags(b"k\r5", HeaderFlags::NAME_NON_TOKEN_CHARS, b"v\r5", 0), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"more", HeaderFlags::MISSING_COLON), + ], true))), Some(Ok((b!(""), ( + vec![ + Header::new_with_flags(b"k1", 0, b"v1", 0), + Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"v2 v2+", HeaderFlags::FOLDING), + Header::new_with_flags(b"k3", 0, b"v3", 0), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"k4 v4", HeaderFlags::MISSING_COLON), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"k", HeaderFlags::MISSING_COLON), + Header::new_with_flags(b"5", 0, b"v", 0), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"5", HeaderFlags::MISSING_COLON), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"more", HeaderFlags::MISSING_COLON), + ], true)))))] + #[case::incomplete_eoh(b"k1:v1\r\nk2:v2\r", Ok((b!("k2:v2\r"), (vec![Header::new_with_flags(b"k1", 0, b"v1", 0)], false))), None)] + #[case::incomplete_eoh_null(b"k1:v1\nk2:v2\0v2\r\nk3:v3\r", Ok((b!("k3:v3\r"), (vec![Header::new_with_flags(b"k1", 0, b"v1", 0), Header::new_with_flags(b"k2", 0, b"v2\0v2", 0)], false))), None)] + fn test_headers( + 
#[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedHeaders>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.headers()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.headers()(input), res_expected); + } else { + assert_eq!(res_parser.headers()(input), expected); + } + } + + #[rstest] + #[case::only_lf_eoh( + b"Name1: Value1\nName2:Value2\nName3: Val\n ue3\nName4: Value4\n Value4.1\n Value4.2\n\n", + None + )] + #[case::only_crlf_eoh(b"Name1: Value1\r\nName2:Value2\r\nName3: Val\r\n ue3\r\nName4: Value4\r\n Value4.1\r\n Value4.2\r\n\r\n", None)] + #[case::crlf_lf_eoh(b"Name1: Value1\r\nName2:Value2\nName3: Val\r\n ue3\r\nName4: Value4\r\n Value4.1\n Value4.2\r\n\n", None)] + #[case::only_cr(b"Name1: Value1\rName2:Value2\rName3: Val\r\n ue3\rName4: Value4\r\n Value4.1\r\n Value4.2\r\r\n", Some(Err(Incomplete(Needed::new(1)))))] + #[case::cr_lf_crlf_eoh(b"Name1: Value1\rName2:Value2\rName3: Val\r\n ue3\r\nName4: Value4\r\n Value4.1\n Value4.2\r\n\n", Some(Ok((b!(""), + ( + vec![ + Header::new_with_flags(b"Name1", 0, b"Value1\rName2:Value2\rName3: Val ue3", HeaderFlags::FOLDING), + Header::new_with_flags(b"Name4", 0, b"Value4 Value4.1 Value4.2", HeaderFlags::FOLDING) + ], + true + )))))] + #[case::crlf_lfcr_lf(b"Name1: Value1\r\nName2:Value2\nName3: Val\n\r ue3\n\rName4: Value4\r\n Value4.1\n Value4.2\r\n\n", Some(Ok((b!(""), + ( + vec![ + Header::new_with_flags(b"Name1", 0, b"Value1", 0), + Header::new_with_flags(b"Name2", 0, b"Value2", 0), + Header::new_with_flags(b"Name3", 0, b"Val", 0), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"ue3", HeaderFlags::MISSING_COLON), + Header::new_with_flags(b"Name4", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value4 Value4.1 Value4.2", HeaderFlags::FOLDING), + ], + true + )))))] + #[case::lfcr_eoh(b"Name1: Value1\n\rName2:Value2\n\rName3: Val\n\r 
ue3\n\rName4: Value4\n\r Value4.1\n\r Value4.2\n\r\n\r", Some(Ok((b!("\r"), + ( + vec![ + Header::new_with_flags(b"Name1", 0, b"Value1", 0), + Header::new_with_flags(b"Name2", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value2", 0), + Header::new_with_flags(b"Name3", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Val", 0), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"ue3", HeaderFlags::MISSING_COLON), + Header::new_with_flags(b"Name4", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value4", 0), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"Value4.1", HeaderFlags::MISSING_COLON), + Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"Value4.2", HeaderFlags::MISSING_COLON), + ], + true + )))))] + fn test_headers_eoh( + #[case] input: &[u8], #[case] diff_req_expected: Option>, + ) { + let expected = Ok(( + b!(""), + ( + vec![ + Header::new_with_flags(b"Name1", 0, b"Value1", 0), + Header::new_with_flags(b"Name2", 0, b"Value2", 0), + Header::new_with_flags(b"Name3", 0, b"Val ue3", HeaderFlags::FOLDING), + Header::new_with_flags( + b"Name4", + 0, + b"Value4 Value4.1 Value4.2", + HeaderFlags::FOLDING, + ), + ], + true, + ), + )); + let req_parser = Parser::new(Side::Request); + let res_parser = Parser::new(Side::Response); + if let Some(req_expected) = diff_req_expected { + assert_eq!(req_parser.headers()(input), req_expected); + } else { + assert_eq!(req_parser.headers()(input), expected); + } + assert_eq!(res_parser.headers()(input), expected); + } + + #[rstest] + #[case::incomplete(b"K V", Err(Incomplete(Needed::new(1))), None)] + #[case::contains_colon_1(b"K:V\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::contains_colon_2(b"K:V\r\nK2: V2", Ok((b!("K2: V2"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K:V", HeaderFlags::MISSING_COLON))), None)] + #[case::empty_name_value(b"\r\n", Err(Error(NomError::new(b!("\r\n"), Not))), None)] + #[case::contains_null(b"K V\0alue\r\nk", Ok((b!("k"), Header::new_with_flags(b"", 
HeaderFlags::MISSING_COLON, b"K V\0alue", HeaderFlags::MISSING_COLON))), None)] + #[case::folding(b"K V\ralue\r\nk", Ok((b!("k"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V\ralue", HeaderFlags::MISSING_COLON))), Some(Ok((b!("alue\r\nk"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V", HeaderFlags::MISSING_COLON)))))] + #[case::crlf(b"K V\r\nk1:v1\r\n", Ok((b!("k1:v1\r\n"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V", HeaderFlags::MISSING_COLON))), None)] + #[case::lf(b"K V\nk1:v1\r\n", Ok((b!("k1:v1\r\n"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K V", HeaderFlags::MISSING_COLON))), None)] + fn test_header_sans_colon( + #[case] input: &[u8], #[case] expected: IResult<&[u8], Header>, + #[case] response_parser_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.header_sans_colon()(input), expected); + + let res_parser = Parser::new(Side::Response); + let res_expected = if let Some(response_expected) = response_parser_expected { + response_expected + } else { + expected + }; + assert_eq!(res_parser.header_sans_colon()(input), res_expected); + } + + #[rstest] + #[case::incomplete(b"K: V", Err(Incomplete(Needed::new(1))))] + #[case::contains_colon(b"K: V\r\n", Err(Incomplete(Needed::new(1))))] + #[case::missing_colon(b"K V\nK:V\r\n", Err(Error(NomError::new(b!("\nK:V\r\n"), Tag))))] + #[case::contains_null(b":\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"", HeaderFlags::VALUE_EMPTY))))] + #[case::folding(b"K:\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"", HeaderFlags::VALUE_EMPTY))))] + #[case::crlf(b":V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"V", 0))))] + #[case::lf_1(b"K:folded\r\n\rV\r\n\r\n", Ok((b!("\rV\r\n\r\n"), Header::new_with_flags(b"K", 0, b"folded", 0))))] + #[case::lf_2(b"K: V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V", 0))))] + 
#[case::lf_3(b"K: V before\0 V after\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V before\0 V after", 0))))] + #[case::lf_4(b"K: V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V", 0))))] + #[case::lf_5(b"K: V before\0 V after\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V before\0 V after", 0))))] + #[case::lf_6(b"K: V\r\n a\r\n l\r\n u\r\n\te\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V a l u\te", HeaderFlags::FOLDING))))] + fn test_header_with_colon(#[case] input: &[u8], #[case] expected: IResult<&[u8], Header>) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.header_with_colon()(input), expected); + + let res_parser = Parser::new(Side::Response); + assert_eq!(res_parser.header_with_colon()(input), expected); + } + + #[rstest] + #[case::incomplete(b"K: V", Err(Incomplete(Needed::new(1))), None)] + #[case::contains_colon(b"K: V\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::missing_colon_1(b"K V\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::missing_colon_2(b"K1 V1\r\nK2:V2\n\r\n", Ok((b!("K2:V2\n\r\n"), Header::new_with_flags(b"", HeaderFlags::MISSING_COLON, b"K1 V1", HeaderFlags::MISSING_COLON))), None)] + #[case::empty_name_value(b"K1:V1\nK2:V2\n\r\n", Ok((b!("K2:V2\n\r\n"), Header::new_with_flags(b"K1", 0, b"V1", 0))), None)] + #[case::contains_null(b":\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"", HeaderFlags::VALUE_EMPTY))), None)] + #[case::folding(b"K:\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"", HeaderFlags::VALUE_EMPTY))), None)] + #[case::empty_name(b":V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"", HeaderFlags::NAME_EMPTY, b"V", 0))), None)] + #[case::special_folding(b"K:folded\r\n\rV\r\n\r\n", Ok((b!("\rV\r\n\r\n"), Header::new_with_flags(b"K", 0, b"folded", 0))), None)] + #[case::regular_eoh(b"K: V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V", 0))), None)] + 
#[case::folding(b"K: V\n a\r\n l\n u\r\n\te\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V a l u\te", HeaderFlags::FOLDING))), None)] + #[case::cr_in_name(b"Host:www.google.com\rName: Value\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"Host", 0, b"www.google.com\rName: Value", 0))), Some(Ok((b!("Name: Value\r\n\r\n"), Header::new_with_flags(b"Host", 0, b"www.google.com", 0)))))] + #[case::null_in_value(b"K: V before\0 V after\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V before\0 V after", 0))), None)] + #[case::folding(b"K: V\r a\r\n l\n u\r\n\te\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V\r a l u\te", HeaderFlags::FOLDING))), Some(Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"V a l u\te", HeaderFlags::FOLDING)))))] + #[case::deformed_folding_1(b"K:deformed folded\n\r V\n\r\r\n\n", Ok((b!("\r V\n\r\r\n\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0))), Some(Ok((b!("\n"), Header::new_with_flags(b"K", 0, b"deformed folded V", HeaderFlags::FOLDING | HeaderFlags::DEFORMED_EOL)))))] + #[case::deformed_folding_2(b"K:deformed folded\n\r V\r\n\r\n", Ok(( b!("\r V\r\n\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0))), Some(Ok((b!("\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded V", HeaderFlags::FOLDING)))))] + #[case::deformed_folding_3(b"K:deformed folded\n\r\r V\r\n\r\n", Ok(( b!("\r\r V\r\n\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0))), Some(Ok((b!("\r V\r\n\r\n"), Header::new_with_flags(b"K", 0, b"deformed folded", 0)))))] + #[case::non_token_trailing_ws(b"K\r \r :\r V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K\r \r ", HeaderFlags::NAME_NON_TOKEN_CHARS | HeaderFlags::NAME_TRAILING_WHITESPACE, b"\r V", 0))), Some(Ok((b!("\r\n"), Header::new_with_flags(b"K", HeaderFlags::NAME_NON_TOKEN_CHARS | HeaderFlags::NAME_TRAILING_WHITESPACE, b"V", HeaderFlags::FOLDING)))))] + #[case::non_token(b"K\x0c:Value\r\n V\r\n\r\n", Ok((b!("\r\n"), 
Header::new_with_flags(b"K\x0c", HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value V", HeaderFlags::FOLDING))), None)] + #[case::non_token_trailing(b"K\r :Value\r\n V\r\n\r\n", Ok((b!("\r\n"), Header::new_with_flags(b"K\r ", HeaderFlags::NAME_TRAILING_WHITESPACE | HeaderFlags::NAME_NON_TOKEN_CHARS, b"Value V", HeaderFlags::FOLDING))), None)] + fn test_header( + #[case] input: &[u8], #[case] expected: IResult<&[u8], Header>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.header()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.header()(input), res_expected); + } else { + assert_eq!(res_parser.header()(input), expected); + } + } + + #[rstest] + #[case::not_a_separator(b"\n", Err(Error(NomError::new(b!("\n"), Tag))), None)] + #[case::colon(b":value", Ok((b!("value"), 0)), None)] + #[case::colon_whitespace(b": value", Ok((b!("value"), 0)), None)] + #[case::colon_tab(b":\t value", Ok((b!("value"), 0)), None)] + fn test_separators( + #[case] input: &[u8], #[case] expected: IResult<&[u8], u64>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.separator()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.separator()(input), res_expected); + } else { + assert_eq!(res_parser.separator()(input), expected); + } + } + + #[rstest] + #[case::incomplete(b"name", Err(Incomplete(Needed::new(1))))] + #[case::token(b"name:", Ok((b!(":"), (b!(""), b!("name"), b!("")))))] + #[case::trailing_whitespace(b"name :", Ok((b!(":"), (b!(""), b!("name"), b!(" ")))))] + #[case::surrounding_whitespace(b" name :", Ok((b!(":"), (b!(" "), b!("name"), b!(" ")))))] + fn test_token_chars(#[case] input: &[u8], #[case] expected: IResult<&[u8], SurroundedBytes>) { + assert_eq!(token_chars(input), 
expected); + } + + #[rstest] + #[case::name(b"Hello: world", Ok((b!(": world"), Name {name: b"Hello".to_vec(), flags: 0})), None)] + #[case::name(b"Host:www.google.com\rName: Value", Ok((b!(":www.google.com\rName: Value"), Name {name: b"Host".to_vec(), flags: 0})), None)] + #[case::trailing_whitespace(b"Hello : world", Ok((b!(": world"), Name {name: b"Hello".to_vec(), flags: HeaderFlags::NAME_TRAILING_WHITESPACE})), None)] + #[case::surrounding_whitespace(b" Hello : world", Ok((b!(": world"), Name {name: b"Hello".to_vec(), flags: HeaderFlags::NAME_LEADING_WHITESPACE | HeaderFlags::NAME_TRAILING_WHITESPACE})), None)] + #[case::semicolon(b"Hello;invalid: world", Ok((b!(": world"), Name {name: b"Hello;invalid".to_vec(), flags: HeaderFlags::NAME_NON_TOKEN_CHARS})), None)] + #[case::space(b"Hello invalid: world", Ok((b!(": world"), Name {name: b"Hello invalid".to_vec(), flags: HeaderFlags::NAME_NON_TOKEN_CHARS})), None)] + #[case::surrounding_internal_space(b" Hello invalid : world", Ok((b!(": world"), Name {name: b"Hello invalid".to_vec(), flags: HeaderFlags::NAME_LEADING_WHITESPACE | HeaderFlags::NAME_TRAILING_WHITESPACE | HeaderFlags::NAME_NON_TOKEN_CHARS})), None)] + #[case::only_space_name(b" : world", Ok((b!(": world"), Name {name: b"".to_vec(), flags: HeaderFlags::NAME_LEADING_WHITESPACE | HeaderFlags::NAME_TRAILING_WHITESPACE })), None)] + fn test_name( + #[case] input: &[u8], #[case] expected: IResult<&[u8], Name>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.name()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.name()(input), res_expected); + } else { + assert_eq!(res_parser.name()(input), expected); + } + } + + #[rstest] + #[case(b"test", Err(Error(NomError::new(b!("test"), Tag))))] + #[case(b"\r\n", Err(Error(NomError::new(b!("\r\n"), Tag))))] + #[case(b"\n", Err(Error(NomError::new(b!("\n"), 
Tag))))] + #[case(b"\0a", Ok((b!("a"), (b!("\0"), HeaderFlags::NULL_TERMINATED))))] + fn test_null(#[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedBytes>) { + assert_eq!(null(input), expected); + } + + #[rstest] + #[case::not_eol(b"test", Err(Error(NomError::new(b!("test"), Tag))), None)] + #[case::incomplete_eol(b"\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_eol(b"\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_eol(b"\r\n\t", Err(Error(NomError::new(b!("\t"), Not))), None)] + #[case::complete_cr(b"\ra", Err(Error(NomError::new(b!("\ra"), Tag))), Some(Ok((b!("a"), (b!("\r"), 0)))))] + #[case::incomplete_crcr(b"\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Ok((b!("\r"), (b!("\r"), 0)))))] + #[case::incomplete_lfcr(b"\n\r", Ok((b!("\r"), (b!("\n"), 0))), Some(Err(Incomplete(Needed::new(1)))))] + #[case::complete_lfcr(b"\n\ra", Ok((b!("\ra"), (b!("\n"), 0))), Some(Ok((b!("a"), (b!("\n\r"), 0)))))] + #[case::lfcrlf(b"\n\r\n", Ok((b!("\r\n"), (b!("\n"), 0))), Some(Ok((b!("\n"), (b!("\n\r"), 0)))))] + #[case::lfcrlfcr(b"\n\r\n\r", Ok((b!("\r\n\r"), (b!("\n"), 0))), Some(Ok((b!("\n\r"), (b!("\n\r"), 0)))))] + #[case::complete_lf(b"\na", Ok((b!("a"), (b!("\n"), 0))), None)] + #[case::complete_lfcrcrlf(b"\n\r\r\na", Ok((b!("\r\na"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\na"), (b!("\n\r"), 0)))))] + #[case::complete_crlfcrlf(b"\r\n\r\na", Ok((b!("\r\na"), (b!("\r\n"), 0))), None)] + #[case::incomplete_crlf(b"\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_lf(b"\n", Err(Incomplete(Needed::new(1))), None)] + #[case::lfcrcrlf(b"\n\r\r\n", Ok((b!("\r\n"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\n"), (b!("\n\r"), 0)))))] + #[case::crlfcrlf(b"\r\n\r\n", Ok((b!("\r\n"), (b!("\r\n"), 0))), None)] + #[case::null(b"\0a", Err(Error(NomError::new(b!("\0a"), Tag))), None)] + fn test_eol( + #[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedBytes>, + 
#[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.eol()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.eol()(input), res_expected); + } else { + assert_eq!(res_parser.eol()(input), expected); + } + } + + #[rstest] + #[case::not_eol(b"test", Err(Error(NomError::new(b!("test"), Tag))), None)] + #[case::incomplete_eol(b"\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_eol(b"\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_eol(b"\r\n\t", Err(Error(NomError::new(b!("\t"), Not))), None)] + #[case::complete_cr(b"\ra", Err(Error(NomError::new(b!("\ra"), Tag))), Some(Ok((b!("a"), (b!("\r"), 0)))))] + #[case::incomplete_crcr(b"\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Ok((b!("\r"), (b!("\r"), 0)))))] + #[case::incomplete_lfcr(b"\n\r", Ok((b!("\r"), (b!("\n"), 0))), Some(Err(Incomplete(Needed::new(1)))))] + #[case::complete_lfcr(b"\n\ra", Ok((b!("\ra"), (b!("\n"), 0))), Some(Ok((b!("a"), (b!("\n\r"), 0)))))] + #[case::lfcrlf(b"\n\r\n", Ok((b!("\r\n"), (b!("\n"), 0))), Some(Ok((b!("\n"), (b!("\n\r"), 0)))))] + #[case::lfcrlfcr(b"\n\r\n\r", Ok((b!("\r\n\r"), (b!("\n"), 0))), Some(Ok((b!("\n\r"), (b!("\n\r"), 0)))))] + #[case::complete_lf(b"\na", Ok((b!("a"), (b!("\n"), 0))), None)] + #[case::complete_lfcrcrlf(b"\n\r\r\na", Ok((b!("\r\na"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\na"), (b!("\n\r"), 0)))))] + #[case::complete_crlfcrlf(b"\r\n\r\na", Ok((b!("\r\na"), (b!("\r\n"), 0))), None)] + #[case::incomplete_crlf(b"\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_lf(b"\n", Err(Incomplete(Needed::new(1))), None)] + #[case::lfcrcrlf(b"\n\r\r\n", Ok((b!("\r\n"), (b!("\n\r"), HeaderFlags::DEFORMED_EOL))), Some(Ok((b!("\r\n"), (b!("\n\r"), 0)))))] + #[case::crlfcrlf(b"\r\n\r\n", Ok((b!("\r\n"), (b!("\r\n"), 0))), None)] + 
#[case::null(b"\0a", Ok((b!("a"), (b!("\0"), HeaderFlags::NULL_TERMINATED))), None)] + fn test_null_or_eol( + #[case] input: &[u8], #[case] expected: IResult<&[u8], ParsedBytes>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.null_or_eol()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.null_or_eol()(input), res_expected); + } else { + assert_eq!(res_parser.null_or_eol()(input), expected); + } + } + + #[rstest] + #[case::no_fold_tag(b"test", Err(Error(NomError::new(b!("test"), Tag))), None)] + #[case::cr(b"\r", Err(Error(NomError::new(b!("\r"), Tag))), Some(Err(Incomplete(Needed::new(1)))))] + #[case::crcr(b"\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Err(Error(NomError::new(b!("\r"), Tag)))))] + #[case::incomplete_crlf(b"\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_crlf_ws(b"\r\n\t", Ok((b!(""), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)] + #[case::incomplete_crlf_ws(b"\r\n \t", Ok((b!("\t"), (b!("\r\n"), b!(" "), HeaderFlags::FOLDING))), None)] + #[case::incomplete_crlfcr(b"\r\n\r", Err(Error(NomError::new(b!("\r"), Tag))), None)] + #[case::not_fold_1(b"\r\n\r\n", Err(Error(NomError::new(b!("\r\n"), Tag))), None)] + #[case::not_fold_2(b"\r\n\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), None)] + #[case::fold(b"\r\n next", Ok((b!("next"), (b!("\r\n"), b!(" "), HeaderFlags::FOLDING))), None)] + #[case::fold(b"\r\n\tnext", Ok((b!("next"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)] + #[case::fold(b"\r\n\t next", Ok((b!(" next"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)] + #[case::fold_not_res(b"\r\n\t\t\r\n", Ok((b!("\t\r\n"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)] + #[case::fold_not_res(b"\r\n\t \t\r", Ok((b!(" \t\r"), (b!("\r\n"), b!("\t"), HeaderFlags::FOLDING))), None)] + #[case::fold_not_res(b"\r\n \n", Ok((b!(" 
\n"), (b!("\r\n"), b!(" "), HeaderFlags::FOLDING))), None)] + #[case::special_fold_not_res(b"\n\r \n", Err(Error(NomError::new(b!("\r \n"), Tag))), Some( Ok((b!(" \n"), (b!("\n\r"), b!(" "), HeaderFlags::FOLDING)))))] + #[case::special_fold_1(b"\r\n\rnext", Err(Error(NomError::new(b!("\rnext"), Tag))), None)] + #[case::special_fold_2(b"\r\n\r\t next", Err(Error(NomError::new(b!("\r\t next"), Tag))), None)] + #[case::fold_res(b"\r hello \n", Err(Error(NomError::new(b!("\r hello \n"), Tag))), Some(Ok((b!(" hello \n"), (b!("\r"), b!(" "), HeaderFlags::FOLDING)))))] + fn test_folding( + #[case] input: &[u8], #[case] expected: IResult<&[u8], FoldingBytes>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.folding()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.folding()(input), res_expected); + } else { + assert_eq!(res_parser.folding()(input), expected); + } + } + + #[rstest] + #[case::incomplete_1(b"\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_2(b"\r\n\t", Ok((b!(""), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t"))))), None)] + #[case::incomplete_3(b"\r\n ", Ok((b!(""), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!(" "))))), None)] + #[case::incomplete_4(b"\r\n\r", Ok((b!("\r"),((b!("\r\n"), 0), None))), Some(Err(Incomplete(Needed::new(1)))))] + #[case::crcr(b"\r\r", Err(Error(NomError::new(b!("\r\r"), Tag))), Some(Ok((b!("\r"), ((b!("\r"), 0), None)))))] + #[case::fold(b"\r\n\ta", Ok((b!("a"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t"))))), None)] + #[case::special_fold(b"\r\n\ra", Ok((b!("\ra"),((b!("\r\n"), 0), None))), None)] + #[case::fold(b"\r\n a", Ok((b!("a"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!(" "))))), None)] + #[case::crlf_eol(b"\r\na", Ok((b!("a"), ((b!("\r\n"), 0), None))), None)] + #[case::lflf_eol(b"\n\na", Ok((b!("\na"), ((b!("\n"), 0), None))), 
None)] + #[case::crlfcrlf_eol(b"\r\n\r\na", Ok((b!("\r\na"), ((b!("\r\n"), 0), None))), None)] + #[case::req_deformed_eol(b"\n\r\r\na", Ok((b!("\r\na"), ((b!("\n\r"), HeaderFlags::DEFORMED_EOL), None))), Some(Ok((b!("\r\na"), ((b!("\n\r"), 0), None)))))] + #[case::null_terminated(b"\0a", Ok((b!("a"), ((b!("\0"), HeaderFlags::NULL_TERMINATED), None))), None)] + #[case::res_fold(b"\r a", Err(Error(NomError::new(b!("\r a"), Tag))), Some(Ok((b!("a"), ((b!("\r"), HeaderFlags::FOLDING), Some(b!(" ")))))))] + #[case::multi_space_line(b"\n \r\n\n", Ok((b!(" \r\n\n"), ((b!("\n"), HeaderFlags::FOLDING), Some(b!(" "))))), None)] + fn test_folding_or_terminator( + #[case] input: &[u8], #[case] expected: IResult<&[u8], FoldingOrTerminator>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.folding_or_terminator()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.folding_or_terminator()(input), res_expected); + } else { + assert_eq!(res_parser.folding_or_terminator()(input), expected); + } + } + + #[rstest] + #[case::incomplete_1(b" ", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_2(b"value", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_3(b"\tvalue", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_4(b" value", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_5(b"value\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete_6(b"\r\r", Err(Incomplete(Needed::new(1))), Some(Ok((b!("\r"), (b!(""), ((b!("\r"), 0), None))))))] + #[case::diff_values_1(b"www.google.com\rName: Value\r\n\r\n", Ok((b!("\r\n"), (b!("www.google.com\rName: Value"), ((b!("\r\n"), 0), None)))), Some(Ok((b!("Name: Value\r\n\r\n"), (b!("www.google.com"), ((b!("\r"), 0), None))))))] + #[case::diff_values_2(b"www.google.com\rName: Value\n\r\n", Ok((b!("\r\n"), (b!("www.google.com\rName: 
Value"), ((b!("\n"), 0), None)))), Some(Ok((b!("Name: Value\n\r\n"), (b!("www.google.com"), ((b!("\r"), 0), None))))))] + #[case::diff_values_3(b"www.google.com\rName: Value\r\n\n", Ok((b!("\n"), (b!("www.google.com\rName: Value"), ((b!("\r\n"), 0), None)))), Some(Ok((b!("Name: Value\r\n\n"), (b!("www.google.com"), ((b!("\r"), 0), None))))))] + #[case::value_1(b"\r\nnext", Ok((b!("next"), (b!(""), ((b!("\r\n"), 0), None)))), None)] + #[case::value_2(b"value\r\nname2", Ok((b!("name2"), (b!("value"), ((b!("\r\n"), 0), None)))), None)] + #[case::fold_value_1(b"value\n more", Ok((b!("more"), (b!("value"), ((b!("\n"), HeaderFlags::FOLDING), Some(b!(" ")))))), None)] + #[case::fold_value_2(b"value\r\n\t more", Ok((b!(" more"), (b!("value"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t")))))), None)] + #[case::req_special_fold_res_value_1(b"value\r\n\t more", Ok((b!(" more"), (b!("value"), ((b!("\r\n"), HeaderFlags::FOLDING), Some(b!("\t")))))), None)] + #[case::req_special_fold_res_value_2(b"value\n\rmore", Ok((b!("\rmore"), (b!("value"), ((b!("\n"), 0), None)))), Some(Ok((b!("more"), (b!("value"), ((b!("\n\r"), 0), None))))))] + #[case::special_fold(b"value\r\n\rmore", Ok((b!("\rmore"), (b!("value"), ((b!("\r\n"), 0), None)))), None)] + fn test_value_bytes( + #[case] input: &[u8], #[case] expected: IResult<&[u8], ValueBytes>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.value_bytes()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.value_bytes()(input), res_expected); + } else { + assert_eq!(res_parser.value_bytes()(input), expected); + } + } + + #[rstest] + #[case::incomplete(b"value\r\n more\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete(b"value\r\n ", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete(b"value\r\n more", Err(Incomplete(Needed::new(1))), None)] + 
#[case::incomplete(b"value\r\n more\n", Err(Incomplete(Needed::new(1))), None)] + #[case::incomplete(b"value\n more\r\n", Err(Incomplete(Needed::new(1))), None)] + #[case::fold(b"\r\n value \r\nnext:", Ok((b!("next:"), Value {value: b"value".to_vec(), flags: HeaderFlags::FOLDING})), None)] + #[case::fold(b"\r\n value\r\nnext:", Ok((b!("next:"), Value {value: b"value".to_vec(), flags: HeaderFlags::FOLDING})), None)] + #[case::fold(b"value\r\n more\r\n\r\n", Ok((b!("\r\n"), Value {value: b"value more".to_vec(), flags: HeaderFlags::FOLDING})), None)] + #[case::fold(b"value\r\n more\r\n\tand more\r\nnext:", Ok((b!("next:"), Value {value: b"value more\tand more".to_vec(), flags: HeaderFlags::FOLDING})), None)] + #[case::fold(b"value\n\t\tmore\r\n and\r\n more\r\nnext:", Ok((b!("next:"), Value {value: b"value\t\tmore and more".to_vec(), flags: HeaderFlags::FOLDING})), None)] + #[case::req_special_res_fold_1(b"value\n more\n\r\tand more\r\n\r\n", Ok((b!("\r\tand more\r\n\r\n"), Value {value: b"value more".to_vec(), flags: HeaderFlags::FOLDING})), Some(Ok((b!("\r\n"), Value {value: b"value more\tand more".to_vec(), flags: HeaderFlags::FOLDING}))))] + #[case::req_special_res_fold_2(b"value\n\r\t\tmore\r\n and\r\n more\r\nnext:", Ok((b!("\r\t\tmore\r\n and\r\n more\r\nnext:"), Value {value: b"value".to_vec(), flags: 0})), Some(Ok((b!("next:"), Value {value: b"value\t\tmore and more".to_vec(), flags: HeaderFlags::FOLDING}))))] + #[case::req_special_res_value(b"value\n\r\t\tmore\r\n and\r\n more\r\nnext:", Ok((b!("\r\t\tmore\r\n and\r\n more\r\nnext:"), Value {value: b"value".to_vec(), flags: 0})), Some(Ok((b!("next:"), Value {value: b"value\t\tmore and more".to_vec(), flags: HeaderFlags::FOLDING}))))] + #[case::req_special_deformed_res_fold(b"value1\n\r next: value2\r\n and\r\n more\r\nnext3:", Ok((b!("\r next: value2\r\n and\r\n more\r\nnext3:"), Value {value: b"value1".to_vec(), flags: 0})), Some(Ok((b!("next: value2\r\n and\r\n more\r\nnext3:"), Value {value: 
b"value1".to_vec(), flags: 0}))))] + #[case::value(b"value\r\nnext:", Ok((b!("next:"), Value {value: b"value".to_vec(), flags: 0})), None)] + #[case::value_empty(b"\r\nnext:", Ok((b!("next:"), Value {value: b"".to_vec(), flags: HeaderFlags::VALUE_EMPTY})), None)] + #[case::value_wrapping_with_colon(b"b\r\n c: d\r\nAAA", Ok((b!("AAA"), Value {value: b"b c: d".to_vec(), flags: HeaderFlags::FOLDING})), Some(Ok((b!("c: d\r\nAAA"), Value {value: b"b".to_vec(), flags: 0}))))] + #[case::value_wrapping_with_colon_no_tokens(b"b\r\n : d\r\nAAA", Ok((b!("AAA"), Value {value: b"b : d".to_vec(), flags: HeaderFlags::FOLDING})), Some(Ok((b!("AAA"), Value {value: b"b : d".to_vec(), flags: HeaderFlags::FOLDING}))))] + fn test_value( + #[case] input: &[u8], #[case] expected: IResult<&[u8], Value>, + #[case] diff_res_expected: Option>, + ) { + let req_parser = Parser::new(Side::Request); + assert_eq!(req_parser.value()(input), expected); + + let res_parser = Parser::new(Side::Response); + if let Some(res_expected) = diff_res_expected { + assert_eq!(res_parser.value()(input), res_expected); + } else { + assert_eq!(res_parser.value()(input), expected); + } + } +} diff --git a/rust/htp/src/hook.rs b/rust/htp/src/hook.rs new file mode 100644 index 000000000000..ffb77718186d --- /dev/null +++ b/rust/htp/src/hook.rs @@ -0,0 +1,181 @@ +use crate::{ + connection_parser::{ConnectionParser, ParserData}, + error::Result, + log::Log, + transaction::{Data, Transaction}, + HtpStatus, +}; + +/// External (C) callback function prototype +pub type TxExternalCallbackFn = + unsafe extern "C" fn(connp: *const ConnectionParser, tx: *mut Transaction) -> HtpStatus; + +/// Native (rust) callback function prototype +pub type TxNativeCallbackFn = fn(tx: &mut Transaction) -> Result<()>; + +/// Hook for Transaction +pub type TxHook = Hook; + +/// External (C) callback function prototype +pub type DataExternalCallbackFn = + unsafe extern "C" fn(connp: *const ConnectionParser, data: *mut Data) -> HtpStatus; + 
+/// Native (rust) callback function prototype +pub type DataNativeCallbackFn = fn(&mut Transaction, data: &ParserData) -> Result<()>; + +/// Hook for Data +pub type DataHook = Hook; + +/// External (C) callback function prototype +pub type LogExternalCallbackFn = unsafe extern "C" fn(log: *mut Log) -> HtpStatus; + +/// Native (rust) callback function prototype +pub type LogNativeCallbackFn = fn(log: &mut Log) -> Result<()>; + +/// Hook for Log +pub type LogHook = Hook; + +/// Callback list +#[derive(Clone)] +pub struct Hook { + /// List of all callbacks. + pub callbacks: Vec>, +} + +impl Default for Hook { + /// Create a new callback list + fn default() -> Self { + Hook { + callbacks: Vec::new(), + } + } +} +impl Hook { + /// Register a native (rust) callback function + pub fn register(&mut self, cbk_fn: N) { + self.callbacks.push(Callback::Native(cbk_fn)) + } + + /// Register an external (C) callback function + pub fn register_extern(&mut self, cbk_fn: E) { + self.callbacks.push(Callback::External(cbk_fn)) + } +} + +impl TxHook { + /// Run all callbacks on the list + /// + /// This function will exit early if a callback fails to return HtpStatus::OK + /// or HtpStatus::DECLINED. + pub fn run_all(&self, connp: &mut ConnectionParser, tx_index: usize) -> Result<()> { + let connp_ptr: *mut ConnectionParser = connp as *mut ConnectionParser; + if let Some(tx) = connp.tx_mut(tx_index) { + for cbk_fn in &self.callbacks { + match cbk_fn { + Callback::External(cbk_fn) => { + let result = unsafe { cbk_fn(connp_ptr, tx) }; + if result != HtpStatus::OK && result != HtpStatus::DECLINED { + return Err(result); + } + } + Callback::Native(cbk_fn) => { + if let Err(e) = cbk_fn(tx) { + if e != HtpStatus::DECLINED { + return Err(e); + } + } + } + }; + } + } + Ok(()) + } +} + +impl DataHook { + /// Run all callbacks on the list + /// + /// This function will exit early if a callback fails to return HtpStatus::OK + /// or HtpStatus::DECLINED. 
+    pub fn run_all(&self, connp: &ConnectionParser, data: &mut Data) -> Result<()> {
+        for cbk_fn in &self.callbacks {
+            match cbk_fn {
+                Callback::External(cbk_fn) => {
+                    let result = unsafe { cbk_fn(connp, data) };
+                    if result != HtpStatus::OK && result != HtpStatus::DECLINED {
+                        return Err(result);
+                    }
+                }
+                Callback::Native(cbk_fn) => {
+                    // NOTE(review): `data.tx()` is dereferenced unchecked; the unit test
+                    // below builds `Data` with a null transaction pointer, so confirm that
+                    // forming `&mut Transaction` here is sound for every caller.
+                    if let Err(e) = cbk_fn(unsafe { &mut *data.tx() }, data.parser_data()) {
+                        if e != HtpStatus::DECLINED {
+                            return Err(e);
+                        }
+                    }
+                }
+            };
+        }
+        Ok(())
+    }
+}
+
+impl LogHook {
+    /// Run all callbacks on the list
+    ///
+    /// This function will exit early if a callback fails to return HtpStatus::OK
+    /// or HtpStatus::DECLINED.
+    pub fn run_all(&self, log: &mut Log) -> Result<()> {
+        for cbk_fn in &self.callbacks {
+            match cbk_fn {
+                Callback::External(cbk_fn) => {
+                    let result = unsafe { cbk_fn(log) };
+                    if result != HtpStatus::OK && result != HtpStatus::DECLINED {
+                        return Err(result);
+                    }
+                }
+                Callback::Native(cbk_fn) => {
+                    // A native callback declining is not a failure; keep going.
+                    if let Err(e) = cbk_fn(log) {
+                        if e != HtpStatus::DECLINED {
+                            return Err(e);
+                        }
+                    }
+                }
+            };
+        }
+        Ok(())
+    }
+}
+
+/// Type of callbacks
+///
+/// `E` is the external (C) function type and `N` is the native (rust) function type.
+#[derive(Copy, Clone)]
+pub enum Callback<E, N> {
+    /// External (C) callback function
+    External(E),
+    /// Native (rust) callback function
+    Native(N),
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::{config::Config, connection_parser::ParserData};
+
+    #[test]
+    fn test_callback() {
+        unsafe extern "C" fn foo(_: *const ConnectionParser, _: *mut Data) -> HtpStatus {
+            HtpStatus::OK
+        }
+        let connp = ConnectionParser::new(Config::default());
+        let mut hook = DataHook::default();
+
+        hook.register(|_, _| Ok(()));
+        hook.register_extern(foo);
+
+        assert!(hook
+            .run_all(
+                &connp,
+                &mut Data::new(std::ptr::null_mut(), &ParserData::default())
+            )
+            .is_ok());
+    }
+}
diff --git a/rust/htp/src/lib.rs b/rust/htp/src/lib.rs
new file mode 100644
index 000000000000..27e28a9c74e9
--- /dev/null
+++ b/rust/htp/src/lib.rs
@@ -0,0 +1,92 @@
+//! Root crate for libhtp.
+ +#![deny(missing_docs)] +#![deny(unused_lifetimes)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(non_upper_case_globals)] +#[repr(C)] +#[derive(PartialEq, Eq, Debug)] + +/// Status codes used by LibHTP internally. +pub enum HtpStatus { + /// The lowest value LibHTP will use internally. + ERROR_RESERVED = -1000, + /// General-purpose error code. + ERROR = -1, + /// No processing or work was done. This is typically used by callbacks + /// to indicate that they were not interested in doing any work in the + /// given context. + DECLINED = 0, + /// Returned by a function when its work was successfully completed. + OK = 1, + /// Returned when processing a connection stream, after consuming all + /// provided data. The caller should call again with more data. + DATA = 2, + /// Returned when processing a connection stream, after encountering + /// a situation where processing needs to continue on the alternate + /// stream (e.g., the inbound parser needs to observe some outbound + /// data). The data provided was not completely consumed. On the next + /// invocation the caller should supply only the data that has not + /// been processed already. Use request_data_consumed() and response_data_consumed() + /// to determine how much of the most recent data chunk was consumed. + DATA_OTHER = 3, + /// Used by callbacks to indicate that the processing should stop. For example, + /// returning HtpStatus::STOP from a connection callback indicates that LibHTP should + /// stop following that particular connection. + STOP = 4, + /// Same as DATA, but indicates that any non-consumed part of the data chunk + /// should be preserved (buffered) for later. + DATA_BUFFER = 5, + /// The highest value LibHTP will use internally. + STATUS_RESERVED = 1000, +} + +/// Module for providing logging functions. +#[macro_use] +pub mod log; +/// Module for bstr functions. +pub mod bstr; +/// Module for all functions facing c_api. 
+pub mod c_api;
+/// Module for all decompressor functions.
+pub mod decompressors;
+/// Module for all errors.
+pub mod error;
+/// Module for header parsing.
+mod headers;
+/// Module for hooks.
+pub mod hook;
+/// Module for providing unicode bestfit mappings.
+#[macro_use]
+mod unicode_bestfit_map;
+/// Module for libhtp configurations.
+pub mod config;
+/// Module for connections.
+pub mod connection;
+/// Module for connection parser.
+pub mod connection_parser;
+/// Module for extra utility parsers. (only public for doc tests)
+pub mod parsers;
+/// Module for request parsing.
+pub mod request;
+/// Module for response parsing.
+pub mod response;
+/// Module for custom table.
+pub mod table;
+/// Module for transaction parsing.
+pub mod transaction;
+/// Module to track multiple transactions.
+pub mod transactions;
+/// Module for uri parsing.
+pub mod uri;
+/// Module for url decoding.
+pub mod urlencoded;
+/// Module for utf8 decoding.
+mod utf8_decoder;
+/// Module for utility functions.
+pub mod util;
+
+/// Test harness
+// TODO: add #[cfg(test)] here when this is fixed: https://github.com/rust-lang/cargo/issues/8379
+pub mod test;
diff --git a/rust/htp/src/list.rs b/rust/htp/src/list.rs
new file mode 100644
index 000000000000..9ba9fe8ae699
--- /dev/null
+++ b/rust/htp/src/list.rs
@@ -0,0 +1,423 @@
+//! A continuously growing list
+//!
+//! This container implements a list that can only grow as elements are added and removed.
+//! This is implemented as a `Vec<Option<T>>` where each element is either present as a `Some()`
+//! or is not present as a `None`. New elements are always pushed to the end of the list and the
+//! capacity grows to accommodate them; removed elements are substituted with a `None`, so removal
+//! or replace operations will never cause another element to move indices. This is done to
+//! ensure that indexes are always valid even after other operations are executed on the list.
+ +use crate::HtpStatus; +use core::{ops::Index, slice::SliceIndex}; + +/// The List structure +#[derive(Clone, Debug)] +pub struct List { + elements: Vec>, +} + +/// Facilitates creating iterators over `List` +pub struct IntoIter<'a, T> { + inner: std::slice::Iter<'a, Option>, +} + +impl<'a, T> Iterator for IntoIter<'a, T> { + type Item = &'a T; + + /// Returns a reference to the next element. + fn next(&mut self) -> Option { + while let Some(next) = self.inner.next() { + if let Some(next) = next { + return Some(next); + } + } + None + } +} + +impl<'a, T> IntoIterator for &'a List { + type Item = &'a T; + type IntoIter = IntoIter<'a, T>; + + /// Returns an iterator over the List + fn into_iter(self) -> Self::IntoIter { + IntoIter { + inner: self.elements.iter(), + } + } +} + +impl]>> Index for List { + type Output = I::Output; + + #[inline] + /// This allows for square bracket indexing of List. + fn index(&self, index: I) -> &Self::Output { + Index::index(&self.elements, index) + } +} + +impl Default for List { + fn default() -> List { + Self { + elements: Vec::with_capacity(32), + } + } +} + +impl List { + /// Create a new list with specified capacity. + pub fn with_capacity(size: usize) -> Self { + Self { + elements: Vec::with_capacity(size), + } + } + + /// Return the current capacity of the List. + pub fn capacity(&self) -> usize { + self.elements.capacity() + } + + /// Remove all elements from the list. + pub fn clear(&mut self) { + self.elements.clear(); + } + + /// Find the element at the given index. + /// + /// If the index is out of bounds it returns `None`, otherwise it will return the value + /// at the given index. The value at the given index can also be `None` if it has + /// been removed. + pub fn get(&self, idx: usize) -> Option<&T> { + self.elements.get(idx).map(|val| val.as_ref()).flatten() + } + + /// Find the element at the given index. + /// + /// Functions much like [`get`](crate::list::List::get) but returns a mutable reference. 
+    pub fn get_mut(&mut self, idx: usize) -> Option<&mut T> {
+        self.elements.get_mut(idx).and_then(Option::as_mut)
+    }
+
+    /// Retrieve the last element in the list.
+    ///
+    /// The element returned will always be the last element. The returned element can be
+    /// `None` if the element has been removed or the list is empty.
+    pub fn get_last(&self) -> Option<&T> {
+        self.elements.last().and_then(Option::as_ref)
+    }
+
+    /// Retrieve a mutable reference to the last element in the list.
+    ///
+    /// Functions much like [`get_last`](crate::list::List::get_last) but returns a
+    /// mutable reference. Returns `None` on an empty list.
+    pub fn get_last_mut(&mut self) -> Option<&mut T> {
+        // `last_mut` avoids computing `len() - 1`, which underflows on an empty list.
+        self.elements.last_mut().and_then(Option::as_mut)
+    }
+
+    /// Remove one element from the end of the list.
+    ///
+    /// Returns the last element which is also removed, or None if the list is empty.
+    /// Unlike `remove` this function shrinks the size of the list instead of replacing
+    /// the element with `None`.
+    pub fn pop(&mut self) -> Option<T> {
+        self.elements.pop().flatten()
+    }
+
+    /// Add new element to the end of the list.
+    ///
+    /// This function may expand the capacity of the list when necessary.
+    pub fn push(&mut self, value: T) {
+        self.elements.push(Some(value));
+    }
+
+    /// Replace the element at the given index with the provided element.
+    ///
+    /// When the index is within range it will do the replacement, even on previously
+    /// removed elements. If the index is out of bounds it will return `HtpStatus::DECLINED`.
+    pub fn replace(&mut self, idx: usize, value: T) -> Result<(), HtpStatus> {
+        if idx < self.elements.len() {
+            self.elements[idx] = Some(value);
+            Ok(())
+        } else {
+            Err(HtpStatus::DECLINED)
+        }
+    }
+
+    /// Remove the element at the given index.
+    ///
+    /// Returns HtpStatus::DECLINED if no element at the given index exists.
+ /// This does not resize the list nor affect ordering, so + /// [`len`](crate::list::List::len) and [`get`](crate::list::List::get) (on any other + /// index) will behave identically before and after a removal. + pub fn remove(&mut self, idx: usize) -> Result<(), HtpStatus> { + if idx < self.elements.len() { + self.elements[idx] = None; + Ok(()) + } else { + Err(HtpStatus::DECLINED) + } + } + + /// Returns the size of the list. + /// + /// Returns the effective size of the list including `None` values where they have been + /// removed. + pub fn len(&self) -> usize { + self.elements.len() + } + + /// Returns whether the list is empty. + pub fn is_empty(&self) -> bool { + self.elements.is_empty() + } +} + +#[cfg(test)] +mod tests { + use crate::{list::List, HtpStatus}; + + #[test] + fn create() { + let list: List = List::with_capacity(4); + assert_eq!(list.capacity(), 4); + assert_eq!(list.len(), 0); + } + + #[test] + fn insert() { + let mut list = List::with_capacity(4); + list.push('a'); + assert_eq!(list.len(), 1); + assert_eq!(list.get(0), Some(&'a')); + } + + #[test] + fn clear() { + let mut list = List::with_capacity(4); + list.push('a'); + assert_eq!(list.len(), 1); + list.clear(); + assert_eq!(list.len(), 0); + } + + #[test] + fn remove() { + let mut list = List::with_capacity(4); + list.push('a'); + list.push('b'); + list.push('c'); + assert_eq!(list.len(), 3); + let status = list.remove(1); // 'b' + assert_eq!(status, Ok(())); + assert_eq!(list.len(), 3); + assert_eq!(list.get(0), Some(&'a')); + assert_eq!(list.get(1), None); + assert_eq!(list.get(2), Some(&'c')); + } + + #[test] + fn get_out_of_bounds() { + let mut list = List::with_capacity(4); + assert_eq!(list.get(0), None); + list.push('a'); + assert_eq!(list.get(0), Some(&'a')); + assert_eq!(list.get(1), None); + } + + #[test] + fn get_last() { + let mut list = List::with_capacity(4); + list.push('a'); + assert_eq!(list.len(), 1); + let elem = list.get_last(); + assert_eq!(list.len(), 1); + 
assert_eq!(elem, Some(&'a')); + + let elem = list.get_last_mut().unwrap(); + *elem = 'b'; + assert_eq!(list.get(0), Some(&'b')); + } + + #[test] + fn remove_out_of_bounds() { + let mut list = List::with_capacity(4); + list.push('a'); + assert_eq!(list.len(), 1); + let status = list.remove(2); + assert_eq!(status, Err(HtpStatus::DECLINED)); + assert_eq!(list.len(), 1); + assert_eq!(list.get(0), Some(&'a')); + } + + #[test] + fn pop() { + let mut list = List::with_capacity(4); + let elem = list.pop(); + assert_eq!(elem, None); + list.push('a'); + assert_eq!(list.len(), 1); + let elem = list.pop(); + assert_eq!(elem, Some('a')); + assert_eq!(list.len(), 0); + } + + #[test] + fn replace() { + let mut list = List::with_capacity(4); + let status = list.replace(0, 'a'); + assert_eq!(status, Err(HtpStatus::DECLINED)); + list.push('a'); + list.push('b'); + assert_eq!(list.replace(0, 'b'), Ok(())); //Replace element + assert_eq!(list.get(0), Some(&'b')); + let _ = list.remove(0); + assert_eq!(list.get(0), None); + let _ = list.replace(0, 'a'); //Replace deleted element + assert_eq!(list.get(0), Some(&'a')); + assert_eq!(list.replace(2, 'a'), Err(HtpStatus::DECLINED)); //Replace out of bounds + } + + #[test] + fn iterators() { + let mut list = List::with_capacity(4); + list.push('a'); + list.push('b'); + list.push('c'); + let list = list; // No long mut + + let mut list_copy = Vec::new(); + for each in &list { + list_copy.push(each); + } + assert_eq!(list_copy, [&'a', &'b', &'c']); + } + + #[test] + fn iterators_with_gaps() { + let mut list = List::with_capacity(4); + list.push('a'); + list.push('b'); + list.push('c'); + let _ = list.remove(1); + let list = list; + + let mut list_copy = Vec::new(); + for each in &list { + list_copy.push(each); + } + assert_eq!(list_copy, [&'a', &'c']); + } + + #[test] + fn iterator_empty() { + let list: List = List::with_capacity(4); + for each in &list { + assert!( + false, + "list had value when it should have been empty. 
Value: {}", + each + ); + } + } + + #[test] + fn index() { + let mut list = List::with_capacity(4); + list.push('a'); + list.push('b'); + + assert_eq!(list[0], Some('a')); + assert_eq!(list[1], Some('b')); + } + + #[test] + fn expand1() { + let mut l = List::with_capacity(2); + + l.push("1"); + l.push("2"); + + assert_eq!(2, l.len()); + + l.push("3"); + + assert_eq!(3, l.len()); + + let p = l.get(0).unwrap(); + assert_eq!(*p, "1"); + + let p = l.get(1).unwrap(); + assert_eq!(*p, "2"); + + let p = l.get(2).unwrap(); + assert_eq!(*p, "3"); + + drop(&l); + } + + #[test] + fn expand2() { + let mut l = List::with_capacity(2); + l.push("1"); + l.push("2"); + + assert_eq!(2, l.len()); + + l.push("3"); + l.push("4"); + + assert_eq!(4, l.len()); + + let p = l.get(0).unwrap(); + assert_eq!(*p, "1"); + + let p = l.get(1).unwrap(); + assert_eq!(*p, "2"); + + let p = l.get(2).unwrap(); + assert_eq!(*p, "3"); + + let p = l.pop().unwrap(); + assert_eq!(p, "4"); + } + + #[test] + fn misc() { + let mut l = List::with_capacity(16); + l.push("1"); + l.push("2"); + l.push("3"); + + assert_eq!(3, l.len()); + + let p = l.pop().unwrap(); + assert_eq!("3", p); + + assert_eq!(2, l.len()); + + let p = l.pop().unwrap(); + assert_eq!(p, "2"); + + let p = l.pop().unwrap(); + assert_eq!(p, "1"); + + let p = l.pop(); + assert!(p.is_none()); + } + + #[test] + fn misc2() { + let mut l = List::with_capacity(2); + l.push("1"); + l.push("2"); + l.push("3"); + let p = l.get(2).unwrap(); + assert_eq!(*p, "3"); + assert_eq!(3, l.len()); + let _ = l.replace(2, "4"); + let p = l.pop().unwrap(); + assert_eq!(p, "4"); + } +} diff --git a/rust/htp/src/log.rs b/rust/htp/src/log.rs new file mode 100644 index 000000000000..8a16249d07c2 --- /dev/null +++ b/rust/htp/src/log.rs @@ -0,0 +1,331 @@ +use crate::connection::Connection; +use std::{net::IpAddr, sync::mpsc::Sender}; + +/// Different codes used for logging. 
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum HtpLogCode {
+    // Only UNKNOWN has an explicit discriminant; every later variant takes the next
+    // integer in declaration order, so reordering variants changes their numeric values.
+    /// Default
+    UNKNOWN = 0,
+    /// Gzip Decompression Failed
+    GZIP_DECOMPRESSION_FAILED,
+    /// Request field missing a colon.
+    REQUEST_FIELD_MISSING_COLON,
+    /// Response field missing a colon.
+    RESPONSE_FIELD_MISSING_COLON,
+    /// Request chunk length parsing failed.
+    INVALID_REQUEST_CHUNK_LEN,
+    /// Response chunked-length parsing failed.
+    INVALID_RESPONSE_CHUNK_LEN,
+    /// Request chunk extension.
+    REQUEST_CHUNK_EXTENSION,
+    /// Response chunk extension.
+    RESPONSE_CHUNK_EXTENSION,
+    /// Request has too many headers.
+    REQUEST_TOO_MANY_HEADERS,
+    /// Response has too many headers.
+    RESPONSE_TOO_MANY_HEADERS,
+    /// Request transfer-encoding invalid.
+    INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST,
+    /// Response transfer-encoding invalid.
+    INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE,
+    /// Request content-length parsing failed.
+    INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST,
+    /// Response content-length parsing failed.
+    INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+    /// Request has a duplicate content-length field.
+    DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST,
+    /// Response has a duplicate content-length field.
+    DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+    /// 100 Continue response status already seen.
+    CONTINUE_ALREADY_SEEN,
+    /// Unable to match response to a request.
+    UNABLE_TO_MATCH_RESPONSE_TO_REQUEST,
+    /// Request server port is invalid.
+    INVALID_SERVER_PORT_IN_REQUEST,
+    /// Authority port is invalid.
+    INVALID_AUTHORITY_PORT,
+    /// Request header name is incorrectly formed.
+    REQUEST_HEADER_INVALID,
+    /// Response header name is incorrectly formed.
+    RESPONSE_HEADER_INVALID,
+    /// Host header is missing.
+    MISSING_HOST_HEADER,
+    /// Host header is ambiguous.
+    HOST_HEADER_AMBIGUOUS,
+    /// Request has invalid line folding.
+    INVALID_REQUEST_FIELD_FOLDING,
+    /// Response has invalid line folding.
+    INVALID_RESPONSE_FIELD_FOLDING,
+    /// Request buffer field is over the limit.
+    REQUEST_FIELD_TOO_LONG,
+    /// Response buffer field is over the limit.
+    RESPONSE_FIELD_TOO_LONG,
+    /// Mismatch between request server port and tcp port.
+    REQUEST_SERVER_PORT_TCP_PORT_MISMATCH,
+    /// Uri hostname is invalid.
+    URI_HOST_INVALID,
+    /// Header hostname is invalid.
+    HEADER_HOST_INVALID,
+    /// Non compliant delimiter between method and URI in request line.
+    METHOD_DELIM_NON_COMPLIANT,
+    /// Parsed request-uri contains a non compliant delimiter.
+    URI_DELIM_NON_COMPLIANT,
+    /// Request line has leading whitespace.
+    REQUEST_LINE_LEADING_WHITESPACE,
+    /// Response content encoding lzma layers is greater than limit.
+    RESPONSE_TOO_MANY_LZMA_LAYERS,
+    /// Request content encoding lzma layers is greater than limit.
+    REQUEST_TOO_MANY_LZMA_LAYERS,
+    /// Too many request or response encoding layers.
+    TOO_MANY_ENCODING_LAYERS,
+    /// Response header content-encoding header is invalid.
+    ABNORMAL_CE_HEADER,
+    /// Request authorization header unrecognized.
+    AUTH_UNRECOGNIZED,
+    /// Request header has been seen more than once.
+    REQUEST_HEADER_REPETITION,
+    /// Response header has been seen more than once.
+    RESPONSE_HEADER_REPETITION,
+    /// Response content-type is multipart-byteranges (unsupported).
+    RESPONSE_MULTIPART_BYTERANGES,
+    /// Response transfer-encoding has an abnormal chunked value.
+    RESPONSE_ABNORMAL_TRANSFER_ENCODING,
+    /// Response chunked transfer-encoding on HTTP/0.9 or HTTP/1.0.
+    RESPONSE_CHUNKED_OLD_PROTO,
+    /// Response protocol invalid.
+    RESPONSE_INVALID_PROTOCOL,
+    /// Response status invalid.
+    RESPONSE_INVALID_STATUS,
+    /// Request line is incomplete.
+    REQUEST_LINE_INCOMPLETE,
+    /// Request uri has double encoding.
+    DOUBLE_ENCODED_URI,
+    /// Request line is invalid.
+    REQUEST_LINE_INVALID,
+    /// Unexpected request body present.
+    REQUEST_BODY_UNEXPECTED,
+    /// Reached LZMA memory limit.
+    LZMA_MEMLIMIT_REACHED,
+    /// Reached configured time limit for decompression or reached bomb limit.
+    COMPRESSION_BOMB,
+    /// Unexpected response body present.
+    RESPONSE_BODY_UNEXPECTED,
+    /// Content-length parsing contains extra leading characters.
+    CONTENT_LENGTH_EXTRA_DATA_START,
+    /// Content-length parsing contains extra trailing characters.
+    CONTENT_LENGTH_EXTRA_DATA_END,
+    /// 101 Switching Protocol seen with a content-length.
+    SWITCHING_PROTO_WITH_CONTENT_LENGTH,
+    /// End of line is deformed.
+    DEFORMED_EOL,
+    /// Parsing error encountered in request or response.
+    PARSER_STATE_ERROR,
+    /// Missing outbound transaction while state is not idle.
+    MISSING_OUTBOUND_TRANSACTION_DATA,
+    /// Missing inbound transaction while state is not idle.
+    MISSING_INBOUND_TRANSACTION_DATA,
+    /// Supplied data chunk has a length of zero.
+    ZERO_LENGTH_DATA_CHUNKS,
+    /// Request line method is unknown.
+    REQUEST_LINE_UNKNOWN_METHOD,
+    /// Request line method is unknown and no protocol information was found.
+    REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL,
+    /// Request line method is unknown and protocol is invalid.
+    REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL,
+    /// Request line protocol information was not found.
+    REQUEST_LINE_NO_PROTOCOL,
+    /// Response line protocol is invalid.
+    RESPONSE_LINE_INVALID_PROTOCOL,
+    /// Response line status number is out of range.
+    RESPONSE_LINE_INVALID_RESPONSE_STATUS,
+    /// Response parsing progress is at an invalid state.
+    RESPONSE_BODY_INTERNAL_ERROR,
+    /// Request body data callback produced an error.
+    REQUEST_BODY_DATA_CALLBACK_ERROR,
+    /// Response header name is empty.
+    RESPONSE_INVALID_EMPTY_NAME,
+    /// Request header name is empty.
+    REQUEST_INVALID_EMPTY_NAME,
+    /// Response header name has extra whitespace after name.
+    RESPONSE_INVALID_LWS_AFTER_NAME,
+    /// Response header name is not a valid token.
+    RESPONSE_HEADER_NAME_NOT_TOKEN,
+    /// Request header name has extra whitespace after name.
+    REQUEST_INVALID_LWS_AFTER_NAME,
+    /// LZMA decompression is disabled.
+ LZMA_DECOMPRESSION_DISABLED, + /// Tried to open a connection that is already open. + CONNECTION_ALREADY_OPEN, + /// Protocol parsing detected leading or trailing data. + PROTOCOL_CONTAINS_EXTRA_DATA, + /// Invalid gap detected. + INVALID_GAP, + /// Compression bomb due to double lzma encoding. + COMPRESSION_BOMB_DOUBLE_LZMA, + /// Invalid content-encoding detected. + INVALID_CONTENT_ENCODING, + /// Error retrieving a log message's code + ERROR, +} + +/// Enumerates all log levels. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] +pub enum HtpLogLevel { + /// No log level. + NONE, + /// Designates fatal error. + ERROR, + /// Designates hazardous situations. + WARNING, + /// Default log level value. + NOTICE, + /// Designates useful information. + INFO, + /// Designates lower priority information. + DEBUG, + /// Designates very low priority, often extremely verbose, information. + DEBUG2, +} +#[derive(Clone)] +/// Logger struct +pub struct Logger { + /// The sender half of a logging channel + pub sender: Sender, + /// Log level used when deciding whether to store or + /// ignore the messages issued by the parser. + pub level: HtpLogLevel, +} + +impl Logger { + /// Returns a new logger instance holding a clone of `sender`. + pub fn new(sender: &Sender, level: HtpLogLevel) -> Logger { + Self { + sender: sender.clone(), + level, + } + } + /// Logs a message to the logger channel. + pub fn log( + &mut self, file: &str, line: u32, level: HtpLogLevel, code: HtpLogCode, msg: String, + ) { + // Only forward messages whose level compares at or below the configured level; a failed send is deliberately ignored. + if level <= self.level { + let _ = self.sender.send(Message::new(file, line, level, code, msg)); + } + } +} + +#[derive(Clone)] +/// Represents a single Message entry for a log +pub struct Message { + /// Log message string. + pub msg: String, + /// Message level. + pub level: HtpLogLevel, + /// Message code. + pub code: HtpLogCode, + /// File in which the code that emitted the message resides.
+ pub file: String, + /// Line number on which the code that emitted the message resides. + pub line: u32, +} + +impl Message { + /// Returns a new Message instance, storing an owned copy of `file`. + pub fn new( + file: &str, line: u32, level: HtpLogLevel, code: HtpLogCode, msg: String, + ) -> Message { + Self { + file: file.to_string(), + line, + level, + code, + msg, + } + } +} + +/// Represents a single log entry. +#[derive(Clone)] +pub struct Log { + /// Client IP address. + pub client_addr: Option, + /// Client port. + pub client_port: Option, + /// Server IP address. + pub server_addr: Option, + /// Server port. + pub server_port: Option, + + /// Log message. + pub msg: Message, +} + +impl Log { + /// Returns a new Log instance, copying the client and server address/port from `conn`. + pub fn new(conn: &Connection, msg: Message) -> Log { + Self { + client_addr: conn.client_addr, + client_port: conn.client_port, + server_addr: conn.server_addr, + server_port: conn.server_port, + msg, + } + } +} + +/// Logs a message at the given level, recording the call site via `file!()` and `line!()`. +#[macro_export] +macro_rules! htp_log { + ($logger:expr, $level:expr, $code:expr, $msg:expr) => {{ + use $crate::log::{HtpLogCode, HtpLogLevel}; + $logger.log(file!(), line!(), $level, $code, $msg.to_string()); + }}; +} + +/// Logs a message at the info level. +#[macro_export] +macro_rules! htp_info { + ($logger:expr, $code:expr, $msg:expr) => { + htp_log!($logger, HtpLogLevel::INFO, $code, $msg); + }; +} + +/// Logs a message at the debug level. +#[macro_export] +macro_rules! htp_debug { + ($logger:expr, $code:expr, $msg:expr) => { + htp_log!($logger, HtpLogLevel::DEBUG, $code, $msg); + }; +} + +/// Logs a message at the warning level. +#[macro_export] +macro_rules! htp_warn { + ($logger:expr, $code:expr, $msg:expr) => { + htp_log!($logger, HtpLogLevel::WARNING, $code, $msg); + }; +} + +/// Logs a message at the error level. +#[macro_export] +macro_rules!
htp_error { + ($logger:expr, $code:expr, $msg:expr) => { + htp_log!($logger, HtpLogLevel::ERROR, $code, $msg); + }; +} + +/// Logs a message at the warning level, ensuring that it only logs the message once. +#[macro_export] +macro_rules! htp_warn_once { + ($logger:expr, $code:expr, $msg:expr, $tx_flags:expr, $flags:expr, $flag:expr) => { + // Log only once per transaction; the flag is then set on both flag holders whether or not a message was logged. + if !$tx_flags.is_set($flag) { + htp_warn!($logger, $code, $msg); + } + $tx_flags.set($flag); + $flags.set($flag); + }; +} diff --git a/rust/htp/src/multipart.rs b/rust/htp/src/multipart.rs new file mode 100644 index 000000000000..15cae42d83d6 --- /dev/null +++ b/rust/htp/src/multipart.rs @@ -0,0 +1,1371 @@ +use crate::{ + bstr::Bstr, + config::{Config, MultipartConfig}, + error::Result, + headers::{Flags as HeaderFlags, Parser as HeadersParser, Side}, + hook::FileDataHook, + list::List, + parsers::parse_content_type, + table::Table, + transaction::{Header, Headers}, + util::{ + is_space, take_ascii_whitespace, take_is_space, take_until_no_case, File, FlagOperations, + HtpFileSource, + }, + HtpStatus, +}; +use nom::{ + branch::alt, + bytes::complete::{tag, tag_no_case, take, take_till, take_until, take_while}, + character::complete::char, + character::is_space as nom_is_space, + combinator::{map, not, opt, peek}, + multi::fold_many1, + number::complete::be_u8, + sequence::tuple, + IResult, +}; +use std::rc::Rc; + +/// Export Multipart flags. +#[derive(Debug)] +pub struct Flags; + +impl Flags { + /// Seen a LF line in the payload. LF lines are not allowed, but + /// some clients do use them and some backends do accept them. Mixing + /// LF and CRLF lines within some payload might be unusual. + pub const LF_LINE: u64 = 0x0001; + /// Seen a CRLF line in the payload. This is normal and expected. + pub const CRLF_LINE: u64 = 0x0002; + /// Seen LWS after a boundary instance in the body. Unusual.
+ pub const BBOUNDARY_LWS_AFTER: u64 = 0x0004; + /// Seen non-LWS content after a boundary instance in the body. Highly unusual. + pub const BBOUNDARY_NLWS_AFTER: u64 = 0x0008; + + /// Payload has a preamble part. Might not be that unusual. + pub const HAS_PREAMBLE: u64 = 0x0010; + + /// Payload has an epilogue part. Unusual. + pub const HAS_EPILOGUE: u64 = 0x0020; + + /// The last boundary was seen in the payload. Absence of the last boundary + /// may not break parsing with some (most?) backends, but it means that the payload + /// is not well formed. Can occur if the client gives up, or if the connection is + /// interrupted. Incomplete payloads should be blocked whenever possible. + pub const SEEN_LAST_BOUNDARY: u64 = 0x0040; + + /// There was a part after the last boundary. This is highly irregular + /// and indicative of evasion. + pub const PART_AFTER_LAST_BOUNDARY: u64 = 0x0080; + + /// The payload ends abruptly, without proper termination. Can occur if the client gives up, + /// or if the connection is interrupted. When this flag is raised, PART_INCOMPLETE + /// will also be raised for the part that was only partially processed. (But the opposite may not + /// always be the case -- there are other ways in which a part can be left incomplete.) + pub const INCOMPLETE: u64 = 0x0100; + /// The boundary in the Content-Type header is invalid. + pub const HBOUNDARY_INVALID: u64 = 0x0200; + + /// The boundary in the Content-Type header is unusual. This may mean that evasion + /// is attempted, but it could also mean that we have encountered a client that does + /// not do things in the way it should. + pub const HBOUNDARY_UNUSUAL: u64 = 0x0400; + + /// The boundary in the Content-Type header is quoted. This is very unusual, + /// and may be indicative of an evasion attempt. + pub const HBOUNDARY_QUOTED: u64 = 0x0800; + /// Header folding was used in part headers. Very unusual.
+ pub const PART_HEADER_FOLDING: u64 = 0x1000; + + /// A part of unknown type was encountered, which probably means that the part is lacking + /// a Content-Disposition header, or that the header is invalid. Highly unusual. + pub const PART_UNKNOWN: u64 = 0x2000; + /// There was a repeated part header, possibly in an attempt to confuse the parser. Very unusual. + pub const PART_HEADER_REPEATED: u64 = 0x4000; + /// Unknown part header encountered. + pub const PART_HEADER_UNKNOWN: u64 = 0x8000; + /// Invalid part header encountered. + pub const PART_HEADER_INVALID: u64 = 0x10000; + /// Part type specified in the C-D header is neither HtpMultipartType::TEXT nor HtpMultipartType::FILE. + pub const CD_TYPE_INVALID: u64 = 0x20000; + /// Content-Disposition part header with multiple parameters with the same name. + pub const CD_PARAM_REPEATED: u64 = 0x40000; + /// Unknown Content-Disposition parameter. + pub const CD_PARAM_UNKNOWN: u64 = 0x80000; + /// Invalid Content-Disposition syntax. + pub const CD_SYNTAX_INVALID: u64 = 0x10_0000; + + /// There is an abruptly terminated part. This can happen when the payload itself is abruptly + /// terminated (in which case INCOMPLETE will be raised). However, it can also + /// happen when a boundary is seen before any part data. + pub const PART_INCOMPLETE: u64 = 0x20_0000; + /// A NUL byte was seen in a part header area. + pub const NUL_BYTE: u64 = 0x40_0000; + /// A collection of flags that all indicate an invalid C-D header. + pub const CD_INVALID: u64 = (Self::CD_TYPE_INVALID + | Self::CD_PARAM_REPEATED + | Self::CD_PARAM_UNKNOWN + | Self::CD_SYNTAX_INVALID); + /// A collection of flags that all indicate an invalid part. + pub const PART_INVALID: u64 = (Self::CD_INVALID + | Self::NUL_BYTE + | Self::PART_UNKNOWN + | Self::PART_HEADER_REPEATED + | Self::PART_INCOMPLETE + | Self::PART_HEADER_UNKNOWN + | Self::PART_HEADER_INVALID); + /// A collection of flags that all indicate an invalid Multipart payload.
+ pub const INVALID: u64 = (Self::PART_INVALID + | Self::PART_AFTER_LAST_BOUNDARY + | Self::INCOMPLETE + | Self::HBOUNDARY_INVALID); + /// A collection of flags that all indicate an unusual Multipart payload. + pub const UNUSUAL: u64 = (Self::INVALID + | Self::PART_HEADER_FOLDING + | Self::BBOUNDARY_NLWS_AFTER + | Self::HAS_EPILOGUE + | Self::HBOUNDARY_UNUSUAL + | Self::HBOUNDARY_QUOTED); + /// A collection of flags that all indicate an unusual Multipart payload, additionally counting minor irregularities (LF lines, LWS after a boundary, a preamble). + pub const UNUSUAL_PARANOID: u64 = + (Self::UNUSUAL | Self::LF_LINE | Self::BBOUNDARY_LWS_AFTER | Self::HAS_PREAMBLE); +} + +/// Keeps track of multipart parsing. +#[derive(Clone)] +pub struct Parser { + /// Contains information regarding multipart body. + pub multipart: Multipart, + /// Config structure for multipart parsing. + pub cfg: MultipartConfig, + /// Request file data hook invoked whenever file data is available. + pub hook: FileDataHook, + /// Number of extracted files. + pub file_count: u32, + // Internal parsing fields; move into a private structure + /// Parser state; one of the HtpMultipartState variants. + parser_state: HtpMultipartState, + + /// Keeps track of the current position in the boundary matching progress. + /// When this field reaches boundary_len, we have a boundary match. + pub boundary_match_pos: usize, + + /// Index of part that is currently being processed. + pub current_part_idx: Option, + + /// This parser consists of two layers: the outer layer is charged with + /// finding parts, and the internal layer handles part data. There is an + /// interesting interaction between the two parsers. Because the + /// outer layer is seeing every line (it has to, in order to test for + /// boundaries), it effectively also splits input into lines. The + /// inner parser deals with two areas: first is the headers, which are + /// line based, followed by binary data.
When parsing headers, the inner + /// parser can reuse the lines identified by the outer parser. In this + /// variable we keep the current parsing mode of the part, which helps + /// us process input data more efficiently. The possible values are + /// LINE and DATA. + current_part_mode: HtpMultipartMode, + + /// Used for buffering when a potential boundary is fragmented + /// across many input data buffers. On a match, the data stored here is + /// discarded. When there is no match, the buffer is processed as data + /// (belonging to the currently active part). + pub boundary_candidate: Bstr, + /// Used for buffering when part header data arrives in pieces. + pub part_header: Bstr, + /// Header line to be parsed. + pub pending_header_line: Bstr, + /// Working buffer holding the data that handle_data consumes next. + pub to_consume: Bstr, + + /// Stores text part pieces until the entire part is seen, at which + /// point the pieces are assembled into a single buffer, and the + /// builder cleared. + pub part_data_pieces: Bstr, + + /// The offset of the current boundary candidate, relative to the most + /// recent data chunk (first unprocessed chunk of data). + pub boundary_candidate_pos: usize, + + /// When we encounter a CR as the last byte in a buffer, we don't know + /// if the byte is part of a CRLF combination. If it is, then the CR + /// might be a part of a boundary. But if it is not, it's current + /// part's data. Because we know how to handle everything before the + /// CR, we do, and we use this flag to indicate that a CR byte is + /// effectively being buffered. This is probably a case of premature + /// optimization, but I am going to leave it in for now. + pub cr_aside: bool, +} + +/// Creates a new multipart/form-data parser. +/// The boundary bytes are copied into the parser, prefixed with `--`. +/// +/// Returns a new parser instance. +impl Parser { + /// Create new Parser with `Config`, boundary data and flags.
+ pub fn new(cfg: &Rc, boundary: &[u8], flags: u64) -> Self { + Self { + multipart: Multipart { + boundary_len: boundary.len() + 2, + boundary: Bstr::from([b"--", boundary].concat()), + boundary_count: 0, + parts: List::with_capacity(64), + flags, + }, + cfg: cfg.multipart_cfg.clone(), + hook: cfg.hook_request_file_data.clone(), + file_count: 0, + // We're starting in boundary-matching mode. The first boundary can appear without the + // CRLF, and our starting state expects that. If we encounter non-boundary data, the + // state will switch to data mode. Then, if the data is CRLF or LF, we will go back + // to boundary matching. Thus, we handle all the possibilities. + parser_state: HtpMultipartState::BOUNDARY, + boundary_match_pos: 0, + current_part_idx: None, + current_part_mode: HtpMultipartMode::LINE, + boundary_candidate: Bstr::with_capacity(boundary.len()), + part_header: Bstr::with_capacity(64), + pending_header_line: Bstr::with_capacity(64), + to_consume: Bstr::new(), + part_data_pieces: Bstr::with_capacity(64), + boundary_candidate_pos: 0, + cr_aside: false, + } + } + + /// Returns the part currently being processed, or HtpStatus::ERROR if there is none. + pub fn get_current_part(&mut self) -> Result<&mut Part> { + self.current_part_idx + .and_then(move |idx| self.multipart.parts.get_mut(idx)) + .ok_or(HtpStatus::ERROR) + } + + /// Handles a boundary event, which means that it will finalize a part if one exists. + fn handle_boundary(&mut self) -> Result<()> { + if self.current_part_idx.is_some() { + self.finalize_part_data()?; + // We're done with this part + self.current_part_idx = None; + // Revert to line mode + self.current_part_mode = HtpMultipartMode::LINE + } + Ok(()) + } + + /// Handles the data buffered in `to_consume`, creating new parts as necessary. + fn handle_data(&mut self, is_line: bool) -> Result<()> { + if self.to_consume.is_empty() { + return Ok(()); + } + // Do we have a part already? + if self.current_part_idx.is_none() { + // Create a new part. + let mut part = Part::default(); + // Set current part.
+ if self.multipart.boundary_count == 0 { + part.type_0 = HtpMultipartType::PREAMBLE; + self.multipart.flags.set(Flags::HAS_PREAMBLE); + self.current_part_mode = HtpMultipartMode::DATA + } else { + // Part after preamble. + self.current_part_mode = HtpMultipartMode::LINE + } + // Add part to the list. + self.multipart.parts.push(part); + self.current_part_idx = Some(self.multipart.parts.len() - 1); + } + + let rc = if self.current_part_idx.is_some() { + let data = self.to_consume.clone(); + self.handle_part_data(data.as_slice(), is_line) + } else { + Ok(()) + }; + + self.to_consume.clear(); + rc + } + + /// Handles part data, updating flags, and creating new headers as necessary. In LINE mode complete lines are collected as part headers and an empty line switches the part to DATA mode; in DATA mode the bytes go to the part's file or are buffered in `part_data_pieces`. + pub fn handle_part_data(&mut self, to_consume: &[u8], is_line: bool) -> Result<()> { + // End of the line. + let mut line: Option = None; + // Keep track of raw part length. + self.get_current_part()?.len += to_consume.len(); + // If we're processing a part that came after the last boundary, then we're not sure if it + // is the epilogue part or some other part (in case of evasion attempt). For that reason we + // will keep all its data in the part_data_pieces structure. If it ends up not being the + // epilogue, this structure will be cleared. + if self.multipart.flags.is_set(Flags::SEEN_LAST_BOUNDARY) + && self.get_current_part()?.type_0 == HtpMultipartType::UNKNOWN + { + self.part_data_pieces.add(to_consume); + } + if self.current_part_mode == HtpMultipartMode::LINE { + // Line mode. + if is_line { + // If this line came to us in pieces, combine them now into a single buffer. + if !self.part_header.is_empty() { + // Allocate string and join the buffered fragment with the data passed in by the caller (not the parser's own working buffer). + let mut header = Bstr::with_capacity(self.part_header.len() + to_consume.len()); + header.add(self.part_header.as_slice()); + header.add(to_consume); + line = Some(header); + self.part_header.clear(); + } + let data = line + .as_ref() + .map(|line| line.as_slice()) + .unwrap_or(to_consume); + // Is it an empty line?
+ if data.is_empty() || data.eq(b"\r\n") || data.eq(b"\n") { + self.pending_header_line.add(data); + // Empty line; process headers and switch to data mode. + // Process the pending header, if any; only a fatal ERROR aborts, DECLINED is tolerated. + if !self.pending_header_line.is_empty() + && self.parse_header() == Err(HtpStatus::ERROR) + { + return Err(HtpStatus::ERROR); + } + if self.parse_c_d() == Err(HtpStatus::ERROR) { + return Err(HtpStatus::ERROR); + } + if let Some((_, header)) = self + .get_current_part()? + .headers + .get_nocase_nozero("content-type") + { + self.get_current_part()?.content_type = + Some(parse_content_type(header.value.as_slice())?); + } + self.current_part_mode = HtpMultipartMode::DATA; + self.part_header.clear(); + let file_count = self.file_count; + let cfg = self.cfg.clone(); + let part = self.get_current_part()?; + match &mut part.file { + Some(file) => { + // Changing part type because we have a filename. + part.type_0 = HtpMultipartType::FILE; + if cfg.extract_request_files + && file_count < cfg.extract_request_files_limit + { + file.create(&cfg.tmpdir)?; + self.file_count += 1; + } + } + None => { + if !self.get_current_part()?.name.is_empty() { + // Changing part type because we have a name. + self.get_current_part()?.type_0 = HtpMultipartType::TEXT; + self.part_data_pieces.clear(); + } + } + } + } else if let Some(header) = line { + self.pending_header_line.add(header.as_slice()); + } else { + self.pending_header_line.add(data); + } + } else { + // Not end of line; keep the data chunk for later. + self.part_header.add(to_consume); + } + } else { + // Data mode; keep the data chunk for later (but not if it is a file). + match self.get_current_part()?.type_0 { + HtpMultipartType::FILE => { + // Invoke file data callbacks. + // Ignore error. + let _ = self.run_request_file_data_hook(false); + // Optionally, store the data in a file. + if let Some(file) = &mut self.get_current_part()?.file { + return file.write(to_consume); + } + } + _ => { + // Make a copy of the data in RAM.
+ self.part_data_pieces.add(to_consume); + } + } + } + Ok(()) + } + + /// Processes set-aside data. + fn process_aside(&mut self, matched: bool) { + // The stored data pieces can contain up to one line. If we're in data mode and there + // was no boundary match, things are straightforward -- we process everything as data. + // If there was a match, we need to take care to not send the line ending as data, nor + // anything that follows (because it's going to be a part of the boundary). Similarly, + // when we are in line mode, we need to split the first data chunk, processing the first + // part as line and the second part as data. + // Do we need to do any chunk splitting? + if matched || self.current_part_mode == HtpMultipartMode::LINE { + // Line mode or boundary match + if matched { + if self.to_consume.last() == Some(&(b'\n')) { + self.to_consume.pop(); + } + if self.to_consume.last() == Some(&(b'\r')) { + self.to_consume.pop(); + } + } else { + // Process the CR byte, if set aside. + if self.cr_aside { + self.to_consume.add("\r"); + } + } + // Ignore result. + let _ = self.handle_data(self.current_part_mode == HtpMultipartMode::LINE); + self.cr_aside = false; + // We know that we went to match a boundary because + // we saw a new line. Now we have to find that line and + // process it. It's either going to be in the current chunk, + // or in the first stored chunk. + + // Split the first chunk. + // In line mode, we are OK with line endings. + // This should be unnecessary, but as a precaution check for min value: + let pos = std::cmp::min(self.boundary_candidate_pos, self.boundary_candidate.len()); + self.to_consume.add(&self.boundary_candidate[..pos]); + // Ignore result. + let _ = self.handle_data(!matched); + // The second part of the split chunks belongs to the boundary + // when matched, data otherwise. + if !matched { + self.to_consume.add(&self.boundary_candidate[pos..]); + } + } else { + // Do not send data if there was a boundary match.
The stored + // data belongs to the boundary. + // Data mode and no match. + // In data mode, we process the lone CR byte as data. + + // Treat as part data, when there is not a match. + if self.cr_aside { + self.to_consume.add("\r"); + self.cr_aside = false; + } + // We then process any pieces that we might have stored, also as data. + self.to_consume.add(self.boundary_candidate.as_slice()); + } + self.boundary_candidate.clear(); + // Ignore result. + let _ = self.handle_data(false); + } + + /// Finalize parsing: flushes any buffered data, finalizes the last part and raises INCOMPLETE unless that part is the epilogue. + pub fn finalize(&mut self) -> Result<()> { + if self.current_part_idx.is_some() { + // Process buffered data, if any. + self.process_aside(false); + // Finalize the last part. + self.finalize_part_data()?; + // It is OK to end abruptly in the epilogue part, but not in any other. + if self.get_current_part()?.type_0 != HtpMultipartType::EPILOGUE { + self.multipart.flags.set(Flags::INCOMPLETE) + } + } + self.boundary_candidate.clear(); + Ok(()) + } + + /// Finalizes part processing: classifies the part, raises sanity-check flags and stores the collected value. + pub fn finalize_part_data(&mut self) -> Result<()> { + // Determine if this part is the epilogue. + if self.multipart.flags.is_set(Flags::SEEN_LAST_BOUNDARY) { + if self.get_current_part()?.type_0 == HtpMultipartType::UNKNOWN { + // Assume that the unknown part after the last boundary is the epilogue. + self.get_current_part()?.type_0 = HtpMultipartType::EPILOGUE; + + // But if we've already seen a part we thought was the epilogue, + // raise PART_UNKNOWN. Multiple epilogues are not allowed. + if self.multipart.flags.is_set(Flags::HAS_EPILOGUE) { + self.multipart.flags.set(Flags::PART_UNKNOWN) + } + self.multipart.flags.set(Flags::HAS_EPILOGUE) + } else { + self.multipart.flags.set(Flags::PART_AFTER_LAST_BOUNDARY) + } + } + // Sanity checks. + // Have we seen complete part headers? If we have not, that means that the part ended prematurely.
+ if self.get_current_part()?.type_0 != HtpMultipartType::EPILOGUE + && self.current_part_mode != HtpMultipartMode::DATA + { + self.multipart.flags.set(Flags::PART_INCOMPLETE) + } + // Have we been able to determine the part type? If not, this means + // that the part did not contain the C-D header. + if self.get_current_part()?.type_0 == HtpMultipartType::UNKNOWN { + self.multipart.flags.set(Flags::PART_UNKNOWN) + } + // Finalize part value: files get an end-of-file callback, other parts receive their buffered pieces as value. + if self.get_current_part()?.type_0 == HtpMultipartType::FILE { + // Notify callbacks about the end of the file. + // Ignore result. + let _ = self.run_request_file_data_hook(true); + } else if !self.part_data_pieces.is_empty() { + let data = self.part_data_pieces.clone(); + self.get_current_part()?.value.clear(); + self.get_current_part()?.value.add(data.as_slice()); + self.part_data_pieces.clear(); + } + Ok(()) + } + + /// Returns a mutable reference to the multipart structure created by the parser. + pub fn get_multipart(&mut self) -> &mut Multipart { + &mut self.multipart + } + + /// Handle part data up to the next line ending and return the remaining input. This function will also buffer a CR character if + /// it is the last byte in the buffer. + fn parse_state_data<'a>(&mut self, input: &'a [u8]) -> &'a [u8] { + if let Ok((remaining, mut consumed)) = take_till::<_, _, (&[u8], nom::error::ErrorKind)>( + |c: u8| c == b'\r' || c == b'\n', + )(input) + { + if let Ok((left, _)) = tag::<_, _, (&[u8], nom::error::ErrorKind)>("\r\n")(remaining) { + consumed = &input[..consumed.len() + 2]; + self.multipart.flags.set(Flags::CRLF_LINE); + // Prepare to switch to boundary testing. + self.parser_state = HtpMultipartState::BOUNDARY; + self.boundary_match_pos = 0; + self.to_consume.add(consumed); + return left; + } else if let Ok((left, _)) = char::<_, (&[u8], nom::error::ErrorKind)>('\r')(remaining) + { + if left.is_empty() { + // We have CR as the last byte in input. We are going to process + // what we have in the buffer as data, except for the CR byte, + // which we're going to leave for later.
If it happens that a + // CR is followed by a LF and then a boundary, the CR is going + // to be discarded. + self.cr_aside = true + } else { + // This is not a new line; advance over the + // byte and clear the CR set-aside flag. + consumed = &input[..consumed.len() + 1]; + self.cr_aside = false; + } + self.to_consume.add(consumed); + return left; + } else if let Ok((left, _)) = char::<_, (&[u8], nom::error::ErrorKind)>('\n')(remaining) + { + // Check for a LF-terminated line. + // Advance over LF. + // Did we have a CR in the previous input chunk? + consumed = &input[..consumed.len() + 1]; + if !self.cr_aside { + self.multipart.flags.set(Flags::LF_LINE) + } else { + self.to_consume.add("\r"); + self.cr_aside = false; + self.multipart.flags.set(Flags::CRLF_LINE) + } + self.to_consume.add(consumed); + // Prepare to switch to boundary testing. + self.boundary_match_pos = 0; + self.parser_state = HtpMultipartState::BOUNDARY; + return left; + } else if self.cr_aside { + (self.to_consume).add("\r"); + self.cr_aside = false; + } + (self.to_consume).add(consumed); + // Ignore result. + let _ = self.handle_data(false); + remaining + } else { + input + } + } + + /// Handle possible boundary: match the input against the not-yet-matched boundary bytes and return the remaining input. + fn parse_state_boundary<'a>(&mut self, input: &'a [u8]) -> &'a [u8] { + if self.multipart.boundary.len() < self.boundary_match_pos { + // This should never happen. + // Process stored (buffered) data. + self.process_aside(false); + // Return back where data parsing left off.
+ self.parser_state = HtpMultipartState::DATA; + return input; + } + let len = std::cmp::min( + self.multipart.boundary.len() - self.boundary_match_pos, + input.len(), + ); + if let Ok((remaining, consumed)) = tag::<&[u8], _, (&[u8], nom::error::ErrorKind)>( + &self.multipart.boundary[self.boundary_match_pos..self.boundary_match_pos + len] + .to_vec(), + )(input) + { + self.boundary_match_pos = self.boundary_match_pos.wrapping_add(len); + if self.boundary_match_pos == self.multipart.boundary_len { + // Boundary match! + // Process stored (buffered) data. + self.process_aside(true); + // Keep track of how many boundaries we've seen. + self.multipart.boundary_count += 1; + if self.multipart.flags.is_set(Flags::SEEN_LAST_BOUNDARY) { + self.multipart.flags.set(Flags::PART_AFTER_LAST_BOUNDARY) + } + // Run boundary match. + let _ = self.handle_boundary(); + // We now need to check if this is the last boundary in the payload. + self.parser_state = HtpMultipartState::BOUNDARY_IS_LAST1; + } else { + // No more data in the input buffer; store (buffer) the unprocessed + // part for later, for after we find out if this is a boundary. + self.boundary_candidate.add(consumed); + } + remaining + } else { + // Boundary mismatch. + // Process stored (buffered) data. + self.process_aside(false); + // Return back where data parsing left off. + self.parser_state = HtpMultipartState::DATA; + input + } + } + + /// Determine if we have another boundary to process or not. + /// Examine the first byte after the last boundary character. If it is + /// a dash, then we may be processing the last boundary in the payload. If + /// it is not, move to eat all bytes until the end of the line. + fn parse_state_last1<'a>(&mut self, input: &'a [u8]) -> &'a [u8] { + if let Ok((remaining, _)) = char::<_, (&[u8], nom::error::ErrorKind)>('-')(input) { + // Found one dash, now go to check the next position.
+ self.parser_state = HtpMultipartState::BOUNDARY_IS_LAST2; + remaining + } else { + // This is not the last boundary. Change state but + // do not advance the position, allowing the next + // state to process the byte. + self.parser_state = HtpMultipartState::BOUNDARY_EAT_LWS; + input + } + } + + /// Determine if we have another boundary to process or not. + /// Examine the byte after the first dash; expected to be another dash. + /// If not, eat all bytes until the end of the line. + fn parse_state_last2<'a>(&mut self, input: &'a [u8]) -> &'a [u8] { + if let Ok((remaining, _)) = char::<_, (&[u8], nom::error::ErrorKind)>('-')(input) { + // This is indeed the last boundary in the payload. + self.multipart.flags.set(Flags::SEEN_LAST_BOUNDARY); + self.parser_state = HtpMultipartState::BOUNDARY_EAT_LWS; + remaining + } else { + // The second character is not a dash, and so this is not + // the final boundary. Raise the flag for the first dash, + // and change state to consume the rest of the boundary line. + self.multipart.flags.set(Flags::BBOUNDARY_NLWS_AFTER); + self.parser_state = HtpMultipartState::BOUNDARY_EAT_LWS; + input + } + } + + /// Consumes the remainder of the boundary line, flagging LWS or other bytes. Advances state if we're done with boundary + /// processing. + fn parse_state_lws<'a>(&mut self, input: &'a [u8]) -> &'a [u8] { + if let Ok((remaining, _)) = tag::<_, _, (&[u8], nom::error::ErrorKind)>("\r\n")(input) { + // CRLF line ending; we're done with boundary processing; data bytes follow. + self.multipart.flags.set(Flags::CRLF_LINE); + self.parser_state = HtpMultipartState::DATA; + remaining + } else if let Ok((remaining, byte)) = be_u8::<(&[u8], nom::error::ErrorKind)>(input) { + if byte == b'\n' { + // LF line ending; we're done with boundary processing; data bytes follow. + self.multipart.flags.set(Flags::LF_LINE); + self.parser_state = HtpMultipartState::DATA; + } else if nom_is_space(byte) { + // Linear white space is allowed here.
+ self.multipart.flags.set(Flags::BBOUNDARY_LWS_AFTER); + } else { + // Unexpected byte; consume, but remain in the same state. + self.multipart.flags.set(Flags::BBOUNDARY_NLWS_AFTER); + } + remaining + } else { + input + } + } + + /// Parses a chunk of multipart/form-data data. This function should be called + /// as many times as necessary until all data has been consumed. Always returns HtpStatus::OK. + pub fn parse(&mut self, mut input: &[u8]) -> HtpStatus { + while !input.is_empty() { + match self.parser_state { + HtpMultipartState::DATA => { + input = self.parse_state_data(input); + } + HtpMultipartState::BOUNDARY => { + input = self.parse_state_boundary(input); + } + HtpMultipartState::BOUNDARY_IS_LAST1 => { + input = self.parse_state_last1(input); + } + HtpMultipartState::BOUNDARY_IS_LAST2 => { + input = self.parse_state_last2(input); + } + HtpMultipartState::BOUNDARY_EAT_LWS => { + input = self.parse_state_lws(input); + } + } + } + HtpStatus::OK + } + + /// Parses one part header. + /// + /// Returns Ok(()) on success, Err(HtpStatus::DECLINED) on parsing error, Err(HtpStatus::ERROR) + /// on fatal error. + pub fn parse_header(&mut self) -> Result<()> { + // We do not allow NUL bytes here.
+ if self.pending_header_line.as_slice().contains(&(b'\0')) { + self.multipart.flags.set(Flags::NUL_BYTE); + return Err(HtpStatus::DECLINED); + } + // Parse the buffered header line(s) with the request-side headers parser. + let parser = HeadersParser::new(Side::Request); + if let Ok((remaining, (headers, _))) = parser.headers()(self.pending_header_line.as_slice()) + { + let remaining = remaining.to_vec(); + for header in headers { + let value_flags = &header.value.flags; + let name_flags = &header.name.flags; + if value_flags.is_set(HeaderFlags::FOLDING) { + self.multipart.flags.set(Flags::PART_HEADER_FOLDING) + } + if value_flags.is_set(HeaderFlags::VALUE_EMPTY) + || name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) + || name_flags.is_set(HeaderFlags::NAME_EMPTY) + || name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) + || name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) + { + // Invalid name and/or value found + self.multipart.flags.set(Flags::PART_HEADER_INVALID); + } + // Now extract the name and the value. + let header = Header::new(header.name.name.into(), header.value.value.into()); + if !header.name.eq_nocase("content-disposition") + && !header.name.eq_nocase("content-type") + { + self.multipart.flags.set(Flags::PART_HEADER_UNKNOWN) + } + // Check if the header already exists; if so, the new value is appended after a ", " separator. + if let Some((_, h_existing)) = self + .get_current_part()? + .headers + .get_nocase_mut(header.name.as_slice()) + { + h_existing.value.extend_from_slice(b", "); + h_existing.value.extend_from_slice(header.value.as_slice()); + // Keep track of same-name headers. + // FIXME: Normalize the flags? define the symbol in both Flags and Flags and set the value in both from their own namespace + h_existing.flags.set(Flags::PART_HEADER_REPEATED); + self.multipart.flags.set(Flags::PART_HEADER_REPEATED) + } else { + self.get_current_part()?
+ .headers + .add(header.name.clone(), header); + } + } + self.pending_header_line.clear(); + self.pending_header_line.add(remaining); + } else { + // Invalid name and/or value found + self.multipart.flags.set(Flags::PART_HEADER_INVALID); + return Err(HtpStatus::DECLINED); + } + Ok(()) + } + + /// Parses the Content-Disposition part header. + /// + /// Returns OK on success (header found and parsed), DECLINED if there is no C-D header or if + /// it could not be processed, and ERROR on fatal error. + pub fn parse_c_d(&mut self) -> Result<()> { + // Find the C-D header. + let part = self.get_current_part()?; + let header = { + if let Some((_, header)) = part.headers.get_nocase_nozero_mut("content-disposition") { + header + } else { + self.multipart.flags.set(Flags::PART_UNKNOWN); + return Err(HtpStatus::DECLINED); + } + }; + + // Require "form-data" at the beginning of the header. + if let Ok((_, params)) = content_disposition((*header.value).as_slice()) { + for (param_name, param_value) in params { + match param_name { + b"name" => { + // If we've reached the end of the string that means the + // value was not terminated properly (the second double quote is missing). + // Expecting the terminating double quote. + // Over the terminating double quote. + // Finally, process the parameter value. + // Check that we have not seen the name parameter already. + if !part.name.is_empty() { + self.multipart.flags.set(Flags::CD_PARAM_REPEATED); + return Err(HtpStatus::DECLINED); + } + part.name.clear(); + part.name.add(param_value); + } + b"filename" => { + // Check that we have not seen the filename parameter already. + match part.file { + Some(_) => { + self.multipart.flags.set(Flags::CD_PARAM_REPEATED); + return Err(HtpStatus::DECLINED); + } + None => { + part.file = Some(File::new( + HtpFileSource::MULTIPART, + Some(Bstr::from(param_value)), + )); + } + }; + } + _ => { + // Unknown parameter. 
+ self.multipart.flags.set(Flags::CD_PARAM_UNKNOWN); + return Err(HtpStatus::DECLINED); + } + } + } + } else { + self.multipart.flags.set(Flags::CD_SYNTAX_INVALID); + return Err(HtpStatus::DECLINED); + } + Ok(()) + } + + /// Send file data to request file data callback. + pub fn run_request_file_data_hook(&mut self, is_end: bool) -> Result<()> { + //TODO: do without these clones! + let data = self.to_consume.clone(); + let data = if !is_end { data.as_slice() } else { b"" }; + let hook = self.hook.clone(); + match &mut self.get_current_part()?.file { + // Combine value pieces into a single buffer. + // Keep track of the file length. + Some(file) => { + // Send data to callbacks + file.handle_file_data(hook, data.as_ptr(), data.len()) + } + None => Ok(()), + } + } +} + +/// Holds information related to a part. +#[derive(Clone)] +pub struct Part { + /// Part type; see the * constants. + pub type_0: HtpMultipartType, + /// Raw part length (i.e., headers and data). + pub len: usize, + /// Part name, from the Content-Disposition header. Can be empty. + pub name: Bstr, + + /// Part value; the contents depends on the type of the part: + /// 1) empty for files; 2) contains complete part contents for + /// preamble and epilogue parts (they have no headers), and + /// 3) data only (headers excluded) for text and unknown parts. + pub value: Bstr, + /// Part content type, from the Content-Type header. Can be None. + pub content_type: Option, + /// Part headers (Header instances), using header name as the key. + pub headers: Headers, + /// File data, available only for FILE parts. 
+ pub file: Option, +} + +impl Default for Part { + fn default() -> Self { + Self { + type_0: HtpMultipartType::UNKNOWN, + len: 0, + name: Bstr::with_capacity(64), + value: Bstr::with_capacity(64), + content_type: None, + headers: Table::with_capacity(4), + file: None, + } + } +} + +impl Drop for Part { + fn drop(&mut self) { + self.file = None; + self.headers.elements.clear(); + } +} + +/// Enumerates the current multipart mode. +/// cbindgen:rename-all=QualifiedScreamingSnakeCase +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Debug)] +enum HtpMultipartMode { + /// When in line mode, the parser is handling part headers. + LINE, + /// When in data mode, the parser is consuming part data. + DATA, +} + +/// Enumerates the multipart parsing state. +/// cbindgen:rename-all=QualifiedScreamingSnakeCase +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Debug)] +enum HtpMultipartState { + /// Processing data, waiting for a new line (which might indicate a new boundary). + DATA, + /// Testing a potential boundary. + BOUNDARY, + /// Checking the first byte after a boundary. + BOUNDARY_IS_LAST1, + /// Checking the second byte after a boundary. + BOUNDARY_IS_LAST2, + /// Consuming linear whitespace after a boundary. + BOUNDARY_EAT_LWS, +} + +/// Enumerates the multipart type. +/// cbindgen:rename-all=QualifiedScreamingSnakeCase +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum HtpMultipartType { + /// Unknown part. + UNKNOWN, + /// Text (parameter) part. + TEXT, + /// File part. + FILE, + /// Free-text part before the first boundary. + PREAMBLE, + /// Free-text part after the last boundary. + EPILOGUE, +} + +/// Holds information related to a multipart body. +#[derive(Clone)] +pub struct Multipart { + /// Multipart boundary. + pub boundary: Bstr, + /// Boundary length. + pub boundary_len: usize, + /// How many boundaries were there? + pub boundary_count: i32, + /// List of parts, in the order in which they appeared in the body. + pub parts: List, + /// Parsing flags. 
+ pub flags: u64, +} + +/// Extracts and decodes a C-D header param name and value following a form-data. This is impossible to do correctly without a +/// parsing personality because most browsers are broken: +/// - Firefox encodes " as \", and \ is not encoded. +/// - Chrome encodes " as %22. +/// - IE encodes " as \", and \ is not encoded. +/// - Opera encodes " as \" and \ as \\. +fn content_disposition_param() -> impl Fn(&[u8]) -> IResult<&[u8], (&[u8], Vec)> { + move |input| { + let (mut remaining_input, param_name) = map( + tuple(( + take_ascii_whitespace(), + char(';'), + take_ascii_whitespace(), + take_while(|c: u8| c != b'=' && !c.is_ascii_whitespace()), + take_ascii_whitespace(), + char('='), + take_ascii_whitespace(), + char('\"'), //must start with opening quote + )), + |(_, _, _, param_name, _, _, _, _)| param_name, + )(input)?; + // Unescape any escaped " and \ and find the closing " + let mut param_value = Vec::new(); + loop { + let (left, (value, to_insert)) = tuple(( + take_while(|c| c != b'\"' && c != b'\\'), + opt(tuple((char('\\'), alt((char('\"'), char('\\')))))), + ))(remaining_input)?; + remaining_input = left; + param_value.extend_from_slice(value); + if let Some((_, to_insert)) = to_insert { + // Insert the character + param_value.push(to_insert as u8); + } else { + // Must end with a quote or it is invalid + let (left, _) = char('\"')(remaining_input)?; + remaining_input = left; + break; + } + } + Ok((remaining_input, (param_name, param_value))) + } +} + +/// Extracts and decodes a C-D header param names and values. This is impossible to do correctly without a +/// parsing personality because most browsers are broken: +/// - Firefox encodes " as \", and \ is not encoded. +/// - Chrome encodes " as %22. +/// - IE encodes " as \", and \ is not encoded. +/// - Opera encodes " as \" and \ as \\. 
+fn content_disposition(input: &[u8]) -> IResult<&[u8], Vec<(&[u8], Vec)>> { + // Multiple header values are seperated by a ", ": https://tools.ietf.org/html/rfc7230#section-3.2.2 + map( + tuple(( + tag("form-data"), + fold_many1( + tuple(( + content_disposition_param(), + take_ascii_whitespace(), + opt(tuple((tag(","), take_ascii_whitespace(), tag("form-data")))), + take_ascii_whitespace(), + )), + Vec::new(), + |mut acc: Vec<(&[u8], Vec)>, (param, _, _, _)| { + acc.push(param); + acc + }, + ), + take_ascii_whitespace(), + opt(tag(";")), // Allow trailing semicolon, + take_ascii_whitespace(), + not(take(1usize)), // We should have no data left, or we exited parsing prematurely + )), + |(_, result, _, _, _, _)| result, + )(input) +} +/// Validates a multipart boundary according to RFC 1341: +/// +/// The only mandatory parameter for the multipart Content-Type +/// is the boundary parameter, which consists of 1 to 70 +/// characters from a set of characters known to be very robust +/// through email gateways, and NOT ending with white space. +/// (If a boundary appears to end with white space, the white +/// space must be presumed to have been added by a gateway, and +/// should be deleted.) It is formally specified by the +/// following BNF: +/// +/// boundary := 0*69 bcharsnospace +/// +/// bchars := bcharsnospace / " " +/// +/// bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" +/// / "," / "-" / "." / "/" / ":" / "=" / "?" 
+/// +/// Chrome: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD +/// Firefox: Content-Type: multipart/form-data; boundary=---------------------------21071316483088 +/// MSIE: Content-Type: multipart/form-data; boundary=---------------------------7dd13e11c0452 +/// Opera: Content-Type: multipart/form-data; boundary=----------2JL5oh7QWEDwyBllIRc7fh +/// Safari: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryre6zL3b0BelnTY5S +/// +/// Returns in flags the appropriate Flags +fn validate_boundary(boundary: &[u8], flags: &mut u64) { + // The RFC allows up to 70 characters. In real life, + // boundaries tend to be shorter. + if boundary.is_empty() || boundary.len() > 70 { + flags.set(Flags::HBOUNDARY_INVALID) + } + // Check boundary characters. This check is stricter than the + // RFC, which seems to allow many separator characters. + for byte in boundary { + if !byte.is_ascii_alphanumeric() && *byte != b'-' { + match *byte as char { + '\'' | '(' | ')' | '+' | '_' | ',' | '.' | '/' | ':' | '=' | '?' => { + // These characters are allowed by the RFC, but not common. + flags.set(Flags::HBOUNDARY_UNUSUAL) + } + _ => { + // Invalid character. 
+ flags.set(Flags::HBOUNDARY_INVALID) + } + } + } + } +} + +/// Validates the content type by checking if there are multiple boundary occurrences or any occurrence contains uppercase characters +/// +/// Returns in flags the appropriate Flags +fn validate_content_type(content_type: &[u8], flags: &mut u64) { + if let Ok((_, (f, _))) = fold_many1( + tuple(( + take_until_no_case(b"boundary"), + tag_no_case("boundary"), + take_until("="), + tag("="), + )), + (0, false), + |(mut flags, mut seen_prev): (u64, bool), (_, boundary, _, _): (_, &[u8], _, _)| { + for byte in boundary { + if byte.is_ascii_uppercase() { + flags.set(Flags::HBOUNDARY_INVALID); + break; + } + } + if seen_prev { + // Seen multiple boundaries + flags.set(Flags::HBOUNDARY_INVALID) + } + seen_prev = true; + (flags, seen_prev) + }, + )(content_type) + { + flags.set(f); + } else { + // There must be at least one occurrence! + flags.set(Flags::HBOUNDARY_INVALID); + } +} + +/// Attempts to locate and extract the boundary from an input slice, returning a tuple of the matched +/// boundary and any leading/trailing whitespace and non whitespace characters that might be relevant +fn boundary() -> impl Fn( + &[u8], +) -> IResult< + &[u8], + ( + &[u8], + &[u8], + &[u8], + Option, + &[u8], + Option, + &[u8], + &[u8], + ), +> { + move |input| { + map( + tuple(( + take_until_no_case(b"boundary"), + tag_no_case("boundary"), + take_is_space, + take_until("="), + tag("="), + take_is_space, + peek(opt(char('\"'))), + alt(( + map(tuple((tag("\""), take_until("\""))), |(_, boundary)| { + boundary + }), + map( + tuple(( + take_while(|c: u8| c != b',' && c != b';' && !is_space(c)), + opt(alt((char(','), char(';')))), //Skip the matched character if we matched one without hitting the end + )), + |(boundary, _)| boundary, + ), + )), + peek(opt(char('\"'))), + take_is_space, + take_while(|c| !is_space(c)), + )), + |( + _, + _, + spaces_before_equal, + chars_before_equal, + _, + spaces_after_equal, + opening_quote, + boundary, 
+ closing_quote, + spaces_after_boundary, + chars_after_boundary, + )| { + ( + spaces_before_equal, + chars_before_equal, + spaces_after_equal, + opening_quote, + boundary, + closing_quote, + spaces_after_boundary, + chars_after_boundary, + ) + }, + )(input) + } +} + +/// Looks for boundary in the supplied Content-Type request header. +/// +/// Returns in multipart_flags: Multipart flags, which are not compatible from general LibHTP flags. +/// +/// Returns boundary if found, None otherwise. +/// Flags may be set on even without successfully locating the boundary. For +/// example, if a boundary could not be extracted but there is indication that +/// one is present, the HBOUNDARY_INVALID flag will be set. +pub fn find_boundary<'a>(content_type: &'a [u8], flags: &mut u64) -> Option<&'a [u8]> { + // Our approach is to ignore the MIME type and instead just look for + // the boundary. This approach is more reliable in the face of various + // evasion techniques that focus on submitting invalid MIME types. + // Reset flags. + *flags = 0; + // Correlate with the MIME type. This might be a tad too + // sensitive because it may catch non-browser access with sloppy + // implementations, but let's go with it for now. + if !content_type.starts_with(b"multipart/form-data;") { + flags.set(Flags::HBOUNDARY_INVALID) + } + // Look for the boundary, case insensitive. + if let Ok(( + _, + ( + spaces_before_equal, + chars_before_equal, + spaces_after_equal, + opening_quote, + boundary, + closing_quote, + spaces_after_boundary, + chars_after_boundary, + ), + )) = boundary()(content_type) + { + if !spaces_before_equal.is_empty() + || !spaces_after_equal.is_empty() + || opening_quote.is_some() + || (chars_after_boundary.is_empty() && !spaces_after_boundary.is_empty()) + { + // It is unusual to see whitespace before and/or after the equals sign. 
+ // Unusual to have a quoted boundary + // Unusual but allowed to have only whitespace after the boundary + flags.set(Flags::HBOUNDARY_UNUSUAL) + } + if !chars_before_equal.is_empty() + || (opening_quote.is_some() && closing_quote.is_none()) + || (opening_quote.is_none() && closing_quote.is_some()) + || !chars_after_boundary.is_empty() + { + // Seeing a non-whitespace character before equal sign may indicate evasion + // Having an opening quote, but no closing quote is invalid + // Seeing any character after the boundary, other than whitespace is invalid + flags.set(Flags::HBOUNDARY_INVALID) + } + if boundary.is_empty() { + flags.set(Flags::HBOUNDARY_INVALID); + return None; + } + // Validate boundary characters. + validate_boundary(boundary, flags); + validate_content_type(content_type, flags); + Some(boundary) + } else { + flags.set(Flags::HBOUNDARY_INVALID); + None + } +} + +#[test] +fn Boundary() { + let inputs: Vec<&[u8]> = vec![ + b"multipart/form-data; boundary=myboundarydata", + b"multipart/form-data; BounDary=myboundarydata", + b"multipart/form-data; boundary =myboundarydata", + b"multipart/form-data; boundary= myboundarydata", + b"multipart/form-data; boundary=myboundarydata ", + b"multipart/form-data; boundary=myboundarydata, ", + b"multipart/form-data; boundary=myboundarydata, boundary=secondboundarydata", + b"multipart/form-data; boundary=myboundarydata; ", + b"multipart/form-data; boundary=myboundarydata; boundary=secondboundarydata", + b"multipart/form-data; boundary=\"myboundarydata\"", + b"multipart/form-data; boundary= \"myboundarydata\"", + b"multipart/form-data; boundary=\"myboundarydata\" ", + ]; + + for input in inputs { + let (_, (_, _, _, _, b, _, _, _)) = boundary()(input).unwrap(); + assert_eq!(b, b"myboundarydata"); + } + + let (_, (_, _, _, _, b, _, _, _)) = + boundary()(b"multipart/form-data; boundary=\"myboundarydata").unwrap(); + assert_eq!(b, b"\"myboundarydata"); + + let (_, (_, _, _, _, b, _, _, _)) = + 
boundary()(b"multipart/form-data; boundary= myboundarydata\"").unwrap(); + assert_eq!(b, b"myboundarydata\""); +} + +#[test] +fn ValidateBoundary() { + let inputs: Vec<&[u8]> = vec![ + b"Unusual\'Boundary", + b"Unusual(Boundary", + b"Unusual)Boundary", + b"Unusual+Boundary", + b"Unusual_Boundary", + b"Unusual,Boundary", + b"Unusual.Boundary", + b"Unusual/Boundary", + b"Unusual:Boundary", + b"Unusual=Boundary", + b"Unusual?Boundary", + b"Invalid>Boundary", + b"InvalidBoundaryTOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOLONG", + b"", //Invalid...Need at least one byte + b"InvalidUnusual.~Boundary", + ]; + let outputs: Vec = vec![ + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_UNUSUAL, + Flags::HBOUNDARY_INVALID, + Flags::HBOUNDARY_INVALID, + Flags::HBOUNDARY_INVALID, + Flags::HBOUNDARY_INVALID | Flags::HBOUNDARY_UNUSUAL, + ]; + + for i in 0..inputs.len() { + let mut flags = 0; + validate_boundary(inputs[i], &mut flags); + assert_eq!(outputs[i], flags); + } +} + +#[test] +fn ValidateContentType() { + let inputs: Vec<&[u8]> = vec![ + b"multipart/form-data; boundary = stuff, boundary=stuff", + b"multipart/form-data; boundary=stuffm BounDary=stuff", + b"multipart/form-data; Boundary=stuff", + b"multipart/form-data; bouNdary=stuff", + b"multipart/form-data; boundary=stuff", + ]; + let outputs: Vec = vec![ + Flags::HBOUNDARY_INVALID, + Flags::HBOUNDARY_INVALID, + Flags::HBOUNDARY_INVALID, + Flags::HBOUNDARY_INVALID, + 0, + ]; + + for i in 0..inputs.len() { + let mut flags = 0; + validate_content_type(inputs[i], &mut flags); + assert_eq!(outputs[i], flags); + } +} diff --git a/rust/htp/src/parsers.rs b/rust/htp/src/parsers.rs new file mode 100644 index 000000000000..ca80209e99d5 
--- /dev/null +++ b/rust/htp/src/parsers.rs @@ -0,0 +1,694 @@ +use crate::{ + bstr::Bstr, + error::Result, + log::Logger, + transaction::{Header, HtpAuthType, HtpProtocol, HtpResponseNumber, Transaction}, + util::{ + ascii_digits, convert_port, hex_digits, take_ascii_whitespace, take_chunked_ctl_chars, + validate_hostname, + }, + HtpStatus, +}; +use nom::{ + branch::alt, + bytes::complete::{is_not, tag, tag_no_case, take_till, take_until, take_while}, + combinator::{map, not, opt, peek}, + error::ErrorKind, + multi::many0, + sequence::tuple, + IResult, +}; + +/// Parses the content type header, trimming any leading whitespace. +/// Finds the end of the MIME type, using the same approach PHP 5.4.3 uses. +/// +/// Returns a tuple of the remaining unparsed header data and the content type +fn content_type() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| { + map( + tuple((take_ascii_whitespace(), is_not(";, "))), + |(_, content_type)| content_type, + )(input) + } +} + +/// Parses the content type header from the given header value, lowercases it, and stores it in the provided ct bstr. +/// Finds the end of the MIME type, using the same approach PHP 5.4.3 uses. +pub fn parse_content_type(header: &[u8]) -> Result { + let (_, content_type) = content_type()(header)?; + let mut ct = Bstr::from(content_type); + ct.make_ascii_lowercase(); + Ok(ct) +} + +/// Parses Content-Length string (positive decimal number). White space is +/// allowed before and after the number. +/// +/// Returns content length, or None if input is not valid. +pub fn parse_content_length(input: &[u8], logger: Option<&mut Logger>) -> Option { + let (trailing_data, (leading_data, content_length)) = ascii_digits()(input).ok()?; + if let Some(logger) = logger { + if !leading_data.is_empty() { + // Contains invalid characters! 
But still attempt to process + htp_warn!( + logger, + HtpLogCode::CONTENT_LENGTH_EXTRA_DATA_START, + "C-L value with extra data in the beginning" + ); + } + + if !trailing_data.is_empty() { + // Ok to have junk afterwards + htp_warn!( + logger, + HtpLogCode::CONTENT_LENGTH_EXTRA_DATA_END, + "C-L value with extra data in the end" + ); + } + } + std::str::from_utf8(content_length) + .ok()? + .parse::() + .ok() +} + +/// Parses chunked length (positive hexadecimal number). White space is allowed before +/// and after the number. +pub fn parse_chunked_length(input: &[u8]) -> Result<(Option, bool)> { + let (rest, _) = take_chunked_ctl_chars(input)?; + let (trailing_data, chunked_length) = hex_digits()(rest)?; + if trailing_data.is_empty() && chunked_length.is_empty() { + return Ok((None, false)); + } + let chunked_len = u64::from_str_radix( + std::str::from_utf8(chunked_length).map_err(|_| HtpStatus::ERROR)?, + 16, + ) + .map_err(|_| HtpStatus::ERROR)?; + //TODO: remove this limit and update appropriate tests after differential fuzzing + if chunked_len > std::i32::MAX as u64 { + return Ok((None, false)); + } + let has_ext = trailing_data.contains(&b';'); + Ok((Some(chunked_len), has_ext)) +} + +/// Attempts to extract the scheme from a given input URI. +/// # Example +/// ``` +/// use htp::parsers::scheme; +/// +/// let data: &[u8] = b"http://www.example.com"; +/// let (left, scheme) = scheme()(data).unwrap(); +/// assert_eq!(left, b"//www.example.com"); +/// assert_eq!(scheme, b"http"); +/// ``` +/// +/// Returns a tuple of the unconsumed data and the matched scheme. 
+pub fn scheme() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| { + // Scheme test: if it doesn't start with a forward slash character (which it must + // for the contents to be a path or an authority), then it must be the scheme part + map( + tuple((peek(not(tag("/"))), take_until(":"), tag(":"))), + |(_, scheme, _)| scheme, + )(input) + } +} + +/// Helper for parsed credentials (username, Option) +pub type ParsedCredentials<'a> = (&'a [u8], Option<&'a [u8]>); + +/// Attempts to extract the credentials from a given input URI, assuming the scheme has already been extracted. +/// # Example +/// ``` +/// use htp::parsers::credentials; +/// +/// let data: &[u8] = b"//user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag"; +/// let (left, (user, pass)) = credentials()(data).unwrap(); +/// assert_eq!(user, b"user"); +/// assert_eq!(pass.unwrap(), b"pass"); +/// assert_eq!(left, b"www.example.com:1234/path1/path2?a=b&c=d#frag"); +/// ``` +/// +/// Returns a tuple of the remaining unconsumed data and a tuple of the matched username and password. +pub fn credentials() -> impl Fn(&[u8]) -> IResult<&[u8], ParsedCredentials> { + move |input| { + // Authority test: two forward slash characters and it's an authority. + // One, three or more slash characters, and it's a path. + // Note: we only attempt to parse authority if we've seen a scheme. + let (input, (_, _, credentials, _)) = + tuple((tag("//"), peek(not(tag("/"))), take_until("@"), tag("@")))(input)?; + let (password, username) = opt(tuple((take_until(":"), tag(":"))))(credentials)?; + if let Some((username, _)) = username { + Ok((input, (username, Some(password)))) + } else { + Ok((input, (credentials, None))) + } + } +} + +/// Attempts to extract an IPv6 hostname from a given input URI, +/// assuming any scheme, credentials, hostname, port, and path have been already parsed out. 
+/// # Example +/// ``` +/// use htp::parsers::ipv6; +/// +/// let data: &[u8] = b"[::]/path1?a=b&c=d#frag"; +/// let (left, ipv6) = ipv6()(data).unwrap(); +/// assert_eq!(ipv6, b"[::]"); +/// assert_eq!(left, b"/path1?a=b&c=d#frag"); +/// ``` +/// +/// Returns a tuple of the remaining unconsumed data and the matched ipv6 hostname. +pub fn ipv6() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| -> IResult<&[u8], &[u8]> { + let (rest, _) = tuple((tag("["), is_not("/?#]"), opt(tag("]"))))(input)?; + Ok((rest, &input[..input.len() - rest.len()])) + } +} + +/// Attempts to extract the hostname from a given input URI +/// # Example +/// ``` +/// use htp::parsers::hostname; +/// +/// let data: &[u8] = b"www.example.com:8080/path"; +/// let (left, host) = hostname()(data).unwrap(); +/// assert_eq!(host, b"www.example.com"); +/// assert_eq!(left, b":8080/path"); +/// ``` +/// +/// Returns a tuple of the remaining unconsumed data and the matched hostname. +pub fn hostname() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| { + let (input, mut hostname) = map( + tuple(( + opt(tag("//")), //If it starts with "//", skip (might have parsed a scheme and no creds) + peek(not(tag("/"))), //If it starts with '/', this is a path, not a hostname + many0(tag(" ")), + alt((ipv6(), is_not("/?#:"))), + )), + |(_, _, _, hostname)| hostname, + )(input)?; + //There may be spaces in the middle of a hostname, so much trim only at the end + while hostname.ends_with(&[b' ']) { + hostname = &hostname[..hostname.len() - 1]; + } + Ok((input, hostname)) + } +} + +/// Attempts to extract the port from a given input URI, +/// assuming any scheme, credentials, or hostname have been already parsed out. 
+/// # Example +/// ``` +/// use htp::parsers::port; +/// +/// let data: &[u8] = b":8080/path"; +/// let (left, port) = port()(data).unwrap(); +/// assert_eq!(port, b"8080"); +/// assert_eq!(left, b"/path"); +/// ``` +/// +/// Returns a tuple of the remaining unconsumed data and the matched port. +pub fn port() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| { + // Must start with ":" for there to be a port to parse + let (input, (_, _, port, _)) = + tuple((tag(":"), many0(tag(" ")), is_not("/?#"), many0(tag(" "))))(input)?; + let (_, port) = is_not(" ")(port)?; //we assume there never will be a space in the middle of a port + Ok((input, port)) + } +} + +/// Attempts to extract the path from a given input URI, +/// assuming any scheme, credentials, hostname, and port have been already parsed out. +/// # Example +/// ``` +/// use htp::parsers::path; +/// +/// let data: &[u8] = b"/path1/path2?query"; +/// let (left, path) = path()(data).unwrap(); +/// assert_eq!(path, b"/path1/path2"); +/// assert_eq!(left, b"?query"); +/// ``` +/// +/// Returns a tuple of the remaining unconsumed data and the matched path. +pub fn path() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| is_not("#?")(input) +} + +/// Attempts to extract the query from a given input URI, +/// assuming any scheme, credentials, hostname, port, and path have been already parsed out. +/// # Example +/// ``` +/// use htp::parsers::query; +/// +/// let data: &[u8] = b"?a=b&c=d#frag"; +/// let (left, query) = query()(data).unwrap(); +/// assert_eq!(query, b"a=b&c=d"); +/// assert_eq!(left, b"#frag"); +/// ``` +/// +/// Returns a tuple of the remaining unconsumed data and the matched query. +pub fn query() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| { + // Skip the starting '?' 
+ map(tuple((tag("?"), take_till(|c| c == b'#'))), |(_, query)| { + query + })(input) + } +} + +/// Attempts to extract the fragment from a given input URI, +/// assuming any other components have been parsed out. +/// ``` +/// use htp::parsers::fragment; +/// +/// let data: &[u8] = b"#fragment"; +/// let (left, fragment) = fragment()(data).unwrap(); +/// assert_eq!(fragment, b"fragment"); +/// assert_eq!(left, b""); +/// ``` +/// +/// Returns a tuple of the remaining unconsumed data and the matched fragment. +pub fn fragment() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| { + // Skip the starting '#' + let (input, _) = tag("#")(input)?; + Ok((b"", input)) + } +} + +type parsed_port<'a> = Option<(&'a [u8], Option)>; +type parsed_hostport<'a> = (&'a [u8], parsed_port<'a>, bool); + +/// Parses an authority string, which consists of a hostname with an optional port number +/// +/// Returns a remaining unparsed data, parsed hostname, parsed port, converted port number, +/// and a flag indicating whether the parsed data is valid. +pub fn parse_hostport(input: &[u8]) -> IResult<&[u8], parsed_hostport> { + let (input, host) = hostname()(input)?; + let mut valid = validate_hostname(host); + if let Ok((_, p)) = port()(input) { + if let Some(port) = convert_port(p) { + return Ok((input, (host, Some((p, Some(port))), valid))); + } else { + return Ok((input, (host, Some((p, None)), false))); + } + } else if !input.is_empty() { + //Trailing data after the hostname that is invalid e.g. [::1]xxxxx + valid = false; + } + Ok((input, (host, None, valid))) +} + +/// Extracts the version protocol from the input slice. 
+/// +/// Returns (any unparsed trailing data, (version_number, flag indicating whether input contains trailing and/or leading whitespace and/or leading zeros)) +fn protocol_version(input: &[u8]) -> IResult<&[u8], (&[u8], bool)> { + map( + tuple(( + take_ascii_whitespace(), + tag_no_case("HTTP"), + take_ascii_whitespace(), + tag("/"), + take_while(|c: u8| c.is_ascii_whitespace() || c == b'0'), + alt((tag(".9"), tag("1.0"), tag("1.1"))), + take_ascii_whitespace(), + )), + |(_, _, leading, _, trailing, version, _)| { + (version, !leading.is_empty() || !trailing.is_empty()) + }, + )(input) +} + +/// Determines protocol number from a textual representation (i.e., "HTTP/1.1"). This +/// function tries to be flexible, allowing whitespace before and after the forward slash, +/// as well as allowing leading zeros in the version number. If such leading/trailing +/// characters are discovered, however, a warning will be logged. +/// +/// Returns HtpProtocol version or invalid. +pub fn parse_protocol(input: &[u8], logger: &mut Logger) -> HtpProtocol { + if let Ok((remaining, (version, contains_trailing))) = protocol_version(input) { + if !remaining.is_empty() { + return HtpProtocol::INVALID; + } + if contains_trailing { + htp_warn!( + logger, + HtpLogCode::PROTOCOL_CONTAINS_EXTRA_DATA, + "HtpProtocol version contains leading and/or trailing whitespace and/or leading zeros" + ); + } + match version { + b".9" => HtpProtocol::V0_9, + b"1.0" => HtpProtocol::V1_0, + b"1.1" => HtpProtocol::V1_1, + _ => HtpProtocol::INVALID, + } + } else { + HtpProtocol::INVALID + } +} + +/// Determines the numerical value of a response status given as a string. 
+pub fn parse_status(status: &[u8]) -> HtpResponseNumber { + if let Ok((trailing_data, (leading_data, status_code))) = ascii_digits()(status) { + if !trailing_data.is_empty() || !leading_data.is_empty() { + //There are invalid characters in the status code + return HtpResponseNumber::INVALID; + } + if let Ok(status_code) = std::str::from_utf8(status_code) { + if let Ok(status_code) = status_code.parse::() { + if (100..=999).contains(&status_code) { + return HtpResponseNumber::VALID(status_code); + } + } + } + } + HtpResponseNumber::INVALID +} + +/// Parses Digest Authorization request header. +fn parse_authorization_digest(auth_header_value: &[u8]) -> IResult<&[u8], Vec> { + // Extract the username + let (mut remaining_input, _) = tuple(( + take_until("username="), + tag("username="), + take_ascii_whitespace(), // allow lws + tag("\""), // First character after LWS must be a double quote + ))(auth_header_value)?; + let mut result = Vec::new(); + // Unescape any escaped double quotes and find the closing quote + loop { + let (remaining, (auth_header, _)) = tuple((take_until("\""), tag("\"")))(remaining_input)?; + remaining_input = remaining; + result.extend_from_slice(auth_header); + if result.last() == Some(&(b'\\')) { + // Remove the escape and push back the double quote + result.pop(); + result.push(b'\"'); + } else { + // We found the closing double quote! + break; + } + } + Ok((remaining_input, result)) +} + +/// Parses Basic Authorization request header. 
+fn parse_authorization_basic(request_tx: &mut Transaction, auth_header: &Header) -> Result<()> { + // Skip 'Basic' + let (remaining_input, _) = + tuple((tag_no_case("basic"), take_ascii_whitespace()))(auth_header.value.as_slice()) + .map_err(|_| HtpStatus::DECLINED)?; + // Decode base64-encoded data + let decoded = base64::decode(remaining_input).map_err(|_| HtpStatus::DECLINED)?; + let (password, (username, _)) = + tuple::<_, _, (&[u8], ErrorKind), _>((take_until(":"), tag(":")))(decoded.as_slice()) + .map_err(|_| HtpStatus::DECLINED)?; + request_tx.request_auth_username = Some(Bstr::from(username)); + request_tx.request_auth_password = Some(Bstr::from(password)); + Ok(()) +} + +/// Parses Authorization request header. +pub fn parse_authorization(request_tx: &mut Transaction) -> Result<()> { + let auth_header = if let Some(auth_header) = request_tx + .request_headers + .get_nocase_nozero("authorization") + { + auth_header.clone() + } else { + request_tx.request_auth_type = HtpAuthType::NONE; + return Ok(()); + }; + // TODO Need a flag to raise when failing to parse authentication headers. 
+ if auth_header.value.starts_with_nocase("basic") { + // Basic authentication + request_tx.request_auth_type = HtpAuthType::BASIC; + return parse_authorization_basic(request_tx, &auth_header); + } else if auth_header.value.starts_with_nocase("digest") { + // Digest authentication + request_tx.request_auth_type = HtpAuthType::DIGEST; + let (_, auth_username) = parse_authorization_digest(auth_header.value.as_slice()) + .map_err(|_| HtpStatus::DECLINED)?; + if let Some(username) = &mut request_tx.request_auth_username { + username.clear(); + username.add(auth_username); + } else { + request_tx.request_auth_username = Some(Bstr::from(auth_username)); + } + } else if auth_header.value.starts_with_nocase("bearer") { + request_tx.request_auth_type = HtpAuthType::BEARER; + let (token, _) = tuple(( + tag_no_case("bearer"), + take_ascii_whitespace(), // allow lws + ))(auth_header.value.as_slice()) + .map_err(|_| HtpStatus::DECLINED)?; + request_tx.request_auth_token = Some(Bstr::from(token)); + } else { + // Unrecognized authentication method + request_tx.request_auth_type = HtpAuthType::UNRECOGNIZED + } + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + use rstest::rstest; + + #[rstest] + #[case(" username= \"ivan\\\"r\\\"\"", "ivan\"r\"", "")] + #[case("username=\"ivan\\\"r\\\"\"", "ivan\"r\"", "")] + #[case("username=\"ivan\\\"r\\\"\" ", "ivan\"r\"", " ")] + #[case("username=\"ivanr\" ", "ivanr", " ")] + #[case("username= \"ivanr\" ", "ivanr", " ")] + #[should_panic] + #[case("username=ivanr\" ", "", "")] + #[should_panic] + #[case("username=\"ivanr ", "", "")] + fn test_parse_authorization_digest( + #[case] input: &str, #[case] username: &str, #[case] remaining: &str, + ) { + assert_eq!( + parse_authorization_digest(input.as_bytes()).unwrap(), + (remaining.as_bytes(), username.as_bytes().to_vec()) + ); + } + + #[rstest] + #[case(" 200 ", HtpResponseNumber::VALID(200))] + #[case(" \t 404 ", HtpResponseNumber::VALID(404))] + #[case("123", 
HtpResponseNumber::VALID(123))] + #[case("99", HtpResponseNumber::INVALID)] + #[case("1000", HtpResponseNumber::INVALID)] + #[case("200 OK", HtpResponseNumber::INVALID)] + #[case("NOT 200", HtpResponseNumber::INVALID)] + fn test_parse_status(#[case] input: &str, #[case] expected: HtpResponseNumber) { + assert_eq!(parse_status(&Bstr::from(input)), expected); + } + + #[rstest] + #[case( + "http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag", + "http", + "//user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag" + )] + #[should_panic] + #[case( + "/http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag", + "", + "" + )] + fn test_scheme(#[case] input: &str, #[case] s: &str, #[case] remaining: &str) { + assert_eq!( + scheme()(input.as_bytes()).unwrap(), + (remaining.as_bytes(), s.as_bytes()) + ); + } + + #[rstest] + #[case( + "//user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag", + "user", + Some("pass"), + "www.example.com:1234/path1/path2?a=b&c=d#frag" + )] + #[case( + "//user@www.example.com:1234/path1/path2?a=b&c=d#frag", + "user", + None, + "www.example.com:1234/path1/path2?a=b&c=d#frag" + )] + #[should_panic] + #[case( + "http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag", + "", + None, + "" + )] + fn test_credentials( + #[case] input: &str, #[case] username: &str, #[case] password: Option<&str>, + #[case] remaining: &str, + ) { + assert_eq!( + credentials()(input.as_bytes()).unwrap(), + ( + remaining.as_bytes(), + (username.as_bytes(), password.map(|i| i.as_bytes())) + ) + ); + } + + #[rstest] + #[case( + "www.example.com:1234/path1/path2?a=b&c=d#frag", + "www.example.com", + ":1234/path1/path2?a=b&c=d#frag" + )] + #[case( + "www.example.com/path1/path2?a=b&c=d#frag", + "www.example.com", + "/path1/path2?a=b&c=d#frag" + )] + #[case("www.example.com?a=b&c=d#frag", "www.example.com", "?a=b&c=d#frag")] + #[case("www.example.com#frag", "www.example.com", "#frag")] + #[case("[::1]:8080", "[::1]", ":8080")] + #[case("[::1", 
"[::1", "")] + #[case("[::1/path1[0]", "[::1", "/path1[0]")] + #[case("[::1]xxxx", "[::1]", "xxxx")] + #[should_panic] + #[case("/www.example.com/path1/path2?a=b&c=d#frag", "", "")] + fn test_hostname(#[case] input: &str, #[case] host: &str, #[case] remaining: &str) { + assert_eq!( + hostname()(input.as_bytes()).unwrap(), + (remaining.as_bytes(), host.as_bytes()) + ); + } + + #[rstest] + #[case(":1234/path1/path2?a=b&c=d#frag", "1234", "/path1/path2?a=b&c=d#frag")] + #[case(":1234?a=b&c=d#frag", "1234", "?a=b&c=d#frag")] + #[case(":1234#frag", "1234", "#frag")] + #[should_panic] + #[case("1234/path1/path2?a=b&c=d#frag", "", "")] + fn test_port(#[case] input: &str, #[case] p: &str, #[case] remaining: &str) { + assert_eq!( + port()(input.as_bytes()).unwrap(), + (remaining.as_bytes(), p.as_bytes()) + ); + } + + #[rstest] + #[case("/path1/path2?a=b&c=d#frag", "/path1/path2", "?a=b&c=d#frag")] + #[case("/path1/path2#frag", "/path1/path2", "#frag")] + #[case("path1/path2?a=b&c=d#frag", "path1/path2", "?a=b&c=d#frag")] + #[case("//", "//", "")] + #[case( + "/uid=0(root) gid=0(root) groups=0(root)asdf", + "/uid=0(root) gid=0(root) groups=0(root)asdf", + "" + )] + fn test_path(#[case] input: &str, #[case] p: &str, #[case] remaining: &str) { + assert_eq!( + path()(input.as_bytes()).unwrap(), + (remaining.as_bytes(), p.as_bytes()) + ); + } + + #[rstest] + #[case("?a=b&c=d#frag", "a=b&c=d", "#frag")] + #[case("?a=b&c=d", "a=b&c=d", "")] + #[case("?", "", "")] + fn test_query(#[case] input: &str, #[case] q: &str, #[case] remaining: &str) { + assert_eq!( + query()(input.as_bytes()).unwrap(), + (remaining.as_bytes(), q.as_bytes()) + ); + } + + #[rstest] + #[case("#frag", "frag")] + #[case("##frag", "#frag")] + #[should_panic] + #[case("frag", "")] + #[should_panic] + #[case("/path#frag", "")] + fn test_fragment(#[case] input: &str, #[case] frag: &str) { + assert_eq!( + fragment()(input.as_bytes()).unwrap(), + ("".as_bytes(), frag.as_bytes()) + ); + } + + #[rstest] + 
#[case("www.example.com", "www.example.com", None, true, "")] + #[case(" www.example.com ", "www.example.com", None, true, "")] + #[case(" www.example.com:8001 ", "www.example.com", Some(("8001", Some(8001))), true, ":8001 ")] + #[case(" www.example.com : 8001 ", "www.example.com", Some(("8001", Some(8001))), true, ": 8001 ")] + #[case("www.example.com.", "www.example.com.", None, true, "")] + #[case("www.example.com.", "www.example.com.", None, true, "")] + #[case("www.example.com:", "www.example.com", None, false, ":")] + #[case("www.example.com:ff", "www.example.com", Some(("ff", None)), false, ":ff")] + #[case("www.example.com:0", "www.example.com", Some(("0", None)), false, ":0")] + #[case("www.example.com:65536", "www.example.com", Some(("65536", None)), false, ":65536")] + #[case("[::1]:8080", "[::1]", Some(("8080", Some(8080))), true, ":8080")] + #[case("[::1]:", "[::1]", None, false, ":")] + #[case("[::1]x", "[::1]", None, false, "x")] + #[case("[::1", "[::1", None, false, "")] + fn test_parse_hostport( + #[case] input: &str, #[case] hostname: &str, + #[case] parsed_port: Option<(&str, Option)>, #[case] valid: bool, + #[case] remaining: &str, + ) { + assert_eq!( + parse_hostport(input.as_bytes()).unwrap(), + ( + remaining.as_bytes(), + ( + hostname.as_bytes(), + parsed_port.map(|(port, port_nmb)| (port.as_bytes(), port_nmb)), + valid + ) + ) + ); + } + + #[rstest] + #[case("134", Some(134))] + #[case(" \t134 ", Some(134))] + #[case("abcd134 ", Some(134))] + #[case("abcd ", None)] + fn test_parse_content_length(#[case] input: &str, #[case] expected: Option) { + assert_eq!(parse_content_length(input.as_bytes(), None), expected); + } + + #[rstest] + #[case("0 ; qw3=asd3; zc3=\"rt\"y3\"", (Some(0), true))] + #[case("12a5", (Some(0x12a5), false))] + #[case("12a5;ext=value", (Some(0x12a5), true))] + #[case(" \t12a5 ", (Some(0x12a5), false))] + #[case(" \t ", (None, false))] + fn test_parse_chunked_length(#[case] input: &str, #[case] expected: (Option, bool)) { + 
assert_eq!(parse_chunked_length(input.as_bytes()).unwrap(), expected); + } + + #[rstest] + #[case("multipart/form-data", "multipart/form-data")] + #[case("multipart/form-data;boundary=X", "multipart/form-data")] + #[case("multipart/form-data boundary=X", "multipart/form-data")] + #[case("multipart/form-data,boundary=X", "multipart/form-data")] + #[case("multipart/FoRm-data", "multipart/form-data")] + #[case("multipart/form-data\t boundary=X", "multipart/form-data\t")] + #[case(" \tmultipart/form-data boundary=X", "multipart/form-data")] + fn test_parse_content_type(#[case] input: &str, #[case] expected: &str) { + assert_eq!( + parse_content_type(input.as_bytes()).unwrap(), + Bstr::from(expected) + ); + } +} diff --git a/rust/htp/src/request.rs b/rust/htp/src/request.rs new file mode 100644 index 000000000000..fce7aa1d8373 --- /dev/null +++ b/rust/htp/src/request.rs @@ -0,0 +1,1632 @@ +use crate::{ + bstr::Bstr, + config::{HtpServerPersonality, HtpUnwanted}, + connection::ConnectionFlags, + connection_parser::{ConnectionParser, HtpStreamState, ParserData, State}, + decompressors::{Decompressor, HtpContentEncoding}, + error::Result, + headers::HeaderFlags, + hook::DataHook, + parsers::{parse_chunked_length, parse_content_length, parse_protocol}, + transaction::{ + Data, Header, HtpProtocol, HtpRequestProgress, HtpResponseProgress, HtpTransferCoding, + }, + util::{ + chomp, is_chunked_ctl_line, is_line_ignorable, is_space, is_valid_chunked_length_data, + split_on_predicate, take_is_space, take_not_is_space, take_till_lf, take_till_lf_null, + take_until_null, trimmed, FlagOperations, HtpFlags, + }, + HtpStatus, +}; +use nom::sequence::tuple; +use std::{ + cmp::{min, Ordering}, + mem::take, +}; +use time::OffsetDateTime; + +const HTTP09_MAX_JUNK_LEN: usize = 16; + +/// Enumerate HTTP methods. 
+#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpMethod { + /// Used by default, until the method is determined (e.g., before + /// the request line is processed. + UNKNOWN, + /// HEAD + HEAD, + /// GET + GET, + /// PUT + PUT, + /// POST + POST, + /// DELETE + DELETE, + /// CONNECT + CONNECT, + /// OPTIONS + OPTIONS, + /// TRACE + TRACE, + /// PATCH + PATCH, + /// PROPFIND + PROPFIND, + /// PROPPATCH + PROPPATCH, + /// MKCOL + MKCOL, + /// COPY + COPY, + /// MOVE + MOVE, + /// LOCK + LOCK, + /// UNLOCK + UNLOCK, + /// VERSION_CONTROL + VERSION_CONTROL, + /// CHECKOUT + CHECKOUT, + /// UNCHECKOUT + UNCHECKOUT, + /// CHECKIN + CHECKIN, + /// UPDATE + UPDATE, + /// LABEL + LABEL, + /// REPORT + REPORT, + /// MKWORKSPACE + MKWORKSPACE, + /// MKACTIVITY + MKACTIVITY, + /// BASELINE_CONTROL + BASELINE_CONTROL, + /// MERGE + MERGE, + /// INVALID + INVALID, + /// ERROR + ERROR, +} + +impl HtpMethod { + /// Creates a new HtpMethod from the slice. + fn new(method: &[u8]) -> Self { + match method { + b"GET" => HtpMethod::GET, + b"PUT" => HtpMethod::PUT, + b"POST" => HtpMethod::POST, + b"DELETE" => HtpMethod::DELETE, + b"CONNECT" => HtpMethod::CONNECT, + b"OPTIONS" => HtpMethod::OPTIONS, + b"TRACE" => HtpMethod::TRACE, + b"PATCH" => HtpMethod::PATCH, + b"PROPFIND" => HtpMethod::PROPFIND, + b"PROPPATCH" => HtpMethod::PROPPATCH, + b"MKCOL" => HtpMethod::MKCOL, + b"COPY" => HtpMethod::COPY, + b"MOVE" => HtpMethod::MOVE, + b"LOCK" => HtpMethod::LOCK, + b"UNLOCK" => HtpMethod::UNLOCK, + b"VERSION-CONTROL" => HtpMethod::VERSION_CONTROL, + b"CHECKOUT" => HtpMethod::CHECKOUT, + b"UNCHECKOUT" => HtpMethod::UNCHECKOUT, + b"CHECKIN" => HtpMethod::CHECKIN, + b"UPDATE" => HtpMethod::UPDATE, + b"LABEL" => HtpMethod::LABEL, + b"REPORT" => HtpMethod::REPORT, + b"MKWORKSPACE" => HtpMethod::MKWORKSPACE, + b"MKACTIVITY" => HtpMethod::MKACTIVITY, + b"BASELINE-CONTROL" => HtpMethod::BASELINE_CONTROL, + b"MERGE" => HtpMethod::MERGE, + b"INVALID" => HtpMethod::INVALID, + 
b"HEAD" => HtpMethod::HEAD, + _ => HtpMethod::UNKNOWN, + } + } +} +impl ConnectionParser { + /// Sends outstanding connection data to the currently active data receiver hook. + fn request_receiver_send_data(&mut self, data: &mut ParserData) -> Result<()> { + let data = ParserData::from(data.callback_data()); + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let mut tx_data = Data::new(req.unwrap(), &data); + if let Some(hook) = &self.request_data_receiver_hook { + hook.run_all(self, &mut tx_data)?; + } else { + return Ok(()); + }; + Ok(()) + } + + /// Configures the data receiver hook. + fn request_receiver_set(&mut self, data_receiver_hook: Option) -> Result<()> { + self.request_data_receiver_hook = data_receiver_hook; + Ok(()) + } + + /// Finalizes an existing data receiver hook by sending any outstanding data to it. The + /// hook is then removed so that it receives no more data. + pub fn request_receiver_finalize_clear(&mut self, input: &mut ParserData) -> Result<()> { + if self.request_data_receiver_hook.is_none() { + return Ok(()); + } + let rc = self.request_receiver_send_data(input); + self.request_data_receiver_hook = None; + rc + } + + /// Handles request parser state changes. At the moment, this function is used only + /// to configure data receivers, which are sent raw connection data. 
+ fn request_handle_state_change(&mut self, input: &mut ParserData) -> Result<()> { + if self.request_state_previous == self.request_state { + return Ok(()); + } + + if self.request_state == State::HEADERS { + // ensured by caller + let req = self.request().unwrap(); + let header_fn = Some(req.cfg.hook_request_header_data.clone()); + let trailer_fn = Some(req.cfg.hook_request_trailer_data.clone()); + input.reset_callback_start(); + + match req.request_progress { + HtpRequestProgress::HEADERS => self.request_receiver_set(header_fn), + HtpRequestProgress::TRAILER => self.request_receiver_set(trailer_fn), + _ => Ok(()), + }?; + } + // Initially, I had the finalization of raw data sending here, but that + // caused the last REQUEST_HEADER_DATA hook to be invoked after the + // REQUEST_HEADERS hook -- which I thought made no sense. For that reason, + // the finalization is now initiated from the request header processing code, + // which is less elegant but provides a better user experience. Having some + // (or all) hooks to be invoked on state change might work better. + self.request_state_previous = self.request_state; + Ok(()) + } + + /// If there is any data left in the inbound data chunk, this function will preserve + /// it for later consumption. The maximum amount accepted for buffering is controlled + /// by Config::field_limit. + fn check_request_buffer_limit(&mut self, len: usize) -> Result<()> { + if len == 0 { + return Ok(()); + } + // Check the hard (buffering) limit. + let mut newlen: usize = self.request_buf.len().wrapping_add(len); + // When calculating the size of the buffer, take into account the + // space we're using for the request header buffer. 
+ if let Some(header) = &self.request_header { + newlen = newlen.wrapping_add(header.len()) + } + let field_limit = self.cfg.field_limit; + if newlen > field_limit { + htp_error!( + self.logger, + HtpLogCode::REQUEST_FIELD_TOO_LONG, + format!( + "Request buffer over the limit: size {} limit {}.", + newlen, field_limit + ) + ); + return Err(HtpStatus::ERROR); + } + Ok(()) + } + + /// Performs a check for a CONNECT transaction to decide whether inbound + /// parsing needs to be suspended. + /// + /// Returns OK if the request does not use CONNECT, or HtpStatus::DATA_OTHER if + /// inbound parsing needs to be suspended until we hear from the + /// other side. + pub fn request_connect_check(&mut self) -> Result<()> { + let req = self.request(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + + // If the request uses the CONNECT method, then there will + // not be a request body, but first we need to wait to see the + // response in order to determine if the tunneling request + // was a success. + if req.unwrap().request_method_number == HtpMethod::CONNECT { + self.request_state = State::CONNECT_WAIT_RESPONSE; + self.request_status = HtpStreamState::DATA_OTHER; + return Err(HtpStatus::DATA_OTHER); + } + // Continue to the next step to determine + // the presence of request body + self.request_state = State::BODY_DETERMINE; + Ok(()) + } + + /// Determines whether inbound parsing needs to continue or stop. In + /// case the data appears to be plain text HTTP, we try to continue. + /// + /// Returns OK if the parser can resume parsing, HtpStatus::DATA_BUFFER if + /// we need more data. + pub fn request_connect_probe_data(&mut self, input: &mut ParserData) -> Result<()> { + let data = if let Ok((_, data)) = take_till_lf_null(input.as_slice()) { + data + } else { + return self.handle_request_absent_lf(input); + }; + + if !self.request_buf.is_empty() { + self.check_request_buffer_limit(data.len())?; + } + // copy, will still need buffer data for next state. 
+ let mut buffered = self.request_buf.clone(); + buffered.add(data); + + // The request method starts at the beginning of the + // line and ends with the first whitespace character. + // We skip leading whitespace as IIS allows this. + let res = tuple((take_is_space, take_not_is_space))(buffered.as_slice()); + if let Ok((_, (_, method))) = res { + if HtpMethod::new(method) == HtpMethod::UNKNOWN { + self.request_status = HtpStreamState::TUNNEL; + self.response_status = HtpStreamState::TUNNEL + } else { + return self.state_request_complete(input); + } + }; + Ok(()) + } + + /// Determines whether inbound parsing, which was suspended after + /// encountering a CONNECT transaction, can proceed (after receiving + /// the response). + /// + /// Returns OK if the parser can resume parsing, HtpStatus::DATA_OTHER if + /// it needs to continue waiting. + pub fn request_connect_wait_response(&mut self) -> Result<()> { + let req = self.request(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + // Check that we saw the response line of the current inbound transaction. + if req.response_progress <= HtpResponseProgress::LINE { + return Err(HtpStatus::DATA_OTHER); + } + // A 2xx response means a tunnel was established. Anything + // else means we continue to follow the HTTP stream. + if req.response_status_number.in_range(200, 299) { + // TODO Check that the server did not accept a connection to itself. + // The requested tunnel was established: we are going + // to probe the remaining data on this stream to see + // if we need to ignore it or parse it + self.request_state = State::CONNECT_PROBE_DATA; + } else { + // No tunnel; continue to the next transaction + self.request_state = State::FINALIZE + } + Ok(()) + } + + /// Consumes bytes until the end of the current line. + /// + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. 
+ pub fn request_body_chunked_data_end(&mut self, input: &mut ParserData) -> Result<()> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + // TODO We shouldn't really see anything apart from CR and LF, + // so we should warn about anything else. + if let Ok((_, line)) = take_till_lf(input.as_slice()) { + let len = line.len(); + req.request_message_len = req.request_message_len.wrapping_add(len as u64); + self.request_data_consume(input, len); + self.request_state = State::BODY_CHUNKED_LENGTH; + Ok(()) + } else { + req.request_message_len = req.request_message_len.wrapping_add(input.len() as u64); + self.handle_request_absent_lf(input) + } + } + + /// Processes a chunk of data. + /// + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. + pub fn request_body_chunked_data(&mut self, input: &mut ParserData) -> Result<()> { + // Determine how many bytes we can consume. + let bytes_to_consume: usize = min( + input.len(), + self.request_chunked_length.unwrap_or(0) as usize, + ); + // If the input buffer is empty, ask for more data. + if bytes_to_consume == 0 { + return Err(HtpStatus::DATA); + } + // Consume the data. + self.request_body_data(Some(&input.as_slice()[0..bytes_to_consume]))?; + + // Adjust counters. + self.request_data_consume(input, bytes_to_consume); + if let Some(len) = self.request_chunked_length.as_mut() { + *len -= bytes_to_consume as u64; + if *len == 0 { + // End of the chunk. + self.request_state = State::BODY_CHUNKED_DATA_END; + return Ok(()); + } + } + // Ask for more data. + Err(HtpStatus::DATA) + } + + /// Extracts chunk length. + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. 
+ pub fn request_body_chunked_length(&mut self, input: &mut ParserData) -> Result<()> { + let mut data = input.as_slice(); + loop { + if let Ok((remaining, line)) = take_till_lf(data) { + self.request_data_consume(input, line.len()); + if !self.request_buf.is_empty() { + self.check_request_buffer_limit(line.len())?; + } + + let mut data2 = take(&mut self.request_buf); + data2.add(line); + if is_chunked_ctl_line(&data2) { + let req = self.request_mut().unwrap(); + req.request_message_len = + req.request_message_len.wrapping_add(data2.len() as u64); + //Empty chunk len. Try to continue parsing. + data = remaining; + continue; + } + let req = self.request_mut().unwrap(); + req.request_message_len = req.request_message_len.wrapping_add(data2.len() as u64); + // Handle chunk length. + let (len, ext) = parse_chunked_length(&data2)?; + self.request_chunked_length = len; + if ext { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_CHUNK_EXTENSION, + "Request chunk extension" + ); + } + let len = len.as_ref().ok_or(HtpStatus::ERROR).map_err(|e| { + // Invalid chunk length + htp_error!( + self.logger, + HtpLogCode::INVALID_REQUEST_CHUNK_LEN, + "Request chunk encoding: Invalid chunk length" + ); + e + })?; + match len.cmp(&0) { + Ordering::Equal => { + // End of data + self.request_state = State::HEADERS; + self.request_mut().unwrap().request_progress = HtpRequestProgress::TRAILER + } + Ordering::Greater => { + // More data available. + self.request_state = State::BODY_CHUNKED_DATA + } + _ => {} + } + return Ok(()); + } else { + // Check if the data we have seen so far is invalid + return if !is_valid_chunked_length_data(data) { + // Contains leading junk non hex_ascii data + // Invalid chunk length + htp_error!( + self.logger, + HtpLogCode::INVALID_REQUEST_CHUNK_LEN, + "Request chunk encoding: Invalid chunk length" + ); + Err(HtpStatus::ERROR) + } else { + self.handle_request_absent_lf(input) + }; + } + } + } + + /// Processes identity request body. 
+ /// + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. + pub fn request_body_identity(&mut self, data: &mut ParserData) -> Result<()> { + let left = self.request_body_data_left.ok_or(HtpStatus::ERROR)?; + // Determine how many bytes we can consume. + let bytes_to_consume: usize = min(data.len(), left as usize); + // If the input buffer is empty, ask for more data. + if bytes_to_consume == 0 { + return Err(HtpStatus::DATA); + } + if data.is_gap() { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + req.request_message_len = req + .request_message_len + .wrapping_add(bytes_to_consume as u64); + // Create a new gap of the appropriate length + let parser_data = ParserData::from(bytes_to_consume); + // Send the gap to the data hooks + let mut tx_data = Data::new(req, &parser_data); + self.request_run_hook_body_data(&mut tx_data)?; + } else { + // Consume the data. + self.request_body_data(Some(&data.as_slice()[0..bytes_to_consume]))?; + } + + // Adjust the counters. + self.request_data_consume(data, bytes_to_consume); + self.request_body_data_left = Some(left - bytes_to_consume as u64); + + // Have we seen the entire request body? + if self.request_body_data_left > Some(0) { + //Ask for more data; + return Err(HtpStatus::DATA); + } + // End of request body. + self.request_state = State::FINALIZE; + // Sends close signal to decompressors, outputting any partially decompressed data + self.request_body_data(None) + } + + /// Determines presence (and encoding) of a request body. + /// + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. 
+ pub fn request_body_determine(&mut self) -> Result<()> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + // Determine the next state based on the presence of the request + // body, and the coding used. + match req.request_transfer_coding { + HtpTransferCoding::CHUNKED => { + req.request_progress = HtpRequestProgress::BODY; + self.request_state = State::BODY_CHUNKED_LENGTH + } + HtpTransferCoding::IDENTITY => { + if req.request_content_length > Some(0) { + req.request_progress = HtpRequestProgress::BODY; + } + self.request_content_length = req.request_content_length; + self.request_body_data_left = self.request_content_length; + if self.request_content_length > Some(0) { + self.request_state = State::BODY_IDENTITY + } else { + self.request_state = State::FINALIZE + } + } + HtpTransferCoding::NO_BODY => { + // This request does not have a body, which + // means that we're done with it + self.request_state = State::FINALIZE + } + _ => { + // Should not be here + return Err(HtpStatus::ERROR); + } + } + Ok(()) + } + + /// Parses request headers. + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. + pub fn request_headers(&mut self, input: &mut ParserData) -> Result<()> { + let data = input.as_slice(); + if self.request_status == HtpStreamState::CLOSED { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + req.request_header_parser.set_complete(true); + // Parse previous header, if any. + req.request_progress = HtpRequestProgress::TRAILER; + if let Some(request_header) = self.request_header.take() { + self.parse_request_headers(request_header.as_slice())?; + } + self.request_buf.clear(); + // We've seen all the request headers. 
+ return self.state_request_headers(input); + } + let mut taken = false; + let request_header = if let Some(mut request_header) = self.request_header.take() { + request_header.add(data); + taken = true; + request_header + } else { + Bstr::new() + }; + let data2 = if taken { + request_header.as_slice() + } else { + data + }; + + let (remaining, eoh) = self.parse_request_headers(data2)?; + //TODO: Update the request state machine so that we don't have to have this EOL check + let eol = remaining.len() == data2.len() + && (remaining.starts_with(b"\r\n") || remaining.starts_with(b"\n")); + if eoh + //If the input started with an EOL, we assume this is the end of the headers + || eol + { + if remaining.len() < data.len() { + self.request_data_consume(input, data.len() - remaining.len()); + } else if eol { + if remaining.starts_with(b"\r\n") { + self.request_data_consume(input, min(data.len(), 2)); + } else if remaining.starts_with(b"\n") { + self.request_data_consume(input, min(data.len(), 1)); + } + } + // We've seen all the request headers. + self.state_request_headers(input) + } else { + self.request_data_consume(input, data.len()); + self.check_request_buffer_limit(remaining.len())?; + let remaining = Bstr::from(remaining); + self.request_header.replace(remaining); + Err(HtpStatus::DATA_BUFFER) + } + } + + /// Determines request protocol. + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. + pub fn request_protocol(&mut self, input: &mut ParserData) -> Result<()> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + // Is this a short-style HTTP/0.9 request? If it is, + // we will not want to parse request headers. + if !req.is_protocol_0_9 { + // Switch to request header parsing. 
+ req.request_progress = HtpRequestProgress::HEADERS; + self.request_state = State::HEADERS + } else { + if let Ok((rem, sp)) = take_is_space(input.as_slice()) { + if !rem.is_empty() || sp.len() > HTTP09_MAX_JUNK_LEN { + // we have more than spaces, no HTTP/0.9 + req.is_protocol_0_9 = false; + req.request_progress = HtpRequestProgress::HEADERS; + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_NO_PROTOCOL, + "Request line: missing protocol" + ); + // Switch to request header parsing. + self.request_state = State::HEADERS; + return Ok(()); + } + } + // We're done with this request. + self.request_state = State::FINALIZE; + } + Ok(()) + } + + /// Parse the request line. + /// + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. + fn request_line_complete(&mut self, line: &[u8]) -> Result<()> { + self.check_request_buffer_limit(line.len())?; + if line.is_empty() { + return Err(HtpStatus::DATA); + } + let perso = self.cfg.server_personality; + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + // Is this a line that should be ignored? + if is_line_ignorable(perso, line) { + // We have an empty/whitespace line, which we'll note, ignore and move on. + req.request_ignored_lines = req.request_ignored_lines.wrapping_add(1); + return Ok(()); + } + // Process request line. + let data = chomp(line); + req.request_line = Some(Bstr::from(data)); + self.parse_request_line(data)?; + // Finalize request line parsing. + self.state_request_line()?; + Ok(()) + } + + /// Parses request line. + /// + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. 
+ pub fn request_line(&mut self, input: &mut ParserData) -> Result<()> { + match take_till_lf(input.as_slice()) { + Ok((_, line)) => { + // We have a line ending, so consume the input + // and prepend any data buffered from earlier chunks + let mut data = take(&mut self.request_buf); + data.add(line); + self.request_data_consume(input, line.len()); + self.request_line_complete(data.as_slice()) + } + _ => { + // No line ending in this chunk: on a closed stream, treat the + // buffered data plus the remaining input as the final line; + // otherwise hand off to handle_request_absent_lf. + if self.request_status == HtpStreamState::CLOSED { + let mut data = take(&mut self.request_buf); + data.add(input.as_slice()); + self.request_data_consume(input, input.len()); + self.request_line_complete(data.as_slice()) + } else { + self.handle_request_absent_lf(input) + } + } + } + } + + /// Add one already-parsed request header to the current transaction. + /// + /// If a header with the same name (case-insensitive) already exists, the new + /// value is appended to it, separated by ", ". Content-Length is the exception: + /// its values are only compared and a warning is logged when they are ambiguous. + /// Otherwise the header is stored as a new entry. + /// + /// Returns Err(HtpStatus::ERROR) when a new header name arrives while the number + /// of stored headers already exceeds cfg.number_headers_limit. + fn process_request_header(&mut self, header: Header) -> Result<()> { + // Merge the parsed header into the transaction. + // The unwrap() below relies on the request being present: ensured by caller + let hl = self.cfg.number_headers_limit as usize; + let req = self.request_mut().unwrap(); + let mut repeated = false; + let reps = req.request_header_repetitions; + let mut update_reps = false; + // Do we already have a header with the same name? + if let Some(h_existing) = req.request_headers.get_nocase_mut(header.name.as_slice()) { + if !h_existing.flags.is_set(HeaderFlags::FIELD_REPEATED) { + // This is the second occurrence for this header. + repeated = true; + } else if reps < 64 { + // Further repetition: bump the per-transaction counter below. + update_reps = true; + } else { + // 64 repetitions already counted: drop this header without an error. + return Ok(()); + } + // For simplicity reasons, we count the repetitions of all headers + h_existing.flags.set(HeaderFlags::FIELD_REPEATED); + // Having multiple C-L headers is against the RFC but + // servers may ignore the subsequent headers if the values are the same. + if header.name.cmp_nocase("Content-Length") == Ordering::Equal { + // Don't use string comparison here because we want to + // ignore small formatting differences.
+ let existing_cl = parse_content_length(&h_existing.value, None); + let new_cl = parse_content_length(&header.value, None); + // Ambiguous request C-L value: one of the two does not parse, or they differ. + if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl { + htp_warn!( + self.logger, + HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST, + "Ambiguous request C-L value" + ); + } + } else { + // Add to the existing header. + h_existing.value.extend_from_slice(b", "); + h_existing.value.extend_from_slice(header.value.as_slice()); + } + } else { + // New header name: enforce the configured header count limit before storing it. + // The warning and the HEADERS_TOO_MANY flag are only emitted once per transaction. + if req.request_headers.elements.len() > hl { + if !req.flags.is_set(HtpFlags::HEADERS_TOO_MANY) { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_TOO_MANY_HEADERS, + "Too many request headers" + ); + let req = self.request_mut().unwrap(); + req.flags.set(HtpFlags::HEADERS_TOO_MANY); + } + return Err(HtpStatus::ERROR); + } + req.request_headers.elements.push(header); + } + let req = self.request_mut().unwrap(); + if update_reps { + req.request_header_repetitions = req.request_header_repetitions.wrapping_add(1) + } + if repeated { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_HEADER_REPETITION, + "Repetition for header" + ); + } + Ok(()) + } + + /// Parse request headers from `data` and store each one in the current transaction. + /// + /// Returns the unparsed remainder of `data` together with the header parser's + /// `eoh` flag (presumably "end of headers" - confirm against the header parser). + /// If the header parser yields no result, `data` is returned unchanged with `false`. + /// Returns Err(HtpStatus::ERROR) when there is no current request, or when + /// storing a header fails. + fn parse_request_headers<'a>(&mut self, data: &'a [u8]) -> Result<(&'a [u8], bool)> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + + let rc = req.unwrap().request_header_parser.headers()(data); + if let Ok((remaining, (headers, eoh))) = rc { + for h in headers { + let mut flags = 0; + let name_flags = h.name.flags; + // Ignore LWS after field-name. + if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) { + // Log only once per transaction.
+ htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_INVALID_LWS_AFTER_NAME, + "Request field invalid: LWS after name", + self.request_mut().unwrap().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + // If the name has leading whitespace, this is probably invalid folding + if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) { + // Invalid folding. + // Warn only once per transaction. + htp_warn_once!( + self.logger, + HtpLogCode::INVALID_REQUEST_FIELD_FOLDING, + "Invalid request field folding", + self.request_mut().unwrap().flags, + flags, + HtpFlags::INVALID_FOLDING + ); + } + // Check that field-name is a token + if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) { + // Incorrectly formed header name. + // Log only once per transaction. + htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_HEADER_INVALID, + "Request header name is not a token", + self.request_mut().unwrap().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + // No colon? + if name_flags.is_set(HeaderFlags::MISSING_COLON) { + // Log only once per transaction. + // We handle this case as a header with an empty name, with the value equal + // to the entire input string. + // TODO Apache will respond to this problem with a 400. + htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_FIELD_MISSING_COLON, + "Request field invalid: colon missing", + self.request_mut().unwrap().flags, + flags, + HtpFlags::FIELD_UNPARSEABLE + ); + } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) { + // Empty header name. + // Log only once per transaction. + htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_INVALID_EMPTY_NAME, + "Request field invalid: empty name", + self.request_mut().unwrap().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + // Store the header along with `flags`; a failure aborts the whole parse via `?`. + self.process_request_header(Header::new_with_flags( + h.name.name.into(), + h.value.value.into(), + flags, + ))?; + } + Ok((remaining, eoh)) + } else { + // The header parser produced no result: report `data` unchanged and eoh = false. + Ok((data, false)) + } + } + + /// Parses a single request line.
+ pub fn parse_request_line(&mut self, request_line: &[u8]) -> Result<()> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + req.request_line = Some(Bstr::from(request_line)); + let mut mstart: bool = false; + let mut data: &[u8] = request_line; + if self.cfg.server_personality == HtpServerPersonality::APACHE_2 { + //Null terminates + if let Ok((_, before_null)) = take_until_null(data) { + data = before_null + } + } + // The request method starts at the beginning of the + // line and ends with the first whitespace character. + let mut method_parser = tuple + // skip past leading whitespace. IIS allows this + ((take_is_space, + take_not_is_space, + // Ignore whitespace after request method. The RFC allows + // for only one SP, but then suggests any number of SP and HT + // should be permitted. Apache uses isspace(), which is even + // more permitting, so that's what we use here. + take_is_space + )); + + if let Ok((remaining, (ls, method, ws))) = method_parser(data) { + if !ls.is_empty() { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_LEADING_WHITESPACE, + "Request line: leading whitespace" + ); + + let requestline_leading_whitespace_unwanted = + self.cfg.requestline_leading_whitespace_unwanted; + if requestline_leading_whitespace_unwanted != HtpUnwanted::IGNORE { + // reset mstart so that we copy the whitespace into the method + mstart = true; + // set expected response code to this anomaly + let req = self.request_mut().unwrap(); + req.response_status_expected_number = requestline_leading_whitespace_unwanted + } + } + + let req = self.request_mut().unwrap(); + if mstart { + req.request_method = Some(Bstr::from([ls, method].concat())); + } else { + req.request_method = Some(Bstr::from(method)); + } + + if let Some(request_method) = &req.request_method { + req.request_method_number = HtpMethod::new(request_method.as_slice()); + } + + // Too much performance overhead for fuzzing + if 
ws.iter().any(|&c| c != 0x20) { + htp_warn!( + self.logger, + HtpLogCode::METHOD_DELIM_NON_COMPLIANT, + "Request line: non-compliant delimiter between Method and URI" + ); + } + + if remaining.is_empty() { + // No, this looks like a HTTP/0.9 request. + let req = self.request_mut().unwrap(); + req.is_protocol_0_9 = true; + req.request_protocol_number = HtpProtocol::V0_9; + if req.request_method_number == HtpMethod::UNKNOWN { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD, + "Request line: unknown method only" + ); + } + return Ok(()); + } + + let remaining = trimmed(remaining); + + let (mut uri, mut protocol) = + split_on_predicate(remaining, self.cfg.decoder_cfg.allow_space_uri, true, |c| { + *c == 0x20 + }); + + if uri.len() == remaining.len() && uri.iter().any(|&c| is_space(c)) { + // warn regardless if we've seen non-compliant chars + htp_warn!( + self.logger, + HtpLogCode::URI_DELIM_NON_COMPLIANT, + "Request line: URI contains non-compliant delimiter" + ); + // if we've seen some 'bad' delimiters, we retry with those + let uri_protocol = split_on_predicate( + remaining, + self.cfg.decoder_cfg.allow_space_uri, + true, + |c| is_space(*c), + ); + uri = uri_protocol.0; + protocol = uri_protocol.1; + } + + let req = self.request_mut().unwrap(); + req.request_uri = Some(Bstr::from(uri)); + + // Is there protocol information available? + if protocol.is_empty() { + // No, this looks like a HTTP/0.9 request. + req.is_protocol_0_9 = true; + req.request_protocol_number = HtpProtocol::V0_9; + if req.request_method_number == HtpMethod::UNKNOWN { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL, + "Request line: unknown method and no protocol" + ); + } + return Ok(()); + } + + // The protocol information continues until the end of the line. 
+ req.request_protocol = Some(Bstr::from(protocol)); + self.request_mut().unwrap().request_protocol_number = + parse_protocol(protocol, &mut self.logger); + let req = self.request().unwrap(); + if req.request_method_number == HtpMethod::UNKNOWN + && req.request_protocol_number == HtpProtocol::INVALID + { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL, + "Request line: unknown method and invalid protocol" + ); + } + } + Ok(()) + } + + /// Consumes request body data. + /// This function assumes that handling of chunked encoding is implemented + /// by the container. When you're done submitting body data, invoke a state + /// change (to REQUEST) to finalize any processing that might be pending. + /// The supplied data is fully consumed and there is no expectation that it + /// will be available afterwards. The protocol parsing code makes no copies + /// of the data, but some parsers might. + /// + /// Returns HtpStatus::OK on success or HtpStatus::ERROR if the request transaction + /// is invalid or response body data hook fails. + pub fn request_body_data(&mut self, data: Option<&[u8]>) -> Result<()> { + // None data is used to indicate the end of request body. + // Keep track of body size before decompression. 
+ let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + + req.request_message_len = req + .request_message_len + .wrapping_add(data.unwrap_or(b"").len() as u64); + match req.request_content_encoding_processing { + HtpContentEncoding::GZIP + | HtpContentEncoding::DEFLATE + | HtpContentEncoding::ZLIB + | HtpContentEncoding::LZMA => { + // Send data buffer to the decompressor if it exists + if req.request_decompressor.is_none() && data.is_none() { + return Ok(()); + } + let mut decompressor = req.request_decompressor.take().ok_or(HtpStatus::ERROR)?; + if let Some(data) = data { + decompressor + .decompress(data) + .map_err(|_| HtpStatus::ERROR)?; + if decompressor.time_spent() + > self.cfg.compression_options.get_time_limit() as u64 + { + htp_log!( + self.logger, + HtpLogLevel::ERROR, + HtpLogCode::COMPRESSION_BOMB, + format!( + "Compression bomb: spent {} us decompressing", + decompressor.time_spent(), + ) + ); + decompressor.set_passthrough(true); + } + // put the decompressor back in its slot + let req = self.request_mut().unwrap(); + req.request_decompressor.replace(decompressor); + } else { + // don't put the decompressor back in its slot + // ignore errors + let _ = decompressor.finish(); + } + } + HtpContentEncoding::NONE => { + // When there's no decompression, request_entity_len. + // is identical to request_message_len. + // None data is used to indicate the end of request body. + // Keep track of the body length. + req.request_entity_len += data.unwrap_or(b"").len() as u64; + // Send data to the callbacks. + let data = ParserData::from(data); + let mut data = Data::new(req, &data); + self.request_run_hook_body_data(&mut data).map_err(|e| { + htp_error!( + self.logger, + HtpLogCode::REQUEST_BODY_DATA_CALLBACK_ERROR, + format!("Request body data callback returned error ({:?})", e) + ); + e + })? 
+ } + HtpContentEncoding::ERROR => { + htp_error!( + self.logger, + HtpLogCode::INVALID_CONTENT_ENCODING, + "Expected a valid content encoding" + ); + return Err(HtpStatus::ERROR); + } + } + Ok(()) + } + + /// Initialize the request decompression engine. We can deal with three + /// scenarios: + /// + /// 1. Decompression is enabled, compression indicated in headers, and we decompress. + /// + /// 2. As above, but the user disables decompression by setting response_content_encoding + /// to COMPRESSION_NONE. + /// + /// 3. Decompression is disabled and we do not attempt to enable it, but the user + /// forces decompression by setting response_content_encoding to one of the + /// supported algorithms. + pub fn request_initialize_decompressors(&mut self) -> Result<()> { + let req = self.request_mut(); + if req.is_none() { + return Err(HtpStatus::ERROR); + } + let req = req.unwrap(); + let ce = req + .request_headers + .get_nocase_nozero("content-encoding") + .map(|val| val.value.clone()); + // Process multiple encodings if there is no match on fast path + let mut slow_path = false; + + // Fast path - try to match directly on the encoding value + req.request_content_encoding = if let Some(ce) = &ce { + if ce.cmp_nocase_nozero(b"gzip") == Ordering::Equal + || ce.cmp_nocase_nozero(b"x-gzip") == Ordering::Equal + { + HtpContentEncoding::GZIP + } else if ce.cmp_nocase_nozero(b"deflate") == Ordering::Equal + || ce.cmp_nocase_nozero(b"x-deflate") == Ordering::Equal + { + HtpContentEncoding::DEFLATE + } else if ce.cmp_nocase_nozero(b"lzma") == Ordering::Equal { + HtpContentEncoding::LZMA + } else if ce.cmp_nocase_nozero(b"inflate") == Ordering::Equal + || ce.cmp_nocase_nozero(b"none") == Ordering::Equal + { + HtpContentEncoding::NONE + } else { + slow_path = true; + HtpContentEncoding::NONE + } + } else { + HtpContentEncoding::NONE + }; + + // Configure decompression, if enabled in the configuration. 
+ self.request_mut() + .unwrap() + .request_content_encoding_processing = if self.cfg.request_decompression_enabled { + self.request().unwrap().request_content_encoding + } else { + slow_path = false; + HtpContentEncoding::NONE + }; + + let req = self.request_mut().unwrap(); + let request_content_encoding_processing = req.request_content_encoding_processing; + let compression_options = self.cfg.compression_options; + match &request_content_encoding_processing { + HtpContentEncoding::GZIP + | HtpContentEncoding::DEFLATE + | HtpContentEncoding::ZLIB + | HtpContentEncoding::LZMA => { + self.request_prepend_decompressor(request_content_encoding_processing)?; + } + HtpContentEncoding::NONE => { + if slow_path { + if let Some(ce) = &ce { + let mut layers = 0; + for encoding in ce.split(|c| *c == b',' || *c == b' ') { + if encoding.is_empty() { + continue; + } + layers += 1; + + if let Some(limit) = compression_options.get_layer_limit() { + // decompression layer depth check + if layers > limit { + htp_warn!( + self.logger, + HtpLogCode::TOO_MANY_ENCODING_LAYERS, + "Too many request content encoding layers" + ); + break; + } + } + + let encoding = Bstr::from(encoding); + let encoding = if encoding.index_of_nocase(b"gzip").is_some() { + if !(encoding.cmp_slice(b"gzip") == Ordering::Equal + || encoding.cmp_slice(b"x-gzip") == Ordering::Equal) + { + htp_warn!( + self.logger, + HtpLogCode::ABNORMAL_CE_HEADER, + "C-E gzip has abnormal value" + ); + } + HtpContentEncoding::GZIP + } else if encoding.index_of_nocase(b"deflate").is_some() { + if !(encoding.cmp_slice(b"deflate") == Ordering::Equal + || encoding.cmp_slice(b"x-deflate") == Ordering::Equal) + { + htp_warn!( + self.logger, + HtpLogCode::ABNORMAL_CE_HEADER, + "C-E deflate has abnormal value" + ); + } + HtpContentEncoding::DEFLATE + } else if encoding.cmp_slice(b"lzma") == Ordering::Equal { + if let Some(limit) = compression_options.get_lzma_layers() { + // LZMA decompression layer depth check + if layers > limit { + 
htp_warn!( + self.logger, + HtpLogCode::REQUEST_TOO_MANY_LZMA_LAYERS, + "Compression bomb: multiple encoding with lzma" + ); + break; + } + } + HtpContentEncoding::LZMA + } else if encoding.cmp_slice(b"inflate") == Ordering::Equal + || encoding.cmp_slice(b"none") == Ordering::Equal + { + HtpContentEncoding::NONE + } else { + htp_warn!( + self.logger, + HtpLogCode::ABNORMAL_CE_HEADER, + "C-E unknown setting" + ); + HtpContentEncoding::NONE + }; + self.request_prepend_decompressor(encoding)?; + } + } + } + } + HtpContentEncoding::ERROR => { + htp_error!( + self.logger, + HtpLogCode::INVALID_CONTENT_ENCODING, + "Expected a valid content encoding" + ); + return Err(HtpStatus::ERROR); + } + } + Ok(()) + } + + /// Prepend a decompressor to the request + fn request_prepend_decompressor(&mut self, encoding: HtpContentEncoding) -> Result<()> { + let compression_options = self.cfg.compression_options; + if encoding != HtpContentEncoding::NONE { + // ensured by caller + let req = self.request_mut().unwrap(); + if let Some(decompressor) = req.request_decompressor.take() { + req.request_decompressor + .replace(decompressor.prepend(encoding, compression_options)?); + } else { + // The processing encoding will be the first one encountered + req.request_content_encoding_processing = encoding; + + // Add the callback first because it will be called last in + // the chain of writers + + // TODO: fix lifetime error and remove this line! + let connp_ptr: *mut ConnectionParser = self as *mut ConnectionParser; + let decompressor = unsafe { + Decompressor::new_with_callback( + encoding, + Box::new(move |data: Option<&[u8]>| -> std::io::Result { + (*connp_ptr).request_decompressor_callback(data) + }), + compression_options, + )? 
+ }; + let req = self.request_mut().unwrap(); + req.request_decompressor.replace(decompressor); + } + } + Ok(()) + } + + fn request_decompressor_callback(&mut self, data: Option<&[u8]>) -> std::io::Result { + // If no data is passed, call the hooks with NULL to signify the end of the + // request body. + let parser_data = ParserData::from(data); + // ensured by only caller + let req = self.request_mut().unwrap(); + let mut tx_data = Data::new(req, &parser_data); + + // Keep track of actual request body length. + req.request_entity_len = req.request_entity_len.wrapping_add(tx_data.len() as u64); + + // Invoke all callbacks. + self.request_run_hook_body_data(&mut tx_data) + .map_err(|_| std::io::Error::new(std::io::ErrorKind::Other, "body data hook failed"))?; + + let compression_options = self.cfg.compression_options; + let req = self.request_mut().unwrap(); + if let Some(decompressor) = &mut req.request_decompressor { + if decompressor.callback_inc() % compression_options.get_time_test_freq() == 0 { + if let Some(time_spent) = decompressor.timer_reset() { + if time_spent > compression_options.get_time_limit() as u64 { + decompressor.set_passthrough(true); + htp_log!( + self.logger, + HtpLogLevel::ERROR, + HtpLogCode::COMPRESSION_BOMB, + format!("Compression bomb: spent {} us decompressing", time_spent) + ); + } + } + } + } + + // output > ratio * input ? 
+ let ratio = compression_options.get_bomb_ratio(); + let req = self.request().unwrap(); + let exceeds_ratio = if let Some(ratio) = req.request_message_len.checked_mul(ratio) { + req.request_entity_len > ratio + } else { + // overflow occured + true + }; + + let bomb_limit = compression_options.get_bomb_limit(); + let request_entity_len = req.request_entity_len; + let request_message_len = req.request_message_len; + if request_entity_len > bomb_limit && exceeds_ratio { + htp_log!( + self.logger, + HtpLogLevel::ERROR, + HtpLogCode::COMPRESSION_BOMB, + format!( + "Compression bomb: decompressed {} bytes out of {}", + request_entity_len, request_message_len, + ) + ); + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "compression_bomb_limit reached", + )); + } + Ok(tx_data.len()) + } + + /// Finalizes request. + /// + /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER + /// when more data is needed. + pub fn request_finalize(&mut self, input: &mut ParserData) -> Result<()> { + if input.is_gap() { + return self.state_request_complete(input); + } + let mut work = input.as_slice(); + let mut has_lf = false; + if self.request_status != HtpStreamState::CLOSED { + let request_next_byte = input.as_slice().first(); + if request_next_byte.is_none() { + return self.state_request_complete(input); + } + + if let Ok((_, line)) = take_till_lf(work) { + work = &line[..line.len() - 1]; + has_lf = true; + self.request_data_consume(input, line.len() - 1); + } else { + return self.handle_request_absent_lf(input); + } + } + + if !self.request_buf.is_empty() { + self.check_request_buffer_limit(work.len())?; + } + let mut data = take(&mut self.request_buf); + let buf_len = data.len(); + data.add(work); + + if data.is_empty() { + //closing + return self.state_request_complete(input); + } + let res = tuple((take_is_space, take_not_is_space))(&data); + + if let Ok((_, (_, method))) = res { + if method.is_empty() { + // empty whitespace line + let rc = 
self.request_body_data(Some(&data)); + self.request_buf.clear(); + return rc; + } + if HtpMethod::new(method) == HtpMethod::UNKNOWN { + if self.request_body_data_left.unwrap_or(0) == 0 { + // log only once per transaction + htp_warn!( + self.logger, + HtpLogCode::REQUEST_BODY_UNEXPECTED, + "Unexpected request body" + ); + } else { + self.request_body_data_left = Some(1); + } + if has_lf { + //Adds linefeed to the buffer if there was one + self.request_data_consume(input, 1); + data.add(b"\n"); + } + // Interpret remaining bytes as body data + let rc = self.request_body_data(Some(&data)); + self.request_buf.clear(); + return rc; + } // else continue + self.request_body_data_left = None; + } + // didnt use data, restore + self.request_buf.add(&data[0..buf_len]); + //unread last end of line so that request_line works + self.request_data_unconsume(input, data.len()); + self.state_request_complete(input) + } + + /// Consumes whatever is left in the buffer after detecting an http/0.9 session. + pub fn request_ignore_data_after_http_0_9(&mut self, data: &mut ParserData) -> Result<()> { + if !data.is_empty() { + self.conn.flags.set(ConnectionFlags::HTTP_0_9_EXTRA) + } + self.request_data_consume(data, data.len()); + Err(HtpStatus::DATA) + } + + /// The idle state is where the parser will end up after a transaction is processed. + /// If there is more data available, a new request will be started. + /// + /// Returns OK on state change, ERROR on error, or HTP_DATA when more data is needed. + pub fn request_idle(&mut self, data: &mut ParserData) -> Result<()> { + // We want to start parsing the next request (and change + // the state from IDLE) only if there's at least one + // byte of data available. Otherwise we could be creating + // new structures even if there's no more data on the + // connection. 
+ if data.is_empty() { + // we may have buffered some data, if we are closing, we want to process it + if self.request_status != HtpStreamState::CLOSED || self.request_buf.is_empty() { + return Err(HtpStatus::DATA); + } + } + self.request_reset(); + // Change state to TRANSACTION_START + // Ignore the result. + let _ = self.state_request_start(); + Ok(()) + } + + /// Buffer incomplete request data and verify that field_limit + /// constraint is met. + fn handle_request_absent_lf(&mut self, data: &ParserData) -> Result<()> { + self.check_request_buffer_limit(data.len())?; + self.request_buf.add(data.as_slice()); + self.request_data_consume(data, data.len()); + Err(HtpStatus::DATA_BUFFER) + } + + /// Run the REQUEST_BODY_DATA hook. + fn request_run_hook_body_data(&mut self, d: &mut Data) -> Result<()> { + // Do not invoke callbacks with an empty data chunk + let req = self.request_mut().unwrap(); + if !d.data().is_null() && d.is_empty() { + return Ok(()); + } + req.hook_request_body_data.clone().run_all(self, d)?; + // Run configuration hooks second + self.cfg.hook_request_body_data.run_all(self, d)?; + Ok(()) + } + + /// Process a chunk of inbound (client or request) data. + pub fn request_data( + &mut self, mut chunk: ParserData, timestamp: Option, + ) -> HtpStreamState { + // Reset the bytes consumed counter + self.request_bytes_consumed = 0; + + // Return if the connection is in stop state. + if self.request_status == HtpStreamState::STOP { + htp_info!( + self.logger, + HtpLogCode::PARSER_STATE_ERROR, + "Inbound parser is in STOP state" + ); + return HtpStreamState::STOP; + } + // Return if the connection had a fatal error earlier + if self.request_status == HtpStreamState::ERROR { + htp_error!( + self.logger, + HtpLogCode::PARSER_STATE_ERROR, + "Inbound parser is in ERROR state" + ); + return HtpStreamState::ERROR; + } + + // If the length of the supplied data chunk is zero, proceed + // only if the stream has been closed. 
We do not allow zero-sized + // chunks in the API, but we use them internally to force the parsers + // to finalize parsing. + if chunk.is_empty() && self.request_status != HtpStreamState::CLOSED { + htp_error!( + self.logger, + HtpLogCode::ZERO_LENGTH_DATA_CHUNKS, + "Zero-length data chunks are not allowed" + ); + return HtpStreamState::CLOSED; + } + // Remember the timestamp of the current request data chunk + if let Some(timestamp) = timestamp { + self.request_timestamp = timestamp; + } + + // Store the current chunk information + self.request_chunk_count = self.request_chunk_count.wrapping_add(1); + self.conn.track_inbound_data(chunk.len()); + // Return without processing any data if the stream is in tunneling + // mode (which it would be after an initial CONNECT transaction). + if self.request_status == HtpStreamState::TUNNEL { + return HtpStreamState::TUNNEL; + } + if self.response_status == HtpStreamState::DATA_OTHER { + self.response_status = HtpStreamState::DATA + } + //handle gap + if chunk.is_gap() { + // Mark the transaction as having a gap + let idx = self.request_index(); + let req = self.request_mut(); + if req.is_none() { + return HtpStreamState::ERROR; + } + let req = req.unwrap(); + + req.flags.set(HtpFlags::REQUEST_MISSING_BYTES); + + if idx == 0 && req.request_progress == HtpRequestProgress::NOT_STARTED { + // We have a leading gap on the first transaction. + // Force the parser to start if it hasn't already. + self.request_mut().unwrap().request_progress = HtpRequestProgress::GAP; + self.request_status = HtpStreamState::ERROR; + return HtpStreamState::ERROR; + } + } + + loop + // Invoke a processor, in a loop, until an error + // occurs or until we run out of data. Many processors + // will process a request, each pointing to the next + // processor that needs to run. + // Return if there's been an error or if we've run out of data. We are relying + // on processors to supply error messages, so we'll keep quiet here. 
+ { + // handle gap + if chunk.is_gap() + && self.request_state != State::BODY_IDENTITY + && self.request_state != State::IGNORE_DATA_AFTER_HTTP_0_9 + && self.request_state != State::FINALIZE + { + // go to request_connect_probe_data ? + htp_error!( + self.logger, + HtpLogCode::INVALID_GAP, + "Gaps are not allowed during this state" + ); + return HtpStreamState::CLOSED; + } + let mut rc = self.handle_request_state(&mut chunk); + + if rc.is_ok() { + if self.request_status == HtpStreamState::TUNNEL { + return HtpStreamState::TUNNEL; + } + rc = self.request_handle_state_change(&mut chunk) + } + match rc { + // Continue looping. + Ok(_) => {} + // Do we need more data? + Err(HtpStatus::DATA) | Err(HtpStatus::DATA_BUFFER) => { + // Ignore result. + let _ = self.request_receiver_send_data(&mut chunk); + self.request_status = HtpStreamState::DATA; + return HtpStreamState::DATA; + } + // Check for suspended parsing. + Err(HtpStatus::DATA_OTHER) => { + // We might have actually consumed the entire data chunk? + if chunk.is_empty() { + // Do not send STREAM_DATE_DATA_OTHER if we've consumed the entire chunk. + self.request_status = HtpStreamState::DATA; + return HtpStreamState::DATA; + } else { + // Partial chunk consumption. + self.request_status = HtpStreamState::DATA_OTHER; + return HtpStreamState::DATA_OTHER; + } + } + // Check for the stop signal. + Err(HtpStatus::STOP) => { + self.request_status = HtpStreamState::STOP; + return HtpStreamState::STOP; + } + // Permanent stream error. 
+ Err(_) => { + self.request_status = HtpStreamState::ERROR; + return HtpStreamState::ERROR; + } + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use rstest::rstest; + + #[rstest] + #[case(b"GET", HtpMethod::GET)] + #[case(b"PUT", HtpMethod::PUT)] + #[case(b"POST", HtpMethod::POST)] + #[case(b"PoST", HtpMethod::UNKNOWN)] + #[case(b"post", HtpMethod::UNKNOWN)] + #[case(b"NOT_METHOD", HtpMethod::UNKNOWN)] + fn test_method(#[case] input: &[u8], #[case] expected: HtpMethod) { + assert_eq!(HtpMethod::new(input), expected); + } +} diff --git a/rust/htp/src/request_generic.rs b/rust/htp/src/request_generic.rs new file mode 100644 index 000000000000..cef2550105fb --- /dev/null +++ b/rust/htp/src/request_generic.rs @@ -0,0 +1,306 @@ +use crate::{ + bstr::Bstr, + config::HtpUnwanted, + connection_parser::ConnectionParser, + error::Result, + headers::Flags as HeaderFlags, + parsers::{parse_content_length, parse_protocol}, + request::HtpMethod, + transaction::{Header, HtpProtocol}, + util::{ + is_space, take_ascii_whitespace, take_is_space, take_not_is_space, take_until_null, + FlagOperations, HtpFlags, + }, +}; +use nom::{bytes::complete::take_while, error::ErrorKind, sequence::tuple}; +use std::cmp::Ordering; + +impl ConnectionParser { + /// Extract one request header. A header can span multiple lines, in + /// which case they will be folded into one before parsing is attempted. + fn process_request_header_generic(&mut self, header: Header) -> Result<()> { + // Try to parse the header. + let mut repeated = false; + let reps = self.request().request_header_repetitions; + let mut update_reps = false; + // Do we already have a header with the same name? + if let Some((_, h_existing)) = self + .request_mut() + .request_headers + .get_nocase_mut(header.name.as_slice()) + { + // TODO Do we want to have a list of the headers that are + // allowed to be combined in this way? 
+ if !h_existing.flags.is_set(HtpFlags::FIELD_REPEATED) { + // This is the second occurence for this header. + repeated = true; + } else if reps < 64 { + update_reps = true; + } else { + return Ok(()); + } + // For simplicity reasons, we count the repetitions of all headers + // Keep track of repeated same-name headers. + h_existing.flags.set(HtpFlags::FIELD_REPEATED); + // Having multiple C-L headers is against the RFC but + // servers may ignore the subsequent headers if the values are the same. + if header.name.cmp_nocase("Content-Length") == Ordering::Equal { + // Don't use string comparison here because we want to + // ignore small formatting differences. + let existing_cl = parse_content_length(&h_existing.value, None); + let new_cl = parse_content_length(&header.value, None); + // Ambiguous response C-L value. + if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl { + htp_warn!( + self.logger, + HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST, + "Ambiguous request C-L value" + ); + } + } else { + // Add to the existing header. + h_existing.value.extend_from_slice(b", "); + h_existing.value.extend_from_slice(header.value.as_slice()); + } + } else { + self.request_mut() + .request_headers + .add(header.name.clone(), header); + } + if update_reps { + self.request_mut().request_header_repetitions = + self.request().request_header_repetitions.wrapping_add(1) + } + if repeated { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_HEADER_REPETITION, + "Repetition for header" + ); + } + Ok(()) + } + + /// Generic request header parser. + pub fn process_request_headers_generic<'a>( + &mut self, + data: &'a [u8], + ) -> Result<(&'a [u8], bool)> { + let rc = self.request_mut().request_header_parser.headers()(data); + if let Ok((remaining, (headers, eoh))) = rc { + for h in headers { + let mut flags = 0; + let name_flags = h.name.flags; + // Ignore LWS after field-name. 
+ if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) { + // Log only once per transaction. + htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_INVALID_LWS_AFTER_NAME, + "Request field invalid: LWS after name", + self.request_mut().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + //If name has leading whitespace, probably invalid folding + if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) { + // Invalid folding. + // Warn only once per transaction. + htp_warn_once!( + self.logger, + HtpLogCode::INVALID_REQUEST_FIELD_FOLDING, + "Invalid request field folding", + self.request_mut().flags, + flags, + HtpFlags::INVALID_FOLDING + ); + } + // Check that field-name is a token + if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) { + // Incorrectly formed header name. + // Log only once per transaction. + htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_HEADER_INVALID, + "Request header name is not a token", + self.request_mut().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + // No colon? + if name_flags.is_set(HeaderFlags::MISSING_COLON) { + // Log only once per transaction. + // We handle this case as a header with an empty name, with the value equal + // to the entire input string. + // TODO Apache will respond to this problem with a 400. + // Now extract the name and the value + htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_FIELD_MISSING_COLON, + "Request field invalid: colon missing", + self.request_mut().flags, + flags, + HtpFlags::FIELD_UNPARSEABLE + ); + } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) { + // Empty header name. + // Log only once per transaction. 
+ htp_warn_once!( + self.logger, + HtpLogCode::REQUEST_INVALID_EMPTY_NAME, + "Request field invalid: empty name", + self.request_mut().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + self.process_request_header_generic(Header::new_with_flags( + h.name.name.into(), + h.value.value.into(), + flags, + ))?; + } + Ok((remaining, eoh)) + } else { + Ok((data, false)) + } + } + + /// Parses a single request line. + pub fn parse_request_line_generic_ex( + &mut self, + request_line: &[u8], + nul_terminates: bool, + ) -> Result<()> { + let mut mstart: bool = false; + let mut data: &[u8] = request_line; + if nul_terminates { + if let Ok((_, before_null)) = take_until_null(data) { + data = before_null + } + } + // The request method starts at the beginning of the + // line and ends with the first whitespace character. + let method_parser = tuple::<_, _, (_, ErrorKind), _> + // skip past leading whitespace. IIS allows this + ((take_is_space, + take_not_is_space, + // Ignore whitespace after request method. The RFC allows + // for only one SP, but then suggests any number of SP and HT + // should be permitted. Apache uses isspace(), which is even + // more permitting, so that's what we use here. 
+ take_ascii_whitespace() + )); + + if let Ok((remaining, (ls, method, ws))) = method_parser(data) { + if !ls.is_empty() { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_LEADING_WHITESPACE, + "Request line: leading whitespace" + ); + + let requestline_leading_whitespace_unwanted = + self.cfg.requestline_leading_whitespace_unwanted; + if requestline_leading_whitespace_unwanted != HtpUnwanted::IGNORE { + // reset mstart so that we copy the whitespace into the method + mstart = true; + // set expected response code to this anomaly + self.request_mut().response_status_expected_number = + requestline_leading_whitespace_unwanted + } + } + + if mstart { + self.request_mut().request_method = + Some(Bstr::from([&ls[..], &method[..]].concat())); + } else { + self.request_mut().request_method = Some(Bstr::from(method)); + } + + if let Some(request_method) = &self.request().request_method { + self.request_mut().request_method_number = + HtpMethod::new(request_method.as_slice()); + } + + // Too much performance overhead for fuzzing + if ws.iter().any(|&c| c != 0x20) { + htp_warn!( + self.logger, + HtpLogCode::METHOD_DELIM_NON_COMPLIANT, + "Request line: non-compliant delimiter between Method and URI" + ); + } + + if remaining.is_empty() { + // No, this looks like a HTTP/0.9 request. + self.request_mut().is_protocol_0_9 = true; + self.request_mut().request_protocol_number = HtpProtocol::V0_9; + if self.request().request_method_number == HtpMethod::UNKNOWN { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD, + "Request line: unknown method only" + ); + } + return Ok(()); + } + + let uri_protocol_parser = tuple::<_, _, (_, ErrorKind), _> + // The URI ends with the first whitespace. + ((take_while(|c: u8| c != 0x20), + // Ignore whitespace after URI. 
+ take_is_space) + ); + + if let Ok((mut protocol, (mut uri, _))) = uri_protocol_parser(remaining) { + if uri.len() == remaining.len() && uri.iter().any(|&c| is_space(c)) { + // warn regardless if we've seen non-compliant chars + htp_warn!( + self.logger, + HtpLogCode::URI_DELIM_NON_COMPLIANT, + "Request line: URI contains non-compliant delimiter" + ); + // if we've seen some 'bad' delimiters, we retry with those + let uri_protocol_parser2 = + tuple::<_, _, (_, ErrorKind), _>((take_not_is_space, take_is_space)); + if let Ok((protocol2, (uri2, _))) = uri_protocol_parser2(remaining) { + uri = uri2; + protocol = protocol2; + } + } + self.request_mut().request_uri = Some(Bstr::from(uri)); + // Is there protocol information available? + if protocol.is_empty() { + // No, this looks like a HTTP/0.9 request. + self.request_mut().is_protocol_0_9 = true; + self.request_mut().request_protocol_number = HtpProtocol::V0_9; + if self.request().request_method_number == HtpMethod::UNKNOWN { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL, + "Request line: unknown method and no protocol" + ); + } + return Ok(()); + } + // The protocol information continues until the end of the line. 
+ self.request_mut().request_protocol = Some(Bstr::from(protocol)); + self.request_mut().request_protocol_number = + parse_protocol(protocol, &mut self.logger); + if self.request().request_method_number == HtpMethod::UNKNOWN + && self.request().request_protocol_number == HtpProtocol::INVALID + { + htp_warn!( + self.logger, + HtpLogCode::REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL, + "Request line: unknown method and invalid protocol" + ); + } + } + } + Ok(()) + } +} diff --git a/rust/htp/src/response.rs b/rust/htp/src/response.rs new file mode 100644 index 000000000000..15349c79a841 --- /dev/null +++ b/rust/htp/src/response.rs @@ -0,0 +1,1620 @@ +use crate::{ + bstr::Bstr, + connection_parser::{ConnectionParser, HtpStreamState, ParserData, State}, + decompressors::{Decompressor, HtpContentEncoding}, + error::Result, + headers::HeaderFlags, + hook::DataHook, + parsers::{parse_chunked_length, parse_content_length, parse_protocol, parse_status}, + request::HtpMethod, + transaction::{ + Data, Header, HtpProtocol, HtpRequestProgress, HtpResponseNumber, HtpResponseProgress, + HtpTransferCoding, + }, + uri::Uri, + util::{ + chomp, is_chunked_ctl_line, is_line_ignorable, is_space, is_valid_chunked_length_data, + take_ascii_whitespace, take_is_space, take_is_space_or_null, take_not_is_space, + take_till_eol, take_till_lf, treat_response_line_as_body, FlagOperations, HtpFlags, + }, + HtpStatus, +}; +use nom::{bytes::streaming::take_till as streaming_take_till, error::ErrorKind, sequence::tuple}; +use std::{ + cmp::{min, Ordering}, + mem::take, +}; +use time::OffsetDateTime; + +impl ConnectionParser { + /// Sends outstanding connection data to the currently active data receiver hook. 
+ fn response_receiver_send_data(&mut self, data: &mut ParserData) -> Result<()> { + let data = ParserData::from(data.callback_data()); + let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let mut tx_data = Data::new(resp.unwrap(), &data); + if let Some(hook) = &self.response_data_receiver_hook { + hook.run_all(self, &mut tx_data)?; + } else { + return Ok(()); + }; + Ok(()) + } + + /// Finalizes an existing data receiver hook by sending any outstanding data to it. The + /// hook is then removed so that it receives no more data. + pub fn response_receiver_finalize_clear(&mut self, input: &mut ParserData) -> Result<()> { + if self.response_data_receiver_hook.is_none() { + return Ok(()); + } + let rc = self.response_receiver_send_data(input); + self.response_data_receiver_hook = None; + rc + } + + /// Configures the data receiver hook. + fn response_receiver_set(&mut self, data_receiver_hook: Option) -> Result<()> { + self.response_data_receiver_hook = data_receiver_hook; + Ok(()) + } + + /// Handles response parser state changes. At the moment, this function is used only + /// to configure data receivers, which are sent raw connection data. + fn response_handle_state_change(&mut self, input: &mut ParserData) -> Result<()> { + if self.response_state_previous == self.response_state { + return Ok(()); + } + + if self.response_state == State::HEADERS { + let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let resp = resp.unwrap(); + let header_fn = Some(resp.cfg.hook_response_header_data.clone()); + let trailer_fn = Some(resp.cfg.hook_response_trailer_data.clone()); + input.reset_callback_start(); + + match resp.response_progress { + HtpResponseProgress::HEADERS => self.response_receiver_set(header_fn), + HtpResponseProgress::TRAILER => self.response_receiver_set(trailer_fn), + _ => Ok(()), + }?; + } + // Same comment as in request_handle_state_change(). Below is a copy. 
+ // Initially, I had the finalization of raw data sending here, but that + // caused the last REQUEST_HEADER_DATA hook to be invoked after the + // REQUEST_HEADERS hook -- which I thought made no sense. For that reason, + // the finalization is now initiated from the request header processing code, + // which is less elegant but provides a better user experience. Having some + // (or all) hooks to be invoked on state change might work better. + self.response_state_previous = self.response_state; + Ok(()) + } + + /// The maximum amount accepted for buffering is controlled + /// by htp_config_t::field_limit. + fn check_response_buffer_limit(&mut self, len: usize) -> Result<()> { + if len == 0 { + return Ok(()); + } + // Check the hard (buffering) limit. + let mut newlen: usize = self.response_buf.len().wrapping_add(len); + // When calculating the size of the buffer, take into account the + // space we're using for the response header buffer. + if let Some(response_header) = &self.response_header { + newlen = newlen.wrapping_add(response_header.len()); + } + let field_limit = self.cfg.field_limit; + if newlen > field_limit { + htp_error!( + self.logger, + HtpLogCode::RESPONSE_FIELD_TOO_LONG, + format!( + "Response the buffer limit: size {} limit {}.", + newlen, field_limit + ) + ); + return Err(HtpStatus::ERROR); + } + Ok(()) + } + + /// Consumes bytes until the end of the current line. + /// + /// Returns HtpStatus::OK on state change, HtpStatus::Error on error, or HtpStatus::DATA + /// when more data is needed. + pub fn response_body_chunked_data_end(&mut self, input: &ParserData) -> Result<()> { + // TODO We shouldn't really see anything apart from CR and LF, + // so we should warn about anything else. 
+ let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let resp = resp.unwrap(); + + if let Ok((_, line)) = take_till_lf(input.as_slice()) { + let len = line.len(); + self.response_data_consume(input, len); + let resp = self.response_mut().unwrap(); + resp.response_message_len = resp.response_message_len.wrapping_add(len as u64); + self.response_state = State::BODY_CHUNKED_LENGTH; + Ok(()) + } else { + // Advance to end. Dont need to buffer + resp.response_message_len = resp.response_message_len.wrapping_add(input.len() as u64); + self.response_data_consume(input, input.len()); + Err(HtpStatus::DATA_BUFFER) + } + } + + /// Processes a chunk of data. + /// + /// Returns HtpStatus::OK on state change, HtpStatus::Error on error, or + /// HtpStatus::DATA when more data is needed. + pub fn response_body_chunked_data(&mut self, input: &ParserData) -> Result<()> { + let bytes_to_consume = min( + input.len(), + self.response_chunked_length.unwrap_or(0) as usize, + ); + if bytes_to_consume == 0 { + return Err(HtpStatus::DATA); + } + // Consume the data. + self.response_body_data(Some(&input.as_slice()[0..bytes_to_consume]))?; + // Adjust the counters. + self.response_data_consume(input, bytes_to_consume); + if let Some(len) = &mut self.response_chunked_length { + *len -= bytes_to_consume as u64; + // Have we seen the entire chunk? + if *len == 0 { + self.response_state = State::BODY_CHUNKED_DATA_END; + return Ok(()); + } + } + + Err(HtpStatus::DATA) + } + + /// Extracts chunk length. + /// + /// Returns Ok(()) on success, Err(HTP_ERROR) on error, or Err(HTP_DATA) when more data is needed. 
+    pub fn response_body_chunked_length(&mut self, input: &mut ParserData) -> Result<()> {
+        // Parses one chunk-size line. The loop only repeats when the line just read is a
+        // chunked control line (see is_chunked_ctl_line); such lines are counted in the
+        // message length and skipped.
+        let mut data = input.as_slice();
+        loop {
+            let resp = self.response_mut();
+            if resp.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let resp = resp.unwrap();
+
+            match take_till_lf(data) {
+                Ok((remaining, line)) => {
+                    self.response_data_consume(input, line.len());
+                    // The buffer limit only matters when earlier partial data was buffered.
+                    if !self.response_buf.is_empty() {
+                        self.check_response_buffer_limit(line.len())?;
+                    }
+                    // Join any previously buffered bytes with this line; the buffer is
+                    // left empty afterwards.
+                    let mut data2 = take(&mut self.response_buf);
+                    data2.add(line);
+                    if is_chunked_ctl_line(&data2) {
+                        let resp = self.response_mut().unwrap();
+                        resp.response_message_len =
+                            (resp.response_message_len).wrapping_add(data2.len() as u64);
+                        //Empty chunk len. Try to continue parsing.
+                        data = remaining;
+                        continue;
+                    }
+                    let resp = self.response_mut().unwrap();
+                    resp.response_message_len =
+                        (resp.response_message_len).wrapping_add(data2.len() as u64);
+
+                    match parse_chunked_length(&data2) {
+                        Ok((len, ext)) => {
+                            self.response_chunked_length = len;
+                            if ext {
+                                htp_warn!(
+                                    self.logger,
+                                    HtpLogCode::RESPONSE_CHUNK_EXTENSION,
+                                    "Response chunk extension"
+                                );
+                            }
+                            // Handle chunk length
+                            if let Some(len) = len {
+                                match len.cmp(&0) {
+                                    Ordering::Equal => {
+                                        // End of data: a zero-length chunk is followed by
+                                        // the trailer, parsed in the HEADERS state.
+                                        self.response_state = State::HEADERS;
+                                        self.response_mut().unwrap().response_progress =
+                                            HtpResponseProgress::TRAILER
+                                    }
+                                    Ordering::Greater => {
+                                        // More data available.
+                                        self.response_state = State::BODY_CHUNKED_DATA
+                                    }
+                                    _ => {}
+                                }
+                            } else {
+                                return Ok(()); // empty chunk length line, lets try to continue
+                            }
+                        }
+                        Err(_) => {
+                            // The chunk length could not be parsed: fall back to treating
+                            // the rest of the stream as an identity body.
+                            // unconsume so response_body_identity_stream_close doesn't miss the first bytes
+                            self.response_data_unconsume(input, line.len());
+                            self.response_state = State::BODY_IDENTITY_STREAM_CLOSE;
+                            self.response_mut().unwrap().response_transfer_coding =
+                                HtpTransferCoding::IDENTITY;
+                            htp_error!(
+                                self.logger,
+                                HtpLogCode::INVALID_RESPONSE_CHUNK_LEN,
+                                "Response chunk encoding: Invalid chunk length"
+                            );
+                        }
+                    }
+
+                    return Ok(());
+                }
+                _ => {
+                    // No line feed in the remaining input.
+                    // Check if the data we have seen so far is invalid
+                    if !is_valid_chunked_length_data(data) {
+                        // Contains leading junk non hex_ascii data
+                        resp.response_transfer_coding = HtpTransferCoding::IDENTITY;
+                        self.response_state = State::BODY_IDENTITY_STREAM_CLOSE;
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::INVALID_RESPONSE_CHUNK_LEN,
+                            "Response chunk encoding: Invalid chunk length"
+                        );
+                        return Ok(());
+                    } else {
+                        // NOTE(review): presumably buffers the partial line until more
+                        // input arrives -- confirm in handle_response_absent_lf.
+                        return self.handle_response_absent_lf(input);
+                    }
+                }
+            }
+        }
+    }
+
+    /// Processes an identity response body of known length.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or
+    /// HtpStatus::DATA when more data is needed.
+ pub fn response_body_identity_cl_known(&mut self, data: &mut ParserData) -> Result<()> { + if self.response_status == HtpStreamState::CLOSED { + self.response_state = State::FINALIZE; + // Sends close signal to decompressors + return self.response_body_data(data.data()); + } + let left = self.response_body_data_left.ok_or(HtpStatus::ERROR)?; + let bytes_to_consume = std::cmp::min(data.len() as u64, left); + if bytes_to_consume == 0 { + return Err(HtpStatus::DATA); + } + if data.is_gap() { + let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let resp = resp.unwrap(); + + if resp.response_content_encoding_processing == HtpContentEncoding::NONE { + resp.response_message_len = + resp.response_message_len.wrapping_add(bytes_to_consume); + // Create a new gap of the appropriate length + let parser_data = ParserData::from(bytes_to_consume as usize); + // Send the gap to the data hooks + let mut tx_data = Data::new(resp, &parser_data); + self.response_run_hook_body_data(&mut tx_data)?; + } else { + // end decompression on gap + self.response_body_data(None)?; + } + } else { + // Consume the data. + self.response_body_data(Some(&data.as_slice()[0..bytes_to_consume as usize]))?; + } + // Adjust the counters. + self.response_data_consume(data, bytes_to_consume as usize); + self.response_body_data_left = Some(left - bytes_to_consume); + // Have we seen the entire response body? + if self.response_body_data_left > Some(0) { + return Err(HtpStatus::DATA); + } + // End of response body. + self.response_state = State::FINALIZE; + // Sends close signal to decompressors, outputting any partially decompressed data + self.response_body_data(None) + } + + /// Processes identity response body of unknown length. In this case, we assume the + /// response body consumes all data until the end of the stream. + /// + /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA + /// when more data is needed. 
+    pub fn response_body_identity_stream_close(&mut self, data: &ParserData) -> Result<()> {
+        // Returns Ok(()) once the stream is CLOSED (state moves to FINALIZE); until then
+        // every call ends with Err(HtpStatus::DATA) to request more input.
+        if data.is_gap() {
+            // Send the gap to the data hooks
+            let resp = self.response_mut();
+            if resp.is_none() {
+                return Err(HtpStatus::ERROR);
+            }
+            let mut tx_data = Data::new(resp.unwrap(), data);
+            self.response_run_hook_body_data(&mut tx_data)?;
+        } else if !data.is_empty() {
+            // Consume all data from the input buffer.
+            self.response_body_data(data.data())?;
+            // Adjust the counters.
+            self.response_data_consume(data, data.len());
+        }
+        // Have we seen the entire response body?
+        if self.response_status == HtpStreamState::CLOSED {
+            self.response_state = State::FINALIZE;
+            return Ok(());
+        }
+
+        Err(HtpStatus::DATA)
+    }
+
+    /// Determines presence (and encoding) of a response body.
+    ///
+    /// Picks the next response state from the request method, the response status and
+    /// the Content-Length / Transfer-Encoding / Content-Type response headers.
+    /// Returns `Err(HtpStatus::ERROR)` when there is no response transaction, when the
+    /// Content-Length value cannot be parsed, or when the content type is
+    /// multipart/byteranges with neither chunked encoding nor a Content-Length.
+    pub fn response_body_determine(&mut self, input: &mut ParserData) -> Result<()> {
+        // If the request uses the CONNECT method, then not only are we
+        // to assume there's no body, but we need to ignore all
+        // subsequent data in the stream.
+        let response_tx = self.response_mut();
+        if response_tx.is_none() {
+            return Err(HtpStatus::ERROR);
+        }
+        let response_tx = response_tx.unwrap();
+
+        if response_tx.request_method_number == HtpMethod::CONNECT {
+            if response_tx.response_status_number.in_range(200, 299) {
+                // This is a successful CONNECT stream, which means
+                // we need to switch into tunneling mode: on the
+                // request side we'll now probe the tunnel data to see
+                // if we need to parse or ignore it. So on the response
+                // side we wrap up the tx and wait.
+                self.response_state = State::FINALIZE;
+                // we may have response headers
+                return self.state_response_headers(input);
+            } else if response_tx.response_status_number.eq_num(407) {
+                // proxy telling us to auth
+                if self.request_status != HtpStreamState::ERROR {
+                    self.request_status = HtpStreamState::DATA
+                }
+            } else {
+                // This is a failed CONNECT stream, which means that
+                // we can unblock request parsing
+                if self.request_status != HtpStreamState::ERROR {
+                    self.request_status = HtpStreamState::DATA
+                }
+                // We are going to continue processing this transaction,
+                // adding a note for ourselves to stop at the end (because
+                // we don't want to see the beginning of a new transaction).
+                self.response_data_other_at_tx_end = true
+            }
+        }
+        // Both headers are cloned so they stay usable while the transaction is
+        // re-borrowed further down.
+        let response_tx = self.response_mut().unwrap();
+        let cl_opt = response_tx
+            .response_headers
+            .get_nocase_nozero("content-length")
+            .cloned();
+        let te_opt = response_tx
+            .response_headers
+            .get_nocase_nozero("transfer-encoding")
+            .cloned();
+        // Check for "101 Switching Protocol" response.
+        // If it's seen, it means that traffic after empty line following headers
+        // is no longer HTTP. We can treat it similarly to CONNECT.
+        // Unlike CONNECT, however, upgrades from HTTP to HTTP seem
+        // rather unlikely, so don't try to probe tunnel for nested HTTP,
+        // and switch to tunnel mode right away.
+        if response_tx.response_status_number.eq_num(101) {
+            // An Upgrade header mentioning "h2c" marks an HTTP/2 upgrade.
+            if response_tx
+                .response_headers
+                .get_nocase_nozero("upgrade")
+                .map(|upgrade| upgrade.value.index_of_nocase_nozero("h2c").is_some())
+                .unwrap_or(false)
+            {
+                response_tx.is_http_2_upgrade = true;
+            }
+            if te_opt.is_none() && cl_opt.is_none() {
+                self.response_state = State::FINALIZE;
+                if self.request_status != HtpStreamState::ERROR {
+                    self.request_status = HtpStreamState::TUNNEL
+                }
+                self.response_status = HtpStreamState::TUNNEL;
+                // we may have response headers
+                return self.state_response_headers(input);
+            } else {
+                // Reached when either Transfer-Encoding or Content-Length is present.
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::SWITCHING_PROTO_WITH_CONTENT_LENGTH,
+                    "Switching Protocol with Content-Length"
+                );
+            }
+        }
+        // Check for an interim "100 Continue" response. Ignore it if found, and revert back to RES_LINE.
+        else if response_tx.response_status_number.eq_num(100) && te_opt.is_none() {
+            match cl_opt
+                .as_ref()
+                .and_then(|cl| parse_content_length(cl.value.as_slice(), Some(&mut self.logger)))
+            {
+                // 100 Continue with a Content-Length > 0 isn't treated as a 100 Continue,
+                // so we do nothing here.
+                Some(x) if x > 0 => (),
+                // Otherwise we treat it as a continue and prep for the next response
+                _ => {
+                    let response_tx = self.response_mut().unwrap();
+                    if response_tx.seen_100continue {
+                        htp_error!(
+                            self.logger,
+                            HtpLogCode::CONTINUE_ALREADY_SEEN,
+                            "Already seen 100-Continue."
+                        );
+                    }
+                    // Expecting to see another response line next.
+                    self.response_state = State::LINE;
+                    let response_tx = self.response_mut().unwrap();
+                    // Ignore any response headers seen so far.
+                    response_tx.response_headers.elements.clear();
+                    response_tx.response_progress = HtpResponseProgress::LINE;
+                    response_tx.seen_100continue = true;
+                    return Ok(());
+                }
+            }
+        }
+        // A request can indicate it waits for headers validation
+        // before sending its body cf
+        // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect
+        // The branch is taken on a 4xx response while none of a non-empty request
+        // body has been counted off yet.
+        else if response_tx.response_status_number.in_range(400, 499)
+            && self.request_content_length > Some(0)
+            && self.request_body_data_left == self.request_content_length
+        {
+            let response_tx = self.response_mut().unwrap();
+            if let Some(expect) = response_tx.request_headers.get_nocase("expect") {
+                if expect.value.eq_slice("100-continue") {
+                    self.request_state = State::FINALIZE;
+                }
+            }
+        }
+
+        // 1. Any response message which MUST NOT include a message-body
+        // (such as the 1xx, 204, and 304 responses and any response to a HEAD
+        // request) is always terminated by the first empty line after the
+        // header fields, regardless of the entity-header fields present in the
+        // message.
+        let response_tx = self.response_mut().unwrap();
+        if response_tx.request_method_number == HtpMethod::HEAD {
+            // There's no response body whatsoever
+            response_tx.response_transfer_coding = HtpTransferCoding::NO_BODY;
+            self.response_state = State::FINALIZE
+        } else if response_tx.response_status_number.in_range(100, 199)
+            || response_tx.response_status_number.eq_num(204)
+            || response_tx.response_status_number.eq_num(304)
+        {
+            // There should be no response body
+            // but browsers interpret content sent by the server as such
+            if te_opt.is_none() && cl_opt.is_none() {
+                response_tx.response_transfer_coding = HtpTransferCoding::NO_BODY;
+                self.response_state = State::FINALIZE
+            } else {
+                htp_warn!(
+                    self.logger,
+                    HtpLogCode::RESPONSE_BODY_UNEXPECTED,
+                    "Unexpected Response body"
+                );
+            }
+        }
+        // Hack condition to check that we do not assume "no body"
+        let mut multipart_byteranges = false;
+        if self.response_state != State::FINALIZE {
+            // We have a response body
+            let response_tx = self.response_mut().unwrap();
+            let response_content_type = if let Some(ct) = response_tx
+                .response_headers
+                .get_nocase_nozero("content-type")
+            {
+                // Keep only the part of the value before the first ';' or whitespace;
+                // the whole value is used when the streaming parser does not succeed.
+                // TODO Some platforms may do things differently here.
+                let response_content_type = if let Ok((_, ct)) =
+                    streaming_take_till::<_, _, (&[u8], ErrorKind)>(|c| c == b';' || is_space(c))(
+                        &ct.value,
+                    ) {
+                    ct
+                } else {
+                    &ct.value
+                };
+
+                let mut response_content_type = Bstr::from(response_content_type);
+                response_content_type.make_ascii_lowercase();
+                if response_content_type
+                    .index_of_nocase("multipart/byteranges")
+                    .is_some()
+                {
+                    multipart_byteranges = true;
+                }
+                Some(response_content_type)
+            } else {
+                None
+            };
+
+            // A stored content type is only overwritten when a new one was found.
+            if response_content_type.is_some() {
+                response_tx.response_content_type = response_content_type;
+            }
+            // 2. If a Transfer-Encoding header field (section 14.40) is present and
+            // indicates that the "chunked" transfer coding has been applied, then
+            // the length is defined by the chunked encoding (section 3.6).
+            if let Some(te) =
+                te_opt.and_then(|te| te.value.index_of_nocase_nozero("chunked").and(Some(te)))
+            {
+                // "chunked" appears in the value but is not the whole value.
+                if te.value.cmp_nocase("chunked") != Ordering::Equal {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_ABNORMAL_TRANSFER_ENCODING,
+                        "Transfer-encoding has abnormal chunked value"
+                    );
+                }
+                // The spec says chunked is HTTP/1.1 only, but some browsers accept it
+                // with 1.0 as well
+                let response_tx = self.response_mut().unwrap();
+                if response_tx.response_protocol_number < HtpProtocol::V1_1 {
+                    htp_warn!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_CHUNKED_OLD_PROTO,
+                        "Chunked transfer-encoding on HTTP/0.9 or HTTP/1.0"
+                    );
+                }
+                // If the T-E header is present we are going to use it.
+                let response_tx = self.response_mut().unwrap();
+                response_tx.response_transfer_coding = HtpTransferCoding::CHUNKED;
+                // We are still going to check for the presence of C-L
+                if cl_opt.is_some() {
+                    // This is a violation of the RFC
+                    response_tx.flags.set(HtpFlags::REQUEST_SMUGGLING)
+                }
+                response_tx.response_progress = HtpResponseProgress::BODY;
+                self.response_state = State::BODY_CHUNKED_LENGTH
+            } else if let Some(cl) = cl_opt {
+                // 3. If a Content-Length header field (section 14.14) is present, its
+                // value in bytes represents the length of the message-body.
+                // We know the exact length
+                response_tx.response_transfer_coding = HtpTransferCoding::IDENTITY;
+                // Check for multiple C-L headers
+                if cl.flags.is_set(HtpFlags::FIELD_REPEATED) {
+                    response_tx.flags.set(HtpFlags::REQUEST_SMUGGLING)
+                }
+                // Get body length
+                let response_content_length =
+                    parse_content_length((*cl.value).as_slice(), Some(&mut self.logger));
+                self.response_mut().unwrap().response_content_length = response_content_length;
+                self.response_content_length = response_content_length;
+                self.response_body_data_left = response_content_length;
+                if let Some(len) = response_content_length {
+                    if len != 0 {
+                        self.response_state = State::BODY_IDENTITY_CL_KNOWN;
+                        self.response_mut().unwrap().response_progress = HtpResponseProgress::BODY
+                    } else {
+                        // A zero-length body: nothing more to read.
+                        self.response_state = State::FINALIZE
+                    }
+                } else {
+                    htp_error!(
+                        self.logger,
+                        HtpLogCode::INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE,
+                        "Invalid C-L field in response"
+                    );
+                    return Err(HtpStatus::ERROR);
+                }
+            } else {
+                // 4. If the message uses the media type "multipart/byteranges", which is
+                // self-delimiting, then that defines the length. This media type MUST
+                // NOT be used unless the sender knows that the recipient can parse it;
+                // the presence in a request of a Range header with multiple byte-range
+                // specifiers implies that the client can parse multipart/byteranges
+                // responses.
+                // TODO Handle multipart/byteranges
+                if multipart_byteranges {
+                    htp_error!(
+                        self.logger,
+                        HtpLogCode::RESPONSE_MULTIPART_BYTERANGES,
+                        "C-T multipart/byteranges in responses not supported"
+                    );
+                    return Err(HtpStatus::ERROR);
+                }
+                // 5. By the server closing the connection. (Closing the connection
+                // cannot be used to indicate the end of a request body, since that
+                // would leave no possibility for the server to send back a response.)
+                response_tx.response_transfer_coding = HtpTransferCoding::IDENTITY;
+                response_tx.response_progress = HtpResponseProgress::BODY;
+                self.response_state = State::BODY_IDENTITY_STREAM_CLOSE;
+                self.response_body_data_left = None
+            }
+        }
+        // NOTE We do not need to check for short-style HTTP/0.9 requests here because
+        // that is done earlier, before response line parsing begins
+        self.state_response_headers(input)
+    }
+
+    /// Parses response line.
+    ///
+    /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA
+    /// when more data is needed.
+    pub fn response_line(&mut self, input: &ParserData) -> Result<()> {
+        match take_till_eol(input.as_slice()) {
+            Ok((_, (line, _))) => {
+                // We have a line ending, so consume the input
+                // and grab any buffered data.
+                let mut data = take(&mut self.response_buf);
+                data.add(line);
+                self.response_data_consume(input, line.len());
+                self.response_line_complete(data.as_slice(), input)
+            }
+            _ => {
+                // No line ending found. On a closed stream whatever is left is
+                // treated as the complete line.
+                if self.response_status == HtpStreamState::CLOSED {
+                    let mut data = take(&mut self.response_buf);
+                    data.add(input.as_slice());
+                    self.response_data_consume(input, input.len());
+                    self.response_line_complete(data.as_slice(), input)
+                } else {
+                    // NOTE(review): presumably buffers the partial line until more
+                    // input arrives -- confirm in handle_response_absent_lf.
+                    self.handle_response_absent_lf(input)
+                }
+            }
+        }
+    }
+
+    /// Parse the complete response line.
+    ///
+    /// Returns OK on state change, ERROR on error, or HtpStatus::DATA_BUFFER
+    /// when more data is needed.
+ fn response_line_complete(&mut self, line: &[u8], input: &ParserData) -> Result<()> { + self.check_response_buffer_limit(line.len())?; + if line.is_empty() { + return Err(HtpStatus::DATA); + } + let response_tx = self.response_mut(); + if response_tx.is_none() { + return Err(HtpStatus::ERROR); + } + if is_line_ignorable(self.cfg.server_personality, line) { + if self.response_status == HtpStreamState::CLOSED { + self.response_state = State::FINALIZE + } + // We have an empty/whitespace line, which we'll note, ignore and move on + let response_tx = self.response_mut().unwrap(); + response_tx.response_ignored_lines = response_tx.response_ignored_lines.wrapping_add(1); + // TODO How many lines are we willing to accept? + // Start again + return Ok(()); + } + // Deallocate previous response line allocations, which we would have on a 100 response. + let response_tx = self.response_mut().unwrap(); + response_tx.response_line = None; + response_tx.response_protocol = None; + response_tx.response_status = None; + response_tx.response_message = None; + + // Process response line. + // If the response line is invalid, determine if it _looks_ like + // a response line. If it does not look like a line, process the + // data as a response body because that is what browsers do. + if treat_response_line_as_body(line) { + // if we have a next line beginning with H, skip this one + if input.len() > 1 && (input.as_slice()[0] == b'H' || line.len() <= 3) { + response_tx.response_ignored_lines = + response_tx.response_ignored_lines.wrapping_add(1); + return Ok(()); + } + response_tx.response_content_encoding_processing = HtpContentEncoding::NONE; + self.response_body_data(Some(line))?; + // Continue to process response body. Because we don't have + // any headers to parse, we assume the body continues until + // the end of the stream. + // Have we seen the entire response body? 
+ if input.is_empty() { + let response_tx = self.response_mut().unwrap(); + response_tx.response_transfer_coding = HtpTransferCoding::IDENTITY; + response_tx.response_progress = HtpResponseProgress::BODY; + self.response_body_data_left = None; + self.response_state = State::FINALIZE + } + return Ok(()); + } + self.parse_response_line(line)?; + self.state_response_line()?; + // Move on to the next phase. + self.response_state = State::HEADERS; + self.response_mut().unwrap().response_progress = HtpResponseProgress::HEADERS; + Ok(()) + } + + /// Parses the response line. + pub fn parse_response_line(&mut self, response_line: &[u8]) -> Result<()> { + let response_tx = self.response_mut(); + if response_tx.is_none() { + return Err(HtpStatus::ERROR); + } + let response_tx = response_tx.unwrap(); + + response_tx.response_line = Some(Bstr::from(response_line)); + response_tx.response_protocol_number = HtpProtocol::INVALID; + response_tx.response_status = None; + response_tx.response_status_number = HtpResponseNumber::INVALID; + response_tx.response_message = None; + + let mut response_line_parser = tuple(( + take_is_space_or_null, + take_not_is_space, + take_is_space, + take_not_is_space, + take_ascii_whitespace(), + )); + + let (message, (_ls, response_protocol, ws1, status_code, ws2)) = + response_line_parser(response_line)?; + if response_protocol.is_empty() { + return Ok(()); + } + + response_tx.response_protocol = Some(Bstr::from(response_protocol)); + self.response_mut().unwrap().response_protocol_number = + parse_protocol(response_protocol, &mut self.logger); + + if ws1.is_empty() || status_code.is_empty() { + return Ok(()); + } + + let response_tx = self.response_mut().unwrap(); + response_tx.response_status = Some(Bstr::from(status_code)); + response_tx.response_status_number = parse_status(status_code); + + if ws2.is_empty() { + return Ok(()); + } + + response_tx.response_message = Some(Bstr::from(chomp(message))); + Ok(()) + } + + /// Response header parser. 
+ /// + ///Returns a tuple of the unparsed data and a boolean indicating if the EOH was seen. + fn parse_response_headers<'a>(&mut self, data: &'a [u8]) -> Result<(&'a [u8], bool)> { + let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + + let rc = resp.unwrap().response_header_parser.headers()(data); + if let Ok((remaining, (headers, eoh))) = rc { + for h in headers { + let mut flags = 0; + let name_flags = &h.name.flags; + let value_flags = &h.value.flags; + if value_flags.is_set(HeaderFlags::DEFORMED_EOL) + || name_flags.is_set(HeaderFlags::DEFORMED_EOL) + { + htp_warn!( + self.logger, + HtpLogCode::DEFORMED_EOL, + "Weird response end of lines mix" + ); + } + // Ignore LWS after field-name. + if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) { + htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_INVALID_LWS_AFTER_NAME, + "Request field invalid: LWS after name", + self.response_mut().unwrap().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + //If there was leading whitespace, probably was invalid folding. + if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) { + htp_warn_once!( + self.logger, + HtpLogCode::INVALID_RESPONSE_FIELD_FOLDING, + "Invalid response field folding", + self.response_mut().unwrap().flags, + flags, + HtpFlags::INVALID_FOLDING + ); + flags.set(HtpFlags::FIELD_INVALID); + } + // Check that field-name is a token + if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) { + // Incorrectly formed header name. + htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_HEADER_NAME_NOT_TOKEN, + "Response header name is not a token", + self.response_mut().unwrap().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + // No colon? + if name_flags.is_set(HeaderFlags::MISSING_COLON) { + // We handle this case as a header with an empty name, with the value equal + // to the entire input string. + // TODO Apache will respond to this problem with a 400. 
+ // Now extract the name and the value + htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_FIELD_MISSING_COLON, + "Response field invalid: colon missing", + self.response_mut().unwrap().flags, + flags, + HtpFlags::FIELD_UNPARSEABLE + ); + flags.set(HtpFlags::FIELD_INVALID); + } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) { + // Empty header name. + htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_INVALID_EMPTY_NAME, + "Response field invalid: empty name", + self.response_mut().unwrap().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + self.process_response_header(Header::new_with_flags( + h.name.name.into(), + h.value.value.into(), + flags, + ))?; + } + Ok((remaining, eoh)) + } else { + Ok((data, false)) + } + } + + /// Response header line(s) processor, which assembles folded lines + /// into a single buffer before invoking the parsing function. + fn process_response_header(&mut self, header: Header) -> Result<()> { + let mut repeated = false; + let hl = self.cfg.number_headers_limit as usize; + let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let resp = resp.unwrap(); + + let reps = resp.response_header_repetitions; + let mut update_reps = false; + // Do we already have a header with the same name? + if let Some(h_existing) = resp.response_headers.get_nocase_mut(header.name.as_slice()) { + if !h_existing.flags.is_set(HeaderFlags::FIELD_REPEATED) { + // This is the second occurence for this header. + repeated = true; + } else if reps < 64 { + update_reps = true; + } else { + return Ok(()); + } + h_existing.flags.set(HeaderFlags::FIELD_REPEATED); + // For simplicity reasons, we count the repetitions of all headers + // Having multiple C-L headers is against the RFC but many + // browsers ignore the subsequent headers if the values are the same. 
+ if header.name.cmp_nocase("Content-Length") == Ordering::Equal { + // Don't use string comparison here because we want to + // ignore small formatting differences. + let existing_cl = parse_content_length(&h_existing.value, None); + let new_cl = parse_content_length(&(header.value), None); + if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl { + // Ambiguous response C-L value. + htp_warn!( + self.logger, + HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE, + "Ambiguous response C-L value" + ); + } + } else { + // Add to the existing header. + h_existing.value.extend_from_slice(b", "); + h_existing.value.extend_from_slice(header.value.as_slice()); + } + } else { + if resp.response_headers.elements.len() > hl { + if !resp.flags.is_set(HtpFlags::HEADERS_TOO_MANY) { + htp_warn!( + self.logger, + HtpLogCode::RESPONSE_TOO_MANY_HEADERS, + "Too many response headers" + ); + let resp = self.response_mut().unwrap(); + resp.flags.set(HtpFlags::HEADERS_TOO_MANY); + } + return Err(HtpStatus::ERROR); + } + resp.response_headers.elements.push(header); + } + let resp = self.response_mut().unwrap(); + if update_reps { + resp.response_header_repetitions = resp.response_header_repetitions.wrapping_add(1) + } + if repeated { + htp_warn!( + self.logger, + HtpLogCode::RESPONSE_HEADER_REPETITION, + "Repetition for header" + ); + } + Ok(()) + } + /// Parses response headers. + /// + /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA when more data is needed. + pub fn response_headers(&mut self, input: &mut ParserData) -> Result<()> { + let response_index = self.response_index(); + if self.response_status == HtpStreamState::CLOSED { + let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let resp = resp.unwrap(); + resp.response_header_parser.set_complete(true); + // Parse previous header, if any. 
+ if let Some(response_header) = self.response_header.take() { + self.parse_response_headers(response_header.as_slice())?; + } + // Finalize sending raw trailer data. + self.response_receiver_finalize_clear(input)?; + // Run hook response_TRAILER + self.cfg + .hook_response_trailer + .clone() + .run_all(self, response_index)?; + self.response_state = State::FINALIZE; + return Ok(()); + } + let response_header = if let Some(mut response_header) = self.response_header.take() { + response_header.add(input.as_slice()); + response_header + } else { + Bstr::from(input.as_slice()) + }; + + let (remaining, eoh) = self.parse_response_headers(response_header.as_slice())?; + //TODO: Update the response state machine so that we don't have to have this EOL check + let eol = remaining.len() == response_header.len() + && (remaining.eq(b"\r\n") || remaining.eq(b"\n")); + // If remaining is EOL or header parsing saw EOH this is end of headers + if eoh || eol { + if eol { + //Consume the EOL so it isn't included in data processing + self.response_data_consume(input, input.len()); + } else if remaining.len() <= input.len() { + self.response_data_consume(input, input.len() - remaining.len()); + } + // We've seen all response headers. At terminator. + self.response_state = + if self.response().unwrap().response_progress == HtpResponseProgress::HEADERS { + // Response headers. + // The next step is to determine if this response has a body. + State::BODY_DETERMINE + } else { + // Response trailer. + // Finalize sending raw trailer data. + self.response_receiver_finalize_clear(input)?; + // Run hook response_TRAILER. + self.cfg + .hook_response_trailer + .clone() + .run_all(self, response_index)?; + // The next step is to finalize this response. 
+ State::FINALIZE + }; + Ok(()) + } else { + self.response_data_consume(input, input.len()); + self.check_response_buffer_limit(remaining.len())?; + let remaining = Bstr::from(remaining); + self.response_header.replace(remaining); + Err(HtpStatus::DATA_BUFFER) + } + } + + /// Consumes response body data. + /// This function assumes that handling of chunked encoding is implemented + /// by the container. When you're done submitting body data, invoking a state + /// change (to RESPONSE) will finalize any processing that might be pending. + /// + /// The response body data will be decompressed if two conditions are met: one, + /// decompression is enabled in configuration and two, if the response headers + /// indicate compression. Alternatively, you can control decompression from + /// a RESPONSE_HEADERS callback, by setting tx->response_content_encoding either + /// to COMPRESSION_NONE (to disable compression), or to one of the supported + /// decompression algorithms. + /// + /// Returns HtpStatus::OK on success or HtpStatus::ERROR if the request transaction + /// is invalid or response body data hook fails. + pub fn response_body_data(&mut self, data: Option<&[u8]>) -> Result<()> { + // None data is used to indicate the end of response body. + // Keep track of body size before decompression. 
+ let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let resp = resp.unwrap(); + + resp.response_message_len = resp + .response_message_len + .wrapping_add(data.unwrap_or(b"").len() as u64); + + match resp.response_content_encoding_processing { + HtpContentEncoding::GZIP + | HtpContentEncoding::DEFLATE + | HtpContentEncoding::ZLIB + | HtpContentEncoding::LZMA => { + // Send data buffer to the decompressor if it exists + if resp.response_decompressor.is_none() && data.is_none() { + return Ok(()); + } + let mut decompressor = resp.response_decompressor.take().ok_or(HtpStatus::ERROR)?; + if let Some(data) = data { + decompressor + .decompress(data) + .map_err(|_| HtpStatus::ERROR)?; + + if decompressor.time_spent() + > self.cfg.compression_options.get_time_limit() as u64 + { + htp_log!( + self.logger, + HtpLogLevel::ERROR, + HtpLogCode::COMPRESSION_BOMB, + format!( + "Compression bomb: spent {} us decompressing", + decompressor.time_spent(), + ) + ); + decompressor.set_passthrough(true); + } + // put the decompressor back in its slot + self.response_mut() + .unwrap() + .response_decompressor + .replace(decompressor); + } else { + // don't put the decompressor back in its slot + // ignore errors + let _ = decompressor.finish(); + } + } + HtpContentEncoding::NONE => { + // When there's no decompression, response_entity_len. + // is identical to response_message_len. + let data = ParserData::from(data); + let mut tx_data = Data::new(resp, &data); + resp.response_entity_len = + resp.response_entity_len.wrapping_add(tx_data.len() as u64); + self.response_run_hook_body_data(&mut tx_data)?; + } + HtpContentEncoding::ERROR => { + htp_error!( + self.logger, + HtpLogCode::INVALID_CONTENT_ENCODING, + "Expected a valid content encoding" + ); + return Err(HtpStatus::ERROR); + } + } + Ok(()) + } + + /// Initialize the response decompression engine. We can deal with three + /// scenarios: + /// + /// 1. 
Decompression is enabled, compression indicated in headers, and we decompress. + /// + /// 2. As above, but the user disables decompression by setting response_content_encoding + /// to COMPRESSION_NONE. + /// + /// 3. Decompression is disabled and we do not attempt to enable it, but the user + /// forces decompression by setting response_content_encoding to one of the + /// supported algorithms. + pub fn response_initialize_decompressors(&mut self) -> Result<()> { + let response_decompression_enabled = self.cfg.response_decompression_enabled; + let resp = self.response_mut(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + let resp = resp.unwrap(); + + let ce = resp + .response_headers + .get_nocase_nozero("content-encoding") + .map(|val| val.value.clone()); + // Process multiple encodings if there is no match on fast path + let mut slow_path = false; + + // Fast path - try to match directly on the encoding value + resp.response_content_encoding = if let Some(ce) = &ce { + if ce.cmp_nocase_nozero(b"gzip") == Ordering::Equal + || ce.cmp_nocase_nozero(b"x-gzip") == Ordering::Equal + { + HtpContentEncoding::GZIP + } else if ce.cmp_nocase_nozero(b"deflate") == Ordering::Equal + || ce.cmp_nocase_nozero(b"x-deflate") == Ordering::Equal + { + HtpContentEncoding::DEFLATE + } else if ce.cmp_nocase_nozero(b"lzma") == Ordering::Equal { + HtpContentEncoding::LZMA + } else if ce.cmp_nocase_nozero(b"inflate") == Ordering::Equal + || ce.cmp_nocase_nozero(b"none") == Ordering::Equal + { + HtpContentEncoding::NONE + } else { + slow_path = true; + HtpContentEncoding::NONE + } + } else { + HtpContentEncoding::NONE + }; + + // Configure decompression, if enabled in the configuration. 
+ resp.response_content_encoding_processing = if response_decompression_enabled { + resp.response_content_encoding + } else { + slow_path = false; + HtpContentEncoding::NONE + }; + + let response_content_encoding_processing = resp.response_content_encoding_processing; + let compression_options = self.cfg.compression_options; + match &response_content_encoding_processing { + HtpContentEncoding::GZIP + | HtpContentEncoding::DEFLATE + | HtpContentEncoding::ZLIB + | HtpContentEncoding::LZMA => { + self.response_prepend_decompressor(response_content_encoding_processing)?; + } + HtpContentEncoding::NONE => { + if slow_path { + if let Some(ce) = &ce { + let mut layers = 0; + let mut lzma_layers = 0; + for encoding in ce.split(|c| *c == b',' || *c == b' ') { + if encoding.is_empty() { + continue; + } + layers += 1; + + if let Some(limit) = compression_options.get_layer_limit() { + // decompression layer depth check + if layers > limit { + htp_warn!( + self.logger, + HtpLogCode::TOO_MANY_ENCODING_LAYERS, + "Too many response content encoding layers" + ); + break; + } + } + + let encoding = Bstr::from(encoding); + let encoding = if encoding.index_of_nocase(b"gzip").is_some() { + if !(encoding.cmp_slice(b"gzip") == Ordering::Equal + || encoding.cmp_slice(b"x-gzip") == Ordering::Equal) + { + htp_warn!( + self.logger, + HtpLogCode::ABNORMAL_CE_HEADER, + "C-E gzip has abnormal value" + ); + } + HtpContentEncoding::GZIP + } else if encoding.index_of_nocase(b"deflate").is_some() { + if !(encoding.cmp_slice(b"deflate") == Ordering::Equal + || encoding.cmp_slice(b"x-deflate") == Ordering::Equal) + { + htp_warn!( + self.logger, + HtpLogCode::ABNORMAL_CE_HEADER, + "C-E deflate has abnormal value" + ); + } + HtpContentEncoding::DEFLATE + } else if encoding.cmp_slice(b"lzma") == Ordering::Equal { + lzma_layers += 1; + if let Some(limit) = compression_options.get_lzma_layers() { + // Lzma layer depth check + if lzma_layers > limit { + htp_warn!( + self.logger, + 
HtpLogCode::RESPONSE_TOO_MANY_LZMA_LAYERS, + "Too many response content encoding lzma layers" + ); + break; + } + } + HtpContentEncoding::LZMA + } else if encoding.cmp_slice(b"inflate") == Ordering::Equal + || encoding.cmp_slice(b"none") == Ordering::Equal + { + HtpContentEncoding::NONE + } else { + htp_warn!( + self.logger, + HtpLogCode::ABNORMAL_CE_HEADER, + "C-E unknown setting" + ); + HtpContentEncoding::NONE + }; + + self.response_prepend_decompressor(encoding)?; + } + } + } + } + HtpContentEncoding::ERROR => { + htp_error!( + self.logger, + HtpLogCode::INVALID_CONTENT_ENCODING, + "Expected a valid content encoding" + ); + return Err(HtpStatus::ERROR); + } + } + Ok(()) + } + + fn response_decompressor_callback(&mut self, data: Option<&[u8]>) -> std::io::Result { + // If no data is passed, call the hooks with NULL to signify the end of the + // response body. + let parser_data = ParserData::from(data); + let compression_options = self.cfg.compression_options; + let resp = self.response_mut().unwrap(); + let mut tx_data = Data::new(resp, &parser_data); + + // Keep track of actual response body length. + resp.response_entity_len = resp.response_entity_len.wrapping_add(tx_data.len() as u64); + + // Invoke all callbacks. + self.response_run_hook_body_data(&mut tx_data) + .map_err(|_| std::io::Error::new(std::io::ErrorKind::Other, "body data hook failed"))?; + let resp = self.response_mut().unwrap(); + if let Some(decompressor) = &mut resp.response_decompressor { + if decompressor.callback_inc() % compression_options.get_time_test_freq() == 0 { + if let Some(time_spent) = decompressor.timer_reset() { + if time_spent > compression_options.get_time_limit() as u64 { + decompressor.set_passthrough(true); + htp_log!( + self.logger, + HtpLogLevel::ERROR, + HtpLogCode::COMPRESSION_BOMB, + format!("Compression bomb: spent {} us decompressing", time_spent) + ); + } + } + } + } + + // output > ratio * input ? 
+ let ratio = compression_options.get_bomb_ratio(); + let resp = self.response_mut().unwrap(); + let exceeds_ratio = if let Some(ratio) = resp.response_message_len.checked_mul(ratio) { + resp.response_entity_len > ratio + } else { + // overflow occured + true + }; + + let bomb_limit = compression_options.get_bomb_limit(); + let response_entity_len = resp.response_entity_len; + let response_message_len = resp.response_message_len; + if response_entity_len > bomb_limit && exceeds_ratio { + htp_log!( + self.logger, + HtpLogLevel::ERROR, + HtpLogCode::COMPRESSION_BOMB, + format!( + "Compression bomb: decompressed {} bytes out of {}", + response_entity_len, response_message_len, + ) + ); + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "compression_bomb_limit reached", + )); + } + Ok(tx_data.len()) + } + + /// Prepend response decompressor + fn response_prepend_decompressor(&mut self, encoding: HtpContentEncoding) -> Result<()> { + let compression_options = self.cfg.compression_options; + if encoding != HtpContentEncoding::NONE { + // ensured by caller + let resp = self.response_mut().unwrap(); + if let Some(decompressor) = resp.response_decompressor.take() { + let decompressor = decompressor.prepend(encoding, compression_options)?; + resp.response_decompressor.replace(decompressor); + } else { + // The processing encoding will be the first one encountered + resp.response_content_encoding_processing = encoding; + + // Add the callback first because it will be called last in + // the chain of writers + + // TODO: fix lifetime error and remove this line! + let connp_ptr = self as *mut Self; + let decompressor = unsafe { + Decompressor::new_with_callback( + encoding, + Box::new(move |data: Option<&[u8]>| -> std::io::Result { + (*connp_ptr).response_decompressor_callback(data) + }), + compression_options, + )? + }; + self.response_mut() + .unwrap() + .response_decompressor + .replace(decompressor); + } + } + Ok(()) + } + + /// Finalizes response parsing. 
+ pub fn response_finalize(&mut self, input: &mut ParserData) -> Result<()> { + if input.is_gap() { + return self.state_response_complete(input); + } + let mut work = input.as_slice(); + if self.response_status != HtpStreamState::CLOSED { + let response_next_byte = input.as_slice().first(); + if response_next_byte.is_none() { + return self.state_response_complete(input); + } + let lf = response_next_byte + .map(|byte| *byte == b'\n') + .unwrap_or(false); + if !lf { + if let Ok((_, line)) = take_till_lf(work) { + self.response_data_consume(input, line.len()); + work = line; + } else { + return self.handle_response_absent_lf(input); + } + } else { + self.response_data_consume(input, work.len()); + } + } + if !self.response_buf.is_empty() { + self.check_response_buffer_limit(work.len())?; + } + let mut data = take(&mut self.response_buf); + let buf_len = data.len(); + data.add(work); + + if data.is_empty() { + //closing + return self.state_response_complete(input); + } + if treat_response_line_as_body(&data) { + // Interpret remaining bytes as body data + htp_warn!( + self.logger, + HtpLogCode::RESPONSE_BODY_UNEXPECTED, + "Unexpected response body" + ); + return self.response_body_data(Some(data.as_slice())); + } + // didnt use data, restore + self.response_buf.add(&data[0..buf_len]); + //unread last end of line so that RES_LINE works + self.response_data_unconsume(input, data.len()); + self.state_response_complete(input) + } + + /// The response idle state will initialize response processing, as well as + /// finalize each transactions after we are done with it. + /// + /// Returns HtpStatus::OK on state change, HtpStatus::ERROR on error, or HtpStatus::DATA + /// when more data is needed. + pub fn response_idle(&mut self, input: &ParserData) -> Result<()> { + // We want to start parsing the next response (and change + // the state from IDLE) only if there's at least one + // byte of data available. 
Otherwise we could be creating + // new structures even if there's no more data on the + // connection. + if input.is_empty() { + return Err(HtpStatus::DATA); + } + + // Parsing a new response + // Log if we have not seen the corresponding request yet + let resp = self.response(); + if resp.is_none() { + return Err(HtpStatus::ERROR); + } + if resp.unwrap().request_progress == HtpRequestProgress::NOT_STARTED { + htp_error!( + self.logger, + HtpLogCode::UNABLE_TO_MATCH_RESPONSE_TO_REQUEST, + "Unable to match response to request" + ); + let tx = self.response_mut(); + if tx.is_none() { + return Err(HtpStatus::ERROR); + } + let tx = tx.unwrap(); + + let uri = Uri { + path: Some(Bstr::from("/libhtp::request_uri_not_seen")), + ..Default::default() + }; + tx.request_uri = uri.path.clone(); + tx.parsed_uri = Some(uri); + tx.request_progress = HtpRequestProgress::COMPLETE; + self.request_next(); + } + self.response_content_length = None; + self.response_body_data_left = None; + self.state_response_start() + } + + /// Run the RESPONSE_BODY_DATA hook. + fn response_run_hook_body_data(&mut self, d: &mut Data) -> Result<()> { + // Do not invoke callbacks with an empty data chunk. + if d.is_empty() { + return Ok(()); + } + let resp = self.response().unwrap(); + // Run transaction hooks first + resp.hook_response_body_data.clone().run_all(self, d)?; + // Run configuration hooks second + self.cfg.hook_response_body_data.run_all(self, d)?; + Ok(()) + } + + /// Process a chunk of outbound (server or response) data. 
+ pub fn response_data( + &mut self, mut chunk: ParserData, timestamp: Option, + ) -> HtpStreamState { + // Reset consumed data tracker + self.response_bytes_consumed = 0; + + // Return if the connection is in stop state + if self.response_status == HtpStreamState::STOP { + htp_info!( + self.logger, + HtpLogCode::PARSER_STATE_ERROR, + "Outbound parser is in HTP_STREAM_STATE_STOP" + ); + return HtpStreamState::STOP; + } + // Return if the connection has had a fatal error + if self.response_status == HtpStreamState::ERROR { + htp_error!( + self.logger, + HtpLogCode::PARSER_STATE_ERROR, + "Outbound parser is in HTP_STREAM_STATE_ERROR" + ); + return HtpStreamState::ERROR; + } + + // If the length of the supplied data chunk is zero, proceed + // only if the stream has been closed. We do not allow zero-sized + // chunks in the API, but we use it internally to force the parsers + // to finalize parsing. + if chunk.is_empty() && self.response_status != HtpStreamState::CLOSED { + htp_error!( + self.logger, + HtpLogCode::ZERO_LENGTH_DATA_CHUNKS, + "Zero-length data chunks are not allowed" + ); + return HtpStreamState::CLOSED; + } + // Remember the timestamp of the current response data chunk + if let Some(timestamp) = timestamp { + self.response_timestamp = timestamp; + } + + // Store the current chunk information + self.conn.track_outbound_data(chunk.len()); + // Return without processing any data if the stream is in tunneling + // mode (which it would be after an initial CONNECT transaction. 
+ if self.response_status == HtpStreamState::TUNNEL { + return HtpStreamState::TUNNEL; + } + if chunk.is_gap() { + // Mark the transaction as having a gap + let idx = self.request_index(); + let resp = self.response_mut(); + if resp.is_none() { + return HtpStreamState::ERROR; + } + let resp = resp.unwrap(); + + resp.flags.set(HtpFlags::RESPONSE_MISSING_BYTES); + + if idx == 0 && resp.response_progress == HtpResponseProgress::NOT_STARTED { + // We have a leading gap on the first transaction. + // Force the parser to start if it hasn't already + resp.response_progress = HtpResponseProgress::GAP; + self.response_status = HtpStreamState::ERROR; + return HtpStreamState::ERROR; + } + } + + loop + // Invoke a processor, in a loop, until an error + // occurs or until we run out of data. Many processors + // will process a request, each pointing to the next + // processor that needs to run. + // Return if there's been an error + // or if we've run out of data. We are relying + // on processors to add error messages, so we'll + // keep quiet here. + { + if chunk.is_gap() + && self.response_state != State::BODY_IDENTITY_CL_KNOWN + && self.response_state != State::BODY_IDENTITY_STREAM_CLOSE + && self.response_state != State::FINALIZE + { + htp_error!( + self.logger, + HtpLogCode::INVALID_GAP, + "Gaps are not allowed during this state" + ); + return HtpStreamState::CLOSED; + } + let mut rc = self.handle_response_state(&mut chunk); + + if rc.is_ok() { + if self.response_status == HtpStreamState::TUNNEL { + return HtpStreamState::TUNNEL; + } + rc = self.response_handle_state_change(&mut chunk); + } + match rc { + // Continue looping. + Ok(_) => {} + // Do we need more data? + Err(HtpStatus::DATA) | Err(HtpStatus::DATA_BUFFER) => { + // Ignore result. 
+ let _ = self.response_receiver_send_data(&mut chunk); + self.response_status = HtpStreamState::DATA; + return HtpStreamState::DATA; + } + // Check for stop + Err(HtpStatus::STOP) => { + self.response_status = HtpStreamState::STOP; + return HtpStreamState::STOP; + } + // Check for suspended parsing + Err(HtpStatus::DATA_OTHER) => { + // We might have actually consumed the entire data chunk? + if chunk.is_empty() { + self.response_status = HtpStreamState::DATA; + // Do not send STREAM_DATE_DATA_OTHER if we've + // consumed the entire chunk + return HtpStreamState::DATA; + } else { + self.response_status = HtpStreamState::DATA_OTHER; + // Partial chunk consumption + return HtpStreamState::DATA_OTHER; + } + } + // Permanent stream error. + Err(_) => { + self.response_status = HtpStreamState::ERROR; + return HtpStreamState::ERROR; + } + } + } + } + + /// Advance out buffer cursor and buffer data. + fn handle_response_absent_lf(&mut self, data: &ParserData) -> Result<()> { + self.check_response_buffer_limit(data.len())?; + self.response_buf.add(data.as_slice()); + self.response_data_consume(data, data.len()); + Err(HtpStatus::DATA_BUFFER) + } +} diff --git a/rust/htp/src/response_generic.rs b/rust/htp/src/response_generic.rs new file mode 100644 index 000000000000..83a3ac646167 --- /dev/null +++ b/rust/htp/src/response_generic.rs @@ -0,0 +1,220 @@ +use crate::{ + bstr::Bstr, + connection_parser::ConnectionParser, + error::Result, + headers::Flags as HeaderFlags, + parsers::{parse_content_length, parse_protocol, parse_status}, + transaction::{Header, HtpProtocol, HtpResponseNumber}, + util::{ + take_ascii_whitespace, take_is_space, take_is_space_or_null, take_not_is_space, + FlagOperations, HtpFlags, + }, + HtpStatus, +}; +use nom::{error::ErrorKind, sequence::tuple}; +use std::cmp::Ordering; + +impl ConnectionParser { + /// Generic response line parser. 
+ pub fn parse_response_line_generic(&mut self, response_line: &[u8]) -> Result<()> { + let response_tx = self.response_mut(); + response_tx.response_protocol_number = HtpProtocol::INVALID; + response_tx.response_status = None; + response_tx.response_status_number = HtpResponseNumber::INVALID; + response_tx.response_message = None; + + let response_line_parser = tuple::<_, _, (_, ErrorKind), _>(( + take_is_space_or_null, + take_not_is_space, + take_is_space, + take_not_is_space, + take_ascii_whitespace(), + )); + + if let Ok((message, (_ls, response_protocol, ws1, status_code, ws2))) = + response_line_parser(response_line) + { + if response_protocol.is_empty() { + return Ok(()); + } + + response_tx.response_protocol = Some(Bstr::from(response_protocol)); + self.response_mut().response_protocol_number = + parse_protocol(response_protocol, &mut self.logger); + + if ws1.is_empty() || status_code.is_empty() { + return Ok(()); + } + + let response_tx = self.response_mut(); + response_tx.response_status = Some(Bstr::from(status_code)); + response_tx.response_status_number = parse_status(status_code); + + if ws2.is_empty() { + return Ok(()); + } + + response_tx.response_message = Some(Bstr::from(message)); + } else { + return Err(HtpStatus::ERROR); + } + Ok(()) + } + + /// Generic response header parser. + /// + ///Returns a tuple of the unparsed data and a boolean indicating if the EOH was seen. + pub fn process_response_headers_generic<'a>( + &mut self, + data: &'a [u8], + ) -> Result<(&'a [u8], bool)> { + let rc = self.response_mut().response_header_parser.headers()(data); + if let Ok((remaining, (headers, eoh))) = rc { + for h in headers { + let mut flags = 0; + let name_flags = &h.name.flags; + let value_flags = &h.value.flags; + if value_flags.is_set(HeaderFlags::DEFORMED_EOL) + || name_flags.is_set(HeaderFlags::DEFORMED_EOL) + { + htp_warn!( + self.logger, + HtpLogCode::DEFORMED_EOL, + "Weird response end of lines mix" + ); + } + // Ignore LWS after field-name. 
+ if name_flags.is_set(HeaderFlags::NAME_TRAILING_WHITESPACE) { + htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_INVALID_LWS_AFTER_NAME, + "Request field invalid: LWS after name", + self.response_mut().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + //If there was leading whitespace, probably was invalid folding. + if name_flags.is_set(HeaderFlags::NAME_LEADING_WHITESPACE) { + htp_warn_once!( + self.logger, + HtpLogCode::INVALID_RESPONSE_FIELD_FOLDING, + "Invalid response field folding", + self.response_mut().flags, + flags, + HtpFlags::INVALID_FOLDING + ); + flags.set(HtpFlags::FIELD_INVALID); + } + // Check that field-name is a token + if name_flags.is_set(HeaderFlags::NAME_NON_TOKEN_CHARS) { + // Incorrectly formed header name. + htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_HEADER_NAME_NOT_TOKEN, + "Response header name is not a token", + self.response_mut().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + // No colon? + if name_flags.is_set(HeaderFlags::MISSING_COLON) { + // We handle this case as a header with an empty name, with the value equal + // to the entire input string. + // TODO Apache will respond to this problem with a 400. + // Now extract the name and the value + htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_FIELD_MISSING_COLON, + "Response field invalid: colon missing", + self.response_mut().flags, + flags, + HtpFlags::FIELD_UNPARSEABLE + ); + flags.set(HtpFlags::FIELD_INVALID); + } else if name_flags.is_set(HeaderFlags::NAME_EMPTY) { + // Empty header name. 
+ htp_warn_once!( + self.logger, + HtpLogCode::RESPONSE_INVALID_EMPTY_NAME, + "Response field invalid: empty name", + self.response_mut().flags, + flags, + HtpFlags::FIELD_INVALID + ); + } + self.process_response_header_generic(Header::new_with_flags( + h.name.name.into(), + h.value.value.into(), + flags, + ))?; + } + Ok((remaining, eoh)) + } else { + Ok((data, false)) + } + } + + /// Generic response header line(s) processor, which assembles folded lines + /// into a single buffer before invoking the parsing function. + fn process_response_header_generic(&mut self, header: Header) -> Result<()> { + let mut repeated = false; + let reps = self.response().response_header_repetitions; + let mut update_reps = false; + // Do we already have a header with the same name? + if let Some((_, h_existing)) = self + .response_mut() + .response_headers + .get_nocase_mut(header.name.as_slice()) + { + // Keep track of repeated same-name headers. + if !h_existing.flags.is_set(HtpFlags::FIELD_REPEATED) { + // This is the second occurence for this header. + repeated = true; + } else if reps < 64 { + update_reps = true; + } else { + return Ok(()); + } + h_existing.flags.set(HtpFlags::FIELD_REPEATED); + // For simplicity reasons, we count the repetitions of all headers + // Having multiple C-L headers is against the RFC but many + // browsers ignore the subsequent headers if the values are the same. + if header.name.cmp_nocase("Content-Length") == Ordering::Equal { + // Don't use string comparison here because we want to + // ignore small formatting differences. + let existing_cl = parse_content_length(&h_existing.value, None); + let new_cl = parse_content_length(&(header.value), None); + if existing_cl.is_none() || new_cl.is_none() || existing_cl != new_cl { + // Ambiguous response C-L value. + htp_warn!( + self.logger, + HtpLogCode::DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE, + "Ambiguous response C-L value" + ); + } + } else { + // Add to the existing header. 
+ h_existing.value.extend_from_slice(b", "); + h_existing.value.extend_from_slice(header.value.as_slice()); + } + } else { + self.response_mut() + .response_headers + .add(header.name.clone(), header); + } + if update_reps { + self.response_mut().response_header_repetitions = + self.response().response_header_repetitions.wrapping_add(1) + } + if repeated { + htp_warn!( + self.logger, + HtpLogCode::RESPONSE_HEADER_REPETITION, + "Repetition for header" + ); + } + Ok(()) + } +} diff --git a/rust/htp/src/table.rs b/rust/htp/src/table.rs new file mode 100644 index 000000000000..a34687687ed2 --- /dev/null +++ b/rust/htp/src/table.rs @@ -0,0 +1,186 @@ +use crate::bstr::Bstr; +use std::{cmp::Ordering, iter::Iterator, ops::Index, slice::SliceIndex}; + +/// The table structure for key value pairs. +#[derive(Clone, Debug)] +pub struct Table { + /// Entries in the table. + pub elements: Vec<(Bstr, T)>, +} + +impl Index for Table { + type Output = (Bstr, T); + fn index(&self, idx: usize) -> &(Bstr, T) { + &self.elements[idx] + } +} + +impl<'a, T> IntoIterator for &'a Table { + type Item = &'a (Bstr, T); + type IntoIter = std::slice::Iter<'a, (Bstr, T)>; + + fn into_iter(self) -> std::slice::Iter<'a, (Bstr, T)> { + self.elements.iter() + } +} + +impl<'a, T> IntoIterator for &'a mut Table { + type Item = &'a mut (Bstr, T); + type IntoIter = std::slice::IterMut<'a, (Bstr, T)>; + + fn into_iter(self) -> std::slice::IterMut<'a, (Bstr, T)> { + self.elements.iter_mut() + } +} + +impl IntoIterator for Table { + type Item = (Bstr, T); + type IntoIter = std::vec::IntoIter<(Bstr, T)>; + + fn into_iter(self) -> std::vec::IntoIter<(Bstr, T)> { + self.elements.into_iter() + } +} + +impl Table { + /// Make a new owned Table with given capacity + pub fn with_capacity(size: usize) -> Self { + Self { + elements: Vec::with_capacity(size), + } + } + + /// Add a new tuple (key, item) to the table + pub fn add(&mut self, key: Bstr, item: T) { + self.elements.push((key, item)); + } + + /// Retrieve 
an element from a specific index. + pub fn get(&self, index: I) -> Option<&I::Output> + where + I: SliceIndex<[(Bstr, T)]>, + { + self.elements.get(index) + } + + /// Retrieve a mutable reference to an element from a specific index. + pub fn get_mut(&mut self, index: I) -> Option<&mut I::Output> + where + I: SliceIndex<[(Bstr, T)]>, + { + self.elements.get_mut(index) + } + + /// Search the table for the first tuple with a key matching the given slice, ingnoring ascii case in self + /// + /// Returns None if no match is found. + pub fn get_nocase>(&self, key: K) -> Option<&(Bstr, T)> { + self.elements + .iter() + .find(|x| x.0.cmp_nocase_trimmed(key.as_ref()) == Ordering::Equal) + } + + /// Returns the number of elements in the table + pub fn size(&self) -> usize { + self.elements.len() + } +} + +// Tests + +#[test] +fn Add() { + let mut t = Table::with_capacity(1); + let mut k = Bstr::from("Key"); + assert_eq!(0, t.size()); + t.add(k, "Value1"); + assert_eq!(1, t.size()); + k = Bstr::from("AnotherKey"); + t.add(k, "Value2"); + assert_eq!(2, t.size()); +} + +#[test] +fn GetNoCase() { + let mut t = Table::with_capacity(2); + let mut k = Bstr::from("Key1"); + t.add(k, "Value1"); + k = Bstr::from("KeY2"); + t.add(k, "Value2"); + + let mut result = t.get_nocase("KEY1"); + let mut res = result.unwrap(); + assert_eq!(Ordering::Equal, res.0.cmp_slice("Key1")); + assert_eq!("Value1", res.1); + + result = t.get_nocase("keY1"); + res = result.unwrap(); + assert_eq!(Ordering::Equal, res.0.cmp_slice("Key1")); + assert_eq!("Value1", res.1); + + result = t.get_nocase("key2"); + res = result.unwrap(); + assert_eq!(Ordering::Equal, res.0.cmp_slice("KeY2")); + assert_eq!("Value2", res.1); + + result = t.get_nocase("NotAKey"); + assert!(result.is_none()); +} + +#[test] +fn IndexAccess() { + let mut t = Table::with_capacity(2); + let mut k = Bstr::from("Key1"); + t.add(k, "Value1"); + k = Bstr::from("KeY2"); + t.add(k, "Value2"); + + let res = &t[1]; + assert_eq!(Ordering::Equal, 
res.0.cmp_slice("KeY2")); + assert_eq!("Value2", res.1); + assert_eq!("Value2", t.get(1).unwrap().1); + + let res_mut = t.get_mut(1).unwrap(); + res_mut.1 = "Value3"; + assert_eq!("Value3", t.get(1).unwrap().1); +} + +#[test] +fn Iterators() { + let mut table = Table::with_capacity(2); + table.add("1".into(), "abc".to_string()); + table.add("2".into(), "def".to_string()); + + let mut iter_ref: std::slice::Iter<(Bstr, String)> = (&table).into_iter(); + let (key1, _): &(Bstr, String) = iter_ref.next().unwrap(); + assert_eq!(key1, &"1"); + assert_eq!(table.get_nocase("1").unwrap().1, "abc"); + + let mut iter_mut_ref: std::slice::IterMut<(Bstr, String)> = (&mut table).into_iter(); + let (key1, ref mut val1): &mut (Bstr, String) = iter_mut_ref.next().unwrap(); + *val1 = "xyz".to_string(); + assert_eq!(key1, &"1"); + assert_eq!(table.get_nocase("1").unwrap().1, "xyz"); + + let mut iter_owned: std::vec::IntoIter<(Bstr, String)> = table.into_iter(); + let (key1, val1) = iter_owned.next().unwrap(); + assert_eq!(key1, "1"); + assert_eq!(val1, "xyz"); +} + +#[test] +fn Table_Misc() { + let mut t: Table<&str> = Table::with_capacity(2); + + let mut pkey = Bstr::with_capacity(1); + pkey.add("p"); + + let mut qkey = Bstr::with_capacity(1); + qkey.add("q"); + + t.add(pkey, "1"); + t.add(qkey, "2"); + + assert!(t.get_nocase("z").is_none()); + assert_eq!("1", t.get_nocase("p").unwrap().1); +} diff --git a/rust/htp/src/test.rs b/rust/htp/src/test.rs new file mode 100644 index 000000000000..621e51f5f240 --- /dev/null +++ b/rust/htp/src/test.rs @@ -0,0 +1,392 @@ +#![allow(non_snake_case)] +use crate::{ + bstr::Bstr, + config::{Config, HtpServerPersonality}, + connection_parser::{ConnectionParser, HtpStreamState, ParserData}, + error::Result, + transaction::Transaction, +}; +use std::{ + env, + iter::IntoIterator, + net::{IpAddr, Ipv4Addr}, + path::PathBuf, + time::SystemTime, +}; +use time::OffsetDateTime; + +#[derive(Debug)] +enum Chunk { + Client(ParserData<'static>), + 
Server(ParserData<'static>), +} + +/// A structure to hold callback data +pub struct MainUserData { + /// Call order of callbacks + pub order: Vec, + /// Request data from callbacks + pub request_data: Vec, + /// Response data from callbacks + pub response_data: Vec, +} + +impl Default for MainUserData { + /// Make a new user data struct + fn default() -> Self { + Self { + order: Vec::new(), + request_data: Vec::with_capacity(5), + response_data: Vec::with_capacity(5), + } + } +} + +/// Ordered list of client/server chunks parsed from a test file or byte slice. +#[derive(Debug)] +struct TestInput { + chunks: Vec, +} + +impl IntoIterator for TestInput { + type Item = Chunk; + type IntoIter = std::vec::IntoIter; + fn into_iter(self) -> Self::IntoIter { + self.chunks.into_iter() + } +} + +impl From for TestInput { + /// Read the whole file and parse it as test input. + /// + /// Panics, naming the path and the I/O error, if the file cannot be read. + fn from(file: PathBuf) -> Self { + // `expect` does not interpolate format placeholders, so build the message with `panic!`. + let input = std::fs::read(&file).unwrap_or_else(|err| { + panic!( + "Could not read file {:?}: {}. Do you need to set a base dir in env('srcdir')?", + file, err + ) + }); + TestInput::from(input.as_slice()) + } +} + +impl From<&[u8]> for TestInput { + /// Split `input` into chunks at marker lines. A marker line is exactly `>>>`, `<<<`, + /// `><>` or `<><`, optionally followed by CR, and terminated by LF. A leading `>` + /// selects the client side; a marker whose first two characters differ denotes a gap. + /// Data that appears before the first marker stops parsing and returns what was + /// collected so far (nothing). + fn from(input: &[u8]) -> Self { + let mut test_input = TestInput { chunks: Vec::new() }; + let mut current = Vec::::new(); + let mut client = true; + let mut is_gap = false; + let mut start = true; + for line in input.split_inclusive(|c| *c == b'\n') { + if line.len() >= 4 + && line.len() <= 5 + && (&line[0..3] == b"<<<" + || &line[0..3] == b"<><" + || &line[0..3] == b">>>" + || &line[0..3] == b"><>") + && (line.len() == 4 || line[3] == b'\r') + && line[line.len() - 1] == b'\n' + { + if !current.is_empty() { + // Pop off the CRLF from the last line, which + // just separates the previous data from the + // boundary <<< >>> chars and isn't actual data + if let Some(b'\n') = current.last() { + current.pop(); + } + if let Some(b'\r') = current.last() { + current.pop(); + } + test_input.append(client, current, is_gap); + current = Vec::::new(); + } + // Client represented by first char is > + client = line[0] == b'>'; + // Gaps represented by <>< or ><> + is_gap = line[0] != line[1]; + start = false; + } 
else { + if start { + // we need to start with an indicated direction + return test_input; + } + current.append(&mut line.to_vec()); + } + } + test_input.append(client, current, is_gap); + test_input + } +} + +impl TestInput { + /// Push `data` as a chunk for the client (`client == true`) or the server side. For a gap + /// only the length of `data` is stored; otherwise the bytes themselves are stored. + fn append(&mut self, client: bool, data: Vec, is_gap: bool) { + let chunk = match (client, is_gap) { + // client gap + (true, true) => Chunk::Client(data.len().into()), + // client data + (true, false) => Chunk::Client(data.into()), + // server gap + (false, true) => Chunk::Server(data.len().into()), + // server data + (false, false) => Chunk::Server(data.into()), + }; + self.chunks.push(chunk); + } +} + +/// Error types +#[derive(Debug)] +pub enum TestError { + /// The parser entered the Error state + StreamError, +} + +/// Test harness +#[derive(Debug)] +pub struct Test { + /// The connection parser + pub connp: ConnectionParser, + /// The base directory for the crate - used to find files. + pub basedir: Option, +} + +/// Return a default Config to use with tests +pub fn TestConfig() -> Config { + let mut cfg = Config::default(); + cfg.set_server_personality(HtpServerPersonality::APACHE_2) + .unwrap(); + // Raise the compression time limit tenfold: the default may be too short in slow + // development environments, causing tests to fail. + cfg.compression_options + .set_time_limit(10 * cfg.compression_options.get_time_limit()); + cfg.set_parse_urlencoded(true); + + cfg +} + +impl Test { + /// Make a new test with the given config + /// + /// The base directory is taken from the `srcdir` environment variable when it is set, + /// otherwise it is `$CARGO_MANIFEST_DIR/tests/files`, otherwise there is none. + pub fn new(cfg: Config) -> Self { + let basedir = if let Ok(dir) = std::env::var("srcdir") { + Some(PathBuf::from(dir)) + } else if let Ok(dir) = env::var("CARGO_MANIFEST_DIR") { + let mut base = PathBuf::from(dir); + base.push("tests"); + base.push("files"); + Some(base) + } else { + None + }; + + let connp = ConnectionParser::new(cfg); + Test { connp, basedir } + } + + /// Make a new test with the default TestConfig, register the request/response start, + /// complete and body-data callbacks plus the transaction-complete callback, and attach + /// default user data.
+ pub fn new_with_callbacks() -> Self { + let mut cfg = TestConfig(); + cfg.register_request_start(request_start); + cfg.register_request_complete(request_complete); + cfg.register_response_start(response_start); + cfg.register_response_complete(response_complete); + cfg.register_response_body_data(response_body_data); + cfg.register_request_body_data(request_body_data); + cfg.register_transaction_complete(transaction_complete); + let mut t = Test::new(cfg); + // Attach default user data to the transaction returned by response_mut(); + // the callbacks were registered on cfg above. + t.connp + .response_mut() + .unwrap() + .set_user_data(Box::::default()); + t + } + + /// Open a connection on the underlying ConnectionParser. Useful if you + /// want to send data directly to the ConnectionParser after. + /// + /// Both endpoints are 127.0.0.1, with ports 10000 and 80. + pub fn open_connection(&mut self, tv_start: Option) { + self.connp.open( + Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))), + Some(10000), + Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))), + Some(80), + tv_start, + ); + } + + /// Open a connection, feed every chunk of `test` to the parser in order, then close the + /// connection. Returns `TestError::StreamError` as soon as the parser reports + /// `HtpStreamState::ERROR`. When a call reports `HtpStreamState::DATA_OTHER`, a copy of + /// the chunk is buffered after `consume` is called with the parser's consumed count, + /// and is replayed later. + /// + /// NOTE(review): buffered client data is only replayed after a server chunk; unlike + /// buffered server data it is not replayed once the chunk loop ends - confirm intended. + fn run(&mut self, test: TestInput) -> std::result::Result<(), TestError> { + let tv_start = Some(OffsetDateTime::from(SystemTime::now())); + self.open_connection(tv_start); + + let mut request_buf: Option = None; + let mut response_buf: Option = None; + for chunk in test { + match chunk { + Chunk::Client(data) => { + let rc = self.connp.request_data(data.clone(), tv_start); + + if rc == HtpStreamState::ERROR { + return Err(TestError::StreamError); + } + + if rc == HtpStreamState::DATA_OTHER { + let consumed = self.connp.request_data_consumed(); + let remaining = data.clone().into_owned(); + remaining.consume(consumed); + request_buf = Some(remaining); + } + } + Chunk::Server(data) => { + // If we have leftover data from before then use it first + if let Some(response_remaining) = response_buf { + let rc = self + .connp + .response_data(response_remaining.as_slice().into(), tv_start); + response_buf = None; + if rc == HtpStreamState::ERROR { + return Err(TestError::StreamError); + } + } + + // Now use up this data chunk + let rc = 
self.connp.response_data(data.clone(), tv_start); + if rc == HtpStreamState::ERROR { + return Err(TestError::StreamError); + } + + if rc == HtpStreamState::DATA_OTHER { + let consumed = self.connp.response_data_consumed(); + let remaining = data.clone().into_owned(); + remaining.consume(consumed); + response_buf = Some(remaining); + } + + // And check if we also had some input data buffered + if let Some(request_remaining) = request_buf { + let rc = self + .connp + .request_data(request_remaining.as_slice().into(), tv_start); + request_buf = None; + if rc == HtpStreamState::ERROR { + return Err(TestError::StreamError); + } + } + } + } + } + + // Clean up any remaining server data + if let Some(response_remaining) = response_buf { + let rc = self + .connp + .response_data(response_remaining.as_slice().into(), tv_start); + if rc == HtpStreamState::ERROR { + return Err(TestError::StreamError); + } + } + self.connp + .close(Some(OffsetDateTime::from(SystemTime::now()))); + Ok(()) + } + + /// Run on a slice of input data. Used with fuzzing. + pub fn run_slice(&mut self, slice: &[u8]) -> std::result::Result<(), TestError> { + self.run(TestInput::from(slice)) + } + + /// Run on a file path. Used in integration tests. 
+ pub fn run_file(&mut self, file: &str) -> std::result::Result<(), TestError> { + let testfile = if let Some(base) = &self.basedir { + let mut path = base.clone(); + path.push(file); + path + } else { + PathBuf::from(file) + }; + + self.run(TestInput::from(testfile)) + } +} + +fn request_start(tx: &mut Transaction) -> Result<()> { + let id = tx.index; + let user_data = tx.user_data_mut::().unwrap(); + user_data.order.push(format!("request_start {}", id)); + Ok(()) +} + +fn request_complete(tx: &mut Transaction) -> Result<()> { + let id = tx.index; + let user_data = &mut tx.user_data_mut::().unwrap(); + user_data.order.push(format!("request_complete {}", id)); + Ok(()) +} + +fn response_start(tx: &mut Transaction) -> Result<()> { + let id = tx.index; + let user_data = tx.user_data_mut::().unwrap(); + user_data.order.push(format!("response_start {}", id)); + Ok(()) +} + +fn response_complete(tx: &mut Transaction) -> Result<()> { + let id = tx.index; + let user_data = tx.user_data_mut::().unwrap(); + user_data.order.push(format!("response_complete {}", id)); + Ok(()) +} + +fn transaction_complete(tx: &mut Transaction) -> Result<()> { + let id = tx.index; + let user_data = tx.user_data_mut::().unwrap(); + user_data.order.push(format!("transaction_complete {}", id)); + Ok(()) +} + +fn response_body_data(tx: &mut Transaction, d: &ParserData) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + let bstr = if d.is_gap() { + Bstr::with_capacity(d.len()) + } else { + Bstr::from(d.as_slice()) + }; + user_data.response_data.push(bstr); + Ok(()) +} + +fn request_body_data(tx: &mut Transaction, d: &ParserData) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + let bstr = if d.is_gap() { + Bstr::with_capacity(d.len()) + } else { + Bstr::from(d.as_slice()) + }; + user_data.request_data.push(bstr); + Ok(()) +} + +#[no_mangle] +/// Creates a Fuzz test runner, and runs a byte slice on it +/// # Safety +/// Input pointer must be non-null. 
+pub unsafe extern "C" fn libhtprsFuzzRun( + input: *const u8, input_len: u32, +) -> *mut std::os::raw::c_void { + let mut cfg = TestConfig(); + cfg.set_server_personality(HtpServerPersonality::IDS) + .unwrap(); + let mut t = Test::new(cfg); + // The caller must pass `input` valid for reads of `input_len` bytes. + let data = std::slice::from_raw_parts(input, input_len as usize); + // A parse error is deliberately ignored: the runner is returned either way. + t.run_slice(data).ok(); + let boxed = Box::new(t); + Box::into_raw(boxed) as *mut _ +} + +#[no_mangle] +/// Frees a Fuzz test runner +/// # Safety +/// Input pointer must be non-null. It must have been returned by `libhtprsFuzzRun` +/// and must not have been freed already. +pub unsafe extern "C" fn libhtprsFreeFuzzRun(state: *mut std::os::raw::c_void) { + //just unbox + std::mem::drop(Box::from_raw(state as *mut Test)); +} + +#[no_mangle] +/// Gets connection parser out of a test runner +/// # Safety +/// Input pointer must be non-null. It must have been returned by `libhtprsFuzzRun` +/// and not freed yet; the returned pointer points into that runner, so it is only +/// valid until the runner is freed. +pub unsafe extern "C" fn libhtprsFuzzConnp(t: *mut std::os::raw::c_void) -> *mut ConnectionParser { + let state = t as *mut Test; + &mut (*state).connp +} diff --git a/rust/htp/src/transaction.rs b/rust/htp/src/transaction.rs new file mode 100644 index 000000000000..070c1d47f43a --- /dev/null +++ b/rust/htp/src/transaction.rs @@ -0,0 +1,1072 @@ +use crate::{ + bstr::Bstr, + config::{Config, HtpUnwanted}, + connection_parser::ParserData, + decompressors::{Decompressor, HtpContentEncoding}, + error::Result, + headers::{Parser as HeaderParser, Side}, + hook::{DataHook, DataNativeCallbackFn}, + log::Logger, + parsers::{parse_authorization, parse_content_length, parse_content_type, parse_hostport}, + request::HtpMethod, + uri::Uri, + urlencoded::Parser as UrlEncodedParser, + util::{validate_hostname, FlagOperations, HtpFlags}, + HtpStatus, +}; + +use std::{any::Any, cmp::Ordering, rc::Rc}; + +/// A collection of possible data sources. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpDataSource { + /// Embedded in the URL. + URL, + /// Transported in the query string. + QUERY_STRING, + /// Cookies. + COOKIE, + /// Transported in the request body. 
+ BODY, +} + +/// Represents a single request parameter. +#[derive(Clone, Debug)] +pub struct Param { + /// Parameter name. + pub name: Bstr, + /// Parameter value. + pub value: Bstr, + /// Source of the parameter, for example QUERY_STRING. + pub source: HtpDataSource, +} + +impl Param { + /// Make a new owned Param + pub fn new(name: Bstr, value: Bstr, source: HtpDataSource) -> Self { + Param { + name, + value, + source, + } + } +} + +#[derive(Debug, Clone)] +/// This structure is used to pass transaction data (for example +/// request and response body buffers) to callbacks. +pub struct Data<'a> { + /// Transaction pointer. + tx: *mut Transaction, + /// Ref to the parser data. + data: &'a ParserData<'a>, +} + +impl<'a> Data<'a> { + /// Construct a new Data. + pub fn new(tx: *mut Transaction, data: &'a ParserData<'a>) -> Self { + Self { tx, data } + } + + /// Returns the transaction associated with the Data. + pub fn tx(&self) -> *mut Transaction { + self.tx + } + + /// Returns a pointer to the raw data associated with Data. + pub fn data(&self) -> *const u8 { + self.data.data_ptr() + } + + /// Returns the length of the data. + pub fn len(&self) -> usize { + self.data.len() + } + + /// Return an immutable slice view of the data. + pub fn as_slice(&self) -> Option<&[u8]> { + self.data.data() + } + + /// Determine whether this data is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns a reference to the internal ParserData struct. + pub fn parser_data(&self) -> &ParserData { + self.data + } +} + +/// Enumerates the possible request and response body codings. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpTransferCoding { + /// Body coding not determined yet. + UNKNOWN, + /// No body. + NO_BODY, + /// Identity coding is used, which means that the body was sent as is. + IDENTITY, + /// Chunked encoding. + CHUNKED, + /// We could not recognize the encoding. + INVALID, + /// Error retrieving the transfer coding. 
+ ERROR, +} + +/// Enumerates the possible states of the parsed response status number. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpResponseNumber { + /// Default + UNKNOWN, + /// Could not resolve response number + INVALID, + /// Valid response number + VALID(u16), +} + +impl HtpResponseNumber { + /// Determine if the response status number is in the given range. + /// + /// Both bounds are inclusive. Always false for UNKNOWN and INVALID. + pub fn in_range(self, min: u16, max: u16) -> bool { + use HtpResponseNumber::*; + match self { + UNKNOWN | INVALID => false, + VALID(ref status) => status >= &min && status <= &max, + } + } + + /// Determine if the response status number matches the + /// given status number. + /// + /// Always false for UNKNOWN and INVALID. + pub fn eq_num(self, num: u16) -> bool { + use HtpResponseNumber::*; + match self { + UNKNOWN | INVALID => false, + VALID(ref status) => status == &num, + } + } +} + +/// Represents a single request or response header. +#[derive(Clone, Debug)] +pub struct Header { + /// Header name. + pub name: Bstr, + /// Header value. + pub value: Bstr, + /// Parsing flags; a combination of: HTP_FIELD_INVALID, HTP_FIELD_FOLDED, HTP_FIELD_REPEATED. + pub flags: u64, +} + +/// Table of request or response headers. +#[derive(Clone, Debug)] +pub struct Headers { + /// Entries in the table. + pub elements: Vec
, +} + +impl Headers { + /// Make a new owned Headers Table with given capacity + pub fn with_capacity(size: usize) -> Self { + Self { + elements: Vec::with_capacity(size), + } + } + + /// Search the Headers table for the first header with a name matching the given slice, ignoring ascii case and any zeros in self + /// + /// Returns None if no match is found. + pub fn get_nocase_nozero>(&self, key: K) -> Option<&Header> { + self.elements + .iter() + .find(|x| x.name.cmp_nocase_nozero_trimmed(key.as_ref()) == Ordering::Equal) + } + + /// Search the Headers table for the first header with a name matching the given slice, ignoring ascii case and any zeros in self + /// + /// Returns a mutable reference to the header, or None if no match is found. + pub fn get_nocase_nozero_mut>(&mut self, key: K) -> Option<&mut Header> { + self.elements + .iter_mut() + .find(|x| x.name.cmp_nocase_nozero_trimmed(key.as_ref()) == Ordering::Equal) + } + + /// Search the Headers table for the first header with a name matching the given slice, ignoring ascii case in self + /// + /// Returns a mutable reference to the header, or None if no match is found. + pub fn get_nocase_mut>(&mut self, key: K) -> Option<&mut Header> { + self.elements + .iter_mut() + .find(|x| x.name.cmp_nocase_trimmed(key.as_ref()) == Ordering::Equal) + } + + /// Search the Headers table for the first header with a name matching the given slice, ignoring ascii case in self + /// + /// Returns None if no match is found. + pub fn get_nocase>(&self, key: K) -> Option<&Header> { + self.elements + .iter() + .find(|x| x.name.cmp_nocase_trimmed(key.as_ref()) == Ordering::Equal) + } + + /// Returns the number of elements in the Headers table + pub fn size(&self) -> usize { + self.elements.len() + } +} + +impl<'a> IntoIterator for &'a Headers { + type Item = &'a Header; + type IntoIter = std::slice::Iter<'a, Header>; + + fn into_iter(self) -> std::slice::Iter<'a, Header> { + self.elements.iter() + } +} + +impl IntoIterator for Headers { + type Item = Header; + type IntoIter = std::vec::IntoIter
; + + fn into_iter(self) -> std::vec::IntoIter
{ + self.elements.into_iter() + } +} + +impl Header { + /// Construct a new header with no flags set. + pub fn new(name: Bstr, value: Bstr) -> Self { + Self::new_with_flags(name, value, 0) + } + + /// Construct a new header with flags. + pub fn new_with_flags(name: Bstr, value: Bstr, flags: u64) -> Self { + Self { name, value, flags } + } +} + +/// Possible states of the response side of a progressing transaction. Internally, progress +/// will change to the next state when the processing activities associated with that state +/// begin. For example, when we start to process response line bytes, the response +/// state will change from NOT_STARTED to LINE. +#[repr(C)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Debug)] +pub enum HtpResponseProgress { + /// Default state. + NOT_STARTED, + /// Response Line. + LINE, + /// Response Headers. + HEADERS, + /// Response Body. + BODY, + /// Trailer data. + TRAILER, + /// Response completed. + COMPLETE, + /// Error involving response side of transaction. + ERROR, + /// Response gap. + GAP, +} + +/// Possible states of the request side of a progressing transaction. Internally, progress +/// will change to the next state when the processing activities associated with that state +/// begin. For example, when we start to process request line bytes, the request +/// state will change from NOT_STARTED to LINE. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] +pub enum HtpRequestProgress { + /// Default state. + NOT_STARTED, + /// In request line state. + LINE, + /// In request headers state. + HEADERS, + /// In request body state. + BODY, + /// Trailer data. + TRAILER, + /// Request is completed. + COMPLETE, + /// Error involving request side of transaction. + ERROR, + /// In request gap state. + GAP, +} + +/// Enumerates the possible values for authentication type. 
+#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum HtpAuthType { + /// This is the default value that is used before + /// the presence of authentication is determined (e.g., + /// before request headers are seen). + UNKNOWN, + /// No authentication. + NONE, + /// HTTP Basic authentication used. + BASIC, + /// HTTP Digest authentication used. + DIGEST, + /// HTTP Bearer authentication used. + BEARER, + /// Unrecognized authentication method. + UNRECOGNIZED = 9, + /// Error retrieving the auth type. + ERROR, +} + +/// Protocol version constants. +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] +pub enum HtpProtocol { + /// Error with the transaction side. + ERROR = -3, + /// Could not resolve protocol version number. + INVALID = -2, + /// Default protocol value. + UNKNOWN = -1, + /// HTTP/0.9 version. + V0_9 = 9, + /// HTTP/1.0 version. + V1_0 = 100, + /// HTTP/1.1 version. + V1_1 = 101, +} + +/// Represents a single HTTP transaction, which is a combination of a request and a response. +pub struct Transaction { + /// The logger structure associated with this transaction + pub logger: Logger, + /// The configuration structure associated with this transaction. + pub cfg: Rc, + /// Is the configuration structure shared with other transactions or connections? If + /// this field is set to HTP_CONFIG_PRIVATE, the transaction owns the configuration. + pub is_config_shared: bool, + /// The user data associated with this transaction. + pub user_data: Option>, + // Request fields + /// Contains a count of how many empty lines were skipped before the request line. + pub request_ignored_lines: u32, + /// The first line of this request. + pub request_line: Option, + /// Request method. + pub request_method: Option, + /// Request method, as number. Available only if we were able to recognize the request method. + pub request_method_number: HtpMethod, + /// Request URI, raw, as given to us on the request line. 
This field can take different forms, + /// for example authority for CONNECT methods, absolute URIs for proxy requests, and the query + /// string when one is provided. Use Transaction::parsed_uri if you need access to specific + /// URI elements. Can be None if the request line contains only a request method (which is + /// an extreme case of HTTP/0.9, but passes in practice). + pub request_uri: Option, + /// Request protocol, as text. Can be None if no protocol was specified. + pub request_protocol: Option, + /// Protocol version as a number. Multiply the high version number by 100, then add the low + /// version number. You should prefer to work with the pre-defined HtpProtocol constants. + pub request_protocol_number: HtpProtocol, + /// Is this request using HTTP/0.9? We need a separate field for this purpose because + /// the protocol version alone is not sufficient to determine if HTTP/0.9 is used. For + /// example, if you submit "GET / HTTP/0.9" to Apache, it will not treat the request + /// as HTTP/0.9. + pub is_protocol_0_9: bool, + /// This structure holds the individual components parsed out of the request URI, with + /// appropriate normalization and transformation applied, per configuration. No information + /// is added. In extreme cases when no URI is provided on the request line, all fields + /// will be NULL. (Well, except for port_number, which will be -1.) To inspect raw data, use + /// Transaction::request_uri or Transaction::parsed_uri_raw. + pub parsed_uri: Option, + /// This structure holds the individual components parsed out of the request URI, but + /// without any modification. The purpose of this field is to allow you to look at the data as it + /// was supplied on the request line. Fields can be NULL, depending on what data was supplied. + /// The port_number field is always -1. 
+ pub parsed_uri_raw: Option, + /// This structure holds the whole normalized uri, including path, query, fragment, scheme, username, password, hostname, and port + pub complete_normalized_uri: Option, + /// This structure holds the normalized uri, including path, query, and fragment + pub partial_normalized_uri: Option, + /// HTTP 1.1 RFC + /// + /// 4.3 Message Body + /// + /// The message-body (if any) of an HTTP message is used to carry the + /// entity-body associated with the request or response. The message-body + /// differs from the entity-body only when a transfer-coding has been + /// applied, as indicated by the Transfer-Encoding header field (section + /// 14.41). + /// + /// ```text + /// message-body = entity-body + /// | <entity-body encoded as per Transfer-Encoding> + /// ``` + /// + /// The length of the request message-body. In most cases, this value + /// will be the same as request_entity_len. The values will be different + /// if request compression or chunking were applied. In that case, + /// request_message_len contains the length of the request body as it + /// has been seen over TCP; request_entity_len contains length after + /// de-chunking and decompression. + pub request_message_len: u64, + /// The length of the request entity-body. In most cases, this value + /// will be the same as request_message_len. The values will be different + /// if request compression or chunking were applied. In that case, + /// request_message_len contains the length of the request body as it + /// has been seen over TCP; request_entity_len contains length after + /// de-chunking and decompression. + pub request_entity_len: u64, + /// Parsed request headers. + pub request_headers: Headers, + /// Request transfer coding. Can be one of UNKNOWN (body presence not + /// determined yet), IDENTITY, CHUNKED, NO_BODY, + /// INVALID (encoding not recognized), and ERROR. + pub request_transfer_coding: HtpTransferCoding, + /// Request body compression, which indicates if compression is used + /// for the request body. 
This field is an interpretation of the information + /// available in request headers. + pub request_content_encoding: HtpContentEncoding, + /// Request body compression processing information, which is related to how + /// the library is going to process (or has processed) a request body. Changing + /// this field mid-processing can influence library actions. For example, setting + /// this field to NONE in a request_headers callback will prevent + /// decompression. + pub request_content_encoding_processing: HtpContentEncoding, + /// This field will contain the request content type when that information + /// is available in request headers. The contents of the field will be converted + /// to lowercase and any parameters (e.g., character set information) removed. + pub request_content_type: Option, + /// Request decompressor used to decompress request body data. + pub request_decompressor: Option, + /// Contains the value specified in the Content-Length header. The value of this + /// field will be None from the beginning of the transaction and until request + /// headers are processed. It will stay None if the C-L header was not provided, + /// or if the value in it cannot be parsed. + pub request_content_length: Option, + /// Transaction-specific REQUEST_BODY_DATA hook. Behaves as + /// the configuration hook with the same name. + pub hook_request_body_data: DataHook, + /// Transaction-specific RESPONSE_BODY_DATA hook. Behaves as + /// the configuration hook with the same name. + pub hook_response_body_data: DataHook, + /// Authentication type used in the request. + pub request_auth_type: HtpAuthType, + /// Authentication username. + pub request_auth_username: Option, + /// Authentication password. Available only when Transaction::request_auth_type is HTP_AUTH_BASIC. + pub request_auth_password: Option, + /// Authentication token. Available only when Transaction::request_auth_type is HTP_AUTH_BEARER. + pub request_auth_token: Option, + /// Request hostname. 
Per the RFC, the hostname will be taken from the Host header + /// when available. If the host information is also available in the URI, it is used + /// instead of whatever might be in the Host header. Can be None. This field does + /// not contain port information. + pub request_hostname: Option, + /// Request port number, if presented. The rules for Transaction::request_hostname apply. Set to + /// None by default. + pub request_port_number: Option, + + // Response fields + /// How many empty lines did we ignore before reaching the status line? + pub response_ignored_lines: u32, + /// Response line. + pub response_line: Option, + /// Response protocol, as text. Can be None. + pub response_protocol: Option, + /// Response protocol as number. Available only if we were able to parse the protocol version, + /// INVALID otherwise. UNKNOWN until parsing is attempted. + pub response_protocol_number: HtpProtocol, + /// Response status code, as text. Starts as None and can remain None on + /// an invalid response that does not specify status code. + pub response_status: Option, + /// Response status code, available only if we were able to parse it, HtpResponseNumber::INVALID + /// otherwise. HtpResponseNumber::UNKNOWN until parsing is attempted. + pub response_status_number: HtpResponseNumber, + /// This field is set by the protocol decoder when it thinks that the + /// backend server will reject a request with a particular status code. + pub response_status_expected_number: HtpUnwanted, + /// The message associated with the response status code. Can be None. + pub response_message: Option, + /// Have we seen the server respond with a 100 response? + pub seen_100continue: bool, + /// Parsed response headers. Contains instances of Header. + pub response_headers: Headers, + /// Is this response an HTTP/2.0 upgrade? 
+ pub is_http_2_upgrade: bool, + + /// HTTP 1.1 RFC + /// + /// 4.3 Message Body + /// + /// The message-body (if any) of an HTTP message is used to carry the + /// entity-body associated with the request or response. The message-body + /// differs from the entity-body only when a transfer-coding has been + /// applied, as indicated by the Transfer-Encoding header field (section + /// 14.41). + /// + /// ```text + /// message-body = entity-body + /// | <entity-body encoded as per Transfer-Encoding> + /// ``` + /// + /// The length of the response message-body. In most cases, this value + /// will be the same as response_entity_len. The values will be different + /// if response compression or chunking were applied. In that case, + /// response_message_len contains the length of the response body as it + /// has been seen over TCP; response_entity_len contains the length after + /// de-chunking and decompression. + pub response_message_len: u64, + /// The length of the response entity-body. In most cases, this value + /// will be the same as response_message_len. The values will be different + /// if response compression or chunking were applied. In that case, + /// response_message_len contains the length of the response body as it + /// has been seen over TCP; response_entity_len contains the length after + /// de-chunking and decompression. + pub response_entity_len: u64, + /// Contains the value specified in the Content-Length header. The value of this + /// field will be None from the beginning of the transaction and until response + /// headers are processed. It will stay None if the C-L header was not provided, + /// or if the value in it cannot be parsed. + pub response_content_length: Option, + /// Response transfer coding, which indicates if there is a response body, + /// and how it is transported (e.g., as-is, or chunked). + pub response_transfer_coding: HtpTransferCoding, + /// Response body compression, which indicates if compression is used + /// for the response body. 
This field is an interpretation of the information + /// available in response headers. + pub response_content_encoding: HtpContentEncoding, + /// Response body compression processing information, which is related to how + /// the library is going to process (or has processed) a response body. Changing + /// this field mid-processing can influence library actions. For example, setting + /// this field to NONE in a RESPONSE_HEADERS callback will prevent + /// decompression. + pub response_content_encoding_processing: HtpContentEncoding, + /// This field will contain the response content type when that information + /// is available in response headers. The contents of the field will be converted + /// to lowercase and any parameters (e.g., character set information) removed. + pub response_content_type: Option, + /// Response decompressor used to decompress response body data. + pub response_decompressor: Option, + + // Common fields + /// Parsing flags; a combination of: HTP_REQUEST_INVALID_T_E, HTP_INVALID_FOLDING, + /// HTP_REQUEST_SMUGGLING, HTP_MULTI_PACKET_HEAD, and HTP_FIELD_UNPARSEABLE. + pub flags: u64, + /// Request progress. + pub request_progress: HtpRequestProgress, + /// Response progress. + pub response_progress: HtpResponseProgress, + /// Transaction index on the connection. + pub index: usize, + /// Total repetitions for headers in request. + pub request_header_repetitions: u16, + /// Total repetitions for headers in response. 
+ pub response_header_repetitions: u16, + /// Request header parser + pub request_header_parser: HeaderParser, + /// Response header parser + pub response_header_parser: HeaderParser, +} + +impl std::fmt::Debug for Transaction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Transaction") + .field("request_line", &self.request_line) + .field("request_method", &self.request_method) + .field("request_method_number", &self.request_method_number) + .field("request_uri", &self.request_uri) + .field("request_protocol", &self.request_protocol) + .field("request_protocol_number", &self.request_protocol_number) + .field("is_protocol_0_9", &self.is_protocol_0_9) + .field("parsed_uri", &self.parsed_uri) + .field("parsed_uri_raw", &self.parsed_uri_raw) + .field("complete_normalized_uri", &self.complete_normalized_uri) + .field("partial_normalized_uri", &self.partial_normalized_uri) + .field("request_message_len", &self.request_message_len) + .field("request_entity_len", &self.request_entity_len) + .field("request_headers", &self.request_headers) + .field("request_transfer_coding", &self.request_transfer_coding) + .field("request_content_encoding", &self.request_content_encoding) + .field( + "request_content_encoding_processing", + &self.request_content_encoding_processing, + ) + .field("request_content_type", &self.request_content_type) + .field("request_content_length", &self.request_content_length) + .field("request_auth_type", &self.request_auth_type) + .field("request_auth_username", &self.request_auth_username) + .field("request_auth_password", &self.request_auth_password) + .field("request_auth_token", &self.request_auth_token) + .field("request_hostname", &self.request_hostname) + .field("request_port_number", &self.request_port_number) + .field("request_ignored_lines", &self.request_ignored_lines) + .field("response_ignored_lines", &self.response_ignored_lines) + .field("response_line", &self.response_line) + 
.field("response_protocol", &self.response_protocol) + .field("response_protocol_number", &self.response_protocol_number) + .field("response_status", &self.response_status) + .field("response_status_number", &self.response_status_number) + .field( + "response_status_expected_number", + &self.response_status_expected_number, + ) + .field("response_message", &self.response_message) + .field("seen_100continue", &self.seen_100continue) + .field("response_headers", &self.response_headers) + .field("is_http_2_upgrade", &self.is_http_2_upgrade) + .field("response_message_len", &self.response_message_len) + .field("response_entity_len", &self.response_entity_len) + .field("response_content_length", &self.response_content_length) + .field("response_transfer_coding", &self.response_transfer_coding) + .field("response_content_encoding", &self.response_content_encoding) + .field( + "response_content_encoding_processing", + &self.response_content_encoding_processing, + ) + .field("response_content_type", &self.response_content_type) + .field("flags", &self.flags) + .field("request_progress", &self.request_progress) + .field("response_progress", &self.response_progress) + .field("index", &self.index) + .field( + "request_header_repetitions", + &self.request_header_repetitions, + ) + .field( + "response_header_repetitions", + &self.response_header_repetitions, + ) + .finish() + } +} + +impl Transaction { + /// Construct a new transaction. 
+ pub fn new(cfg: &Rc, logger: &Logger, index: usize) -> Self { + Self { + logger: logger.clone(), + cfg: Rc::clone(cfg), + is_config_shared: true, + user_data: None, + request_ignored_lines: 0, + request_line: None, + request_method: None, + request_method_number: HtpMethod::UNKNOWN, + request_uri: None, + request_protocol: None, + request_protocol_number: HtpProtocol::UNKNOWN, + is_protocol_0_9: false, + parsed_uri: None, + parsed_uri_raw: None, + complete_normalized_uri: None, + partial_normalized_uri: None, + request_message_len: 0, + request_entity_len: 0, + request_headers: Headers::with_capacity(32), + request_transfer_coding: HtpTransferCoding::UNKNOWN, + request_content_encoding: HtpContentEncoding::NONE, + request_content_encoding_processing: HtpContentEncoding::NONE, + request_content_type: None, + request_content_length: None, + request_decompressor: None, + hook_request_body_data: DataHook::default(), + hook_response_body_data: DataHook::default(), + request_auth_type: HtpAuthType::UNKNOWN, + request_auth_username: None, + request_auth_password: None, + request_auth_token: None, + request_hostname: None, + request_port_number: None, + response_ignored_lines: 0, + response_line: None, + response_protocol: None, + response_protocol_number: HtpProtocol::UNKNOWN, + response_status: None, + response_status_number: HtpResponseNumber::UNKNOWN, + response_status_expected_number: HtpUnwanted::IGNORE, + response_message: None, + seen_100continue: false, + response_headers: Headers::with_capacity(32), + is_http_2_upgrade: false, + response_message_len: 0, + response_entity_len: 0, + response_content_length: None, + response_transfer_coding: HtpTransferCoding::UNKNOWN, + response_content_encoding: HtpContentEncoding::NONE, + response_content_encoding_processing: HtpContentEncoding::NONE, + response_content_type: None, + response_decompressor: None, + flags: 0, + request_progress: HtpRequestProgress::NOT_STARTED, + response_progress: 
HtpResponseProgress::NOT_STARTED, + index, + request_header_repetitions: 0, + response_header_repetitions: 0, + request_header_parser: HeaderParser::new(Side::Request), + response_header_parser: HeaderParser::new(Side::Response), + } + } + + /// Register callback for the transaction-specific REQUEST_BODY_DATA hook. + pub fn register_request_body_data(&mut self, cbk_fn: DataNativeCallbackFn) { + self.hook_request_body_data.register(cbk_fn) + } + + /// Has this transaction started? True unless both request and response progress are NOT_STARTED. + pub fn is_started(&self) -> bool { + !(self.request_progress == HtpRequestProgress::NOT_STARTED + && self.response_progress == HtpResponseProgress::NOT_STARTED) + } + + /// Set the user data, replacing any previously stored value. + pub fn set_user_data(&mut self, data: Box) { + self.user_data = Some(data); + } + + /// Get a reference to the user data, or None if it is unset or is not of type T. + pub fn user_data(&self) -> Option<&T> { + self.user_data + .as_ref() + .and_then(|ud| ud.downcast_ref::()) + } + + /// Get a mutable reference to the user data, or None if it is unset or is not of type T. + pub fn user_data_mut(&mut self) -> Option<&mut T> { + self.user_data + .as_mut() + .and_then(|ud| ud.downcast_mut::()) + } + + /// Runs the configured parameter processor, if any, on the given parameter. + /// The Param is consumed; this function does not store it on the transaction. + pub fn request_add_param(&mut self, mut param: Param) -> Result<()> { + if let Some(parameter_processor_fn) = self.cfg.parameter_processor { + parameter_processor_fn(&mut param)? + } + Ok(()) + } + + /// Determine if the request has a body (transfer coding is IDENTITY or CHUNKED). + pub fn request_has_body(&self) -> bool { + self.request_transfer_coding == HtpTransferCoding::IDENTITY + || self.request_transfer_coding == HtpTransferCoding::CHUNKED + } + + /// Process the extracted request headers and set the appropriate flags + pub fn process_request_headers(&mut self) -> Result<()> { + // Determine if we have a request body, and how it is packaged.
+ let cl_opt = self.request_headers.get_nocase_nozero("content-length"); + // Check for the Transfer-Encoding header, which would indicate a chunked request body. + if let Some(te) = self.request_headers.get_nocase_nozero("transfer-encoding") { + // Make sure it contains "chunked"; a value without it is treated as invalid. + // TODO The HTTP/1.1 RFC also allows the T-E header to contain "identity", which + // presumably should have the same effect as T-E header absence. However, Apache + // (2.2.22 on Ubuntu 12.04 LTS) instead errors out with "Unknown Transfer-Encoding: identity". + // And it behaves strangely, too, sending a 501 and proceeding to process the request + // (e.g., PHP is run), but without the body. It then closes the connection. + if te.value.index_of_nocase_nozero("chunked").is_none() { + // Invalid T-E header value. + self.request_transfer_coding = HtpTransferCoding::INVALID; + self.flags.set(HtpFlags::REQUEST_INVALID_T_E); + self.flags.set(HtpFlags::REQUEST_INVALID) + } else { + // Chunked encoding is an HTTP/1.1 feature, so check that an earlier protocol + // version is not used. The flag will also be set if the protocol could not be parsed. + // + // TODO IIS 7.0, for example, would ignore the T-E header when it + // is used with a protocol below HTTP 1.1. This should be a + // personality trait. + if self.request_protocol_number < HtpProtocol::V1_1 { + self.flags.set(HtpFlags::REQUEST_INVALID_T_E); + self.flags.set(HtpFlags::REQUEST_SMUGGLING); + } + // If the T-E header is present we are going to use it. + self.request_transfer_coding = HtpTransferCoding::CHUNKED; + // We are still going to check for the presence of C-L. + if cl_opt.is_some() { + // According to the HTTP/1.1 RFC (section 4.4): + // + // "The Content-Length header field MUST NOT be sent + // if these two lengths are different (i.e., if a Transfer-Encoding + // header field is present).
If a message is received with both a + // Transfer-Encoding header field and a Content-Length header field, + // the latter MUST be ignored." + // + self.flags.set(HtpFlags::REQUEST_SMUGGLING) + } + } + } else if let Some(cl) = cl_opt { + // Check for a folded C-L header. + if cl.flags.is_set(HtpFlags::FIELD_FOLDED) { + self.flags.set(HtpFlags::REQUEST_SMUGGLING) + } + // Check for multiple C-L headers. + if cl.flags.is_set(HtpFlags::FIELD_REPEATED) { + self.flags.set(HtpFlags::REQUEST_SMUGGLING) + // TODO Personality trait to determine which C-L header to parse. + // At the moment we're parsing the combination of all instances, + // which is bound to fail (because it will contain commas). + } + // Get the body length. + self.request_content_length = + parse_content_length(cl.value.as_slice(), Some(&mut self.logger)); + if self.request_content_length.is_some() { + // We have a request body of known length. + self.request_transfer_coding = HtpTransferCoding::IDENTITY + } else { + self.request_transfer_coding = HtpTransferCoding::INVALID; + self.flags.set(HtpFlags::REQUEST_INVALID_C_L); + self.flags.set(HtpFlags::REQUEST_INVALID) + } + } else { + // No body. + self.request_transfer_coding = HtpTransferCoding::NO_BODY + } + // If we could not determine the correct body handling, + // consider the request invalid. + if self.request_transfer_coding == HtpTransferCoding::UNKNOWN { + self.request_transfer_coding = HtpTransferCoding::INVALID; + self.flags.set(HtpFlags::REQUEST_INVALID) + } + + // Determine hostname. + // Use the hostname from the URI, when available. + if let Some(hostname) = self.get_parsed_uri_hostname() { + self.request_hostname = Some(Bstr::from(hostname.as_slice())); + } + + if let Some(port_number) = self.get_parsed_uri_port_number() { + self.request_port_number = Some(*port_number); + } + // Examine the Host header. + if let Some(header) = self.request_headers.get_nocase_nozero_mut("host") { + // Host information available in the headers. 
+ if let Ok((_, (hostname, port_nmb, valid))) = parse_hostport(&header.value) { + if !valid { + self.flags.set(HtpFlags::HOSTH_INVALID) + } + // The host information in the headers could be parsed. + // Is there host information in the URI? + if self.request_hostname.is_none() { + // There is no host information in the URI. Use the + // lowercased hostname from the headers as the request hostname. + let mut hostname = Bstr::from(hostname); + hostname.make_ascii_lowercase(); + self.request_hostname = Some(hostname); + if let Some((_, port)) = port_nmb { + self.request_port_number = port; + } + } else { + // The host information appears in the URI and in the headers. The + // HTTP RFC states that we should ignore the header copy. + // Check for different hostnames. + if let Some(host) = &self.request_hostname { + if host.cmp_nocase(hostname) != Ordering::Equal { + self.flags.set(HtpFlags::HOST_AMBIGUOUS) + } + } + + if let Some((_, port)) = port_nmb { + // Check for different ports. + if self.request_port_number.is_some() && self.request_port_number != port { + self.flags.set(HtpFlags::HOST_AMBIGUOUS) + } + } + } + } else if self.request_hostname.is_some() { + // Invalid host information in the headers. + // Raise the flag, even though the host information in the headers is invalid. + self.flags.set(HtpFlags::HOST_AMBIGUOUS) + } + } else { + // No host information in the headers. + // HTTP/1.1 requires host information in the headers. + if self.request_protocol_number >= HtpProtocol::V1_1 { + self.flags.set(HtpFlags::HOST_MISSING) + } + } + // Determine Content-Type. + if let Some(ct) = self.request_headers.get_nocase_nozero("content-type") { + self.request_content_type = Some(parse_content_type(ct.value.as_slice())?); + } + // Parse authentication information. + if self.cfg.parse_request_auth { + parse_authorization(self).or_else(|rc| { + if rc == HtpStatus::DECLINED { + // Don't fail the stream if an authorization header is invalid, just set a flag.
+ self.flags.set(HtpFlags::AUTH_INVALID); + Ok(()) + } else { + Err(rc) + } + })?; + } + Ok(()) + } + + /// Sanity check the response line, logging if there is an invalid protocol or status number. + pub fn validate_response_line(&mut self) { + // Is the response line valid? + if self.response_protocol_number == HtpProtocol::INVALID { + htp_warn!( + self.logger, + HtpLogCode::RESPONSE_LINE_INVALID_PROTOCOL, + "Invalid response line: invalid protocol" + ); + self.flags.set(HtpFlags::STATUS_LINE_INVALID) + } + if !self.response_status_number.in_range(100, 999) { + htp_warn!( + self.logger, + HtpLogCode::RESPONSE_LINE_INVALID_RESPONSE_STATUS, + "Invalid response line: invalid response status." + ); + self.response_status_number = HtpResponseNumber::INVALID; + self.flags.set(HtpFlags::STATUS_LINE_INVALID) + } + } + + /// Parse the raw request line + pub fn parse_request_line(&mut self) -> Result<()> { + // Determine how to process the request URI. + let mut parsed_uri = Uri::with_config(self.cfg.decoder_cfg); + if self.request_method_number == HtpMethod::CONNECT { + // When CONNECT is used, the request URI contains an authority string. + parsed_uri.parse_uri_hostport( + self.request_uri.as_ref().ok_or(HtpStatus::ERROR)?, + &mut self.flags, + ); + } else if let Some(uri) = self.request_uri.as_ref() { + parsed_uri.parse_uri(uri.as_slice()); + } + self.parsed_uri_raw = Some(parsed_uri); + // Parse the request URI into Transaction::parsed_uri_raw. + // Build Transaction::parsed_uri, but only if it was not explicitly set already. + if self.parsed_uri.is_none() { + // Keep the original URI components, but create a copy which we can normalize and use internally. + self.normalize_parsed_uri(); + } + if self.cfg.parse_urlencoded { + if let Some(query) = self + .parsed_uri + .as_ref() + .and_then(|parsed_uri| parsed_uri.query.clone()) + { + // We have a non-zero length query string. 
+ let mut urlenp = UrlEncodedParser::new(self.cfg.decoder_cfg); + urlenp.parse_complete(query.as_slice()); + + // Hand every parsed query parameter to request_add_param. + for (name, value) in urlenp.params.elements.iter() { + let param = Param::new( + Bstr::from(name.as_slice()), + Bstr::from(value.as_slice()), + HtpDataSource::QUERY_STRING, + ); + self.request_add_param(param)?; + } + } + } + + // Check parsed_uri hostname. + if let Some(hostname) = self.get_parsed_uri_hostname() { + if !validate_hostname(hostname.as_slice()) { + self.flags.set(HtpFlags::HOSTU_INVALID) + } + } + Ok(()) + } + + /// Determines if both request and response are complete. + pub fn is_complete(&self) -> bool { + // A transaction is considered complete only when both the request and + // response are complete. (Sometimes a complete response can be seen + // even while the request is ongoing.) + self.request_progress == HtpRequestProgress::COMPLETE + && self.response_progress == HtpResponseProgress::COMPLETE + } + + /// Return a reference to the query string of the parsed request uri, if any. + pub fn get_parsed_uri_query(&self) -> Option<&Bstr> { + self.parsed_uri + .as_ref() + .and_then(|parsed_uri| parsed_uri.query.as_ref()) + } + + /// Return a reference to the hostname of the parsed request uri, if any. + pub fn get_parsed_uri_hostname(&self) -> Option<&Bstr> { + self.parsed_uri + .as_ref() + .and_then(|parsed_uri| parsed_uri.hostname.as_ref()) + } + + /// Return a reference to the port_number of the parsed request uri, if any. + pub fn get_parsed_uri_port_number(&self) -> Option<&u16> { + self.parsed_uri + .as_ref() + .and_then(|parsed_uri| parsed_uri.port_number.as_ref()) + } + + /// Normalize a previously-parsed request URI.
+ pub fn normalize_parsed_uri(&mut self) { + let mut uri = Uri::with_config(self.cfg.decoder_cfg); + if let Some(incomplete) = &self.parsed_uri_raw { + uri.scheme = incomplete.normalized_scheme(); + uri.username = incomplete.normalized_username(&mut self.flags); + uri.password = incomplete.normalized_password(&mut self.flags); + uri.hostname = incomplete.normalized_hostname(&mut self.flags); + uri.port_number = incomplete.normalized_port(&mut self.flags); + uri.query = incomplete.query.clone(); + uri.fragment = incomplete.normalized_fragment(&mut self.flags); + uri.path = incomplete + .normalized_path(&mut self.flags, &mut self.response_status_expected_number); + } + self.parsed_uri = Some(uri); + } +} + +impl PartialEq for Transaction { + /// Determines if other references the same transaction. + fn eq(&self, other: &Self) -> bool { + self.index == other.index + } +} + +#[test] +fn GetNocaseNozero() { + let mut t = Headers::with_capacity(2); + let v1 = Bstr::from("Value1"); + let mut k = Bstr::from("K\x00\x00\x00\x00ey\x001"); + let mut h = Header::new(k, v1.clone()); + t.elements.push(h); + k = Bstr::from("K\x00e\x00\x00Y2"); + let v2 = Bstr::from("Value2"); + h = Header::new(k, v2.clone()); + t.elements.push(h); + + let mut result = t.get_nocase_nozero("key1"); + let mut res = result.unwrap(); + assert_eq!( + Ordering::Equal, + res.name.cmp_slice("K\x00\x00\x00\x00ey\x001") + ); + assert_eq!(v1, res.value); + + result = t.get_nocase_nozero("KeY1"); + res = result.unwrap(); + assert_eq!( + Ordering::Equal, + res.name.cmp_slice("K\x00\x00\x00\x00ey\x001") + ); + assert_eq!(v1, res.value); + + result = t.get_nocase_nozero("KEY2"); + res = result.unwrap(); + assert_eq!(Ordering::Equal, res.name.cmp_slice("K\x00e\x00\x00Y2")); + assert_eq!(v2, res.value); + + result = t.get_nocase("key1"); + assert!(result.is_none()); +} diff --git a/rust/htp/src/transactions.rs b/rust/htp/src/transactions.rs new file mode 100644 index 000000000000..45a889398df1 --- /dev/null +++ 
b/rust/htp/src/transactions.rs @@ -0,0 +1,178 @@ +use crate::{config::Config, log::Logger, transaction::Transaction}; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::rc::Rc; + +/// Transactions is a structure which tracks request and response +/// transactions, creating the current request or response transaction +/// on demand unless the configured `max_tx` limit has been reached. +pub struct Transactions { + config: Rc, + logger: Logger, + request: usize, + response: usize, + transactions: BTreeMap, +} + +impl Transactions { + /// Make a new Transactions struct with the given config + pub fn new(cfg: &Rc, logger: &Logger) -> Self { + Self { + config: Rc::clone(cfg), + logger: logger.clone(), + request: 0, + response: 0, + transactions: BTreeMap::default(), + } + } + + /// Return the number of transactions processed. + /// The value returned may wrap around if the number of transactions + /// exceeds the storage size available to `usize`. + pub fn size(&self) -> usize { + // The total number of transactions is just the maximum + // of the request or response transaction index + 1 (if + // that transaction is started), or zero if neither + // request or response transaction exist yet + let tx_to_check = std::cmp::max(self.request, self.response); + match self.transactions.get(&tx_to_check) { + // Transaction is created, check if it is started + Some(tx) => tx.index.wrapping_add(tx.is_started() as usize), + // Transaction doesn't exist yet, so the index is the size + None => tx_to_check, + } + } + + /// Get the current request transaction index + pub fn request_index(&self) -> usize { + self.request + } + + /// Get the current request transaction as a shared reference (delegates to `request_mut`) + pub fn request(&mut self) -> Option<&Transaction> { + match self.request_mut() { + Some(req) => Some(req), + None => None, + } + } + + /// Get the current request transaction, creating it if needed; None once `max_tx` transactions are stored + pub fn request_mut(&mut self) -> Option<&mut Transaction> { + let cfg = &self.config; + let logger = &self.logger; + let request = self.request; + 
let nbtx = self.transactions.len(); + match self.transactions.entry(request) { + Entry::Occupied(entry) => Some(entry.into_mut()), + Entry::Vacant(entry) => { + if nbtx >= cfg.max_tx as usize { + return None; + } + Some(entry.insert(Transaction::new(cfg, logger, request))) + } + } + } + + /// Get the current response transaction index + pub fn response_index(&self) -> usize { + self.response + } + + /// Get the current response transaction as a shared reference (delegates to `response_mut`) + pub fn response(&mut self) -> Option<&Transaction> { + match self.response_mut() { + Some(resp) => Some(resp), + None => None, + } + } + + /// Get the current response transaction, creating it if needed; None once `max_tx` transactions are stored + pub fn response_mut(&mut self) -> Option<&mut Transaction> { + let cfg = &self.config; + let logger = &self.logger; + let response = self.response; + let nbtx = self.transactions.len(); + match self.transactions.entry(response) { + Entry::Occupied(entry) => Some(entry.into_mut()), + Entry::Vacant(entry) => { + if nbtx >= cfg.max_tx as usize { + return None; + } + Some(entry.insert(Transaction::new(cfg, logger, response))) + } + } + } + + /// Increment the request transaction number. + /// May cause the previous transaction to be freed if configured to auto-destroy. + /// Returns the new request transaction index + pub fn request_next(&mut self) -> usize { + self.check_free(self.request); + self.request = self.request.wrapping_add(1); + self.request + } + + /// Increment the response transaction number. + /// May cause the previous transaction to be freed if configured to auto-destroy.
+ /// Returns the new response transaction index + pub fn response_next(&mut self) -> usize { + self.check_free(self.response); + self.response = self.response.wrapping_add(1); + self.response + } + + /// Check if any old transactions can be freed + fn check_free(&mut self, index: usize) { + if self.config.tx_auto_destroy { + if let Some(tx) = self.transactions.get(&index) { + if !tx.is_complete() { + return; + } + } + self.transactions.remove(&index); + } + } + + /// Remove the transaction at the given index. If the transaction + /// existed, it is returned. + pub fn remove(&mut self, index: usize) -> Option { + self.transactions.remove(&index) + } + + /// Get the given transaction by index number + pub fn get(&self, index: usize) -> Option<&Transaction> { + self.transactions.get(&index) + } + + /// Get the given transaction by index number + pub fn get_mut(&mut self, index: usize) -> Option<&mut Transaction> { + self.transactions.get_mut(&index) + } +} + +/// An iterator over Transactions +pub struct TransactionsIterator<'a> { + iter: std::collections::btree_map::IterMut<'a, usize, Transaction>, +} + +impl<'a> Iterator for TransactionsIterator<'a> { + type Item = &'a mut Transaction; + fn next(&mut self) -> Option { + if let Some((_index, tx)) = self.iter.next() { + Some(tx) + } else { + None + } + } +} + +impl<'a> IntoIterator for &'a mut Transactions { + type Item = &'a mut Transaction; + type IntoIter = TransactionsIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + TransactionsIterator { + iter: self.transactions.iter_mut(), + } + } +} diff --git a/rust/htp/src/unicode_bestfit_map.rs b/rust/htp/src/unicode_bestfit_map.rs new file mode 100644 index 000000000000..0e530b97e16d --- /dev/null +++ b/rust/htp/src/unicode_bestfit_map.rs @@ -0,0 +1,435 @@ +use lazy_static::lazy_static; +use std::collections::HashMap; + +#[derive(Copy, Clone)] +pub struct UnicodeBestfitMap { + // Best-fit mapping options. 
+ /// The best-fit map to use to decode %u-encoded characters. + pub map: &'static HashMap, + /// The replacement byte used when there is no best-fit mapping. + pub replacement_byte: u8, +} + +impl Default for UnicodeBestfitMap { + fn default() -> Self { + Self { + map: &bestfit_1252, + replacement_byte: b'?', + } + } +} + +impl UnicodeBestfitMap { + pub fn get(&self, unicode: u32) -> u8 { + self.map + .get(&unicode) + .copied() + .unwrap_or(self.replacement_byte) + } +} + +/// Create bestfit key from two 8 bit bytes. +#[macro_export] +macro_rules! bestfit_key { + ($c1:expr, $c2:expr) => { + ((($c1 as i32) << 8 as i32) + $c2 as i32) as u32 + }; +} + +lazy_static! { + pub static ref bestfit_1252: HashMap = [ + (bestfit_key!(0x01, 0), 0x41), + (bestfit_key!(0x1, 0x1), 0x61), + (bestfit_key!(0x1, 0x2), 0x41), + (bestfit_key!(0x1, 0x3), 0x61), + (bestfit_key!(0x1, 0x4), 0x41), + (bestfit_key!(0x1, 0x5), 0x61), + (bestfit_key!(0x1, 0x6), 0x43), + (bestfit_key!(0x1, 0x7), 0x63), + (bestfit_key!(0x1, 0x8), 0x43), + (bestfit_key!(0x1, 0x9), 0x63), + (bestfit_key!(0x1, 0xa), 0x43), + (bestfit_key!(0x1, 0xb), 0x63), + (bestfit_key!(0x1, 0xc), 0x43), + (bestfit_key!(0x1, 0xd), 0x63), + (bestfit_key!(0x1, 0xe), 0x44), + (bestfit_key!(0x1, 0xf), 0x64), + (bestfit_key!(0x1, 0x11), 0x64), + (bestfit_key!(0x1, 0x12), 0x45), + (bestfit_key!(0x1, 0x13), 0x65), + (bestfit_key!(0x1, 0x14), 0x45), + (bestfit_key!(0x1, 0x15), 0x65), + (bestfit_key!(0x1, 0x16), 0x45), + (bestfit_key!(0x1, 0x17), 0x65), + (bestfit_key!(0x1, 0x18), 0x45), + (bestfit_key!(0x1, 0x19), 0x65), + (bestfit_key!(0x1, 0x1a), 0x45), + (bestfit_key!(0x1, 0x1b), 0x65), + (bestfit_key!(0x1, 0x1c), 0x47), + (bestfit_key!(0x1, 0x1d), 0x67), + (bestfit_key!(0x1, 0x1e), 0x47), + (bestfit_key!(0x1, 0x1f), 0x67), + (bestfit_key!(0x1, 0x20), 0x47), + (bestfit_key!(0x1, 0x21), 0x67), + (bestfit_key!(0x1, 0x22), 0x47), + (bestfit_key!(0x1, 0x23), 0x67), + (bestfit_key!(0x1, 0x24), 0x48), + (bestfit_key!(0x1, 0x25), 0x68), + 
(bestfit_key!(0x1, 0x26), 0x48), + (bestfit_key!(0x1, 0x27), 0x68), + (bestfit_key!(0x1, 0x28), 0x49), + (bestfit_key!(0x1, 0x29), 0x69), + (bestfit_key!(0x1, 0x2a), 0x49), + (bestfit_key!(0x1, 0x2b), 0x69), + (bestfit_key!(0x1, 0x2c), 0x49), + (bestfit_key!(0x1, 0x2d), 0x69), + (bestfit_key!(0x1, 0x2e), 0x49), + (bestfit_key!(0x1, 0x2f), 0x69), + (bestfit_key!(0x1, 0x30), 0x49), + (bestfit_key!(0x1, 0x31), 0x69), + (bestfit_key!(0x1, 0x34), 0x4a), + (bestfit_key!(0x1, 0x35), 0x6a), + (bestfit_key!(0x1, 0x36), 0x4b), + (bestfit_key!(0x1, 0x37), 0x6b), + (bestfit_key!(0x1, 0x39), 0x4c), + (bestfit_key!(0x1, 0x3a), 0x6c), + (bestfit_key!(0x1, 0x3b), 0x4c), + (bestfit_key!(0x1, 0x3c), 0x6c), + (bestfit_key!(0x1, 0x3d), 0x4c), + (bestfit_key!(0x1, 0x3e), 0x6c), + (bestfit_key!(0x1, 0x41), 0x4c), + (bestfit_key!(0x1, 0x42), 0x6c), + (bestfit_key!(0x1, 0x43), 0x4e), + (bestfit_key!(0x1, 0x44), 0x6e), + (bestfit_key!(0x1, 0x45), 0x4e), + (bestfit_key!(0x1, 0x46), 0x6e), + (bestfit_key!(0x1, 0x47), 0x4e), + (bestfit_key!(0x1, 0x48), 0x6e), + (bestfit_key!(0x1, 0x4c), 0x4f), + (bestfit_key!(0x1, 0x4d), 0x6f), + (bestfit_key!(0x1, 0x4e), 0x4f), + (bestfit_key!(0x1, 0x4f), 0x6f), + (bestfit_key!(0x1, 0x50), 0x4f), + (bestfit_key!(0x1, 0x51), 0x6f), + (bestfit_key!(0x1, 0x54), 0x52), + (bestfit_key!(0x1, 0x55), 0x72), + (bestfit_key!(0x1, 0x56), 0x52), + (bestfit_key!(0x1, 0x57), 0x72), + (bestfit_key!(0x1, 0x58), 0x52), + (bestfit_key!(0x1, 0x59), 0x72), + (bestfit_key!(0x1, 0x5a), 0x53), + (bestfit_key!(0x1, 0x5b), 0x73), + (bestfit_key!(0x1, 0x5c), 0x53), + (bestfit_key!(0x1, 0x5d), 0x73), + (bestfit_key!(0x1, 0x5e), 0x53), + (bestfit_key!(0x1, 0x5f), 0x73), + (bestfit_key!(0x1, 0x62), 0x54), + (bestfit_key!(0x1, 0x63), 0x74), + (bestfit_key!(0x1, 0x64), 0x54), + (bestfit_key!(0x1, 0x65), 0x74), + (bestfit_key!(0x1, 0x66), 0x54), + (bestfit_key!(0x1, 0x67), 0x74), + (bestfit_key!(0x1, 0x68), 0x55), + (bestfit_key!(0x1, 0x69), 0x75), + (bestfit_key!(0x1, 0x6a), 0x55), + 
(bestfit_key!(0x1, 0x6b), 0x75), + (bestfit_key!(0x1, 0x6c), 0x55), + (bestfit_key!(0x1, 0x6d), 0x75), + (bestfit_key!(0x1, 0x6e), 0x55), + (bestfit_key!(0x1, 0x6f), 0x75), + (bestfit_key!(0x1, 0x70), 0x55), + (bestfit_key!(0x1, 0x71), 0x75), + (bestfit_key!(0x1, 0x72), 0x55), + (bestfit_key!(0x1, 0x73), 0x75), + (bestfit_key!(0x1, 0x74), 0x57), + (bestfit_key!(0x1, 0x75), 0x77), + (bestfit_key!(0x1, 0x76), 0x59), + (bestfit_key!(0x1, 0x77), 0x79), + (bestfit_key!(0x1, 0x79), 0x5a), + (bestfit_key!(0x1, 0x7b), 0x5a), + (bestfit_key!(0x1, 0x7c), 0x7a), + (bestfit_key!(0x1, 0x80), 0x62), + (bestfit_key!(0x1, 0x97), 0x49), + (bestfit_key!(0x1, 0x9a), 0x6c), + (bestfit_key!(0x1, 0x9f), 0x4f), + (bestfit_key!(0x1, 0xa0), 0x4f), + (bestfit_key!(0x1, 0xa1), 0x6f), + (bestfit_key!(0x1, 0xab), 0x74), + (bestfit_key!(0x1, 0xae), 0x54), + (bestfit_key!(0x1, 0xaf), 0x55), + (bestfit_key!(0x1, 0xb0), 0x75), + (bestfit_key!(0x1, 0xb6), 0x7a), + (bestfit_key!(0x1, 0xc0), 0x7c), + (bestfit_key!(0x1, 0xc3), 0x21), + (bestfit_key!(0x1, 0xcd), 0x41), + (bestfit_key!(0x1, 0xce), 0x61), + (bestfit_key!(0x1, 0xcf), 0x49), + (bestfit_key!(0x1, 0xd0), 0x69), + (bestfit_key!(0x1, 0xd1), 0x4f), + (bestfit_key!(0x1, 0xd2), 0x6f), + (bestfit_key!(0x1, 0xd3), 0x55), + (bestfit_key!(0x1, 0xd4), 0x75), + (bestfit_key!(0x1, 0xd5), 0x55), + (bestfit_key!(0x1, 0xd6), 0x75), + (bestfit_key!(0x1, 0xd7), 0x55), + (bestfit_key!(0x1, 0xd8), 0x75), + (bestfit_key!(0x1, 0xd9), 0x55), + (bestfit_key!(0x1, 0xda), 0x75), + (bestfit_key!(0x1, 0xdb), 0x55), + (bestfit_key!(0x1, 0xdc), 0x75), + (bestfit_key!(0x1, 0xde), 0x41), + (bestfit_key!(0x1, 0xdf), 0x61), + (bestfit_key!(0x1, 0xe4), 0x47), + (bestfit_key!(0x1, 0xe5), 0x67), + (bestfit_key!(0x1, 0xe6), 0x47), + (bestfit_key!(0x1, 0xe7), 0x67), + (bestfit_key!(0x1, 0xe8), 0x4b), + (bestfit_key!(0x1, 0xe9), 0x6b), + (bestfit_key!(0x1, 0xea), 0x4f), + (bestfit_key!(0x1, 0xeb), 0x6f), + (bestfit_key!(0x1, 0xec), 0x4f), + (bestfit_key!(0x1, 0xed), 0x6f), + 
(bestfit_key!(0x1, 0xf0), 0x6a), + (bestfit_key!(0x2, 0x61), 0x67), + (bestfit_key!(0x2, 0xb9), 0x27), + (bestfit_key!(0x2, 0xba), 0x22), + (bestfit_key!(0x2, 0xbc), 0x27), + (bestfit_key!(0x2, 0xc4), 0x5e), + (bestfit_key!(0x2, 0xc8), 0x27), + (bestfit_key!(0x2, 0xcb), 0x60), + (bestfit_key!(0x2, 0xcd), 0x5f), + (bestfit_key!(0x3, 0x00), 0x60), + (bestfit_key!(0x3, 0x2), 0x5e), + (bestfit_key!(0x3, 0x3), 0x7e), + (bestfit_key!(0x3, 0xe), 0x22), + (bestfit_key!(0x3, 0x31), 0x5f), + (bestfit_key!(0x3, 0x32), 0x5f), + (bestfit_key!(0x3, 0x7e), 0x3b), + (bestfit_key!(0x3, 0x93), 0x47), + (bestfit_key!(0x3, 0x98), 0x54), + (bestfit_key!(0x3, 0xa3), 0x53), + (bestfit_key!(0x3, 0xa6), 0x46), + (bestfit_key!(0x3, 0xa9), 0x4f), + (bestfit_key!(0x3, 0xb1), 0x61), + (bestfit_key!(0x3, 0xb4), 0x64), + (bestfit_key!(0x3, 0xb5), 0x65), + (bestfit_key!(0x3, 0xc0), 0x70), + (bestfit_key!(0x3, 0xc3), 0x73), + (bestfit_key!(0x3, 0xc4), 0x74), + (bestfit_key!(0x3, 0xc6), 0x66), + (bestfit_key!(0x4, 0xbb), 0x68), + (bestfit_key!(0x5, 0x89), 0x3a), + (bestfit_key!(0x6, 0x6a), 0x25), + (bestfit_key!(0x20, 0), 0x20), + (bestfit_key!(0x20, 0x1), 0x20), + (bestfit_key!(0x20, 0x2), 0x20), + (bestfit_key!(0x20, 0x3), 0x20), + (bestfit_key!(0x20, 0x4), 0x20), + (bestfit_key!(0x20, 0x5), 0x20), + (bestfit_key!(0x20, 0x6), 0x20), + (bestfit_key!(0x20, 0x10), 0x2d), + (bestfit_key!(0x20, 0x11), 0x2d), + (bestfit_key!(0x20, 0x17), 0x3d), + (bestfit_key!(0x20, 0x32), 0x27), + (bestfit_key!(0x20, 0x35), 0x60), + (bestfit_key!(0x20, 0x44), 0x2f), + (bestfit_key!(0x20, 0x74), 0x34), + (bestfit_key!(0x20, 0x75), 0x35), + (bestfit_key!(0x20, 0x76), 0x36), + (bestfit_key!(0x20, 0x77), 0x37), + (bestfit_key!(0x20, 0x78), 0x38), + (bestfit_key!(0x20, 0x7f), 0x6e), + (bestfit_key!(0x20, 0x80), 0x30), + (bestfit_key!(0x20, 0x81), 0x31), + (bestfit_key!(0x20, 0x82), 0x32), + (bestfit_key!(0x20, 0x83), 0x33), + (bestfit_key!(0x20, 0x84), 0x34), + (bestfit_key!(0x20, 0x85), 0x35), + (bestfit_key!(0x20, 0x86), 
0x36), + (bestfit_key!(0x20, 0x87), 0x37), + (bestfit_key!(0x20, 0x88), 0x38), + (bestfit_key!(0x20, 0x89), 0x39), + (bestfit_key!(0x20, 0xa7), 0x50), + (bestfit_key!(0x21, 0x2), 0x43), + (bestfit_key!(0x21, 0x7), 0x45), + (bestfit_key!(0x21, 0xa), 0x67), + (bestfit_key!(0x21, 0xb), 0x48), + (bestfit_key!(0x21, 0xc), 0x48), + (bestfit_key!(0x21, 0xd), 0x48), + (bestfit_key!(0x21, 0xe), 0x68), + (bestfit_key!(0x21, 0x10), 0x49), + (bestfit_key!(0x21, 0x11), 0x49), + (bestfit_key!(0x21, 0x12), 0x4c), + (bestfit_key!(0x21, 0x13), 0x6c), + (bestfit_key!(0x21, 0x15), 0x4e), + (bestfit_key!(0x21, 0x18), 0x50), + (bestfit_key!(0x21, 0x19), 0x50), + (bestfit_key!(0x21, 0x1a), 0x51), + (bestfit_key!(0x21, 0x1b), 0x52), + (bestfit_key!(0x21, 0x1c), 0x52), + (bestfit_key!(0x21, 0x1d), 0x52), + (bestfit_key!(0x21, 0x24), 0x5a), + (bestfit_key!(0x21, 0x28), 0x5a), + (bestfit_key!(0x21, 0x2a), 0x4b), + (bestfit_key!(0x21, 0x2c), 0x42), + (bestfit_key!(0x21, 0x2d), 0x43), + (bestfit_key!(0x21, 0x2e), 0x65), + (bestfit_key!(0x21, 0x2f), 0x65), + (bestfit_key!(0x21, 0x30), 0x45), + (bestfit_key!(0x21, 0x31), 0x46), + (bestfit_key!(0x21, 0x33), 0x4d), + (bestfit_key!(0x21, 0x34), 0x6f), + (bestfit_key!(0x22, 0x12), 0x2d), + (bestfit_key!(0x22, 0x15), 0x2f), + (bestfit_key!(0x22, 0x16), 0x5c), + (bestfit_key!(0x22, 0x17), 0x2a), + (bestfit_key!(0x22, 0x1a), 0x76), + (bestfit_key!(0x22, 0x1e), 0x38), + (bestfit_key!(0x22, 0x23), 0x7c), + (bestfit_key!(0x22, 0x29), 0x6e), + (bestfit_key!(0x22, 0x36), 0x3a), + (bestfit_key!(0x22, 0x3c), 0x7e), + (bestfit_key!(0x22, 0x61), 0x3d), + (bestfit_key!(0x22, 0x64), 0x3d), + (bestfit_key!(0x22, 0x65), 0x3d), + (bestfit_key!(0x23, 0x3), 0x5e), + (bestfit_key!(0x23, 0x20), 0x28), + (bestfit_key!(0x23, 0x21), 0x29), + (bestfit_key!(0x23, 0x29), 0x3c), + (bestfit_key!(0x23, 0x2a), 0x3e), + (bestfit_key!(0x25, 0), 0x2d), + (bestfit_key!(0x25, 0xc), 0x2b), + (bestfit_key!(0x25, 0x10), 0x2b), + (bestfit_key!(0x25, 0x14), 0x2b), + (bestfit_key!(0x25, 
0x18), 0x2b), + (bestfit_key!(0x25, 0x1c), 0x2b), + (bestfit_key!(0x25, 0x2c), 0x2d), + (bestfit_key!(0x25, 0x34), 0x2d), + (bestfit_key!(0x25, 0x3c), 0x2b), + (bestfit_key!(0x25, 0x50), 0x2d), + (bestfit_key!(0x25, 0x52), 0x2b), + (bestfit_key!(0x25, 0x53), 0x2b), + (bestfit_key!(0x25, 0x54), 0x2b), + (bestfit_key!(0x25, 0x55), 0x2b), + (bestfit_key!(0x25, 0x56), 0x2b), + (bestfit_key!(0x25, 0x57), 0x2b), + (bestfit_key!(0x25, 0x58), 0x2b), + (bestfit_key!(0x25, 0x59), 0x2b), + (bestfit_key!(0x25, 0x5a), 0x2b), + (bestfit_key!(0x25, 0x5b), 0x2b), + (bestfit_key!(0x25, 0x5c), 0x2b), + (bestfit_key!(0x25, 0x5d), 0x2b), + (bestfit_key!(0x25, 0x64), 0x2d), + (bestfit_key!(0x25, 0x65), 0x2d), + (bestfit_key!(0x25, 0x66), 0x2d), + (bestfit_key!(0x25, 0x67), 0x2d), + (bestfit_key!(0x25, 0x68), 0x2d), + (bestfit_key!(0x25, 0x69), 0x2d), + (bestfit_key!(0x25, 0x6a), 0x2b), + (bestfit_key!(0x25, 0x6b), 0x2b), + (bestfit_key!(0x25, 0x6c), 0x2b), + (bestfit_key!(0x25, 0x84), 0x5f), + (bestfit_key!(0x27, 0x58), 0x7c), + (bestfit_key!(0x30, 0), 0x20), + (bestfit_key!(0x30, 0x8), 0x3c), + (bestfit_key!(0x30, 0x9), 0x3e), + (bestfit_key!(0x30, 0x1a), 0x5b), + (bestfit_key!(0x30, 0x1b), 0x5d), + (bestfit_key!(0xff, 0x1), 0x21), + (bestfit_key!(0xff, 0x2), 0x22), + (bestfit_key!(0xff, 0x3), 0x23), + (bestfit_key!(0xff, 0x4), 0x24), + (bestfit_key!(0xff, 0x5), 0x25), + (bestfit_key!(0xff, 0x6), 0x26), + (bestfit_key!(0xff, 0x7), 0x27), + (bestfit_key!(0xff, 0x8), 0x28), + (bestfit_key!(0xff, 0x9), 0x29), + (bestfit_key!(0xff, 0xa), 0x2a), + (bestfit_key!(0xff, 0xb), 0x2b), + (bestfit_key!(0xff, 0xc), 0x2c), + (bestfit_key!(0xff, 0xd), 0x2d), + (bestfit_key!(0xff, 0xe), 0x2e), + (bestfit_key!(0xff, 0xf), 0x2f), + (bestfit_key!(0xff, 0x10), 0x30), + (bestfit_key!(0xff, 0x11), 0x31), + (bestfit_key!(0xff, 0x12), 0x32), + (bestfit_key!(0xff, 0x13), 0x33), + (bestfit_key!(0xff, 0x14), 0x34), + (bestfit_key!(0xff, 0x15), 0x35), + (bestfit_key!(0xff, 0x16), 0x36), + (bestfit_key!(0xff, 
0x17), 0x37), + (bestfit_key!(0xff, 0x18), 0x38), + (bestfit_key!(0xff, 0x19), 0x39), + (bestfit_key!(0xff, 0x1a), 0x3a), + (bestfit_key!(0xff, 0x1b), 0x3b), + (bestfit_key!(0xff, 0x1c), 0x3c), + (bestfit_key!(0xff, 0x1d), 0x3d), + (bestfit_key!(0xff, 0x1e), 0x3e), + (bestfit_key!(0xff, 0x20), 0x40), + (bestfit_key!(0xff, 0x21), 0x41), + (bestfit_key!(0xff, 0x22), 0x42), + (bestfit_key!(0xff, 0x23), 0x43), + (bestfit_key!(0xff, 0x24), 0x44), + (bestfit_key!(0xff, 0x25), 0x45), + (bestfit_key!(0xff, 0x26), 0x46), + (bestfit_key!(0xff, 0x27), 0x47), + (bestfit_key!(0xff, 0x28), 0x48), + (bestfit_key!(0xff, 0x29), 0x49), + (bestfit_key!(0xff, 0x2a), 0x4a), + (bestfit_key!(0xff, 0x2b), 0x4b), + (bestfit_key!(0xff, 0x2c), 0x4c), + (bestfit_key!(0xff, 0x2d), 0x4d), + (bestfit_key!(0xff, 0x2e), 0x4e), + (bestfit_key!(0xff, 0x2f), 0x4f), + (bestfit_key!(0xff, 0x30), 0x50), + (bestfit_key!(0xff, 0x31), 0x51), + (bestfit_key!(0xff, 0x32), 0x52), + (bestfit_key!(0xff, 0x33), 0x53), + (bestfit_key!(0xff, 0x34), 0x54), + (bestfit_key!(0xff, 0x35), 0x55), + (bestfit_key!(0xff, 0x36), 0x56), + (bestfit_key!(0xff, 0x37), 0x57), + (bestfit_key!(0xff, 0x38), 0x58), + (bestfit_key!(0xff, 0x39), 0x59), + (bestfit_key!(0xff, 0x3a), 0x5a), + (bestfit_key!(0xff, 0x3b), 0x5b), + (bestfit_key!(0xff, 0x3c), 0x5c), + (bestfit_key!(0xff, 0x3d), 0x5d), + (bestfit_key!(0xff, 0x3e), 0x5e), + (bestfit_key!(0xff, 0x3f), 0x5f), + (bestfit_key!(0xff, 0x40), 0x60), + (bestfit_key!(0xff, 0x41), 0x61), + (bestfit_key!(0xff, 0x42), 0x62), + (bestfit_key!(0xff, 0x43), 0x63), + (bestfit_key!(0xff, 0x44), 0x64), + (bestfit_key!(0xff, 0x45), 0x65), + (bestfit_key!(0xff, 0x46), 0x66), + (bestfit_key!(0xff, 0x47), 0x67), + (bestfit_key!(0xff, 0x48), 0x68), + (bestfit_key!(0xff, 0x49), 0x69), + (bestfit_key!(0xff, 0x4a), 0x6a), + (bestfit_key!(0xff, 0x4b), 0x6b), + (bestfit_key!(0xff, 0x4c), 0x6c), + (bestfit_key!(0xff, 0x4d), 0x6d), + (bestfit_key!(0xff, 0x4e), 0x6e), + (bestfit_key!(0xff, 0x4f), 0x6f), + 
(bestfit_key!(0xff, 0x50), 0x70),
+        (bestfit_key!(0xff, 0x51), 0x71),
+        (bestfit_key!(0xff, 0x52), 0x72),
+        (bestfit_key!(0xff, 0x53), 0x73),
+        (bestfit_key!(0xff, 0x54), 0x74),
+        (bestfit_key!(0xff, 0x55), 0x75),
+        (bestfit_key!(0xff, 0x56), 0x76),
+        (bestfit_key!(0xff, 0x57), 0x77),
+        (bestfit_key!(0xff, 0x58), 0x78),
+        (bestfit_key!(0xff, 0x59), 0x79),
+        (bestfit_key!(0xff, 0x5a), 0x7a),
+        (bestfit_key!(0xff, 0x5b), 0x7b),
+        (bestfit_key!(0xff, 0x5c), 0x7c),
+        (bestfit_key!(0xff, 0x5d), 0x7d),
+        (bestfit_key!(0xff, 0x5e), 0x7e),
+    ]
+    .iter()
+    .cloned()
+    .collect();
+}
diff --git a/rust/htp/src/uri.rs b/rust/htp/src/uri.rs
new file mode 100644
index 000000000000..3dd71c5de9ca
--- /dev/null
+++ b/rust/htp/src/uri.rs
@@ -0,0 +1,539 @@
+use crate::{
+    bstr::Bstr,
+    config::{DecoderConfig, HtpUnwanted},
+    log::Logger,
+    parsers::{credentials, fragment, hostname, parse_hostport, path, port, query, scheme},
+    urlencoded::{decode_uri_inplace, decode_uri_with_flags, path_decode_uri_inplace},
+    utf8_decoder::decode_and_validate_inplace,
+    util::{convert_port, FlagOperations, HtpFlags},
+};
+use nom::{combinator::opt, sequence::tuple};
+
+/// A request URI broken into its components. Every component is optional:
+/// a component that is absent from the URI is stored as `None`.
+// NOTE(review): the `Option` types in this copy carry no type argument
+// (it looks like it was lost when the patch was extracted) - confirm
+// against the upstream file.
+#[derive(Clone)]
+pub struct Uri {
+    /// Decoder configuration used by the `normalized_*` helpers.
+    pub cfg: DecoderConfig,
+    /// Scheme, e.g., "http".
+    pub scheme: Option,
+    /// Username.
+    pub username: Option,
+    /// Password.
+    pub password: Option,
+    /// Hostname.
+    pub hostname: Option,
+    /// Port, as string.
+    pub port: Option,
+    /// Port, as number. `None` if the URI carried no port information or
+    /// the port information was invalid (e.g., it is not a number or it
+    /// falls out of range).
+    pub port_number: Option,
+    /// The path part of this URI.
+    pub path: Option,
+    /// Query string.
+    pub query: Option,
+    /// Fragment identifier. Rarely seen in a server-side setting, but not
+    /// impossible.
+    pub fragment: Option,
+}
+
+impl std::fmt::Debug for Uri {
+    /// Prints every URI component; the decoder configuration is left out.
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        let mut out = f.debug_struct("Uri");
+        out.field("scheme", &self.scheme);
+        out.field("username", &self.username);
+        out.field("password", &self.password);
+        out.field("hostname", &self.hostname);
+        out.field("port", &self.port);
+        out.field("port_number", &self.port_number);
+        out.field("path", &self.path);
+        out.field("query", &self.query);
+        out.field("fragment", &self.fragment);
+        out.finish()
+    }
+}
+
+impl Default for Uri {
+    /// Create an empty Uri struct that uses the default decoder configuration.
+    fn default() -> Self {
+        Self::with_config(DecoderConfig::default())
+    }
+}
+
+impl Uri {
+    /// Create an empty Uri struct (all components `None`) that uses the
+    /// given decoder configuration.
+    pub fn with_config(cfg: DecoderConfig) -> Self {
+        Self {
+            cfg,
+            scheme: None,
+            username: None,
+            password: None,
+            hostname: None,
+            port: None,
+            port_number: None,
+            path: None,
+            query: None,
+            fragment: None,
+        }
+    }
+
+    /// Normalize uri scheme: returns an ASCII-lowercased copy, or `None`
+    /// when there is no scheme.
+    pub fn normalized_scheme(&self) -> Option {
+        let mut lowered = self.scheme.clone()?;
+        lowered.make_ascii_lowercase();
+        Some(lowered)
+    }
+
+    /// Normalize uri username: URI-decodes it, updating `flags`. Returns
+    /// `None` when there is no username or decoding fails.
+    pub fn normalized_username(&self, flags: &mut u64) -> Option {
+        let raw = self.username.as_ref()?;
+        decode_uri_with_flags(&self.cfg, flags, raw.as_slice()).ok()
+    }
+
+    /// Normalize uri password: URI-decodes it, updating `flags`. Returns
+    /// `None` when there is no password or decoding fails.
+    pub fn normalized_password(&self, flags: &mut u64) -> Option {
+        let raw = self.password.as_ref()?;
+        decode_uri_with_flags(&self.cfg, flags, raw.as_slice()).ok()
+    }
+
+    /// Normalize uri hostname.
+ pub fn normalized_hostname(&self, flags: &mut u64) -> Option { + if let Some(hostname) = self.hostname.as_ref() { + let mut normalized_hostname = + decode_uri_with_flags(&self.cfg, flags, hostname.as_slice()).ok()?; + normalized_hostname.make_ascii_lowercase(); + // Remove dots from the end of the string. + while normalized_hostname.last() == Some(&(b'.')) { + normalized_hostname.pop(); + } + Some(normalized_hostname) + } else { + None + } + } + + /// Normalize uri port. + pub fn normalized_port(&self, flags: &mut u64) -> Option { + if let Some(port) = self.port.as_ref() { + let normalized_port = convert_port(port.as_slice()); + if normalized_port.is_none() { + // Failed to parse the port number. + flags.set(HtpFlags::HOSTU_INVALID); + } + normalized_port + } else { + None + } + } + + /// Normalize uri fragment. + pub fn normalized_fragment(&self, flags: &mut u64) -> Option { + if let Some(fragment) = self.fragment.as_ref() { + decode_uri_with_flags(&self.cfg, flags, fragment).ok() + } else { + None + } + } + + /// Normalize uri path. + pub fn normalized_path(&self, flags: &mut u64, status: &mut HtpUnwanted) -> Option { + if let Some(mut path) = self.path.clone() { + // Decode URL-encoded (and %u-encoded) characters, as well as lowercase, + // compress separators and convert backslashes. + // Ignore result. + path_decode_uri_inplace(&self.cfg, flags, status, &mut path); + // Handle UTF-8 in the path. Validate it first, and only save it if cfg specifies it + decode_and_validate_inplace(&self.cfg, flags, status, &mut path); + // RFC normalization. + normalize_uri_path_inplace(&mut path); + Some(path) + } else { + None + } + } + + /// Parses request URI, making no attempt to validate the contents. + /// + /// It attempts, but is not guaranteed to successfully parse out a scheme, username, password, hostname, port, query, and fragment. + /// Note: only attempts to extract a username, password, and hostname and subsequently port if it successfully parsed a scheme. 
+    pub fn parse_uri(&mut self, input: &[u8]) {
+        // The scheme/credentials/host group is optional as a whole, so
+        // credentials and host are only attempted once a scheme matched.
+        let authority_parser = opt(tuple((
+            scheme(),
+            opt(credentials()),
+            opt(tuple((hostname(), opt(port())))),
+        )));
+        let parsed = tuple((
+            authority_parser,
+            opt(path()),
+            opt(query()),
+            opt(fragment()),
+        ))(input);
+        // Components are only ever set here, never reset: a field keeps its
+        // previous value when the matching component is absent.
+        if let Ok((_, (authority_part, path_part, query_part, fragment_part))) = parsed {
+            if let Some(found) = path_part {
+                self.path = Some(Bstr::from(found));
+            }
+            if let Some(found) = query_part {
+                self.query = Some(Bstr::from(found));
+            }
+            if let Some(found) = fragment_part {
+                self.fragment = Some(Bstr::from(found));
+            }
+            if let Some((scheme_part, credentials_part, host_part)) = authority_part {
+                self.scheme = Some(Bstr::from(scheme_part));
+                if let Some((user, pass)) = credentials_part {
+                    self.username = Some(Bstr::from(user));
+                    if let Some(pass) = pass {
+                        self.password = Some(Bstr::from(pass));
+                    }
+                }
+                if let Some((host, port_part)) = host_part {
+                    self.hostname = Some(Bstr::from(host));
+                    if let Some(port_part) = port_part {
+                        self.port = Some(Bstr::from(port_part));
+                    }
+                }
+            }
+        }
+    }
+
+    /// Parses hostport provided in the URI.
+    ///
+    /// Stores the lowercased host, the port text and, when it converted, the
+    /// port number. Sets `HtpFlags::HOSTU_INVALID` in `flags` when the parser
+    /// reports the input as invalid or a port is present without a usable
+    /// number. Nothing is changed if `parse_hostport` fails.
+    pub fn parse_uri_hostport(&mut self, hostport: &Bstr, flags: &mut u64) {
+        if let Ok((_, (host, port_info, mut valid))) = parse_hostport(hostport) {
+            let lowered = &host.to_ascii_lowercase();
+            self.hostname = Some(Bstr::from(lowered.as_slice()));
+            if let Some((port_text, port_value)) = port_info {
+                self.port = Some(Bstr::from(port_text));
+                match port_value {
+                    Some(num) => self.port_number = Some(num),
+                    None => valid = false,
+                }
+            }
+            if !valid {
+                flags.set(HtpFlags::HOSTU_INVALID)
+            }
+        }
+    }
+
+    /// Generate a normalized uri string.
+    pub fn generate_normalized_uri(
+        &self, mut logger: Option,
+    ) -> (Option, Option) {
+        // Returns `(partial, complete)`: `partial` is path + '?' query + '#'
+        // fragment, `complete` additionally starts with scheme, credentials,
+        // host and port. Either is `None` when it would be empty.
+        //
+        // First pass: work out how much room each output string needs.
+        let mut complete_len = self.scheme.as_ref().map_or(0, |scheme| scheme.len() + 3); // '://'
+        complete_len += self.username.as_ref().map_or(0, |username| username.len());
+        complete_len += self.password.as_ref().map_or(0, |password| password.len());
+        if self.username.is_some() || self.password.is_some() {
+            complete_len += 2; // ':' and '@'
+        }
+        complete_len += self.hostname.as_ref().map_or(0, |hostname| hostname.len());
+        // The port is written with a leading ':', so reserve room for it too.
+        complete_len += self.port.as_ref().map_or(0, |port| port.len() + 1);
+        let mut partial_len = self.path.as_ref().map_or(0, |path| path.len());
+        partial_len += self.query.as_ref().map_or(0, |query| query.len() + 1); // ?
+        partial_len += self.fragment.as_ref().map_or(0, |fragment| fragment.len() + 1); // #
+        complete_len += partial_len;
+
+        // Second pass: construct the strings.
+        let mut normalized_uri = Bstr::with_capacity(complete_len);
+        let mut partial_normalized_uri = Bstr::with_capacity(partial_len);
+
+        if let Some(scheme) = self.scheme.as_ref() {
+            normalized_uri.add(scheme.as_slice());
+            normalized_uri.add("://");
+        }
+        if self.username.is_some() || self.password.is_some() {
+            // The ':' separator is emitted even when only one of the two is set.
+            if let Some(username) = self.username.as_ref() {
+                normalized_uri.add(username.as_slice());
+            }
+            normalized_uri.add(":");
+            if let Some(password) = self.password.as_ref() {
+                normalized_uri.add(password.as_slice());
+            }
+            normalized_uri.add("@");
+        }
+        if let Some(hostname) = self.hostname.as_ref() {
+            normalized_uri.add(hostname.as_slice());
+        }
+        if let Some(port) = self.port.as_ref() {
+            normalized_uri.add(":");
+            normalized_uri.add(port.as_slice());
+        }
+        if let Some(mut path) = self.path.clone() {
+            // Path is already decoded when we parsed the uri in transaction, only decode once more
+            if self.cfg.double_decode_normalized_path {
+                let path_len = path.len();
+                let _ = decode_uri_inplace(&self.cfg, &mut path);
+                // A shorter result means the second decode found something to decode.
+                if path_len > path.len() {
+                    if let Some(logger) = logger.as_mut() {
+                        htp_warn!(
+                            logger,
+                            HtpLogCode::DOUBLE_ENCODED_URI,
+                            "URI path is double encoded"
+                        );
+                    }
+                }
+            }
+            partial_normalized_uri.add(path.as_slice());
+        }
+        if let Some(mut query) = self.query.clone() {
+            // The query is always decoded once here, and a second time on request.
+            let _ = decode_uri_inplace(&self.cfg, &mut query);
+            if self.cfg.double_decode_normalized_query {
+                let query_len = query.len();
+                let _ = decode_uri_inplace(&self.cfg, &mut query);
+                if query_len > query.len() {
+                    if let Some(logger) = logger.as_mut() {
+                        htp_warn!(
+                            logger,
+                            HtpLogCode::DOUBLE_ENCODED_URI,
+                            "URI query is double encoded"
+                        );
+                    }
+                }
+            }
+            partial_normalized_uri.add("?");
+            partial_normalized_uri.add(query.as_slice());
+        }
+        if let Some(fragment) = self.fragment.as_ref() {
+            partial_normalized_uri.add("#");
+            partial_normalized_uri.add(fragment.as_slice());
+        }
+        normalized_uri.add(partial_normalized_uri.as_slice());
+        match (normalized_uri.is_empty(), partial_normalized_uri.is_empty()) {
+            (true, _) => (None, None),
+            (false, true) => (None, Some(normalized_uri)),
+            (false, false) => (Some(partial_normalized_uri), Some(normalized_uri)),
+        }
+    }
+}
+
+/// Normalize URI path in place. This function implements the remove dot segments algorithm
+/// specified in RFC 3986, section 5.2.4.
+///
+/// The path is split on '/': "." segments are dropped and ".." removes the
+/// previously kept segment, except that the empty segment produced by a
+/// leading '/' is never removed.
+fn normalize_uri_path_inplace(s: &mut Bstr) {
+    let mut kept = Vec::<&[u8]>::with_capacity(10);
+    for segment in s.as_slice().split(|c| *c == b'/') {
+        match segment {
+            b"." => {}
+            b".." => {
+                // Keep the leading empty segment so an absolute path stays absolute.
+                let only_root_left = kept.len() == 1 && kept[0].is_empty();
+                if !only_root_left {
+                    kept.pop();
+                }
+            }
+            other => kept.push(other),
+        }
+    }
+    let joined = kept.join(b"/" as &[u8]);
+    s.clear();
+    s.add(joined.as_slice());
+}
+
+//Tests
+#[cfg(test)]
+mod test {
+    use super::*;
+    use rstest::rstest;
+    #[rstest]
+    #[case::no_port(b"http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag",
+        Some("http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag"),
+        Some("/path1/path2?a=b&c=d#frag"),
+        Uri {
+            cfg: DecoderConfig::default(),
+            scheme: Some(Bstr::from("http")),
+            username: Some(Bstr::from("user")),
+            password: Some(Bstr::from("pass")),
+            hostname: Some(Bstr::from("www.example.com")),
+            port: Some(Bstr::from("1234")),
+            port_number: None,
+            path: Some(Bstr::from("/path1/path2")),
+            query: Some(Bstr::from("a=b&c=d")),
+            fragment: Some(Bstr::from("frag")),
+        })]
+    #[case::scheme_hostname_path(b"http://host.com/path",
+        Some("http://host.com/path"),
+        Some("/path"),
+        Uri {
+            cfg: DecoderConfig::default(),
+            scheme: Some(Bstr::from("http")),
+            username: None,
+            password: None,
+            hostname: Some(Bstr::from("host.com")),
+            port: None,
+            port_number: None,
+            path: Some(Bstr::from("/path")),
+            query: None,
+            fragment: None,
+        })]
+    #[case::scheme_hostname(b"http://host.com",
+        Some("http://host.com"),
+        None,
+        Uri {
+            cfg: DecoderConfig::default(),
+            scheme: Some(Bstr::from("http")),
+            username: None,
+            password: None,
+            hostname: Some(Bstr::from("host.com")),
+            port: None,
+            port_number: None,
+            path: None,
+            query: None,
+            fragment: None,
+        })]
+    #[case::scheme_path(b"http://",
+        Some("http:////"),
+        Some("//"),
+        Uri {
+            cfg: DecoderConfig::default(),
+            scheme: Some(Bstr::from("http")),
+            username: None,
+            password: None,
+            hostname: None,
+            port: None,
+            port_number: None,
+            path: Some(Bstr::from("//")),
+            query: None,
+            fragment: None,
+        })]
+    #[case::path(b"/path",
+        Some("/path"),
+        Some("/path"),
+        Uri {
+            cfg: DecoderConfig::default(),
+            scheme: None,
+            username: None,
+            password: None,
+            hostname: None,
+            port: None,
+            port_number: None,
+            path: Some(Bstr::from("/path")),
+            query: None,
+            fragment: None,
+        })]
+    #[case::empty_scheme_path(b"://",
+        Some(":////"),
+        Some("//"),
+        Uri {
+            cfg: DecoderConfig::default(),
+            scheme: Some(Bstr::from("")),
+            username: None,
+            password: None,
+            hostname: None,
+            port: None,
+            port_number: None,
+            path: Some(Bstr::from("//")),
+            query: None,
+            fragment: None,
+        })]
+    #[case::empty(b"", None, None, Uri::default())]
+    #[case::scheme_user_host(b"http://user@host.com",
+        Some("http://user:@host.com"),
+        None,
+        Uri {
+            cfg: DecoderConfig::default(),
+            scheme: Some(Bstr::from("http")),
+            username: Some(Bstr::from("user")),
+            password: None,
+            hostname: Some(Bstr::from("host.com")),
+            port: None,
+            port_number: None,
+            path: None,
+            query: None,
+            fragment: None,
+        })]
+    fn test_parse_uri(
+        #[case] input: &[u8], #[case] expected_normalized: Option<&str>,
+        #[case] expected_partial: Option<&str>, #[case] expected: Uri,
+    ) {
+        let mut uri = Uri::default();
+        uri.parse_uri(input);
+        assert_eq!(uri.scheme, expected.scheme);
+        assert_eq!(uri.username, expected.username);
+        assert_eq!(uri.password, expected.password);
+        assert_eq!(uri.hostname, expected.hostname);
+        assert_eq!(uri.port, expected.port);
+        assert_eq!(uri.path, expected.path);
+        assert_eq!(uri.query, expected.query);
+        assert_eq!(uri.fragment, expected.fragment);
+        assert_eq!(
+            uri.generate_normalized_uri(None),
+            (
+                expected_partial.map(Bstr::from),
+                expected_normalized.map(Bstr::from)
+            )
+        );
+    }
+
+    #[rstest]
+    #[case(b"/a/b/c/./../../g", b"/a/g")]
+    #[case(b"mid/content=5/../6", b"mid/6")]
+    #[case(b"./one", b"one")]
+    #[case(b"../one", b"one")]
+    #[case(b".", b"")]
+    #[case(b"..", b"")]
+    #[case(b"one/.", b"one")]
+    #[case(b"one/..", b"")]
+    #[case(b"one/../", b"")]
+    #[case(b"/../../../images.gif", b"/images.gif")]
+    fn test_normalize_uri_path(#[case] input: &[u8], #[case] expected: &[u8]) {
+        let mut s = Bstr::from(input);
+        normalize_uri_path_inplace(&mut s);
+        assert!(s.eq_slice(expected))
+    }
+}
diff --git a/rust/htp/src/urlencoded.rs b/rust/htp/src/urlencoded.rs
new file mode 100644
index 000000000000..e3128aea964b
--- /dev/null
+++ b/rust/htp/src/urlencoded.rs
@@ -0,0 +1,1144 @@
+use crate::{
+    bstr::Bstr,
+    config::{DecoderConfig, HtpUnwanted, HtpUrlEncodingHandling},
+    error::Result,
+    table::Table,
+    util::{FlagOperations, HtpFlags},
+};
+
+use nom::{
+    branch::alt,
+    bytes::complete::{tag_no_case, take, take_till, take_while_m_n},
+    character::complete::char,
+    combinator::{map, not, opt, peek},
+    multi::fold_many0,
+    number::complete::be_u8,
+    sequence::tuple,
+    IResult,
+};
+
+/// This is the main URLENCODED parser structure. It is used to store
+/// parser configuration, temporary parsing data, as well as the parameters.
+#[derive(Clone)]
+pub struct Parser {
+    /// The configuration structure associated with this parser
+    pub cfg: DecoderConfig,
+    /// The character used to separate parameters. Defaults to & and should
+    /// not be changed without good reason.
+    pub argument_separator: u8,
+    /// Whether to perform URL-decoding on parameters. Defaults to true.
+    pub decode_url_encoding: bool,
+    /// This table contains the list of parameters, indexed by name.
+    pub params: Table,
+    /// Contains parsing flags
+    pub flags: u64,
+    /// This field is set if the parser thinks that the
+    /// backend server will reject a request with a particular status code.
+    pub response_status_expected_number: HtpUnwanted,
+    // Private fields; these are used during the parsing process only
+    /// Set by `finalize()`: no more data will follow.
+    complete: bool,
+    /// Set once `parse_partial()` has seen a non-empty buffer.
+    saw_data: bool,
+    /// Input that has been received but not yet turned into parameters.
+    field: Bstr,
+}
+
+impl Parser {
+    /// Construct new Parser with provided decoder configuration
+    pub fn new(cfg: DecoderConfig) -> Self {
+        Self {
+            cfg,
+            argument_separator: b'&',
+            decode_url_encoding: true,
+            params: Table::with_capacity(32),
+            flags: 0,
+            response_status_expected_number: HtpUnwanted::IGNORE,
+            complete: false,
+            saw_data: false,
+            field: Bstr::with_capacity(64),
+        }
+    }
+
+    /// Finalizes parsing, forcing the parser to convert any outstanding
+    /// data into parameters. This method should be invoked at the end
+    /// of a parsing operation that used `parse_partial()`.
+    pub fn finalize(&mut self) {
+        self.complete = true;
+        self.parse_partial(b"")
+    }
+
+    /// Parses the provided data chunk under the assumption
+    /// that it contains all the data that will be parsed. When this
+    /// method is used for parsing the finalization method should not
+    /// be invoked, because this method already calls it.
+    pub fn parse_complete(&mut self, data: &[u8]) {
+        self.parse_partial(data);
+        self.finalize()
+    }
+
+    /// Parses the provided data chunk, searching for argument separators and '=' to locate names and values,
+    /// keeping state to allow streaming parsing, i.e., the parsing where only partial information is available
+    /// at any one time. The method `finalize()` must be invoked at the end to finalize parsing.
+    pub fn parse_partial(&mut self, data: &[u8]) {
+        self.field.add(data);
+        // Work on a copy of the buffer so `self` can be mutated while segments are walked.
+        let input = self.field.clone();
+        let mut input = input.as_slice();
+        if input.is_empty() {
+            // Finalizing after data was seen but no parameter was produced:
+            // record a single empty parameter.
+            if self.complete && self.params.size() == 0 && self.saw_data {
+                self.params.add(Bstr::new(), Bstr::new());
+            }
+            return;
+        }
+        let mut remaining: &[u8] = b"";
+        let sep = self.argument_separator;
+        self.saw_data = true;
+        if !self.complete {
+            // More data may follow: only parse up to the last separator and
+            // keep whatever comes after it buffered for the next call.
+            let data: Vec<&[u8]> = input.rsplitn(2, |c| *c == sep).collect();
+            if data.len() == 2 {
+                input = data[1];
+                remaining = data[0];
+            } else {
+                // No separator yet; everything stays buffered.
+                return;
+            }
+        }
+        input.split(|c| *c == sep).for_each(|segment| {
+            // `name_value` yields the name as output and the value as the rest.
+            if let Ok((value, name)) = name_value(segment) {
+                let mut name = Bstr::from(name);
+                let mut value = Bstr::from(value);
+                if self.decode_url_encoding {
+                    if let Ok((_, (consumed, flags, expected_status))) =
+                        decode_uri(name.as_slice(), &self.cfg)
+                    {
+                        self.flags.set(flags);
+                        self.response_status_expected_number = expected_status;
+                        name.clear();
+                        name.add(consumed);
+                    }
+                    if let Ok((_, (consumed, flags, expected_status))) =
+                        decode_uri(value.as_slice(), &self.cfg)
+                    {
+                        self.flags.set(flags);
+                        self.response_status_expected_number = expected_status;
+                        value.clear();
+                        value.add(consumed);
+                    }
+                }
+                self.params.add(name, value);
+            }
+        });
+        self.field.clear();
+        self.field.add(remaining);
+    }
+}
+
+impl Default for Parser {
+    /// Construct a new Parser with default values
+    fn default() -> Self {
+        // Same field values as `new`, with the default decoder configuration.
+        Self::new(DecoderConfig::default())
+    }
+}
+
+/// Extracts names and values from the url parameters
+///
+/// Splits one parameter at its first '='. The name is the parser output and
+/// the value is what remains after the '='. Fails on an empty segment.
+fn name_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    map(
+        tuple((peek(take(1usize)), take_till(|c| c == b'='), opt(char('=')))),
+        |(_, name, _)| name,
+    )(input)
+}
+
+/// Lenient hex-digit conversion used by `x2c`. Non-hex bytes are converted
+/// too; wrapping arithmetic keeps such input from overflowing.
+fn x2c_nibble(c: u8) -> u8 {
+    if c >= b'A' {
+        // Clearing bit 0x20 folds lowercase letters onto uppercase.
+        (c & 0xdf).wrapping_sub(b'A').wrapping_add(10)
+    } else {
+        c.wrapping_sub(b'0')
+    }
+}
+
+/// Convert the first two input bytes into a single byte by assuming the
+/// input consists of hexadecimal characters. This function will happily
+/// convert invalid input: all arithmetic wraps, so non-hex bytes give an
+/// arbitrary but deterministic result instead of an overflow panic.
+///
+/// Returns the remaining input and the hex-decoded byte; fails only when
+/// fewer than two bytes are available.
+fn x2c(input: &[u8]) -> IResult<&[u8], u8> {
+    let (input, (c1, c2)) = tuple((be_u8, be_u8))(input)?;
+    let decoded_byte = x2c_nibble(c1)
+        .wrapping_mul(16)
+        .wrapping_add(x2c_nibble(c2));
+    Ok((input, decoded_byte))
+}
+
+/// Decode a path %u-encoded character, using best-fit mapping as necessary.
+///
+/// Expects the four characters that follow "%u". Returns the remaining
+/// input together with the decoded byte, the flags raised while decoding
+/// and the expected status code.
+fn path_decode_u_encoding<'a>(
+    i: &'a [u8], cfg: &DecoderConfig,
+) -> IResult<&'a [u8], (u8, u64, HtpUnwanted)> {
+    let mut flags = 0;
+    let mut expected_status_code = HtpUnwanted::IGNORE;
+    let (i, c1) = x2c(i)?;
+    let (i, c2) = x2c(i)?;
+    let mut r = c2;
+    if c1 == 0 {
+        // High byte unused: a single byte was encoded the long way.
+        flags.set(HtpFlags::PATH_OVERLONG_U)
+    } else {
+        // Check for fullwidth form evasion
+        if c1 == 0xff {
+            flags.set(HtpFlags::PATH_HALF_FULL_RANGE)
+        }
+        expected_status_code = cfg.u_encoding_unwanted;
+        // Use best-fit mapping
+        r = cfg.bestfit_map.get(bestfit_key!(c1, c2));
+    }
+    // Check for encoded path separators
+    if r == b'/' || cfg.backslash_convert_slashes && r == b'\\' {
+        flags.set(HtpFlags::PATH_ENCODED_SEPARATOR)
+    }
+    Ok((i, (r, flags, expected_status_code)))
+}
+
+/// Decode a %u-encoded character, using best-fit mapping as necessary. Params version.
+///
+/// Returns the remaining input, the decoded byte and the flags raised.
+fn decode_u_encoding_params<'a>(i: &'a [u8], cfg: &DecoderConfig) -> IResult<&'a [u8], (u8, u64)> {
+    let (i, c1) = x2c(i)?;
+    let (i, c2) = x2c(i)?;
+    let mut flags = 0;
+    // Check for overlong usage first.
+    if c1 == 0 {
+        flags.set(HtpFlags::URLEN_OVERLONG_U);
+        return Ok((i, (c2, flags)));
+    }
+    // Both bytes were used.
+    // Detect half-width and full-width range.
+    if c1 == 0xff && c2 <= 0xef {
+        flags.set(HtpFlags::URLEN_HALF_FULL_RANGE)
+    }
+    // Use best-fit mapping.
+    Ok((i, (cfg.bestfit_map.get(bestfit_key!(c1, c2)), flags)))
+}
+
+/// Decodes path valid uencoded params according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn path_decode_valid_u_encoding(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ {
+    move |remaining_input| {
+        let (left, _) = tag_no_case("u")(remaining_input)?;
+        // Unless %u decoding is enabled, a literal '%' is produced and
+        // nothing after it is consumed.
+        let mut output = remaining_input;
+        let mut byte = b'%';
+        let mut flags = 0;
+        let mut expected_status_code = HtpUnwanted::IGNORE;
+        if cfg.u_encoding_decode {
+            let (left, hex) = take_while_m_n(4, 4, |c: u8| c.is_ascii_hexdigit())(left)?;
+            output = left;
+            expected_status_code = cfg.u_encoding_unwanted;
+            // Decode a valid %u encoding.
+            let (_, (b, f, c)) = path_decode_u_encoding(hex, cfg)?;
+            byte = b;
+            flags.set(f);
+            if c != HtpUnwanted::IGNORE {
+                expected_status_code = c;
+            }
+            if byte == 0 {
+                flags.set(HtpFlags::PATH_ENCODED_NUL);
+                if cfg.nul_encoded_unwanted != HtpUnwanted::IGNORE {
+                    expected_status_code = cfg.nul_encoded_unwanted
+                }
+                if cfg.nul_encoded_terminates {
+                    // Terminate the path at the encoded NUL byte.
+                    return Ok((b"", (byte, expected_status_code, flags, false)));
+                }
+            }
+        }
+        let (byte, code) = path_decode_control(byte, cfg);
+        if code != HtpUnwanted::IGNORE {
+            expected_status_code = code;
+        }
+        Ok((output, (byte, expected_status_code, flags, true)))
+    }
+}
+
+/// Decodes path invalid uencoded params according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn path_decode_invalid_u_encoding(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ {
+    move |remaining_input| {
+        // Default outcome: emit a literal '%' and consume nothing after it.
+        let mut output = remaining_input;
+        let mut byte = b'%';
+        let mut flags = 0;
+        let mut expected_status_code = HtpUnwanted::IGNORE;
+        let (left, _) = tag_no_case("u")(remaining_input)?;
+        if cfg.u_encoding_decode {
+            // Any four bytes are accepted here, hex digits or not.
+            let (left, hex) = take(4usize)(left)?;
+            // Invalid %u encoding
+            flags = HtpFlags::PATH_INVALID_ENCODING;
+            expected_status_code = cfg.url_encoding_invalid_unwanted;
+            if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT {
+                // Do not place anything in output; consume the %.
+                return Ok((remaining_input, (byte, expected_status_code, flags, false)));
+            } else if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID {
+                // Decode the four bytes anyway and consume them.
+                let (_, (b, f, c)) = path_decode_u_encoding(hex, cfg)?;
+                if c != HtpUnwanted::IGNORE {
+                    expected_status_code = c;
+                }
+                flags.set(f);
+                byte = b;
+                output = left;
+            }
+        }
+        let (byte, code) = path_decode_control(byte, cfg);
+        if code != HtpUnwanted::IGNORE {
+            expected_status_code = code;
+        }
+        Ok((output, (byte, expected_status_code, flags, true)))
+    }
+}
+
+/// Decodes path valid hex according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn path_decode_valid_hex(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ {
+    move |remaining_input| {
+        let original_remaining = remaining_input;
+        // Valid encoding (2 xbytes); %u sequences are handled elsewhere.
+        not(tag_no_case("u"))(remaining_input)?;
+        let (mut left, hex) = take_while_m_n(2, 2, |c: u8| c.is_ascii_hexdigit())(remaining_input)?;
+        let mut flags = 0;
+        // Convert from hex.
+        let (_, mut byte) = x2c(hex)?;
+        if byte == 0 {
+            flags.set(HtpFlags::PATH_ENCODED_NUL);
+            // NOTE(review): `nul_encoded_unwanted` is only reported when the
+            // path is terminated here, whereas `path_decode_valid_u_encoding`
+            // reports it for every encoded NUL - confirm this is intended.
+            if cfg.nul_encoded_terminates {
+                // Terminate the path at the encoded NUL byte.
+                return Ok((b"", (byte, cfg.nul_encoded_unwanted, flags, false)));
+            }
+        }
+        if byte == b'/' || (cfg.backslash_convert_slashes && byte == b'\\') {
+            flags.set(HtpFlags::PATH_ENCODED_SEPARATOR);
+            if !cfg.path_separators_decode {
+                // Leave encoded: emit the '%' and do not consume the two hex digits.
+                byte = b'%';
+                left = original_remaining;
+            }
+        }
+        let (byte, expected_status_code) = path_decode_control(byte, cfg);
+        Ok((left, (byte, expected_status_code, flags, true)))
+    }
+}
+
+/// Decodes invalid path hex according to the given cfg settings.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn path_decode_invalid_hex(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ {
+    move |remaining_input| {
+        let mut remaining = remaining_input;
+        // Not a %u sequence; take the next two bytes whatever they are.
+        not(tag_no_case("u"))(remaining_input)?;
+        let (left, hex) = take(2usize)(remaining_input)?;
+        let mut byte = b'%';
+        // Invalid encoding
+        let flags = HtpFlags::PATH_INVALID_ENCODING;
+        let expected_status_code = cfg.url_encoding_invalid_unwanted;
+        if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT {
+            // Do not place anything in output; consume the %.
+            return Ok((remaining_input, (byte, expected_status_code, flags, false)));
+        } else if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID {
+            // Decode the two bytes anyway and consume them.
+            let (_, b) = x2c(hex)?;
+            remaining = left;
+            byte = b;
+        }
+        // The status from the control check replaces (shadows) the one set above.
+        let (byte, expected_status_code) = path_decode_control(byte, cfg);
+        Ok((remaining, (byte, expected_status_code, flags, true)))
+    }
+}
+
+/// If the first byte of the input path string is a '%', it attempts to decode according to the
+/// configuration specified by cfg. Various flags (HTP_PATH_*) might be set. If something in the
+/// input would cause a particular server to respond with an error, the appropriate status
+/// code will be set.
+/// +/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output. +fn path_decode_percent( + cfg: &DecoderConfig, +) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ { + move |i| { + map( + tuple(( + char('%'), + alt(( + path_decode_valid_u_encoding(cfg), + path_decode_invalid_u_encoding(cfg), + move |remaining_input| { + let (_, _) = tag_no_case("u")(remaining_input)?; + // Incomplete invalid %u encoding + Ok(( + remaining_input, + ( + b'%', + cfg.url_encoding_invalid_unwanted, + HtpFlags::PATH_INVALID_ENCODING, + cfg.url_encoding_invalid_handling + != HtpUrlEncodingHandling::REMOVE_PERCENT, + ), + )) + }, + path_decode_valid_hex(cfg), + path_decode_invalid_hex(cfg), + move |remaining_input| { + // Invalid URL encoding (not even 2 bytes of data) + Ok(( + remaining_input, + ( + b'%', + cfg.url_encoding_invalid_unwanted, + HtpFlags::PATH_INVALID_ENCODING, + cfg.url_encoding_invalid_handling + != HtpUrlEncodingHandling::REMOVE_PERCENT, + ), + )) + }, + )), + )), + |(_, result)| result, + )(i) + } +} + +/// Assumes the input is already decoded and checks if it is null byte or control character, handling each +/// according to the decoder configurations settings. +/// +/// Returns parsed byte, corresponding status code, appropriate flags and whether the byte should be output. +fn path_parse_other( + cfg: &DecoderConfig, +) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ { + move |i| { + let (remaining_input, byte) = be_u8(i)?; + // One non-encoded byte. + // Did we get a raw NUL byte? + if byte == 0 && cfg.nul_raw_terminates { + // Terminate the path at the encoded NUL byte. 
+ return Ok((b"", (byte, cfg.nul_raw_unwanted, 0, false))); + } + let (byte, expected_status_code) = path_decode_control(byte, cfg); + Ok((remaining_input, (byte, expected_status_code, 0, true))) + } +} +/// Checks for control characters and converts them according to the cfg settings +/// +/// Returns decoded byte and expected_status_code +fn path_decode_control(mut byte: u8, cfg: &DecoderConfig) -> (u8, HtpUnwanted) { + // Note: What if an invalid encoding decodes into a path + // separator? This is theoretical at the moment, because + // the only platform we know doesn't convert separators is + // Apache, who will also respond with 400 if invalid encoding + // is encountered. Thus no check for a separator here. + // Place the character into output + // Check for control characters + let expected_status_code = if byte < 0x20 { + cfg.control_chars_unwanted + } else { + HtpUnwanted::IGNORE + }; + // Convert backslashes to forward slashes, if necessary + if byte == b'\\' && cfg.backslash_convert_slashes { + byte = b'/' + } + // Lowercase characters, if necessary + if cfg.convert_lowercase { + byte = byte.to_ascii_lowercase() + } + (byte, expected_status_code) +} + +/// Performs decoding of the input path uri string, according to the configuration specified +/// by cfg. Various flags (HTP_PATH_*) might be set. If something in the input would +/// cause a particular server to respond with an error, the appropriate status +/// code will be set. 
+///
+/// Returns decoded bytes, flags set during decoding, and corresponding status code
+fn path_decode_uri<'a>(
+    input: &'a [u8], cfg: &DecoderConfig,
+) -> IResult<&'a [u8], (Vec<u8>, u64, HtpUnwanted)> {
+    fold_many0(
+        alt((path_decode_percent(cfg), path_parse_other(cfg))),
+        || (Vec::new(), 0, HtpUnwanted::IGNORE),
+        |mut acc: (Vec<_>, u64, HtpUnwanted), (byte, code, flag, insert)| {
+            // If we're compressing separators, a '/' that directly follows
+            // another '/' already in the output is dropped.
+            let repeated_separator =
+                byte == b'/' && cfg.path_separators_compress && acc.0.last() == Some(&b'/');
+            if insert && !repeated_separator {
+                acc.0.push(byte);
+            }
+            acc.1.set(flag);
+            // Keep the latest status that is actually set; an ordinary byte later in
+            // the path must not reset it to IGNORE.
+            if code != HtpUnwanted::IGNORE {
+                acc.2 = code;
+            }
+            acc
+        },
+    )(input)
+}
+
+/// Decode the parsed uri path inplace according to the settings in the
+/// transaction configuration structure.
+///
+/// On success `path` is replaced by the decoded bytes, `status` is overwritten with the
+/// status computed for this path and the decoding flags are added to `flag`. If decoding
+/// fails, none of the arguments are modified.
+pub fn path_decode_uri_inplace(
+    decoder_cfg: &DecoderConfig, flag: &mut u64, status: &mut HtpUnwanted, path: &mut Bstr,
+) {
+    if let Ok((_, (consumed, flags, expected_status_code))) =
+        path_decode_uri(path.as_slice(), decoder_cfg)
+    {
+        path.clear();
+        path.add(consumed.as_slice());
+        *status = expected_status_code;
+        flag.set(flags);
+    }
+}
+
+/// Performs decoding of the input uri string, according to the configuration specified
+/// by cfg. Various flags (HTP_URLEN_*) might be set. If something in the input would
+/// cause a particular server to respond with an error, the appropriate status
+/// code will be set.
+///
+/// Returns decoded bytes, flags set during decoding, and corresponding status code
+fn decode_uri<'a>(
+    input: &'a [u8], cfg: &DecoderConfig,
+) -> IResult<&'a [u8], (Vec<u8>, u64, HtpUnwanted)> {
+    fold_many0(
+        // A percent escape is tried first, then '+', then any other single byte.
+        alt((decode_percent(cfg), decode_plus(cfg), unencoded_byte(cfg))),
+        || (Vec::new(), 0, HtpUnwanted::IGNORE),
+        |mut acc: (Vec<_>, u64, HtpUnwanted), (byte, code, flag, insert)| {
+            if insert {
+                acc.0.push(byte);
+            }
+            acc.1.set(flag);
+            // Keep the latest status that is actually set.
+            if code != HtpUnwanted::IGNORE {
+                acc.2 = code;
+            }
+            acc
+        },
+    )(input)
+}
+
+/// Performs decoding of the uri string, according to the configuration specified
+/// by cfg. The URLEN_* invalid-encoding, encoded-NUL and raw-NUL flags raised while
+/// decoding are added to `flags` as their PATH_* counterparts.
+///
+/// Returns the decoded bytes, or an error if decoding fails.
+pub fn decode_uri_with_flags(
+    decoder_cfg: &DecoderConfig, flags: &mut u64, input: &[u8],
+) -> Result<Bstr> {
+    let (_, (consumed, f, _)) = decode_uri(input, decoder_cfg)?;
+    if f.is_set(HtpFlags::URLEN_INVALID_ENCODING) {
+        flags.set(HtpFlags::PATH_INVALID_ENCODING);
+    }
+    if f.is_set(HtpFlags::URLEN_ENCODED_NUL) {
+        flags.set(HtpFlags::PATH_ENCODED_NUL);
+    }
+    if f.is_set(HtpFlags::URLEN_RAW_NUL) {
+        flags.set(HtpFlags::PATH_RAW_NUL);
+    }
+    Ok(Bstr::from(consumed))
+}
+
+/// Performs in-place decoding of the input uri string, according to the configuration specified by cfg.
+/// The flags and status code produced while decoding are discarded.
+///
+/// Returns OK on success, ERROR on failure (in which case `input` is left unchanged).
+pub fn decode_uri_inplace(cfg: &DecoderConfig, input: &mut Bstr) -> Result<()> {
+    let (_, (consumed, _, _)) = decode_uri(input.as_slice(), cfg)?;
+    input.clear();
+    input.add(consumed.as_slice());
+    Ok(())
+}
+
+/// Decodes valid uencoded hex bytes according to the given cfg settings.
+/// e.g. "u0064" -> "d"
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn decode_valid_u_encoding(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ {
+    move |input| {
+        let (after_marker, _) = alt((char('u'), char('U')))(input)?;
+        if !cfg.u_encoding_decode {
+            // %u decoding is disabled: emit a literal '%' and leave "u..." in the input.
+            return Ok((input, (b'%', HtpUnwanted::IGNORE, 0, true)));
+        }
+        // Exactly four hex digits must follow the marker.
+        let (rest, hex) = take_while_m_n(4, 4, |c: u8| c.is_ascii_hexdigit())(after_marker)?;
+        let (_, (byte, flags)) = decode_u_encoding_params(hex, cfg)?;
+        Ok((rest, (byte, cfg.u_encoding_unwanted, flags, true)))
+    }
+}
+
+/// Handles a `%u` escape whose four following bytes are not all hex digits,
+/// e.g. "u00}9" -> "i" when invalid encodings are processed.
+///
+/// With `cfg.u_encoding_decode` off, a literal `%` is produced and nothing is flagged.
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn decode_invalid_u_encoding(
+    cfg: &DecoderConfig,
+) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ {
+    move |input| {
+        let (after_marker, _) = alt((char('u'), char('U')))(input)?;
+        if !cfg.u_encoding_decode {
+            return Ok((input, (b'%', HtpUnwanted::IGNORE, 0, true)));
+        }
+        // Invalid %u encoding (could not find 4 xdigits).
+        let (rest, invalid_hex) = take(4usize)(after_marker)?;
+        let mut flags = 0;
+        flags.set(HtpFlags::URLEN_INVALID_ENCODING);
+        // The invalid-encoding status wins; fall back to the %u status when it is unset.
+        let code = if cfg.url_encoding_invalid_unwanted != HtpUnwanted::IGNORE {
+            cfg.url_encoding_invalid_unwanted
+        } else {
+            cfg.u_encoding_unwanted
+        };
+        if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT {
+            // Do not place anything in output; consume the %.
+            return Ok((input, (b'%', code, flags, false)));
+        }
+        if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID {
+            // Decode the four bytes anyway and consume them.
+            let (_, (byte, extra_flags)) = decode_u_encoding_params(invalid_hex, cfg)?;
+            flags.set(extra_flags);
+            return Ok((rest, (byte, code, flags, true)));
+        }
+        // Any other handling: keep the '%' and leave "u...." in the input.
+        Ok((input, (b'%', code, flags, true)))
+    }
+}
+
+/// Decodes valid hex byte.
+/// e.g. "2f" -> "/"
+///
+/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output.
+fn decode_valid_hex() -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> { + move |input| { + // Valid encoding (2 xbytes) + not(alt((char('u'), char('U'))))(input)?; + let (input, hex) = take_while_m_n(2, 2, |c: u8| c.is_ascii_hexdigit())(input)?; + let (_, byte) = x2c(hex)?; + Ok((input, (byte, HtpUnwanted::IGNORE, 0, true))) + } +} + +/// Decodes invalid hex byte according to the given cfg settings. +/// e.g. "}9" -> "i" +/// +/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output. +fn decode_invalid_hex( + cfg: &DecoderConfig, +) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ { + move |mut input| { + not(alt((char('u'), char('U'))))(input)?; + // Invalid encoding (2 bytes, but not hexadecimal digits). + let mut byte = b'%'; + let mut insert = true; + if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::REMOVE_PERCENT { + // Do not place anything in output; consume the %. + insert = false; + } else if cfg.url_encoding_invalid_handling == HtpUrlEncodingHandling::PROCESS_INVALID { + let (left, b) = x2c(input)?; + input = left; + byte = b; + } + Ok(( + input, + ( + byte, + cfg.url_encoding_invalid_unwanted, + HtpFlags::URLEN_INVALID_ENCODING, + insert, + ), + )) + } +} + +/// If the first byte of the input string is a '%', it attempts to decode according to the +/// configuration specified by cfg. Various flags (HTP_URLEN_*) might be set. If something in the +/// input would cause a particular server to respond with an error, the appropriate status +/// code will be set. +/// +/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output. 
+fn decode_percent( + cfg: &DecoderConfig, +) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ { + move |i| { + let (input, _) = char('%')(i)?; + let (input, (byte, mut expected_status_code, mut flags, insert)) = alt(( + decode_valid_u_encoding(cfg), + decode_invalid_u_encoding(cfg), + decode_valid_hex(), + decode_invalid_hex(cfg), + move |input| { + // Invalid %u encoding; not enough data. (not even 2 bytes) + // Do not place anything in output if REMOVE_PERCENT; consume the %. + Ok(( + input, + ( + b'%', + cfg.url_encoding_invalid_unwanted, + HtpFlags::URLEN_INVALID_ENCODING, + !(cfg.url_encoding_invalid_handling + == HtpUrlEncodingHandling::REMOVE_PERCENT), + ), + )) + }, + ))(input)?; + //Did we get an encoded NUL byte? + if byte == 0 { + flags.set(HtpFlags::URLEN_ENCODED_NUL); + if cfg.nul_encoded_unwanted != HtpUnwanted::IGNORE { + expected_status_code = cfg.nul_encoded_unwanted + } + if cfg.nul_encoded_terminates { + // Terminate the path at the encoded NUL byte. + return Ok((b"", (byte, expected_status_code, flags, false))); + } + } + Ok((input, (byte, expected_status_code, flags, insert))) + } +} + +/// Consumes the next nullbyte if it is a '+', decoding it according to the cfg +/// +/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output. +fn decode_plus( + cfg: &DecoderConfig, +) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ { + move |input| { + let (input, byte) = map(char('+'), |byte| { + // Decoding of the plus character is conditional on the configuration. + if cfg.plusspace_decode { + 0x20 + } else { + byte as u8 + } + })(input)?; + Ok((input, (byte, HtpUnwanted::IGNORE, 0, true))) + } +} + +/// Consumes the next byte in the input string and treats it as an unencoded byte. +/// Handles raw null bytes according to the input cfg settings. +/// +/// Returns decoded byte, corresponding status code, appropriate flags and whether the byte should be output. 
+fn unencoded_byte( + cfg: &DecoderConfig, +) -> impl Fn(&[u8]) -> IResult<&[u8], (u8, HtpUnwanted, u64, bool)> + '_ { + move |input| { + let (input, byte) = be_u8(input)?; + // One non-encoded byte. + // Did we get a raw NUL byte? + if byte == 0 { + return Ok(( + if cfg.nul_raw_terminates { b"" } else { input }, + ( + byte, + cfg.nul_raw_unwanted, + HtpFlags::URLEN_RAW_NUL, + !cfg.nul_raw_terminates, + ), + )); + } + Ok((input, (byte, HtpUnwanted::IGNORE, 0, true))) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::config::Config; + use rstest::rstest; + + #[rstest] + #[case::empty("", &[])] + #[case::empty_key_value("&", &[("", "")])] + #[case::empty_key_value("=&", &[("", "")])] + #[case::empty_key_value("&=", &[("", "")])] + #[case::empty_key_value("&&", &[("", "")])] + #[case::empty_key_value("=", &[("", "")])] + #[case::empty_key("=1&", &[("", "1")])] + #[case::empty_key("=p", &[("", "p")])] + #[case::empty_value("p", &[("p", "")])] + #[case::empty_value("p=", &[("p", "")])] + #[case::empty_value("p&", &[("p", "")])] + #[case::pair("p=1", &[("p", "1")])] + #[case::two_pair("p=1&q=2", &[("p", "1"), ("q", "2")])] + #[case::two_keys("p&q", &[("p", ""), ("q", "")])] + #[case::two_keys_one_value("p&q=2", &[("p", ""), ("q", "2")])] + fn test_parse_complete(#[case] input: &str, #[case] expected: &[(&str, &str)]) { + let mut urlenp = Parser::default(); + urlenp.parse_complete(input.as_bytes()); + for (key, value) in expected { + assert!(urlenp.params.get_nocase(key).unwrap().1.eq_slice(value)); + } + assert_eq!( + expected.len(), + urlenp.params.size(), + "Test case expected {} params. 
parse_complete resulted in {} params.", + expected.len(), + urlenp.params.size() + ); + } + + #[rstest] + #[case::empty_value(&["p"], &[("p", "")])] + #[case::empty_value(&["p", "x"], &[("px", "")])] + #[case::empty_value(&["p", "x&"], &[("px", "")])] + #[case::empty_value(&["p", "="], &[("p", "")])] + #[case::empty_value(&["p", "", "", ""], &[("p", "")])] + #[case::two_pairs( + &["px", "n", "", "=", "1", "2", "&", "qz", "n", "", "=", "2", "3", "&"], + &[("pxn", "12"), ("qzn", "23")] + )] + fn test_parse_partial(#[case] input: &[&str], #[case] expected: &[(&str, &str)]) { + let mut urlenp = Parser::default(); + for i in input { + urlenp.parse_partial(i.as_bytes()); + } + urlenp.finalize(); + for (key, value) in expected { + assert!(urlenp.params.get_nocase(key).unwrap().1.eq_slice(value)); + } + assert_eq!( + expected.len(), + urlenp.params.size(), + "Test case expected {} params. parse_complete resulted in {} params.", + expected.len(), + urlenp.params.size() + ); + } + + #[rstest] + #[case("/dest", "/dest", "/dest", "/dest")] + #[case("/%64est", "/dest", "/dest", "/dest")] + #[case("/%xxest", "/1est", "/%xxest", "/xxest")] + #[case("/%a", "/%a", "/%a", "/a")] + #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC")] + #[case("/%u0064", "/%u0064", "/%u0064", "/%u0064")] + #[case("/%u006", "/%u006", "/%u006", "/%u006")] + #[case("/%uXXXX", "/%uXXXX", "/%uXXXX", "/%uXXXX")] + #[case("/%u0000ABC", "/%u0000ABC", "/%u0000ABC", "/%u0000ABC")] + #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC")] + #[case("/one%2ftwo", "/one/two", "/one/two", "/one/two")] + fn test_decode_uri( + #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str, + #[case] expected_remove: &str, + ) { + let i = Bstr::from(input); + let mut cfg = Config::default(); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID); + assert_eq!( + decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, + expected_process.as_bytes() + ); + + 
cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + assert_eq!( + decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, + expected_preserve.as_bytes() + ); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT); + assert_eq!( + decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, + expected_remove.as_bytes() + ); + } + + #[rstest] + #[case("/dest", "/dest", "/dest", "/dest")] + #[case("/%64est", "/dest", "/dest", "/dest")] + #[case("/%xxest", "/1est", "/%xxest", "/xxest")] + #[case("/%a", "/%a", "/%a", "/a")] + #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC")] + #[case("/%u0064", "/d", "/d", "/d")] + #[case("/%U0064", "/d", "/d", "/d")] + #[case("/%u006", "/%u006", "/%u006", "/u006")] + #[case("/%uXXXX", "/?", "/%uXXXX", "/uXXXX")] + #[case("/%u0000ABC", "/\0ABC", "/\0ABC", "/\0ABC")] + #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC")] + #[case("/one%2ftwo", "/one/two", "/one/two", "/one/two")] + fn test_decode_uri_decode( + #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str, + #[case] expected_remove: &str, + ) { + let i = Bstr::from(input); + let mut cfg = Config::default(); + cfg.set_u_encoding_decode(true); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID); + assert_eq!( + decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, + expected_process.as_bytes() + ); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + assert_eq!( + decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, + expected_preserve.as_bytes() + ); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT); + assert_eq!( + decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, + expected_remove.as_bytes() + ); + } + + #[rstest] + #[case("/%u0000ABC")] + #[case("/%00ABC")] + #[case("/\0ABC")] + fn test_decode_uri_nul_terminates(#[case] input: &str) { + let i = Bstr::from(input); + let mut cfg = Config::default(); + cfg.set_u_encoding_decode(true); + 
cfg.set_nul_encoded_terminates(true); + cfg.set_nul_raw_terminates(true); + assert_eq!(decode_uri(&i, &cfg.decoder_cfg).unwrap().1 .0, b"/"); + } + + #[rstest] + #[case("/dest", "/dest", "/dest", "/dest", 0)] + #[case("/%64est", "/dest", "/dest", "/dest", 0)] + #[case( + "/%xxest", + "/1est", + "/%xxest", + "/xxest", + HtpFlags::PATH_INVALID_ENCODING + )] + #[case("/%a", "/%a", "/%a", "/a", HtpFlags::PATH_INVALID_ENCODING)] + #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC", HtpFlags::PATH_ENCODED_NUL)] + #[case("/%u0064", "/%u0064", "/%u0064", "/%u0064", 0)] + #[case("/%u006", "/%u006", "/%u006", "/%u006", 0)] + #[case("/%uXXXX", "/%uXXXX", "/%uXXXX", "/%uXXXX", 0)] + #[case("/%u0000ABC", "/%u0000ABC", "/%u0000ABC", "/%u0000ABC", 0)] + #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC", 0)] + #[case( + "/one%2ftwo", + "/one%2ftwo", + "/one%2ftwo", + "/one%2ftwo", + HtpFlags::PATH_ENCODED_SEPARATOR + )] + fn test_path_decode_uri_inplace( + #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str, + #[case] expected_remove: &str, #[case] flags: u64, + ) { + let mut cfg = Config::default(); + let mut response_status_expected_number = HtpUnwanted::IGNORE; + + let mut input_process = Bstr::from(input); + let mut flags_process = 0; + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID); + path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags_process, + &mut response_status_expected_number, + &mut input_process, + ); + assert_eq!(input_process, Bstr::from(expected_process)); + assert_eq!(flags_process, flags); + + let mut input_preserve = Bstr::from(input); + let mut flags_preserve = 0; + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags_preserve, + &mut response_status_expected_number, + &mut input_preserve, + ); + assert_eq!(input_preserve, Bstr::from(expected_preserve)); + assert_eq!(flags_preserve, flags); + + let mut 
input_remove = Bstr::from(input); + let mut flags_remove = 0; + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT); + path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags_remove, + &mut response_status_expected_number, + &mut input_remove, + ); + assert_eq!(input_remove, Bstr::from(expected_remove)); + assert_eq!(flags_remove, flags); + } + + #[rstest] + #[case("/dest", "/dest", "/dest", "/dest", 0)] + #[case("/%64est", "/dest", "/dest", "/dest", 0)] + #[case( + "/%xxest", + "/1est", + "/%xxest", + "/xxest", + HtpFlags::PATH_INVALID_ENCODING + )] + #[case("/%a", "/%a", "/%a", "/a", HtpFlags::PATH_INVALID_ENCODING)] + #[case("/%00ABC", "/\0ABC", "/\0ABC", "/\0ABC", HtpFlags::PATH_ENCODED_NUL)] + #[case("/%u0064", "/d", "/d", "/d", HtpFlags::PATH_OVERLONG_U)] + #[case("/%U0064", "/d", "/d", "/d", HtpFlags::PATH_OVERLONG_U)] + #[case("/%u006", "/%u006", "/%u006", "/u006", HtpFlags::PATH_INVALID_ENCODING)] + #[case("/%uXXXX", "/?", "/%uXXXX", "/uXXXX", HtpFlags::PATH_INVALID_ENCODING)] + #[case("/%u0000ABC", "/\0ABC", "/\0ABC", "/\0ABC", HtpFlags::PATH_ENCODED_NUL | HtpFlags::PATH_OVERLONG_U)] + #[case("/\0ABC", "/\0ABC", "/\0ABC", "/\0ABC", 0)] + #[case( + "/one%2ftwo", + "/one%2ftwo", + "/one%2ftwo", + "/one%2ftwo", + HtpFlags::PATH_ENCODED_SEPARATOR + )] + fn test_path_decode_uri_inplace_decode( + #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str, + #[case] expected_remove: &str, #[case] flags: u64, + ) { + let mut cfg = Config::default(); + cfg.set_u_encoding_decode(true); + let mut response_status_expected_number = HtpUnwanted::IGNORE; + + let mut input_process = Bstr::from(input); + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID); + let mut flags_process = 0; + path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags_process, + &mut response_status_expected_number, + &mut input_process, + ); + assert_eq!(input_process, Bstr::from(expected_process)); + 
assert_eq!(flags_process, flags); + + let mut input_preserve = Bstr::from(input); + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + let mut flags_preserve = 0; + path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags_preserve, + &mut response_status_expected_number, + &mut input_preserve, + ); + assert_eq!(input_preserve, Bstr::from(expected_preserve)); + assert_eq!(flags_preserve, flags); + + let mut input_remove = Bstr::from(input); + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT); + let mut flags_remove = 0; + path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags_remove, + &mut response_status_expected_number, + &mut input_remove, + ); + assert_eq!(input_remove, Bstr::from(expected_remove)); + assert_eq!(flags_remove, flags); + } + + #[rstest] + #[case("/%u0000ABC", HtpFlags::PATH_ENCODED_NUL | HtpFlags::PATH_OVERLONG_U)] + #[case("/%00ABC", HtpFlags::PATH_ENCODED_NUL)] + #[case("/\0ABC", 0)] + fn test_path_decode_inplace_nul_terminates(#[case] input: &str, #[case] expected_flags: u64) { + let mut cfg = Config::default(); + cfg.set_u_encoding_decode(true); + cfg.set_nul_encoded_terminates(true); + cfg.set_nul_raw_terminates(true); + let mut i = Bstr::from(input); + let mut flags = 0; + let mut response_status_expected_number = HtpUnwanted::IGNORE; + path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags, + &mut response_status_expected_number, + &mut i, + ); + assert_eq!(i, Bstr::from("/")); + assert_eq!(flags, expected_flags); + } + + #[rstest] + #[case::encoded("/one%2ftwo")] + #[case::convert("/one\\two")] + #[case::compress("/one//two")] + fn test_path_decode_inplace_seps(#[case] input: &str) { + let mut cfg = Config::default(); + cfg.set_backslash_convert_slashes(true); + cfg.set_path_separators_decode(true); + cfg.set_path_separators_compress(true); + let mut i = Bstr::from(input); + let mut flags = 0; + let mut response_status_expected_number = HtpUnwanted::IGNORE; + 
path_decode_uri_inplace( + &cfg.decoder_cfg, + &mut flags, + &mut response_status_expected_number, + &mut i, + ); + assert_eq!(i, Bstr::from("/one/two")); + } + + #[rstest] + #[case( + "/one/tw%u006f/three/%u123", + "/one/two/three/%u123", + "/one/two/three/%u123", + "/one/two/three/u123" + )] + #[case( + "/one/tw%u006f/three/%3", + "/one/two/three/%3", + "/one/two/three/%3", + "/one/two/three/3" + )] + #[case( + "/one/tw%u006f/three/%uXXXX", + "/one/two/three/?", + "/one/two/three/%uXXXX", + "/one/two/three/uXXXX" + )] + fn test_decode_uri_inplace( + #[case] input: &str, #[case] expected_process: &str, #[case] expected_preserve: &str, + #[case] expected_remove: &str, + ) { + let mut cfg = Config::default(); + cfg.set_u_encoding_decode(true); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PROCESS_INVALID); + let mut input_process = Bstr::from(input); + decode_uri_inplace(&cfg.decoder_cfg, &mut input_process).unwrap(); + assert_eq!(input_process, Bstr::from(expected_process)); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::PRESERVE_PERCENT); + let mut input_preserve = Bstr::from(input); + decode_uri_inplace(&cfg.decoder_cfg, &mut input_preserve).unwrap(); + assert_eq!(input_preserve, Bstr::from(expected_preserve)); + + cfg.set_url_encoding_invalid_handling(HtpUrlEncodingHandling::REMOVE_PERCENT); + let mut input_remove = Bstr::from(input); + decode_uri_inplace(&cfg.decoder_cfg, &mut input_remove).unwrap(); + assert_eq!(input_remove, Bstr::from(expected_remove)); + } +} diff --git a/rust/htp/src/utf8_decoder.rs b/rust/htp/src/utf8_decoder.rs new file mode 100644 index 000000000000..ce6c77d965b1 --- /dev/null +++ b/rust/htp/src/utf8_decoder.rs @@ -0,0 +1,251 @@ +// Copyright (c) 2008-2009 Bjoern Hoehrmann +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software +// and associated documentation files (the "Software"), to deal in the Software without restriction, +// including without 
limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// +// Copyright (c) 2008-2009 Bjoern Hoehrmann +// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. +use crate::{ + bstr::Bstr, + config::{DecoderConfig, HtpUnwanted}, + unicode_bestfit_map::UnicodeBestfitMap, + util::{FlagOperations, HtpFlags}, +}; + +static utf8d: [u8; 400] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, 0xb, 0x6, 0x6, + 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 
0x8, 0, 0x1, 0x2, 0x3, 0x5, 0x8, + 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, + 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +]; +static utf8d_allow_overlong: [u8; 400] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, 0x6, 0x6, 0x6, + 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0, 0x1, 0x2, 0x3, 0x5, 0x8, + 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, + 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +]; + +#[derive(Clone)] +pub struct Utf8Decoder { + bestfit_map: UnicodeBestfitMap, + state: u32, + seq: u32, + codepoint: u32, + pub flags: u64, + pub 
seen_valid: bool, + pub decoded_bytes: Vec, +} + +impl Utf8Decoder { + /// Make a new owned Utf8Decoder + pub fn new(bestfit_map: UnicodeBestfitMap) -> Self { + Self { + bestfit_map, + state: 0, + seq: 0, + codepoint: 0, + flags: 0, + seen_valid: false, + decoded_bytes: Vec::new(), + } + } + + /// Decode utf8 byte using best-fit map. + fn decode_byte(&mut self, encoded_byte: u8, is_last_byte: bool) { + self.seq = self.seq.wrapping_add(1); + self.decode_byte_allow_overlong(encoded_byte as u32); + match self.state { + 0 => { + if self.seq == 1 { + // ASCII character, which we just copy. + self.decoded_bytes.push(self.codepoint as u8); + } else { + // A valid UTF-8 character, which we need to convert. + self.seen_valid = true; + // Check for overlong characters and set the flag accordingly. + if (self.seq == 2 && self.codepoint < 0x80) + || (self.seq == 3 && self.codepoint < 0x800) + || (self.seq == 4 && self.codepoint < 0x10000) + { + self.flags.set(HtpFlags::PATH_UTF8_OVERLONG); + } + // Special flag for half-width/full-width evasion. + if self.codepoint >= 0xff00 && self.codepoint <= 0xffef { + self.flags.set(HtpFlags::PATH_HALF_FULL_RANGE) + } + // Use best-fit mapping to convert to a single byte. + self.decoded_bytes.push(self.bestfit_codepoint()); + } + self.seq = 0; + } + 1 => { + // Invalid UTF-8 character. + self.flags.set(HtpFlags::PATH_UTF8_INVALID); + // Output the replacement byte, replacing one or more invalid bytes. + // If the invalid byte was first in a sequence, consume it. Otherwise, + // assume it's the starting byte of the next character. + self.state = 0; + self.codepoint = 0; + self.decoded_bytes.push(self.bestfit_map.replacement_byte); + if self.seq != 1 { + self.seq = 0; + self.decode_byte(encoded_byte, is_last_byte); + } else { + self.seq = 0; + } + } + _ => { + // The character is not yet formed. 
+ if is_last_byte { + // If the last input chunk ended with an incomplete byte sequence for a code point, + // this is an error and a replacement character is emitted hence starting from 1 not 0 + for _ in 1..self.seq { + self.decoded_bytes.push(self.bestfit_map.replacement_byte); + } + } + } + } + } + + /// Decode a UTF-8 encoded path. Replaces a possibly-invalid utf8 byte stream + /// with an ascii stream, storing the result in self.decoded_bytes. Overlong + /// characters will be decoded and invalid characters will be replaced with + /// the replacement byte specified in the bestfit_map. Best-fit mapping will be used + /// to convert UTF-8 into a single-byte stream. + fn decode_and_validate(&mut self, input: &[u8]) { + //Reset all internals + self.state = 0; + self.seq = 0; + self.codepoint = 0; + self.flags = 0; + self.decoded_bytes.clear(); + self.decoded_bytes.reserve(input.len()); + self.seen_valid = false; + for (byte, is_last) in input + .iter() + .enumerate() + .map(|(i, b)| (b, i + 1 == input.len())) + { + self.decode_byte(*byte, is_last); + } + // Did the input stream seem like a valid UTF-8 string? + if self.seen_valid && !self.flags.is_set(HtpFlags::PATH_UTF8_INVALID) { + self.flags.set(HtpFlags::PATH_UTF8_VALID) + } + } + + /// Process one byte of UTF-8 data and set the code point if one is available. Allows + /// overlong characters in input. 
+ /// + /// Sets the state to ACCEPT(0) for a valid character, REJECT(1) for an invalid character, + /// or OTHER(u32) if the character has not yet been formed + fn decode_byte_allow_overlong(&mut self, byte: u32) { + let type_0: u32 = utf8d_allow_overlong[byte as usize] as u32; + self.codepoint = if self.state != 0 { + (byte & 0x3f) | self.codepoint << 6 + } else { + (0xff >> type_0) & byte + }; + self.state = utf8d[(256u32) + .wrapping_add((self.state).wrapping_mul(16)) + .wrapping_add(type_0) as usize] as u32; + } + + /// Convert a Unicode codepoint into a single-byte, using best-fit + /// mapping (as specified in the provided configuration structure). + /// + /// Returns converted single byte + fn bestfit_codepoint(&self) -> u8 { + // Is it a single-byte codepoint? + if self.codepoint < 0x100 { + return self.codepoint as u8; + } + self.bestfit_map.get(self.codepoint) + } +} + +/// Decode a UTF-8 encoded path. Replaces a possibly-invalid utf8 byte stream with +/// an ascii stream. Overlong characters will be decoded and invalid characters will +/// be replaced with the replacement byte specified in the cfg. Best-fit mapping will +/// be used to convert UTF-8 into a single-byte stream. 
The resulting decoded path will +/// be stored in the input path if the transaction cfg indicates it +pub fn decode_and_validate_inplace( + cfg: &DecoderConfig, flags: &mut u64, status: &mut HtpUnwanted, path: &mut Bstr, +) { + let mut decoder = Utf8Decoder::new(cfg.bestfit_map); + decoder.decode_and_validate(path.as_slice()); + if cfg.utf8_convert_bestfit { + path.clear(); + path.add(decoder.decoded_bytes.as_slice()); + } + flags.set(decoder.flags); + + if flags.is_set(HtpFlags::PATH_UTF8_INVALID) && cfg.utf8_invalid_unwanted != HtpUnwanted::IGNORE + { + *status = cfg.utf8_invalid_unwanted; + } +} +#[cfg(test)] +mod tests { + use crate::{ + bstr::Bstr, config::Config, config::HtpUnwanted, utf8_decoder::decode_and_validate_inplace, + }; + use rstest::rstest; + + #[rstest] + #[case(b"\xf1.\xf1\xef\xbd\x9dabcd", "?.?}abcd")] + //1111 0000 1001 0000 1000 1101 1111 1111 + #[case::invalid_incomplete_seq(b"\xf0\x90\x8d\xff", "??")] + //1110 0010 1000 0010 + #[case::invalid_incomplete_seq(b"\xe2\x82", "?")] + //1100 0010 1111 1111 1111 0000 + #[case::invalid_incomplete_seq(b"\xc2\xff\xf0", "??")] + //1111 0000 1001 0000 0010 1000 1011 1100 + #[case::invalid_incomplete_seq(b"\xf0\x90\x28\xbc", "?(?")] + fn test_decode_and_validate_inplace(#[case] input: &[u8], #[case] expected: &str) { + let mut cfg = Config::default(); + cfg.set_utf8_convert_bestfit(true); + let mut i = Bstr::from(input); + let mut flags = 0; + let mut response_status_expected_number = HtpUnwanted::IGNORE; + decode_and_validate_inplace( + &cfg.decoder_cfg, + &mut flags, + &mut response_status_expected_number, + &mut i, + ); + assert_eq!(i, Bstr::from(expected)); + } +} diff --git a/rust/htp/src/util.rs b/rust/htp/src/util.rs new file mode 100644 index 000000000000..0c784cd44bee --- /dev/null +++ b/rust/htp/src/util.rs @@ -0,0 +1,811 @@ +//! Utility functions for http parsing. 
+ +use crate::{config::HtpServerPersonality, error::NomError}; +use nom::{ + branch::alt, + bytes::complete::{ + is_not, tag, tag_no_case, take_till, take_until, take_while, take_while1, take_while_m_n, + }, + bytes::streaming::{tag as streaming_tag, take_till as streaming_take_till}, + character::complete::{char, digit1}, + character::is_space as nom_is_space, + combinator::{map, opt}, + sequence::tuple, + Err::Incomplete, + IResult, Needed, +}; + +use std::str::FromStr; + +/// String for the libhtp version. +pub const HTP_VERSION_STRING_FULL: &'_ str = concat!("LibHTP v", env!("CARGO_PKG_VERSION"), "\x00"); + +/// Trait to allow interacting with flags. +pub trait FlagOperations { + /// Inserts the specified flags in-place. + fn set(&mut self, other: T); + /// Removes the specified flags in-place. + fn unset(&mut self, other: T); + /// Determine if the specified flags are set + fn is_set(&self, other: T) -> bool; +} + +impl FlagOperations for u8 { + /// Inserts the specified flags in-place. + fn set(&mut self, other: u8) { + *self |= other; + } + /// Removes the specified flags in-place. + fn unset(&mut self, other: u8) { + *self &= !other; + } + /// Determine if the specified flags are set + fn is_set(&self, other: u8) -> bool { + self & other != 0 + } +} + +impl FlagOperations for u64 { + /// Inserts the specified flags in-place. + fn set(&mut self, other: u64) { + *self |= other; + } + /// Removes the specified flags in-place. + fn unset(&mut self, other: u64) { + *self &= !other; + } + /// Determine if the specified flags are set + fn is_set(&self, other: u64) -> bool { + self & other != 0 + } +} + +/// Various flag bits. Even though we have a flag field in several places +/// (header, transaction, connection), these fields are all in the same namespace +/// because we may want to set the same flag in several locations. For example, we +/// may set HTP_FIELD_FOLDED on the actual folded header, but also on the transaction +/// that contains the header. 
Both uses are useful. +#[repr(C)] +pub struct HtpFlags; + +impl HtpFlags { + /// Field cannot be parsed. + pub const FIELD_UNPARSEABLE: u64 = 0x0000_0000_0004; + /// Field is invalid. + pub const FIELD_INVALID: u64 = 0x0000_0000_0008; + /// Field is folded. + pub const FIELD_FOLDED: u64 = 0x0000_0000_0010; + /// Field has been seen more than once. + pub const FIELD_REPEATED: u64 = 0x0000_0000_0020; + /// Field is too long. + pub const FIELD_LONG: u64 = 0x0000_0000_0040; + /// Field contains raw null byte. + pub const FIELD_RAW_NUL: u64 = 0x0000_0000_0080; + /// Detect HTTP request smuggling. + pub const REQUEST_SMUGGLING: u64 = 0x0000_0000_0100; + /// Invalid header folding. + pub const INVALID_FOLDING: u64 = 0x0000_0000_0200; + /// Invalid request transfer-encoding. + pub const REQUEST_INVALID_T_E: u64 = 0x0000_0000_0400; + /// Multiple chunks. + pub const MULTI_PACKET_HEAD: u64 = 0x0000_0000_0800; + /// No host information in header. + pub const HOST_MISSING: u64 = 0x0000_0000_1000; + /// Inconsistent host or port information. + pub const HOST_AMBIGUOUS: u64 = 0x0000_0000_2000; + /// Encoded path contains null. + pub const PATH_ENCODED_NUL: u64 = 0x0000_0000_4000; + /// Url encoded contains raw null. + pub const PATH_RAW_NUL: u64 = 0x0000_0000_8000; + /// Url encoding is invalid. + pub const PATH_INVALID_ENCODING: u64 = 0x0000_0001_0000; + /// Path is invalid. + pub const PATH_INVALID: u64 = 0x0000_0002_0000; + /// Overlong usage in path. + pub const PATH_OVERLONG_U: u64 = 0x0000_0004_0000; + /// Encoded path separators present. + pub const PATH_ENCODED_SEPARATOR: u64 = 0x0000_0008_0000; + /// At least one valid UTF-8 character and no invalid ones. + pub const PATH_UTF8_VALID: u64 = 0x0000_0010_0000; + /// Invalid utf8 in path. + pub const PATH_UTF8_INVALID: u64 = 0x0000_0020_0000; + /// Invalid utf8 overlong character. + pub const PATH_UTF8_OVERLONG: u64 = 0x0000_0040_0000; + /// Range U+FF00 - U+FFEF detected. 
+ pub const PATH_HALF_FULL_RANGE: u64 = 0x0000_0080_0000; + /// Status line is invalid. + pub const STATUS_LINE_INVALID: u64 = 0x0000_0100_0000; + /// Host in the URI. + pub const HOSTU_INVALID: u64 = 0x0000_0200_0000; + /// Host in the Host header. + pub const HOSTH_INVALID: u64 = 0x0000_0400_0000; + /// Uri / host header invalid. + pub const HOST_INVALID: u64 = (Self::HOSTU_INVALID | Self::HOSTH_INVALID); + /// Contains null. + pub const URLEN_ENCODED_NUL: u64 = 0x0000_0800_0000; + /// Invalid encoding. + pub const URLEN_INVALID_ENCODING: u64 = 0x0000_1000_0000; + /// Overlong usage. + pub const URLEN_OVERLONG_U: u64 = 0x0000_2000_0000; + /// Range U+FF00 - U+FFEF detected. + pub const URLEN_HALF_FULL_RANGE: u64 = 0x0000_4000_0000; + /// Raw null byte. + pub const URLEN_RAW_NUL: u64 = 0x0000_8000_0000; + /// Request invalid. + pub const REQUEST_INVALID: u64 = 0x0001_0000_0000; + /// Request content-length invalid. + pub const REQUEST_INVALID_C_L: u64 = 0x0002_0000_0000; + /// Authorization is invalid. + pub const AUTH_INVALID: u64 = 0x0004_0000_0000; + /// Missing bytes in request and/or response data. + pub const MISSING_BYTES: u64 = 0x0008_0000_0000; + /// Missing bytes in request data. + pub const REQUEST_MISSING_BYTES: u64 = (0x0010_0000_0000 | Self::MISSING_BYTES); + /// Missing bytes in the response data. + pub const RESPONSE_MISSING_BYTES: u64 = (0x0020_0000_0000 | Self::MISSING_BYTES); + /// Too many headers, log only once. + pub const HEADERS_TOO_MANY: u64 = 0x0040_0000_0000; +} + +/// Enumerates possible EOLs +#[derive(PartialEq, Eq, Copy, Clone, Debug)] +pub enum Eol { + /// No specific EOL found + None, + /// '\n' + LF, + /// '\r' + CR, + /// "\r\n" + CRLF, +} + +/// Determines if character in a seperator. +/// separators = "(" | ")" | "<" | ">" | "@" +/// | "," | ";" | ":" | "\" | <"> +/// | "/" | "[" | "]" | "?" 
| "=" +/// | "{" | "}" | SP | HT +fn is_separator(c: u8) -> bool { + matches!( + c as char, + '(' | ')' + | '<' + | '>' + | '@' + | ',' + | ';' + | ':' + | '\\' + | '"' + | '/' + | '[' + | ']' + | '?' + | '=' + | '{' + | '}' + | ' ' + | '\t' + ) +} + +/// Determines if character is a token. +/// token = 1* +/// CHAR = +pub fn is_token(c: u8) -> bool { + (32..=126).contains(&c) && !is_separator(c) +} + +/// This parser takes leading whitespace as defined by is_ascii_whitespace. +pub fn take_ascii_whitespace() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| take_while(|c: u8| c.is_ascii_whitespace())(input) +} + +/// Remove all line terminators (LF, CR or CRLF) from +/// the end of the line provided as input. +pub fn chomp(mut data: &[u8]) -> &[u8] { + loop { + let last_char = data.last(); + if last_char == Some(&(b'\n')) || last_char == Some(&(b'\r')) { + data = &data[..data.len() - 1]; + } else { + break; + } + } + data +} + +/// Trim the leading whitespace +fn trim_start(input: &[u8]) -> &[u8] { + let mut result = input; + while let Some(x) = result.first() { + if is_space(*x) { + result = &result[1..] + } else { + break; + } + } + result +} + +/// Trim the trailing whitespace +fn trim_end(input: &[u8]) -> &[u8] { + let mut result = input; + while let Some(x) = result.last() { + if is_space(*x) { + result = &result[..(result.len() - 1)] + } else { + break; + } + } + result +} + +/// Trim the leading and trailing whitespace from this byteslice. +pub fn trimmed(input: &[u8]) -> &[u8] { + trim_end(trim_start(input)) +} + +/// Splits the given input into two halves using the given predicate. +/// The `reverse` parameter determines whether or not to split on the +/// first match or the second match. +/// The `do_trim` parameter will return results with leading and trailing +/// whitespace trimmed. 
+/// If the predicate does not match, then the entire input is returned +/// in the first predicate element and an empty binary string is returned +/// in the second element. +pub fn split_on_predicate( + input: &[u8], reverse: bool, do_trim: bool, predicate: F, +) -> (&[u8], &[u8]) +where + F: FnMut(&u8) -> bool, +{ + let (first, second) = if reverse { + let mut iter = input.rsplitn(2, predicate); + let mut second = iter.next(); + let mut first = iter.next(); + // If we do not get two results, then put the only result first + if first.is_none() { + first = second; + second = None; + } + (first.unwrap_or(b""), second.unwrap_or(b"")) + } else { + let mut iter = input.splitn(2, predicate); + let first = iter.next(); + let second = iter.next(); + (first.unwrap_or(b""), second.unwrap_or(b"")) + }; + + if do_trim { + (trimmed(first), trimmed(second)) + } else { + (first, second) + } +} + +/// Determines if character is a whitespace character. +/// whitespace = ' ' | '\t' | '\r' | '\n' | '\x0b' | '\x0c' +pub fn is_space(c: u8) -> bool { + matches!(c as char, ' ' | '\t' | '\r' | '\n' | '\x0b' | '\x0c') +} + +/// Is the given line empty? +/// +/// Returns true or false +fn is_line_empty(data: &[u8]) -> bool { + matches!(data, b"\x0d" | b"\x0a" | b"\x0d\x0a") +} + +/// Determine if entire line is whitespace as defined by +/// util::is_space. +fn is_line_whitespace(data: &[u8]) -> bool { + !data.iter().any(|c| !is_space(*c)) +} + +/// Searches for and extracts the next set of ascii digits from the input slice if present +/// Parses over leading and trailing LWS characters. 
+/// +/// Returns (any trailing non-LWS characters, (non-LWS leading characters, ascii digits)) +pub fn ascii_digits() -> impl Fn(&[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + move |input| { + map( + tuple(( + nom_take_is_space, + take_till(|c: u8| c.is_ascii_digit()), + digit1, + nom_take_is_space, + )), + |(_, leading_data, digits, _)| (leading_data, digits), + )(input) + } +} + +/// Searches for and extracts the next set of hex digits from the input slice if present +/// Parses over leading and trailing LWS characters. +/// +/// Returns a tuple of any trailing non-LWS characters and the found hex digits +pub fn hex_digits() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |input| { + map( + tuple(( + nom_take_is_space, + take_while(|c: u8| c.is_ascii_hexdigit()), + nom_take_is_space, + )), + |(_, digits, _)| digits, + )(input) + } +} + +/// Determines if the given line is a request terminator. +fn is_line_terminator( + server_personality: HtpServerPersonality, data: &[u8], next_no_lf: bool, +) -> bool { + // Is this the end of request headers? + if server_personality == HtpServerPersonality::IIS_5_0 { + // IIS 5 will accept a whitespace line as a terminator + if is_line_whitespace(data) { + return true; + } + } + + // Treat an empty line as terminator + if is_line_empty(data) { + return true; + } + if data.len() == 2 && nom_is_space(data[0]) && data[1] == b'\n' { + return next_no_lf; + } + false +} + +/// Determines if the given line can be ignored when it appears before a request. +pub fn is_line_ignorable(server_personality: HtpServerPersonality, data: &[u8]) -> bool { + is_line_terminator(server_personality, data, false) +} + +/// Attempts to convert the provided port slice to a u16 +/// +/// Returns port number if a valid one is found. 
None if fails to convert or the result is 0 +pub fn convert_port(port: &[u8]) -> Option { + if port.is_empty() { + return None; + } + let port_number = std::str::from_utf8(port).ok()?.parse::().ok()?; + if port_number == 0 { + None + } else { + Some(port_number) + } +} + +/// Determine if the information provided on the response line +/// is good enough. Browsers are lax when it comes to response +/// line parsing. In most cases they will only look for the +/// words "http" at the beginning. +/// +/// Returns true for good enough (treat as response body) or false for not good enough +pub fn treat_response_line_as_body(data: &[u8]) -> bool { + // Browser behavior: + // Firefox 3.5.x: (?i)^\s*http + // IE: (?i)^\s*http\s*/ + // Safari: ^HTTP/\d+\.\d+\s+\d{3} + + tuple((opt(take_is_space_or_null), tag_no_case("http")))(data).is_err() +} + +/// Implements relaxed (not strictly RFC) hostname validation. +/// +/// Returns true if the supplied hostname is valid; false if it is not. +pub fn validate_hostname(input: &[u8]) -> bool { + if input.is_empty() || input.len() > 255 { + return false; + } + + // Check IPv6 + if let Ok((_rest, (_left_br, addr, _right_br))) = tuple(( + char::<_, NomError<&[u8]>>('['), + is_not::<_, _, NomError<&[u8]>>("#?/]"), + char::<_, NomError<&[u8]>>(']'), + ))(input) + { + if let Ok(str) = std::str::from_utf8(addr) { + return std::net::Ipv6Addr::from_str(str).is_ok(); + } + } + + if tag::<_, _, NomError<&[u8]>>(".")(input).is_ok() + || take_until::<_, _, NomError<&[u8]>>("..")(input).is_ok() + { + return false; + } + for section in input.split(|&c| c == b'.') { + if section.len() > 63 { + return false; + } + // According to the RFC, an underscore it not allowed in the label, but + // we allow it here because we think it's often seen in practice. 
+ if take_while_m_n::<_, _, NomError<&[u8]>>(section.len(), section.len(), |c| { + c == b'_' || c == b'-' || (c as char).is_alphanumeric() + })(section) + .is_err() + { + return false; + } + } + true +} + +/// Returns the LibHTP version string. +pub fn get_version() -> &'static str { + HTP_VERSION_STRING_FULL +} + +/// Take leading whitespace as defined by nom_is_space. +pub fn nom_take_is_space(data: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(nom_is_space)(data) +} + +/// Take data before the first null character if it exists. +pub fn take_until_null(data: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(|c| c != b'\0')(data) +} + +/// Take leading space as defined by util::is_space. +pub fn take_is_space(data: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(is_space)(data) +} + +/// Take leading null characters or spaces as defined by util::is_space +pub fn take_is_space_or_null(data: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(|c| is_space(c) || c == b'\0')(data) +} + +/// Take any non-space character as defined by is_space. 
+pub fn take_not_is_space(data: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(|c: u8| !is_space(c))(data) +} + +/// Returns all data up to and including the first new line or null +/// Returns Err if not found +pub fn take_till_lf_null(data: &[u8]) -> IResult<&[u8], &[u8]> { + let (_, line) = streaming_take_till(|c| c == b'\n' || c == 0)(data)?; + Ok((&data[line.len() + 1..], &data[0..line.len() + 1])) +} + +/// Returns all data up to and including the first new line +/// Returns Err if not found +pub fn take_till_lf(data: &[u8]) -> IResult<&[u8], &[u8]> { + let (_, line) = streaming_take_till(|c| c == b'\n')(data)?; + Ok((&data[line.len() + 1..], &data[0..line.len() + 1])) +} + +/// Returns all data up to and including the first EOL and which EOL was seen +/// +/// Returns Err if not found +pub fn take_till_eol(data: &[u8]) -> IResult<&[u8], (&[u8], Eol)> { + let (_, (line, eol)) = tuple(( + streaming_take_till(|c| c == b'\n' || c == b'\r'), + alt(( + streaming_tag("\r\n"), + streaming_tag("\r"), + streaming_tag("\n"), + )), + ))(data)?; + match eol { + b"\n" => Ok((&data[line.len() + 1..], (&data[0..line.len() + 1], Eol::LF))), + b"\r" => Ok((&data[line.len() + 1..], (&data[0..line.len() + 1], Eol::CR))), + b"\r\n" => Ok(( + &data[line.len() + 2..], + (&data[0..line.len() + 2], Eol::CRLF), + )), + _ => Err(Incomplete(Needed::new(1))), + } +} + +/// Skip control characters +pub fn take_chunked_ctl_chars(data: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(is_chunked_ctl_char)(data) +} + +/// Check if the data contains valid chunked length chars, i.e. 
leading chunked ctl chars and ascii hexdigits +/// +/// Returns true if valid, false otherwise +pub fn is_valid_chunked_length_data(data: &[u8]) -> bool { + tuple(( + take_chunked_ctl_chars, + take_while1(|c: u8| !c.is_ascii_hexdigit()), + ))(data) + .is_err() +} + +fn is_chunked_ctl_char(c: u8) -> bool { + matches!(c, 0x0d | 0x0a | 0x20 | 0x09 | 0x0b | 0x0c) +} + +/// Check if the entire input line is chunked control characters +pub fn is_chunked_ctl_line(l: &[u8]) -> bool { + for c in l { + if !is_chunked_ctl_char(*c) { + return false; + } + } + true +} + +#[cfg(test)] +mod tests { + use crate::util::*; + use rstest::rstest; + + #[rstest] + #[case("", "", "")] + #[case("hello world", "", "hello world")] + #[case("\0", "\0", "")] + #[case("hello_world \0 ", "\0 ", "hello_world ")] + #[case("hello\0\0\0\0", "\0\0\0\0", "hello")] + fn test_take_until_null(#[case] input: &str, #[case] remaining: &str, #[case] parsed: &str) { + assert_eq!( + take_until_null(input.as_bytes()).unwrap(), + (remaining.as_bytes(), parsed.as_bytes()) + ); + } + + #[rstest] + #[case("", "", "")] + #[case(" hell o", "hell o", " ")] + #[case(" \thell o", "hell o", " \t")] + #[case("hell o", "hell o", "")] + #[case("\r\x0b \thell \to", "hell \to", "\r\x0b \t")] + fn test_take_is_space(#[case] input: &str, #[case] remaining: &str, #[case] parsed: &str) { + assert_eq!( + take_is_space(input.as_bytes()).unwrap(), + (remaining.as_bytes(), parsed.as_bytes()) + ); + } + + #[rstest] + #[case(" http 1.1", false)] + #[case("\0 http 1.1", false)] + #[case("http", false)] + #[case("HTTP", false)] + #[case(" HTTP", false)] + #[case("test", true)] + #[case(" test", true)] + #[case("", true)] + #[case("kfgjl hTtp ", true)] + fn test_treat_response_line_as_body(#[case] input: &str, #[case] expected: bool) { + assert_eq!(treat_response_line_as_body(input.as_bytes()), expected); + } + + #[rstest] + #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")] + #[case("", "", 
"")] + #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")] + #[case("header:value\r\r", "", "")] + #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")] + #[case("header:value", "", "")] + #[case("\nheader:value\r\n", "header:value\r\n", "\n")] + #[case("header:value\r\n", "", "header:value\r\n")] + #[case("header:value\n\r", "\r", "header:value\n")] + #[case("header:value\n\n", "\n", "header:value\n")] + #[case("abcdefg\nhijk", "hijk", "abcdefg\n")] + fn test_take_till_lf(#[case] input: &str, #[case] remaining: &str, #[case] parsed: &str) { + assert_eq!( + take_till_lf(input.as_bytes()).unwrap(), + (remaining.as_bytes(), parsed.as_bytes()) + ); + } + + #[rstest] + #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")] + #[case("", "", "", Eol::CR)] + #[case("abcdefg\n", "", "abcdefg\n", Eol::LF)] + #[case("abcdefg\n\r", "\r", "abcdefg\n", Eol::LF)] + #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")] + #[case("abcdefg\r", "", "", Eol::CR)] + #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Incomplete(Size(1))")] + #[case("abcdefg", "", "", Eol::CR)] + #[case("abcdefg\nhijk", "hijk", "abcdefg\n", Eol::LF)] + #[case("abcdefg\n\r\nhijk", "\r\nhijk", "abcdefg\n", Eol::LF)] + #[case("abcdefg\rhijk", "hijk", "abcdefg\r", Eol::CR)] + #[case("abcdefg\r\nhijk", "hijk", "abcdefg\r\n", Eol::CRLF)] + #[case("abcdefg\r\n", "", "abcdefg\r\n", Eol::CRLF)] + fn test_take_till_eol( + #[case] input: &str, #[case] remaining: &str, #[case] parsed: &str, #[case] eol: Eol, + ) { + assert_eq!( + take_till_eol(input.as_bytes()).unwrap(), + (remaining.as_bytes(), (parsed.as_bytes(), eol)) + ); + } + + #[rstest] + #[case(b'a', false)] + #[case(b'^', false)] + #[case(b'-', false)] + #[case(b'_', false)] + #[case(b'&', false)] + #[case(b'(', true)] + #[case(b'\\', true)] + #[case(b'/', true)] + 
#[case(b'=', true)] + #[case(b'\t', true)] + fn test_is_separator(#[case] input: u8, #[case] expected: bool) { + assert_eq!(is_separator(input), expected); + } + + #[rstest] + #[case(b'a', true)] + #[case(b'&', true)] + #[case(b'+', true)] + #[case(b'\t', false)] + #[case(b'\n', false)] + fn test_is_token(#[case] input: u8, #[case] expected: bool) { + assert_eq!(is_token(input), expected); + } + + #[rstest] + #[case("", "")] + #[case("test\n", "test")] + #[case("test\r\n", "test")] + #[case("test\r\n\n", "test")] + #[case("test\n\r\r\n\r", "test")] + #[case("test", "test")] + #[case("te\nst", "te\nst")] + fn test_chomp(#[case] input: &str, #[case] expected: &str) { + assert_eq!(chomp(input.as_bytes()), expected.as_bytes()); + } + + #[rstest] + #[case::trimmed(b"notrim", b"notrim")] + #[case::trim_start(b"\t trim", b"trim")] + #[case::trim_both(b" trim ", b"trim")] + #[case::trim_both_ignore_middle(b" trim trim ", b"trim trim")] + #[case::trim_end(b"trim \t", b"trim")] + #[case::trim_empty(b"", b"")] + fn test_trim(#[case] input: &[u8], #[case] expected: &[u8]) { + assert_eq!(trimmed(input), expected); + } + + #[rstest] + #[case::non_space(0x61, false)] + #[case::space(0x20, true)] + #[case::form_feed(0x0c, true)] + #[case::newline(0x0a, true)] + #[case::carriage_return(0x0d, true)] + #[case::tab(0x09, true)] + #[case::vertical_tab(0x0b, true)] + fn test_is_space(#[case] input: u8, #[case] expected: bool) { + assert_eq!(is_space(input), expected); + } + + #[rstest] + #[case("", false)] + #[case("arfarf", false)] + #[case("\n\r", false)] + #[case("\rabc", false)] + #[case("\r\n", true)] + #[case("\r", true)] + #[case("\n", true)] + fn test_is_line_empty(#[case] input: &str, #[case] expected: bool) { + assert_eq!(is_line_empty(input.as_bytes()), expected); + } + + #[rstest] + #[case("", false)] + #[case("www.ExAmplE-1984.com", true)] + #[case("[::]", true)] + #[case("[2001:3db8:0000:0000:0000:ff00:d042:8530]", true)] + #[case("www.example.com", true)] + 
#[case("www.exa-mple.com", true)] + #[case("www.exa_mple.com", true)] + #[case(".www.example.com", false)] + #[case("www..example.com", false)] + #[case("www.example.com..", false)] + #[case("www example com", false)] + #[case("[::", false)] + #[case("[::/path[0]", false)] + #[case("[::#garbage]", false)] + #[case("[::?]", false)] + #[case::over64_char( + "www.exampleexampleexampleexampleexampleexampleexampleexampleexampleexample.com", + false + )] + fn test_validate_hostname(#[case] input: &str, #[case] expected: bool) { + assert_eq!(validate_hostname(input.as_bytes()), expected); + } + + #[rstest] + #[should_panic( + expected = "called `Result::unwrap()` on an `Err` value: Error(Error { input: [], code: Digit })" + )] + #[case(" garbage no ascii ", "", "", "")] + #[case(" a200 \t bcd ", "bcd ", "a", "200")] + #[case(" 555555555 ", "", "", "555555555")] + #[case(" 555555555 500", "500", "", "555555555")] + fn test_ascii_digits( + #[case] input: &str, #[case] remaining: &str, #[case] leading: &str, #[case] digits: &str, + ) { + // Returns (any trailing non-LWS characters, (non-LWS leading characters, ascii digits)) + assert_eq!( + ascii_digits()(input.as_bytes()).unwrap(), + ( + remaining.as_bytes(), + (leading.as_bytes(), digits.as_bytes()) + ) + ); + } + + #[rstest] + #[case("", "", "")] + #[case("12a5", "", "12a5")] + #[case("12a5 .....", ".....", "12a5")] + #[case(" \t12a5..... ", "..... 
", "12a5")] + #[case(" 68656c6c6f 12a5", "12a5", "68656c6c6f")] + #[case(" .....", ".....", "")] + fn test_hex_digits(#[case] input: &str, #[case] remaining: &str, #[case] digits: &str) { + //(trailing non-LWS characters, found hex digits) + assert_eq!( + hex_digits()(input.as_bytes()).unwrap(), + (remaining.as_bytes(), digits.as_bytes()) + ); + } + + #[rstest] + #[case("", "", "")] + #[case("no chunked ctl chars here", "no chunked ctl chars here", "")] + #[case( + "\x0d\x0a\x20\x09\x0b\x0cno chunked ctl chars here", + "no chunked ctl chars here", + "\x0d\x0a\x20\x09\x0b\x0c" + )] + #[case( + "no chunked ctl chars here\x20\x09\x0b\x0c", + "no chunked ctl chars here\x20\x09\x0b\x0c", + "" + )] + #[case( + "\x20\x09\x0b\x0cno chunked ctl chars here\x20\x09\x0b\x0c", + "no chunked ctl chars here\x20\x09\x0b\x0c", + "\x20\x09\x0b\x0c" + )] + fn test_take_chunked_ctl_chars( + #[case] input: &str, #[case] remaining: &str, #[case] hex_digits: &str, + ) { + //(trailing non-LWS characters, found hex digits) + assert_eq!( + take_chunked_ctl_chars(input.as_bytes()).unwrap(), + (remaining.as_bytes(), hex_digits.as_bytes()) + ); + } + + #[rstest] + #[case("", true)] + #[case("68656c6c6f", true)] + #[case("\x0d\x0a\x20\x09\x0b\x0c68656c6c6f", true)] + #[case("X5O!P%@AP", false)] + #[case("\x0d\x0a\x20\x09\x0b\x0cX5O!P%@AP", false)] + fn test_is_valid_chunked_length_data(#[case] input: &str, #[case] expected: bool) { + assert_eq!(is_valid_chunked_length_data(input.as_bytes()), expected); + } + + #[rstest] + #[case("", false, true, ("", ""))] + #[case("ONE TWO THREE", false, true, ("ONE", "TWO THREE"))] + #[case("ONE TWO THREE", true, true, ("ONE TWO", "THREE"))] + #[case("ONE TWO THREE", false, true, ("ONE", "TWO THREE"))] + #[case("ONE TWO THREE", true, true, ("ONE TWO", "THREE"))] + #[case("ONE", false, true, ("ONE", ""))] + #[case("ONE", true, true, ("ONE", ""))] + fn test_split_on_predicate( + #[case] input: &str, #[case] reverse: bool, #[case] trim: bool, + #[case] expected: 
(&str, &str),
+    ) {
+        assert_eq!(
+            split_on_predicate(input.as_bytes(), reverse, trim, |c| *c == 0x20),
+            (expected.0.as_bytes(), expected.1.as_bytes())
+        );
+    }
+}
diff --git a/rust/htp/tests/common.rs b/rust/htp/tests/common.rs
new file mode 100644
index 000000000000..cb0115689bec
--- /dev/null
+++ b/rust/htp/tests/common.rs
@@ -0,0 +1,195 @@
+/// Builds a NUL-terminated C string from `$x` and yields a raw pointer to it.
+///
+/// NOTE(review): the `CString` is a temporary, so the pointer dangles as soon
+/// as that temporary is dropped (see the `CString::as_ptr` documentation).
+/// Pass the result straight to the callee; do not store it in a binding.
+#[macro_export]
+macro_rules! cstr {
+    ( $x:expr ) => {{
+        CString::new($x).unwrap().as_ptr()
+    }};
+}
+
+/// Compares a transaction's header value to an expected value.
+///
+/// The `attr` argument is meant to be either `request_headers` or `response_headers`.
+/// Any arguments after the expected value are forwarded to `assert_eq!` as its
+/// custom failure message.
+///
+/// Example usage:
+/// assert_header_eq!(tx, request_headers, "host", "www.example.com");
+#[allow(unused_macros)]
+#[macro_export]
+macro_rules! assert_header_eq {
+    ($tx:expr, $attr:ident, $key:expr, $val:expr) => {{
+        let header = &(*$tx).$attr
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        assert_eq!(*header.value, $val);
+    }};
+    ($tx:expr, $attr:ident, $key:expr, $val:expr,) => {{
+        assert_header_eq!($tx, $attr, $key, $val);
+    }};
+    ($tx:expr, $attr:ident, $key:expr, $val:expr, $($arg:tt)+) => {{
+        // Same lookup as the message-less arm: the result of the lookup is
+        // used as the header directly, so there is no tuple field to unwrap.
+        let header = &(*$tx).$attr
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        assert_eq!(*header.value, $val, $($arg)*);
+    }};
+}
+
+/// Compares a transaction's request header value to an expected value.
+///
+/// Example usage:
+/// assert_request_header_eq!(tx, "host", "www.example.com");
+#[macro_export]
+macro_rules! assert_request_header_eq {
+    ($tx:expr, $key:expr, $val:expr) => {{
+        assert_header_eq!($tx, request_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr,) => {{
+        assert_header_eq!($tx, request_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr, $($arg:tt)+) => {{
+        assert_header_eq!($tx, request_headers, $key, $val, $($arg)*);
+    }};
+}
+
+/// Compares a transaction's response header value to an expected value.
+///
+/// Example usage:
+/// assert_response_header_eq!(tx, "content-encoding", "gzip");
+#[macro_export]
+macro_rules! assert_response_header_eq {
+    ($tx:expr, $key:expr, $val:expr) => {{
+        assert_header_eq!($tx, response_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr,) => {{
+        assert_header_eq!($tx, response_headers, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr, $($arg:tt)+) => {{
+        assert_header_eq!($tx, response_headers, $key, $val, $($arg)*);
+    }};
+}
+
+/// Asserts that a transaction's response header has a flag set.
+///
+/// Example usage:
+/// assert_response_header_flag_contains!(tx, "Content-Length", HtpFlags::FIELD_REPEATED);
+#[macro_export]
+macro_rules! assert_response_header_flag_contains {
+    ($tx:expr, $key:expr, $val:expr) => {{
+        let header = &(*$tx).response_headers
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        assert!(header.flags.is_set($val));
+    }};
+    ($tx:expr, $key:expr, $val:expr,) => {{
+        assert_response_header_flag_contains!($tx, $key, $val);
+    }};
+    ($tx:expr, $key:expr, $val:expr, $($arg:tt)+) => {{
+        let header = &(*$tx).response_headers
+            .get_nocase_nozero($key)
+            .expect(format!(
+                "expected header '{}' to exist at {}:{}:{}",
+                $key,
+                file!(),
+                line!(),
+                column!()
+            ).as_ref());
+        // Only the flag is checked: `$val` is a flag mask, not a header value.
+        assert!(header.flags.is_set($val), $($arg)*);
+    }};
+}
+
+/// Assert the common evader request values are as expected
+///
+/// Example usage:
+/// assert_evader_request!(tx, "url");
+#[macro_export]
+macro_rules! assert_evader_request {
+    ($tx:expr, $url:expr) => {{
+        assert!(($tx).request_method.as_ref().unwrap().eq_slice("GET"));
+        assert!(($tx).request_uri.as_ref().unwrap().eq_slice($url));
+        assert_eq!(HtpProtocol::V1_1, ($tx).request_protocol_number);
+        assert_header_eq!($tx, request_headers, "host", "evader.example.com");
+    }};
+}
+
+/// Assert the common evader response values are as expected
+///
+/// Example usage:
+/// assert_evader_response!(tx);
+#[macro_export]
+macro_rules! 
assert_evader_response { + ($tx:expr) => {{ + assert_eq!(HtpProtocol::V1_1, ($tx).response_protocol_number); + assert!(($tx).response_status_number.eq_num(200)); + assert_response_header_eq!($tx, "Content-type", "application/octet-stream"); + assert_response_header_eq!( + $tx, + "Content-disposition", + "attachment; filename=\"eicar.txt\"" + ); + assert!(($tx) + .response_headers + .get_nocase_nozero("Connection") + .is_some()); + }}; +} + +/// Assert the response transfer encoding is detected as chunked +/// +/// Example usage: +/// assert_evader_chunked_response!(tx); +#[macro_export] +macro_rules! assert_evader_chunked { + ($tx:expr) => {{ + assert_eq!($tx.response_transfer_coding, HtpTransferCoding::CHUNKED); + assert_response_header_eq!($tx, "Yet-Another-Header", "foo"); + assert_eq!(68, ($tx).response_entity_len); + assert_eq!(156, ($tx).response_message_len); + let user_data = ($tx).user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(17, user_data.response_data.len()); + assert_eq!(b"X5O!".as_ref(), (&user_data.response_data[0]).as_slice()); + assert_eq!(b"P%@A".as_ref(), (&user_data.response_data[1]).as_slice()); + assert_eq!(b"P[4\\".as_ref(), (&user_data.response_data[2]).as_slice()); + assert_eq!(b"PZX5".as_ref(), (&user_data.response_data[3]).as_slice()); + assert_eq!(b"4(P^".as_ref(), (&user_data.response_data[4]).as_slice()); + assert_eq!(b")7CC".as_ref(), (&user_data.response_data[5]).as_slice()); + assert_eq!(b")7}$".as_ref(), (&user_data.response_data[6]).as_slice()); + assert_eq!(b"EICA".as_ref(), (&user_data.response_data[7]).as_slice()); + assert_eq!(b"R-ST".as_ref(), (&user_data.response_data[8]).as_slice()); + assert_eq!(b"ANDA".as_ref(), (&user_data.response_data[9]).as_slice()); + assert_eq!(b"RD-A".as_ref(), (&user_data.response_data[10]).as_slice()); + assert_eq!(b"NTIV".as_ref(), (&user_data.response_data[11]).as_slice()); + assert_eq!(b"IRUS".as_ref(), (&user_data.response_data[12]).as_slice()); + 
assert_eq!(b"-TES".as_ref(), (&user_data.response_data[13]).as_slice()); + assert_eq!(b"T-FI".as_ref(), (&user_data.response_data[14]).as_slice()); + assert_eq!(b"LE!$".as_ref(), (&user_data.response_data[15]).as_slice()); + assert_eq!(b"H+H*".as_ref(), (&user_data.response_data[16]).as_slice()); + assert_eq!(HtpRequestProgress::COMPLETE, ($tx).request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, ($tx).response_progress); + }}; +} diff --git a/rust/htp/tests/files/00-adhoc.t b/rust/htp/tests/files/00-adhoc.t new file mode 100644 index 000000000000..36805de930a0 --- /dev/null +++ b/rust/htp/tests/files/00-adhoc.t @@ -0,0 +1,14 @@ +>>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/01-get.t b/rust/htp/tests/files/01-get.t new file mode 100644 index 000000000000..e9edceb3a488 --- /dev/null +++ b/rust/htp/tests/files/01-get.t @@ -0,0 +1,14 @@ +>>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/02-header-test-apache2.t b/rust/htp/tests/files/02-header-test-apache2.t new file mode 100644 index 000000000000..a7cb78794b92 Binary files /dev/null and b/rust/htp/tests/files/02-header-test-apache2.t differ diff --git a/rust/htp/tests/files/03-post-urlencoded.t b/rust/htp/tests/files/03-post-urlencoded.t new file mode 100644 index 000000000000..052377e6cb98 --- /dev/null +++ b/rust/htp/tests/files/03-post-urlencoded.t @@ -0,0 +1,34 @@ +>>> +POST /?qsp1=1&%20p%20q=2&u=Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_ HTTP/1.0 +Content-Length: 12 +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla + +p=0123456789 +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +9 +012345678 +1 +9 +0 + diff --git a/rust/htp/tests/files/04-post-urlencoded-chunked.t b/rust/htp/tests/files/04-post-urlencoded-chunked.t new file mode 100644 index 000000000000..1d72e717658f --- /dev/null +++ b/rust/htp/tests/files/04-post-urlencoded-chunked.t @@ -0,0 +1,26 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: chunked +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + +b +p=012345678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/05-expect.t b/rust/htp/tests/files/05-expect.t new file mode 100644 index 000000000000..0ad3090776b1 --- /dev/null +++ b/rust/htp/tests/files/05-expect.t @@ -0,0 +1,39 @@ +>>> +POST / HTTP/1.1 +User-Agent: curl/7.18.2 (i486-pc-linux-gnu) libcurl/7.18.2 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.8 libssh2/0.18 +Accept: */* +Content-Length: 216 +Expect: 100-continue +Content-Type: multipart/form-data; boundary=----------------------------07869933ca1b + + +<<< +HTTP/1.1 100 Continue +Header1: This +Header2: That + + +>>> +------------------------------07869933ca1b +Content-Disposition: form-data; name="file"; filename="404.php" +Content-Type: application/octet-stream + + +>>> + +>>> + +------------------------------07869933ca1b-- + +<<< +HTTP/1.1 200 OK +Date: Tue, 03 Nov 2009 09:27:47 GMT +Server: Apache +Last-Modified: Thu, 30 Apr 2009 12:20:49 GMT +ETag: "2dcada-2d-468c4b9ec6a40" +Accept-Ranges: bytes +Content-Length: 45 +Vary: Accept-Encoding +Content-Type: text/html + +

<html><body><h1>It works!</h1></body></html>

diff --git a/rust/htp/tests/files/06-uri-normal.t b/rust/htp/tests/files/06-uri-normal.t new file mode 100644 index 000000000000..78a138cf94ed --- /dev/null +++ b/rust/htp/tests/files/06-uri-normal.t @@ -0,0 +1,9 @@ +>>> +GET http://username:password@www.example.com:8080/sub/folder/file.jsp?p=q#f HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/07-pipelined-connection.t b/rust/htp/tests/files/07-pipelined-connection.t new file mode 100644 index 000000000000..07ef6036949d --- /dev/null +++ b/rust/htp/tests/files/07-pipelined-connection.t @@ -0,0 +1,15 @@ +>>> +GET /first HTTP/1.1 + +GET /second HTTP/1.1 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/08-not-pipelined-connection.t b/rust/htp/tests/files/08-not-pipelined-connection.t new file mode 100644 index 000000000000..2a1bac331e9b --- /dev/null +++ b/rust/htp/tests/files/08-not-pipelined-connection.t @@ -0,0 +1,18 @@ +>>> +GET /first HTTP/1.1 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! +>>> +GET /second HTTP/1.1 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/09-multi-packet-request-head.t b/rust/htp/tests/files/09-multi-packet-request-head.t new file mode 100644 index 000000000000..08a16f2cc26a --- /dev/null +++ b/rust/htp/tests/files/09-multi-packet-request-head.t @@ -0,0 +1,14 @@ +>>> +GET / HTTP/1.0 + +>>> +Host: www.example.com + +>>> + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/10-host-in-headers.t b/rust/htp/tests/files/10-host-in-headers.t new file mode 100644 index 000000000000..b892b0ab243b --- /dev/null +++ b/rust/htp/tests/files/10-host-in-headers.t @@ -0,0 +1,34 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! +>>> +GET / HTTP/1.1 +Host: www.example.com. + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 +>>> +GET / HTTP/1.1 +Host: WwW.ExamPle.cOm + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 +>>> +GET / HTTP/1.1 +Host: www.example.com:80 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 \ No newline at end of file diff --git a/rust/htp/tests/files/100-auth-digest-escaped-quote.t b/rust/htp/tests/files/100-auth-digest-escaped-quote.t new file mode 100644 index 000000000000..f0bed0b2bd9f --- /dev/null +++ b/rust/htp/tests/files/100-auth-digest-escaped-quote.t @@ -0,0 +1,8 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com +Authorization: Digest username="ivan\"r\"", realm="Book Review", + nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d", + uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb", + qop=auth, nc=00000004, cnonce="c3bcee9534c051a0" + diff --git a/rust/htp/tests/files/101-request-cookies-2.t b/rust/htp/tests/files/101-request-cookies-2.t new file mode 100644 index 000000000000..4554c6d9392c --- /dev/null +++ b/rust/htp/tests/files/101-request-cookies-2.t @@ -0,0 +1,16 @@ +>>> +GET / HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla +Cookie: =0; p=1; q=2; =; ; z= + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/102-request-cookies-3.t b/rust/htp/tests/files/102-request-cookies-3.t new file mode 100644 index 000000000000..cfe2d6874182 --- /dev/null +++ b/rust/htp/tests/files/102-request-cookies-3.t @@ -0,0 +1,16 @@ +>>> +GET / HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla +Cookie: = ; a=1; b=2 ; =7; c=double=equal + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! diff --git a/rust/htp/tests/files/103-request-cookies-4.t b/rust/htp/tests/files/103-request-cookies-4.t new file mode 100644 index 000000000000..78b63ec7eff8 --- /dev/null +++ b/rust/htp/tests/files/103-request-cookies-4.t @@ -0,0 +1,16 @@ +>>> +GET / HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla +Cookie: = ;;c=1; a=1 ; b=2; + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/104-request-cookies-5.t b/rust/htp/tests/files/104-request-cookies-5.t new file mode 100644 index 000000000000..e39a3c3b77eb --- /dev/null +++ b/rust/htp/tests/files/104-request-cookies-5.t @@ -0,0 +1,16 @@ +>>> +GET / HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla +Cookie: + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/105-expect-100.t b/rust/htp/tests/files/105-expect-100.t new file mode 100644 index 000000000000..422f97fb7827 --- /dev/null +++ b/rust/htp/tests/files/105-expect-100.t @@ -0,0 +1,27 @@ +>>> +PUT /forbidden HTTP/1.1 +Content-Length: 14 +Expect: 100-continue + + +<<< +HTTP/1.0 401 Forbidden +Content-Length: 0 + + +>>> +POST /ok HTTP/1.1 +Content-Length: 14 +Expect: 100-continue + + +<<< +HTTP/1.0 100 continue +Content-Length: 0 + + +>>> +Hello People! + +<<< +HTTP/1.0 200 OK diff --git a/rust/htp/tests/files/106-tunnelled-1.t b/rust/htp/tests/files/106-tunnelled-1.t new file mode 100644 index 000000000000..dc67ea941153 --- /dev/null +++ b/rust/htp/tests/files/106-tunnelled-1.t @@ -0,0 +1,15 @@ +>>> +CONNECT abc:443 HTTP/1.1 +User-Agent: Victor/1.0 + +GET / HTTP/1.1 +User-Agent: Victor/1.0 + + +<<< +HTTP/1.1 200 OK +Server: VictorServer/1.0 + + + + diff --git a/rust/htp/tests/files/107-response_unknown_status.t b/rust/htp/tests/files/107-response_unknown_status.t new file mode 100644 index 000000000000..26ab8b18dc9c --- /dev/null +++ b/rust/htp/tests/files/107-response_unknown_status.t @@ -0,0 +1,12 @@ +>>> +GET /ld/index.php?id=412784631&cid=0064&version=4&name=try HTTP/1.1 +Accept: */* +User-Agent:LD-agent +Host: 209.405.196.16 + +<<< +NOTHTTP REALLY +SOMEOTHERDATA +STILLNOTHTTP +SOMEMOREOTHERDATA + diff --git a/rust/htp/tests/files/108-response-headers-cr-only.t b/rust/htp/tests/files/108-response-headers-cr-only.t new file mode 100644 index 000000000000..bead6173f5a8 --- /dev/null +++ b/rust/htp/tests/files/108-response-headers-cr-only.t @@ -0,0 +1,9 @@ +>>> +GET /index.html HTTP/1.0 +Host: www.google.org +User-Agent: Mozilla/5.0 + +<<< +HTTP/1.0 200 message +Content-Type: text/html Content-Length: 7 + diff --git a/rust/htp/tests/files/109-response-headers-deformed-eol.t b/rust/htp/tests/files/109-response-headers-deformed-eol.t new file mode 100644 index 000000000000..9650121c7d8f --- /dev/null +++ 
b/rust/htp/tests/files/109-response-headers-deformed-eol.t @@ -0,0 +1,12 @@ +>>> +GET /index.html HTTP/1.0 +Host: www.google.org +User-Agent: Mozilla/5.0 + +<<< +HTTP/1.0 200 message +Content-Type: text/html +Content-Length: 6 + + +abcdef diff --git a/rust/htp/tests/files/11-response-stream-closure.t b/rust/htp/tests/files/11-response-stream-closure.t new file mode 100644 index 000000000000..8bc167c60991 --- /dev/null +++ b/rust/htp/tests/files/11-response-stream-closure.t @@ -0,0 +1,13 @@ +>>> +GET / HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/110-response-folded-headers-2.t b/rust/htp/tests/files/110-response-folded-headers-2.t new file mode 100644 index 000000000000..2e74d0d25458 --- /dev/null +++ b/rust/htp/tests/files/110-response-folded-headers-2.t @@ -0,0 +1,14 @@ +>>> +POST / HTTP/1.0 +Content-Length: 12 +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla + +p=0123456789 +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache + Server +Connection: close + diff --git a/rust/htp/tests/files/111-response-headers-chunked.t b/rust/htp/tests/files/111-response-headers-chunked.t new file mode 100644 index 000000000000..b1ca3eb072aa --- /dev/null +++ b/rust/htp/tests/files/111-response-headers-chunked.t @@ -0,0 +1,13 @@ +>>> +GET /index.html HTTP/1.0 +Host: www.google.org +User-Agent: Mozilla/5.0 + +<<< +HTTP/1.0 200 message +Content- +<<< +Type: text/html +Content-Length: 12 + +Hello World! 
diff --git a/rust/htp/tests/files/112-response-headers-chunked-2.t b/rust/htp/tests/files/112-response-headers-chunked-2.t new file mode 100644 index 000000000000..21f2ef4f9979 --- /dev/null +++ b/rust/htp/tests/files/112-response-headers-chunked-2.t @@ -0,0 +1,15 @@ +>>> +GET /index.html HTTP/1.0 +Host: www.google.org +User-Agent: Mozilla/5.0 + +<<< +HTTP/1.0 200 message +Content- +<<< +Type: text/html +Content-Length: +<<< +12 + +Hello World! diff --git a/rust/htp/tests/files/113-response-multipart-byte-ranges.t b/rust/htp/tests/files/113-response-multipart-byte-ranges.t new file mode 100644 index 000000000000..57d7a2d9159c --- /dev/null +++ b/rust/htp/tests/files/113-response-multipart-byte-ranges.t @@ -0,0 +1,23 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com +Range: bytes=0-50, 100-150 + + +<<< +HTTP/1.1 206 Partial content +Date: Wed, 15 Nov 1995 06:25:24 GMT +Last-modified: Wed, 15 Nov 1995 04:58:08 GMT +Content-type: multipart/byteranges; boundary=THIS_STRING_SEPARATES + +--THIS_STRING_SEPARATES +Content-type: application/pdf +Content-range: bytes 500-999/8000 + +...the first range... +--THIS_STRING_SEPARATES +Content-type: application/pdf +Content-range: bytes 7000-7999/8000 + +...the second range +--THIS_STRING_SEPARATES-- \ No newline at end of file diff --git a/rust/htp/tests/files/114-http-2-upgrade.t b/rust/htp/tests/files/114-http-2-upgrade.t new file mode 100644 index 000000000000..f15d819edb88 --- /dev/null +++ b/rust/htp/tests/files/114-http-2-upgrade.t @@ -0,0 +1,33 @@ +>>> +GET /robots.txt HTTP/1.1 +Host: nghttp2.org +User-Agent: curl/7.61.0 +Accept: */* +Connection: Upgrade, HTTP2-Settings +Upgrade: h2c +HTTP2-Settings: AAMAAABkAARAAAAAAAIAAAAA + + +<<< +HTTP/1.1 200 OK +Content-Type: text/html +Content-Length: 12 + +Hello World! 
+ + +>>> +GET /robots.txt HTTP/1.1 +Host: nghttp2.org +User-Agent: curl/7.61.0 +Accept: */* +Connection: Upgrade, HTTP2-Settings +Upgrade: h2c +HTTP2-Settings: AAMAAABkAARAAAAAAAIAAAAA + + +<<< +HTTP/1.1 101 Switching Protocols +Connection: Upgrade +Upgrade: h2c + diff --git a/rust/htp/tests/files/115-auth-bearer.t b/rust/htp/tests/files/115-auth-bearer.t new file mode 100644 index 000000000000..b0c6e772a8e0 --- /dev/null +++ b/rust/htp/tests/files/115-auth-bearer.t @@ -0,0 +1,8 @@ +>>> +GET /resource HTTP/1.1 +Host: server.example.com +Authorization: Bearer mF_9.B5f-4.1JqM + + + + diff --git a/rust/htp/tests/files/116-request-compression.t b/rust/htp/tests/files/116-request-compression.t new file mode 100644 index 000000000000..6bcc208d693e Binary files /dev/null and b/rust/htp/tests/files/116-request-compression.t differ diff --git a/rust/htp/tests/files/117-request-response-compression.t b/rust/htp/tests/files/117-request-response-compression.t new file mode 100644 index 000000000000..66856f335116 Binary files /dev/null and b/rust/htp/tests/files/117-request-response-compression.t differ diff --git a/rust/htp/tests/files/118-post.t b/rust/htp/tests/files/118-post.t new file mode 100644 index 000000000000..1044d569584e --- /dev/null +++ b/rust/htp/tests/files/118-post.t @@ -0,0 +1,16 @@ +>>> +POST / HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla +Content-Length: 12 + +Hello World! +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/119-ambiguous-eol.t b/rust/htp/tests/files/119-ambiguous-eol.t new file mode 100644 index 000000000000..ea520b8bfe3a --- /dev/null +++ b/rust/htp/tests/files/119-ambiguous-eol.t @@ -0,0 +1,11 @@ +>>> +POST / HTTP/1.0 + +>>> + +<<< +<<< +HTTP/1.0 200 OK + +<<< + diff --git a/rust/htp/tests/files/12-connect-request.t b/rust/htp/tests/files/12-connect-request.t new file mode 100644 index 000000000000..89faf8e7a284 --- /dev/null +++ b/rust/htp/tests/files/12-connect-request.t @@ -0,0 +1,21 @@ +>>> +CONNECT www.ssllabs.com:443 HTTP/1.0 + + +<<< +HTTP/1.1 405 Method Not Allowed +Date: Sat, 12 Dec 2009 05:08:45 GMT +Server: Apache/2.2.14 (Unix) mod_ssl/2.2.14 OpenSSL/0.9.8g PHP/5.3.0 +Allow: GET,HEAD,POST,OPTIONS,TRACE +Vary: Accept-Encoding +Content-Length: 230 +Connection: close +Content-Type: text/html; charset=iso-8859-1 + + + +405 Method Not Allowed + +

Method Not Allowed

+

The requested method CONNECT is not allowed for the URL /.

+ \ No newline at end of file diff --git a/rust/htp/tests/files/120-request-gap.t b/rust/htp/tests/files/120-request-gap.t new file mode 100644 index 000000000000..451d7d41bfd3 --- /dev/null +++ b/rust/htp/tests/files/120-request-gap.t @@ -0,0 +1,43 @@ +>>> +POST / HTTP/1.1 +User-Agent: curl/7.18.2 (i486-pc-linux-gnu) libcurl/7.18.2 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.8 libssh2/0.18 +Accept: */* +Content-Length: 216 +Expect: 100-continue +Content-Type: multipart/form-data; boundary=----------------------------07869933ca1b + + +<<< +HTTP/1.1 100 Continue +Header1: This +Header2: That + + +>>> +------------------------------07869933ca1b +Content-Disposition: form-data; name="file"; filename="404.php" +Content-Type: application/octet-stream + + +>>> +<> +"404" +>>> +; ?> +>>> + +------------------------------07869933ca1b-- + +<<< +HTTP/1.1 200 OK +Date: Tue, 03 Nov 2009 09:27:47 GMT +Server: Apache +Last-Modified: Thu, 30 Apr 2009 12:20:49 GMT +ETag: "2dcada-2d-468c4b9ec6a40" +Accept-Ranges: bytes +Content-Length: 45 +Vary: Accept-Encoding +Content-Type: text/html + +

<html><body><h1>It works!</h1></body></html>

diff --git a/rust/htp/tests/files/121-response-gap.t b/rust/htp/tests/files/121-response-gap.t new file mode 100644 index 000000000000..7b3cdf127a94 --- /dev/null +++ b/rust/htp/tests/files/121-response-gap.t @@ -0,0 +1,17 @@ +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hell +<>< +o Wo +<<< +rld! diff --git a/rust/htp/tests/files/122-response-body-data.t b/rust/htp/tests/files/122-response-body-data.t new file mode 100644 index 000000000000..020bebdee624 --- /dev/null +++ b/rust/htp/tests/files/122-response-body-data.t @@ -0,0 +1,6 @@ +<<< +1 +2 +<<< +3 +4 \ No newline at end of file diff --git a/rust/htp/tests/files/123-response-header-bug.t b/rust/htp/tests/files/123-response-header-bug.t new file mode 100644 index 000000000000..dd980b80fba2 --- /dev/null +++ b/rust/htp/tests/files/123-response-header-bug.t @@ -0,0 +1,14 @@ +>>> +GET http://www.example.com:XXX/?p=%20 HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/124-response-incomplete.t b/rust/htp/tests/files/124-response-incomplete.t new file mode 100644 index 000000000000..4730570eee78 --- /dev/null +++ b/rust/htp/tests/files/124-response-incomplete.t @@ -0,0 +1,10 @@ +>>> +GET /index.html HTTP/1.0 +Content-Type: text/html +Host: www.openinfosecfoundation.org +User-Agent: This is dummy message body + +<<< +HTTP/1.0 200 OK +Content-Length: 7 +Content-Type: text/html diff --git a/rust/htp/tests/files/13-compressed-response-gzip-ct.t b/rust/htp/tests/files/13-compressed-response-gzip-ct.t new file mode 100644 index 000000000000..d5a2e317438b Binary files /dev/null and b/rust/htp/tests/files/13-compressed-response-gzip-ct.t differ diff --git a/rust/htp/tests/files/14-compressed-response-gzip-chunked.t b/rust/htp/tests/files/14-compressed-response-gzip-chunked.t new file mode 100644 index 000000000000..bae8a2d3cee6 Binary files /dev/null and b/rust/htp/tests/files/14-compressed-response-gzip-chunked.t differ diff --git a/rust/htp/tests/files/15-connect-complete.t b/rust/htp/tests/files/15-connect-complete.t new file mode 100644 index 000000000000..071d0643ba1a Binary files /dev/null and b/rust/htp/tests/files/15-connect-complete.t differ diff --git a/rust/htp/tests/files/16-connect-extra.t b/rust/htp/tests/files/16-connect-extra.t new file mode 100644 index 000000000000..9c08f17f78ba --- /dev/null +++ b/rust/htp/tests/files/16-connect-extra.t @@ -0,0 +1,32 @@ +>>> +CONNECT www.feistyduck.com:80 HTTP/1.1 +Host: www.feistyduck.com + +HEAD / HTTP/1.0 + + +<<< +HTTP/1.1 301 Moved Permanently +Date: Wed, 06 Jan 2010 17:41:34 GMT +Server: Apache +Location: https://www.feistyduck.com/ +Vary: Accept-Encoding +Content-Length: 235 +Content-Type: text/html; charset=iso-8859-1 + + + +301 Moved Permanently + +

Moved Permanently

+

The document has moved here.

+ + +HTTP/1.1 301 Moved Permanently +Date: Wed, 06 Jan 2010 17:41:46 GMT +Server: Apache +Location: https://www.feistyduck.com/ +Vary: Accept-Encoding +Connection: close +Content-Type: text/html; charset=iso-8859-1 + diff --git a/rust/htp/tests/files/17-multipart-1.t b/rust/htp/tests/files/17-multipart-1.t new file mode 100644 index 000000000000..7c083c68b9f9 --- /dev/null +++ b/rust/htp/tests/files/17-multipart-1.t @@ -0,0 +1,41 @@ +>>> +POST /upload.php?qsp1=1&%20p%20q=2 HTTP/1.1 +Host: 192.168.3.100:8080 +User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 (.NET CLR 3.5.30729) +Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 +Accept-Language: en-us,en;q=0.5 +Accept-Encoding: gzip,deflate +Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7 +Keep-Alive: 300 +Connection: keep-alive +Content-Type: multipart/form-data; boundary=---------------------------41184676334 +Content-Length: 610 + +-----------------------------41184676334 +Content-Disposition: form-data; name="field1" + +0123456789 +-----------------------------41184676334 +Content-Disposition: form-data; name="field2" + +9876543210 +-----------------------------41184676334 +Content-Disposition: form-data; name="file1"; filename="New Text Document.txt" +Content-Type: text/plain + +FFFFFFFFFFFFFFFFFFFFFFFFFFFF +-----------------------------41184676334 +Content-Disposition: form-data; name="file2"; filename="New Text Document.txt" +Content-Type: text/plain + +FFFFFFFFFFFFFFFFFFFFFFFFFFFF +-----------------------------41184676334-- + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/18-compressed-response-deflate.t b/rust/htp/tests/files/18-compressed-response-deflate.t new file mode 100644 index 000000000000..b70940ef5c51 Binary files /dev/null and b/rust/htp/tests/files/18-compressed-response-deflate.t differ diff --git a/rust/htp/tests/files/19-urlencoded-test.t b/rust/htp/tests/files/19-urlencoded-test.t new file mode 100644 index 000000000000..21d7f2724dc2 --- /dev/null +++ b/rust/htp/tests/files/19-urlencoded-test.t @@ -0,0 +1,15 @@ +>>> +POST /?p=1&q=2 HTTP/1.0 +Content-Type: application/x-www-form-urlencoded +Content-Length: 11 + +p=3&q=4&z=5 +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/20-ambiguous-host.t b/rust/htp/tests/files/20-ambiguous-host.t new file mode 100644 index 000000000000..bb3e55123237 --- /dev/null +++ b/rust/htp/tests/files/20-ambiguous-host.t @@ -0,0 +1,58 @@ +>>> +GET http://example.com/1 HTTP/1.1 +Host: example.com + + +<<< +HTTP/1.1 200 OK +Date: Mon, 26 Apr 2010 13:56:31 GMT +Content-Length: 8 + +12345678 +>>> +GET http://example.com/2 HTTP/1.1 +Host: foo.com + + +<<< +HTTP/1.1 200 OK +Date: Mon, 26 Apr 2010 13:56:31 GMT +Content-Length: 8 + +12345678 +>>> +POST http://www.example.com:8001/3 HTTP/1.1 +Host: www.example.com:8001 +Content-Length: 8 + +12345678 +<<< +HTTP/1.1 200 OK +Date: Mon, 26 Apr 2010 13:56:31 GMT +Content-Length: 8 + +12345678 +>>> +POST http://www.example.com:8002/4 HTTP/1.1 +Host: www.example.com:8003 +Content-Length: 8 + +12345678 +<<< +HTTP/1.1 200 OK +Date: Mon, 26 Apr 2010 13:56:31 GMT +Content-Length: 8 + +12345678 +>>> +POST http://www.example.com:80/5 HTTP/1.1 +Host: www.example.com +Content-Length: 8 + +12345678 +<<< +HTTP/1.1 200 OK +Date: Mon, 26 Apr 2010 13:56:31 GMT +Content-Length: 8 + +12345678 \ No newline at end of file diff --git 
a/rust/htp/tests/files/21-http09.t b/rust/htp/tests/files/21-http09.t new file mode 100644 index 000000000000..5359a7f86369 --- /dev/null +++ b/rust/htp/tests/files/21-http09.t @@ -0,0 +1,11 @@ +>>> +GET /?foo=bar + +<<< + + +301 Moved Permanently + +

Moved Permanently

+

The document has moved.

+ diff --git a/rust/htp/tests/files/22-http_1_1-host_missing b/rust/htp/tests/files/22-http_1_1-host_missing new file mode 100644 index 000000000000..53ca3e8f34e4 --- /dev/null +++ b/rust/htp/tests/files/22-http_1_1-host_missing @@ -0,0 +1,14 @@ +>>> +GET /?p=%20 HTTP/1.1 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/22-php-param-processing.t b/rust/htp/tests/files/22-php-param-processing.t new file mode 100644 index 000000000000..772a6d197164 --- /dev/null +++ b/rust/htp/tests/files/22-php-param-processing.t @@ -0,0 +1,14 @@ +>>> +GET /?%20p%20q%20=1&q=2&z%20w=3 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/23-http09-multiple.t b/rust/htp/tests/files/23-http09-multiple.t new file mode 100644 index 000000000000..5fcb04b17d31 --- /dev/null +++ b/rust/htp/tests/files/23-http09-multiple.t @@ -0,0 +1,12 @@ +>>> +GET /?foo=bar +GET /?foo=bar + +<<< + + +301 Moved Permanently + +

Moved Permanently

+

The document has moved.

+ diff --git a/rust/htp/tests/files/24-http09-explicit.t b/rust/htp/tests/files/24-http09-explicit.t new file mode 100644 index 000000000000..01989917611e --- /dev/null +++ b/rust/htp/tests/files/24-http09-explicit.t @@ -0,0 +1,13 @@ +>>> +GET /?foo=bar HTTP/0.9 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/25-small-chunks.t b/rust/htp/tests/files/25-small-chunks.t new file mode 100644 index 000000000000..fdbfc9e24c5e --- /dev/null +++ b/rust/htp/tests/files/25-small-chunks.t @@ -0,0 +1,41 @@ +>>> +GET +>>> + /?x=y +>>> + HTTP/1.0 +User-Agent: +>>> + Test +>>> + User +>>> + Agent +Host: www.example.com + + +<<< +HTTP/1.0 +<<< + 200 +<<< + OK +Date: +<<< + Mon, +<<< + 31 +<<< + Aug +<<< + 2009 +<<< + 20:25:50 +<<< + GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/26-request-headers-raw.t b/rust/htp/tests/files/26-request-headers-raw.t new file mode 100644 index 000000000000..fdbfc9e24c5e --- /dev/null +++ b/rust/htp/tests/files/26-request-headers-raw.t @@ -0,0 +1,41 @@ +>>> +GET +>>> + /?x=y +>>> + HTTP/1.0 +User-Agent: +>>> + Test +>>> + User +>>> + Agent +Host: www.example.com + + +<<< +HTTP/1.0 +<<< + 200 +<<< + OK +Date: +<<< + Mon, +<<< + 31 +<<< + Aug +<<< + 2009 +<<< + 20:25:50 +<<< + GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/27-request-trailer-raw.t b/rust/htp/tests/files/27-request-trailer-raw.t new file mode 100644 index 000000000000..1d72e717658f --- /dev/null +++ b/rust/htp/tests/files/27-request-trailer-raw.t @@ -0,0 +1,26 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: chunked +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + +b +p=012345678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/28-response-headers-raw.t b/rust/htp/tests/files/28-response-headers-raw.t new file mode 100644 index 000000000000..db1e07fc0fe5 --- /dev/null +++ b/rust/htp/tests/files/28-response-headers-raw.t @@ -0,0 +1,33 @@ +>>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: +<<< + Mon, +<<< + 31 Aug 2009 20:25:50 GMT +Server: +<<< + Apache +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +b +Hello World +1 +! +0 +Set-Cookie: +<<< + name= +<<< +value +Another-Header: +<<< + Header-Value + diff --git a/rust/htp/tests/files/29-response-trailer-raw.t b/rust/htp/tests/files/29-response-trailer-raw.t new file mode 100644 index 000000000000..db1e07fc0fe5 --- /dev/null +++ b/rust/htp/tests/files/29-response-trailer-raw.t @@ -0,0 +1,33 @@ +>>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: +<<< + Mon, +<<< + 31 Aug 2009 20:25:50 GMT +Server: +<<< + Apache +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +b +Hello World +1 +! 
+0 +Set-Cookie: +<<< + name= +<<< +value +Another-Header: +<<< + Header-Value + diff --git a/rust/htp/tests/files/30-get-ipv6.t b/rust/htp/tests/files/30-get-ipv6.t new file mode 100644 index 000000000000..baf3920a264d --- /dev/null +++ b/rust/htp/tests/files/30-get-ipv6.t @@ -0,0 +1,15 @@ +>>> +GET http://[::1]:8080/?p=%20 HTTP/1.0 +Host: [::1]:8080 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/31-get-request-line-nul.t b/rust/htp/tests/files/31-get-request-line-nul.t new file mode 100644 index 000000000000..3de2eb49dd50 Binary files /dev/null and b/rust/htp/tests/files/31-get-request-line-nul.t differ diff --git a/rust/htp/tests/files/32-invalid-hostname.t b/rust/htp/tests/files/32-invalid-hostname.t new file mode 100644 index 000000000000..a034785c6fc1 --- /dev/null +++ b/rust/htp/tests/files/32-invalid-hostname.t @@ -0,0 +1,15 @@ +>>> +GET http://www..example.com/?p=%20 HTTP/1.0 +Host: www example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/33-invalid-hostname.t b/rust/htp/tests/files/33-invalid-hostname.t new file mode 100644 index 000000000000..ad18d33f6c35 --- /dev/null +++ b/rust/htp/tests/files/33-invalid-hostname.t @@ -0,0 +1,15 @@ +>>> +GET http://www.example.com:XXX/?p=%20 HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/34-invalid-hostname.t b/rust/htp/tests/files/34-invalid-hostname.t new file mode 100644 index 000000000000..e886ebf73419 --- /dev/null +++ b/rust/htp/tests/files/34-invalid-hostname.t @@ -0,0 +1,15 @@ +>>> +GET /?p=%20 HTTP/1.0 +Host: www.example.com: +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/35-early-response.t b/rust/htp/tests/files/35-early-response.t new file mode 100644 index 000000000000..4b205db7d3db --- /dev/null +++ b/rust/htp/tests/files/35-early-response.t @@ -0,0 +1,18 @@ +>>> +POST / HTTP/1.0 +Content-Length: 12 +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla + + +<<< +HTTP/1.0 400 Bad Request +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 11 + +Bad Request +>>> +p=0123456789 \ No newline at end of file diff --git a/rust/htp/tests/files/36-invalid-request-1-invalid-c-l.t b/rust/htp/tests/files/36-invalid-request-1-invalid-c-l.t new file mode 100644 index 000000000000..42980f4e5b94 --- /dev/null +++ b/rust/htp/tests/files/36-invalid-request-1-invalid-c-l.t @@ -0,0 +1,17 @@ +>>> +POST / HTTP/1.0 +Host: www.example.com +Content-Length: ABC +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla + +p=0123456789 +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/37-invalid-request-2-t-e-and-c-l.t b/rust/htp/tests/files/37-invalid-request-2-t-e-and-c-l.t new file mode 100644 index 000000000000..8edab9c1cac6 --- /dev/null +++ b/rust/htp/tests/files/37-invalid-request-2-t-e-and-c-l.t @@ -0,0 +1,28 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: chunked +Content-Length: 12 +Host: www.example.com +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + +b +p=012345678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/38-invalid-request-3-invalid-t-e.t b/rust/htp/tests/files/38-invalid-request-3-invalid-t-e.t new file mode 100644 index 000000000000..ada8dd5c6d10 --- /dev/null +++ b/rust/htp/tests/files/38-invalid-request-3-invalid-t-e.t @@ -0,0 +1,27 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: ABC +Host: www.example.com +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + +b +p=012345678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/39-auto-destroy-crash.t b/rust/htp/tests/files/39-auto-destroy-crash.t new file mode 100644 index 000000000000..b892b0ab243b --- /dev/null +++ b/rust/htp/tests/files/39-auto-destroy-crash.t @@ -0,0 +1,34 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! +>>> +GET / HTTP/1.1 +Host: www.example.com. 
+ + +<<< +HTTP/1.0 200 OK +Content-Length: 12 +>>> +GET / HTTP/1.1 +Host: WwW.ExamPle.cOm + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 +>>> +GET / HTTP/1.1 +Host: www.example.com:80 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 \ No newline at end of file diff --git a/rust/htp/tests/files/40-auth-basic.t b/rust/htp/tests/files/40-auth-basic.t new file mode 100644 index 000000000000..86060012d1c6 --- /dev/null +++ b/rust/htp/tests/files/40-auth-basic.t @@ -0,0 +1,5 @@ +>>> +GET / HTTP/1.0 +Host: www.example.com +Authorization: Basic aXZhbnI6c2VjcmV0 + diff --git a/rust/htp/tests/files/41-auth-digest.t b/rust/htp/tests/files/41-auth-digest.t new file mode 100644 index 000000000000..53065b03fccd --- /dev/null +++ b/rust/htp/tests/files/41-auth-digest.t @@ -0,0 +1,8 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com +Authorization: Digest username="ivanr", realm="Book Review", + nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d", + uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb", + qop=auth, nc=00000004, cnonce="c3bcee9534c051a0" + diff --git a/rust/htp/tests/files/42-unknown-method_only.t b/rust/htp/tests/files/42-unknown-method_only.t new file mode 100644 index 000000000000..8c9a603d21c0 --- /dev/null +++ b/rust/htp/tests/files/42-unknown-method_only.t @@ -0,0 +1,3 @@ +>>> +HELLO + diff --git a/rust/htp/tests/files/43-invalid-protocol.t b/rust/htp/tests/files/43-invalid-protocol.t new file mode 100644 index 000000000000..0e4c0adc0772 --- /dev/null +++ b/rust/htp/tests/files/43-invalid-protocol.t @@ -0,0 +1,3 @@ +>>> +GET / JUNK/1.0 + diff --git a/rust/htp/tests/files/44-auth-basic-invalid.t b/rust/htp/tests/files/44-auth-basic-invalid.t new file mode 100644 index 000000000000..ed44445c6ee9 --- /dev/null +++ b/rust/htp/tests/files/44-auth-basic-invalid.t @@ -0,0 +1,5 @@ +>>> +GET / HTTP/1.0 +Host: www.example.com +Authorization: Basic notBase64:EncodedStuff + diff --git a/rust/htp/tests/files/45-auth-digest-unquoted-username.t 
b/rust/htp/tests/files/45-auth-digest-unquoted-username.t new file mode 100644 index 000000000000..855e00edad15 --- /dev/null +++ b/rust/htp/tests/files/45-auth-digest-unquoted-username.t @@ -0,0 +1,8 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com +Authorization: Digest username=ivanr, realm="Book Review", + nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d", + uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb", + qop=auth, nc=00000004, cnonce="c3bcee9534c051a0" + diff --git a/rust/htp/tests/files/46-auth-digest-invalid-username.t b/rust/htp/tests/files/46-auth-digest-invalid-username.t new file mode 100644 index 000000000000..dbd1c43a459f --- /dev/null +++ b/rust/htp/tests/files/46-auth-digest-invalid-username.t @@ -0,0 +1,8 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com +Authorization: Digest username = ivanr, realm="Book Review", + nonce="OgmPjb/jAwA=7c5a49c2ed9416dba1b04b5307d6d935f74a859d", + uri="/review/", algorithm=MD5, response="3c430d26043cc306e0282635929d57cb", + qop=auth, nc=00000004, cnonce="c3bcee9534c051a0" + diff --git a/rust/htp/tests/files/47-auth-unrecognized.t b/rust/htp/tests/files/47-auth-unrecognized.t new file mode 100644 index 000000000000..5d51455017ee --- /dev/null +++ b/rust/htp/tests/files/47-auth-unrecognized.t @@ -0,0 +1,5 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com +Authorization: Turbo customAuthDataHere + diff --git a/rust/htp/tests/files/48-invalid-response-headers-1.t b/rust/htp/tests/files/48-invalid-response-headers-1.t new file mode 100644 index 000000000000..d10582af69ab --- /dev/null +++ b/rust/htp/tests/files/48-invalid-response-headers-1.t @@ -0,0 +1,17 @@ +>>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 +No Colon +Lws : After Header Name +Header@Name: Not Token + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/49-invalid-response-headers-2.t b/rust/htp/tests/files/49-invalid-response-headers-2.t new file mode 100644 index 000000000000..16970f9a1d8d --- /dev/null +++ b/rust/htp/tests/files/49-invalid-response-headers-2.t @@ -0,0 +1,15 @@ +>>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 +: Empty Name + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/50-util.t b/rust/htp/tests/files/50-util.t new file mode 100644 index 000000000000..e9edceb3a488 --- /dev/null +++ b/rust/htp/tests/files/50-util.t @@ -0,0 +1,14 @@ +>>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/51-get-ipv6-invalid.t b/rust/htp/tests/files/51-get-ipv6-invalid.t new file mode 100644 index 000000000000..bde929c8a630 --- /dev/null +++ b/rust/htp/tests/files/51-get-ipv6-invalid.t @@ -0,0 +1,15 @@ +>>> +GET http://[::1:8080/?p=%20 HTTP/1.0 +Host: [::1]:8080 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/52-invalid-path.t b/rust/htp/tests/files/52-invalid-path.t new file mode 100644 index 000000000000..97528e7d8a70 --- /dev/null +++ b/rust/htp/tests/files/52-invalid-path.t @@ -0,0 +1,15 @@ +>>> +GET invalid/path?p=%20 HTTP/1.0 +Host: [::1]:8080 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/53-path-utf8-none.t b/rust/htp/tests/files/53-path-utf8-none.t new file mode 100644 index 000000000000..9234cd94e8ce --- /dev/null +++ b/rust/htp/tests/files/53-path-utf8-none.t @@ -0,0 +1,15 @@ +>>> +GET /Ristic.txt HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/54-path-utf8-valid.t b/rust/htp/tests/files/54-path-utf8-valid.t new file mode 100644 index 000000000000..518918e767e8 --- /dev/null +++ b/rust/htp/tests/files/54-path-utf8-valid.t @@ -0,0 +1,15 @@ +>>> +GET /Risti%C4%87.txt HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/55-path-utf8-overlong-2.t b/rust/htp/tests/files/55-path-utf8-overlong-2.t new file mode 100644 index 000000000000..f78a0883a95c --- /dev/null +++ b/rust/htp/tests/files/55-path-utf8-overlong-2.t @@ -0,0 +1,15 @@ +>>> +GET /%c0%a6.txt HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/56-path-utf8-overlong-3.t b/rust/htp/tests/files/56-path-utf8-overlong-3.t new file mode 100644 index 000000000000..3184dc81aa46 --- /dev/null +++ b/rust/htp/tests/files/56-path-utf8-overlong-3.t @@ -0,0 +1,15 @@ +>>> +GET /%e0%80%a6.txt HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/57-path-utf8-overlong-4.t b/rust/htp/tests/files/57-path-utf8-overlong-4.t new file mode 100644 index 000000000000..cfccdbe1313c --- /dev/null +++ b/rust/htp/tests/files/57-path-utf8-overlong-4.t @@ -0,0 +1,15 @@ +>>> +GET /%f0%80%80%a6.txt HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/58-path-utf8-invalid.t b/rust/htp/tests/files/58-path-utf8-invalid.t new file mode 100644 index 000000000000..f3d58035a970 --- /dev/null +++ b/rust/htp/tests/files/58-path-utf8-invalid.t @@ -0,0 +1,15 @@ +>>> +GET /Risti%C4%87%80.txt HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/59-path-utf8-fullwidth.t b/rust/htp/tests/files/59-path-utf8-fullwidth.t new file mode 100644 index 000000000000..4321652656ff --- /dev/null +++ b/rust/htp/tests/files/59-path-utf8-fullwidth.t @@ -0,0 +1,15 @@ +>>> +GET /%EF%BC%86.txt HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/60-request-cookies-1.t b/rust/htp/tests/files/60-request-cookies-1.t new file mode 100644 index 000000000000..51aca6f07c72 --- /dev/null +++ b/rust/htp/tests/files/60-request-cookies-1.t @@ -0,0 +1,16 @@ +>>> +GET / HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla +Cookie: =0; p=1; q=2; =; z= + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/61-empty-line-between-requests.t b/rust/htp/tests/files/61-empty-line-between-requests.t new file mode 100644 index 000000000000..47a8c2120bae --- /dev/null +++ b/rust/htp/tests/files/61-empty-line-between-requests.t @@ -0,0 +1,19 @@ +>>> +GET /first HTTP/1.1 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! +>>> + +GET /second HTTP/1.1 + + +<<< +HTTP/1.0 200 OK +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/62-post-no-body.t b/rust/htp/tests/files/62-post-no-body.t new file mode 100644 index 000000000000..10a8d4b770ef --- /dev/null +++ b/rust/htp/tests/files/62-post-no-body.t @@ -0,0 +1,34 @@ +>>> +POST / HTTP/1.0 +Content-Length: 0 +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +9 +012345678 +1 +9 +0 + diff --git a/rust/htp/tests/files/63-post-chunked-invalid-1.t b/rust/htp/tests/files/63-post-chunked-invalid-1.t new file mode 100644 index 000000000000..eb5ef0c2f79a --- /dev/null +++ b/rust/htp/tests/files/63-post-chunked-invalid-1.t @@ -0,0 +1,26 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: chunked +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + +80000000 +p=012345678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/64-post-chunked-invalid-2.t b/rust/htp/tests/files/64-post-chunked-invalid-2.t new file mode 100644 index 000000000000..f5fc91da7bf2 --- /dev/null +++ b/rust/htp/tests/files/64-post-chunked-invalid-2.t @@ -0,0 +1,26 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: chunked +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + +-1 +p=012345678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/65-post-chunked-invalid-3.t b/rust/htp/tests/files/65-post-chunked-invalid-3.t new file mode 100644 index 000000000000..4076e0ba1b81 --- /dev/null +++ b/rust/htp/tests/files/65-post-chunked-invalid-3.t @@ -0,0 +1,26 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: chunked +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + + +p=012345678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/66-post-chunked-split-chunk.t b/rust/htp/tests/files/66-post-chunked-split-chunk.t new file mode 100644 index 000000000000..6f5dd486d94c --- /dev/null +++ b/rust/htp/tests/files/66-post-chunked-split-chunk.t @@ -0,0 +1,28 @@ +>>> +POST / HTTP/1.1 +Transfer-Encoding: chunked +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla +Cookie: 1 + +b +p=01234 +>>> +5678 +1 +9 +0 +Cookie: +>>> + 2 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/67-long-request-line.t b/rust/htp/tests/files/67-long-request-line.t new file mode 100644 index 000000000000..fa3f98480aa3 --- /dev/null +++ b/rust/htp/tests/files/67-long-request-line.t @@ -0,0 +1,16 @@ +>>> +GET /0123456789/ +>>> +0123456789/ HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/68-invalid-request-header.t b/rust/htp/tests/files/68-invalid-request-header.t new file mode 100644 index 000000000000..4e6d688eecd6 Binary files /dev/null and b/rust/htp/tests/files/68-invalid-request-header.t differ diff --git a/rust/htp/tests/files/69-long-response-header.t b/rust/htp/tests/files/69-long-response-header.t new file mode 100644 index 000000000000..822d3ca9daf1 --- /dev/null +++ b/rust/htp/tests/files/69-long-response-header.t @@ -0,0 +1,16 @@ +>>> +GET / HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache Apache Apache Apache +<<< +Apache Apache Apache Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/70-response-invalid-chunk-length.t b/rust/htp/tests/files/70-response-invalid-chunk-length.t new file mode 100644 index 000000000000..68b43e38d36e --- /dev/null +++ b/rust/htp/tests/files/70-response-invalid-chunk-length.t @@ -0,0 +1,18 @@ +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +g +012345678 +1 +9 +0 + diff --git a/rust/htp/tests/files/71-response-split-chunk.t b/rust/htp/tests/files/71-response-split-chunk.t new file mode 100644 index 000000000000..1d3f0919e720 --- /dev/null +++ b/rust/htp/tests/files/71-response-split-chunk.t @@ -0,0 +1,20 @@ +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +9 +01234 +<<< +5678 +1 +9 +0 + diff --git a/rust/htp/tests/files/72-response-split-body.t b/rust/htp/tests/files/72-response-split-body.t new file mode 100644 index 000000000000..db5ab9f72465 --- /dev/null +++ b/rust/htp/tests/files/72-response-split-body.t @@ -0,0 +1,16 @@ +>>> +GET /?p=%20 
HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello +<<< + World! \ No newline at end of file diff --git a/rust/htp/tests/files/73-response-te-and-cl.t b/rust/htp/tests/files/73-response-te-and-cl.t new file mode 100644 index 000000000000..46c646da92e5 --- /dev/null +++ b/rust/htp/tests/files/73-response-te-and-cl.t @@ -0,0 +1,19 @@ +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 10 +Transfer-Encoding: chunked + +9 +012345678 +1 +9 +0 + diff --git a/rust/htp/tests/files/74-response-multiple-cl.t b/rust/htp/tests/files/74-response-multiple-cl.t new file mode 100644 index 000000000000..556fb8fad308 --- /dev/null +++ b/rust/htp/tests/files/74-response-multiple-cl.t @@ -0,0 +1,14 @@ +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/75-response-invalid-cl.t b/rust/htp/tests/files/75-response-invalid-cl.t new file mode 100644 index 000000000000..8743d881e602 --- /dev/null +++ b/rust/htp/tests/files/75-response-invalid-cl.t @@ -0,0 +1,13 @@ +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: -1 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/76-response-no-body.t b/rust/htp/tests/files/76-response-no-body.t new file mode 100644 index 000000000000..831571cff2ef --- /dev/null +++ b/rust/htp/tests/files/76-response-no-body.t @@ -0,0 +1,34 @@ +>>> +POST /?qsp1=1&%20p%20q=2&u=Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_Ivan+Risti%C4%87_ HTTP/1.0 +Content-Length: 12 +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla + +p=0123456789 +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 0 + + +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +9 +012345678 +1 +9 +0 + diff --git a/rust/htp/tests/files/77-response-folded-headers.t b/rust/htp/tests/files/77-response-folded-headers.t new file mode 100644 index 000000000000..dd33c07a0f42 --- /dev/null +++ b/rust/htp/tests/files/77-response-folded-headers.t @@ -0,0 +1,35 @@ +>>> +POST / HTTP/1.0 +Content-Length: 12 +Content-Type: application/x-www-form-urlencoded +User-Agent: Mozilla + +p=0123456789 +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache + Server +Connection: close +Content-Type: text/html +Content-Length: 0 + + +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apach2 +Connection: close +Content-Type: text/html +Transfer-Encoding: chunked + +9 +012345678 +1 +9 +0 + diff --git a/rust/htp/tests/files/78-response-no-status-headers.t b/rust/htp/tests/files/78-response-no-status-headers.t new file mode 100644 index 000000000000..82e8d2a609e9 --- /dev/null +++ b/rust/htp/tests/files/78-response-no-status-headers.t @@ -0,0 +1,8 @@ +>>> +GET / HTTP/1.0 +User-Agent: Mozilla + + +<<< +Hello +World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/79-connect-invalid-hostport.t b/rust/htp/tests/files/79-connect-invalid-hostport.t new file mode 100644 index 000000000000..9258b7b41e34 --- /dev/null +++ b/rust/htp/tests/files/79-connect-invalid-hostport.t @@ -0,0 +1,32 @@ +>>> +CONNECT [:80 HTTP/1.1 +Host: www.feistyduck.com + +HEAD / HTTP/1.0 + + +<<< +HTTP/1.1 301 Moved Permanently +Date: Wed, 06 Jan 2010 17:41:34 GMT +Server: Apache +Location: https://www.feistyduck.com/ +Vary: Accept-Encoding +Content-Length: 235 +Content-Type: text/html; charset=iso-8859-1 + + + +301 Moved Permanently + +

Moved Permanently

+

The document has moved here.

+ + +HTTP/1.1 301 Moved Permanently +Date: Wed, 06 Jan 2010 17:41:46 GMT +Server: Apache +Location: https://www.feistyduck.com/ +Vary: Accept-Encoding +Connection: close +Content-Type: text/html; charset=iso-8859-1 + diff --git a/rust/htp/tests/files/80-hostname-invalid-1.t b/rust/htp/tests/files/80-hostname-invalid-1.t new file mode 100644 index 000000000000..f5e28c1ef7ce --- /dev/null +++ b/rust/htp/tests/files/80-hostname-invalid-1.t @@ -0,0 +1,15 @@ +>>> +GET http://www.example.com/?p=%20 HTTP/1.0 +Host: [:80 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/81-hostname-invalid-2.t b/rust/htp/tests/files/81-hostname-invalid-2.t new file mode 100644 index 000000000000..d3065c9ebb69 --- /dev/null +++ b/rust/htp/tests/files/81-hostname-invalid-2.t @@ -0,0 +1,15 @@ +>>> +GET http://[:80/?p=%20 HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/82-put.t b/rust/htp/tests/files/82-put.t new file mode 100644 index 000000000000..99314624b4bf --- /dev/null +++ b/rust/htp/tests/files/82-put.t @@ -0,0 +1,16 @@ +>>> +PUT / HTTP/1.0 +Host: www.example.com +User-Agent: Mozilla +Content-Length: 12 + +Hello World! +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
\ No newline at end of file diff --git a/rust/htp/tests/files/83-auth-digest-invalid-username-2.t b/rust/htp/tests/files/83-auth-digest-invalid-username-2.t new file mode 100644 index 000000000000..2344a401fd0d --- /dev/null +++ b/rust/htp/tests/files/83-auth-digest-invalid-username-2.t @@ -0,0 +1,5 @@ +>>> +GET / HTTP/1.1 +Host: www.example.com +Authorization: Digest username="ivanr + diff --git a/rust/htp/tests/files/84-response-no-status-headers-2.t b/rust/htp/tests/files/84-response-no-status-headers-2.t new file mode 100644 index 000000000000..239e08aa6010 --- /dev/null +++ b/rust/htp/tests/files/84-response-no-status-headers-2.t @@ -0,0 +1,7 @@ +>>> +GET / HTTP/1.0 +User-Agent: Mozilla + + +<<< +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/85-zero-byte-request-timeout.t b/rust/htp/tests/files/85-zero-byte-request-timeout.t new file mode 100644 index 000000000000..0cc0e09ad14d --- /dev/null +++ b/rust/htp/tests/files/85-zero-byte-request-timeout.t @@ -0,0 +1,16 @@ +<<< +HTTP/1.0 408 Request Time-out +Server: AkamaiGHost +Mime-Version: 1.0 +Date: Fri, 27 Sep 2013 16:37:37 GMT +Content-Type: text/html +Content-Length: 218 +Expires: Fri, 27 Sep 2013 16:37:37 GMT + + +Request Timeout + +

Request Timeout

+The server timed out while waiting for the browser's request.

+Reference #2.9efcd4d9.1380708056.0 + diff --git a/rust/htp/tests/files/86-partial-request-timeout.t b/rust/htp/tests/files/86-partial-request-timeout.t new file mode 100644 index 000000000000..97dc4bb15e73 --- /dev/null +++ b/rust/htp/tests/files/86-partial-request-timeout.t @@ -0,0 +1,18 @@ +>>> +GET +<<< +HTTP/1.0 408 Request Time-out +Server: AkamaiGHost +Mime-Version: 1.0 +Date: Fri, 27 Sep 2013 16:37:37 GMT +Content-Type: text/html +Content-Length: 218 +Expires: Fri, 27 Sep 2013 16:37:37 GMT + + +Request Timeout + +

Request Timeout

+The server timed out while waiting for the browser's request.

+Reference #2.9efcd4d9.1380708056.0 + diff --git a/rust/htp/tests/files/87-issue-55-incorrect-host-ambiguous-warning.t b/rust/htp/tests/files/87-issue-55-incorrect-host-ambiguous-warning.t new file mode 100644 index 000000000000..463ce5d3004f --- /dev/null +++ b/rust/htp/tests/files/87-issue-55-incorrect-host-ambiguous-warning.t @@ -0,0 +1,8 @@ +>>> +CONNECT www.example.com:443 HTTP/1.1 +Host: www.example.com:443 +Accept: */* +Content-Type: text/html +Proxy-Connection: Keep-Alive +Content-length: 0 + diff --git a/rust/htp/tests/files/88-response-multiple-cl-mismatch.t b/rust/htp/tests/files/88-response-multiple-cl-mismatch.t new file mode 100644 index 000000000000..a1c17c81e893 --- /dev/null +++ b/rust/htp/tests/files/88-response-multiple-cl-mismatch.t @@ -0,0 +1,14 @@ +>>> +GET / HTTP/1.0 + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 +Content-Length: 11 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/89-get-whitespace.t b/rust/htp/tests/files/89-get-whitespace.t new file mode 100644 index 000000000000..0bb5b2ddadd7 --- /dev/null +++ b/rust/htp/tests/files/89-get-whitespace.t @@ -0,0 +1,14 @@ +>>> + GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! \ No newline at end of file diff --git a/rust/htp/tests/files/90-request-uri-too-large.t b/rust/htp/tests/files/90-request-uri-too-large.t new file mode 100644 index 000000000000..eef176a73459 --- /dev/null +++ b/rust/htp/tests/files/90-request-uri-too-large.t @@ -0,0 +1,17 @@ +>>> +GET /blaaaaaaaaaaaaaaaaaaaaaaaaa +<<< +HTTP/1.0 414 Request-URI Too Large +Server: MyBigFatServer +Mime-Version: 1.0 +Date: Fri, 27 Sep 2013 16:37:37 GMT +Content-Type: text/html +Content-Length: 139 +Expires: Fri, 27 Sep 2013 16:37:37 GMT + + +Request-URI Too Large + +

Request-URI Too Large

+The Request-URI is Too Large + diff --git a/rust/htp/tests/files/91-request-unexpected-body.t b/rust/htp/tests/files/91-request-unexpected-body.t new file mode 100644 index 000000000000..358da1225607 --- /dev/null +++ b/rust/htp/tests/files/91-request-unexpected-body.t @@ -0,0 +1,16 @@ +>>> +POST / HTTP/1.1 +Host: localhost +Content-Type: application/x-www-form-urlencoded + +login=foo&password=bar + +<<< +HTTP/1.1 200 OK +Content-Length: 0 + + +>>> +GET / HTTP/1.1 +Host: localhost + diff --git a/rust/htp/tests/files/92-http_0_9-method_only.t b/rust/htp/tests/files/92-http_0_9-method_only.t new file mode 100644 index 000000000000..5c7c9b2ffcba --- /dev/null +++ b/rust/htp/tests/files/92-http_0_9-method_only.t @@ -0,0 +1,3 @@ +>>> +GET / + diff --git a/rust/htp/tests/files/93-compressed-response-deflateasgzip.t b/rust/htp/tests/files/93-compressed-response-deflateasgzip.t new file mode 100644 index 000000000000..e6c2eb5e4786 Binary files /dev/null and b/rust/htp/tests/files/93-compressed-response-deflateasgzip.t differ diff --git a/rust/htp/tests/files/94-compressed-response-multiple.t b/rust/htp/tests/files/94-compressed-response-multiple.t new file mode 100644 index 000000000000..4d0fdf74f984 Binary files /dev/null and b/rust/htp/tests/files/94-compressed-response-multiple.t differ diff --git a/rust/htp/tests/files/95-compressed-response-gzipasdeflate.t b/rust/htp/tests/files/95-compressed-response-gzipasdeflate.t new file mode 100644 index 000000000000..8076f832cff3 Binary files /dev/null and b/rust/htp/tests/files/95-compressed-response-gzipasdeflate.t differ diff --git a/rust/htp/tests/files/96-compressed-response-lzma.t b/rust/htp/tests/files/96-compressed-response-lzma.t new file mode 100644 index 000000000000..a5ea306d80d3 Binary files /dev/null and b/rust/htp/tests/files/96-compressed-response-lzma.t differ diff --git a/rust/htp/tests/files/97-requests-cut.t b/rust/htp/tests/files/97-requests-cut.t new file mode 100644 index 000000000000..2d2da6c740e9 --- 
/dev/null +++ b/rust/htp/tests/files/97-requests-cut.t @@ -0,0 +1,9 @@ +>>> +GET /?p=%20 HTTP/1.1 +User-Agent: Mozilla + +G +>>> +ET /?p=%21 HTTP/1.1 +User-Agent: Mozilla + diff --git a/rust/htp/tests/files/98-responses-cut.t b/rust/htp/tests/files/98-responses-cut.t new file mode 100644 index 000000000000..5bd8164b8575 --- /dev/null +++ b/rust/htp/tests/files/98-responses-cut.t @@ -0,0 +1,26 @@ +>>> +GET /?p=%20 HTTP/1.1 +User-Agent: Mozilla + +GET /?p=%21 HTTP/1.1 +User-Agent: Mozilla + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 14 + +Hello World! +H +<<< +TTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 13 + +Hello People! \ No newline at end of file diff --git a/rust/htp/tests/files/99-get.t b/rust/htp/tests/files/99-get.t new file mode 100644 index 000000000000..5c892a735249 --- /dev/null +++ b/rust/htp/tests/files/99-get.t @@ -0,0 +1,15 @@ +>>> +GET /%2e%2e/images.gif HTTP/1.1 +Host: www.ExAmPlE.cOM +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World! 
diff --git a/rust/htp/tests/files/anchor.empty b/rust/htp/tests/files/anchor.empty new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/rust/htp/tests/files/generate-gzip-tests.php b/rust/htp/tests/files/generate-gzip-tests.php new file mode 100755 index 000000000000..021c4369802f --- /dev/null +++ b/rust/htp/tests/files/generate-gzip-tests.php @@ -0,0 +1,322 @@ +#!/usr/bin/env php +compressionMethod = $m; + } + + public function setCrc32($crc) { + $this->crc32 = $crc; + } + + public function setInputSize($len) { + $this->isize = $len; + } + + public function setXfl($xfl) { + $this->xfl = $xfl; + } + + public function setFilename($filename) { + $this->filename = $filename; + } + + public function setComment($comment) { + $this->comment = $comment; + } + + public function setExtra($extra) { + $this->extra = $extra; + } + + public function setTextFlag($b) { + $this->textFlag = $b; + } + + public function useHeaderCrc($b) { + $this->useHeaderCrc = $b; + } + + public function setHeaderCrc($crc) { + $this->headerCrc = $crc; + } + + public function setFlags($f) { + $this->forcedFlags = $f; + } + + public function getFlags() { + if ($this->forcedFlags !== false) { + return $this->forcedFlags; + } + + $flags = 0; + + // FTEXT (bit 0) + if ($this->textFlag) { + $flags = $flags | 0x01; + } + + // FHCRC (bit 1) + if ($this->useHeaderCrc) { + $flags = $flags | 0x02; + } + + // FEXTRA (bit 2) + if ($this->extra !== false) { + $flags = $flags | 0x04; + } + + // FNAME (bit 3) + if ($this->filename !== false) { + $flags = $flags | 0x08; + } + + // FCOMMENT (bit 4, i.e. 0x10 per RFC 1952; 0x16 would also set FHCRC and FEXTRA) + if ($this->comment !== false) { + $flags = $flags | 0x10; + } + + return $flags; + } + + public function setData($data) { + $this->data = $data; + } + + public function writeTo($filename) { + $fp = fopen($filename, "w+"); + $this->write($fp); + fclose($fp); + } + + public function write($fp) { + $header = ""; + + // header (ID1 + ID2) + $header .= "\x1f\x8b"; + + // compression method (CM) + $header .= pack("C", $this->compressionMethod); + + 
// flags (FLG) + $header .= pack("C", $this->getFlags()); + + // mtime (MTIME) + $header .= "\x9c\x54\xf4\x50"; + + // extra flags (XFL) + $header .= pack("C", $this->xfl); + + // operating system (OS) + $header .= "\xff"; + + // FEXTRA + if ($this->extra !== false) { + $header .= pack("v", strlen($this->extra)); + $header .= $this->extra; + } + + // FNAME + if ($this->filename !== false) { + $header .= $this->filename; + $header .= "\x00"; + } + + // FCOMMENT + if ($this->comment !== false) { + $header .= $this->comment; + $header .= "\x00"; + } + + fwrite($fp, $header); + + // FHCRC: compute the real CRC16 unless a (deliberately wrong) value was forced via setHeaderCrc() + if ($this->useHeaderCrc) { + if ($this->headerCrc === false) { + // "The CRC16 consists of the two least significant bytes of the CRC32 [...]" + fwrite($fp, pack("v", crc32($header))); + } else { + fwrite($fp, pack("v", $this->headerCrc)); + } + } + + // compressed blocks + $compressedData = gzcompress($this->data); + // The gzcompress() function does not produce output that's fully compatible with gzip, + // so we need to strip out the extra data: remove 2 bytes from the beginning + // (CMF and FLG) and 4 bytes from the end (Adler CRC). 
+ $compressedData = substr($compressedData, 2, strlen($compressedData) - 6); + fwrite($fp, $compressedData); + + // CRC32 + if ($this->crc32 === false) { + fwrite($fp, pack("V", crc32($this->data))); + } else { + fwrite($fp, pack("V", $this->crc32)); + } + + // uncompressed size (ISIZE) + if ($this->isize === false) { + fwrite($fp, pack("V", strlen($this->data))); + } else { + fwrite($fp, pack("V", $this->isize)); + } + } +} + +// 01: minimal file +$gz = new GzipTest(); +$gz->writeTo("gztest-01-minimal.gz"); + +// 02: with FNAME +$gz = new GzipTest(); +$gz->setFilename("file.txt"); +$gz->writeTo("gztest-02-fname.gz"); + +// 03: with FCOMMENT +$gz = new GzipTest(); +$gz->setComment("COMMENT"); +$gz->writeTo("gztest-03-fcomment.gz"); + +// 04: with FHCRC +$gz = new GzipTest(); +$gz->useHeaderCrc(true); +$gz->writeTo("gztest-04-fhcrc.gz"); + +// 05: with FEXTRA +$gz = new GzipTest(); +$gz->setExtra("EXTRA"); +$gz->writeTo("gztest-05-fextra.gz"); + +// 06: with FTEXT +$gz = new GzipTest(); +$gz->setTextFlag(true); +$gz->writeTo("gztest-06-ftext.gz"); + +// 07: with FRESERVED1 +$gz = new GzipTest(); +$gz->setFlags($gz->getFlags() | 0x20); +$gz->writeTo("gztest-07-freserved1.gz"); + +// 08: with FRESERVED2 +$gz = new GzipTest(); +$gz->setFlags($gz->getFlags() | 0x40); +$gz->writeTo("gztest-08-freserved2.gz"); + +// 09: with FRESERVED3 +$gz = new GzipTest(); +$gz->setFlags($gz->getFlags() | 0x80); +$gz->writeTo("gztest-09-freserved3.gz"); + +// 10: Two parts (compressed streams) +$gz = new GzipTest(); +$fp = fopen("gztest-10-multipart.gz", "w+"); +$gz->setFilename("file1.txt"); +$gz->write($fp); +$gz->setData("The quick brown fox jumps over the lazy dog."); +$gz->setFilename("file2.txt"); +$gz->write($fp); +fclose($fp); + +// 11: Invalid compression method +$gz = new GzipTest(); +$gz->setCompressionMethod(0x07); +$gz->writeTo("gztest-11-invalid-method.gz"); + +// 12: Invalid CRC32 +$gz = new GzipTest(); +$gz->setCrc32(0xffffffff); 
+$gz->writeTo("gztest-12-invalid-crc32.gz"); + +// 13: Invalid ISIZE +$gz = new GzipTest(); +$gz->setData("Grumpy Wizards make toxic brew for the Evil Queen and Jack."); +$gz->setInputSize(0x10); +$gz->writeTo("gztest-13-invalid-isize.gz"); + +// 14: Invalid extra flags (XFL) +$gz = new GzipTest(); +$gz->setXfl(0xff); +$gz->writeTo("gztest-14-invalid-xfl.gz"); + +// 15: Invalid header CRC (FHCRC) +$gz = new GzipTest(); +$gz->useHeaderCrc(true); +$gz->setHeaderCrc(0xffff); +$gz->writeTo("gztest-15-invalid-fhcrc.gz"); + +?> diff --git a/rust/htp/tests/files/gztest-01-minimal.gz b/rust/htp/tests/files/gztest-01-minimal.gz new file mode 100644 index 000000000000..e82fcde6b1a1 Binary files /dev/null and b/rust/htp/tests/files/gztest-01-minimal.gz differ diff --git a/rust/htp/tests/files/gztest-02-fname.gz b/rust/htp/tests/files/gztest-02-fname.gz new file mode 100644 index 000000000000..bb38b70b0bab Binary files /dev/null and b/rust/htp/tests/files/gztest-02-fname.gz differ diff --git a/rust/htp/tests/files/gztest-03-fcomment.gz b/rust/htp/tests/files/gztest-03-fcomment.gz new file mode 100644 index 000000000000..fe55135c71d5 Binary files /dev/null and b/rust/htp/tests/files/gztest-03-fcomment.gz differ diff --git a/rust/htp/tests/files/gztest-04-fhcrc.gz b/rust/htp/tests/files/gztest-04-fhcrc.gz new file mode 100644 index 000000000000..cd0ce6b02b9e Binary files /dev/null and b/rust/htp/tests/files/gztest-04-fhcrc.gz differ diff --git a/rust/htp/tests/files/gztest-05-fextra.gz b/rust/htp/tests/files/gztest-05-fextra.gz new file mode 100644 index 000000000000..72290b07895d Binary files /dev/null and b/rust/htp/tests/files/gztest-05-fextra.gz differ diff --git a/rust/htp/tests/files/gztest-06-ftext.gz b/rust/htp/tests/files/gztest-06-ftext.gz new file mode 100644 index 000000000000..9d9aeccf22f6 Binary files /dev/null and b/rust/htp/tests/files/gztest-06-ftext.gz differ diff --git a/rust/htp/tests/files/gztest-07-freserved1.gz 
b/rust/htp/tests/files/gztest-07-freserved1.gz new file mode 100644 index 000000000000..bd365b54a976 Binary files /dev/null and b/rust/htp/tests/files/gztest-07-freserved1.gz differ diff --git a/rust/htp/tests/files/gztest-08-freserved2.gz b/rust/htp/tests/files/gztest-08-freserved2.gz new file mode 100644 index 000000000000..e240ec1c840c Binary files /dev/null and b/rust/htp/tests/files/gztest-08-freserved2.gz differ diff --git a/rust/htp/tests/files/gztest-09-freserved3.gz b/rust/htp/tests/files/gztest-09-freserved3.gz new file mode 100644 index 000000000000..4071cdc1336a Binary files /dev/null and b/rust/htp/tests/files/gztest-09-freserved3.gz differ diff --git a/rust/htp/tests/files/gztest-10-multipart.gz b/rust/htp/tests/files/gztest-10-multipart.gz new file mode 100644 index 000000000000..a2c0cd53b90d Binary files /dev/null and b/rust/htp/tests/files/gztest-10-multipart.gz differ diff --git a/rust/htp/tests/files/gztest-11-invalid-method.gz b/rust/htp/tests/files/gztest-11-invalid-method.gz new file mode 100644 index 000000000000..9c137684eca8 Binary files /dev/null and b/rust/htp/tests/files/gztest-11-invalid-method.gz differ diff --git a/rust/htp/tests/files/gztest-12-invalid-crc32.gz b/rust/htp/tests/files/gztest-12-invalid-crc32.gz new file mode 100644 index 000000000000..1832ef85bd46 Binary files /dev/null and b/rust/htp/tests/files/gztest-12-invalid-crc32.gz differ diff --git a/rust/htp/tests/files/gztest-13-invalid-isize.gz b/rust/htp/tests/files/gztest-13-invalid-isize.gz new file mode 100644 index 000000000000..55263bc4b21c Binary files /dev/null and b/rust/htp/tests/files/gztest-13-invalid-isize.gz differ diff --git a/rust/htp/tests/files/gztest-14-invalid-xfl.gz b/rust/htp/tests/files/gztest-14-invalid-xfl.gz new file mode 100644 index 000000000000..a844957f911e Binary files /dev/null and b/rust/htp/tests/files/gztest-14-invalid-xfl.gz differ diff --git a/rust/htp/tests/files/gztest-15-invalid-fhcrc.gz 
b/rust/htp/tests/files/gztest-15-invalid-fhcrc.gz new file mode 100644 index 000000000000..b6fa5dd2f0f7 Binary files /dev/null and b/rust/htp/tests/files/gztest-15-invalid-fhcrc.gz differ diff --git a/rust/htp/tests/files/http-close-headers.t b/rust/htp/tests/files/http-close-headers.t new file mode 100644 index 000000000000..e8afa09d419b --- /dev/null +++ b/rust/htp/tests/files/http-close-headers.t @@ -0,0 +1,12 @@ +>>> +GET / HTTP/1.1 +Host: 100.64.0.200 +Connection: keep-alive +Accept-Encoding: gzip, deflate +Accept: */* +User-Agent: python-requests/2.21.0 + + +<<< +HTTP/1.0 200 OK +Server:ng1nx diff --git a/rust/htp/tests/files/http-evader-017.t b/rust/htp/tests/files/http-evader-017.t new file mode 100644 index 000000000000..801b60ac1e15 --- /dev/null +++ b/rust/htp/tests/files/http-evader-017.t @@ -0,0 +1,25 @@ +>>> +GET /chunked/eicar.txt/cr-size HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Transfer-Encoding: chunked +Connection: close + + f +X5O!P%@AP[4\PZX + f +54(P^)7CC)7}$EI + f +CAR-STANDARD-AN + f +TIVIRUS-TEST-FI + 8 +LE!$H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-018.t b/rust/htp/tests/files/http-evader-018.t new file mode 100644 index 000000000000..0410d507c2c7 --- /dev/null +++ b/rust/htp/tests/files/http-evader-018.t @@ -0,0 +1,30 @@ +>>> +GET /chunked/eicar.txt/lf-size HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Transfer-Encoding: chunked +Connection: close + + +f +X5O!P%@AP[4\PZX + +f +54(P^)7CC)7}$EI + +f +CAR-STANDARD-AN + +f +TIVIRUS-TEST-FI + +8 +LE!$H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-044.t b/rust/htp/tests/files/http-evader-044.t new file mode 100644 index 000000000000..d14489c54a36 --- /dev/null +++ b/rust/htp/tests/files/http-evader-044.t @@ -0,0 +1,13 @@ +>>> +GET 
/chunked/eicar.txt/chunked,http10,do_clen HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.0 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Transfer-Encoding: chunked +Connection: close + +X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H* \ No newline at end of file diff --git a/rust/htp/tests/files/http-evader-059.t b/rust/htp/tests/files/http-evader-059.t new file mode 100644 index 000000000000..d73519a87e2b --- /dev/null +++ b/rust/htp/tests/files/http-evader-059.t @@ -0,0 +1,51 @@ +>>> +GET /chunked/eicar.txt/chunkednl- HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Yet-another-header: foo +Content-disposition: attachment; filename="eicar.txt" +Transfer-Encoding: chunked + +Connection: close + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-060.t b/rust/htp/tests/files/http-evader-060.t new file mode 100644 index 000000000000..b4dd8f7f4f62 --- /dev/null +++ b/rust/htp/tests/files/http-evader-060.t @@ -0,0 +1,51 @@ +>>> +GET /chunked/eicar.txt/nl-nl-chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Yet-another-header: foo +Content-disposition: attachment; filename="eicar.txt" +Transfer-Encoding: + + chunked +Connection: close + +4 +X5O! 
+4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + diff --git a/rust/htp/tests/files/http-evader-061.t b/rust/htp/tests/files/http-evader-061.t new file mode 100644 index 000000000000..63a77d0a3614 --- /dev/null +++ b/rust/htp/tests/files/http-evader-061.t @@ -0,0 +1,52 @@ +>>> +GET /chunked/eicar.txt/nl-nl-chunked-nl- HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Yet-another-header: foo +Content-disposition: attachment; filename="eicar.txt" +Transfer-Encoding: + + chunked + +Connection: close + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + diff --git a/rust/htp/tests/files/http-evader-078.t b/rust/htp/tests/files/http-evader-078.t new file mode 100644 index 000000000000..ae61150fa587 --- /dev/null +++ b/rust/htp/tests/files/http-evader-078.t @@ -0,0 +1,13 @@ +>>> +GET /chunked/eicar.txt/chunkedcr-,do_clen HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Transfer-Encoding: chunked +Connection: close + +X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H* \ No newline at end of file diff --git a/rust/htp/tests/files/http-evader-118.t b/rust/htp/tests/files/http-evader-118.t new file mode 100644 index 000000000000..2cbcd6c6a3e9 Binary files /dev/null and b/rust/htp/tests/files/http-evader-118.t differ diff --git a/rust/htp/tests/files/http-evader-130.t b/rust/htp/tests/files/http-evader-130.t new file mode 100644 index 000000000000..6ea0730666aa Binary files /dev/null and b/rust/htp/tests/files/http-evader-130.t differ diff --git a/rust/htp/tests/files/http-evader-195.t b/rust/htp/tests/files/http-evader-195.t new file mode 100644 index 000000000000..78097e5b2afb Binary files 
/dev/null and b/rust/htp/tests/files/http-evader-195.t differ diff --git a/rust/htp/tests/files/http-evader-274.t b/rust/htp/tests/files/http-evader-274.t new file mode 100644 index 000000000000..30170e05e2c4 --- /dev/null +++ b/rust/htp/tests/files/http-evader-274.t @@ -0,0 +1,51 @@ +>>> +GET /broken/eicar.txt/somehdr;space;chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close +X-Foo: bar + Transfer-Encoding: chunked +Yet-another-header: foo + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-284.t b/rust/htp/tests/files/http-evader-284.t new file mode 100644 index 000000000000..266fe380e9da --- /dev/null +++ b/rust/htp/tests/files/http-evader-284.t @@ -0,0 +1,51 @@ +>>> +GET /broken/eicar.txt/cr;chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close + +Transfer-Encoding: chunked +Yet-another-header: foo + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-286.t b/rust/htp/tests/files/http-evader-286.t new file mode 100644 index 000000000000..2a0d45fb6cce --- /dev/null +++ b/rust/htp/tests/files/http-evader-286.t @@ -0,0 +1,50 @@ +>>> +GET /broken/eicar.txt/crcronly;chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close + Transfer-Encoding: chunked +Yet-another-header: foo + +4 +X5O! 
+4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-287.t b/rust/htp/tests/files/http-evader-287.t new file mode 100644 index 000000000000..42896a1824f5 --- /dev/null +++ b/rust/htp/tests/files/http-evader-287.t @@ -0,0 +1,50 @@ +>>> +GET /broken/eicar.txt/cr-cronly;chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close + Transfer-Encoding: chunked +Yet-another-header: foo + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-297.t b/rust/htp/tests/files/http-evader-297.t new file mode 100644 index 000000000000..89e3f906722a --- /dev/null +++ b/rust/htp/tests/files/http-evader-297.t @@ -0,0 +1,50 @@ +>>> +GET /broken/eicar.txt/te%5C015%5C040%3Achunked;do_chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close +Transfer-Encoding :chunked +Yet-another-header: foo + +4 +X5O! 
+4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-300.t b/rust/htp/tests/files/http-evader-300.t new file mode 100644 index 000000000000..1372f34bcc2c --- /dev/null +++ b/rust/htp/tests/files/http-evader-300.t @@ -0,0 +1,53 @@ +>>> +GET /broken/eicar.txt/te%5C015%5C012%5C040%5C015%5C012%5C040%3A%5C015%5C012%5C040chunked;do_chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close +Transfer-Encoding + + : + chunked +Yet-another-header: foo + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-303.t b/rust/htp/tests/files/http-evader-303.t new file mode 100644 index 000000000000..548947305505 Binary files /dev/null and b/rust/htp/tests/files/http-evader-303.t differ diff --git a/rust/htp/tests/files/http-evader-307.t b/rust/htp/tests/files/http-evader-307.t new file mode 100644 index 000000000000..60d4e32a1cc7 Binary files /dev/null and b/rust/htp/tests/files/http-evader-307.t differ diff --git a/rust/htp/tests/files/http-evader-318.t b/rust/htp/tests/files/http-evader-318.t new file mode 100644 index 000000000000..aa99bca6a45b Binary files /dev/null and b/rust/htp/tests/files/http-evader-318.t differ diff --git a/rust/htp/tests/files/http-evader-320.t b/rust/htp/tests/files/http-evader-320.t new file mode 100644 index 000000000000..f9a8b5d65bd6 Binary files /dev/null and b/rust/htp/tests/files/http-evader-320.t differ diff --git a/rust/htp/tests/files/http-evader-321.t b/rust/htp/tests/files/http-evader-321.t new file mode 100644 index 000000000000..80f21a862886 Binary files /dev/null and b/rust/htp/tests/files/http-evader-321.t differ diff --git 
a/rust/htp/tests/files/http-evader-390.t b/rust/htp/tests/files/http-evader-390.t new file mode 100644 index 000000000000..476c9a41b380 Binary files /dev/null and b/rust/htp/tests/files/http-evader-390.t differ diff --git a/rust/htp/tests/files/http-evader-402.t b/rust/htp/tests/files/http-evader-402.t new file mode 100644 index 000000000000..123e59a8e3f1 --- /dev/null +++ b/rust/htp/tests/files/http-evader-402.t @@ -0,0 +1,45 @@ +>>> +GET /broken/eicar.txt/chunked;cr-no-crlf;end-crlflf HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok Content-type: application/octet-stream Content-disposition: attachment; filename="eicar.txt" Connection: close Transfer-Encoding: chunked Yet-another-header: foo + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-405.t b/rust/htp/tests/files/http-evader-405.t new file mode 100644 index 000000000000..893fc3a511ef --- /dev/null +++ b/rust/htp/tests/files/http-evader-405.t @@ -0,0 +1,50 @@ +>>> +GET /broken/eicar.txt/chunked;lfcr-no-crlf;end-crlfcrlf HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok + Content-type: application/octet-stream + Content-disposition: attachment; filename="eicar.txt" + Connection: close + Transfer-Encoding: chunked + Yet-another-header: foo + +4 +X5O! 
+4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-411.t b/rust/htp/tests/files/http-evader-411.t new file mode 100644 index 000000000000..1cb395f732bc --- /dev/null +++ b/rust/htp/tests/files/http-evader-411.t @@ -0,0 +1,50 @@ +>>> +GET /broken/eicar.txt/end-lfcrcrlf;chunked HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close +Transfer-Encoding: chunked +Yet-another-header: foo + +4 +X5O! +4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-416.t b/rust/htp/tests/files/http-evader-416.t new file mode 100644 index 000000000000..c1cb2bd83507 --- /dev/null +++ b/rust/htp/tests/files/http-evader-416.t @@ -0,0 +1,14 @@ +>>> +GET /broken/eicar.txt/end-lf%5C040lf HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close +Yet-another-header: foo +Content-length: 68 + +X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H* diff --git a/rust/htp/tests/files/http-evader-419.t b/rust/htp/tests/files/http-evader-419.t new file mode 100644 index 000000000000..091e5fc8250b --- /dev/null +++ b/rust/htp/tests/files/http-evader-419.t @@ -0,0 +1,50 @@ +>>> +GET /broken/eicar.txt/chunked;end-lf%5C040lf HTTP/1.1 +Host: evader.example.com + + +<<< +HTTP/1.1 200 ok +Content-type: application/octet-stream +Content-disposition: attachment; filename="eicar.txt" +Connection: close +Transfer-Encoding: chunked +Yet-another-header: foo + +4 +X5O! 
+4 +P%@A +4 +P[4\ +4 +PZX5 +4 +4(P^ +4 +)7CC +4 +)7}$ +4 +EICA +4 +R-ST +4 +ANDA +4 +RD-A +4 +NTIV +4 +IRUS +4 +-TES +4 +T-FI +4 +LE!$ +4 +H+H* +0 + + diff --git a/rust/htp/tests/files/http-evader-423.t b/rust/htp/tests/files/http-evader-423.t new file mode 100644 index 000000000000..0f3ad0d39a47 Binary files /dev/null and b/rust/htp/tests/files/http-evader-423.t differ diff --git a/rust/htp/tests/files/http-start-from-response.t b/rust/htp/tests/files/http-start-from-response.t new file mode 100644 index 000000000000..2763ea7f6294 --- /dev/null +++ b/rust/htp/tests/files/http-start-from-response.t @@ -0,0 +1,41 @@ +<<< +HTTP/1.1 200 OK +Date: Wed, 04 Jul 2018 09:35:14 GMT +Server: Apache/2.4.33 (Fedora) OpenSSL/1.1.0h-fips +Last-Modified: Tue, 03 Jul 2018 10:54:38 GMT +ETag: "b-5701623f27308" +Accept-Ranges: bytes +Content-Length: 11 +Keep-Alive: timeout=5, max=100 +Connection: Keep-Alive +Content-Type: text/html; charset=UTF-8 + +Hello GCX! + +>>> +GET /favicon.ico HTTP/1.1 +Host: 172.16.9.189 +Connection: keep-alive +User-Agent: Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36 +Accept: image/webp,image/apng,image/*,*/*;q=0.8 +Referer: http://172.16.9.189/ +Accept-Encoding: gzip, deflate +Accept-Language: en-US,en;q=0.9 + + +<<< +HTTP/1.1 404 Not Found +Date: Wed, 04 Jul 2018 09:35:14 GMT +Server: Apache/2.4.33 (Fedora) OpenSSL/1.1.0h-fips +Content-Length: 209 +Keep-Alive: timeout=5, max=99 +Connection: Keep-Alive +Content-Type: text/html; charset=iso-8859-1 + + + +404 Not Found + +

Not Found

+

The requested URL /favicon.ico was not found on this server.

+ diff --git a/rust/htp/tests/gunzip.rs b/rust/htp/tests/gunzip.rs new file mode 100644 index 000000000000..ce19c0ebac2e --- /dev/null +++ b/rust/htp/tests/gunzip.rs @@ -0,0 +1,200 @@ +#![allow(non_snake_case)] +use htp::{ + bstr::*, + config::{Config, HtpServerPersonality}, + connection_parser::{ConnectionParser, ParserData}, + decompressors::{Decompressor, HtpContentEncoding}, + transaction::Transaction, + HtpStatus, +}; +use std::{env, path::PathBuf}; + +// import common testing utilities +mod common; + +#[derive(Debug)] +struct Test { + connp: ConnectionParser, + expected: Bstr, + decompressor: Decompressor, +} + +enum TestError { + Io(()), + Htp(()), +} + +fn GUnzip_decompressor_callback(tx: &mut Transaction, d: &ParserData) -> HtpStatus { + tx.set_user_data(Box::new(Bstr::from(d.as_slice()))); + HtpStatus::OK +} + +impl Test { + fn new() -> Self { + let mut cfg = Config::default(); + cfg.set_server_personality(HtpServerPersonality::APACHE_2) + .unwrap(); + // The default decompression time limit may be too short in slow development environments, causing tests to fail. 
+ cfg.compression_options.set_time_limit(u32::MAX); + let mut connp = ConnectionParser::new(cfg); + + let expected = Bstr::from("The five boxing wizards jump quickly."); + let tx = connp.request_mut().unwrap() as *mut Transaction; + Test { + connp, + expected, + decompressor: Decompressor::new_with_callback( + HtpContentEncoding::GZIP, + Box::new(move |data: Option<&[u8]>| { + let data = ParserData::from(data); + GUnzip_decompressor_callback(unsafe { &mut *tx }, &data); + Ok(data.len()) + }), + Default::default(), + ) + .unwrap(), + } + } + + fn run(&mut self, filename: &str) -> Result<(), TestError> { + let mut filepath = if let Ok(dir) = env::var("srcdir") { + PathBuf::from(dir) + } else { + let mut base = PathBuf::from( + env::var("CARGO_MANIFEST_DIR").expect("Could not determine test file directory"), + ); + base.push("tests"); + base.push("files"); + base + }; + filepath.push(filename); + + let data = std::fs::read(filepath).map_err(|_| TestError::Io(()))?; + self.decompressor + .decompress(&data) + .map(|_| ()) + .map_err(|_| TestError::Htp(())) + } +} + +#[test] +fn GUnzip_Minimal() { + let mut t = Test::new(); + assert!(t.run("gztest-01-minimal.gz").is_ok()); + let request_tx = t.connp.request().unwrap(); + let output = request_tx.user_data::<Bstr>().unwrap(); + assert_eq!(*output, t.expected); +} + +#[test] +fn GUnzip_FNAME() { + let mut t = Test::new(); + assert!(t.run("gztest-02-fname.gz").is_ok()); + let request_tx = t.connp.request().unwrap(); + let output = request_tx.user_data::<Bstr>().unwrap(); + assert_eq!(*output, t.expected); +} + +#[test] +fn GUnzip_FEXTRA() { + let mut t = Test::new(); + assert!(t.run("gztest-05-fextra.gz").is_ok()); + let request_tx = t.connp.request().unwrap(); + let output = request_tx.user_data::<Bstr>().unwrap(); + assert_eq!(*output, t.expected); +} + +#[test] +fn GUnzip_FTEXT() { + let mut t = Test::new(); + assert!(t.run("gztest-06-ftext.gz").is_ok()); + let request_tx = t.connp.request().unwrap(); + let output = 
request_tx.user_data::<Bstr>().unwrap(); + assert_eq!(*output, t.expected); +} + +#[test] +fn GUnzip_Multipart() { + let mut t = Test::new(); + assert!(t.run("gztest-10-multipart.gz").is_ok()); + let request_tx = t.connp.request().unwrap(); + let output = request_tx.user_data::<Bstr>().unwrap(); + assert_eq!(*output, t.expected); +} + +#[test] +fn GUnzip_InvalidExtraFlags() { + let mut t = Test::new(); + assert!(t.run("gztest-14-invalid-xfl.gz").is_ok()); + let request_tx = t.connp.request().unwrap(); + let output = request_tx.user_data::<Bstr>().unwrap(); + assert_eq!(*output, t.expected); +} + +#[test] +fn GUnzip_InvalidHeaderCrc() { + let mut t = Test::new(); + assert!(t.run("gztest-15-invalid-fhcrc.gz").is_ok()); + let request_tx = t.connp.request().unwrap(); + let output = request_tx.user_data::<Bstr>().unwrap(); + assert_eq!(*output, t.expected); +} + +/* +// These tests were disabled in libhtp +#[test] +fn GUnzip_FCOMMENT() { + let mut t = Test::new(); + assert!(t.run("gztest-03-fcomment.gz").is_ok()); + assert_eq!(t.output, t.expected); +} + +#[test] +fn GUnzip_FHCRC() { + let mut t = Test::new(); + assert!(t.run("gztest-04-fhcrc.gz").is_ok()); + assert_eq!(t.output, t.expected); +} + +#[test] +fn GUnzip_FRESERVED1() { + let mut t = Test::new(); + assert!(t.run("gztest-07-freserved1.gz").is_ok()); + assert_eq!(t.output, t.expected); +} + +#[test] +fn GUnzip_FRESERVED2() { + let mut t = Test::new(); + assert!(t.run("gztest-08-freserved2.gz").is_ok()); + assert_eq!(t.output, t.expected); +} + +#[test] +fn GUnzip_FRESERVED3() { + let mut t = Test::new(); + assert!(t.run("gztest-09-freserved3.gz").is_ok()); + assert_eq!(t.output, t.expected); +} + +#[test] +fn GUnzip_InvalidMethod() { + let mut t = Test::new(); + assert!(t.run("gztest-11-invalid-method.gz").is_ok()); + assert_eq!(t.output, t.expected); +} + +#[test] +fn GUnzip_InvalidCrc() { + let mut t = Test::new(); + assert!(t.run("gztest-12-invalid-crc32.gz").is_ok()); + assert_eq!(t.output, t.expected); +} + +#[test] +fn 
GUnzip_InvalidInputSize() { + let mut t = Test::new(); + assert!(t.run("gztest-13-invalid-isize.gz").is_ok()); + assert_eq!(t.output, t.expected); +} +*/ diff --git a/rust/htp/tests/hybrid.rs b/rust/htp/tests/hybrid.rs new file mode 100644 index 000000000000..480cd743e3ab --- /dev/null +++ b/rust/htp/tests/hybrid.rs @@ -0,0 +1,817 @@ +#![allow(non_snake_case)] +#![allow(non_camel_case_types)] +use htp::{ + bstr::Bstr, + config::{Config, HtpServerPersonality}, + connection_parser::{ConnectionParser, ParserData}, + error::Result, + transaction::{Header, HtpProtocol, HtpResponseNumber, Transaction}, + uri::Uri, + HtpStatus, +}; +use std::net::{IpAddr, Ipv4Addr}; + +// import common testing utilities +mod common; + +struct HybridParsing_Get_User_Data { + // Request callback indicators. + callback_REQUEST_START_invoked: i32, + callback_REQUEST_LINE_invoked: i32, + callback_REQUEST_HEADERS_invoked: i32, + callback_REQUEST_COMPLETE_invoked: i32, + + // Response callback indicators. + callback_RESPONSE_START_invoked: i32, + callback_RESPONSE_LINE_invoked: i32, + callback_RESPONSE_HEADERS_invoked: i32, + callback_RESPONSE_COMPLETE_invoked: i32, + + // Transaction callback indicators. + callback_TRANSACTION_COMPLETE_invoked: i32, + + // Response body handling fields. 
+ response_body_chunks_seen: i32, + response_body_correctly_received: i32, +} + +impl HybridParsing_Get_User_Data { + pub fn new() -> Self { + HybridParsing_Get_User_Data { + callback_REQUEST_START_invoked: 0, + callback_REQUEST_LINE_invoked: 0, + callback_REQUEST_HEADERS_invoked: 0, + callback_REQUEST_COMPLETE_invoked: 0, + callback_RESPONSE_START_invoked: 0, + callback_RESPONSE_LINE_invoked: 0, + callback_RESPONSE_HEADERS_invoked: 0, + callback_RESPONSE_COMPLETE_invoked: 0, + callback_TRANSACTION_COMPLETE_invoked: 0, + response_body_chunks_seen: 0, + response_body_correctly_received: 0, + } + } +} + +fn HybridParsing_Get_Callback_REQUEST_START(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_REQUEST_START_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_REQUEST_LINE(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_REQUEST_LINE_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_REQUEST_HEADERS(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_REQUEST_HEADERS_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_REQUEST_COMPLETE(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_REQUEST_COMPLETE_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_RESPONSE_START(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_RESPONSE_START_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_RESPONSE_LINE(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_RESPONSE_LINE_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_RESPONSE_HEADERS(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_RESPONSE_HEADERS_invoked += 1; + Ok(()) +} + +fn 
HybridParsing_Get_Callback_RESPONSE_COMPLETE(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_RESPONSE_COMPLETE_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_TRANSACTION_COMPLETE(tx: &mut Transaction) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + user_data.callback_TRANSACTION_COMPLETE_invoked += 1; + Ok(()) +} + +fn HybridParsing_Get_Callback_RESPONSE_BODY_DATA( + tx: &mut Transaction, d: &ParserData, +) -> Result<()> { + let user_data = tx.user_data_mut::().unwrap(); + + // Don't do anything if in errored state. + if user_data.response_body_correctly_received == -1 { + return Err(HtpStatus::ERROR); + } + + let data = d.as_slice(); + match user_data.response_body_chunks_seen { + 0 => { + if data == b"

Hello" { + user_data.response_body_chunks_seen += 1; + } else { + eprintln!("Mismatch in 1st chunk"); + user_data.response_body_correctly_received = -1; + } + } + 1 => { + if data == b" " { + user_data.response_body_chunks_seen += 1; + } else { + eprintln!("Mismatch in 2nd chunk"); + user_data.response_body_correctly_received = -1; + } + } + 2 => { + if data == b"World!

" { + user_data.response_body_chunks_seen += 1; + user_data.response_body_correctly_received = 1; + } else { + eprintln!("Mismatch in 3rd chunk"); + user_data.response_body_correctly_received = -1; + } + } + _ => { + eprintln!("Seen more than 3 chunks"); + user_data.response_body_correctly_received = -1; + } + } + Ok(()) +} + +// Set one request header. +macro_rules! tx_set_header { + ($headers:expr, $name:expr, $value:expr) => { + $headers + .elements + .push(Header::new($name.into(), $value.into())) + }; +} + +fn TestConfig() -> Config { + let mut cfg = Config::default(); + cfg.set_server_personality(HtpServerPersonality::APACHE_2) + .unwrap(); + cfg.set_parse_urlencoded(true); + cfg +} + +fn register_user_callbacks(cfg: &mut Config) { + // Request callbacks + cfg.register_request_start(HybridParsing_Get_Callback_REQUEST_START); + cfg.register_request_line(HybridParsing_Get_Callback_REQUEST_LINE); + cfg.register_request_headers(HybridParsing_Get_Callback_REQUEST_HEADERS); + cfg.register_request_complete(HybridParsing_Get_Callback_REQUEST_COMPLETE); + + // Response callbacks + cfg.register_response_start(HybridParsing_Get_Callback_RESPONSE_START); + cfg.register_response_line(HybridParsing_Get_Callback_RESPONSE_LINE); + cfg.register_response_headers(HybridParsing_Get_Callback_RESPONSE_HEADERS); + cfg.register_response_body_data(HybridParsing_Get_Callback_RESPONSE_BODY_DATA); + cfg.register_response_complete(HybridParsing_Get_Callback_RESPONSE_COMPLETE); + + // Transaction callbacks + cfg.register_transaction_complete(HybridParsing_Get_Callback_TRANSACTION_COMPLETE); +} + +struct HybridParsingTest { + connp: ConnectionParser, +} + +impl HybridParsingTest { + fn new(cfg: Config) -> Self { + let mut connp = ConnectionParser::new(cfg); + connp.open( + Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))), + Some(32768), + Some(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))), + Some(80), + None, + ); + + HybridParsingTest { connp } + } +} + +/// Test hybrid mode with one complete GET 
transaction; request then response +/// with a body. Most features are tested, including query string parameters and callbacks. +#[test] +fn GetTest() { + let mut cfg = TestConfig(); + // Register callbacks + register_user_callbacks(&mut cfg); + let mut t = HybridParsingTest::new(cfg); + let tx = t.connp.request_mut().unwrap(); + + // Configure user data and callbacks + tx.set_user_data(Box::new(HybridParsing_Get_User_Data::new())); + // We should be operating on the same transaction throughout + let tx_id = tx.index; + + // Make dummy parser data to satisfy callbacks + let mut p = ParserData::from(b"" as &[u8]); + + // Request begins + t.connp.state_request_start().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_START_invoked); + + // Request line data + t.connp + .parse_request_line(b"GET /?p=1&q=2 HTTP/1.1") + .unwrap(); + + // Request line complete + t.connp.state_request_line().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_LINE_invoked); + + // Check request line data + let tx = t.connp.tx_mut(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2")); + assert!(tx.request_protocol.as_ref().unwrap().eq_slice("HTTP/1.1")); + let parsed_uri = tx.parsed_uri.as_ref().unwrap(); + assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/")); + assert!(parsed_uri.query.as_ref().unwrap().eq_slice("p=1&q=2")); + + // Request headers + tx_set_header!(tx.request_headers, "Host", "www.example.com"); + tx_set_header!(tx.request_headers, "Connection", "keep-alive"); + tx_set_header!(tx.request_headers, "User-Agent", "Mozilla/5.0"); + + // Request headers complete + t.connp.state_request_headers(&mut p).unwrap(); + + // Check headers + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); 
+ assert_eq!(1, user_data.callback_REQUEST_HEADERS_invoked); + + let tx = t.connp.tx(tx_id).unwrap(); + assert_request_header_eq!(tx, "host", "www.example.com"); + assert_request_header_eq!(tx, "connection", "keep-alive"); + assert_request_header_eq!(tx, "user-agent", "Mozilla/5.0"); + + // Request complete + t.connp.state_request_complete(&mut p).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_COMPLETE_invoked); + + // Response begins + t.connp.state_response_start().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_RESPONSE_START_invoked); + + // Response line data + t.connp.parse_response_line(b"HTTP/1.1 200 OK").unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.response_protocol.as_ref().unwrap().eq_slice("HTTP/1.1")); + assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number); + assert!(tx.response_status.as_ref().unwrap().eq_slice("200")); + assert!(tx.response_status_number.eq_num(200)); + assert!(tx.response_message.as_ref().unwrap().eq_slice("OK")); + + // Response line complete + t.connp.state_response_line().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + + assert_eq!(1, user_data.callback_RESPONSE_LINE_invoked); + + // Response header data + let tx = t.connp.tx_mut(tx_id).unwrap(); + tx_set_header!(tx.response_headers, "Content-Type", "text/html"); + tx_set_header!(tx.response_headers, "Server", "Apache"); + + // Response headers complete + t.connp.state_response_headers(&mut p).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_RESPONSE_HEADERS_invoked); + + // Check response headers + let tx = t.connp.tx(tx_id).unwrap(); + assert_response_header_eq!(tx, "content-type", "text/html"); + assert_response_header_eq!(tx, "server", "Apache"); + + // 
Response body data + t.connp.response_body_data(Some(b"

Hello")).unwrap(); + t.connp.response_body_data(Some(b" ")).unwrap(); + t.connp.response_body_data(Some(b"World!

")).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.response_body_correctly_received); + + let tx = t.connp.tx_mut(tx_id).unwrap(); + tx_set_header!(tx.response_headers, "Content-Type", "text/html"); + tx_set_header!(tx.response_headers, "Server", "Apache"); + + // Check trailing response headers + assert_response_header_eq!(tx, "content-type", "text/html"); + assert_response_header_eq!(tx, "server", "Apache"); + + t.connp.state_response_complete(&mut p).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_RESPONSE_COMPLETE_invoked); +} + +/// Use a POST request in order to test request body processing and parameter parsing. +#[test] +fn PostUrlecodedTest() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Make dummy parser data to satisfy callbacks + let mut p = ParserData::from(b"" as &[u8]); + + // Request begins + t.connp.state_request_start().unwrap(); + + // Request line data + t.connp.parse_request_line(b"POST / HTTP/1.1").unwrap(); + + // Request line complete + t.connp.state_request_line().unwrap(); + + // Configure headers to trigger the URLENCODED parser + let tx = t.connp.tx_mut(tx_id).unwrap(); + tx_set_header!( + tx.request_headers, + "Content-Type", + "application/x-www-form-urlencoded" + ); + tx_set_header!(tx.request_headers, "Content-Length", "7"); + + // Request headers complete + t.connp.state_request_headers(&mut p).unwrap(); + + // Send request body + t.connp.request_body_data(Some(b"p=1")).unwrap(); + t.connp.request_body_data(Some(b"")).unwrap(); + t.connp.request_body_data(Some(b"&")).unwrap(); + t.connp.request_body_data(Some(b"q=2")).unwrap(); + + let tx = t.connp.tx_mut(tx_id).unwrap(); + tx_set_header!(tx.request_headers, "Host", "www.example.com"); + tx_set_header!(tx.request_headers, "Connection", "keep-alive"); + 
tx_set_header!(tx.request_headers, "User-Agent", "Mozilla/5.0"); + + assert_request_header_eq!(tx, "host", "www.example.com"); + assert_request_header_eq!(tx, "connection", "keep-alive"); + assert_request_header_eq!(tx, "user-agent", "Mozilla/5.0"); + + // Request complete + t.connp.state_request_complete(&mut p).unwrap(); +} + +/// Test with a compressed response body and decompression enabled. +#[test] +fn CompressedResponse() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Make dummy parser data to satisfy callbacks + let mut p = ParserData::from(b"" as &[u8]); + + t.connp.state_request_start().unwrap(); + + t.connp.parse_request_line(b"GET / HTTP/1.1").unwrap(); + + t.connp.state_request_line().unwrap(); + t.connp.state_request_headers(&mut p).unwrap(); + t.connp.state_request_complete(&mut p).unwrap(); + + t.connp.state_response_start().unwrap(); + + t.connp.parse_response_line(b"HTTP/1.1 200 OK").unwrap(); + let tx = t.connp.tx_mut(tx_id).unwrap(); + tx_set_header!(tx.response_headers, "Content-Encoding", "gzip"); + tx_set_header!(tx.response_headers, "Content-Length", "187"); + + t.connp.state_response_headers(&mut p).unwrap(); + + let RESPONSE: &[u8] = + b"H4sIAAAAAAAAAG2PwQ6CMBBE73xFU++tXk2pASliAiEhPegRYUOJYEktEP5eqB6dy2ZnJ5O3LJFZ\ + yj2WiCBah7zKVPBMT1AjCf2gTWnabmH0e/AY/QXDPLqj8HLO07zw8S52wkiKm1zXvRPeeg//2lbX\ + kwpQrauxh5dFqnyj3uVYgJJCxD5W1g5HSud5Jo3WTQek0mR8UgNlDYZOLcz0ZMuH3y+YKzDAaMDJ\ + SrihOVL32QceVXUy4QAAAA=="; + + let body = Bstr::from(base64::decode(RESPONSE).unwrap()); + + t.connp.response_body_data(Some(body.as_slice())).unwrap(); + + t.connp.state_response_complete(&mut p).unwrap(); + + let tx = t.connp.tx(tx_id).unwrap(); + assert_eq!(187, tx.response_message_len); + assert_eq!(225, tx.response_entity_len); +} + +#[test] +fn ParamCaseSensitivity() { + let mut t = HybridParsingTest::new(TestConfig()); + + // Request begins + t.connp.state_request_start().unwrap(); + + // Request line 
data + t.connp + .parse_request_line(b"GET /?p=1&Q=2 HTTP/1.1") + .unwrap(); + + // Request line complete + t.connp.state_request_line().unwrap(); +} + +/// Use a POST request in order to test request body processing and parameter +/// parsing. In hybrid mode, we expect that the body arrives to us dechunked. +#[test] +fn PostUrlecodedChunked() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Make dummy parser data to satisfy callbacks + let mut p = ParserData::from(b"" as &[u8]); + + // Request begins. + t.connp.state_request_start().unwrap(); + + // Request line data. + t.connp.parse_request_line(b"POST / HTTP/1.1").unwrap(); + t.connp.state_request_line().unwrap(); + + // Configure headers to trigger the URLENCODED parser. + let tx = t.connp.tx_mut(tx_id).unwrap(); + tx_set_header!( + tx.request_headers, + "Content-Type", + "application/x-www-form-urlencoded" + ); + tx_set_header!(tx.request_headers, "Transfer-Encoding", "chunked"); + + // Request headers complete. + t.connp.state_request_headers(&mut p).unwrap(); + + // Send request body. + t.connp.request_body_data(Some(b"p=1")).unwrap(); + t.connp.request_body_data(Some(b"&")).unwrap(); + t.connp.request_body_data(Some(b"q=2")).unwrap(); + + // Request complete. 
+ t.connp.state_request_complete(&mut p).unwrap(); +} + +#[test] +fn RequestLineParsing1() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Request begins + t.connp.state_request_start().unwrap(); + + // Request line data + t.connp + .parse_request_line(b"GET /?p=1&q=2 HTTP/1.0") + .unwrap(); + + // Request line complete + t.connp.state_request_line().unwrap(); + + let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2")); + assert!(tx.request_protocol.as_ref().unwrap().eq_slice("HTTP/1.0")); + let parsed_uri = tx.parsed_uri.as_ref().unwrap(); + assert!(parsed_uri.query.as_ref().unwrap().eq_slice("p=1&q=2")); +} + +#[test] +fn RequestLineParsing2() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Feed data to the parser. + t.connp.state_request_start().unwrap(); + t.connp.parse_request_line(b"GET /").unwrap(); + t.connp.state_request_line().unwrap(); + + // Check the results now. + let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.is_protocol_0_9); + assert_eq!(HtpProtocol::V0_9, tx.request_protocol_number); + assert!(tx.request_protocol.is_none()); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/")); +} + +#[test] +fn RequestLineParsing3() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Feed data to the parser. + t.connp.state_request_start().unwrap(); + t.connp.parse_request_line(b"GET / HTTP / 01.1").unwrap(); + t.connp.state_request_line().unwrap(); + + // Check the results now. 
+ let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpProtocol::V1_1, tx.request_protocol_number); + assert!(tx + .request_protocol + .as_ref() + .unwrap() + .eq_slice("HTTP / 01.1")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/")); +} + +#[test] +fn RequestLineParsing4() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Feed data to the parser. + t.connp.state_request_start().unwrap(); + t.connp.parse_request_line(b"GET / HTTP / 01.10").unwrap(); + t.connp.state_request_line().unwrap(); + + // Check the results now. + let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpProtocol::INVALID, tx.request_protocol_number); + assert!(tx + .request_protocol + .as_ref() + .unwrap() + .eq_slice("HTTP / 01.10")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/")); +} + +#[test] +fn RequestLineParsing5() { + let mut cfg = TestConfig(); + cfg.set_allow_space_uri(true); + let mut t = HybridParsingTest::new(cfg); + let tx_id = t.connp.request().unwrap().index; + + // Feed data to the parser. + t.connp.state_request_start().unwrap(); + t.connp.parse_request_line(b"GET / HTTP / 01.10").unwrap(); + t.connp.state_request_line().unwrap(); + + // Check the results now. + let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpProtocol::INVALID, tx.request_protocol_number); + assert!(tx.request_protocol.as_ref().unwrap().eq_slice("01.10")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/ HTTP /")); +} + +#[test] +fn RequestLineParsing6() { + let mut cfg = TestConfig(); + cfg.set_allow_space_uri(true); + let mut t = HybridParsingTest::new(cfg); + let tx_id = t.connp.request().unwrap().index; + + // Feed data to the parser. 
+ t.connp.state_request_start().unwrap(); + // Test the parser's "found bad chars" path + t.connp + .parse_request_line(b"GET\t/\tHTTP\t\t/\t01.10") + .unwrap(); + t.connp.state_request_line().unwrap(); + + // Check the results now. + let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpProtocol::INVALID, tx.request_protocol_number); + assert!(tx.request_protocol.as_ref().unwrap().eq_slice("01.10")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/\tHTTP\t\t/")); +} + +#[test] +fn ParsedUriSupplied() { + let mut t = HybridParsingTest::new(TestConfig()); + let tx_id = t.connp.request().unwrap().index; + + // Feed data to the parser. + t.connp.state_request_start().unwrap(); + t.connp + .parse_request_line(b"GET /?p=1&q=2 HTTP/1.0") + .unwrap(); + + let tx = t.connp.tx_mut(tx_id).unwrap(); + let u = Uri { + path: Some(Bstr::from("/123")), + ..Default::default() + }; + tx.parsed_uri = Some(u); + t.connp.state_request_line().unwrap(); + + // Check the results now. + let tx = t.connp.tx(tx_id).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2")); + let parsed_uri = tx.parsed_uri.as_ref().unwrap(); + assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/123")); +} + +#[test] +fn DoubleEncodedUriPath() { + let mut cfg = TestConfig(); + cfg.set_double_decode_normalized_path(true); + let mut t = HybridParsingTest::new(cfg); + // Feed data to the parser. + + t.connp.state_request_start().unwrap(); + t.connp.parse_request_line(b"GET /%2500 HTTP/1.0").unwrap(); + t.connp.state_request_line().unwrap(); + + // Check the results now. 
+ + let tx = t.connp.request().unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/%2500")); + let parsed_uri = tx.parsed_uri.as_ref().unwrap(); + assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/%00")); + assert!(tx.complete_normalized_uri.as_ref().unwrap().eq_slice("/\0")); +} + +#[test] +fn DoubleEncodedUriQuery() { + let mut cfg = TestConfig(); + cfg.set_double_decode_normalized_query(true); + let mut t = HybridParsingTest::new(cfg); + // Feed data to the parser. + + t.connp.state_request_start().unwrap(); + t.connp + .parse_request_line(b"GET /?a=%2500 HTTP/1.0") + .unwrap(); + t.connp.state_request_line().unwrap(); + + // Check the results now. + + let tx = t.connp.request().unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?a=%2500")); + let parsed_uri = tx.parsed_uri.as_ref().unwrap(); + assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/")); + assert!(parsed_uri.query.as_ref().unwrap().eq_slice("a=%2500")); + assert!(tx + .complete_normalized_uri + .as_ref() + .unwrap() + .eq_slice("/?a=\0")); +} + +/// Test hybrid mode with one complete GET transaction; request then response +/// with no body. Used to crash in htp_connp_close(). 
+#[test] +fn TestRepeatCallbacks() { + let mut cfg = TestConfig(); + // Request callbacks + register_user_callbacks(&mut cfg); + let mut t = HybridParsingTest::new(cfg); + + let tx_id = t.connp.request().unwrap().index; + + // Configure user data and callbacks + let tx = t.connp.tx_mut(tx_id).unwrap(); + tx.set_user_data(Box::new(HybridParsing_Get_User_Data::new())); + + // Make dummy parser data to satisfy callbacks + let mut p = ParserData::from(b"" as &[u8]); + + // Request begins + t.connp.state_request_start().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_START_invoked); + + // Request line data + t.connp.parse_request_line(b"GET / HTTP/1.0").unwrap(); + + // Request line complete + t.connp.state_request_line().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_LINE_invoked); + + let tx = t.connp.tx(tx_id).unwrap(); + // Check request line data + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/")); + assert!(tx.request_protocol.as_ref().unwrap().eq_slice("HTTP/1.0")); + let parsed_uri = tx.parsed_uri.as_ref().unwrap(); + assert!(parsed_uri.path.as_ref().unwrap().eq_slice("/")); + + // Request headers complete + t.connp.state_request_headers(&mut p).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_HEADERS_invoked); + + // Request complete + t.connp.state_request_complete(&mut p).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_COMPLETE_invoked); + + // Response begins + t.connp.state_response_start().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, 
user_data.callback_RESPONSE_START_invoked); + + // Response line data + t.connp.parse_response_line(b"HTTP/1.1 200 OK\r\n").unwrap(); + + // Response line complete + t.connp.state_response_line().unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_RESPONSE_LINE_invoked); + + // Response headers complete + t.connp.state_response_headers(&mut p).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_RESPONSE_HEADERS_invoked); + + // Response complete + t.connp.state_response_complete(&mut p).unwrap(); + let tx = t.connp.tx(tx_id).unwrap(); + let user_data = tx.user_data::().unwrap(); + assert_eq!(1, user_data.callback_REQUEST_START_invoked); + assert_eq!(1, user_data.callback_REQUEST_LINE_invoked); + assert_eq!(1, user_data.callback_REQUEST_HEADERS_invoked); + assert_eq!(1, user_data.callback_REQUEST_COMPLETE_invoked); + assert_eq!(1, user_data.callback_RESPONSE_START_invoked); + assert_eq!(1, user_data.callback_RESPONSE_LINE_invoked); + assert_eq!(1, user_data.callback_RESPONSE_HEADERS_invoked); + assert_eq!(1, user_data.callback_RESPONSE_COMPLETE_invoked); + assert_eq!(1, user_data.callback_TRANSACTION_COMPLETE_invoked); +} + +/// Try response line with missing response code and message +#[test] +fn ResponseLineIncomplete() { + let mut t = HybridParsingTest::new(TestConfig()); + + // Make dummy parser data to satisfy callbacks + let mut p = ParserData::from(b"" as &[u8]); + + t.connp.state_response_start().unwrap(); + t.connp.parse_response_line(b"HTTP/1.1").unwrap(); + let tx = t.connp.response().unwrap(); + assert!(tx.response_protocol.as_ref().unwrap().eq_slice("HTTP/1.1")); + assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number); + assert!(tx.response_status.is_none()); + assert_eq!(HtpResponseNumber::INVALID, tx.response_status_number); + assert!(tx.response_message.is_none()); + 
t.connp.state_response_complete(&mut p).unwrap(); +} + +/// Try response line with missing response message +#[test] +fn ResponseLineIncomplete1() { + let mut t = HybridParsingTest::new(TestConfig()); + + // Make dummy parser data to satisfy callbacks + let mut p = ParserData::from(b"" as &[u8]); + + t.connp.state_response_start().unwrap(); + t.connp.parse_response_line(b"HTTP/1.1 200").unwrap(); + let tx = t.connp.response().unwrap(); + assert!(tx.response_protocol.as_ref().unwrap().eq_slice("HTTP/1.1")); + assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number); + assert!(tx.response_status.as_ref().unwrap().eq_slice("200")); + assert!(tx.response_status_number.eq_num(200)); + assert!(tx.response_message.is_none()); + t.connp.state_response_complete(&mut p).unwrap(); +} diff --git a/rust/htp/tests/main.rs b/rust/htp/tests/main.rs new file mode 100644 index 000000000000..4f8abfc20173 --- /dev/null +++ b/rust/htp/tests/main.rs @@ -0,0 +1,2955 @@ +#![allow(non_snake_case)] +use htp::{ + bstr::Bstr, + config::HtpServerPersonality, + connection::ConnectionFlags, + connection_parser::ParserData, + error::Result, + log::{HtpLogCode, HtpLogLevel}, + transaction::{ + HtpAuthType, HtpProtocol, HtpRequestProgress, HtpResponseNumber, HtpResponseProgress, + HtpTransferCoding, Transaction, + }, + util::{FlagOperations, HtpFlags}, +}; + +use htp::test::{MainUserData, Test, TestConfig}; + +use std::iter::IntoIterator; + +// import common testing utilities +mod common; + +#[test] +fn AdHoc() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("00-adhoc.t").is_ok()); +} + +#[test] +fn Get() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("01-get.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20")); + + assert!(tx + .parsed_uri + .as_ref() + .unwrap() + .query + .as_ref() + .unwrap() + 
.eq_slice("p=%20")); +} + +#[test] +fn GetSlice() { + let mut t = Test::new(TestConfig()); + assert!(t + .run_slice( + b">>> +GET /?p=%20 HTTP/1.0 +User-Agent: Mozilla + + +<<< +HTTP/1.0 200 OK +Date: Mon, 31 Aug 2009 20:25:50 GMT +Server: Apache +Connection: close +Content-Type: text/html +Content-Length: 12 + +Hello World!" + ) + .is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20")); + + assert!(tx + .parsed_uri + .as_ref() + .unwrap() + .query + .as_ref() + .unwrap() + .eq_slice("p=%20")); +} + +#[test] +fn GetEncodedRelPath() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("99-get.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx + .request_hostname + .as_ref() + .unwrap() + .eq_slice("www.example.com")); + assert!(tx + .parsed_uri + .as_ref() + .unwrap() + .path + .as_ref() + .unwrap() + .eq_slice("/images.gif")); +} + +#[test] +fn ApacheHeaderParsing() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("02-header-test-apache2.t").is_ok()); + + let tx = t.connp.tx(0).expect("expected tx to exist"); + + let actual: Vec<(&[u8], &[u8])> = (&tx.request_headers) + .into_iter() + .map(|val| (val.name.as_slice(), val.value.as_slice())) + .collect(); + + let expected: Vec<(&[u8], &[u8])> = [ + ("Invalid-Folding", "1"), + ("Valid-Folding", "2 2"), + ("Normal-Header", "3"), + ("Invalid Header Name", "4"), + ("Same-Name-Headers", "5, 6"), + ("Empty-Value-Header", ""), + ("", "8, "), + ("Header-With-LWS-After", "9"), + ("Header-With-NUL", "BEFORE\0AFTER"), + ] + .iter() + .map(|(key, val)| (key.as_bytes(), val.as_bytes())) + .collect(); + assert_eq!( + actual, + expected, + "{:?} != {:?}", + actual + .clone() + .into_iter() + .map(|(key, val)| ( + 
String::from_utf8_lossy(key).to_string(), + String::from_utf8_lossy(val).to_string() + )) + .collect::>(), + expected + .clone() + .into_iter() + .map(|(key, val)| ( + String::from_utf8_lossy(key).to_string(), + String::from_utf8_lossy(val).to_string() + )) + .collect::>(), + ); +} + +#[test] +fn PostUrlencoded() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("03-post-urlencoded.t").is_ok()); + + assert_eq!(2, t.connp.tx_size()); + + // Transaction 1 + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(tx.request_progress, HtpRequestProgress::COMPLETE); + assert_eq!(tx.response_progress, HtpResponseProgress::COMPLETE); + + assert_response_header_eq!(tx, "Server", "Apache"); + + // Transaction 2 + let tx2 = t.connp.tx(1).unwrap(); + + assert_eq!(tx2.request_progress, HtpRequestProgress::COMPLETE); + assert_eq!(tx2.response_progress, HtpResponseProgress::COMPLETE); + + assert_response_header_eq!(tx2, "Server", "Apache"); +} + +#[test] +fn PostUrlencodedChunked() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("04-post-urlencoded-chunked.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(25, tx.request_message_len); + assert_eq!(12, tx.request_entity_len); +} + +#[test] +fn Expect() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("05-expect.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + // The interim header from the 100 response should not be among the final headers. 
+ assert!(tx.request_headers.get_nocase_nozero("Header1").is_none()); +} + +#[test] +fn UriNormal() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("06-uri-normal.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let _tx = t.connp.tx(0).unwrap(); +} + +#[test] +fn PipelinedConn() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("07-pipelined-connection.t").is_ok()); + + assert_eq!(2, t.connp.tx_size()); + + assert!(t.connp.conn.flags.is_set(ConnectionFlags::PIPELINED)); + + let _tx = t.connp.tx(0).unwrap(); +} + +#[test] +fn NotPipelinedConn() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("08-not-pipelined-connection.t").is_ok()); + + assert_eq!(2, t.connp.tx_size()); + + assert!(!t.connp.conn.flags.is_set(ConnectionFlags::PIPELINED)); + + let tx = t.connp.tx(0).unwrap(); + + assert!(!tx.flags.is_set(HtpFlags::MULTI_PACKET_HEAD)); +} + +#[test] +fn MultiPacketRequest() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("09-multi-packet-request-head.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.flags.is_set(HtpFlags::MULTI_PACKET_HEAD)); +} + +#[test] +fn HeaderHostParsing() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("10-host-in-headers.t").is_ok()); + assert_eq!(4, t.connp.tx_size()); + + let tx1 = t.connp.tx(0).unwrap(); + + assert!(tx1 + .request_hostname + .as_ref() + .unwrap() + .eq_slice("www.example.com")); + + let tx2 = t.connp.tx(1).unwrap(); + + assert!(tx2 + .request_hostname + .as_ref() + .unwrap() + .eq_slice("www.example.com.")); + + let tx3 = t.connp.tx(2).unwrap(); + + assert!(tx3 + .request_hostname + .as_ref() + .unwrap() + .eq_slice("www.example.com")); + + let tx4 = t.connp.tx(3).unwrap(); + + assert!(tx4 + .request_hostname + .as_ref() + .unwrap() + .eq_slice("www.example.com")); +} + +#[test] +fn ResponseWithoutContentLength() { + let mut t = Test::new(TestConfig()); + 
assert!(t.run_file("11-response-stream-closure.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+}
+/// Parses `12-connect-request.t`: a complete CONNECT transaction answered 405 "Method Not Allowed" with content type text/html.
+#[test]
+fn FailedConnectRequest() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("12-connect-request.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("CONNECT"));
+    assert!(tx
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+    assert!(tx
+        .response_message
+        .as_ref()
+        .unwrap()
+        .eq_slice("Method Not Allowed"));
+    assert!(tx.response_status_number.eq_num(405));
+}
+/// Parses `13-compressed-response-gzip-ct.t`: response_message_len 187, response_entity_len 225, message "Moved Temporarily".
+#[test]
+fn CompressedResponseContentType() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("13-compressed-response-gzip-ct.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(187, tx.response_message_len);
+    assert_eq!(225, tx.response_entity_len);
+    assert!(tx
+        .response_message
+        .as_ref()
+        .unwrap()
+        .eq_slice("Moved Temporarily"));
+}
+/// Parses `14-compressed-response-gzip-chunked.t`: complete transaction, response_message_len 28261, response_entity_len 159_590.
+#[test]
+fn CompressedResponseChunked() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(28261, tx.response_message_len);
+
+    assert_eq!(159_590, tx.response_entity_len);
+}
+/// Parses `15-connect-complete.t`: a CONNECT request answered with status 200; the completeness check is disabled (see TODO).
+#[test]
+fn SuccessfulConnectRequest() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("15-connect-complete.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    // TODO: Update the test_run_file() function to provide better
+    //       simulation of real traffic. At the moment, it does not
+    //       invoke inbound parsing after outbound parsing returns
+    //       HTP_DATA_OTHER, which is why the check below fails.
+    //assert!(tx.is_complete());
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("CONNECT"));
+
+    assert!(tx.response_status_number.eq_num(200));
+}
+/// Parses `16-connect-extra.t`: two complete transactions; the first response has content type text/html.
+#[test]
+fn ConnectRequestWithExtraData() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("16-connect-extra.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert!(tx1.is_complete());
+    assert!(tx1
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert!(tx2.is_complete());
+}
+/// Parses `17-multipart-1.t`: one complete transaction.
+#[test]
+fn Multipart() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("17-multipart-1.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+}
+/// Parses `18-compressed-response-deflate.t`: complete transaction, response_message_len 755, response_entity_len 1433.
+#[test]
+fn CompressedResponseDeflate() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("18-compressed-response-deflate.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert_eq!(755, tx.response_message_len);
+
+    assert_eq!(1433, tx.response_entity_len);
+}
+/// Parses `19-urlencoded-test.t`: a complete POST whose request URI is `/?p=1&q=2`.
+#[test]
+fn UrlEncoded() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("19-urlencoded-test.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.is_complete());
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("POST"));
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=1&q=2"));
+}
+/// Parses `20-ambiguous-host.t`: five complete transactions; HOST_AMBIGUOUS is expected on the second and fourth only.
+#[test]
+fn AmbiguousHost() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("20-ambiguous-host.t").is_ok());
+
+    assert_eq!(5, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert!(tx1.is_complete());
+    assert!(!tx1.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert!(tx2.is_complete());
+    assert!(tx2.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx2
+        .request_hostname
+        .as_ref()
+        .unwrap()
+
.eq_slice("example.com"));
+
+    let tx3 = t.connp.tx(2).unwrap();
+
+    assert!(tx3.is_complete());
+    assert!(!tx3.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx3
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert_eq!(Some(8001), tx3.request_port_number);
+
+    let tx4 = t.connp.tx(3).unwrap();
+
+    assert!(tx4.is_complete());
+    assert!(tx4.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx4
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert_eq!(Some(8002), tx4.request_port_number);
+
+    let tx5 = t.connp.tx(4).unwrap();
+
+    assert!(tx5.is_complete());
+    assert!(!tx5.flags.is_set(HtpFlags::HOST_AMBIGUOUS));
+    assert!(tx5
+        .request_hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("www.example.com"));
+    assert_eq!(Some(80), tx5.request_port_number);
+}
+/// Parses `21-http09.t`: one transaction and no HTTP_0_9_EXTRA flag on the connection.
+#[test]
+fn Http_0_9() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("21-http09.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+    assert!(!t.connp.conn.flags.is_set(ConnectionFlags::HTTP_0_9_EXTRA));
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+/// Parses `22-http_1_1-host_missing`: one transaction flagged HOST_MISSING.
+#[test]
+fn Http11HostMissing() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("22-http_1_1-host_missing").is_ok());
+    assert_eq!(1, t.connp.tx_size());
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.flags.is_set(HtpFlags::HOST_MISSING));
+}
+/// Parses `23-http09-multiple.t` and expects it to yield a single transaction.
+#[test]
+fn Http_0_9_Multiple() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("23-http09-multiple.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+/// Parses `24-http09-explicit.t`: one transaction whose is_protocol_0_9 is false.
+#[test]
+fn Http_0_9_Explicit() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("24-http09-explicit.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(!tx.is_protocol_0_9);
+}
+/// Parses `25-small-chunks.t`; only a successful run is asserted.
+#[test]
+fn SmallChunks() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("25-small-chunks.t").is_ok());
+}
+// Callback registered by RequestHeaderData: compares each raw chunk with the expected bytes and keeps a chunk counter in the transaction's user data (set to -1 on a mismatch).
+fn ConnectionParsing_RequestHeaderData_REQUEST_HEADER_DATA(
+
tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    // The user data holds the index of the next expected chunk. The turbofish
+    // names its type; the `&0` default and the negative sentinel below need a
+    // signed integer.
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+        0 => {
+            if data != b"User-Agent:" {
+                eprintln!("Mismatch in chunk 0");
+                counter = -1;
+            }
+        }
+        1 => {
+            if data != b" Test" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -1;
+            }
+        }
+        2 => {
+            if data != b" User" {
+                eprintln!("Mismatch in chunk 2");
+                counter = -1;
+            }
+        }
+        3 => {
+            if data != b" Agent\nHost: www.example.com\n\n" {
+                eprintln!("Mismatch in chunk 3");
+                counter = -1;
+            }
+        }
+        _ => {
+            // Reached for a fifth chunk, or silently once the counter is negative.
+            if counter >= 0 {
+                eprintln!("Seen more than 4 chunks");
+                counter = -1;
+            }
+        }
+    }
+
+    // Advance only while every chunk so far matched; a negative value sticks.
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Registers the request-header-data callback, parses `26-request-headers-raw.t`
+/// and expects the counter stored on the first transaction to be 4.
+#[test]
+fn RequestHeaderData() {
+    let mut cfg = TestConfig();
+    cfg.register_request_header_data(ConnectionParsing_RequestHeaderData_REQUEST_HEADER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("26-request-headers-raw.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(4, *tx.user_data::<i32>().unwrap());
+}
+
+/// Callback registered by RequestTrailerData: compares each raw trailer chunk
+/// with the expected bytes and keeps a chunk counter in the transaction's user
+/// data. Each failure stores its own negative code (-1, -2, -3).
+fn ConnectionParsing_RequestTrailerData_REQUEST_TRAILER_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+        0 => {
+            if data != b"Cookie:" {
+                eprintln!("Mismatch in chunk 0");
+                counter = -1;
+            }
+        }
+        1 => {
+            if data != b" 2\r\n\r\n" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -2;
+            }
+        }
+        _ => {
+            // Only two chunks are matched above, so the diagnostic says 2.
+            if counter >= 0 {
+                eprintln!("Seen more than 2 chunks");
+                counter = -3;
+            }
+        }
+    }
+
+    // Advance only while every chunk so far matched; a negative value sticks.
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Registers the request-trailer-data callback, parses `27-request-trailer-raw.t`
+/// and expects the counter stored on the first transaction to be 2.
+#[test]
+fn RequestTrailerData() {
+    let mut cfg = TestConfig();
+    cfg.register_request_trailer_data(ConnectionParsing_RequestTrailerData_REQUEST_TRAILER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("27-request-trailer-raw.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(2,
*tx.user_data::<i32>().unwrap());
+}
+
+/// Callback registered by ResponseHeaderData: compares each raw response-header
+/// chunk with the expected bytes and keeps a chunk counter in the transaction's
+/// user data. Each failure stores its own negative code (-1 to -5).
+fn ConnectionParsing_ResponseHeaderData_RESPONSE_HEADER_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    // The turbofish names the stored counter type; the `&0` default and the
+    // negative sentinels need a signed integer.
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+        0 => {
+            if data != b"Date:" {
+                eprintln!("Mismatch in chunk 0");
+                counter = -1;
+            }
+        }
+        1 => {
+            if data != b" Mon," {
+                eprintln!("Mismatch in chunk 1");
+                counter = -2;
+            }
+        }
+        2 => {
+            if data != b" 31 Aug 2009 20:25:50 GMT\r\nServer:" {
+                eprintln!("Mismatch in chunk 2");
+                counter = -3;
+            }
+        }
+        3 => {
+            if data != b" Apache\r\nConnection: close\r\nContent-Type: text/html\r\nTransfer-Encoding: chunked\r\n\r\n" {
+                eprintln!("Mismatch in chunk 3");
+                counter = -4;
+            }
+        }
+        _ => {
+            // Reached for a fifth chunk, or silently once the counter is negative.
+            if counter >= 0 {
+                eprintln!("Seen more than 4 chunks");
+                counter = -5;
+            }
+        }
+    }
+
+    // Advance only while every chunk so far matched; a negative value sticks.
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Registers the response-header-data callback, parses `28-response-headers-raw.t`
+/// and expects the counter stored on the first transaction to be 4.
+#[test]
+fn ResponseHeaderData() {
+    let mut cfg = TestConfig();
+    cfg.register_response_header_data(ConnectionParsing_ResponseHeaderData_RESPONSE_HEADER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("28-response-headers-raw.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert_eq!(4, *tx.user_data::<i32>().unwrap());
+}
+
+/// Callback registered by ResponseTrailerData: compares each raw response-trailer
+/// chunk with the expected bytes and keeps a chunk counter in the transaction's
+/// user data. Each failure stores its own negative code.
+fn ConnectionParsing_ResponseTrailerData_RESPONSE_TRAILER_DATA(
+    tx: &mut Transaction, d: &ParserData,
+) -> Result<()> {
+    let mut counter = *tx.user_data::<i32>().unwrap_or(&0);
+    let data = d.as_slice();
+    match counter {
+        0 => {
+            if data != b"Set-Cookie:" {
+                eprintln!("Mismatch in chunk 0");
+                counter = -1;
+            }
+        }
+
+        1 => {
+            if data != b" name=" {
+                eprintln!("Mismatch in chunk 1");
+                counter = -2;
+            }
+        }
+
+        2 => {
+            if data != b"value\r\nAnother-Header:" {
+                // The diagnostic now names the chunk that actually failed.
+                eprintln!("Mismatch in chunk 2");
+                counter = -3;
+            }
+        }
+
+        3 => {
+            if data != b" Header-Value\r\n\r\n" {
+                eprintln!("Mismatch in chunk 3");
+                counter = -4;
+            }
+        }
+
+        _ => {
+            if counter >= 0 {
+                eprintln!("Seen more than 4 chunks");
+                counter
= -5;
+            }
+        }
+    }
+
+    // Advance only while every chunk so far matched; a negative value sticks.
+    if counter >= 0 {
+        counter += 1;
+    }
+    tx.set_user_data(Box::new(counter));
+    Ok(())
+}
+
+/// Registers the response-trailer-data callback, parses `29-response-trailer-raw.t`
+/// and expects the counter stored on the first transaction to be 4.
+#[test]
+fn ResponseTrailerData() {
+    let mut cfg = TestConfig();
+    cfg.register_response_trailer_data(ConnectionParsing_ResponseTrailerData_RESPONSE_TRAILER_DATA);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("29-response-trailer-raw.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+    // The turbofish names the counter type stored by the callback.
+    assert_eq!(4, *tx.user_data::<i32>().unwrap());
+}
+
+/// Parses `30-get-ipv6.t`: a GET for `http://[::1]:8080/?p=%20` whose parsed URI
+/// has hostname `[::1]`, port number 8080 and query `p=%20`.
+#[test]
+fn GetIPv6() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("30-get-ipv6.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("http://[::1]:8080/?p=%20"));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("[::1]"));
+    assert_eq!(8080, tx.parsed_uri.as_ref().unwrap().port_number.unwrap());
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .query
+        .as_ref()
+        .unwrap()
+        .eq_slice("p=%20"));
+}
+
+/// Parses `31-get-request-line-nul.t`: one transaction whose request URI is `/?p=%20`.
+#[test]
+fn GetRequestLineNul() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("31-get-request-line-nul.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20"));
+}
+
+/// Parses `32-invalid-hostname.t`: HOSTH_INVALID, HOSTU_INVALID and HOST_INVALID are all set.
+#[test]
+fn InvalidHostname1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("32-invalid-hostname.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.flags.is_set(HtpFlags::HOSTH_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::HOSTU_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::HOST_INVALID));
+}
+
+/// Parses `33-invalid-hostname.t`: HOSTU_INVALID and HOST_INVALID are set, HOSTH_INVALID is not.
+#[test]
+fn InvalidHostname2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("33-invalid-hostname.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+
assert!(!tx.flags.is_set(HtpFlags::HOSTH_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::HOSTU_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::HOST_INVALID));
+}
+/// Parses `34-invalid-hostname.t`: HOSTH_INVALID and HOST_INVALID are set, HOSTU_INVALID is not.
+#[test]
+fn InvalidHostname3() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("34-invalid-hostname.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::HOSTH_INVALID));
+    assert!(!tx.flags.is_set(HtpFlags::HOSTU_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::HOST_INVALID));
+}
+/// Parses `35-early-response.t`: the first transaction must be complete.
+#[test]
+fn EarlyResponse() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("35-early-response.t").is_ok());
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx.is_complete());
+}
+/// Parses `36-invalid-request-1-invalid-c-l.t`: the run errors at HEADERS with REQUEST_INVALID and REQUEST_INVALID_C_L set; a hostname is still present.
+#[test]
+fn InvalidRequest1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("36-invalid-request-1-invalid-c-l.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::HEADERS, tx.request_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID_C_L));
+
+    assert!(tx.request_hostname.is_some());
+}
+/// Parses `37-invalid-request-2-t-e-and-c-l.t`: the run succeeds and the request completes with REQUEST_SMUGGLING set.
+#[test]
+fn InvalidRequest2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("37-invalid-request-2-t-e-and-c-l.t").is_ok());
+    // No error, flags only.
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+
+    assert!(tx.request_hostname.is_some());
+}
+/// Parses `38-invalid-request-3-invalid-t-e.t`: the run errors at HEADERS with REQUEST_INVALID and REQUEST_INVALID_T_E set; a hostname is still present.
+#[test]
+fn InvalidRequest3() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("38-invalid-request-3-invalid-t-e.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::HEADERS, tx.request_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID));
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_INVALID_T_E));
+
+    assert!(tx.request_hostname.is_some());
+}
+/// Parses `39-auto-destroy-crash.t` with tx auto-destroy enabled; the run must succeed and tx_size must be 4.
+#[test]
+fn AutoDestroyCrash() {
+    let mut cfg = TestConfig();
+    cfg.set_tx_auto_destroy(true);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("39-auto-destroy-crash.t").is_ok());
+
+    assert_eq!(4, t.connp.tx_size());
+}
+/// Parses `40-auth-basic.t`: BASIC auth with username `ivanr` and password `secret`.
+#[test]
+fn AuthBasic() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("40-auth-basic.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpAuthType::BASIC, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.as_ref().unwrap().eq_slice("ivanr"));
+    assert!(tx
+        .request_auth_password
+        .as_ref()
+        .unwrap()
+        .eq_slice("secret"));
+}
+/// Parses `41-auth-digest.t`: DIGEST auth with username `ivanr` and no password recorded.
+#[test]
+fn AuthDigest() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("41-auth-digest.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.as_ref().unwrap().eq_slice("ivanr"));
+
+    assert!(tx.request_auth_password.is_none());
+}
+/// Parses `42-unknown-method_only.t`: method `HELLO`, no request URI, and is_protocol_0_9 set.
+#[test]
+fn Unknown_MethodOnly() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("42-unknown-method_only.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+
assert!(tx.request_method.as_ref().unwrap().eq_slice("HELLO"));
+
+    assert!(tx.request_uri.is_none());
+
+    assert!(tx.is_protocol_0_9);
+}
+/// Parses `43-invalid-protocol.t`: the request completes with protocol number HtpProtocol::INVALID.
+#[test]
+fn InvalidHtpProtocol() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("43-invalid-protocol.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpProtocol::INVALID, tx.request_protocol_number);
+}
+/// Parses `44-auth-basic-invalid.t`: BASIC auth type, no username or password, AUTH_INVALID set.
+#[test]
+fn AuthBasicInvalid() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("44-auth-basic-invalid.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::BASIC, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+
+    assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID));
+}
+/// Parses `45-auth-digest-unquoted-username.t`: DIGEST auth type, no username or password, AUTH_INVALID set.
+#[test]
+fn AuthDigestUnquotedUsername() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("45-auth-digest-unquoted-username.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+
+    assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID));
+}
+/// Parses `46-auth-digest-invalid-username.t`: DIGEST auth type, no username or password, AUTH_INVALID set.
+#[test]
+fn AuthDigestInvalidUsername1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("46-auth-digest-invalid-username.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+
+    assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID));
+}
+/// Parses `47-auth-unrecognized.t`: auth type UNRECOGNIZED with no username or password.
+#[test]
+fn AuthUnrecognized() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("47-auth-unrecognized.t").is_ok());
+
+    let tx =
t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
+    assert_eq!(HtpAuthType::UNRECOGNIZED, tx.request_auth_type);
+
+    assert!(tx.request_auth_username.is_none());
+
+    assert!(tx.request_auth_password.is_none());
+}
+/// Parses `48-invalid-response-headers-1.t`: eight response headers; checks the values and FIELD_* flags of the empty-name, `Lws` and `Header@Name` headers.
+#[test]
+fn InvalidResponseHeaders1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("48-invalid-response-headers-1.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert_eq!(8, tx.response_headers.size());
+
+    assert_response_header_eq!(tx, "", "No Colon");
+    assert_response_header_flag_contains!(tx, "", HtpFlags::FIELD_INVALID);
+    assert_response_header_flag_contains!(tx, "", HtpFlags::FIELD_UNPARSEABLE);
+
+    assert_response_header_eq!(tx, "Lws", "After Header Name");
+    assert_response_header_flag_contains!(tx, "Lws", HtpFlags::FIELD_INVALID);
+
+    assert_response_header_eq!(tx, "Header@Name", "Not Token");
+    assert_response_header_flag_contains!(tx, "Header@Name", HtpFlags::FIELD_INVALID);
+}
+/// Parses `49-invalid-response-headers-2.t`: six response headers; the empty-name header has value `Empty Name` and FIELD_INVALID.
+#[test]
+fn InvalidResponseHeaders2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("49-invalid-response-headers-2.t").is_ok());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert_eq!(6, tx.response_headers.size());
+
+    assert_response_header_eq!(tx, "", "Empty Name");
+    assert_response_header_flag_contains!(tx, "", HtpFlags::FIELD_INVALID);
+}
+/// Parses `50-util.t` with log_level NONE, emits one htp_error! by hand, and expects the connection to hold no log entries.
+#[test]
+fn Util() {
+    use htp::{htp_error, htp_log};
+    let mut cfg = TestConfig();
+    cfg.log_level = HtpLogLevel::NONE;
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("50-util.t").is_ok());
+    // Explicitly add a log message to verify it is not logged
+    htp_error!(&mut t.connp.logger, HtpLogCode::UNKNOWN, "Log message");
+    assert_eq!(0, t.connp.conn.get_logs().len());
+}
+/// Parses `51-get-ipv6-invalid.t`: a GET for `http://[::1:8080/?p=%20` whose parsed hostname is `[::1:8080`.
+#[test]
+fn GetIPv6Invalid() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("51-get-ipv6-invalid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("http://[::1:8080/?p=%20"));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .hostname
+        .as_ref()
+        .unwrap()
+        .eq_slice("[::1:8080"));
+}
+/// Parses `52-invalid-path.t`: a GET for `invalid/path?p=%20` whose parsed path is `invalid/path`.
+#[test]
+fn InvalidPath() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("52-invalid-path.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.request_method.as_ref().unwrap().eq_slice("GET"));
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("invalid/path?p=%20"));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("invalid/path"));
+}
+/// Parses `53-path-utf8-none.t`: PATH_UTF8_VALID, PATH_UTF8_OVERLONG and PATH_HALF_FULL_RANGE are all unset.
+#[test]
+fn PathUtf8_None() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("53-path-utf8-none.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_HALF_FULL_RANGE));
+}
+/// Parses `54-path-utf8-valid.t`: PATH_UTF8_VALID is set.
+#[test]
+fn PathUtf8_Valid() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("54-path-utf8-valid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+}
+/// Parses `55-path-utf8-overlong-2.t`: PATH_UTF8_OVERLONG is set.
+#[test]
+fn PathUtf8_Overlong2() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("55-path-utf8-overlong-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+}
+/// Parses `56-path-utf8-overlong-3.t`: PATH_UTF8_OVERLONG is set.
+#[test]
+fn PathUtf8_Overlong3() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("56-path-utf8-overlong-3.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+
assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+}
+/// Parses `57-path-utf8-overlong-4.t`: PATH_UTF8_OVERLONG is set.
+#[test]
+fn PathUtf8_Overlong4() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("57-path-utf8-overlong-4.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+}
+/// Parses `58-path-utf8-invalid.t`: PATH_UTF8_INVALID is set and PATH_UTF8_VALID is not.
+#[test]
+fn PathUtf8_Invalid() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("58-path-utf8-invalid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_INVALID));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+}
+/// Parses `59-path-utf8-fullwidth.t`: PATH_HALF_FULL_RANGE is set.
+#[test]
+fn PathUtf8_FullWidth() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("59-path-utf8-fullwidth.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_HALF_FULL_RANGE));
+}
+/// Parses `54-path-utf8-valid.t` with set_utf8_convert_bestfit(true): the parsed path is `/Ristic.txt`.
+#[test]
+fn PathUtf8_Decode_Valid() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("54-path-utf8-valid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/Ristic.txt"));
+}
+/// Parses `55-path-utf8-overlong-2.t` with set_utf8_convert_bestfit(true): PATH_UTF8_OVERLONG set, parsed path `/&.txt`.
+#[test]
+fn PathUtf8_Decode_Overlong2() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("55-path-utf8-overlong-2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+/// Parses `56-path-utf8-overlong-3.t` with set_utf8_convert_bestfit(true): PATH_UTF8_OVERLONG set, parsed path `/&.txt`.
+#[test]
+fn PathUtf8_Decode_Overlong3() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("56-path-utf8-overlong-3.t").is_ok());
+
+    assert_eq!(1,
t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+/// Parses `57-path-utf8-overlong-4.t` with set_utf8_convert_bestfit(true): PATH_UTF8_OVERLONG set, parsed path `/&.txt`.
+#[test]
+fn PathUtf8_Decode_Overlong4() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("57-path-utf8-overlong-4.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_OVERLONG));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+/// Parses `58-path-utf8-invalid.t` with set_utf8_convert_bestfit(true): PATH_UTF8_INVALID set, PATH_UTF8_VALID unset, parsed path `/Ristic?.txt`.
+#[test]
+fn PathUtf8_Decode_Invalid() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+    assert!(t.run_file("58-path-utf8-invalid.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_UTF8_INVALID));
+    assert!(!tx.flags.is_set(HtpFlags::PATH_UTF8_VALID));
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/Ristic?.txt"));
+}
+/// Parses `59-path-utf8-fullwidth.t` with set_utf8_convert_bestfit(true): PATH_HALF_FULL_RANGE set, parsed path `/&.txt`.
+#[test]
+fn PathUtf8_Decode_FullWidth() {
+    let mut cfg = TestConfig();
+    cfg.set_utf8_convert_bestfit(true);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("59-path-utf8-fullwidth.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx.flags.is_set(HtpFlags::PATH_HALF_FULL_RANGE));
+
+    assert!(tx
+        .parsed_uri
+        .as_ref()
+        .unwrap()
+        .path
+        .as_ref()
+        .unwrap()
+        .eq_slice("/&.txt"));
+}
+/// Parses `61-empty-line-between-requests.t`: two transactions; the ignored-lines check is disabled (see the block comment in the body).
+#[test]
+fn EmptyLineBetweenRequests() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("61-empty-line-between-requests.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let _tx = t.connp.tx(1).unwrap();
+
+    /*part of previous request body assert_eq!(1, tx.request_ignored_lines);*/
+}
+/// Parses `62-post-no-body.t`: two transactions, both complete in each direction with response content type text/html.
+#[test]
+fn PostNoBody() {
+    let mut t =
Test::new(TestConfig());
+
+    assert!(t.run_file("62-post-no-body.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx1.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx1.response_progress);
+    assert!(tx1
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx2.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx2.response_progress);
+    assert!(tx2
+        .response_content_type
+        .as_ref()
+        .unwrap()
+        .eq_slice("text/html"));
+}
+/// Parses `63-post-chunked-invalid-1.t` and expects an error. NOTE(review): the name says "Valid" while the fixture and the sibling tests say "Invalid"; probably meant PostChunkedInvalid1.
+#[test]
+fn PostChunkedValid1() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("63-post-chunked-invalid-1.t").is_err());
+}
+/// Parses `64-post-chunked-invalid-2.t` and expects an error.
+#[test]
+fn PostChunkedInvalid2() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("64-post-chunked-invalid-2.t").is_err());
+}
+/// Parses `65-post-chunked-invalid-3.t` and expects an error.
+#[test]
+fn PostChunkedInvalid3() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("65-post-chunked-invalid-3.t").is_err());
+}
+/// Parses `66-post-chunked-split-chunk.t`: the run succeeds with one transaction.
+#[test]
+fn PostChunkedSplitChunk() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("66-post-chunked-split-chunk.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+}
+/// Parses `67-long-request-line.t` with the default config: request URI is `/0123456789/0123456789/`.
+#[test]
+fn LongRequestLine1() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("67-long-request-line.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert!(tx
+        .request_uri
+        .as_ref()
+        .unwrap()
+        .eq_slice("/0123456789/0123456789/"));
+}
+/// Parses `67-long-request-line.t` with set_field_limit(16): the run errors and request progress stays at LINE.
+#[test]
+fn LongRequestLine2() {
+    let mut cfg = TestConfig();
+    cfg.set_field_limit(16);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("67-long-request-line.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::LINE, tx.request_progress);
+}
+/// Parses `68-invalid-request-header.t`: one transaction; the `Header-With-NUL` request header is compared with a value containing a NUL byte.
+#[test]
+fn InvalidRequestHeader() {
+    let mut t = Test::new(TestConfig());
+
+
assert!(t.run_file("68-invalid-request-header.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).expect("expected at least one transaction");
+
+    assert_request_header_eq!(tx, "Header-With-NUL", "BEFORE \0AFTER");
+}
+/// Parses `02-header-test-apache2.t` with the IDS server personality: one transaction; nothing else is asserted.
+#[test]
+fn TestGenericPersonality() {
+    let mut cfg = TestConfig();
+    cfg.set_server_personality(HtpServerPersonality::IDS)
+        .unwrap();
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("02-header-test-apache2.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let _tx = t.connp.tx(0).unwrap();
+}
+/// Parses `69-long-response-header.t` with set_field_limit(18): the run errors and response progress stays at HEADERS.
+#[test]
+fn LongResponseHeader() {
+    let mut cfg = TestConfig();
+    cfg.set_field_limit(18);
+    let mut t = Test::new(cfg);
+
+    assert!(t.run_file("69-long-response-header.t").is_err());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    //error first assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::HEADERS, tx.response_progress);
+}
+/// Parses `70-response-invalid-chunk-length.t`; only a successful run is asserted.
+#[test]
+fn ResponseInvalidChunkLength() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("70-response-invalid-chunk-length.t").is_ok());
+}
+/// Parses `71-response-split-chunk.t`: one transaction, complete in both directions.
+#[test]
+fn ResponseSplitChunk() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("71-response-split-chunk.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+/// Parses `72-response-split-body.t`: one transaction, complete in both directions.
+#[test]
+fn ResponseBody() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("72-response-split-body.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+/// Parses `73-response-te-and-cl.t`: one transaction, complete in both directions, with REQUEST_SMUGGLING set.
+#[test]
+fn ResponseContainsTeAndCl() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("73-response-te-and-cl.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+}
+/// Parses `74-response-multiple-cl.t`: REQUEST_SMUGGLING set; `Content-Length` is `12` and carries FIELD_REPEATED.
+#[test]
+fn ResponseMultipleCl() {
+    let mut t = Test::new(TestConfig());
+
+    assert!(t.run_file("74-response-multiple-cl.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+
+    assert_response_header_eq!(tx, "Content-Length", "12");
+    assert_response_header_flag_contains!(tx, "Content-Length", HtpFlags::FIELD_REPEATED);
+}
+/// Parses `88-response-multiple-cl-mismatch.t`: same header checks as ResponseMultipleCl, plus exactly two WARNING log entries with fixed messages.
+#[test]
+fn ResponseMultipleClMismatch() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("88-response-multiple-cl-mismatch.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+
+    assert_response_header_eq!(tx, "Content-Length", "12");
+    assert_response_header_flag_contains!(tx, "Content-Length", HtpFlags::FIELD_REPEATED);
+
+    let logs = t.connp.conn.get_logs();
+    assert_eq!(2, logs.len());
+    assert_eq!(
+        logs.first().unwrap().msg.msg,
+        "Ambiguous response C-L value"
+    );
+    assert_eq!(HtpLogLevel::WARNING, logs.first().unwrap().msg.level);
+    assert_eq!(logs.get(1).unwrap().msg.msg, "Repetition for header");
+    assert_eq!(HtpLogLevel::WARNING, logs.get(1).unwrap().msg.level);
+}
+/// Parses `75-response-invalid-cl.t`: one transaction, complete in both directions, REQUEST_SMUGGLING not set.
+#[test]
+fn ResponseInvalidCl() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("75-response-invalid-cl.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+
assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+
+    assert!(!tx.flags.is_set(HtpFlags::REQUEST_SMUGGLING));
+}
+/// Parses `76-response-no-body.t`: two complete transactions that compare unequal; `Server` is checked against `Apache` on the first.
+#[test]
+fn ResponseNoBody() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("76-response-no-body.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx1.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx1.response_progress);
+
+    assert_response_header_eq!(tx1, "Server", "Apache");
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx2.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx2.response_progress);
+
+    assert!(tx1 != tx2);
+}
+/// Parses `77-response-folded-headers.t`: two complete transactions; `Server` on the first is checked against `Apache Server`.
+#[test]
+fn ResponseFoldedHeaders() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("77-response-folded-headers.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+
+    let tx1 = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx1.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx1.response_progress);
+
+    assert_response_header_eq!(tx1, "Server", "Apache Server");
+
+    let tx2 = t.connp.tx(1).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx2.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx2.response_progress);
+}
+/// Parses `78-response-no-status-headers.t`: one transaction, complete in both directions.
+#[test]
+fn ResponseNoStatusHeaders() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("78-response-no-status-headers.t").is_ok());
+
+    assert_eq!(1, t.connp.tx_size());
+
+    let tx = t.connp.tx(0).unwrap();
+
+    assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress);
+    assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress);
+}
+/// Parses `79-connect-invalid-hostport.t`: the run succeeds with two transactions.
+#[test]
+fn ConnectInvalidHostport() {
+    let mut t = Test::new(TestConfig());
+    assert!(t.run_file("79-connect-invalid-hostport.t").is_ok());
+
+    assert_eq!(2, t.connp.tx_size());
+}
+/// Parses `80-hostname-invalid-1.t`: the run succeeds with one transaction.
+#[test]
+fn HostnameInvalid1() {
+    let mut t = Test::new(TestConfig());
+
assert!(t.run_file("80-hostname-invalid-1.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); +} + +#[test] +fn HostnameInvalid2() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("81-hostname-invalid-2.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); +} + +#[test] +fn AuthDigestInvalidUsername2() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("83-auth-digest-invalid-username-2.t").is_ok()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + + assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type); + + assert!(tx.request_auth_username.is_none()); + + assert!(tx.request_auth_password.is_none()); + + assert!(tx.flags.is_set(HtpFlags::AUTH_INVALID)); +} + +#[test] +fn ResponseNoStatusHeaders2() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("84-response-no-status-headers-2.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +// Test was commented out of libhtp +//#[test] +//fn ZeroByteRequestTimeout() { +// let mut t = Test::new(TestConfig()); +//unsafe { +// assert!(t.run_file("85-zero-byte-request-timeout.t").is_ok()); +// +// assert_eq!(1, t.connp.tx_size()); +// +// let tx = t.connp.conn.get_tx(0); +// assert!(!tx.is_null()); +// +// assert_eq!(HtpRequestProgress::NOT_STARTED, tx.request_progress); +// assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +//}} + +#[test] +fn PartialRequestTimeout() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("86-partial-request-timeout.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn IncorrectHostAmbiguousWarning() { + let mut t = 
Test::new(TestConfig()); + assert!(t + .run_file("87-issue-55-incorrect-host-ambiguous-warning.t") + .is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx + .parsed_uri_raw + .as_ref() + .unwrap() + .port + .as_ref() + .unwrap() + .eq_slice("443")); + assert!(tx + .parsed_uri_raw + .as_ref() + .unwrap() + .hostname + .as_ref() + .unwrap() + .eq_slice("www.example.com")); + assert_eq!( + 443, + tx.parsed_uri_raw.as_ref().unwrap().port_number.unwrap() + ); + + assert!(tx + .request_hostname + .as_ref() + .unwrap() + .eq_slice("www.example.com")); + + assert!(!tx.flags.is_set(HtpFlags::HOST_AMBIGUOUS)); +} + +#[test] +fn GetWhitespace() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("89-get-whitespace.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice(" GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/?p=%20")); + assert!(tx + .parsed_uri + .as_ref() + .unwrap() + .query + .as_ref() + .unwrap() + .eq_slice("p=%20")); +} + +#[test] +fn RequestUriTooLarge() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("90-request-uri-too-large.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn RequestInvalid() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("91-request-unexpected-body.t").is_ok()); + + assert_eq!(2, t.connp.tx_size()); + + let mut tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("POST")); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + tx = t.connp.tx(1).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + 
assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::NOT_STARTED, tx.response_progress); +} + +#[test] +fn Http_0_9_MethodOnly() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("92-http_0_9-method_only.t").is_ok()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/")); + assert!(tx.is_protocol_0_9); +} + +#[test] +fn CompressedResponseDeflateAsGzip() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("93-compressed-response-deflateasgzip.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.is_complete()); + + assert_eq!(755, tx.response_message_len); + assert_eq!(1433, tx.response_entity_len); +} + +#[test] +fn CompressedResponseZlibAsDeflate() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-118.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + assert!(tx.is_complete()); + + assert_response_header_eq!( + tx, + "content-disposition", + "attachment; filename=\"eicar.txt\"" + ); + assert_response_header_eq!(tx, "content-encoding", "deflate"); + assert_eq!(68, tx.response_entity_len); + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(1, user_data.response_data.len()); + let chunk = &user_data.response_data[0]; + assert_eq!( + b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(), + chunk.as_slice() + ); +} + +#[test] +fn CompressedResponseMultiple() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("94-compressed-response-multiple.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.is_complete()); + + assert_eq!(51, tx.response_message_len); + assert_eq!(25, 
tx.response_entity_len); +} + +#[test] +fn CompressedResponseBombLimitOkay() { + let mut cfg = TestConfig(); + cfg.compression_options.set_bomb_limit(0); + let mut t = Test::new(cfg); + + assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.is_complete()); + + assert_eq!(28261, tx.response_message_len); + assert_eq!(159_590, tx.response_entity_len); +} + +#[test] +fn CompressedResponseBombLimitExceeded() { + let mut cfg = TestConfig(); + cfg.compression_options.set_bomb_limit(0); + cfg.compression_options.set_bomb_ratio(2); + let mut t = Test::new(cfg); + + assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_err()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + assert!(!tx.is_complete()); + + assert_eq!(1208, tx.response_message_len); + assert_eq!(2608, tx.response_entity_len); +} + +#[test] +fn CompressedResponseTimeLimitExceeded() { + let mut cfg = TestConfig(); + cfg.compression_options.set_time_limit(0); + let mut t = Test::new(cfg); + + assert!(t.run_file("14-compressed-response-gzip-chunked.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + assert!(tx.is_complete()); + + assert_eq!(28261, tx.response_message_len); + assert_eq!(29656, tx.response_entity_len); +} + +#[test] +fn CompressedResponseGzipAsDeflate() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("95-compressed-response-gzipasdeflate.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.is_complete()); + + assert_eq!(187, tx.response_message_len); + assert_eq!(225, tx.response_entity_len); +} + +#[test] +fn CompressedResponseLzma() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("96-compressed-response-lzma.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.is_complete()); + + 
assert_eq!(90, tx.response_message_len); + assert_eq!(68, tx.response_entity_len); +} + +#[test] +fn CompressedResponseLzmaDisabled() { + let mut cfg = TestConfig(); + cfg.compression_options.set_lzma_memlimit(0); + let mut t = Test::new(cfg); + + assert!(t.run_file("96-compressed-response-lzma.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + assert!(tx.is_complete()); + + assert_eq!(90, tx.response_message_len); + assert_eq!(90, tx.response_entity_len); +} + +#[test] +fn CompressedResponseLzmaMemlimit() { + let mut cfg = TestConfig(); + cfg.compression_options.set_lzma_memlimit(1); + let mut t = Test::new(cfg); + + assert!(t.run_file("96-compressed-response-lzma.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + let tx = t.connp.tx(0).unwrap(); + assert!(tx.is_complete()); + assert_eq!(90, tx.response_message_len); + assert_eq!(72, tx.response_entity_len); + assert!(tx.response_message.as_ref().unwrap().eq_slice("ok")); +} + +#[test] +fn RequestsCut() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("97-requests-cut.t").is_ok()); + + assert_eq!(2, t.connp.tx_size()); + let mut tx = t.connp.tx(0).unwrap(); + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + + tx = t.connp.tx(1).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); +} + +#[test] +fn ResponsesCut() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("98-responses-cut.t").is_ok()); + + assert_eq!(2, t.connp.tx_size()); + let mut tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert!(tx.response_status_number.eq_num(200)); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + tx = t.connp.tx(1).unwrap(); + + 
assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert!(tx.response_status_number.eq_num(200)); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn AuthDigest_EscapedQuote() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("100-auth-digest-escaped-quote.t").is_ok()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + + assert_eq!(HtpAuthType::DIGEST, tx.request_auth_type); + + assert!(tx + .request_auth_username + .as_ref() + .unwrap() + .eq_slice("ivan\"r\"")); + + assert!(tx.request_auth_password.is_none()); +} + +#[test] +fn Tunnelled1() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("106-tunnelled-1.t").is_ok()); + assert_eq!(2, t.connp.tx_size()); + let tx1 = t.connp.tx(0).unwrap(); + + assert!(tx1.request_method.as_ref().unwrap().eq_slice("CONNECT")); + let tx2 = t.connp.tx(1).unwrap(); + + assert!(tx2.request_method.as_ref().unwrap().eq_slice("GET")); +} + +#[test] +fn Expect100() { + let mut t = Test::new(TestConfig()); + + assert!(t.run_file("105-expect-100.t").is_ok()); + assert_eq!(2, t.connp.tx_size()); + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("PUT")); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert!(tx.response_status_number.eq_num(401)); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + let tx = t.connp.tx(1).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("POST")); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert!(tx.response_status_number.eq_num(200)); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn UnknownStatusNumber() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("107-response_unknown_status.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + let tx = 
t.connp.tx(0).unwrap(); + + assert_eq!(tx.response_status_number, HtpResponseNumber::UNKNOWN); +} + +#[test] +fn ResponseHeaderCrOnly() { + // Content-Length terminated with \r only. + let mut t = Test::new(TestConfig()); + assert!(t.run_file("108-response-headers-cr-only.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_eq!(2, tx.response_headers.size()); + // Check response headers + assert_response_header_eq!(tx, "content-type", "text/html"); + assert_response_header_eq!(tx, "Content-Length", "7"); +} + +#[test] +fn ResponseHeaderDeformedEOL() { + // Content-Length terminated with \n\r\r\n\r\n only. + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("109-response-headers-deformed-eol.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_eq!(2, tx.response_headers.size()); + // Check response headers + assert_response_header_eq!(tx, "content-type", "text/html"); + assert_response_header_eq!(tx, "content-length", "6"); + let logs = t.connp.conn.get_logs(); + let log_message_count = logs.len(); + assert_eq!(log_message_count, 2); + assert_eq!(logs.first().unwrap().msg.code, HtpLogCode::DEFORMED_EOL); + + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(2, user_data.response_data.len()); + assert_eq!(b"abcdef".as_ref(), user_data.response_data[0].as_slice()); +} + +#[test] +fn ResponseFoldedHeaders2() { + // Space folding char + let mut t = Test::new(TestConfig()); + assert!(t.run_file("110-response-folded-headers-2.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + assert_response_header_eq!(tx, "Server", "Apache Server"); + assert_eq!(3, tx.response_headers.size()); +} + +#[test] +fn ResponseHeadersChunked() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("111-response-headers-chunked.t").is_ok()); + + 
assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + assert_eq!(2, tx.response_headers.size()); + + assert_response_header_eq!(tx, "content-type", "text/html"); + assert_response_header_eq!(tx, "content-length", "12"); +} + +#[test] +fn ResponseHeadersChunked2() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("112-response-headers-chunked-2.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + assert_eq!(2, tx.response_headers.size()); + + assert_response_header_eq!(tx, "content-type", "text/html"); + assert_response_header_eq!(tx, "content-length", "12"); +} + +#[test] +fn ResponseMultipartRanges() { + // This should be is_ok() once multipart/byteranges is handled in response parsing + let mut t = Test::new(TestConfig()); + assert!(t.run_file("113-response-multipart-byte-ranges.t").is_err()); +} + +#[test] +fn Http2Upgrade() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("114-http-2-upgrade.t").is_ok()); + + assert_eq!(2, t.connp.tx_size()); + assert!(!t.connp.tx(0).unwrap().is_http_2_upgrade); + assert!(t.connp.tx(1).unwrap().is_http_2_upgrade); +} + +#[test] +fn AuthBearer() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("115-auth-bearer.t").is_ok()); + + let tx = t.connp.tx(0).unwrap(); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpAuthType::BEARER, tx.request_auth_type); + + assert!(tx + .request_auth_token + .as_ref() + .unwrap() + .eq_slice("mF_9.B5f-4.1JqM")); +} + +#[test] +fn HttpCloseHeaders() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("http-close-headers.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + 
assert!(tx.request_method.as_ref().unwrap().eq_slice("GET")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/")); + + assert_eq!(HtpProtocol::V1_1, tx.request_protocol_number); + assert_eq!(HtpProtocol::V1_0, tx.response_protocol_number); + + assert_request_header_eq!(tx, "Host", "100.64.0.200"); + assert_request_header_eq!(tx, "Connection", "keep-alive"); + assert_request_header_eq!(tx, "Accept-Encoding", "gzip, deflate"); + assert_request_header_eq!(tx, "Accept", "*/*"); + assert_request_header_eq!(tx, "User-Agent", "python-requests/2.21.0"); + assert_response_header_eq!(tx, "Server", "ng1nx"); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn HttpStartFromResponse() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("http-start-from-response.t").is_ok()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.is_none()); + assert_eq!( + tx.request_uri, + Some(Bstr::from("/libhtp::request_uri_not_seen")) + ); + assert!(tx.response_status_number.eq_num(200)); + + assert_eq!(HtpProtocol::UNKNOWN, tx.request_protocol_number); + assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + let tx = t.connp.tx(1).unwrap(); + assert_eq!(tx.request_method, Some(Bstr::from("GET"))); + assert_eq!(tx.request_uri, Some(Bstr::from("/favicon.ico"))); + assert!(tx.response_status_number.eq_num(404)); + + assert_eq!(HtpProtocol::V1_1, tx.request_protocol_number); + assert_eq!(HtpProtocol::V1_1, tx.response_protocol_number); + + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); + + let logs = t.connp.conn.get_logs(); + assert_eq!(1, logs.len()); + assert_eq!( + logs.first().unwrap().msg.msg, + "Unable to match response to request" + ); + 
assert_eq!(HtpLogLevel::ERROR, logs.first().unwrap().msg.level); +} + +#[test] +fn RequestCompression() { + let mut cfg = TestConfig(); + cfg.set_request_decompression(true); + let mut t = Test::new(cfg); + + assert!(t.run_file("116-request-compression.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.is_complete()); + + assert_eq!(1355, tx.request_message_len); + assert_eq!(2614, tx.request_entity_len); +} + +#[test] +fn RequestResponseCompression() { + let mut cfg = TestConfig(); + cfg.set_request_decompression(true); + let mut t = Test::new(cfg); + + assert!(t.run_file("117-request-response-compression.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.is_complete()); + + assert_eq!(1355, tx.request_message_len); + assert_eq!(2614, tx.request_entity_len); + + assert_eq!(51, tx.response_message_len); + assert_eq!(25, tx.response_entity_len); +} + +#[test] +fn AmbiguousEOL() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("119-ambiguous-eol.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + + assert!(tx.request_method.as_ref().unwrap().eq_slice("POST")); + assert!(tx.request_uri.as_ref().unwrap().eq_slice("/")); + assert_eq!(HtpProtocol::V1_0, tx.request_protocol_number); + + assert_eq!(HtpProtocol::V1_0, tx.response_protocol_number); + assert!(tx.response_status_number.eq_num(200)); +} + +// Evader Tests +#[test] +fn HttpEvader017() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-017.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/chunked/eicar.txt/cr-size"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "transfer-encoding", "chunked"); + assert_eq!(68, tx.response_entity_len); + assert_eq!(101, tx.response_message_len); + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(5, 
user_data.response_data.len()); + assert_eq!( + b"X5O!P%@AP[4\\PZX".as_ref(), + user_data.response_data[0].as_slice() + ); + assert_eq!( + b"54(P^)7CC)7}$EI".as_ref(), + user_data.response_data[1].as_slice() + ); + assert_eq!( + b"CAR-STANDARD-AN".as_ref(), + user_data.response_data[2].as_slice() + ); + assert_eq!( + b"TIVIRUS-TEST-FI".as_ref(), + user_data.response_data[3].as_slice() + ); + assert_eq!(b"LE!$H+H*".as_ref(), user_data.response_data[4].as_slice()); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn HttpEvader018() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-018.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/chunked/eicar.txt/lf-size"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "transfer-encoding", "chunked"); + assert_eq!(68, tx.response_entity_len); + assert_eq!(101, tx.response_message_len); + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(5, user_data.response_data.len()); + assert_eq!( + b"X5O!P%@AP[4\\PZX".as_ref(), + user_data.response_data[0].as_slice() + ); + assert_eq!( + b"54(P^)7CC)7}$EI".as_ref(), + user_data.response_data[1].as_slice() + ); + assert_eq!( + b"CAR-STANDARD-AN".as_ref(), + user_data.response_data[2].as_slice() + ); + assert_eq!( + b"TIVIRUS-TEST-FI".as_ref(), + user_data.response_data[3].as_slice() + ); + assert_eq!(b"LE!$H+H*".as_ref(), user_data.response_data[4].as_slice()); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn HttpEvader044() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-044.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/chunked/eicar.txt/chunked,http10,do_clen"); + assert_eq!(HtpProtocol::V1_0, 
tx.response_protocol_number); + assert!(tx.response_status_number.eq_num(200)); + assert_response_header_eq!(tx, "content-type", "application/octet-stream"); + assert_response_header_eq!( + tx, + "content-disposition", + "attachment; filename=\"eicar.txt\"" + ); + assert_response_header_eq!(tx, "transfer-encoding", "chunked"); + assert_response_header_eq!(tx, "connection", "close"); + assert_eq!(68, tx.response_entity_len); + assert_eq!(68, tx.response_message_len); + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(1, user_data.response_data.len()); + let chunk = &user_data.response_data[0]; + assert_eq!( + b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(), + chunk.as_slice() + ); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn HttpEvader059() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-059.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/chunked/eicar.txt/chunkednl-"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader060() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-060.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/chunked/eicar.txt/nl-nl-chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader061() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-061.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/chunked/eicar.txt/nl-nl-chunked-nl-"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} +#[test] +fn HttpEvader078() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-078.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, 
"/chunked/eicar.txt/chunkedcr-,do_clen"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "transfer-encoding", "chunked"); + assert_eq!(68, tx.response_entity_len); + assert_eq!(68, tx.response_message_len); + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(1, user_data.response_data.len()); + let chunk = &user_data.response_data[0]; + assert_eq!( + b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(), + chunk.as_slice() + ); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn HttpEvader130() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("http-evader-130.t").is_err()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!( + tx, + "/compressed/eicar.txt/ce%3Adeflate-nl-,-nl-deflate-nl-;deflate;deflate" + ); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "Content-Encoding", "deflate , deflate"); + assert_response_header_eq!(tx, "Content-Length", "75"); + assert_eq!(68, tx.response_entity_len); + assert_eq!(76, tx.response_message_len); +} + +#[test] +fn HttpEvader195() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-195.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!( + tx, + "/compressed/eicar.txt/ce%3Agzip;gzip;replace%3A3,1%7C02;replace%3A10,0=0000" + ); + assert_response_header_eq!(tx, "Content-Encoding", "gzip"); + assert_eq!(68, tx.response_entity_len); + assert_eq!(90, tx.response_message_len); + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(1, user_data.response_data.len()); + assert_eq!( + user_data.response_data[0].as_slice(), + b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref() + ); +} + +#[test] +fn HttpEvader274() { + let mut t = Test::new_with_callbacks(); + 
assert!(t.run_file("http-evader-274.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/somehdr;space;chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader284() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-284.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/cr;chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader286() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-286.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/crcronly;chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader287() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-287.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/cr-cronly;chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader297() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-297.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/te%5C015%5C040%3Achunked;do_chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader300() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-300.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/te%5C015%5C012%5C040%5C015%5C012%5C040%3A%5C015%5C012%5C040chunked;do_chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader303() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-303.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, 
"/broken/eicar.txt/te%3A%5C000chunked;do_chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader307() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-307.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/te%3A%5C012%5C000chunked;do_chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader318() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("http-evader-318.t").is_err()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/ce%5C015%5C012%5C040%3Agzip;do_gzip"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "Content-Encoding", "gzip"); + assert_eq!(68, tx.response_entity_len); + assert_eq!(89, tx.response_message_len); +} + +#[test] +fn HttpEvader320() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("http-evader-320.t").is_err()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/ce%5C013%3Agzip;do_gzip"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "Content-Encoding", "gzip"); + assert_response_header_eq!(tx, "Content-Length", "88"); + assert_eq!(88, tx.response_entity_len); + assert_eq!(99, tx.response_message_len); +} + +#[test] +fn HttpEvader321() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("http-evader-321.t").is_err()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/ce%5C014%3Agzip;do_gzip"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "Content-Encoding", "gzip"); + assert_response_header_eq!(tx, "Content-Length", "88"); + assert_eq!(88, tx.response_entity_len); + assert_eq!(99, tx.response_message_len); +} + +#[test] +fn HttpEvader390() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-390.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!( + tx, 
+ "/broken/eicar.txt/status%3A%5C000HTTP/1.1%28space%29200%28space%29ok;chunked" + ); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader402() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-402.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/chunked;cr-no-crlf;end-crlflf"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader405() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-405.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/chunked;lfcr-no-crlf;end-crlfcrlf"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader411() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-411.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/end-lfcrcrlf;chunked"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader416() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("http-evader-416.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/end-lf%5C040lf"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "Content-length", "68"); + assert_eq!(69, tx.response_message_len); + assert_eq!(69, tx.response_entity_len); + let user_data = tx.user_data::().unwrap(); + assert!(user_data.request_data.is_empty()); + assert_eq!(2, user_data.response_data.len()); + assert_eq!( + b"X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*".as_ref(), + user_data.response_data[0].as_slice() + ); + assert_eq!(b"\n".as_ref(), user_data.response_data[1].as_slice()); + assert_eq!(HtpRequestProgress::COMPLETE, tx.request_progress); + assert_eq!(HtpResponseProgress::COMPLETE, tx.response_progress); +} + +#[test] +fn HttpEvader419() { + let mut t 
= Test::new_with_callbacks(); + assert!(t.run_file("http-evader-419.t").is_ok()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/chunked;end-lf%5C040lf"); + assert_evader_response!(tx); + assert_evader_chunked!(tx); +} + +#[test] +fn HttpEvader423() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("http-evader-423.t").is_err()); + let tx = t.connp.tx(0).unwrap(); + assert_evader_request!(tx, "/broken/eicar.txt/gzip;end-lf%5C040lflf"); + assert_evader_response!(tx); + assert_response_header_eq!(tx, "Content-Encoding", "gzip"); + assert_response_header_eq!(tx, "Content-length", "88"); + assert_eq!(89, tx.response_message_len); + assert_eq!(68, tx.response_entity_len); +} + +#[test] +fn RequestGap() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("120-request-gap.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + let user_data = tx.user_data::().unwrap(); + + assert!(tx.flags.is_set(HtpFlags::REQUEST_MISSING_BYTES)); + + // The interim header from the 100 response should not be among the final headers. 
+ assert!(tx.request_headers.get_nocase_nozero("Header1").is_none()); + assert_eq!(user_data.request_data[1].as_slice(), b"".as_ref()); +} + +#[test] +fn ResponseGap() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("121-response-gap.t").is_ok()); + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + let user_data = tx.user_data::().unwrap(); + + assert!(tx.flags.is_set(HtpFlags::RESPONSE_MISSING_BYTES)); + + assert_eq!(user_data.response_data[0].as_slice(), b"Hell".as_ref()); + // Next chunk is a gap of size 4 + assert_eq!(user_data.response_data[1].as_slice(), b"".as_ref()); + assert_eq!(user_data.response_data[1].capacity(), 4); + assert_eq!(user_data.response_data[2].as_slice(), b"rld!".as_ref()); +} + +#[test] +fn ResponseBodyData() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("122-response-body-data.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + assert!(tx.is_complete()); + + let user_data = tx.user_data::().unwrap(); + let response_data = &user_data.response_data; + assert_eq!(3, response_data.len()); + assert_eq!(b"1\n", response_data[0].as_slice()); + assert_eq!(b"23\n", response_data[1].as_slice()); + assert_eq!(b"4", response_data[2].as_slice()); +} + +#[test] +fn ResponseHeaderParsing() { + let mut t = Test::new(TestConfig()); + assert!(t.run_file("123-response-header-bug.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).expect("expected tx to exist"); + + let actual: Vec<(&[u8], &[u8])> = (&tx.response_headers) + .into_iter() + .map(|val| (val.name.as_slice(), val.value.as_slice())) + .collect(); + + let expected: Vec<(&[u8], &[u8])> = [ + ("Date", "Mon, 31 Aug 2009 20:25:50 GMT"), + ("Server", "Apache"), + ("Connection", "close"), + ("Content-Type", "text/html"), + ("Content-Length", "12"), + ] + .iter() + .map(|(key, val)| (key.as_bytes(), val.as_bytes())) + .collect(); + assert_eq!( + actual, + expected, + "{:?} != 
{:?}", + actual + .clone() + .into_iter() + .map(|(key, val)| ( + String::from_utf8_lossy(key).to_string(), + String::from_utf8_lossy(val).to_string() + )) + .collect::>(), + expected + .clone() + .into_iter() + .map(|(key, val)| ( + String::from_utf8_lossy(key).to_string(), + String::from_utf8_lossy(val).to_string() + )) + .collect::>(), + ); +} + +#[test] +fn RequestSingleBytes() { + // Test input fed in one byte at a time + let input = b" GET / HTTP/1.0\r\nUser-Agent: Test/1.0\r\n\r\n"; + let mut t = Test::new_with_callbacks(); + t.open_connection(None); + for x in 0..input.len() { + t.connp + .request_data(ParserData::from(&input[x..(x + 1)]), None); + } + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + let h = tx.request_headers.get_nocase_nozero("User-Agent").unwrap(); + assert!(h.value.eq_slice(b"Test/1.0")); +} + +#[test] +fn ResponseIncomplete() { + let mut t = Test::new_with_callbacks(); + assert!(t.run_file("124-response-incomplete.t").is_ok()); + + assert_eq!(1, t.connp.tx_size()); + + let tx = t.connp.tx(0).unwrap(); + assert!(tx.is_complete()); + + let user_data = tx.user_data::().unwrap(); + + assert_eq!( + vec![ + "request_start 0", + "response_start 0", + "request_complete 0", + "response_complete 0", + "transaction_complete 0" + ], + user_data.order + ); +} + +#[test] +fn RandomInput() { + let mut t = Test::new(TestConfig()); + if let Ok(file) = std::env::var("LIBHTP_TEST") { + t.run_file(&file).ok(); + println!("{:#?}", t.connp); + for x in 0..t.connp.tx_size() { + println!("{:#?}", t.connp.tx(x)); + } + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index bea7854f107e..51f278ef1c08 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -142,3 +142,5 @@ pub mod ldap; #[allow(unused_imports)] pub use suricata_lua_sys; +//Re-export htp symbols +pub use htp::c_api::*; diff --git a/scripts/bundle.sh b/scripts/bundle.sh index 06ea1ba9285d..2b90684c3f10 100755 --- a/scripts/bundle.sh +++ b/scripts/bundle.sh @@ -1,12 +1,12 
@@ #! /usr/bin/env bash # -# This script will bundle libhtp and/or suricata-update for you. +# This script will bundle suricata-update for you. # # To use, run from the top Suricata source directory: # -# ./scripts/bundle.sh [suricata-update|libhtp] +# ./scripts/bundle.sh [suricata-update] # -# If no arguments are provided, both suricata-update and libhtp will +# If no arguments are provided, suricata-update will # be bundled. # # Environment variables: @@ -15,10 +15,6 @@ # SU_BRANCH: Override the Suricata-Update branch to a branch, tag or # {pull,merge}-request. # -# LIBHTP_REPO: Overrides the libhtp git repo -# LIBHTP_BRANCH: Override the libhtp branch to a branch, tag or -# {pull,merge}-request. -# # DESTDIR: Checkout to another directory instead of the current # directory. # @@ -39,8 +35,8 @@ what="$1" # For GitHub the following formats are allowed: # - pr/123 # - pull/123 -# - https://github.com/OISF/libhtp/pull/123 -# - OISF/libhtp#123 +# - https://github.com/OISF/suricata-update/pull/123 +# - OISF/suricata-update#123 # # For GibLab only the format "mr/123" is supported. transform_branch() { @@ -96,14 +92,6 @@ while IFS= read -r requirement; do cp -a ${DESTDIR}/suricata-update.tmp/. ${DESTDIR}/suricata-update rm -rf ${DESTDIR}/suricata-update.tmp ;; - libhtp) - LIBHTP_REPO=${LIBHTP_REPO:-$2} - LIBHTP_BRANCH=$(transform_branch ${LIBHTP_BRANCH:-$3}) - echo "===> Bundling ${LIBHTP_REPO} (${LIBHTP_BRANCH})" - rm -rf ${DESTDIR}/libhtp - fetch "${LIBHTP_REPO}" "${DESTDIR}/libhtp" "${LIBHTP_BRANCH}" - rm -rf libhtp/.git - ;; \#*) # Ignore comment. ;; diff --git a/scripts/check-setup.sh b/scripts/check-setup.sh index 9abe8001f1db..174ee8dd9d02 100755 --- a/scripts/check-setup.sh +++ b/scripts/check-setup.sh @@ -8,14 +8,6 @@ trap "rm -rf ${tmpdir}" EXIT (cd .. 
&& tar cf - $(git ls-files)) | (cd ${tmpdir} && tar xf -) -if [ -e ../libhtp ]; then - (cd ../libhtp && git archive --format=tar --prefix=libhtp/ HEAD) | \ - (cd ${tmpdir} && tar xvf -) -else - echo "error: this script required bundled libhtp..." - exit 1 -fi - cd ${tmpdir} # Do initial build. diff --git a/scripts/clang-format.sh b/scripts/clang-format.sh index fc69e49dbf31..dc658fb82b9c 100755 --- a/scripts/clang-format.sh +++ b/scripts/clang-format.sh @@ -301,8 +301,7 @@ function RequireProgram { # Make sure we are running from the top-level git directory. # Same approach as for setup-decoder.sh. Good enough. -# We could probably use git rev-parse --show-toplevel to do so, as long as we -# handle the libhtp subfolder correctly. +# We could probably use git rev-parse --show-toplevel to do so function SetTopLevelDir { if [ -e ./src/suricata.c ]; then # Do nothing. diff --git a/src/Makefile.am b/src/Makefile.am index 82155d7f825d..c794b3a808d1 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -25,7 +25,6 @@ noinst_HEADERS = \ app-layer-htp-body.h \ app-layer-htp-file.h \ app-layer-htp.h \ - app-layer-htp-libhtp.h \ app-layer-htp-mem.h \ app-layer-htp-range.h \ app-layer-htp-xff.h \ @@ -591,7 +590,6 @@ libsuricata_c_a_SOURCES = \ app-layer-htp-body.c \ app-layer-htp.c \ app-layer-htp-file.c \ - app-layer-htp-libhtp.c \ app-layer-htp-mem.c \ app-layer-htp-range.c \ app-layer-htp-xff.c \ @@ -1201,9 +1199,9 @@ suricata_SOURCES = main.c suricata_LDFLAGS = $(all_libraries) ${SECLDFLAGS} # rust library depends also on c if LINKER_SUPPORTS_GROUP -LDADD_GENERIC = "-Wl,--start-group,libsuricata_c.a,$(RUST_SURICATA_LIB),--end-group" $(HTP_LDADD) $(RUST_LDADD) +LDADD_GENERIC = "-Wl,--start-group,libsuricata_c.a,$(RUST_SURICATA_LIB),--end-group" $(RUST_LDADD) else -LDADD_GENERIC = libsuricata_c.a $(RUST_SURICATA_LIB) libsuricata_c.a $(RUST_SURICATA_LIB) $(HTP_LDADD) $(RUST_LDADD) +LDADD_GENERIC = libsuricata_c.a $(RUST_SURICATA_LIB) libsuricata_c.a $(RUST_SURICATA_LIB) 
$(RUST_LDADD) endif suricata_LDADD = $(LDADD_GENERIC) suricata_DEPENDENCIES = libsuricata_c.a $(RUST_SURICATA_LIB) diff --git a/src/app-layer-htp-file.c b/src/app-layer-htp-file.c index 48f3a1a2ee5b..a6781dcc3300 100644 --- a/src/app-layer-htp-file.c +++ b/src/app-layer-htp-file.c @@ -91,9 +91,9 @@ int HTPFileOpen(HtpState *s, HtpTxUserData *tx, const uint8_t *filename, uint16_ * @param[in] rawvalue * @param[out] range * - * @return HTP_OK on success, HTP_ERROR on failure. + * @return HTP_STATUS_OK on success, HTP_STATUS_ERROR on failure. */ -int HTPParseContentRange(bstr *rawvalue, HTTPContentRange *range) +int HTPParseContentRange(const bstr *rawvalue, HTTPContentRange *range) { uint32_t len = (uint32_t)bstr_len(rawvalue); return rs_http_parse_content_range(range, bstr_ptr(rawvalue), len); @@ -108,7 +108,7 @@ int HTPParseContentRange(bstr *rawvalue, HTTPContentRange *range) * @return HTP_OK on success, HTP_ERROR, -2, -3 on failure. */ static int HTPParseAndCheckContentRange( - bstr *rawvalue, HTTPContentRange *range, HtpState *s, HtpTxUserData *htud) + const bstr *rawvalue, HTTPContentRange *range, HtpState *s, HtpTxUserData *htud) { int r = HTPParseContentRange(rawvalue, range); if (r != 0) { @@ -147,8 +147,8 @@ static int HTPParseAndCheckContentRange( * \retval -1 error */ int HTPFileOpenWithRange(HtpState *s, HtpTxUserData *txud, const uint8_t *filename, - uint16_t filename_len, const uint8_t *data, uint32_t data_len, htp_tx_t *tx, bstr *rawvalue, - HtpTxUserData *htud) + uint16_t filename_len, const uint8_t *data, uint32_t data_len, const htp_tx_t *tx, + const bstr *rawvalue, HtpTxUserData *htud) { SCEnter(); uint16_t flags; @@ -181,9 +181,9 @@ int HTPFileOpenWithRange(HtpState *s, HtpTxUserData *txud, const uint8_t *filena // Then, we will try to handle reassembly of different ranges of the same file uint8_t *keyurl; uint32_t keylen; - if (tx->request_hostname != NULL) { - uint32_t hlen = (uint32_t)bstr_len(tx->request_hostname); - if 
(bstr_len(tx->request_hostname) > UINT16_MAX) { + if (htp_tx_request_hostname(tx) != NULL) { + uint32_t hlen = (uint32_t)bstr_len(htp_tx_request_hostname(tx)); + if (hlen > UINT16_MAX) { hlen = UINT16_MAX; } keylen = hlen + filename_len; @@ -191,7 +191,7 @@ int HTPFileOpenWithRange(HtpState *s, HtpTxUserData *txud, const uint8_t *filena if (keyurl == NULL) { SCReturnInt(-1); } - memcpy(keyurl, bstr_ptr(tx->request_hostname), hlen); + memcpy(keyurl, bstr_ptr(htp_tx_request_hostname(tx)), hlen); memcpy(keyurl + hlen, filename, filename_len); } else { // do not reassemble file without host info @@ -402,9 +402,9 @@ static int HTPFileParserTest01(void) htp_tx_t *tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); + FAIL_IF_NULL(htp_tx_request_method(tx)); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -476,8 +476,8 @@ static int HTPFileParserTest02(void) htp_tx_t *tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF_NULL(htp_tx_request_method(tx)); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); FAIL_IF_NULL(tx_ud); FAIL_IF_NULL(tx_ud->files_ts.tail); @@ -568,9 +568,9 @@ static int HTPFileParserTest03(void) htp_tx_t *tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); + FAIL_IF_NULL(htp_tx_request_method(tx)); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); HtpTxUserData *tx_ud = 
htp_tx_get_user_data(tx); FAIL_IF_NULL(tx_ud); @@ -664,9 +664,9 @@ static int HTPFileParserTest04(void) htp_tx_t *tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); + FAIL_IF_NULL(htp_tx_request_method(tx)); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); FAIL_IF_NULL(tx_ud); @@ -730,9 +730,9 @@ static int HTPFileParserTest05(void) htp_tx_t *tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); + FAIL_IF_NULL(htp_tx_request_method(tx)); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); FAIL_IF_NULL(tx_ud); @@ -805,9 +805,9 @@ static int HTPFileParserTest06(void) htp_tx_t *tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); + FAIL_IF_NULL(htp_tx_request_method(tx)); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); FAIL_IF_NULL(tx_ud); @@ -870,8 +870,8 @@ static int HTPFileParserTest07(void) htp_tx_t *tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF_NULL(htp_tx_request_method(tx)); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); FAIL_IF_NULL(tx_ud); @@ -1192,9 +1192,9 @@ static int HTPFileParserTest11(void) htp_tx_t *tx = 
AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NULL(tx->request_method); + FAIL_IF_NULL(htp_tx_request_method(tx)); - FAIL_IF(memcmp(bstr_util_strdup_to_c(tx->request_method), "POST", 4) != 0); + FAIL_IF(memcmp(bstr_util_strdup_to_c(htp_tx_request_method(tx)), "POST", 4) != 0); HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); FAIL_IF_NULL(tx_ud); diff --git a/src/app-layer-htp-file.h b/src/app-layer-htp-file.h index 53a47d5d3020..1fac823ce423 100644 --- a/src/app-layer-htp-file.h +++ b/src/app-layer-htp-file.h @@ -30,12 +30,12 @@ int HTPFileOpen( HtpState *, HtpTxUserData *, const uint8_t *, uint16_t, const uint8_t *, uint32_t, uint8_t); int HTPFileOpenWithRange(HtpState *, HtpTxUserData *, const uint8_t *, uint16_t, const uint8_t *, - uint32_t, htp_tx_t *, bstr *rawvalue, HtpTxUserData *htud); + uint32_t, const htp_tx_t *, const bstr *rawvalue, HtpTxUserData *htud); bool HTPFileCloseHandleRange(const StreamingBufferConfig *sbcfg, FileContainer *, const uint16_t, HttpRangeContainerBlock *, const uint8_t *, uint32_t); int HTPFileStoreChunk(HtpTxUserData *, const uint8_t *, uint32_t, uint8_t); -int HTPParseContentRange(bstr *rawvalue, HTTPContentRange *range); +int HTPParseContentRange(const bstr *rawvalue, HTTPContentRange *range); int HTPFileClose(HtpTxUserData *tx, const uint8_t *data, uint32_t data_len, uint8_t flags, uint8_t direction); diff --git a/src/app-layer-htp-libhtp.c b/src/app-layer-htp-libhtp.c deleted file mode 100644 index dcc4a92b8bb0..000000000000 --- a/src/app-layer-htp-libhtp.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * We are using this file to hold APIs copied from libhtp 0.5.x. - */ - -/*************************************************************************** - * Copyright (c) 2009-2010 Open Information Security Foundation - * Copyright (c) 2010-2013 Qualys, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * - Neither the name of the Qualys, Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ***************************************************************************/ - -/** - * \file - * - * \author Anoop Saldanha - * - * APIs from libhtp 0.5.x. - */ - -#include "suricata-common.h" -#include -#include "app-layer-htp-libhtp.h" - -/** - * \brief Generates the normalized uri. - * - * Libhtp doesn't recreate the whole normalized uri and save it. - * That duty has now been passed to us. A lot of this code has been - * copied from libhtp. 
- * - * Keep an eye out on the tx->parsed_uri struct and how the parameters - * in it are generated, just in case some modifications are made to - * them in the future. - * - * \param uri_include_all boolean to indicate if scheme, username/password, - hostname and port should be part of the buffer - */ -bstr *SCHTPGenerateNormalizedUri(htp_tx_t *tx, htp_uri_t *uri, bool uri_include_all) -{ - if (uri == NULL) - return NULL; - - // On the first pass determine the length of the final string - size_t len = 0; - - if (uri_include_all) { - if (uri->scheme != NULL) { - len += bstr_len(uri->scheme); - len += 3; // "://" - } - - if ((uri->username != NULL) || (uri->password != NULL)) { - if (uri->username != NULL) { - len += bstr_len(uri->username); - } - - len += 1; // ":" - - if (uri->password != NULL) { - len += bstr_len(uri->password); - } - - len += 1; // "@" - } - - if (uri->hostname != NULL) { - len += bstr_len(uri->hostname); - } - - if (uri->port != NULL) { - len += 1; // ":" - len += bstr_len(uri->port); - } - } - - if (uri->path != NULL) { - len += bstr_len(uri->path); - } - - if (uri->query != NULL) { - len += 1; // "?" 
- len += bstr_len(uri->query); - } - - if (uri->fragment != NULL) { - len += 1; // "#" - len += bstr_len(uri->fragment); - } - - // On the second pass construct the string - /* FIXME in memcap */ - bstr *r = bstr_alloc(len); - if (r == NULL) { - return NULL; - } - - if (uri_include_all) { - if (uri->scheme != NULL) { - bstr_add_noex(r, uri->scheme); - bstr_add_c_noex(r, "://"); - } - - if ((uri->username != NULL) || (uri->password != NULL)) { - if (uri->username != NULL) { - bstr_add_noex(r, uri->username); - } - - bstr_add_c_noex(r, ":"); - - if (uri->password != NULL) { - bstr_add_noex(r, uri->password); - } - - bstr_add_c_noex(r, "@"); - } - - if (uri->hostname != NULL) { - bstr_add_noex(r, uri->hostname); - } - - if (uri->port != NULL) { - bstr_add_c_noex(r, ":"); - bstr_add_noex(r, uri->port); - } - } - - if (uri->path != NULL) { - bstr_add_noex(r, uri->path); - } - - if (uri->query != NULL) { - bstr *query = bstr_dup(uri->query); - if (query) { - uint64_t flags = 0; - htp_urldecode_inplace(tx->cfg, HTP_DECODER_URLENCODED, query, &flags); - bstr_add_c_noex(r, "?"); - bstr_add_noex(r, query); - bstr_free(query); - } - } - - if (uri->fragment != NULL) { - bstr_add_c_noex(r, "#"); - bstr_add_noex(r, uri->fragment); - } - - return r; -} diff --git a/src/app-layer-htp-libhtp.h b/src/app-layer-htp-libhtp.h deleted file mode 100644 index fa9a9ecc3b25..000000000000 --- a/src/app-layer-htp-libhtp.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * We are using this file to hold APIs copied from libhtp 0.5.x. - */ - -/*************************************************************************** - * Copyright (c) 2009-2010 Open Information Security Foundation - * Copyright (c) 2010-2013 Qualys, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * - Neither the name of the Qualys, Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ***************************************************************************/ - -/** - * \file - * - * \author Anoop Saldanha - * - * APIs from libhtp 0.5.x. 
- */ - -#ifndef SURICATA_APP_LAYER_HTP_LIBHTP__H -#define SURICATA_APP_LAYER_HTP_LIBHTP__H - -#include "suricata-common.h" - -bstr *SCHTPGenerateNormalizedUri(htp_tx_t *tx, htp_uri_t *uri, bool uri_include_all); - -#endif /* SURICATA_APP_LAYER_HTP_LIBHTP__H */ diff --git a/src/app-layer-htp-xff.c b/src/app-layer-htp-xff.c index c145e5818e23..a4096f0c8ee4 100644 --- a/src/app-layer-htp-xff.c +++ b/src/app-layer-htp-xff.c @@ -139,20 +139,17 @@ int HttpXFFGetIPFromTx(const Flow *f, uint64_t tx_id, HttpXFFCfg *xff_cfg, return 0; } - htp_header_t *h_xff = NULL; - if (tx->request_headers != NULL) { - h_xff = htp_table_get_c(tx->request_headers, xff_cfg->header); - } + const htp_header_t *h_xff = htp_tx_request_header(tx, xff_cfg->header); - if (h_xff != NULL && bstr_len(h_xff->value) >= XFF_CHAIN_MINLEN && - bstr_len(h_xff->value) < XFF_CHAIN_MAXLEN) { + if (h_xff != NULL && htp_header_value_len(h_xff) >= XFF_CHAIN_MINLEN && + htp_header_value_len(h_xff) < XFF_CHAIN_MAXLEN) { - memcpy(xff_chain, bstr_ptr(h_xff->value), bstr_len(h_xff->value)); - xff_chain[bstr_len(h_xff->value)]=0; + memcpy(xff_chain, htp_header_value_ptr(h_xff), htp_header_value_len(h_xff)); + xff_chain[htp_header_value_len(h_xff)] = 0; if (xff_cfg->flags & XFF_REVERSE) { /** Get the last IP address from the chain */ - p_xff = memrchr(xff_chain, ' ', bstr_len(h_xff->value)); + p_xff = memrchr(xff_chain, ' ', htp_header_value_len(h_xff)); if (p_xff == NULL) { p_xff = xff_chain; } else { @@ -161,7 +158,7 @@ int HttpXFFGetIPFromTx(const Flow *f, uint64_t tx_id, HttpXFFCfg *xff_cfg, } else { /** Get the first IP address from the chain */ - p_xff = memchr(xff_chain, ',', bstr_len(h_xff->value)); + p_xff = memchr(xff_chain, ',', htp_header_value_len(h_xff)); if (p_xff != NULL) { *p_xff = 0; } diff --git a/src/app-layer-htp.c b/src/app-layer-htp.c index 328a10b72cb8..53cb1788fc4b 100644 --- a/src/app-layer-htp.c +++ b/src/app-layer-htp.c @@ -52,7 +52,6 @@ #include "app-layer-htp.h" #include 
"app-layer-htp-body.h" #include "app-layer-htp-file.h" -#include "app-layer-htp-libhtp.h" #include "app-layer-htp-xff.h" #include "app-layer-htp-range.h" #include "app-layer-htp-mem.h" @@ -103,77 +102,116 @@ static uint64_t htp_state_memcnt = 0; #endif SCEnumCharMap http_decoder_event_table[] = { - { "UNKNOWN_ERROR", HTTP_DECODER_EVENT_UNKNOWN_ERROR }, - { "GZIP_DECOMPRESSION_FAILED", HTTP_DECODER_EVENT_GZIP_DECOMPRESSION_FAILED }, - { "REQUEST_FIELD_MISSING_COLON", HTTP_DECODER_EVENT_REQUEST_FIELD_MISSING_COLON }, - { "RESPONSE_FIELD_MISSING_COLON", HTTP_DECODER_EVENT_RESPONSE_FIELD_MISSING_COLON }, - { "INVALID_REQUEST_CHUNK_LEN", HTTP_DECODER_EVENT_INVALID_REQUEST_CHUNK_LEN }, - { "INVALID_RESPONSE_CHUNK_LEN", HTTP_DECODER_EVENT_INVALID_RESPONSE_CHUNK_LEN }, + { "UNKNOWN_ERROR", HTP_LOG_CODE_UNKNOWN }, + { "GZIP_DECOMPRESSION_FAILED", HTP_LOG_CODE_GZIP_DECOMPRESSION_FAILED }, + { "REQUEST_FIELD_MISSING_COLON", HTP_LOG_CODE_REQUEST_FIELD_MISSING_COLON }, + { "RESPONSE_FIELD_MISSING_COLON", HTP_LOG_CODE_RESPONSE_FIELD_MISSING_COLON }, + { "INVALID_REQUEST_CHUNK_LEN", HTP_LOG_CODE_INVALID_REQUEST_CHUNK_LEN }, + { "INVALID_RESPONSE_CHUNK_LEN", HTP_LOG_CODE_INVALID_RESPONSE_CHUNK_LEN }, { "INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST", - HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST }, + HTP_LOG_CODE_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST }, { "INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE", - HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE }, + HTP_LOG_CODE_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE }, { "INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST", - HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST }, + HTP_LOG_CODE_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST }, { "INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE", - HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE }, + HTP_LOG_CODE_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE }, { "DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST", - 
HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST }, + HTP_LOG_CODE_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST }, { "DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE", - HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE }, - { "100_CONTINUE_ALREADY_SEEN", HTTP_DECODER_EVENT_100_CONTINUE_ALREADY_SEEN }, - { "UNABLE_TO_MATCH_RESPONSE_TO_REQUEST", - HTTP_DECODER_EVENT_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST }, - { "INVALID_SERVER_PORT_IN_REQUEST", HTTP_DECODER_EVENT_INVALID_SERVER_PORT_IN_REQUEST }, - { "INVALID_AUTHORITY_PORT", HTTP_DECODER_EVENT_INVALID_AUTHORITY_PORT }, - { "REQUEST_HEADER_INVALID", HTTP_DECODER_EVENT_REQUEST_HEADER_INVALID }, - { "RESPONSE_HEADER_INVALID", HTTP_DECODER_EVENT_RESPONSE_HEADER_INVALID }, - { "MISSING_HOST_HEADER", HTTP_DECODER_EVENT_MISSING_HOST_HEADER }, - { "HOST_HEADER_AMBIGUOUS", HTTP_DECODER_EVENT_HOST_HEADER_AMBIGUOUS }, - { "INVALID_REQUEST_FIELD_FOLDING", HTTP_DECODER_EVENT_INVALID_REQUEST_FIELD_FOLDING }, - { "INVALID_RESPONSE_FIELD_FOLDING", HTTP_DECODER_EVENT_INVALID_RESPONSE_FIELD_FOLDING }, - { "REQUEST_FIELD_TOO_LONG", HTTP_DECODER_EVENT_REQUEST_FIELD_TOO_LONG }, - { "RESPONSE_FIELD_TOO_LONG", HTTP_DECODER_EVENT_RESPONSE_FIELD_TOO_LONG }, - { "FILE_NAME_TOO_LONG", HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG }, - { "REQUEST_LINE_INVALID", HTTP_DECODER_EVENT_REQUEST_LINE_INVALID }, - { "REQUEST_BODY_UNEXPECTED", HTTP_DECODER_EVENT_REQUEST_BODY_UNEXPECTED }, - { "REQUEST_SERVER_PORT_TCP_PORT_MISMATCH", - HTTP_DECODER_EVENT_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH }, - { "REQUEST_URI_HOST_INVALID", HTTP_DECODER_EVENT_URI_HOST_INVALID }, - { "REQUEST_HEADER_HOST_INVALID", HTTP_DECODER_EVENT_HEADER_HOST_INVALID }, - { "REQUEST_AUTH_UNRECOGNIZED", HTTP_DECODER_EVENT_AUTH_UNRECOGNIZED }, - { "REQUEST_HEADER_REPETITION", HTTP_DECODER_EVENT_REQUEST_HEADER_REPETITION }, - { "RESPONSE_HEADER_REPETITION", HTTP_DECODER_EVENT_RESPONSE_HEADER_REPETITION }, - { "DOUBLE_ENCODED_URI", HTTP_DECODER_EVENT_DOUBLE_ENCODED_URI }, - { 
"URI_DELIM_NON_COMPLIANT", HTTP_DECODER_EVENT_URI_DELIM_NON_COMPLIANT }, - { "METHOD_DELIM_NON_COMPLIANT", HTTP_DECODER_EVENT_METHOD_DELIM_NON_COMPLIANT }, - { "REQUEST_LINE_LEADING_WHITESPACE", HTTP_DECODER_EVENT_REQUEST_LINE_LEADING_WHITESPACE }, - { "TOO_MANY_ENCODING_LAYERS", HTTP_DECODER_EVENT_TOO_MANY_ENCODING_LAYERS }, - { "ABNORMAL_CE_HEADER", HTTP_DECODER_EVENT_ABNORMAL_CE_HEADER }, - { "RESPONSE_MULTIPART_BYTERANGES", HTTP_DECODER_EVENT_RESPONSE_MULTIPART_BYTERANGES }, - { "RESPONSE_ABNORMAL_TRANSFER_ENCODING", - HTTP_DECODER_EVENT_RESPONSE_ABNORMAL_TRANSFER_ENCODING }, - { "RESPONSE_CHUNKED_OLD_PROTO", HTTP_DECODER_EVENT_RESPONSE_CHUNKED_OLD_PROTO }, - { "RESPONSE_INVALID_PROTOCOL", HTTP_DECODER_EVENT_RESPONSE_INVALID_PROTOCOL }, - { "RESPONSE_INVALID_STATUS", HTTP_DECODER_EVENT_RESPONSE_INVALID_STATUS }, - { "REQUEST_LINE_INCOMPLETE", HTTP_DECODER_EVENT_REQUEST_LINE_INCOMPLETE }, - - { "LZMA_MEMLIMIT_REACHED", HTTP_DECODER_EVENT_LZMA_MEMLIMIT_REACHED }, - { "COMPRESSION_BOMB", HTTP_DECODER_EVENT_COMPRESSION_BOMB }, - - { "RANGE_INVALID", HTTP_DECODER_EVENT_RANGE_INVALID }, - { "REQUEST_CHUNK_EXTENSION", HTTP_DECODER_EVENT_REQUEST_CHUNK_EXTENSION }, - { "REQUEST_LINE_MISSING_PROTOCOL", HTTP_DECODER_EVENT_REQUEST_LINE_MISSING_PROTOCOL }, - - { "REQUEST_TOO_MANY_HEADERS", HTTP_DECODER_EVENT_REQUEST_TOO_MANY_HEADERS }, - { "RESPONSE_TOO_MANY_HEADERS", HTTP_DECODER_EVENT_RESPONSE_TOO_MANY_HEADERS }, + HTP_LOG_CODE_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE }, + { "100_CONTINUE_ALREADY_SEEN", HTP_LOG_CODE_CONTINUE_ALREADY_SEEN }, + { "UNABLE_TO_MATCH_RESPONSE_TO_REQUEST", HTP_LOG_CODE_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST }, + { "INVALID_SERVER_PORT_IN_REQUEST", HTP_LOG_CODE_INVALID_SERVER_PORT_IN_REQUEST }, + { "INVALID_AUTHORITY_PORT", HTP_LOG_CODE_INVALID_AUTHORITY_PORT }, + { "REQUEST_HEADER_INVALID", HTP_LOG_CODE_REQUEST_HEADER_INVALID }, + { "RESPONSE_HEADER_INVALID", HTP_LOG_CODE_RESPONSE_HEADER_INVALID }, + { "MISSING_HOST_HEADER", 
HTP_LOG_CODE_MISSING_HOST_HEADER }, + { "HOST_HEADER_AMBIGUOUS", HTP_LOG_CODE_HOST_HEADER_AMBIGUOUS }, + { "INVALID_REQUEST_FIELD_FOLDING", HTP_LOG_CODE_INVALID_REQUEST_FIELD_FOLDING }, + { "INVALID_RESPONSE_FIELD_FOLDING", HTP_LOG_CODE_INVALID_RESPONSE_FIELD_FOLDING }, + { "REQUEST_FIELD_TOO_LONG", HTP_LOG_CODE_REQUEST_FIELD_TOO_LONG }, + { "RESPONSE_FIELD_TOO_LONG", HTP_LOG_CODE_RESPONSE_FIELD_TOO_LONG }, + { "REQUEST_LINE_INVALID", HTP_LOG_CODE_REQUEST_LINE_INVALID }, + { "REQUEST_BODY_UNEXPECTED", HTP_LOG_CODE_REQUEST_BODY_UNEXPECTED }, + { "RESPONSE_BODY_UNEXPECTED", HTP_LOG_CODE_RESPONSE_BODY_UNEXPECTED }, + { "REQUEST_SERVER_PORT_TCP_PORT_MISMATCH", HTP_LOG_CODE_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH }, + { "REQUEST_URI_HOST_INVALID", HTP_LOG_CODE_URI_HOST_INVALID }, + { "REQUEST_HEADER_HOST_INVALID", HTP_LOG_CODE_HEADER_HOST_INVALID }, + { "REQUEST_AUTH_UNRECOGNIZED", HTP_LOG_CODE_AUTH_UNRECOGNIZED }, + { "REQUEST_HEADER_REPETITION", HTP_LOG_CODE_REQUEST_HEADER_REPETITION }, + { "RESPONSE_HEADER_REPETITION", HTP_LOG_CODE_RESPONSE_HEADER_REPETITION }, + { "DOUBLE_ENCODED_URI", HTP_LOG_CODE_DOUBLE_ENCODED_URI }, + { "URI_DELIM_NON_COMPLIANT", HTP_LOG_CODE_URI_DELIM_NON_COMPLIANT }, + { "METHOD_DELIM_NON_COMPLIANT", HTP_LOG_CODE_METHOD_DELIM_NON_COMPLIANT }, + { "REQUEST_LINE_LEADING_WHITESPACE", HTP_LOG_CODE_REQUEST_LINE_LEADING_WHITESPACE }, + { "TOO_MANY_ENCODING_LAYERS", HTP_LOG_CODE_TOO_MANY_ENCODING_LAYERS }, + { "REQUEST_TOO_MANY_LZMA_LAYERS", HTP_LOG_CODE_REQUEST_TOO_MANY_LZMA_LAYERS }, + { "RESPONSE_TOO_MANY_LZMA_LAYERS", HTP_LOG_CODE_RESPONSE_TOO_MANY_LZMA_LAYERS }, + { "ABNORMAL_CE_HEADER", HTP_LOG_CODE_ABNORMAL_CE_HEADER }, + { "RESPONSE_MULTIPART_BYTERANGES", HTP_LOG_CODE_RESPONSE_MULTIPART_BYTERANGES }, + { "RESPONSE_ABNORMAL_TRANSFER_ENCODING", HTP_LOG_CODE_RESPONSE_ABNORMAL_TRANSFER_ENCODING }, + { "RESPONSE_CHUNKED_OLD_PROTO", HTP_LOG_CODE_RESPONSE_CHUNKED_OLD_PROTO }, + { "RESPONSE_INVALID_PROTOCOL", HTP_LOG_CODE_RESPONSE_INVALID_PROTOCOL }, + 
{ "RESPONSE_INVALID_STATUS", HTP_LOG_CODE_RESPONSE_INVALID_STATUS }, + { "REQUEST_LINE_INCOMPLETE", HTP_LOG_CODE_REQUEST_LINE_INCOMPLETE }, + { "PROTOCOL_CONTAINS_EXTRA_DATA", HTP_LOG_CODE_PROTOCOL_CONTAINS_EXTRA_DATA }, + { + "CONTENT_LENGTH_EXTRA_DATA_START", + HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_START, + }, + { + "CONTENT_LENGTH_EXTRA_DATA_END", + HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_END, + }, + { + "CONTENT_LENGTH_EXTRA_DATA_END", + HTP_LOG_CODE_CONTENT_LENGTH_EXTRA_DATA_END, + }, + { "SWITCHING_PROTO_WITH_CONTENT_LENGTH", HTP_LOG_CODE_SWITCHING_PROTO_WITH_CONTENT_LENGTH }, + { "DEFORMED_EOL", HTP_LOG_CODE_DEFORMED_EOL }, + { "PARSER_STATE_ERROR", HTP_LOG_CODE_PARSER_STATE_ERROR }, + { "MISSING_OUTBOUND_TRANSACTION_DATA", HTP_LOG_CODE_MISSING_OUTBOUND_TRANSACTION_DATA }, + { "MISSING_INBOUND_TRANSACTION_DATA", HTP_LOG_CODE_MISSING_INBOUND_TRANSACTION_DATA }, + { "MISSING_INBOUND_TRANSACTION_DATA", HTP_LOG_CODE_MISSING_INBOUND_TRANSACTION_DATA }, + { "ZERO_LENGTH_DATA_CHUNKS", HTP_LOG_CODE_ZERO_LENGTH_DATA_CHUNKS }, + { "REQUEST_LINE_UNKNOWN_METHOD", HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD }, + { "REQUEST_LINE_UNKNOWN_METHOD", HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD }, + { "REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL", + HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD_NO_PROTOCOL }, + { "REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL", + HTP_LOG_CODE_REQUEST_LINE_UNKNOWN_METHOD_INVALID_PROTOCOL }, + { "REQUEST_LINE_MISSING_PROTOCOL", HTP_LOG_CODE_REQUEST_LINE_NO_PROTOCOL }, + { "RESPONSE_LINE_INVALID_PROTOCOL", HTP_LOG_CODE_RESPONSE_LINE_INVALID_PROTOCOL }, + { "RESPONSE_LINE_INVALID_RESPONSE_STATUS", HTP_LOG_CODE_RESPONSE_LINE_INVALID_RESPONSE_STATUS }, + { "RESPONSE_BODY_INTERNAL_ERROR", HTP_LOG_CODE_RESPONSE_BODY_INTERNAL_ERROR }, + { "REQUEST_BODY_DATA_CALLBACK_ERROR", HTP_LOG_CODE_REQUEST_BODY_DATA_CALLBACK_ERROR }, + { "RESPONSE_INVALID_EMPTY_NAME", HTP_LOG_CODE_RESPONSE_INVALID_EMPTY_NAME }, + { "REQUEST_INVALID_EMPTY_NAME", 
HTP_LOG_CODE_REQUEST_INVALID_EMPTY_NAME }, + { "RESPONSE_INVALID_LWS_AFTER_NAME", HTP_LOG_CODE_RESPONSE_INVALID_LWS_AFTER_NAME }, + { "RESPONSE_HEADER_NAME_NOT_TOKEN", HTP_LOG_CODE_RESPONSE_HEADER_NAME_NOT_TOKEN }, + { "REQUEST_INVALID_LWS_AFTER_NAME", HTP_LOG_CODE_REQUEST_INVALID_LWS_AFTER_NAME }, + { "LZMA_DECOMPRESSION_DISABLED", HTP_LOG_CODE_LZMA_DECOMPRESSION_DISABLED }, + { "CONNECTION_ALREADY_OPEN", HTP_LOG_CODE_CONNECTION_ALREADY_OPEN }, + { "COMPRESSION_BOMB_DOUBLE_LZMA", HTP_LOG_CODE_COMPRESSION_BOMB_DOUBLE_LZMA }, + { "INVALID_CONTENT_ENCODING", HTP_LOG_CODE_INVALID_CONTENT_ENCODING }, + { "INVALID_GAP", HTP_LOG_CODE_INVALID_GAP }, + { "REQUEST_CHUNK_EXTENSION", HTP_LOG_CODE_REQUEST_CHUNK_EXTENSION }, + { "RESPONSE_CHUNK_EXTENSION", HTP_LOG_CODE_RESPONSE_CHUNK_EXTENSION }, + + { "LZMA_MEMLIMIT_REACHED", HTP_LOG_CODE_LZMA_MEMLIMIT_REACHED }, + { "COMPRESSION_BOMB", HTP_LOG_CODE_COMPRESSION_BOMB }, + + { "REQUEST_TOO_MANY_HEADERS", HTP_LOG_CODE_REQUEST_TOO_MANY_HEADERS }, + { "RESPONSE_TOO_MANY_HEADERS", HTP_LOG_CODE_RESPONSE_TOO_MANY_HEADERS }, /* suricata warnings/errors */ { "MULTIPART_GENERIC_ERROR", HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR }, { "MULTIPART_NO_FILEDATA", HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA }, { "MULTIPART_INVALID_HEADER", HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER }, - { "TOO_MANY_WARNINGS", HTTP_DECODER_EVENT_TOO_MANY_WARNINGS }, + { "RANGE_INVALID", HTTP_DECODER_EVENT_RANGE_INVALID }, + { "FILE_NAME_TOO_LONG", HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG }, { "FAILED_PROTOCOL_CHANGE", HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE }, { NULL, -1 }, @@ -242,8 +280,9 @@ static inline uint64_t HtpGetActiveResponseTxID(HtpState *s) */ static const char *HTPLookupPersonalityString(int p) { -#define CASE_HTP_PERSONALITY_STRING(p) \ - case HTP_SERVER_ ## p: return #p +#define CASE_HTP_PERSONALITY_STRING(p) \ + case HTP_SERVER_PERSONALITY_##p: \ + return #p switch (p) { CASE_HTP_PERSONALITY_STRING(MINIMAL); @@ -271,8 +310,9 @@ static const char 
*HTPLookupPersonalityString(int p) */ static int HTPLookupPersonality(const char *str) { -#define IF_HTP_PERSONALITY_NUM(p) \ - if (strcasecmp(#p, str) == 0) return HTP_SERVER_ ## p +#define IF_HTP_PERSONALITY_NUM(p) \ + if (strcasecmp(#p, str) == 0) \ + return HTP_SERVER_PERSONALITY_##p IF_HTP_PERSONALITY_NUM(MINIMAL); IF_HTP_PERSONALITY_NUM(GENERIC); @@ -296,7 +336,7 @@ static int HTPLookupPersonality(const char *str) "longer supported by libhtp, failing back to " "Apache2 personality.", str); - return HTP_SERVER_APACHE_2; + return HTP_SERVER_PERSONALITY_APACHE_2; } return -1; @@ -320,7 +360,7 @@ static void HTPSetEvent(HtpState *s, HtpTxUserData *htud, if (tx == NULL && tx_id > 0) tx = HTPStateGetTx(s, tx_id - 1); if (tx != NULL) { - htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud != NULL) { AppLayerDecoderEventsSetEventRaw(&htud->tx_data.events, e); s->events++; @@ -360,7 +400,6 @@ static void HtpTxUserDataFree(HtpState *state, HtpTxUserData *htud) if (likely(htud)) { HtpBodyFree(&htud->request_body); HtpBodyFree(&htud->response_body); - bstr_free(htud->request_uri_normalized); if (htud->request_headers_raw) HTPFree(htud->request_headers_raw, htud->request_headers_raw_len); if (htud->response_headers_raw) @@ -401,10 +440,10 @@ void HTPStateFree(void *state) uint64_t total_txs = HTPStateGetTxCnt(state); /* free the list of body chunks */ if (s->conn != NULL) { - for (tx_id = s->tx_freed; tx_id < total_txs; tx_id++) { + for (tx_id = 0; tx_id < total_txs; tx_id++) { htp_tx_t *tx = HTPStateGetTx(s, tx_id); if (tx != NULL) { - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); HtpTxUserDataFree(s, htud); htp_tx_set_user_data(tx, NULL); } @@ -429,8 +468,6 @@ void HTPStateFree(void *state) /** * \brief HTP transaction cleanup callback * - * \warning We cannot actually free the transactions here. 
It seems that - * HTP only accepts freeing of transactions in the response callback. */ static void HTPStateTransactionFree(void *state, uint64_t id) { @@ -443,25 +480,11 @@ static void HTPStateTransactionFree(void *state, uint64_t id) htp_tx_t *tx = HTPStateGetTx(s, id); if (tx != NULL) { /* This will remove obsolete body chunks */ - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); HtpTxUserDataFree(s, htud); htp_tx_set_user_data(tx, NULL); - - /* hack: even if libhtp considers the tx incomplete, we want to - * free it here. htp_tx_destroy however, will refuse to do this. - * As htp_tx_destroy_incomplete isn't available in the public API, - * we hack around it here. */ - if (unlikely(!( - tx->request_progress == HTP_REQUEST_COMPLETE && - tx->response_progress == HTP_RESPONSE_COMPLETE))) - { - tx->request_progress = HTP_REQUEST_COMPLETE; - tx->response_progress = HTP_RESPONSE_COMPLETE; - } - // replaces tx in the s->conn->transactions list by NULL - htp_tx_destroy(tx); + htp_tx_destroy(s->connp, tx); } - s->tx_freed += htp_connp_tx_freed(s->connp); } /** @@ -508,7 +531,7 @@ void AppLayerHtpNeedFileInspection(void) static void AppLayerHtpSetStreamDepthFlag(void *tx, const uint8_t flags) { - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data((htp_tx_t *)tx); + HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data((htp_tx_t *)tx); if (tx_ud) { SCLogDebug("setting HTP_STREAM_DEPTH_SET, flags %02x", flags); if (flags & STREAM_TOCLIENT) { @@ -557,132 +580,6 @@ static uint32_t AppLayerHtpComputeChunkLength(uint64_t content_len_so_far, uint3 return (chunk_len == 0 ? 
data_len : chunk_len); } -/* below error messages updated up to libhtp 0.5.7 (git 379632278b38b9a792183694a4febb9e0dbd1e7a) */ -struct { - const char *msg; - uint8_t de; -} htp_errors[] = { - { "GZip decompressor: inflateInit2 failed", HTTP_DECODER_EVENT_GZIP_DECOMPRESSION_FAILED}, - { "Request field invalid: colon missing", HTTP_DECODER_EVENT_REQUEST_FIELD_MISSING_COLON}, - { "Response field invalid: missing colon", HTTP_DECODER_EVENT_RESPONSE_FIELD_MISSING_COLON}, - { "Request chunk encoding: Invalid chunk length", HTTP_DECODER_EVENT_INVALID_REQUEST_CHUNK_LEN}, - { "Response chunk encoding: Invalid chunk length", HTTP_DECODER_EVENT_INVALID_RESPONSE_CHUNK_LEN}, -/* { "Invalid T-E value in request", HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST}, <- tx flag HTP_REQUEST_INVALID_T_E - { "Invalid T-E value in response", HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE}, <- nothing to replace it */ -/* { "Invalid C-L field in request", HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST}, <- tx flag HTP_REQUEST_INVALID_C_L */ - { "Invalid C-L field in response", HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE}, - { "Already seen 100-Continue", HTTP_DECODER_EVENT_100_CONTINUE_ALREADY_SEEN}, - { "Unable to match response to request", HTTP_DECODER_EVENT_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST}, - { "Invalid server port information in request", HTTP_DECODER_EVENT_INVALID_SERVER_PORT_IN_REQUEST}, -/* { "Invalid authority port", HTTP_DECODER_EVENT_INVALID_AUTHORITY_PORT}, htp no longer returns this error */ - { "Request buffer over", HTTP_DECODER_EVENT_REQUEST_FIELD_TOO_LONG}, - { "Response buffer over", HTTP_DECODER_EVENT_RESPONSE_FIELD_TOO_LONG}, - { "C-T multipart/byteranges in responses not supported", HTTP_DECODER_EVENT_RESPONSE_MULTIPART_BYTERANGES}, - { "Compression bomb:", HTTP_DECODER_EVENT_COMPRESSION_BOMB}, -}; - -struct { - const char *msg; - uint8_t de; -} htp_warnings[] = { - { "GZip decompressor:", 
HTTP_DECODER_EVENT_GZIP_DECOMPRESSION_FAILED }, - { "Request field invalid", HTTP_DECODER_EVENT_REQUEST_HEADER_INVALID }, - { "Response field invalid", HTTP_DECODER_EVENT_RESPONSE_HEADER_INVALID }, - { "Request header name is not a token", HTTP_DECODER_EVENT_REQUEST_HEADER_INVALID }, - { "Response header name is not a token", HTTP_DECODER_EVENT_RESPONSE_HEADER_INVALID }, - /* { "Host information in request headers required by HTTP/1.1", - HTTP_DECODER_EVENT_MISSING_HOST_HEADER}, <- tx flag HTP_HOST_MISSING { "Host information - ambiguous", HTTP_DECODER_EVENT_HOST_HEADER_AMBIGUOUS}, <- tx flag HTP_HOST_AMBIGUOUS */ - { "Invalid request field folding", HTTP_DECODER_EVENT_INVALID_REQUEST_FIELD_FOLDING }, - { "Invalid response field folding", HTTP_DECODER_EVENT_INVALID_RESPONSE_FIELD_FOLDING }, - /* line is now: htp_log(connp, HTP_LOG_MARK, HTP_LOG_ERROR, 0, "Request server port=%d number - * differs from the actual TCP port=%d", port, connp->conn->server_port); luckily, "Request - * server port=" is unique */ - /* { "Request server port number differs from the actual TCP port", - HTTP_DECODER_EVENT_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH}, */ - { "Request server port=", HTTP_DECODER_EVENT_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH }, - { "Request line: URI contains non-compliant delimiter", - HTTP_DECODER_EVENT_URI_DELIM_NON_COMPLIANT }, - { "Request line: non-compliant delimiter between Method and URI", - HTTP_DECODER_EVENT_METHOD_DELIM_NON_COMPLIANT }, - { "Request line: leading whitespace", HTTP_DECODER_EVENT_REQUEST_LINE_LEADING_WHITESPACE }, - { "Too many response content encoding layers", HTTP_DECODER_EVENT_TOO_MANY_ENCODING_LAYERS }, - { "C-E gzip has abnormal value", HTTP_DECODER_EVENT_ABNORMAL_CE_HEADER }, - { "C-E deflate has abnormal value", HTTP_DECODER_EVENT_ABNORMAL_CE_HEADER }, - { "C-E unknown setting", HTTP_DECODER_EVENT_ABNORMAL_CE_HEADER }, - { "Excessive request header repetitions", HTTP_DECODER_EVENT_REQUEST_HEADER_REPETITION }, - { "Excessive response 
header repetitions", HTTP_DECODER_EVENT_RESPONSE_HEADER_REPETITION }, - { "Transfer-encoding has abnormal chunked value", - HTTP_DECODER_EVENT_RESPONSE_ABNORMAL_TRANSFER_ENCODING }, - { "Chunked transfer-encoding on HTTP/0.9 or HTTP/1.0", - HTTP_DECODER_EVENT_RESPONSE_CHUNKED_OLD_PROTO }, - { "Invalid response line: invalid protocol", HTTP_DECODER_EVENT_RESPONSE_INVALID_PROTOCOL }, - { "Invalid response line: invalid response status", - HTTP_DECODER_EVENT_RESPONSE_INVALID_STATUS }, - { "Request line incomplete", HTTP_DECODER_EVENT_REQUEST_LINE_INCOMPLETE }, - { "Unexpected request body", HTTP_DECODER_EVENT_REQUEST_BODY_UNEXPECTED }, - { "LZMA decompressor: memory limit reached", HTTP_DECODER_EVENT_LZMA_MEMLIMIT_REACHED }, - { "Ambiguous request C-L value", HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST }, - { "Ambiguous response C-L value", - HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE }, - { "Request chunk extension", HTTP_DECODER_EVENT_REQUEST_CHUNK_EXTENSION }, - { "Request line: missing protocol", HTTP_DECODER_EVENT_REQUEST_LINE_MISSING_PROTOCOL }, - { "Too many request headers", HTTP_DECODER_EVENT_REQUEST_TOO_MANY_HEADERS }, - { "Too many response headers", HTTP_DECODER_EVENT_RESPONSE_TOO_MANY_HEADERS }, -}; - -#define HTP_ERROR_MAX (sizeof(htp_errors) / sizeof(htp_errors[0])) -#define HTP_WARNING_MAX (sizeof(htp_warnings) / sizeof(htp_warnings[0])) - -/** - * \internal - * - * \brief Get the warning id for the warning msg. - * - * \param msg warning message - * - * \retval id the id or 0 in case of not found - */ -static uint8_t HTPHandleWarningGetId(const char *msg) -{ - SCLogDebug("received warning \"%s\"", msg); - size_t idx; - for (idx = 0; idx < HTP_WARNING_MAX; idx++) { - if (strncmp(htp_warnings[idx].msg, msg, - strlen(htp_warnings[idx].msg)) == 0) - { - return htp_warnings[idx].de; - } - } - - return 0; -} - -/** - * \internal - * - * \brief Get the error id for the error msg. 
- * - * \param msg error message - * - * \retval id the id or 0 in case of not found - */ -static uint8_t HTPHandleErrorGetId(const char *msg) -{ - SCLogDebug("received error \"%s\"", msg); - - size_t idx; - for (idx = 0; idx < HTP_ERROR_MAX; idx++) { - if (strncmp(htp_errors[idx].msg, msg, - strlen(htp_errors[idx].msg)) == 0) - { - return htp_errors[idx].de; - } - } - - return 0; -} - /** * \internal * @@ -693,99 +590,81 @@ static uint8_t HTPHandleErrorGetId(const char *msg) */ static void HTPHandleError(HtpState *s, const uint8_t dir) { - if (s == NULL || s->conn == NULL || - s->conn->messages == NULL) { - return; - } - - size_t size = htp_list_size(s->conn->messages); - size_t msg; - if(size >= HTP_MAX_MESSAGES) { - if (s->htp_messages_offset < HTP_MAX_MESSAGES) { - //only once per HtpState - HTPSetEvent(s, NULL, dir, HTTP_DECODER_EVENT_TOO_MANY_WARNINGS); - s->htp_messages_offset = HTP_MAX_MESSAGES; - //too noisy in fuzzing - //DEBUG_VALIDATE_BUG_ON("Too many libhtp messages"); - } + if (s == NULL || s->conn == NULL || s->htp_messages_count >= HTP_MAX_MESSAGES) { // ignore further messages return; } - for (msg = s->htp_messages_offset; msg < size; msg++) { - htp_log_t *log = htp_list_get(s->conn->messages, msg); - if (log == NULL) + htp_log_t *log = htp_conn_next_log(s->conn); + while (log != NULL) { + char *msg = htp_log_message(log); + if (msg == NULL) { + htp_log_free(log); + log = htp_conn_next_log(s->conn); continue; + } - HtpTxUserData *htud = NULL; - htp_tx_t *tx = log->tx; // will be NULL in <=0.5.9 - if (tx != NULL) - htud = (HtpTxUserData *) htp_tx_get_user_data(tx); - - SCLogDebug("message %s", log->msg); + SCLogDebug("message %s", msg); - uint8_t id = HTPHandleErrorGetId(log->msg); - if (id == 0) { - id = HTPHandleWarningGetId(log->msg); - if (id == 0) - id = HTTP_DECODER_EVENT_UNKNOWN_ERROR; + htp_log_code_t id = htp_log_code(log); + if (id != HTP_LOG_CODE_UNKNOWN && id != HTP_LOG_CODE_ERROR) { + HTPSetEvent(s, NULL, dir, (uint8_t)id); } - - if (id 
> 0) { - HTPSetEvent(s, htud, dir, id); + htp_free_cstring(msg); + htp_log_free(log); + s->htp_messages_count++; + if (s->htp_messages_count >= HTP_MAX_MESSAGES) { + // only once per HtpState + HTPSetEvent(s, NULL, dir, HTTP_DECODER_EVENT_TOO_MANY_WARNINGS); + // too noisy in fuzzing + // DEBUG_VALIDATE_BUG_ON("Too many libhtp messages"); + break; } + log = htp_conn_next_log(s->conn); } - s->htp_messages_offset = (uint16_t)msg; - SCLogDebug("s->htp_messages_offset %u", s->htp_messages_offset); + SCLogDebug("s->htp_messages_count %u", s->htp_messages_count); } -static inline void HTPErrorCheckTxRequestFlags(HtpState *s, htp_tx_t *tx) +static inline void HTPErrorCheckTxRequestFlags(HtpState *s, const htp_tx_t *tx) { #ifdef DEBUG BUG_ON(s == NULL || tx == NULL); #endif - if (tx->flags & ( HTP_REQUEST_INVALID_T_E|HTP_REQUEST_INVALID_C_L| - HTP_HOST_MISSING|HTP_HOST_AMBIGUOUS|HTP_HOSTU_INVALID| - HTP_HOSTH_INVALID)) - { - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + if (htp_tx_flags(tx) & (HTP_FLAGS_REQUEST_INVALID_T_E | HTP_FLAGS_REQUEST_INVALID_C_L | + HTP_FLAGS_HOST_MISSING | HTP_FLAGS_HOST_AMBIGUOUS | + HTP_FLAGS_HOSTU_INVALID | HTP_FLAGS_HOSTH_INVALID)) { + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud == NULL) return; - if (tx->flags & HTP_REQUEST_INVALID_T_E) - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST); - if (tx->flags & HTP_REQUEST_INVALID_C_L) - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST); - if (tx->flags & HTP_HOST_MISSING) + if (htp_tx_flags(tx) & HTP_FLAGS_REQUEST_INVALID_T_E) HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MISSING_HOST_HEADER); - if (tx->flags & HTP_HOST_AMBIGUOUS) - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_HOST_HEADER_AMBIGUOUS); - if (tx->flags & HTP_HOSTU_INVALID) - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_URI_HOST_INVALID); - 
if (tx->flags & HTP_HOSTH_INVALID) - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_HEADER_HOST_INVALID); + HTP_LOG_CODE_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST); + if (htp_tx_flags(tx) & HTP_FLAGS_REQUEST_INVALID_C_L) + HTPSetEvent( + s, htud, STREAM_TOSERVER, HTP_LOG_CODE_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST); + if (htp_tx_flags(tx) & HTP_FLAGS_HOST_MISSING) + HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_MISSING_HOST_HEADER); + if (htp_tx_flags(tx) & HTP_FLAGS_HOST_AMBIGUOUS) + HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_HOST_HEADER_AMBIGUOUS); + if (htp_tx_flags(tx) & HTP_FLAGS_HOSTU_INVALID) + HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_URI_HOST_INVALID); + if (htp_tx_flags(tx) & HTP_FLAGS_HOSTH_INVALID) + HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_HEADER_HOST_INVALID); } - if (tx->request_auth_type == HTP_AUTH_UNRECOGNIZED) { - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + if (htp_tx_request_auth_type(tx) == HTP_AUTH_TYPE_UNRECOGNIZED) { + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud == NULL) return; - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_AUTH_UNRECOGNIZED); + HTPSetEvent(s, htud, STREAM_TOSERVER, HTP_LOG_CODE_AUTH_UNRECOGNIZED); } - if (tx->is_protocol_0_9 && tx->request_method_number == HTP_M_UNKNOWN && - (tx->request_protocol_number == HTP_PROTOCOL_INVALID || - tx->request_protocol_number == HTP_PROTOCOL_UNKNOWN)) { - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + if (htp_tx_is_protocol_0_9(tx) && htp_tx_request_method_number(tx) == HTP_METHOD_UNKNOWN && + (htp_tx_request_protocol_number(tx) == HTP_PROTOCOL_INVALID || + htp_tx_request_protocol_number(tx) == HTP_PROTOCOL_UNKNOWN)) { + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud == NULL) return; - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_REQUEST_LINE_INVALID); + HTPSetEvent(s, htud, STREAM_TOSERVER, 
HTP_LOG_CODE_REQUEST_LINE_INVALID); } } @@ -833,7 +712,7 @@ static int Setup(Flow *f, HtpState *hstate) goto error; } - hstate->conn = htp_connp_get_connection(hstate->connp); + hstate->conn = (htp_conn_t *)htp_connp_connection(hstate->connp); htp_connp_set_user_data(hstate->connp, (void *)hstate); hstate->cfg = htp_cfg_rec; @@ -884,12 +763,12 @@ static AppLayerResult HTPHandleRequestData(Flow *f, void *htp_state, AppLayerPar const uint8_t *input = StreamSliceGetData(&stream_slice); uint32_t input_len = StreamSliceGetDataLen(&stream_slice); - htp_time_t ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) }; + struct timeval ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) }; /* pass the new data to the htp parser */ if (input_len > 0) { - const int r = htp_connp_req_data(hstate->connp, &ts, input, input_len); + const int r = htp_connp_request_data(hstate->connp, &ts, input, input_len); switch (r) { - case HTP_STREAM_ERROR: + case HTP_STREAM_STATE_ERROR: ret = -1; break; default: @@ -902,7 +781,7 @@ static AppLayerResult HTPHandleRequestData(Flow *f, void *htp_state, AppLayerPar if (AppLayerParserStateIssetFlag(pstate, APP_LAYER_PARSER_EOF_TS) && !(hstate->flags & HTP_FLAG_STATE_CLOSED_TS)) { - htp_connp_req_close(hstate->connp, &ts); + htp_connp_request_close(hstate->connp, &ts); hstate->flags |= HTP_FLAG_STATE_CLOSED_TS; SCLogDebug("stream eof encountered, closing htp handle for ts"); } @@ -951,29 +830,28 @@ static AppLayerResult HTPHandleResponseData(Flow *f, void *htp_state, AppLayerPa DEBUG_VALIDATE_BUG_ON(hstate->connp == NULL); hstate->slice = &stream_slice; - htp_time_t ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) }; - htp_tx_t *tx = NULL; + struct timeval ts = { SCTIME_SECS(f->startts), SCTIME_USECS(f->startts) }; + const htp_tx_t *tx = NULL; uint32_t consumed = 0; if (input_len > 0) { - const int r = htp_connp_res_data(hstate->connp, &ts, input, input_len); + const int r = htp_connp_response_data(hstate->connp, &ts, input, 
input_len); switch (r) { - case HTP_STREAM_ERROR: + case HTP_STREAM_STATE_ERROR: ret = -1; break; - case HTP_STREAM_TUNNEL: - tx = htp_connp_get_out_tx(hstate->connp); - if (tx != NULL && tx->response_status_number == 101) { - htp_header_t *h = - (htp_header_t *)htp_table_get_c(tx->response_headers, "Upgrade"); + case HTP_STREAM_STATE_TUNNEL: + tx = htp_connp_get_response_tx(hstate->connp); + if (tx != NULL && htp_tx_response_status_number(tx) == 101) { + const htp_header_t *h = htp_tx_response_header(tx, "Upgrade"); if (h == NULL) { break; } uint16_t dp = 0; - if (tx->request_port_number != -1) { - dp = (uint16_t)tx->request_port_number; + if (htp_tx_request_port_number(tx) != -1) { + dp = (uint16_t)htp_tx_request_port_number(tx); } - consumed = (uint32_t)htp_connp_res_data_consumed(hstate->connp); - if (bstr_cmp_c(h->value, "h2c") == 0) { + consumed = (uint32_t)htp_connp_response_data_consumed(hstate->connp); + if (bstr_cmp_c(htp_header_value(h), "h2c") == 0) { if (AppLayerProtoDetectGetProtoName(ALPROTO_HTTP2) == NULL) { // if HTTP2 is disabled, keep the HTP_STREAM_TUNNEL mode break; @@ -989,7 +867,7 @@ static AppLayerResult HTPHandleResponseData(Flow *f, void *htp_state, AppLayerPa SCReturnStruct(APP_LAYER_INCOMPLETE(consumed, input_len - consumed)); } SCReturnStruct(APP_LAYER_OK); - } else if (bstr_cmp_c_nocase(h->value, "WebSocket") == 0) { + } else if (bstr_cmp_c_nocase(htp_header_value(h), "WebSocket") == 0) { if (AppLayerProtoDetectGetProtoName(ALPROTO_WEBSOCKET) == NULL) { // if WS is disabled, keep the HTP_STREAM_TUNNEL mode break; @@ -1034,8 +912,8 @@ static AppLayerResult HTPHandleResponseData(Flow *f, void *htp_state, AppLayerPa /** * \param name /Lowercase/ version of the variable name */ -static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len, - uint8_t *data, size_t len, uint8_t **retptr, size_t *retlen) +static int HTTPParseContentDispositionHeader(const uint8_t *name, size_t name_len, + const uint8_t *data, size_t len, 
uint8_t const **retptr, size_t *retlen) { #ifdef PRINT printf("DATA START: \n"); @@ -1053,7 +931,7 @@ static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len, if (x >= len) return 0; - uint8_t *line = data+x; + const uint8_t *line = data + x; size_t line_len = len-x; size_t offset = 0; #ifdef PRINT @@ -1068,7 +946,7 @@ static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len, } if (((line[x - 1] != '\\' && line[x] == ';') || ((x + 1) == line_len)) && (quote == 0 || quote % 2 == 0)) { - uint8_t *token = line + offset; + const uint8_t *token = line + offset; size_t token_len = x - offset; if ((x + 1) == line_len) { @@ -1088,7 +966,7 @@ static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len, #endif if (token_len > name_len) { if (name == NULL || SCMemcmpLowercase(name, token, name_len) == 0) { - uint8_t *value = token + name_len; + const uint8_t *value = token + name_len; size_t value_len = token_len - name_len; if (value[0] == '\"') { @@ -1128,12 +1006,12 @@ static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len, * If the request contains a multipart message, this function will * set the HTP_BOUNDARY_SET in the transaction. 
*/ -static int HtpRequestBodySetupMultipart(htp_tx_t *tx, HtpTxUserData *htud) +static int HtpRequestBodySetupMultipart(const htp_tx_t *tx, HtpTxUserData *htud) { - htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers, - "Content-Type"); - if (h != NULL && bstr_len(h->value) > 0) { - htud->mime_state = SCMimeStateInit(bstr_ptr(h->value), (uint32_t)bstr_len(h->value)); + const htp_header_t *h = htp_tx_request_header(tx, "Content-Type"); + if (h != NULL && htp_header_value_len(h) > 0) { + htud->mime_state = + SCMimeStateInit(htp_header_value_ptr(h), (uint32_t)htp_header_value_len(h)); if (htud->mime_state) { htud->tsflags |= HTP_BOUNDARY_SET; SCReturnInt(1); @@ -1171,7 +1049,7 @@ static void FlagDetectStateNewFile(HtpTxUserData *tx, int dir) } } -static int HtpRequestBodyHandleMultipart(HtpState *hstate, HtpTxUserData *htud, void *tx, +static int HtpRequestBodyHandleMultipart(HtpState *hstate, HtpTxUserData *htud, const void *tx, const uint8_t *chunks_buffer, uint32_t chunks_buffer_len, bool eof) { #ifdef PRINT @@ -1182,8 +1060,8 @@ static int HtpRequestBodyHandleMultipart(HtpState *hstate, HtpTxUserData *htud, // libhtp will not call us back too late // should libhtp send a callback eof for 0 chunked ? 
- DEBUG_VALIDATE_BUG_ON(AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, - STREAM_TOSERVER) >= HTP_REQUEST_COMPLETE); + DEBUG_VALIDATE_BUG_ON(AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, (void *)tx, + STREAM_TOSERVER) >= HTP_REQUEST_PROGRESS_COMPLETE); const uint8_t *cur_buf = chunks_buffer; uint32_t cur_buf_len = chunks_buffer_len; @@ -1278,8 +1156,8 @@ static int HtpRequestBodyHandleMultipart(HtpState *hstate, HtpTxUserData *htud, /** \internal * \brief Handle POST or PUT, no multipart body data */ -static int HtpRequestBodyHandlePOSTorPUT(HtpState *hstate, HtpTxUserData *htud, - htp_tx_t *tx, uint8_t *data, uint32_t data_len) +static int HtpRequestBodyHandlePOSTorPUT(HtpState *hstate, HtpTxUserData *htud, const htp_tx_t *tx, + const uint8_t *data, uint32_t data_len) { int result = 0; @@ -1290,9 +1168,9 @@ static int HtpRequestBodyHandlePOSTorPUT(HtpState *hstate, HtpTxUserData *htud, size_t filename_len = 0; /* get the name */ - if (tx->parsed_uri != NULL && tx->parsed_uri->path != NULL) { - filename = (uint8_t *)bstr_ptr(tx->parsed_uri->path); - filename_len = bstr_len(tx->parsed_uri->path); + if (htp_uri_path(htp_tx_parsed_uri(tx)) != NULL) { + filename = (uint8_t *)bstr_ptr(htp_uri_path(htp_tx_parsed_uri(tx))); + filename_len = bstr_len(htp_uri_path(htp_tx_parsed_uri(tx))); } if (filename != NULL) { @@ -1334,44 +1212,43 @@ static int HtpRequestBodyHandlePOSTorPUT(HtpState *hstate, HtpTxUserData *htud, return -1; } -static int HtpResponseBodyHandle(HtpState *hstate, HtpTxUserData *htud, - htp_tx_t *tx, uint8_t *data, uint32_t data_len) +static int HtpResponseBodyHandle(HtpState *hstate, HtpTxUserData *htud, const htp_tx_t *tx, + const uint8_t *data, uint32_t data_len) { SCEnter(); int result = 0; /* see if we need to open the file - * we check for tx->response_line in case of junk + * we check for htp_tx_response_line(tx) in case of junk * interpreted as body before response line */ if (!(htud->tcflags & HTP_FILENAME_SET)) { 
SCLogDebug("setting up file name"); - uint8_t *filename = NULL; + const uint8_t *filename = NULL; size_t filename_len = 0; /* try Content-Disposition header first */ - htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->response_headers, - "Content-Disposition"); - if (h != NULL && bstr_len(h->value) > 0) { + const htp_header_t *h = htp_tx_response_header(tx, "Content-Disposition"); + if (h != NULL && htp_header_value_len(h) > 0) { /* parse content-disposition */ (void)HTTPParseContentDispositionHeader((uint8_t *)"filename=", 9, - (uint8_t *) bstr_ptr(h->value), bstr_len(h->value), &filename, &filename_len); + htp_header_value_ptr(h), htp_header_value_len(h), &filename, &filename_len); } /* fall back to name from the uri */ if (filename == NULL) { /* get the name */ - if (tx->parsed_uri != NULL && tx->parsed_uri->path != NULL) { - filename = (uint8_t *)bstr_ptr(tx->parsed_uri->path); - filename_len = bstr_len(tx->parsed_uri->path); + if (htp_uri_path(htp_tx_parsed_uri(tx)) != NULL) { + filename = (uint8_t *)bstr_ptr(htp_uri_path(htp_tx_parsed_uri(tx))); + filename_len = bstr_len(htp_uri_path(htp_tx_parsed_uri(tx))); } } if (filename != NULL) { // set range if present - htp_header_t *h_content_range = htp_table_get_c(tx->response_headers, "content-range"); + const htp_header_t *h_content_range = htp_tx_response_header(tx, "content-range"); if (filename_len > SC_FILENAME_MAX) { // explicitly truncate the file name if too long filename_len = SC_FILENAME_MAX; @@ -1379,7 +1256,7 @@ static int HtpResponseBodyHandle(HtpState *hstate, HtpTxUserData *htud, } if (h_content_range != NULL) { result = HTPFileOpenWithRange(hstate, htud, filename, (uint16_t)filename_len, data, - data_len, tx, h_content_range->value, htud); + data_len, tx, htp_header_value(h_content_range), htud); } else { result = HTPFileOpen(hstate, htud, filename, (uint16_t)filename_len, data, data_len, STREAM_TOCLIENT); @@ -1419,35 +1296,38 @@ static int HtpResponseBodyHandle(HtpState *hstate, HtpTxUserData 
*htud, /** * \brief Function callback to append chunks for Requests * \param d pointer to the htp_tx_data_t structure (a chunk from htp lib) - * \retval int HTP_OK if all goes well + * \retval int HTP_STATUS_OK if all goes well */ -static int HTPCallbackRequestBodyData(htp_tx_data_t *d) +static int HTPCallbackRequestBodyData(const htp_connp_t *connp, htp_tx_data_t *d) { SCEnter(); + const htp_tx_t *tx = htp_tx_data_tx(d); + if (!(SC_ATOMIC_GET(htp_config_flags) & HTP_REQUIRE_REQUEST_BODY)) - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); - if (d->len == 0) - SCReturnInt(HTP_OK); + if (htp_tx_data_is_empty(d)) + SCReturnInt(HTP_STATUS_OK); #ifdef PRINT printf("HTPBODY START: \n"); - PrintRawDataFp(stdout, (uint8_t *)d->data, d->len); + PrintRawDataFp(stdout, (uint8_t *)htp_tx_data_data(d), htp_tx_data_len(d)); printf("HTPBODY END: \n"); #endif - HtpState *hstate = htp_connp_get_user_data(d->tx->connp); + HtpState *hstate = htp_connp_user_data(connp); if (hstate == NULL) { - SCReturnInt(HTP_ERROR); + SCReturnInt(HTP_STATUS_ERROR); } SCLogDebug("New request body data available at %p -> %p -> %p, bodylen " - "%"PRIu32"", hstate, d, d->data, (uint32_t)d->len); + "%" PRIu32 "", + hstate, d, htp_tx_data_data(d), (uint32_t)htp_tx_data_len(d)); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(d->tx); + HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (tx_ud == NULL) { - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } tx_ud->tx_data.updated_ts = true; SCTxDataUpdateFileFlags(&tx_ud->tx_data, hstate->state_data.file_flags); @@ -1455,16 +1335,16 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d) if (!tx_ud->response_body_init) { tx_ud->response_body_init = 1; - if (d->tx->request_method_number == HTP_M_POST) { + if (htp_tx_request_method_number(tx) == HTP_METHOD_POST) { SCLogDebug("POST"); - int r = HtpRequestBodySetupMultipart(d->tx, tx_ud); + int r = HtpRequestBodySetupMultipart(tx, tx_ud); if (r == 1) { 
tx_ud->request_body_type = HTP_BODY_REQUEST_MULTIPART; } else if (r == 0) { tx_ud->request_body_type = HTP_BODY_REQUEST_POST; SCLogDebug("not multipart"); } - } else if (d->tx->request_method_number == HTP_M_PUT) { + } else if (htp_tx_request_method_number(tx) == HTP_METHOD_PUT) { tx_ud->request_body_type = HTP_BODY_REQUEST_PUT; } } @@ -1479,13 +1359,11 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d) if (AppLayerHtpCheckDepth(&hstate->cfg->request, &tx_ud->request_body, tx_ud->tsflags)) { uint32_t stream_depth = FileReassemblyDepth(); uint32_t len = AppLayerHtpComputeChunkLength(tx_ud->request_body.content_len_so_far, - hstate->cfg->request.body_limit, - stream_depth, - tx_ud->tsflags, - (uint32_t)d->len); - BUG_ON(len > (uint32_t)d->len); + hstate->cfg->request.body_limit, stream_depth, tx_ud->tsflags, + (uint32_t)htp_tx_data_len(d)); + BUG_ON(len > (uint32_t)htp_tx_data_len(d)); - HtpBodyAppendChunk(&tx_ud->request_body, d->data, len); + HtpBodyAppendChunk(&tx_ud->request_body, htp_tx_data_data(d), len); const uint8_t *chunks_buffer = NULL; uint32_t chunks_buffer_len = 0; @@ -1506,12 +1384,13 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d) printf("REASSCHUNK END: \n"); #endif - HtpRequestBodyHandleMultipart(hstate, tx_ud, d->tx, chunks_buffer, chunks_buffer_len, - (d->data == NULL && d->len == 0)); + HtpRequestBodyHandleMultipart(hstate, tx_ud, htp_tx_data_tx(d), chunks_buffer, + chunks_buffer_len, (htp_tx_data_data(d) == NULL && htp_tx_data_len(d) == 0)); } else if (tx_ud->request_body_type == HTP_BODY_REQUEST_POST || tx_ud->request_body_type == HTP_BODY_REQUEST_PUT) { - HtpRequestBodyHandlePOSTorPUT(hstate, tx_ud, d->tx, (uint8_t *)d->data, len); + HtpRequestBodyHandlePOSTorPUT( + hstate, tx_ud, htp_tx_data_tx(d), htp_tx_data_data(d), len); } } else { @@ -1524,10 +1403,11 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d) end: if (hstate->conn != NULL) { - SCLogDebug("checking body size %"PRIu64" against inspect limit %u (cur 
%"PRIu64", last %"PRIu64")", - tx_ud->request_body.content_len_so_far, - hstate->cfg->request.inspect_min_size, - (uint64_t)hstate->conn->in_data_counter, hstate->last_request_data_stamp); + SCLogDebug("checking body size %" PRIu64 " against inspect limit %u (cur %" PRIu64 + ", last %" PRIu64 ")", + tx_ud->request_body.content_len_so_far, hstate->cfg->request.inspect_min_size, + (uint64_t)htp_conn_request_data_counter(hstate->conn), + hstate->last_request_data_stamp); /* if we reach the inspect_min_size we'll trigger inspection, * so make sure that raw stream is also inspected. Set the @@ -1535,11 +1415,14 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d) * get here. */ if (tx_ud->request_body.body_inspected == 0 && tx_ud->request_body.content_len_so_far >= hstate->cfg->request.inspect_min_size) { - if ((uint64_t)hstate->conn->in_data_counter > hstate->last_request_data_stamp && - (uint64_t)hstate->conn->in_data_counter - hstate->last_request_data_stamp < (uint64_t)UINT_MAX) - { - const uint32_t data_size = (uint32_t)( - (uint64_t)hstate->conn->in_data_counter - hstate->last_request_data_stamp); + if ((uint64_t)htp_conn_request_data_counter(hstate->conn) > + hstate->last_request_data_stamp && + (uint64_t)htp_conn_request_data_counter(hstate->conn) - + hstate->last_request_data_stamp < + (uint64_t)UINT_MAX) { + uint32_t data_size = + (uint32_t)((uint64_t)htp_conn_request_data_counter(hstate->conn) - + hstate->last_request_data_stamp); const uint32_t depth = MIN(data_size, hstate->cfg->request.inspect_min_size); /* body still in progress, but due to min inspect size we need to inspect now */ @@ -1551,35 +1434,38 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d) StreamTcpReassemblySetMinInspectDepth(hstate->f->protoctx, STREAM_TOSERVER, 0); } } - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } /** * \brief Function callback to append chunks for Responses * \param d pointer to the htp_tx_data_t structure (a chunk from htp lib) - * \retval int 
HTP_OK if all goes well + * \retval int HTP_STATUS_OK if all goes well */ -static int HTPCallbackResponseBodyData(htp_tx_data_t *d) +static int HTPCallbackResponseBodyData(const htp_connp_t *connp, htp_tx_data_t *d) { SCEnter(); + const htp_tx_t *tx = htp_tx_data_tx(d); + if (!(SC_ATOMIC_GET(htp_config_flags) & HTP_REQUIRE_RESPONSE_BODY)) - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); - if (d->len == 0) - SCReturnInt(HTP_OK); + if (htp_tx_data_is_empty(d)) + SCReturnInt(HTP_STATUS_OK); - HtpState *hstate = htp_connp_get_user_data(d->tx->connp); + HtpState *hstate = htp_connp_user_data(connp); if (hstate == NULL) { - SCReturnInt(HTP_ERROR); + SCReturnInt(HTP_STATUS_ERROR); } SCLogDebug("New response body data available at %p -> %p -> %p, bodylen " - "%"PRIu32"", hstate, d, d->data, (uint32_t)d->len); + "%" PRIu32 "", + hstate, d, htp_tx_data_data(d), (uint32_t)htp_tx_data_len(d)); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(d->tx); + HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (tx_ud == NULL) { - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } tx_ud->tx_data.updated_tc = true; SCTxDataUpdateFileFlags(&tx_ud->tx_data, hstate->state_data.file_flags); @@ -1597,15 +1483,13 @@ static int HTPCallbackResponseBodyData(htp_tx_data_t *d) if (AppLayerHtpCheckDepth(&hstate->cfg->response, &tx_ud->response_body, tx_ud->tcflags)) { uint32_t stream_depth = FileReassemblyDepth(); uint32_t len = AppLayerHtpComputeChunkLength(tx_ud->response_body.content_len_so_far, - hstate->cfg->response.body_limit, - stream_depth, - tx_ud->tcflags, - (uint32_t)d->len); - BUG_ON(len > (uint32_t)d->len); + hstate->cfg->response.body_limit, stream_depth, tx_ud->tcflags, + (uint32_t)htp_tx_data_len(d)); + BUG_ON(len > (uint32_t)htp_tx_data_len(d)); - HtpBodyAppendChunk(&tx_ud->response_body, d->data, len); + HtpBodyAppendChunk(&tx_ud->response_body, htp_tx_data_data(d), len); - HtpResponseBodyHandle(hstate, tx_ud, d->tx, (uint8_t *)d->data, len); 
+ HtpResponseBodyHandle(hstate, tx_ud, htp_tx_data_tx(d), htp_tx_data_data(d), len); } else { if (tx_ud->tcflags & HTP_FILENAME_SET) { SCLogDebug("closing file that was being stored"); @@ -1615,21 +1499,25 @@ static int HTPCallbackResponseBodyData(htp_tx_data_t *d) } if (hstate->conn != NULL) { - SCLogDebug("checking body size %"PRIu64" against inspect limit %u (cur %"PRIu64", last %"PRIu64")", - tx_ud->response_body.content_len_so_far, - hstate->cfg->response.inspect_min_size, - (uint64_t)hstate->conn->in_data_counter, hstate->last_response_data_stamp); + SCLogDebug("checking body size %" PRIu64 " against inspect limit %u (cur %" PRIu64 + ", last %" PRIu64 ")", + tx_ud->response_body.content_len_so_far, hstate->cfg->response.inspect_min_size, + (uint64_t)htp_conn_request_data_counter(hstate->conn), + hstate->last_response_data_stamp); /* if we reach the inspect_min_size we'll trigger inspection, * so make sure that raw stream is also inspected. Set the * data to be used to the amount of raw bytes we've seen to * get here. 
*/ if (tx_ud->response_body.body_inspected == 0 && tx_ud->response_body.content_len_so_far >= hstate->cfg->response.inspect_min_size) { - if ((uint64_t)hstate->conn->out_data_counter > hstate->last_response_data_stamp && - (uint64_t)hstate->conn->out_data_counter - hstate->last_response_data_stamp < (uint64_t)UINT_MAX) - { - const uint32_t data_size = (uint32_t)((uint64_t)hstate->conn->out_data_counter - - hstate->last_response_data_stamp); + if ((uint64_t)htp_conn_response_data_counter(hstate->conn) > + hstate->last_response_data_stamp && + (uint64_t)htp_conn_response_data_counter(hstate->conn) - + hstate->last_response_data_stamp < + (uint64_t)UINT_MAX) { + uint32_t data_size = + (uint32_t)((uint64_t)htp_conn_response_data_counter(hstate->conn) - + hstate->last_response_data_stamp); const uint32_t depth = MIN(data_size, hstate->cfg->response.inspect_min_size); /* body still in progress, but due to min inspect size we need to inspect now */ @@ -1641,7 +1529,7 @@ static int HTPCallbackResponseBodyData(htp_tx_data_t *d) StreamTcpReassemblySetMinInspectDepth(hstate->f->protoctx, STREAM_TOCLIENT, 0); } } - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } /** @@ -1684,41 +1572,40 @@ void HTPFreeConfig(void) SCReturn; } -static int HTPCallbackRequestHasTrailer(htp_tx_t *tx) +static int HTPCallbackRequestHasTrailer(const htp_connp_t *connp, htp_tx_t *tx) { HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud != NULL) { htud->tx_data.updated_ts = true; htud->request_has_trailers = 1; } - return HTP_OK; + return HTP_STATUS_OK; } -static int HTPCallbackResponseHasTrailer(htp_tx_t *tx) +static int HTPCallbackResponseHasTrailer(const htp_connp_t *connp, htp_tx_t *tx) { HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud != NULL) { htud->tx_data.updated_tc = true; htud->response_has_trailers = 1; } - return HTP_OK; + return HTP_STATUS_OK; } /**\internal * \brief called at start of request * Set min inspect size. 
*/ -static int HTPCallbackRequestStart(htp_tx_t *tx) +static int HTPCallbackRequestStart(const htp_connp_t *connp, htp_tx_t *tx) { - HtpState *hstate = htp_connp_get_user_data(tx->connp); + HtpState *hstate = htp_connp_user_data(connp); if (hstate == NULL) { - SCReturnInt(HTP_ERROR); + SCReturnInt(HTP_STATUS_ERROR); } - uint64_t consumed = hstate->slice->offset + htp_connp_req_data_consumed(hstate->connp); + uint64_t consumed = hstate->slice->offset + htp_connp_request_data_consumed(hstate->connp); SCLogDebug("HTTP request start: data offset %" PRIu64 ", in_data_counter %" PRIu64, consumed, - (uint64_t)hstate->conn->in_data_counter); - + (uint64_t)htp_conn_request_data_counter(hstate->conn)); /* app-layer-frame-documentation tag start: frame registration http request */ Frame *frame = AppLayerFrameNewByAbsoluteOffset( hstate->f, hstate->slice, consumed, -1, 0, HTTP_FRAME_REQUEST); @@ -1733,34 +1620,34 @@ static int HTPCallbackRequestStart(htp_tx_t *tx) StreamTcpReassemblySetMinInspectDepth(hstate->f->protoctx, STREAM_TOSERVER, hstate->cfg->request.inspect_min_size); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); + HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (tx_ud == NULL) { tx_ud = HTPCalloc(1, sizeof(HtpTxUserData)); if (unlikely(tx_ud == NULL)) { - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } tx_ud->tx_data.file_tx = STREAM_TOSERVER | STREAM_TOCLIENT; // each http tx may xfer files htp_tx_set_user_data(tx, tx_ud); } else { tx_ud->tx_data.updated_ts = true; } - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } /**\internal * \brief called at start of response * Set min inspect size. 
*/ -static int HTPCallbackResponseStart(htp_tx_t *tx) +static int HTPCallbackResponseStart(const htp_connp_t *connp, htp_tx_t *tx) { - HtpState *hstate = htp_connp_get_user_data(tx->connp); + HtpState *hstate = htp_connp_user_data(connp); if (hstate == NULL) { - SCReturnInt(HTP_ERROR); + SCReturnInt(HTP_STATUS_ERROR); } - uint64_t consumed = hstate->slice->offset + htp_connp_res_data_consumed(hstate->connp); + uint64_t consumed = hstate->slice->offset + htp_connp_response_data_consumed(hstate->connp); SCLogDebug("HTTP response start: data offset %" PRIu64 ", out_data_counter %" PRIu64, consumed, - (uint64_t)hstate->conn->out_data_counter); + (uint64_t)htp_conn_response_data_counter(hstate->conn)); Frame *frame = AppLayerFrameNewByAbsoluteOffset( hstate->f, hstate->slice, consumed, -1, 1, HTTP_FRAME_RESPONSE); @@ -1774,11 +1661,11 @@ static int HTPCallbackResponseStart(htp_tx_t *tx) StreamTcpReassemblySetMinInspectDepth(hstate->f->protoctx, STREAM_TOCLIENT, hstate->cfg->response.inspect_min_size); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); + HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (tx_ud == NULL) { tx_ud = HTPCalloc(1, sizeof(HtpTxUserData)); if (unlikely(tx_ud == NULL)) { - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } tx_ud->tx_data.file_tx = STREAM_TOCLIENT; // each http tx may xfer files. Toserver already missed. 
@@ -1786,30 +1673,30 @@ static int HTPCallbackResponseStart(htp_tx_t *tx) } else { tx_ud->tx_data.updated_tc = true; } - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } /** * \brief callback for request to store the recent incoming request - into the recent_in_tx for the given htp state + into the recent_request_tx for the given htp state * \param connp pointer to the current connection parser which has the htp * state in it as user data */ -static int HTPCallbackRequestComplete(htp_tx_t *tx) +static int HTPCallbackRequestComplete(const htp_connp_t *connp, htp_tx_t *tx) { SCEnter(); if (tx == NULL) { - SCReturnInt(HTP_ERROR); + SCReturnInt(HTP_STATUS_ERROR); } - HtpState *hstate = htp_connp_get_user_data(tx->connp); + HtpState *hstate = htp_connp_user_data(connp); if (hstate == NULL) { - SCReturnInt(HTP_ERROR); + SCReturnInt(HTP_STATUS_ERROR); } const uint64_t abs_right_edge = - hstate->slice->offset + htp_connp_req_data_consumed(hstate->connp); + hstate->slice->offset + htp_connp_request_data_consumed(hstate->connp); /* app-layer-frame-documentation tag start: updating frame->len */ if (hstate->request_frame_id > 0) { @@ -1852,29 +1739,29 @@ static int HTPCallbackRequestComplete(htp_tx_t *tx) /* request done, do raw reassembly now to inspect state and stream * at the same time. 
*/ AppLayerParserTriggerRawStreamReassembly(hstate->f, STREAM_TOSERVER); - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } /** * \brief callback for response to remove the recent received requests - from the recent_in_tx for the given htp state + from the recent_request_tx for the given htp state * \param connp pointer to the current connection parser which has the htp * state in it as user data */ -static int HTPCallbackResponseComplete(htp_tx_t *tx) +static int HTPCallbackResponseComplete(const htp_connp_t *connp, htp_tx_t *tx) { SCEnter(); - HtpState *hstate = htp_connp_get_user_data(tx->connp); + HtpState *hstate = htp_connp_user_data(connp); if (hstate == NULL) { - SCReturnInt(HTP_ERROR); + SCReturnInt(HTP_STATUS_ERROR); } /* we have one whole transaction now */ hstate->transaction_cnt++; const uint64_t abs_right_edge = - hstate->slice->offset + htp_connp_res_data_consumed(hstate->connp); + hstate->slice->offset + htp_connp_response_data_consumed(hstate->connp); if (hstate->response_frame_id > 0) { Frame *frame = AppLayerFrameGetById(hstate->f, 1, hstate->response_frame_id); @@ -1890,7 +1777,7 @@ static int HTPCallbackResponseComplete(htp_tx_t *tx) hstate->response_frame_id = 0; } - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud != NULL) { htud->tx_data.updated_tc = true; if (htud->tcflags & HTP_FILENAME_SET) { @@ -1905,149 +1792,97 @@ static int HTPCallbackResponseComplete(htp_tx_t *tx) AppLayerParserTriggerRawStreamReassembly(hstate->f, STREAM_TOCLIENT); /* handle HTTP CONNECT */ - if (tx->request_method_number == HTP_M_CONNECT) { + if (htp_tx_request_method_number(tx) == HTP_METHOD_CONNECT) { /* any 2XX status response implies that the connection will become a tunnel immediately after this packet (RFC 7230, 3.3.3). 
*/ - if ((tx->response_status_number >= 200) && - (tx->response_status_number < 300) && - (hstate->transaction_cnt == 1)) { + if ((htp_tx_response_status_number(tx) >= 200) && + (htp_tx_response_status_number(tx) < 300) && (hstate->transaction_cnt == 1)) { uint16_t dp = 0; - if (tx->request_port_number != -1) { - dp = (uint16_t)tx->request_port_number; + if (htp_tx_request_port_number(tx) != -1) { + dp = (uint16_t)htp_tx_request_port_number(tx); } // both ALPROTO_HTTP1 and ALPROTO_TLS are normal options if (!AppLayerRequestProtocolChange(hstate->f, dp, ALPROTO_UNKNOWN)) { HTPSetEvent( hstate, htud, STREAM_TOCLIENT, HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE); } - tx->request_progress = HTP_REQUEST_COMPLETE; - tx->response_progress = HTP_RESPONSE_COMPLETE; } } hstate->last_response_data_stamp = abs_right_edge; - SCReturnInt(HTP_OK); + SCReturnInt(HTP_STATUS_OK); } -static int HTPCallbackRequestLine(htp_tx_t *tx) +static int HTPCallbackRequestLine(const htp_connp_t *connp, htp_tx_t *tx) { HtpTxUserData *tx_ud; - bstr *request_uri_normalized; - HtpState *hstate = htp_connp_get_user_data(tx->connp); - const HTPCfgRec *cfg = hstate->cfg; - - request_uri_normalized = SCHTPGenerateNormalizedUri(tx, tx->parsed_uri, cfg->uri_include_all); - if (request_uri_normalized == NULL) - return HTP_OK; + HtpState *hstate = htp_connp_user_data(connp); tx_ud = htp_tx_get_user_data(tx); if (unlikely(tx_ud == NULL)) { - bstr_free(request_uri_normalized); - return HTP_OK; + return HTP_STATUS_OK; } - if (unlikely(tx_ud->request_uri_normalized != NULL)) - bstr_free(tx_ud->request_uri_normalized); - tx_ud->request_uri_normalized = request_uri_normalized; - if (tx->flags) { + if (htp_tx_flags(tx)) { HTPErrorCheckTxRequestFlags(hstate, tx); } - return HTP_OK; + return HTP_STATUS_OK; } -static int HTPCallbackDoubleDecodeUriPart(htp_tx_t *tx, bstr *part) -{ - if (part == NULL) - return HTP_OK; - - uint64_t flags = 0; - size_t prevlen = bstr_len(part); - htp_status_t res = 
htp_urldecode_inplace(tx->cfg, HTP_DECODER_URLENCODED, part, &flags); - // shorter string means that uri was encoded - if (res == HTP_OK && prevlen > bstr_len(part)) { - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); - if (htud == NULL) - return HTP_OK; - HtpState *s = htp_connp_get_user_data(tx->connp); - if (s == NULL) - return HTP_OK; - HTPSetEvent(s, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_DOUBLE_ENCODED_URI); - } - - return HTP_OK; -} - -static int HTPCallbackDoubleDecodeQuery(htp_tx_t *tx) -{ - if (tx->parsed_uri == NULL) - return HTP_OK; - - return HTPCallbackDoubleDecodeUriPart(tx, tx->parsed_uri->query); -} - -static int HTPCallbackDoubleDecodePath(htp_tx_t *tx) -{ - if (tx->parsed_uri == NULL) - return HTP_OK; - - return HTPCallbackDoubleDecodeUriPart(tx, tx->parsed_uri->path); -} - -static int HTPCallbackRequestHeaderData(htp_tx_data_t *tx_data) +static int HTPCallbackRequestHeaderData(const htp_connp_t *connp, htp_tx_data_t *tx_data) { void *ptmp; - if (tx_data->len == 0 || tx_data->tx == NULL) - return HTP_OK; + const htp_tx_t *tx = htp_tx_data_tx(tx_data); + if (htp_tx_data_is_empty(tx_data) || tx == NULL) + return HTP_STATUS_OK; - HtpTxUserData *tx_ud = htp_tx_get_user_data(tx_data->tx); + HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); if (tx_ud == NULL) { - return HTP_OK; + return HTP_STATUS_OK; } - ptmp = HTPRealloc(tx_ud->request_headers_raw, - tx_ud->request_headers_raw_len, - tx_ud->request_headers_raw_len + tx_data->len); + ptmp = HTPRealloc(tx_ud->request_headers_raw, tx_ud->request_headers_raw_len, + tx_ud->request_headers_raw_len + htp_tx_data_len(tx_data)); if (ptmp == NULL) { - return HTP_OK; + return HTP_STATUS_OK; } tx_ud->request_headers_raw = ptmp; tx_ud->tx_data.updated_ts = true; - memcpy(tx_ud->request_headers_raw + tx_ud->request_headers_raw_len, - tx_data->data, tx_data->len); - tx_ud->request_headers_raw_len += tx_data->len; + memcpy(tx_ud->request_headers_raw + tx_ud->request_headers_raw_len, 
htp_tx_data_data(tx_data), + htp_tx_data_len(tx_data)); + tx_ud->request_headers_raw_len += htp_tx_data_len(tx_data); - if (tx_data->tx && tx_data->tx->flags) { - HtpState *hstate = htp_connp_get_user_data(tx_data->tx->connp); - HTPErrorCheckTxRequestFlags(hstate, tx_data->tx); + if (tx && htp_tx_flags(tx)) { + HtpState *hstate = htp_connp_user_data(connp); + HTPErrorCheckTxRequestFlags(hstate, tx); } - return HTP_OK; + return HTP_STATUS_OK; } -static int HTPCallbackResponseHeaderData(htp_tx_data_t *tx_data) +static int HTPCallbackResponseHeaderData(const htp_connp_t *connp, htp_tx_data_t *tx_data) { void *ptmp; - if (tx_data->len == 0 || tx_data->tx == NULL) - return HTP_OK; + const htp_tx_t *tx = htp_tx_data_tx(tx_data); + if (htp_tx_data_is_empty(tx_data) || tx == NULL) + return HTP_STATUS_OK; - HtpTxUserData *tx_ud = htp_tx_get_user_data(tx_data->tx); + HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); if (tx_ud == NULL) { - return HTP_OK; + return HTP_STATUS_OK; } tx_ud->tx_data.updated_tc = true; - ptmp = HTPRealloc(tx_ud->response_headers_raw, - tx_ud->response_headers_raw_len, - tx_ud->response_headers_raw_len + tx_data->len); + ptmp = HTPRealloc(tx_ud->response_headers_raw, tx_ud->response_headers_raw_len, + tx_ud->response_headers_raw_len + htp_tx_data_len(tx_data)); if (ptmp == NULL) { - return HTP_OK; + return HTP_STATUS_OK; } tx_ud->response_headers_raw = ptmp; - memcpy(tx_ud->response_headers_raw + tx_ud->response_headers_raw_len, - tx_data->data, tx_data->len); - tx_ud->response_headers_raw_len += tx_data->len; + memcpy(tx_ud->response_headers_raw + tx_ud->response_headers_raw_len, htp_tx_data_data(tx_data), + htp_tx_data_len(tx_data)); + tx_ud->response_headers_raw_len += htp_tx_data_len(tx_data); - return HTP_OK; + return HTP_STATUS_OK; } /* @@ -2055,7 +1890,7 @@ static int HTPCallbackResponseHeaderData(htp_tx_data_t *tx_data) */ static void HTPConfigSetDefaultsPhase1(HTPCfgRec *cfg_prec) { - cfg_prec->uri_include_all = false; + 
htp_config_set_normalized_uri_include_all(cfg_prec->cfg, false); cfg_prec->request.body_limit = HTP_CONFIG_DEFAULT_REQUEST_BODY_LIMIT; cfg_prec->response.body_limit = HTP_CONFIG_DEFAULT_RESPONSE_BODY_LIMIT; cfg_prec->request.inspect_min_size = HTP_CONFIG_DEFAULT_REQUEST_INSPECT_MIN_SIZE; @@ -2088,45 +1923,21 @@ static void HTPConfigSetDefaultsPhase1(HTPCfgRec *cfg_prec) htp_config_register_response_complete(cfg_prec->cfg, HTPCallbackResponseComplete); htp_config_set_parse_request_cookies(cfg_prec->cfg, 0); -#ifdef HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI htp_config_set_allow_space_uri(cfg_prec->cfg, 1); -#endif /* don't convert + to space by default */ - htp_config_set_plusspace_decode(cfg_prec->cfg, HTP_DECODER_URLENCODED, 0); + htp_config_set_plusspace_decode(cfg_prec->cfg, 0); // enables request decompression htp_config_set_request_decompression(cfg_prec->cfg, 1); -#ifdef HAVE_HTP_CONFIG_SET_LZMA_LAYERS - // disable by default htp_config_set_lzma_layers(cfg_prec->cfg, HTP_CONFIG_DEFAULT_LZMA_LAYERS); -#endif -#ifdef HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT - htp_config_set_lzma_memlimit(cfg_prec->cfg, - HTP_CONFIG_DEFAULT_LZMA_MEMLIMIT); -#endif -#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT - htp_config_set_compression_bomb_limit(cfg_prec->cfg, - HTP_CONFIG_DEFAULT_COMPRESSION_BOMB_LIMIT); -#endif -#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT + htp_config_set_lzma_memlimit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_LZMA_MEMLIMIT); + htp_config_set_compression_bomb_limit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_COMPRESSION_BOMB_LIMIT); htp_config_set_compression_time_limit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_COMPRESSION_TIME_LIMIT); -#endif -#ifdef HAVE_HTP_CONFIG_SET_MAX_TX #define HTP_CONFIG_DEFAULT_MAX_TX_LIMIT 512 htp_config_set_max_tx(cfg_prec->cfg, HTP_CONFIG_DEFAULT_MAX_TX_LIMIT); -#endif -#ifdef HAVE_HTP_CONFIG_SET_HEADERS_LIMIT #define HTP_CONFIG_DEFAULT_HEADERS_LIMIT 1024 htp_config_set_number_headers_limit(cfg_prec->cfg, HTP_CONFIG_DEFAULT_HEADERS_LIMIT); -#endif - /* 
libhtp <= 0.5.9 doesn't use soft limit, but it's impossible to set - * only the hard limit. So we set both here to the (current) htp defaults. - * The reason we do this is that if the user sets the hard limit in the - * config, we have to set the soft limit as well. If libhtp starts using - * the soft limit in the future, we at least make sure we control what - * it's value is. */ - htp_config_set_field_limits(cfg_prec->cfg, (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT_SOFT, - (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT_HARD); + htp_config_set_field_limit(cfg_prec->cfg, (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT); } /* hack: htp random range code expects random values in range of 0-RAND_MAX, @@ -2225,7 +2036,8 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT if (personality >= 0) { SCLogDebug("LIBHTP default: %s=%s (%d)", p->name, p->val, personality); - if (htp_config_set_server_personality(cfg_prec->cfg, personality) == HTP_ERROR){ + if (htp_config_set_server_personality(cfg_prec->cfg, personality) == + HTP_STATUS_ERROR) { SCLogWarning("LIBHTP Failed adding " "personality \"%s\", ignoring", p->val); @@ -2237,7 +2049,7 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT /* The IDS personality by default converts the path (and due to * our query string callback also the query string) to lowercase. * Signatures do not expect this, so override it. 
*/ - htp_config_set_convert_lowercase(cfg_prec->cfg, HTP_DECODER_URL_PATH, 0); + htp_config_set_convert_lowercase(cfg_prec->cfg, 0); } else { SCLogWarning("LIBHTP Unknown personality " "\"%s\", ignoring", @@ -2279,16 +2091,10 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT } } else if (strcasecmp("double-decode-query", p->name) == 0) { - if (ConfValIsTrue(p->val)) { - htp_config_register_request_line(cfg_prec->cfg, - HTPCallbackDoubleDecodeQuery); - } + htp_config_set_double_decode_normalized_query(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("double-decode-path", p->name) == 0) { - if (ConfValIsTrue(p->val)) { - htp_config_register_request_line(cfg_prec->cfg, - HTPCallbackDoubleDecodePath); - } + htp_config_set_double_decode_normalized_path(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("response-body-minimal-inspect-size", p->name) == 0) { if (ParseSizeStringU32(p->val, &cfg_prec->response.inspect_min_size) < 0) { @@ -2314,78 +2120,49 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT p->val); exit(EXIT_FAILURE); } -#ifdef HAVE_HTP_CONFIG_SET_RESPONSE_DECOMPRESSION_LAYER_LIMIT - htp_config_set_response_decompression_layer_limit(cfg_prec->cfg, value); -#else - SCLogWarning("can't set response-body-decompress-layer-limit " - "to %u, libhtp version too old", - value); -#endif + htp_config_set_decompression_layer_limit(cfg_prec->cfg, value); } else if (strcasecmp("path-convert-backslash-separators", p->name) == 0) { - htp_config_set_backslash_convert_slashes(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_backslash_convert_slashes(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("path-bestfit-replacement-char", p->name) == 0) { if (strlen(p->val) == 1) { - htp_config_set_bestfit_replacement_byte(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - p->val[0]); + htp_config_set_bestfit_replacement_byte(cfg_prec->cfg, p->val[0]); } else { 
SCLogError("Invalid entry " "for libhtp param path-bestfit-replacement-char"); } } else if (strcasecmp("path-convert-lowercase", p->name) == 0) { - htp_config_set_convert_lowercase(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_convert_lowercase(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("path-nul-encoded-terminates", p->name) == 0) { - htp_config_set_nul_encoded_terminates(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_nul_encoded_terminates(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("path-nul-raw-terminates", p->name) == 0) { - htp_config_set_nul_raw_terminates(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_nul_raw_terminates(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("path-separators-compress", p->name) == 0) { - htp_config_set_path_separators_compress(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_path_separators_compress(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("path-separators-decode", p->name) == 0) { - htp_config_set_path_separators_decode(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_path_separators_decode(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("path-u-encoding-decode", p->name) == 0) { - htp_config_set_u_encoding_decode(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_u_encoding_decode(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("path-url-encoding-invalid-handling", p->name) == 0) { enum htp_url_encoding_handling_t handling; if (strcasecmp(p->val, "preserve_percent") == 0) { - handling = HTP_URL_DECODE_PRESERVE_PERCENT; + handling = HTP_URL_ENCODING_HANDLING_PRESERVE_PERCENT; } else if (strcasecmp(p->val, "remove_percent") == 0) { - handling = HTP_URL_DECODE_REMOVE_PERCENT; + handling = HTP_URL_ENCODING_HANDLING_REMOVE_PERCENT; } else if 
(strcasecmp(p->val, "decode_invalid") == 0) { - handling = HTP_URL_DECODE_PROCESS_INVALID; + handling = HTP_URL_ENCODING_HANDLING_PROCESS_INVALID; } else { SCLogError("Invalid entry " "for libhtp param path-url-encoding-invalid-handling"); return; } - htp_config_set_url_encoding_invalid_handling(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - handling); + htp_config_set_url_encoding_invalid_handling(cfg_prec->cfg, handling); } else if (strcasecmp("path-utf8-convert-bestfit", p->name) == 0) { - htp_config_set_utf8_convert_bestfit(cfg_prec->cfg, - HTP_DECODER_URL_PATH, - ConfValIsTrue(p->val)); + htp_config_set_utf8_convert_bestfit(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("uri-include-all", p->name) == 0) { - cfg_prec->uri_include_all = (1 == ConfValIsTrue(p->val)); - SCLogDebug("uri-include-all %s", - cfg_prec->uri_include_all ? "enabled" : "disabled"); + htp_config_set_normalized_uri_include_all(cfg_prec->cfg, ConfValIsTrue(p->val)); + SCLogDebug("uri-include-all %s", ConfValIsTrue(p->val) ? "enabled" : "disabled"); } else if (strcasecmp("query-plusspace-decode", p->name) == 0) { - htp_config_set_plusspace_decode(cfg_prec->cfg, - HTP_DECODER_URLENCODED, - ConfValIsTrue(p->val)); + htp_config_set_plusspace_decode(cfg_prec->cfg, ConfValIsTrue(p->val)); } else if (strcasecmp("meta-field-limit", p->name) == 0) { uint32_t limit = 0; if (ParseSizeStringU32(p->val, &limit) < 0) { @@ -2399,10 +2176,7 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT "from conf file cannot be 0. 
Killing engine"); } /* set default soft-limit with our new hard limit */ - htp_config_set_field_limits(cfg_prec->cfg, - (size_t)HTP_CONFIG_DEFAULT_FIELD_LIMIT_SOFT, - (size_t)limit); -#ifdef HAVE_HTP_CONFIG_SET_LZMA_MEMLIMIT + htp_config_set_field_limit(cfg_prec->cfg, (size_t)limit); } else if (strcasecmp("lzma-memlimit", p->name) == 0) { uint32_t limit = 0; if (ParseSizeStringU32(p->val, &limit) < 0) { @@ -2417,8 +2191,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT /* set default soft-limit with our new hard limit */ SCLogConfig("Setting HTTP LZMA memory limit to %"PRIu32" bytes", limit); htp_config_set_lzma_memlimit(cfg_prec->cfg, (size_t)limit); -#endif -#ifdef HAVE_HTP_CONFIG_SET_LZMA_LAYERS } else if (strcasecmp("lzma-enabled", p->name) == 0) { if (ConfValIsTrue(p->val)) { htp_config_set_lzma_layers(cfg_prec->cfg, 1); @@ -2432,8 +2204,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT SCLogConfig("Setting HTTP LZMA decompression layers to %" PRIu32 "", (int)limit); htp_config_set_lzma_layers(cfg_prec->cfg, limit); } -#endif -#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_BOMB_LIMIT } else if (strcasecmp("compression-bomb-limit", p->name) == 0) { uint32_t limit = 0; if (ParseSizeStringU32(p->val, &limit) < 0) { @@ -2448,8 +2218,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT /* set default soft-limit with our new hard limit */ SCLogConfig("Setting HTTP compression bomb limit to %"PRIu32" bytes", limit); htp_config_set_compression_bomb_limit(cfg_prec->cfg, (size_t)limit); -#endif -#ifdef HAVE_HTP_CONFIG_SET_COMPRESSION_TIME_LIMIT } else if (strcasecmp("decompression-time-limit", p->name) == 0) { uint32_t limit = 0; // between 1 usec and 1 second @@ -2459,9 +2227,7 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT p->val); } SCLogConfig("Setting HTTP decompression time limit to %" PRIu32 " usec", limit); - 
htp_config_set_compression_time_limit(cfg_prec->cfg, (size_t)limit); -#endif -#ifdef HAVE_HTP_CONFIG_SET_MAX_TX + htp_config_set_compression_time_limit(cfg_prec->cfg, limit); } else if (strcasecmp("max-tx", p->name) == 0) { uint32_t limit = 0; if (ParseSizeStringU32(p->val, &limit) < 0) { @@ -2472,8 +2238,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT /* set default soft-limit with our new hard limit */ SCLogConfig("Setting HTTP max-tx limit to %" PRIu32 " bytes", limit); htp_config_set_max_tx(cfg_prec->cfg, limit); -#endif -#ifdef HAVE_HTP_CONFIG_SET_HEADERS_LIMIT } else if (strcasecmp("headers-limit", p->name) == 0) { uint32_t limit = 0; if (ParseSizeStringU32(p->val, &limit) < 0) { @@ -2483,7 +2247,6 @@ static void HTPConfigParseParameters(HTPCfgRec *cfg_prec, ConfNode *s, struct HT } SCLogConfig("Setting HTTP headers limit to %" PRIu32, limit); htp_config_set_number_headers_limit(cfg_prec->cfg, limit); -#endif } else if (strcasecmp("randomize-inspection-sizes", p->name) == 0) { if (!g_disable_randomness) { cfg_prec->randomize = ConfValIsTrue(p->val); @@ -2673,21 +2436,21 @@ static AppLayerGetFileState HTPGetTxFiles(void *txv, uint8_t direction) static int HTPStateGetAlstateProgress(void *tx, uint8_t direction) { if (direction & STREAM_TOSERVER) - return ((htp_tx_t *)tx)->request_progress; + return htp_tx_request_progress((htp_tx_t *)tx); else - return ((htp_tx_t *)tx)->response_progress; + return htp_tx_response_progress((htp_tx_t *)tx); } static uint64_t HTPStateGetTxCnt(void *alstate) { HtpState *http_state = (HtpState *)alstate; - if (http_state != NULL && http_state->conn != NULL) { - const int64_t size = (int64_t)htp_list_size(http_state->conn->transactions); + if (http_state != NULL && http_state->connp != NULL) { + const int64_t size = htp_connp_tx_size(http_state->connp); if (size < 0) return 0ULL; SCLogDebug("size %"PRIu64, size); - return (uint64_t)size + http_state->tx_freed; + return (uint64_t)size; } else { 
return 0ULL; } @@ -2697,8 +2460,8 @@ static void *HTPStateGetTx(void *alstate, uint64_t tx_id) { HtpState *http_state = (HtpState *)alstate; - if (http_state != NULL && http_state->conn != NULL && tx_id >= http_state->tx_freed) - return htp_list_get(http_state->conn->transactions, tx_id - http_state->tx_freed); + if (http_state != NULL && http_state->connp != NULL) + return (void *)htp_connp_tx(http_state->connp, tx_id); else return NULL; } @@ -2707,10 +2470,10 @@ void *HtpGetTxForH2(void *alstate) { // gets last transaction HtpState *http_state = (HtpState *)alstate; - if (http_state != NULL && http_state->conn != NULL) { - size_t txid = HTPStateGetTxCnt(http_state); - if (txid > http_state->tx_freed) { - return htp_list_get(http_state->conn->transactions, txid - http_state->tx_freed - 1); + if (http_state != NULL && http_state->connp != NULL) { + size_t txid = htp_connp_tx_size(http_state->connp); + if (txid > 0) { + return (void *)htp_connp_tx(http_state->connp, txid - 1); } } return NULL; @@ -2838,7 +2601,7 @@ void RegisterHTPParsers(void) AppLayerParserRegisterGetTx(IPPROTO_TCP, ALPROTO_HTTP1, HTPStateGetTx); AppLayerParserRegisterStateProgressCompletionStatus( - ALPROTO_HTTP1, HTP_REQUEST_COMPLETE, HTP_RESPONSE_COMPLETE); + ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_COMPLETE, HTP_RESPONSE_PROGRESS_COMPLETE); AppLayerParserRegisterGetEventInfo(IPPROTO_TCP, ALPROTO_HTTP1, HTPStateGetEventInfo); AppLayerParserRegisterGetEventInfoById( IPPROTO_TCP, ALPROTO_HTTP1, HTPStateGetEventInfoById); @@ -2932,12 +2695,12 @@ static int HTPParserTest01(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(strcmp(bstr_util_strdup_to_c(h->value), "Victor/1.0")); - FAIL_IF(tx->request_method_number != HTP_M_POST); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_0); + FAIL_IF(bstr_cmp_c(htp_header_value(h), 
"Victor/1.0")); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_POST); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -2976,12 +2739,12 @@ static int HTPParserTest01b(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(strcmp(bstr_util_strdup_to_c(h->value), "Victor/1.0")); - FAIL_IF(tx->request_method_number != HTP_M_POST); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_0); + FAIL_IF(strcmp(bstr_util_strdup_to_c(htp_header_value(h)), "Victor/1.0")); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_POST); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3031,12 +2794,12 @@ static int HTPParserTest01c(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(strcmp(bstr_util_strdup_to_c(h->value), "Victor/1.0")); - FAIL_IF(tx->request_method_number != HTP_M_POST); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_0); + FAIL_IF(strcmp(bstr_util_strdup_to_c(htp_header_value(h)), "Victor/1.0")); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_POST); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3087,12 +2850,12 @@ static int HTPParserTest01a(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(strcmp(bstr_util_strdup_to_c(h->value), "Victor/1.0")); - 
FAIL_IF(tx->request_method_number != HTP_M_POST); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_0); + FAIL_IF(strcmp(bstr_util_strdup_to_c(htp_header_value(h)), "Victor/1.0")); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_POST); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3129,11 +2892,11 @@ static int HTPParserTest02(void) htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NOT_NULL(h); - FAIL_IF_NULL(tx->request_method); - char *method = bstr_util_strdup_to_c(tx->request_method); + FAIL_IF_NULL(htp_tx_request_method(tx)); + char *method = bstr_util_strdup_to_c(htp_tx_request_method(tx)); FAIL_IF_NULL(method); FAIL_IF(strcmp(method, "POST") != 0); @@ -3183,10 +2946,10 @@ static int HTPParserTest03(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NOT_NULL(h); - FAIL_IF(tx->request_method_number != HTP_M_UNKNOWN); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_0); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_UNKNOWN); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3224,11 +2987,10 @@ static int HTPParserTest04(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); - + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NOT_NULL(h); - FAIL_IF(tx->request_method_number != HTP_M_UNKNOWN); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_0_9); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_UNKNOWN); + 
FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V0_9); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3295,13 +3057,13 @@ static int HTPParserTest05(void) htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF_NOT(tx->request_method_number == HTP_M_POST); - FAIL_IF_NOT(tx->request_protocol_number == HTP_PROTOCOL_1_0); + FAIL_IF_NOT(htp_tx_request_method_number(tx) == HTP_METHOD_POST); + FAIL_IF_NOT(htp_tx_request_protocol_number(tx) == HTP_PROTOCOL_V1_0); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF_NOT(tx->response_status_number == 200); + FAIL_IF_NOT(htp_tx_response_status_number(tx) == 200); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3383,13 +3145,13 @@ static int HTPParserTest06(void) htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); - FAIL_IF(tx->response_status_number != 200); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_response_status_number(tx) != 200); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); AppLayerParserThreadCtxFree(alp_tctx); @@ -3442,14 +3204,11 @@ static int HTPParserTest07(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); - - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr 
*)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3506,11 +3265,9 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - PrintRawDataFp(stdout, bstr_ptr(tx_ud->request_uri_normalized), - bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + PrintRawDataFp(stdout, bstr_ptr(request_uri_normalized), bstr_len(request_uri_normalized)); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3570,12 +3327,9 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - PrintRawDataFp(stdout, bstr_ptr(tx_ud->request_uri_normalized), - bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + PrintRawDataFp(stdout, bstr_ptr(request_uri_normalized), bstr_len(request_uri_normalized)); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3628,14 +3382,14 @@ static int HTPParserTest10(void) FAIL_IF_NULL(htp_state); htp_tx_t *tx = HTPStateGetTx(htp_state, 0); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - char *name = bstr_util_strdup_to_c(h->name); + char *name = 
bstr_util_strdup_to_c(htp_header_name(h)); FAIL_IF_NULL(name); FAIL_IF(strcmp(name, "Host") != 0); - char *value = bstr_util_strdup_to_c(h->value); + char *value = bstr_util_strdup_to_c(htp_header_value(h)); FAIL_IF_NULL(value); FAIL_IF(strcmp(value, "www.google.com") != 0); @@ -3688,17 +3442,14 @@ static int HTPParserTest11(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); - HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - - FAIL_IF(bstr_len(tx_ud->request_uri_normalized) != 4); - - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[0] != '/'); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[1] != '%'); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[2] != '0'); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[3] != '0'); + FAIL_IF(bstr_len(request_uri_normalized) != 4); + FAIL_IF(bstr_ptr(request_uri_normalized)[0] != '/'); + FAIL_IF(bstr_ptr(request_uri_normalized)[1] != '%'); + FAIL_IF(bstr_ptr(request_uri_normalized)[2] != '0'); + FAIL_IF(bstr_ptr(request_uri_normalized)[3] != '0'); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3747,19 +3498,17 @@ static int HTPParserTest12(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - - FAIL_IF(bstr_len(tx_ud->request_uri_normalized) != 7); - - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[0] != '/'); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[1] != '?'); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[2] != 'a'); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[3] != '='); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[4] != '%'); - FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[5] != '0'); - 
FAIL_IF(bstr_ptr(tx_ud->request_uri_normalized)[6] != '0'); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + + FAIL_IF(bstr_len(request_uri_normalized) != 7); + FAIL_IF(bstr_ptr(request_uri_normalized)[0] != '/'); + FAIL_IF(bstr_ptr(request_uri_normalized)[1] != '?'); + FAIL_IF(bstr_ptr(request_uri_normalized)[2] != 'a'); + FAIL_IF(bstr_ptr(request_uri_normalized)[3] != '='); + FAIL_IF(bstr_ptr(request_uri_normalized)[4] != '%'); + FAIL_IF(bstr_ptr(request_uri_normalized)[5] != '0'); + FAIL_IF(bstr_ptr(request_uri_normalized)[6] != '0'); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -3806,14 +3555,14 @@ static int HTPParserTest13(void) htp_state = f->alstate; FAIL_IF_NULL(htp_state); htp_tx_t *tx = HTPStateGetTx(htp_state, 0); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - char *name = bstr_util_strdup_to_c(h->name); + char *name = bstr_util_strdup_to_c(htp_header_name(h)); FAIL_IF_NULL(name); FAIL_IF(strcmp(name, "Host") != 0); - char *value = bstr_util_strdup_to_c(h->value); + char *value = bstr_util_strdup_to_c(htp_header_value(h)); FAIL_IF_NULL(value); FAIL_IF(strcmp(value, "www.google.com\rName: Value") != 0); @@ -4090,11 +3839,8 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - FAIL_IF(tx->cfg != htp); - tx = HTPStateGetTx(htp_state, 1); FAIL_IF_NULL(tx); - FAIL_IF(tx->cfg != htp); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4169,36 +3915,36 @@ libhtp:\n\ FAIL_IF_NULL(tx); HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - 
bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); uint8_t ref2[] = "/abc/def?ghi/jkl"; reflen = sizeof(ref2) - 1; tx = HTPStateGetTx(htp_state, 1); FAIL_IF_NULL(tx); + tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); + request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); - - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0); uint8_t ref3[] = "/abc/def?ghi%2fjkl"; reflen = sizeof(ref3) - 1; tx = HTPStateGetTx(htp_state, 2); FAIL_IF_NULL(tx); - tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref3, - bstr_len(tx_ud->request_uri_normalized)) != 0); + tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); + request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(tx_ud); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref3, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4260,11 +4006,11 @@ libhtp:\n\ FAIL_IF_NULL(tx); HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != 
bstr_len(tx_ud->request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); uint8_t ref2[] = "/abc/def?ghi/jkl"; reflen = sizeof(ref2) - 1; @@ -4272,24 +4018,24 @@ libhtp:\n\ tx = HTPStateGetTx(htp_state, 1); FAIL_IF_NULL(tx); tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); + request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0); uint8_t ref3[] = "/abc/def?ghi%2fjkl"; reflen = sizeof(ref3) - 1; tx = HTPStateGetTx(htp_state, 2); FAIL_IF_NULL(tx); tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); + request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref3, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref3, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4367,37 +4113,31 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - 
FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); uint8_t ref2[] = "/abc/def?ghi/jkl"; reflen = sizeof(ref2) - 1; tx = HTPStateGetTx(htp_state, 1); FAIL_IF_NULL(tx); - tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0); uint8_t ref3[] = "/abc/def?ghi%2fjkl"; reflen = sizeof(ref3) - 1; tx = HTPStateGetTx(htp_state, 2); FAIL_IF_NULL(tx); - tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref3, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref3, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4473,26 +4213,22 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud 
= (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); uint8_t ref2[] = "/abc/def?ghi/jkl"; reflen = sizeof(ref2) - 1; tx = HTPStateGetTx(htp_state, 1); FAIL_IF_NULL(tx); - tx_ud = (HtpTxUserData *)htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref2, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref2, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4564,13 +4300,11 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); 
HTPFreeConfig(); @@ -4642,13 +4376,11 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4720,13 +4452,11 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4799,13 +4529,11 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, 
- bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4875,13 +4603,11 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4952,13 +4678,11 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - HtpTxUserData *tx_ud = (HtpTxUserData *) htp_tx_get_user_data(tx); - FAIL_IF_NULL(tx_ud); - FAIL_IF_NULL(tx_ud->request_uri_normalized); - FAIL_IF(reflen != bstr_len(tx_ud->request_uri_normalized)); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + FAIL_IF_NULL(request_uri_normalized); + FAIL_IF(reflen != bstr_len(request_uri_normalized)); - FAIL_IF(memcmp(bstr_ptr(tx_ud->request_uri_normalized), ref1, - bstr_len(tx_ud->request_uri_normalized)) != 0); + FAIL_IF(memcmp(bstr_ptr(request_uri_normalized), ref1, bstr_len(request_uri_normalized)) != 0); AppLayerParserThreadCtxFree(alp_tctx); HTPFreeConfig(); @@ -4981,8 +4705,12 @@ static int HTPBodyReassemblyTest01(void) Flow flow; memset(&flow, 0x00, sizeof(flow)); AppLayerParserState *parser = AppLayerParserStateAlloc(); - htp_tx_t tx; - memset(&tx, 0, sizeof(tx)); + htp_cfg_t *cfg = htp_config_create(); + BUG_ON(cfg == NULL); + htp_connp_t *connp = htp_connp_create(cfg); + BUG_ON(connp 
== NULL); + const htp_tx_t *tx = htp_connp_get_request_tx(connp); + BUG_ON(tx == NULL); hstate.f = &flow; flow.alparser = parser; @@ -5156,15 +4884,15 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); void *txtmp = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, f->alstate, 0); AppLayerDecoderEvents *decoder_events = AppLayerParserGetEventsByTx(IPPROTO_TCP, ALPROTO_HTTP1, txtmp); FAIL_IF_NULL(decoder_events); - FAIL_IF(decoder_events->events[0] != HTTP_DECODER_EVENT_REQUEST_FIELD_TOO_LONG); + FAIL_IF(decoder_events->events[0] != HTP_LOG_CODE_REQUEST_FIELD_TOO_LONG); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -5257,8 +4985,8 @@ libhtp:\n\ htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); void *txtmp = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, f->alstate, 0); AppLayerDecoderEvents *decoder_events = @@ -5314,8 +5042,8 @@ static int HTPParserTest16(void) htp_tx_t *tx = HTPStateGetTx(htp_state, 0); FAIL_IF_NULL(tx); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION //these events are disabled during fuzzing as they are too noisy and consume much resource @@ -5324,9 +5052,8 @@ static int HTPParserTest16(void) AppLayerParserGetEventsByTx(IPPROTO_TCP, ALPROTO_HTTP1, txtmp); 
FAIL_IF_NULL(decoder_events); - FAIL_IF(decoder_events->events[0] != HTTP_DECODER_EVENT_METHOD_DELIM_NON_COMPLIANT); - FAIL_IF(decoder_events->events[1] != HTTP_DECODER_EVENT_URI_DELIM_NON_COMPLIANT); - + FAIL_IF(decoder_events->events[0] != HTP_LOG_CODE_METHOD_DELIM_NON_COMPLIANT); + FAIL_IF(decoder_events->events[1] != HTP_LOG_CODE_URI_DELIM_NON_COMPLIANT); #endif AppLayerParserThreadCtxFree(alp_tctx); @@ -5379,14 +5106,14 @@ static int HTPParserTest20(void) FAIL_IF_NULL(http_state); htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); - FAIL_IF(tx->response_status_number != 0); - FAIL_IF(tx->response_protocol_number != -1); + FAIL_IF(htp_tx_response_status_number(tx) != 0); + FAIL_IF(htp_tx_response_protocol_number(tx) != -1); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -5438,14 +5165,14 @@ static int HTPParserTest21(void) FAIL_IF_NULL(http_state); htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); - FAIL_IF(tx->response_status_number != 0); - FAIL_IF(tx->response_protocol_number != -1); + FAIL_IF(htp_tx_response_status_number(tx) != 0); + FAIL_IF(htp_tx_response_protocol_number(tx) != -1); AppLayerParserThreadCtxFree(alp_tctx); 
StreamTcpFreeConfig(true); @@ -5492,14 +5219,14 @@ static int HTPParserTest22(void) FAIL_IF_NULL(http_state); htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); - FAIL_IF(tx->response_status_number != -0); - FAIL_IF(tx->response_protocol_number != -1); + FAIL_IF(htp_tx_response_status_number(tx) != -0); + FAIL_IF(htp_tx_response_protocol_number(tx) != -1); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -5546,14 +5273,14 @@ static int HTPParserTest23(void) FAIL_IF_NULL(http_state); htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(tx->request_method_number != HTP_M_GET); - FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); - FAIL_IF(tx->response_status_number != -1); - FAIL_IF(tx->response_protocol_number != -2); + FAIL_IF(htp_tx_response_status_number(tx) != -1); + FAIL_IF(htp_tx_response_protocol_number(tx) != -2); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); @@ -5600,14 +5327,14 @@ static int HTPParserTest24(void) FAIL_IF_NULL(http_state); htp_tx_t *tx = HTPStateGetTx(http_state, 0); FAIL_IF_NULL(tx); - htp_header_t *h = htp_table_get_index(tx->request_headers, 0, NULL); + const htp_header_t *h = htp_tx_request_header_index(tx, 0); FAIL_IF_NULL(h); - FAIL_IF(tx->request_method_number != HTP_M_GET); - 
FAIL_IF(tx->request_protocol_number != HTP_PROTOCOL_1_1); + FAIL_IF(htp_tx_request_method_number(tx) != HTP_METHOD_GET); + FAIL_IF(htp_tx_request_protocol_number(tx) != HTP_PROTOCOL_V1_1); - FAIL_IF(tx->response_status_number != -1); - FAIL_IF(tx->response_protocol_number != HTP_PROTOCOL_1_0); + FAIL_IF(htp_tx_response_status_number(tx) != -1); + FAIL_IF(htp_tx_response_protocol_number(tx) != HTP_PROTOCOL_V1_0); AppLayerParserThreadCtxFree(alp_tctx); StreamTcpFreeConfig(true); diff --git a/src/app-layer-htp.h b/src/app-layer-htp.h index 55ff9c9b9fce..9813eb458cf5 100644 --- a/src/app-layer-htp.h +++ b/src/app-layer-htp.h @@ -36,7 +36,7 @@ #include "rust.h" #include "app-layer-frames.h" -#include +#include "htp/htp_rs.h" /* default request body limit */ #define HTP_CONFIG_DEFAULT_REQUEST_BODY_LIMIT 4096U @@ -45,8 +45,7 @@ #define HTP_CONFIG_DEFAULT_REQUEST_INSPECT_WINDOW 4096U #define HTP_CONFIG_DEFAULT_RESPONSE_INSPECT_MIN_SIZE 32768U #define HTP_CONFIG_DEFAULT_RESPONSE_INSPECT_WINDOW 4096U -#define HTP_CONFIG_DEFAULT_FIELD_LIMIT_SOFT 9000U -#define HTP_CONFIG_DEFAULT_FIELD_LIMIT_HARD 18000U +#define HTP_CONFIG_DEFAULT_FIELD_LIMIT 18000U #define HTP_CONFIG_DEFAULT_LZMA_LAYERS 0U /* default libhtp lzma limit, taken from libhtp. 
*/ @@ -73,70 +72,15 @@ enum { }; enum { - /* libhtp errors/warnings */ - HTTP_DECODER_EVENT_UNKNOWN_ERROR, - HTTP_DECODER_EVENT_GZIP_DECOMPRESSION_FAILED, - HTTP_DECODER_EVENT_REQUEST_FIELD_MISSING_COLON, - HTTP_DECODER_EVENT_RESPONSE_FIELD_MISSING_COLON, - HTTP_DECODER_EVENT_INVALID_REQUEST_CHUNK_LEN, - HTTP_DECODER_EVENT_INVALID_RESPONSE_CHUNK_LEN, - HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_REQUEST, - HTTP_DECODER_EVENT_INVALID_TRANSFER_ENCODING_VALUE_IN_RESPONSE, - HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_REQUEST, - HTTP_DECODER_EVENT_INVALID_CONTENT_LENGTH_FIELD_IN_RESPONSE, - HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_REQUEST, - HTTP_DECODER_EVENT_DUPLICATE_CONTENT_LENGTH_FIELD_IN_RESPONSE, - HTTP_DECODER_EVENT_100_CONTINUE_ALREADY_SEEN, - HTTP_DECODER_EVENT_UNABLE_TO_MATCH_RESPONSE_TO_REQUEST, - HTTP_DECODER_EVENT_INVALID_SERVER_PORT_IN_REQUEST, - HTTP_DECODER_EVENT_INVALID_AUTHORITY_PORT, - HTTP_DECODER_EVENT_REQUEST_HEADER_INVALID, - HTTP_DECODER_EVENT_RESPONSE_HEADER_INVALID, - HTTP_DECODER_EVENT_MISSING_HOST_HEADER, - HTTP_DECODER_EVENT_HOST_HEADER_AMBIGUOUS, - HTTP_DECODER_EVENT_INVALID_REQUEST_FIELD_FOLDING, - HTTP_DECODER_EVENT_INVALID_RESPONSE_FIELD_FOLDING, - HTTP_DECODER_EVENT_REQUEST_FIELD_TOO_LONG, - HTTP_DECODER_EVENT_RESPONSE_FIELD_TOO_LONG, - HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG, - HTTP_DECODER_EVENT_REQUEST_SERVER_PORT_TCP_PORT_MISMATCH, - HTTP_DECODER_EVENT_URI_HOST_INVALID, - HTTP_DECODER_EVENT_HEADER_HOST_INVALID, - HTTP_DECODER_EVENT_METHOD_DELIM_NON_COMPLIANT, - HTTP_DECODER_EVENT_URI_DELIM_NON_COMPLIANT, - HTTP_DECODER_EVENT_REQUEST_LINE_LEADING_WHITESPACE, - HTTP_DECODER_EVENT_TOO_MANY_ENCODING_LAYERS, - HTTP_DECODER_EVENT_ABNORMAL_CE_HEADER, - HTTP_DECODER_EVENT_AUTH_UNRECOGNIZED, - HTTP_DECODER_EVENT_REQUEST_HEADER_REPETITION, - HTTP_DECODER_EVENT_RESPONSE_HEADER_REPETITION, - HTTP_DECODER_EVENT_RESPONSE_MULTIPART_BYTERANGES, - HTTP_DECODER_EVENT_RESPONSE_ABNORMAL_TRANSFER_ENCODING, - 
HTTP_DECODER_EVENT_RESPONSE_CHUNKED_OLD_PROTO, - HTTP_DECODER_EVENT_RESPONSE_INVALID_PROTOCOL, - HTTP_DECODER_EVENT_RESPONSE_INVALID_STATUS, - HTTP_DECODER_EVENT_REQUEST_LINE_INCOMPLETE, - HTTP_DECODER_EVENT_DOUBLE_ENCODED_URI, - HTTP_DECODER_EVENT_REQUEST_LINE_INVALID, - HTTP_DECODER_EVENT_REQUEST_BODY_UNEXPECTED, - - HTTP_DECODER_EVENT_LZMA_MEMLIMIT_REACHED, - HTTP_DECODER_EVENT_COMPRESSION_BOMB, - - HTTP_DECODER_EVENT_RANGE_INVALID, - HTTP_DECODER_EVENT_REQUEST_CHUNK_EXTENSION, - HTTP_DECODER_EVENT_REQUEST_LINE_MISSING_PROTOCOL, - HTTP_DECODER_EVENT_REQUEST_TOO_MANY_HEADERS, - HTTP_DECODER_EVENT_RESPONSE_TOO_MANY_HEADERS, - /* suricata errors/warnings */ - HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR, - HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA, - HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER, - - HTTP_DECODER_EVENT_TOO_MANY_WARNINGS, - - HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE, + HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR = 200, + HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA = 201, + HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER = 202, + + HTTP_DECODER_EVENT_TOO_MANY_WARNINGS = 203, + HTTP_DECODER_EVENT_RANGE_INVALID = 204, + HTTP_DECODER_EVENT_FILE_NAME_TOO_LONG = 205, + HTTP_DECODER_EVENT_FAILED_PROTOCOL_CHANGE = 206, }; typedef enum HtpSwfCompressType_ { @@ -219,8 +163,6 @@ typedef struct HtpTxUserData_ { HtpBody request_body; HtpBody response_body; - bstr *request_uri_normalized; - uint8_t *request_headers_raw; uint8_t *response_headers_raw; uint32_t request_headers_raw_len; @@ -242,18 +184,11 @@ typedef struct HtpState_ { htp_conn_t *conn; Flow *f; /**< Needed to retrieve the original flow when using HTPLib callbacks */ uint64_t transaction_cnt; - // tx_freed is the number of already freed transactions - // This is needed as libhtp only keeps the live transactions : - // To get the total number of transactions, we need to add - // the number of transactions tracked by libhtp to this number. 
- // It is also needed as an offset to translate between suricata - // transaction id to libhtp offset in its list/array - uint64_t tx_freed; const struct HTPCfgRec_ *cfg; uint16_t flags; uint16_t events; - uint16_t htp_messages_offset; /**< offset into conn->messages list */ - uint32_t file_track_id; /**< used to assign file track ids to files */ + uint16_t htp_messages_count; /**< Number of already logged messages */ + uint32_t file_track_id; /**< used to assign file track ids to files */ uint64_t last_request_data_stamp; uint64_t last_response_data_stamp; StreamSlice *slice; diff --git a/src/app-layer-http2.c b/src/app-layer-http2.c index 84f51ec5553b..bababc099c22 100644 --- a/src/app-layer-http2.c +++ b/src/app-layer-http2.c @@ -77,23 +77,23 @@ void HTTP2MimicHttp1Request(void *alstate_orig, void *h2s) if (h2s == NULL || h1tx == NULL) { return; } - if (h1tx->request_method == NULL) { + if (htp_tx_request_method(h1tx) == NULL) { // may happen if we only got the reply, not the HTTP1 request return; } // else - rs_http2_tx_set_method( - h2s, bstr_ptr(h1tx->request_method), (uint32_t)bstr_len(h1tx->request_method)); - if (h1tx->request_uri != NULL) { + rs_http2_tx_set_method(h2s, bstr_ptr(htp_tx_request_method(h1tx)), + (uint32_t)bstr_len(htp_tx_request_method(h1tx))); + if (htp_tx_request_uri(h1tx) != NULL) { // A request line without spaces gets interpreted as a request_method // and has request_uri=NULL - rs_http2_tx_set_uri( - h2s, bstr_ptr(h1tx->request_uri), (uint32_t)bstr_len(h1tx->request_uri)); + rs_http2_tx_set_uri(h2s, bstr_ptr(htp_tx_request_uri(h1tx)), + (uint32_t)bstr_len(htp_tx_request_uri(h1tx))); } - size_t nbheaders = htp_table_size(h1tx->request_headers); + size_t nbheaders = htp_tx_request_headers_size(h1tx); for (size_t i = 0; i < nbheaders; i++) { - htp_header_t *h = htp_table_get_index(h1tx->request_headers, i, NULL); - rs_http2_tx_add_header(h2s, bstr_ptr(h->name), (uint32_t)bstr_len(h->name), - bstr_ptr(h->value), 
(uint32_t)bstr_len(h->value)); + const htp_header_t *h = htp_tx_request_header_index(h1tx, i); + rs_http2_tx_add_header(h2s, htp_header_name_ptr(h), (uint32_t)htp_header_name_len(h), + htp_header_value_ptr(h), (uint32_t)htp_header_value_len(h)); } } diff --git a/src/detect-file-data.c b/src/detect-file-data.c index d976b51c00b4..f34a3c7b099e 100644 --- a/src/detect-file-data.c +++ b/src/detect-file-data.c @@ -260,7 +260,7 @@ static InspectionBuffer *FiledataGetDataCallback(DetectEngineThreadCtx *det_ctx, ips = htp_state->cfg->http_body_inline; const bool body_done = AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, - flow_flags) > HTP_RESPONSE_BODY; + flow_flags) > HTP_RESPONSE_PROGRESS_BODY; SCLogDebug("response.body_limit %u file_size %" PRIu64 ", cur_file->inspect_min_size %" PRIu32 ", EOF %s, progress > body? %s", diff --git a/src/detect-http-client-body.c b/src/detect-http-client-body.c index 7747b61b858b..a5a56b942dea 100644 --- a/src/detect-http-client-body.c +++ b/src/detect-http-client-body.c @@ -104,10 +104,10 @@ void DetectHttpClientBodyRegister(void) sigmatch_table[DETECT_HTTP_REQUEST_BODY].flags |= SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_client_body", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_BODY, DetectEngineInspectBufferHttpBody, NULL); + HTP_REQUEST_PROGRESS_BODY, DetectEngineInspectBufferHttpBody, NULL); DetectAppLayerMpmRegister("http_client_body", SIG_FLAG_TOSERVER, 2, - PrefilterMpmHttpRequestBodyRegister, NULL, ALPROTO_HTTP1, HTP_REQUEST_BODY); + PrefilterMpmHttpRequestBodyRegister, NULL, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_BODY); DetectAppLayerInspectEngineRegister("http_client_body", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectFiledata, NULL); @@ -245,7 +245,7 @@ static InspectionBuffer *HttpRequestBodyGetDataCallback(DetectEngineThreadCtx *d htp_state->cfg->request.body_limit, body->content_len_so_far, htp_state->cfg->request.inspect_min_size, flags & 
STREAM_EOF ? "true" : "false", (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) > - HTP_REQUEST_BODY) + HTP_REQUEST_PROGRESS_BODY) ? "true" : "false"); @@ -256,7 +256,7 @@ static InspectionBuffer *HttpRequestBodyGetDataCallback(DetectEngineThreadCtx *d body->content_len_so_far < htp_state->cfg->request.body_limit) && body->content_len_so_far < htp_state->cfg->request.inspect_min_size && !(AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) > - HTP_REQUEST_BODY) && + HTP_REQUEST_PROGRESS_BODY) && !(flags & STREAM_EOF)) { SCLogDebug("we still haven't seen the entire request body. " "Let's defer body inspection till we see the " @@ -336,11 +336,11 @@ static uint8_t DetectEngineInspectBufferHttpBody(DetectEngineCtx *de_ctx, if (flags & STREAM_TOSERVER) { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, txv, flags) > - HTP_REQUEST_BODY) + HTP_REQUEST_PROGRESS_BODY) return DETECT_ENGINE_INSPECT_SIG_CANT_MATCH; } else { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, txv, flags) > - HTP_RESPONSE_BODY) + HTP_RESPONSE_PROGRESS_BODY) return DETECT_ENGINE_INSPECT_SIG_CANT_MATCH; } return DETECT_ENGINE_INSPECT_SIG_NO_MATCH; diff --git a/src/detect-http-cookie.c b/src/detect-http-cookie.c index 5f4898285460..3eaff220a493 100644 --- a/src/detect-http-cookie.c +++ b/src/detect-http-cookie.c @@ -107,14 +107,14 @@ void DetectHttpCookieRegister(void) sigmatch_table[DETECT_HTTP_COOKIE].flags |= SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_cookie", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetRequestData); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetRequestData); DetectAppLayerInspectEngineRegister("http_cookie", ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetResponseData); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetResponseData); 
DetectAppLayerMpmRegister("http_cookie", SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetRequestData, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetRequestData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerMpmRegister("http_cookie", SIG_FLAG_TOCLIENT, 2, PrefilterGenericMpmRegister, - GetResponseData, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetResponseData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerInspectEngineRegister("http_cookie", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetRequestData2); @@ -177,18 +177,17 @@ static InspectionBuffer *GetRequestData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->request_headers == NULL) + if (htp_tx_request_headers(tx) == NULL) return NULL; - htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers, - "Cookie"); - if (h == NULL || h->value == NULL) { + const htp_header_t *h = htp_tx_request_header(tx, "Cookie"); + if (h == NULL || htp_header_value(h) == NULL) { SCLogDebug("HTTP cookie header not present in this request"); return NULL; } - const uint32_t data_len = bstr_len(h->value); - const uint8_t *data = bstr_ptr(h->value); + const uint32_t data_len = htp_header_value_len(h); + const uint8_t *data = htp_header_value_ptr(h); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); @@ -205,18 +204,17 @@ static InspectionBuffer *GetResponseData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->response_headers == NULL) + if (htp_tx_response_headers(tx) == NULL) return NULL; - htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->response_headers, - "Set-Cookie"); - if (h == NULL || h->value == NULL) { + const htp_header_t *h = htp_tx_response_header(tx, "Set-Cookie"); + if (h == NULL || htp_header_value(h) == NULL) { SCLogDebug("HTTP cookie header not present 
in this request"); return NULL; } - const uint32_t data_len = bstr_len(h->value); - const uint8_t *data = bstr_ptr(h->value); + const uint32_t data_len = htp_header_value_len(h); + const uint8_t *data = htp_header_value_ptr(h); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-http-header-names.c b/src/detect-http-header-names.c index 66bc73d44c80..5cd395dfcc79 100644 --- a/src/detect-http-header-names.c +++ b/src/detect-http-header-names.c @@ -86,27 +86,27 @@ static uint8_t *GetBufferForTX( return NULL; } - htp_table_t *headers; + const htp_headers_t *headers; if (flags & STREAM_TOSERVER) { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) <= - HTP_REQUEST_HEADERS) + HTP_REQUEST_PROGRESS_HEADERS) return NULL; - headers = tx->request_headers; + headers = htp_tx_request_headers(tx); } else { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) <= - HTP_RESPONSE_HEADERS) + HTP_RESPONSE_PROGRESS_HEADERS) return NULL; - headers = tx->response_headers; + headers = htp_tx_response_headers(tx); } if (headers == NULL) return NULL; /* fill the buffer. 
\r\nName1\r\nName2\r\n\r\n */ size_t i = 0; - size_t no_of_headers = htp_table_size(headers); + size_t no_of_headers = htp_headers_size(headers); for (; i < no_of_headers; i++) { - htp_header_t *h = htp_table_get_index(headers, i, NULL); - size_t size = bstr_size(h->name) + 2; // for \r\n + const htp_header_t *h = htp_headers_get_index(headers, i); + size_t size = htp_header_name_len(h) + 2; // for \r\n if (i == 0) size += 2; if (i + 1 == no_of_headers) @@ -126,8 +126,8 @@ static uint8_t *GetBufferForTX( buf->buffer[buf->len++] = '\n'; } - memcpy(buf->buffer + buf->len, bstr_ptr(h->name), bstr_size(h->name)); - buf->len += bstr_size(h->name); + memcpy(buf->buffer + buf->len, htp_header_name_ptr(h), htp_header_name_len(h)); + buf->len += htp_header_name_len(h); buf->buffer[buf->len++] = '\r'; buf->buffer[buf->len++] = '\n'; @@ -220,14 +220,14 @@ void DetectHttpHeaderNamesRegister(void) /* http1 */ DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetBuffer1ForTX, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetBuffer1ForTX, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOCLIENT, 2, PrefilterGenericMpmRegister, - GetBuffer1ForTX, ALPROTO_HTTP1, HTP_RESPONSE_HEADERS); + GetBuffer1ForTX, ALPROTO_HTTP1, HTP_RESPONSE_PROGRESS_HEADERS); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); + HTP_RESPONSE_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); /* http2 */ DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, diff --git a/src/detect-http-header.c b/src/detect-http-header.c index 
22a487583ae7..a3af37d65873 100644 --- a/src/detect-http-header.c +++ b/src/detect-http-header.c @@ -79,36 +79,34 @@ static uint8_t *GetBufferForTX( return NULL; } - htp_table_t *headers; + const htp_headers_t *headers; if (flags & STREAM_TOSERVER) { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) <= - HTP_REQUEST_HEADERS) + HTP_REQUEST_PROGRESS_HEADERS) return NULL; - headers = tx->request_headers; + headers = htp_tx_request_headers(tx); } else { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) <= - HTP_RESPONSE_HEADERS) + HTP_RESPONSE_PROGRESS_HEADERS) return NULL; - headers = tx->response_headers; + headers = htp_tx_response_headers(tx); } if (headers == NULL) return NULL; size_t i = 0; - size_t no_of_headers = htp_table_size(headers); + size_t no_of_headers = htp_headers_size(headers); for (; i < no_of_headers; i++) { - htp_header_t *h = htp_table_get_index(headers, i, NULL); - size_t size1 = bstr_size(h->name); - size_t size2 = bstr_size(h->value); + const htp_header_t *h = htp_headers_get_index(headers, i); + size_t size1 = htp_header_name_len(h); + size_t size2 = htp_header_value_len(h); if (flags & STREAM_TOSERVER) { - if (size1 == 6 && - SCMemcmpLowercase("cookie", bstr_ptr(h->name), 6) == 0) { + if (size1 == 6 && SCMemcmpLowercase("cookie", htp_header_name_ptr(h), 6) == 0) { continue; } } else { - if (size1 == 10 && - SCMemcmpLowercase("set-cookie", bstr_ptr(h->name), 10) == 0) { + if (size1 == 10 && SCMemcmpLowercase("set-cookie", htp_header_name_ptr(h), 10) == 0) { continue; } } @@ -124,12 +122,12 @@ static uint8_t *GetBufferForTX( } } - memcpy(buf->buffer + buf->len, bstr_ptr(h->name), bstr_size(h->name)); - buf->len += bstr_size(h->name); + memcpy(buf->buffer + buf->len, htp_header_name_ptr(h), htp_header_name_len(h)); + buf->len += htp_header_name_len(h); buf->buffer[buf->len++] = ':'; buf->buffer[buf->len++] = ' '; - memcpy(buf->buffer + buf->len, bstr_ptr(h->value), bstr_size(h->value)); - buf->len 
+= bstr_size(h->value); + memcpy(buf->buffer + buf->len, htp_header_value_ptr(h), htp_header_value_len(h)); + buf->len += htp_header_value_len(h); buf->buffer[buf->len++] = '\r'; buf->buffer[buf->len++] = '\n'; #if 0 // looks like this breaks existing rules @@ -304,9 +302,8 @@ static int PrefilterMpmHttpHeaderRequestRegister(DetectEngineCtx *de_ctx, SigGro pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeader, - mpm_reg->app_v2.alproto, HTP_REQUEST_HEADERS, - pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); + int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeader, mpm_reg->app_v2.alproto, + HTP_REQUEST_PROGRESS_HEADERS, pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); return r; @@ -320,9 +317,8 @@ static int PrefilterMpmHttpHeaderRequestRegister(DetectEngineCtx *de_ctx, SigGro pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailer, - mpm_reg->app_v2.alproto, HTP_REQUEST_TRAILER, - pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); + r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailer, mpm_reg->app_v2.alproto, + HTP_REQUEST_PROGRESS_TRAILER, pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); } @@ -342,9 +338,8 @@ static int PrefilterMpmHttpHeaderResponseRegister(DetectEngineCtx *de_ctx, SigGr pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeader, - mpm_reg->app_v2.alproto, HTP_RESPONSE_HEADERS, - pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); + int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeader, mpm_reg->app_v2.alproto, + HTP_RESPONSE_PROGRESS_HEADERS, pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); return r; @@ -358,9 +353,8 @@ static int 
PrefilterMpmHttpHeaderResponseRegister(DetectEngineCtx *de_ctx, SigGr pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailer, - mpm_reg->app_v2.alproto, HTP_RESPONSE_TRAILER, - pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); + r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailer, mpm_reg->app_v2.alproto, + HTP_RESPONSE_PROGRESS_TRAILER, pectx, PrefilterMpmHttpHeaderFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); } @@ -430,13 +424,13 @@ void DetectHttpHeaderRegister(void) sigmatch_table[DETECT_HTTP_HEADER].flags |= SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_header", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferHttpHeader, NULL); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferHttpHeader, NULL); DetectAppLayerMpmRegister("http_header", SIG_FLAG_TOSERVER, 2, PrefilterMpmHttpHeaderRequestRegister, NULL, ALPROTO_HTTP1, 0); /* not used, registered twice: HEADERS/TRAILER */ DetectAppLayerInspectEngineRegister("http_header", ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_HEADERS, DetectEngineInspectBufferHttpHeader, NULL); + HTP_RESPONSE_PROGRESS_HEADERS, DetectEngineInspectBufferHttpHeader, NULL); DetectAppLayerMpmRegister("http_header", SIG_FLAG_TOCLIENT, 2, PrefilterMpmHttpHeaderResponseRegister, NULL, ALPROTO_HTTP1, 0); /* not used, registered twice: HEADERS/TRAILER */ @@ -556,13 +550,13 @@ static InspectionBuffer *GetHttp1HeaderData(DetectEngineThreadCtx *det_ctx, } htp_tx_t *tx = (htp_tx_t *)txv; - htp_table_t *headers; + const htp_headers_t *headers; if (flags & STREAM_TOSERVER) { - headers = tx->request_headers; + headers = htp_tx_request_headers(tx); } else { - headers = tx->response_headers; + headers = htp_tx_response_headers(tx); } - size_t no_of_headers = htp_table_size(headers); + size_t no_of_headers = htp_headers_size(headers); if (local_id == 0) { // We initialize a big buffer on 
first item // Then, we will just use parts of it @@ -579,9 +573,9 @@ static InspectionBuffer *GetHttp1HeaderData(DetectEngineThreadCtx *det_ctx, hdr_td->cap = no_of_headers; } for (size_t i = 0; i < no_of_headers; i++) { - htp_header_t *h = htp_table_get_index(headers, i, NULL); - size_t size1 = bstr_size(h->name); - size_t size2 = bstr_size(h->value); + const htp_header_t *h = htp_headers_get_index(headers, i); + size_t size1 = htp_header_name_len(h); + size_t size2 = htp_header_value_len(h); size_t size = size1 + size2 + 2; if (hdr_td->items[i].len < size) { // Use realloc, as this pointer is not freed until HttpMultiBufHeaderThreadDataFree @@ -591,10 +585,10 @@ static InspectionBuffer *GetHttp1HeaderData(DetectEngineThreadCtx *det_ctx, } hdr_td->items[i].buffer = tmp; } - memcpy(hdr_td->items[i].buffer, bstr_ptr(h->name), size1); + memcpy(hdr_td->items[i].buffer, htp_header_name_ptr(h), size1); hdr_td->items[i].buffer[size1] = ':'; hdr_td->items[i].buffer[size1 + 1] = ' '; - memcpy(hdr_td->items[i].buffer + size1 + 2, bstr_ptr(h->value), size2); + memcpy(hdr_td->items[i].buffer + size1 + 2, htp_header_value_ptr(h), size2); hdr_td->items[i].len = size; } hdr_td->len = no_of_headers; @@ -637,7 +631,7 @@ void DetectHttpRequestHeaderRegister(void) DetectAppLayerMultiRegister("http_request_header", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateOpen, GetHttp2HeaderData, 2, HTTP2StateOpen); DetectAppLayerMultiRegister("http_request_header", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, GetHttp1HeaderData, 2, HTP_REQUEST_HEADERS); + HTP_REQUEST_PROGRESS_HEADERS, GetHttp1HeaderData, 2, HTP_REQUEST_PROGRESS_HEADERS); DetectBufferTypeSetDescriptionByName("http_request_header", "HTTP header name and value"); g_http_request_header_buffer_id = DetectBufferTypeGetByName("http_request_header"); @@ -670,7 +664,7 @@ void DetectHttpResponseHeaderRegister(void) DetectAppLayerMultiRegister("http_response_header", ALPROTO_HTTP2, SIG_FLAG_TOCLIENT, HTTP2StateOpen, 
GetHttp2HeaderData, 2, HTTP2StateOpen); DetectAppLayerMultiRegister("http_response_header", ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_HEADERS, GetHttp1HeaderData, 2, HTP_RESPONSE_HEADERS); + HTP_RESPONSE_PROGRESS_HEADERS, GetHttp1HeaderData, 2, HTP_RESPONSE_PROGRESS_HEADERS); DetectBufferTypeSetDescriptionByName("http_response_header", "HTTP header name and value"); g_http_response_header_buffer_id = DetectBufferTypeGetByName("http_response_header"); diff --git a/src/detect-http-headers-stub.h b/src/detect-http-headers-stub.h index 82d5f543d7a9..38729e0caa07 100644 --- a/src/detect-http-headers-stub.h +++ b/src/detect-http-headers-stub.h @@ -29,7 +29,7 @@ #include "suricata-common.h" #include "flow.h" -#include +#include "htp/htp_rs.h" #include "detect.h" #include "detect-parse.h" @@ -53,19 +53,18 @@ static InspectionBuffer *GetRequestData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->request_headers == NULL) + if (htp_tx_request_headers(tx) == NULL) return NULL; - htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers, - HEADER_NAME); - if (h == NULL || h->value == NULL) { + const htp_header_t *h = htp_tx_request_header(tx, HEADER_NAME); + if (h == NULL || htp_header_value(h) == NULL) { SCLogDebug("HTTP %s header not present in this request", HEADER_NAME); return NULL; } - const uint32_t data_len = bstr_len(h->value); - const uint8_t *data = bstr_ptr(h->value); + const uint32_t data_len = htp_header_value_len(h); + const uint8_t *data = htp_header_value_ptr(h); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); @@ -109,19 +108,18 @@ static InspectionBuffer *GetResponseData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->response_headers == NULL) + if (htp_tx_response_headers(tx) == NULL) return NULL; - htp_header_t *h = (htp_header_t 
*)htp_table_get_c(tx->response_headers, - HEADER_NAME); - if (h == NULL || h->value == NULL) { + const htp_header_t *h = htp_tx_response_header(tx, HEADER_NAME); + if (h == NULL || htp_header_value(h) == NULL) { SCLogDebug("HTTP %s header not present in this request", HEADER_NAME); return NULL; } - const uint32_t data_len = bstr_len(h->value); - const uint8_t *data = bstr_ptr(h->value); + const uint32_t data_len = htp_header_value_len(h); + const uint8_t *data = htp_header_value_ptr(h); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); @@ -187,25 +185,25 @@ static void DetectHttpHeadersRegisterStub(void) #ifdef KEYWORD_TOSERVER DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetRequestData, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetRequestData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, GetRequestData2, ALPROTO_HTTP2, HTTP2StateDataClient); #endif #ifdef KEYWORD_TOCLIENT DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOCLIENT, 2, PrefilterGenericMpmRegister, - GetResponseData, ALPROTO_HTTP1, HTP_RESPONSE_HEADERS); + GetResponseData, ALPROTO_HTTP1, HTP_RESPONSE_PROGRESS_HEADERS); DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOCLIENT, 2, PrefilterGenericMpmRegister, GetResponseData2, ALPROTO_HTTP2, HTTP2StateDataServer); #endif #ifdef KEYWORD_TOSERVER DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetRequestData); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetRequestData); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetRequestData2); #endif #ifdef KEYWORD_TOCLIENT DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - 
HTP_RESPONSE_HEADERS, DetectEngineInspectBufferGeneric, GetResponseData); + HTP_RESPONSE_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetResponseData); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP2, SIG_FLAG_TOCLIENT, HTTP2StateDataServer, DetectEngineInspectBufferGeneric, GetResponseData2); #endif diff --git a/src/detect-http-host.c b/src/detect-http-host.c index fe36a261e6cc..e57fc44d6a8c 100644 --- a/src/detect-http-host.c +++ b/src/detect-http-host.c @@ -106,10 +106,10 @@ void DetectHttpHHRegister(void) sigmatch_table[DETECT_HTTP_HOST].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_host", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetData); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_host", SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetData, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerInspectEngineRegister("http_host", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetData2); @@ -141,10 +141,10 @@ void DetectHttpHHRegister(void) sigmatch_table[DETECT_HTTP_HOST_RAW].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_raw_host", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetRawData); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetRawData); DetectAppLayerMpmRegister("http_raw_host", SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetRawData, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetRawData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerInspectEngineRegister("http_raw_host", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetRawData2); @@ -242,11 +242,11 @@ static InspectionBuffer 
*GetData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->request_hostname == NULL) + if (htp_tx_request_hostname(tx) == NULL) return NULL; - const uint32_t data_len = bstr_len(tx->request_hostname); - const uint8_t *data = bstr_ptr(tx->request_hostname); + const uint32_t data_len = bstr_len(htp_tx_request_hostname(tx)); + const uint8_t *data = bstr_ptr(htp_tx_request_hostname(tx)); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); @@ -345,20 +345,19 @@ static InspectionBuffer *GetRawData(DetectEngineThreadCtx *det_ctx, const uint8_t *data = NULL; uint32_t data_len = 0; - if (tx->parsed_uri == NULL || tx->parsed_uri->hostname == NULL) { - if (tx->request_headers == NULL) + if (htp_uri_hostname(htp_tx_parsed_uri(tx)) == NULL) { + if (htp_tx_request_headers(tx) == NULL) return NULL; - htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers, - "Host"); - if (h == NULL || h->value == NULL) + const htp_header_t *h = htp_tx_request_header(tx, "Host"); + if (htp_header_value(h) == NULL) return NULL; - data = (const uint8_t *)bstr_ptr(h->value); - data_len = bstr_len(h->value); + data = htp_header_value_ptr(h); + data_len = htp_header_value_len(h); } else { - data = (const uint8_t *)bstr_ptr(tx->parsed_uri->hostname); - data_len = bstr_len(tx->parsed_uri->hostname); + data = (const uint8_t *)bstr_ptr(htp_uri_hostname(htp_tx_parsed_uri(tx))); + data_len = bstr_len(htp_uri_hostname(htp_tx_parsed_uri(tx))); } InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); diff --git a/src/detect-http-method.c b/src/detect-http-method.c index 8d08f0369e90..4fdc2466fd8a 100644 --- a/src/detect-http-method.c +++ b/src/detect-http-method.c @@ -98,10 +98,10 @@ void DetectHttpMethodRegister(void) sigmatch_table[DETECT_HTTP_METHOD].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_method", 
ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_REQUEST_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_method", SIG_FLAG_TOSERVER, 4, PrefilterGenericMpmRegister, - GetData, ALPROTO_HTTP1, HTP_REQUEST_LINE); + GetData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_LINE); DetectAppLayerInspectEngineRegister("http_method", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetData2); @@ -203,11 +203,11 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->request_method == NULL) + if (htp_tx_request_method(tx) == NULL) return NULL; - const uint32_t data_len = bstr_len(tx->request_method); - const uint8_t *data = bstr_ptr(tx->request_method); + const uint32_t data_len = bstr_len(htp_tx_request_method(tx)); + const uint8_t *data = bstr_ptr(htp_tx_request_method(tx)); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-http-protocol.c b/src/detect-http-protocol.c index 6214c80513be..1f0941945eb3 100644 --- a/src/detect-http-protocol.c +++ b/src/detect-http-protocol.c @@ -87,13 +87,13 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, { InspectionBuffer *buffer = InspectionBufferGet(det_ctx, list_id); if (buffer->inspect == NULL) { - bstr *str = NULL; + const bstr *str = NULL; htp_tx_t *tx = (htp_tx_t *)txv; if (flow_flags & STREAM_TOSERVER) - str = tx->request_protocol; + str = htp_tx_request_protocol(tx); else if (flow_flags & STREAM_TOCLIENT) - str = tx->response_protocol; + str = htp_tx_response_protocol(tx); if (str == NULL) { SCLogDebug("HTTP protocol not set"); @@ -130,7 +130,6 @@ static InspectionBuffer *GetData2(DetectEngineThreadCtx *det_ctx, static bool DetectHttpProtocolValidateCallback(const Signature *s, const char **sigerror) { -#ifdef 
HAVE_HTP_CONFIG_SET_ALLOW_SPACE_URI for (uint32_t x = 0; x < s->init_data->buffer_index; x++) { if (s->init_data->buffers[x].id != (uint32_t)g_buffer_id) continue; @@ -148,7 +147,6 @@ static bool DetectHttpProtocolValidateCallback(const Signature *s, const char ** } } } -#endif return true; } @@ -165,13 +163,13 @@ void DetectHttpProtocolRegister(void) sigmatch_table[DETECT_AL_HTTP_PROTOCOL].flags |= SIGMATCH_INFO_STICKY_BUFFER | SIGMATCH_NOOPT; DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetData, ALPROTO_HTTP1, HTP_REQUEST_LINE); + GetData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_LINE); DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOCLIENT, 2, PrefilterGenericMpmRegister, - GetData, ALPROTO_HTTP1, HTP_RESPONSE_LINE); + GetData, ALPROTO_HTTP1, HTP_RESPONSE_PROGRESS_LINE); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_REQUEST_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_RESPONSE_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetData2); diff --git a/src/detect-http-raw-header.c b/src/detect-http-raw-header.c index 0bb834b7726a..2cd4303e13c6 100644 --- a/src/detect-http-raw-header.c +++ b/src/detect-http-raw-header.c @@ -96,9 +96,9 @@ void DetectHttpRawHeaderRegister(void) sigmatch_table[DETECT_HTTP_RAW_HEADER].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_raw_header", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS + 1, DetectEngineInspectBufferGeneric, GetData); + HTP_REQUEST_PROGRESS_HEADERS + 1, DetectEngineInspectBufferGeneric, GetData); 
DetectAppLayerInspectEngineRegister("http_raw_header", ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_HEADERS + 1, DetectEngineInspectBufferGeneric, GetData); + HTP_RESPONSE_PROGRESS_HEADERS + 1, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_raw_header", SIG_FLAG_TOSERVER, 2, PrefilterMpmHttpHeaderRawRequestRegister, NULL, ALPROTO_HTTP1, @@ -303,9 +303,8 @@ static int PrefilterMpmHttpHeaderRawRequestRegister(DetectEngineCtx *de_ctx, Sig pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeaderRaw, - mpm_reg->app_v2.alproto, HTP_REQUEST_HEADERS+1, - pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); + int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeaderRaw, mpm_reg->app_v2.alproto, + HTP_REQUEST_PROGRESS_HEADERS + 1, pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); return r; @@ -319,9 +318,8 @@ static int PrefilterMpmHttpHeaderRawRequestRegister(DetectEngineCtx *de_ctx, Sig pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailerRaw, - mpm_reg->app_v2.alproto, HTP_REQUEST_TRAILER+1, - pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); + r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailerRaw, mpm_reg->app_v2.alproto, + HTP_REQUEST_PROGRESS_TRAILER + 1, pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); } @@ -341,9 +339,8 @@ static int PrefilterMpmHttpHeaderRawResponseRegister(DetectEngineCtx *de_ctx, Si pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeaderRaw, - mpm_reg->app_v2.alproto, HTP_RESPONSE_HEADERS, - pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); + int r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpHeaderRaw, mpm_reg->app_v2.alproto, + HTP_RESPONSE_PROGRESS_HEADERS, 
pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); return r; @@ -357,9 +354,8 @@ static int PrefilterMpmHttpHeaderRawResponseRegister(DetectEngineCtx *de_ctx, Si pectx->mpm_ctx = mpm_ctx; pectx->transforms = &mpm_reg->transforms; - r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailerRaw, - mpm_reg->app_v2.alproto, HTP_RESPONSE_TRAILER, - pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); + r = PrefilterAppendTxEngine(de_ctx, sgh, PrefilterMpmHttpTrailerRaw, mpm_reg->app_v2.alproto, + HTP_RESPONSE_PROGRESS_TRAILER, pectx, PrefilterMpmHttpHeaderRawFree, mpm_reg->pname); if (r != 0) { SCFree(pectx); } diff --git a/src/detect-http-request-line.c b/src/detect-http-request-line.c index 886e643a3eda..287a45209d22 100644 --- a/src/detect-http-request-line.c +++ b/src/detect-http-request-line.c @@ -110,10 +110,10 @@ void DetectHttpRequestLineRegister(void) sigmatch_table[DETECT_AL_HTTP_REQUEST_LINE].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_request_line", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_REQUEST_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_request_line", SIG_FLAG_TOSERVER, 2, - PrefilterGenericMpmRegister, GetData, ALPROTO_HTTP1, HTP_REQUEST_LINE); + PrefilterGenericMpmRegister, GetData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_LINE); DetectAppLayerInspectEngineRegister("http_request_line", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetData2); @@ -158,11 +158,11 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, InspectionBuffer *buffer = InspectionBufferGet(det_ctx, list_id); if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (unlikely(tx->request_line == NULL)) { + if (unlikely(htp_tx_request_line(tx) == NULL)) { return NULL; } - const uint32_t data_len = bstr_len(tx->request_line); 
- const uint8_t *data = bstr_ptr(tx->request_line); + const uint32_t data_len = bstr_len(htp_tx_request_line(tx)); + const uint8_t *data = bstr_ptr(htp_tx_request_line(tx)); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-http-response-line.c b/src/detect-http-response-line.c index 69ee8c2709ab..ab4d7f9dba5b 100644 --- a/src/detect-http-response-line.c +++ b/src/detect-http-response-line.c @@ -109,10 +109,10 @@ void DetectHttpResponseLineRegister(void) sigmatch_table[DETECT_AL_HTTP_RESPONSE_LINE].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_response_line", ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_RESPONSE_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_response_line", SIG_FLAG_TOCLIENT, 2, - PrefilterGenericMpmRegister, GetData, ALPROTO_HTTP1, HTP_RESPONSE_LINE); + PrefilterGenericMpmRegister, GetData, ALPROTO_HTTP1, HTP_RESPONSE_PROGRESS_LINE); DetectAppLayerInspectEngineRegister("http_response_line", ALPROTO_HTTP2, SIG_FLAG_TOCLIENT, HTTP2StateDataServer, DetectEngineInspectBufferGeneric, GetData2); @@ -157,11 +157,11 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, InspectionBuffer *buffer = InspectionBufferGet(det_ctx, list_id); if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (unlikely(tx->response_line == NULL)) { + if (unlikely(htp_tx_response_line(tx) == NULL)) { return NULL; } - const uint32_t data_len = bstr_len(tx->response_line); - const uint8_t *data = bstr_ptr(tx->response_line); + const uint32_t data_len = bstr_len(htp_tx_response_line(tx)); + const uint8_t *data = bstr_ptr(htp_tx_response_line(tx)); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-http-start.c 
b/src/detect-http-start.c index e88ac3cdf68f..fa6e11af0d36 100644 --- a/src/detect-http-start.c +++ b/src/detect-http-start.c @@ -85,20 +85,20 @@ static uint8_t *GetBufferForTX( return NULL; } - bstr *line = NULL; - htp_table_t *headers; + const bstr *line = NULL; + const htp_headers_t *headers; if (flags & STREAM_TOSERVER) { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) <= - HTP_REQUEST_HEADERS) + HTP_REQUEST_PROGRESS_HEADERS) return NULL; - line = tx->request_line; - headers = tx->request_headers; + line = htp_tx_request_line(tx); + headers = htp_tx_request_headers(tx); } else { if (AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, flags) <= - HTP_RESPONSE_HEADERS) + HTP_RESPONSE_PROGRESS_HEADERS) return NULL; - headers = tx->response_headers; - line = tx->response_line; + headers = htp_tx_response_headers(tx); + line = htp_tx_response_line(tx); } if (line == NULL || headers == NULL) return NULL; @@ -115,11 +115,11 @@ static uint8_t *GetBufferForTX( buf->buffer[buf->len++] = '\n'; size_t i = 0; - size_t no_of_headers = htp_table_size(headers); + size_t no_of_headers = htp_headers_size(headers); for (; i < no_of_headers; i++) { - htp_header_t *h = htp_table_get_index(headers, i, NULL); - size_t size1 = bstr_size(h->name); - size_t size2 = bstr_size(h->value); + const htp_header_t *h = htp_headers_get_index(headers, i); + size_t size1 = htp_header_name_len(h); + size_t size2 = htp_header_value_len(h); size_t size = size1 + size2 + 4; if (i + 1 == no_of_headers) size += 2; @@ -129,12 +129,12 @@ static uint8_t *GetBufferForTX( } } - memcpy(buf->buffer + buf->len, bstr_ptr(h->name), bstr_size(h->name)); - buf->len += bstr_size(h->name); + memcpy(buf->buffer + buf->len, htp_header_name_ptr(h), htp_header_name_len(h)); + buf->len += htp_header_name_len(h); buf->buffer[buf->len++] = ':'; buf->buffer[buf->len++] = ' '; - memcpy(buf->buffer + buf->len, bstr_ptr(h->value), bstr_size(h->value)); - buf->len += bstr_size(h->value); + 
memcpy(buf->buffer + buf->len, htp_header_value_ptr(h), htp_header_value_len(h)); + buf->len += htp_header_value_len(h); buf->buffer[buf->len++] = '\r'; buf->buffer[buf->len++] = '\n'; if (i + 1 == no_of_headers) { @@ -189,14 +189,14 @@ void DetectHttpStartRegister(void) sigmatch_table[DETECT_AL_HTTP_START].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetBuffer1ForTX, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetBuffer1ForTX, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerMpmRegister(BUFFER_NAME, SIG_FLAG_TOCLIENT, 2, PrefilterGenericMpmRegister, - GetBuffer1ForTX, ALPROTO_HTTP1, HTP_RESPONSE_HEADERS); + GetBuffer1ForTX, ALPROTO_HTTP1, HTP_RESPONSE_PROGRESS_HEADERS); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); DetectAppLayerInspectEngineRegister(BUFFER_NAME, ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); + HTP_RESPONSE_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetBuffer1ForTX); DetectBufferTypeSetDescriptionByName(BUFFER_NAME, BUFFER_DESC); diff --git a/src/detect-http-stat-code.c b/src/detect-http-stat-code.c index 37dfb2efbdcc..965027d67a6c 100644 --- a/src/detect-http-stat-code.c +++ b/src/detect-http-stat-code.c @@ -99,10 +99,10 @@ void DetectHttpStatCodeRegister (void) sigmatch_table[DETECT_HTTP_STAT_CODE].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_stat_code", ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_RESPONSE_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_stat_code", SIG_FLAG_TOCLIENT, 4, PrefilterGenericMpmRegister, - GetData, 
ALPROTO_HTTP1, HTP_RESPONSE_LINE); + GetData, ALPROTO_HTTP1, HTP_RESPONSE_PROGRESS_LINE); DetectAppLayerInspectEngineRegister("http_stat_code", ALPROTO_HTTP2, SIG_FLAG_TOCLIENT, HTTP2StateDataServer, DetectEngineInspectBufferGeneric, GetData2); @@ -161,11 +161,11 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->response_status == NULL) + if (htp_tx_response_status(tx) == NULL) return NULL; - const uint32_t data_len = bstr_len(tx->response_status); - const uint8_t *data = bstr_ptr(tx->response_status); + const uint32_t data_len = bstr_len(htp_tx_response_status(tx)); + const uint8_t *data = bstr_ptr(htp_tx_response_status(tx)); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-http-stat-msg.c b/src/detect-http-stat-msg.c index b1a485d7a933..da17a579fb9e 100644 --- a/src/detect-http-stat-msg.c +++ b/src/detect-http-stat-msg.c @@ -109,10 +109,10 @@ void DetectHttpStatMsgRegister (void) sigmatch_table[DETECT_HTTP_STAT_MSG].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_stat_msg", ALPROTO_HTTP1, SIG_FLAG_TOCLIENT, - HTP_RESPONSE_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_RESPONSE_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_stat_msg", SIG_FLAG_TOCLIENT, 3, PrefilterGenericMpmRegister, - GetData, ALPROTO_HTTP1, HTP_RESPONSE_LINE); + GetData, ALPROTO_HTTP1, HTP_RESPONSE_PROGRESS_LINE); DetectAppLayerInspectEngineRegister("http_stat_msg", ALPROTO_HTTP2, SIG_FLAG_TOCLIENT, HTTP2StateDataServer, DetectEngineInspectBufferGeneric, GetData2); @@ -170,11 +170,11 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->response_message == NULL) + if (htp_tx_response_message(tx) == NULL) return NULL; - const 
uint32_t data_len = bstr_len(tx->response_message); - const uint8_t *data = bstr_ptr(tx->response_message); + const uint32_t data_len = bstr_len(htp_tx_response_message(tx)); + const uint8_t *data = bstr_ptr(htp_tx_response_message(tx)); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-http-ua.c b/src/detect-http-ua.c index 8babd9adcb50..4b3206856932 100644 --- a/src/detect-http-ua.c +++ b/src/detect-http-ua.c @@ -99,10 +99,10 @@ void DetectHttpUARegister(void) sigmatch_table[DETECT_HTTP_UA].flags |= SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_user_agent", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_HEADERS, DetectEngineInspectBufferGeneric, GetData); + HTP_REQUEST_PROGRESS_HEADERS, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_user_agent", SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetData, ALPROTO_HTTP1, HTP_REQUEST_HEADERS); + GetData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_HEADERS); DetectAppLayerInspectEngineRegister("http_user_agent", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetData2); @@ -161,18 +161,17 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, if (buffer->inspect == NULL) { htp_tx_t *tx = (htp_tx_t *)txv; - if (tx->request_headers == NULL) + if (htp_tx_request_headers(tx) == NULL) return NULL; - htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers, - "User-Agent"); - if (h == NULL || h->value == NULL) { + const htp_header_t *h = htp_tx_request_header(tx, "User-Agent"); + if (h == NULL || htp_header_value(h) == NULL) { SCLogDebug("HTTP UA header not present in this request"); return NULL; } - const uint32_t data_len = bstr_len(h->value); - const uint8_t *data = bstr_ptr(h->value); + const uint32_t data_len = htp_header_value_len(h); + const uint8_t *data = htp_header_value_ptr(h); 
InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-http-uri.c b/src/detect-http-uri.c index 12c6f8788549..0cbd2ecf9c56 100644 --- a/src/detect-http-uri.c +++ b/src/detect-http-uri.c @@ -108,10 +108,10 @@ void DetectHttpUriRegister (void) sigmatch_table[DETECT_HTTP_URI].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_uri", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_LINE, DetectEngineInspectBufferGeneric, GetData); + HTP_REQUEST_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetData); DetectAppLayerMpmRegister("http_uri", SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetData, ALPROTO_HTTP1, HTP_REQUEST_LINE); + GetData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_LINE); DetectAppLayerInspectEngineRegister("http_uri", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, HTTP2StateDataClient, DetectEngineInspectBufferGeneric, GetData2); @@ -146,10 +146,10 @@ void DetectHttpUriRegister (void) sigmatch_table[DETECT_HTTP_URI_RAW].flags |= SIGMATCH_NOOPT|SIGMATCH_INFO_STICKY_BUFFER; DetectAppLayerInspectEngineRegister("http_raw_uri", ALPROTO_HTTP1, SIG_FLAG_TOSERVER, - HTP_REQUEST_LINE, DetectEngineInspectBufferGeneric, GetRawData); + HTP_REQUEST_PROGRESS_LINE, DetectEngineInspectBufferGeneric, GetRawData); DetectAppLayerMpmRegister("http_raw_uri", SIG_FLAG_TOSERVER, 2, PrefilterGenericMpmRegister, - GetRawData, ALPROTO_HTTP1, HTP_REQUEST_LINE); + GetRawData, ALPROTO_HTTP1, HTP_REQUEST_PROGRESS_LINE); // no difference between raw and decoded uri for HTTP2 DetectAppLayerInspectEngineRegister("http_raw_uri", ALPROTO_HTTP2, SIG_FLAG_TOSERVER, @@ -226,15 +226,12 @@ static InspectionBuffer *GetData(DetectEngineThreadCtx *det_ctx, InspectionBuffer *buffer = InspectionBufferGet(det_ctx, list_id); if (!buffer->initialized) { htp_tx_t *tx = (htp_tx_t *)txv; - HtpTxUserData *tx_ud = htp_tx_get_user_data(tx); - - if (tx_ud == NULL || 
tx_ud->request_uri_normalized == NULL) { - SCLogDebug("no tx_id or uri"); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); + if (request_uri_normalized == NULL) return NULL; - } - const uint32_t data_len = bstr_len(tx_ud->request_uri_normalized); - const uint8_t *data = bstr_ptr(tx_ud->request_uri_normalized); + const uint32_t data_len = bstr_len(request_uri_normalized); + const uint8_t *data = bstr_ptr(request_uri_normalized); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); @@ -321,11 +318,11 @@ static InspectionBuffer *GetRawData(DetectEngineThreadCtx *det_ctx, InspectionBuffer *buffer = InspectionBufferGet(det_ctx, list_id); if (!buffer->initialized) { htp_tx_t *tx = (htp_tx_t *)txv; - if (unlikely(tx->request_uri == NULL)) { + if (unlikely(htp_tx_request_uri(tx) == NULL)) { return NULL; } - const uint32_t data_len = bstr_len(tx->request_uri); - const uint8_t *data = bstr_ptr(tx->request_uri); + const uint32_t data_len = bstr_len(htp_tx_request_uri(tx)); + const uint8_t *data = bstr_ptr(htp_tx_request_uri(tx)); InspectionBufferSetup(det_ctx, list_id, buffer, data, data_len); InspectionBufferApplyTransforms(buffer, transforms); diff --git a/src/detect-lua.c b/src/detect-lua.c index bc01d6df28d4..418e423d6ef1 100644 --- a/src/detect-lua.c +++ b/src/detect-lua.c @@ -376,12 +376,12 @@ static int DetectLuaMatch (DetectEngineThreadCtx *det_ctx, if (tx == NULL) continue; - if ((tlua->flags & FLAG_DATATYPE_HTTP_REQUEST_LINE) && tx->request_line != NULL && - bstr_len(tx->request_line) > 0) { + if ((tlua->flags & FLAG_DATATYPE_HTTP_REQUEST_LINE) && + htp_tx_request_line(tx) != NULL && bstr_len(htp_tx_request_line(tx)) > 0) { lua_pushliteral(tlua->luastate, "http.request_line"); /* stack at -2 */ LuaPushStringBuffer(tlua->luastate, - (const uint8_t *)bstr_ptr(tx->request_line), - bstr_len(tx->request_line)); + (const uint8_t *)bstr_ptr(htp_tx_request_line(tx)), + 
bstr_len(htp_tx_request_line(tx))); lua_settable(tlua->luastate, -3); } } @@ -422,12 +422,12 @@ static int DetectLuaAppMatchCommon (DetectEngineThreadCtx *det_ctx, htp_tx_t *tx = NULL; tx = AppLayerParserGetTx(IPPROTO_TCP, ALPROTO_HTTP1, htp_state, det_ctx->tx_id); if (tx != NULL) { - if ((tlua->flags & FLAG_DATATYPE_HTTP_REQUEST_LINE) && tx->request_line != NULL && - bstr_len(tx->request_line) > 0) { + if ((tlua->flags & FLAG_DATATYPE_HTTP_REQUEST_LINE) && + htp_tx_request_line(tx) != NULL && bstr_len(htp_tx_request_line(tx)) > 0) { lua_pushliteral(tlua->luastate, "http.request_line"); /* stack at -2 */ LuaPushStringBuffer(tlua->luastate, - (const uint8_t *)bstr_ptr(tx->request_line), - bstr_len(tx->request_line)); + (const uint8_t *)bstr_ptr(htp_tx_request_line(tx)), + bstr_len(htp_tx_request_line(tx))); lua_settable(tlua->luastate, -3); } } diff --git a/src/detect-parse.c b/src/detect-parse.c index 3b03dfb92b36..1a6937bfcf1d 100644 --- a/src/detect-parse.c +++ b/src/detect-parse.c @@ -93,8 +93,8 @@ void DetectFileRegisterFileProtocols(DetectFileHandlerTableElmt *reg) { .al_proto = ALPROTO_FTPDATA, .direction = SIG_FLAG_TOSERVER | SIG_FLAG_TOCLIENT }, { .al_proto = ALPROTO_HTTP1, .direction = SIG_FLAG_TOSERVER | SIG_FLAG_TOCLIENT, - .to_client_progress = HTP_RESPONSE_BODY, - .to_server_progress = HTP_REQUEST_BODY }, + .to_client_progress = HTP_RESPONSE_PROGRESS_BODY, + .to_server_progress = HTP_REQUEST_PROGRESS_BODY }, { .al_proto = ALPROTO_HTTP2, .direction = SIG_FLAG_TOSERVER | SIG_FLAG_TOCLIENT, .to_client_progress = HTTP2StateDataServer, diff --git a/src/log-httplog.c b/src/log-httplog.c index ae9e7df54a93..f30bdf6e3330 100644 --- a/src/log-httplog.c +++ b/src/log-httplog.c @@ -99,12 +99,12 @@ typedef struct LogHttpLogThread_ { } LogHttpLogThread; /* Retrieves the selected cookie value */ -static uint32_t GetCookieValue(uint8_t *rawcookies, uint32_t rawcookies_len, char *cookiename, - uint8_t **cookievalue) +static uint32_t GetCookieValue(const uint8_t 
*rawcookies, uint32_t rawcookies_len, char *cookiename, + const uint8_t **cookievalue) { - uint8_t *p = rawcookies; - uint8_t *cn = p; /* ptr to cookie name start */ - uint8_t *cv = NULL; /* ptr to cookie value start */ + const uint8_t *p = rawcookies; + const uint8_t *cn = p; /* ptr to cookie name start */ + const uint8_t *cv = NULL; /* ptr to cookie value start */ while (p < rawcookies + rawcookies_len) { if (cv == NULL && *p == '=') { cv = p + 1; @@ -133,11 +133,11 @@ static void LogHttpLogCustom(LogHttpLogThread *aft, htp_tx_t *tx, const SCTime_t size_t datalen; char buf[128]; - uint8_t *cvalue = NULL; + const uint8_t *cvalue = NULL; uint32_t cvalue_len = 0; - htp_header_t *h_request_hdr; - htp_header_t *h_response_hdr; + const htp_header_t *h_request_hdr; + const htp_header_t *h_response_hdr; for (i = 0; i < httplog_ctx->cf->cf_n; i++) { h_request_hdr = NULL; @@ -182,80 +182,74 @@ static void LogHttpLogCustom(LogHttpLogThread *aft, htp_tx_t *tx, const SCTime_t break; case LOG_HTTP_CF_REQUEST_METHOD: /* METHOD */ - if (tx->request_method != NULL) { - PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, - aft->buffer->size, (uint8_t *)bstr_ptr(tx->request_method), - bstr_len(tx->request_method)); - } else { - MemBufferWriteString(aft->buffer, LOG_CF_NONE); - } + if (htp_tx_request_method(tx) != NULL) { + PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, + (uint8_t *)bstr_ptr(htp_tx_request_method(tx)), + bstr_len(htp_tx_request_method(tx))); + } else { + MemBufferWriteString(aft->buffer, LOG_CF_NONE); + } break; case LOG_HTTP_CF_REQUEST_URI: /* URI */ - if (tx->request_uri != NULL) { - datalen = node->maxlen; - if (datalen == 0 || datalen > bstr_len(tx->request_uri)) { - datalen = bstr_len(tx->request_uri); - } - PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, - aft->buffer->size, (uint8_t *)bstr_ptr(tx->request_uri), - datalen); - } else { - MemBufferWriteString(aft->buffer, LOG_CF_NONE); + if 
(htp_tx_request_uri(tx) != NULL) { + datalen = node->maxlen; + if (datalen == 0 || datalen > bstr_len(htp_tx_request_uri(tx))) { + datalen = bstr_len(htp_tx_request_uri(tx)); } + PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, + (uint8_t *)bstr_ptr(htp_tx_request_uri(tx)), datalen); + } else { + MemBufferWriteString(aft->buffer, LOG_CF_NONE); + } break; case LOG_HTTP_CF_REQUEST_HOST: /* HOSTNAME */ - if (tx->request_hostname != NULL) - { - datalen = node->maxlen; - if (datalen == 0 || datalen > bstr_len(tx->request_hostname)) { - datalen = bstr_len(tx->request_hostname); - } - PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, - aft->buffer->size, (uint8_t *)bstr_ptr(tx->request_hostname), - datalen); - } else { - MemBufferWriteString(aft->buffer, LOG_CF_NONE); + if (htp_tx_request_hostname(tx) != NULL) { + datalen = node->maxlen; + if (datalen == 0 || datalen > bstr_len(htp_tx_request_hostname(tx))) { + datalen = bstr_len(htp_tx_request_hostname(tx)); } + PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, + (uint8_t *)bstr_ptr(htp_tx_request_hostname(tx)), datalen); + } else { + MemBufferWriteString(aft->buffer, LOG_CF_NONE); + } break; case LOG_HTTP_CF_REQUEST_PROTOCOL: /* PROTOCOL */ - if (tx->request_protocol != NULL) { - PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, - aft->buffer->size, (uint8_t *)bstr_ptr(tx->request_protocol), - bstr_len(tx->request_protocol)); - } else { - MemBufferWriteString(aft->buffer, LOG_CF_NONE); - } + if (htp_tx_request_protocol(tx) != NULL) { + PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, + (uint8_t *)bstr_ptr(htp_tx_request_protocol(tx)), + bstr_len(htp_tx_request_protocol(tx))); + } else { + MemBufferWriteString(aft->buffer, LOG_CF_NONE); + } break; case LOG_HTTP_CF_REQUEST_HEADER: /* REQUEST HEADER */ - if (tx->request_headers != NULL) { - h_request_hdr = 
htp_table_get_c(tx->request_headers, node->data); - } - if (h_request_hdr != NULL) { - datalen = node->maxlen; - if (datalen == 0 || datalen > bstr_len(h_request_hdr->value)) { - datalen = bstr_len(h_request_hdr->value); - } - PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, - aft->buffer->size, (uint8_t *)bstr_ptr(h_request_hdr->value), - datalen); - } else { - MemBufferWriteString(aft->buffer, LOG_CF_NONE); + h_request_hdr = htp_tx_request_header(tx, node->data); + if (h_request_hdr != NULL) { + datalen = node->maxlen; + if (datalen == 0 || datalen > (size_t)htp_header_value_len(h_request_hdr)) { + datalen = htp_header_value_len(h_request_hdr); } + PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, + htp_header_value_ptr(h_request_hdr), datalen); + } else { + MemBufferWriteString(aft->buffer, LOG_CF_NONE); + } break; case LOG_HTTP_CF_REQUEST_COOKIE: /* REQUEST COOKIE */ - if (tx->request_headers != NULL) { - h_request_hdr = htp_table_get_c(tx->request_headers, "Cookie"); - if (h_request_hdr != NULL) { - cvalue_len = GetCookieValue((uint8_t *)bstr_ptr(h_request_hdr->value), - (uint32_t)bstr_len(h_request_hdr->value), (char *)node->data, - &cvalue); - } + if (htp_tx_request_headers(tx) != NULL) { + h_request_hdr = htp_tx_request_header(tx, "Cookie"); + if (h_request_hdr != NULL) { + cvalue_len = GetCookieValue(htp_header_value_ptr(h_request_hdr), + (uint32_t)htp_header_value_len(h_request_hdr), (char *)node->data, + &cvalue); } + } if (cvalue_len > 0 && cvalue != NULL) { datalen = node->maxlen; if (datalen == 0 || datalen > cvalue_len) { @@ -269,40 +263,40 @@ static void LogHttpLogCustom(LogHttpLogThread *aft, htp_tx_t *tx, const SCTime_t break; case LOG_HTTP_CF_REQUEST_LEN: /* REQUEST LEN */ - MemBufferWriteString(aft->buffer, "%"PRIuMAX"", (uintmax_t)tx->request_message_len); - break; + MemBufferWriteString( + aft->buffer, "%" PRIuMAX "", (uintmax_t)htp_tx_request_message_len(tx)); + break; case 
LOG_HTTP_CF_RESPONSE_STATUS: /* RESPONSE STATUS */ - if (tx->response_status != NULL) { - PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, - aft->buffer->size, (uint8_t *)bstr_ptr(tx->response_status), - bstr_len(tx->response_status)); - } else { - MemBufferWriteString(aft->buffer, LOG_CF_NONE); - } + if (htp_tx_response_status(tx) != NULL) { + PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, + (uint8_t *)bstr_ptr(htp_tx_response_status(tx)), + bstr_len(htp_tx_response_status(tx))); + } else { + MemBufferWriteString(aft->buffer, LOG_CF_NONE); + } break; case LOG_HTTP_CF_RESPONSE_HEADER: /* RESPONSE HEADER */ - if (tx->response_headers != NULL) { - h_response_hdr = htp_table_get_c(tx->response_headers, - node->data); - } + if (htp_tx_response_headers(tx) != NULL) { + h_response_hdr = htp_tx_response_header(tx, node->data); + } if (h_response_hdr != NULL) { datalen = node->maxlen; - if (datalen == 0 || datalen > bstr_len(h_response_hdr->value)) { - datalen = bstr_len(h_response_hdr->value); + if (datalen == 0 || datalen > (size_t)htp_header_value_len(h_response_hdr)) { + datalen = htp_header_value_len(h_response_hdr); } PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, - aft->buffer->size, (uint8_t *)bstr_ptr(h_response_hdr->value), - datalen); + aft->buffer->size, htp_header_value_ptr(h_response_hdr), datalen); } else { MemBufferWriteString(aft->buffer, LOG_CF_NONE); } break; case LOG_HTTP_CF_RESPONSE_LEN: /* RESPONSE LEN */ - MemBufferWriteString(aft->buffer, "%"PRIuMAX"", (uintmax_t)tx->response_message_len); - break; + MemBufferWriteString( + aft->buffer, "%" PRIuMAX "", (uintmax_t)htp_tx_response_message_len(tx)); + break; default: /* NO MATCH */ MemBufferWriteString(aft->buffer, LOG_CF_NONE); @@ -318,14 +312,11 @@ static void LogHttpLogExtended(LogHttpLogThread *aft, htp_tx_t *tx) LOG_CF_WRITE_STAR_SEPARATOR(aft->buffer); /* referer */ - htp_header_t *h_referer = NULL; - if 
(tx->request_headers != NULL) { - h_referer = htp_table_get_c(tx->request_headers, "referer"); - } + const htp_header_t *h_referer = htp_tx_request_header(tx, "referer"); + if (h_referer != NULL) { PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(h_referer->value), - bstr_len(h_referer->value)); + htp_header_value_ptr(h_referer), htp_header_value_len(h_referer)); } else { MemBufferWriteString(aft->buffer, ""); } @@ -333,37 +324,37 @@ static void LogHttpLogExtended(LogHttpLogThread *aft, htp_tx_t *tx) LOG_CF_WRITE_STAR_SEPARATOR(aft->buffer); /* method */ - if (tx->request_method != NULL) { + if (htp_tx_request_method(tx) != NULL) { PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(tx->request_method), - bstr_len(tx->request_method)); + (uint8_t *)bstr_ptr(htp_tx_request_method(tx)), + bstr_len(htp_tx_request_method(tx))); } LOG_CF_WRITE_STAR_SEPARATOR(aft->buffer); /* protocol */ - if (tx->request_protocol != NULL) { + if (htp_tx_request_protocol(tx) != NULL) { PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(tx->request_protocol), - bstr_len(tx->request_protocol)); + (uint8_t *)bstr_ptr(htp_tx_request_protocol(tx)), + bstr_len(htp_tx_request_protocol(tx))); } else { MemBufferWriteString(aft->buffer, ""); } LOG_CF_WRITE_STAR_SEPARATOR(aft->buffer); /* response status */ - if (tx->response_status != NULL) { + if (htp_tx_response_status(tx) != NULL) { PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(tx->response_status), - bstr_len(tx->response_status)); + (uint8_t *)bstr_ptr(htp_tx_response_status(tx)), + bstr_len(htp_tx_response_status(tx))); /* Redirect? 
*/ - if ((tx->response_status_number > 300) && ((tx->response_status_number) < 303)) { - htp_header_t *h_location = htp_table_get_c(tx->response_headers, "location"); + if ((htp_tx_response_status_number(tx) > 300) && + ((htp_tx_response_status_number(tx)) < 303)) { + const htp_header_t *h_location = htp_tx_response_header(tx, "location"); if (h_location != NULL) { MemBufferWriteString(aft->buffer, " => "); PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(h_location->value), - bstr_len(h_location->value)); + htp_header_value_ptr(h_location), htp_header_value_len(h_location)); } } } else { @@ -372,7 +363,8 @@ static void LogHttpLogExtended(LogHttpLogThread *aft, htp_tx_t *tx) /* length */ LOG_CF_WRITE_STAR_SEPARATOR(aft->buffer); - MemBufferWriteString(aft->buffer, "%"PRIuMAX" bytes", (uintmax_t)tx->response_message_len); + MemBufferWriteString( + aft->buffer, "%" PRIuMAX " bytes", (uintmax_t)htp_tx_response_message_len(tx)); } static TmEcode LogHttpLogIPWrapper(ThreadVars *tv, void *data, const Packet *p, Flow *f, HtpState *htp_state, htp_tx_t *tx, uint64_t tx_id, int ipproto) @@ -432,32 +424,27 @@ static TmEcode LogHttpLogIPWrapper(ThreadVars *tv, void *data, const Packet *p, MemBufferWriteString(aft->buffer, "%s ", timebuf); /* hostname */ - if (tx->request_hostname != NULL) { + if (htp_tx_request_hostname(tx) != NULL) { PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(tx->request_hostname), - bstr_len(tx->request_hostname)); + (uint8_t *)bstr_ptr(htp_tx_request_hostname(tx)), + bstr_len(htp_tx_request_hostname(tx))); } else { MemBufferWriteString(aft->buffer, ""); } LOG_CF_WRITE_STAR_SEPARATOR(aft->buffer); /* uri */ - if (tx->request_uri != NULL) { + if (htp_tx_request_uri(tx) != NULL) { PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(tx->request_uri), - bstr_len(tx->request_uri)); + 
(uint8_t *)bstr_ptr(htp_tx_request_uri(tx)), bstr_len(htp_tx_request_uri(tx))); } LOG_CF_WRITE_STAR_SEPARATOR(aft->buffer); /* user agent */ - htp_header_t *h_user_agent = NULL; - if (tx->request_headers != NULL) { - h_user_agent = htp_table_get_c(tx->request_headers, "user-agent"); - } + const htp_header_t *h_user_agent = htp_tx_request_header(tx, "user-agent"); if (h_user_agent != NULL) { PrintRawUriBuf((char *)aft->buffer->buffer, &aft->buffer->offset, aft->buffer->size, - (uint8_t *)bstr_ptr(h_user_agent->value), - bstr_len(h_user_agent->value)); + htp_header_value_ptr(h_user_agent), htp_header_value_len(h_user_agent)); } else { MemBufferWriteString(aft->buffer, ""); } diff --git a/src/output-json-http.c b/src/output-json-http.c index 87c8b6ddb85a..ede48357429c 100644 --- a/src/output-json-http.c +++ b/src/output-json-http.c @@ -197,9 +197,9 @@ struct { static void EveHttpLogJSONBasic(JsonBuilder *js, htp_tx_t *tx) { /* hostname */ - if (tx->request_hostname != NULL) { - jb_set_string_from_bytes(js, "hostname", bstr_ptr(tx->request_hostname), - (uint32_t)bstr_len(tx->request_hostname)); + if (htp_tx_request_hostname(tx) != NULL) { + jb_set_string_from_bytes(js, "hostname", bstr_ptr(htp_tx_request_hostname(tx)), + (uint32_t)bstr_len(htp_tx_request_hostname(tx))); } /* port */ @@ -208,51 +208,52 @@ static void EveHttpLogJSONBasic(JsonBuilder *js, htp_tx_t *tx) * There is no connection (from the suricata point of view) between this * port and the TCP destination port of the flow. 
*/ - if (tx->request_port_number >= 0) { - jb_set_uint(js, "http_port", tx->request_port_number); + if (htp_tx_request_port_number(tx) >= 0) { + jb_set_uint(js, "http_port", htp_tx_request_port_number(tx)); } /* uri */ - if (tx->request_uri != NULL) { - jb_set_string_from_bytes( - js, "url", bstr_ptr(tx->request_uri), (uint32_t)bstr_len(tx->request_uri)); + if (htp_tx_request_uri(tx) != NULL) { + jb_set_string_from_bytes(js, "url", bstr_ptr(htp_tx_request_uri(tx)), + (uint32_t)bstr_len(htp_tx_request_uri(tx))); } - if (tx->request_headers != NULL) { + if (htp_tx_request_headers(tx) != NULL) { /* user agent */ - htp_header_t *h_user_agent = htp_table_get_c(tx->request_headers, "user-agent"); + const htp_header_t *h_user_agent = htp_tx_request_header(tx, "user-agent"); if (h_user_agent != NULL) { - jb_set_string_from_bytes(js, "http_user_agent", bstr_ptr(h_user_agent->value), - (uint32_t)bstr_len(h_user_agent->value)); + jb_set_string_from_bytes(js, "http_user_agent", htp_header_value_ptr(h_user_agent), + (uint32_t)htp_header_value_len(h_user_agent)); } /* x-forwarded-for */ - htp_header_t *h_x_forwarded_for = htp_table_get_c(tx->request_headers, "x-forwarded-for"); + const htp_header_t *h_x_forwarded_for = htp_tx_request_header(tx, "x-forwarded-for"); if (h_x_forwarded_for != NULL) { - jb_set_string_from_bytes(js, "xff", bstr_ptr(h_x_forwarded_for->value), - (uint32_t)bstr_len(h_x_forwarded_for->value)); + jb_set_string_from_bytes(js, "xff", htp_header_value_ptr(h_x_forwarded_for), + (uint32_t)htp_header_value_len(h_x_forwarded_for)); } } /* content-type */ - if (tx->response_headers != NULL) { - htp_header_t *h_content_type = htp_table_get_c(tx->response_headers, "content-type"); + if (htp_tx_response_headers(tx) != NULL) { + const htp_header_t *h_content_type = htp_tx_response_header(tx, "content-type"); if (h_content_type != NULL) { - const size_t size = bstr_len(h_content_type->value) * 2 + 1; + const size_t size = htp_header_value_len(h_content_type) * 2 + 1; 
char string[size]; - BytesToStringBuffer(bstr_ptr(h_content_type->value), bstr_len(h_content_type->value), string, size); + BytesToStringBuffer(htp_header_value_ptr(h_content_type), + htp_header_value_len(h_content_type), string, size); char *p = strchr(string, ';'); if (p != NULL) *p = '\0'; jb_set_string(js, "http_content_type", string); } - htp_header_t *h_content_range = htp_table_get_c(tx->response_headers, "content-range"); + const htp_header_t *h_content_range = htp_tx_response_header(tx, "content-range"); if (h_content_range != NULL) { jb_open_object(js, "content_range"); - jb_set_string_from_bytes(js, "raw", bstr_ptr(h_content_range->value), - (uint32_t)bstr_len(h_content_range->value)); + jb_set_string_from_bytes(js, "raw", htp_header_value_ptr(h_content_range), + (uint32_t)htp_header_value_len(h_content_range)); HTTPContentRange crparsed; - if (HTPParseContentRange(h_content_range->value, &crparsed) == 0) { + if (HTPParseContentRange(htp_header_value(h_content_range), &crparsed) == 0) { if (crparsed.start >= 0) jb_set_uint(js, "start", crparsed.start); if (crparsed.end >= 0) @@ -268,62 +269,60 @@ static void EveHttpLogJSONBasic(JsonBuilder *js, htp_tx_t *tx) static void EveHttpLogJSONExtended(JsonBuilder *js, htp_tx_t *tx) { /* referer */ - htp_header_t *h_referer = NULL; - if (tx->request_headers != NULL) { - h_referer = htp_table_get_c(tx->request_headers, "referer"); + const htp_header_t *h_referer = NULL; + if (htp_tx_request_headers(tx) != NULL) { + h_referer = htp_tx_request_header(tx, "referer"); } if (h_referer != NULL) { - jb_set_string_from_bytes( - js, "http_refer", bstr_ptr(h_referer->value), (uint32_t)bstr_len(h_referer->value)); + jb_set_string_from_bytes(js, "http_refer", htp_header_value_ptr(h_referer), + (uint32_t)htp_header_value_len(h_referer)); } /* method */ - if (tx->request_method != NULL) { - jb_set_string_from_bytes(js, "http_method", bstr_ptr(tx->request_method), - (uint32_t)bstr_len(tx->request_method)); + if 
(htp_tx_request_method(tx) != NULL) { + jb_set_string_from_bytes(js, "http_method", bstr_ptr(htp_tx_request_method(tx)), + (uint32_t)bstr_len(htp_tx_request_method(tx))); } /* protocol */ - if (tx->request_protocol != NULL) { - jb_set_string_from_bytes(js, "protocol", bstr_ptr(tx->request_protocol), - (uint32_t)bstr_len(tx->request_protocol)); + if (htp_tx_request_protocol(tx) != NULL) { + jb_set_string_from_bytes(js, "protocol", bstr_ptr(htp_tx_request_protocol(tx)), + (uint32_t)bstr_len(htp_tx_request_protocol(tx))); } - /* response status: from libhtp: - * "Response status code, available only if we were able to parse it, HTP_STATUS_INVALID - * otherwise. HTP_STATUS_UNKNOWN until parsing is attempted" .*/ - const int resp = tx->response_status_number; + /* response status */ + const int resp = htp_tx_response_status_number(tx); if (resp > 0) { jb_set_uint(js, "status", (uint32_t)resp); - } else if (tx->response_status != NULL) { - jb_set_string_from_bytes(js, "status_string", bstr_ptr(tx->response_status), - (uint32_t)bstr_len(tx->response_status)); + } else if (htp_tx_response_status(tx) != NULL) { + jb_set_string_from_bytes(js, "status_string", bstr_ptr(htp_tx_response_status(tx)), + (uint32_t)bstr_len(htp_tx_response_status(tx))); } - htp_header_t *h_location = htp_table_get_c(tx->response_headers, "location"); + const htp_header_t *h_location = htp_tx_response_header(tx, "location"); if (h_location != NULL) { - jb_set_string_from_bytes( - js, "redirect", bstr_ptr(h_location->value), (uint32_t)bstr_len(h_location->value)); + jb_set_string_from_bytes(js, "redirect", htp_header_value_ptr(h_location), + (uint32_t)htp_header_value_len(h_location)); } /* length */ - jb_set_uint(js, "length", tx->response_message_len); + jb_set_uint(js, "length", htp_tx_response_message_len(tx)); } static void EveHttpLogJSONHeaders( JsonBuilder *js, uint32_t direction, htp_tx_t *tx, LogHttpFileCtx *http_ctx) { - htp_table_t * headers = direction & LOG_HTTP_REQ_HEADERS ? 
- tx->request_headers : tx->response_headers; + const htp_headers_t *headers = direction & LOG_HTTP_REQ_HEADERS ? htp_tx_request_headers(tx) + : htp_tx_response_headers(tx); char name[MAX_SIZE_HEADER_NAME] = {0}; char value[MAX_SIZE_HEADER_VALUE] = {0}; - size_t n = htp_table_size(headers); + size_t n = htp_headers_size(headers); JsonBuilderMark mark = { 0, 0, 0 }; jb_get_mark(js, &mark); bool array_empty = true; jb_open_array(js, direction & LOG_HTTP_REQ_HEADERS ? "request_headers" : "response_headers"); for (size_t i = 0; i < n; i++) { - htp_header_t *h = htp_table_get_index(headers, i, NULL); + const htp_header_t *h = htp_headers_get_index(headers, i); if ((http_ctx->flags & direction) == 0 && http_ctx->fields != 0) { bool tolog = false; for (HttpField f = HTTP_FIELD_ACCEPT; f < HTTP_FIELD_SIZE; f++) { @@ -333,7 +332,7 @@ static void EveHttpLogJSONHeaders( if (((http_ctx->flags & LOG_HTTP_EXTENDED) == 0) || ((http_ctx->flags & LOG_HTTP_EXTENDED) != (http_fields[f].flags & LOG_HTTP_EXTENDED))) { - if (bstr_cmp_c_nocase(h->name, http_fields[f].htp_field) == 0) { + if (bstr_cmp_c_nocase(htp_header_name(h), http_fields[f].htp_field) == 0) { tolog = true; break; } @@ -346,14 +345,16 @@ static void EveHttpLogJSONHeaders( } array_empty = false; jb_start_object(js); - size_t size_name = bstr_len(h->name) < MAX_SIZE_HEADER_NAME - 1 ? - bstr_len(h->name) : MAX_SIZE_HEADER_NAME - 1; - memcpy(name, bstr_ptr(h->name), size_name); + size_t size_name = htp_header_name_len(h) < MAX_SIZE_HEADER_NAME - 1 + ? htp_header_name_len(h) + : MAX_SIZE_HEADER_NAME - 1; + memcpy(name, htp_header_name_ptr(h), size_name); name[size_name] = '\0'; jb_set_string(js, "name", name); - size_t size_value = bstr_len(h->value) < MAX_SIZE_HEADER_VALUE - 1 ? - bstr_len(h->value) : MAX_SIZE_HEADER_VALUE - 1; - memcpy(value, bstr_ptr(h->value), size_value); + size_t size_value = htp_header_value_len(h) < MAX_SIZE_HEADER_VALUE - 1 + ? 
htp_header_value_len(h) + : MAX_SIZE_HEADER_VALUE - 1; + memcpy(value, htp_header_value_ptr(h), size_value); value[size_value] = '\0'; jb_set_string(js, "value", value); jb_close(js); diff --git a/src/suricata.c b/src/suricata.c index 7c238c48cc5e..2db33c156c53 100644 --- a/src/suricata.c +++ b/src/suricata.c @@ -732,9 +732,7 @@ static void PrintBuildInfo(void) #ifdef HAVE_LIBNET11 strlcat(features, "LIBNET1.1 ", sizeof(features)); #endif -#ifdef HAVE_HTP_URI_NORMALIZE_HOOK strlcat(features, "HAVE_HTP_URI_NORMALIZE_HOOK ", sizeof(features)); -#endif #ifdef PCRE2_HAVE_JIT strlcat(features, "PCRE_JIT ", sizeof(features)); #endif @@ -879,8 +877,7 @@ static void PrintBuildInfo(void) #endif printf("thread local storage method: %s\n", tls); - printf("compiled with %s, linked against %s\n", - HTP_VERSION_STRING_FULL, htp_get_version()); + printf("compiled with %s\n", htp_get_version()); printf("\n"); #include "build-info.h" } diff --git a/src/util-file-swf-decompression.c b/src/util-file-swf-decompression.c index 378b4f96e942..a932ab637f40 100644 --- a/src/util-file-swf-decompression.c +++ b/src/util-file-swf-decompression.c @@ -84,6 +84,7 @@ int FileSwfZlibDecompression(DetectEngineThreadCtx *det_ctx, uint8_t *compressed_data, uint32_t compressed_data_len, uint8_t *decompressed_data, uint32_t decompressed_data_len) { + printf("lolz %d\n", __LINE__); int ret = 1; z_stream infstream; memset(&infstream, 0, sizeof(infstream)); @@ -97,12 +98,14 @@ int FileSwfZlibDecompression(DetectEngineThreadCtx *det_ctx, infstream.next_out = (Bytef *)decompressed_data; int result = inflateInit(&infstream); + printf("lolz %d %d\n", result, __LINE__); if (result != Z_OK) { DetectEngineSetEvent(det_ctx, FILE_DECODER_EVENT_Z_UNKNOWN_ERROR); return 0; } result = inflate(&infstream, Z_NO_FLUSH); + printf("lolz %d %d\n", result, __LINE__); switch(result) { case Z_STREAM_END: break; diff --git a/src/util-lua-http.c b/src/util-lua-http.c index 4f8916ce9c17..fbc529e6d8a2 100644 --- 
a/src/util-lua-http.c +++ b/src/util-lua-http.c @@ -63,11 +63,11 @@ static int HttpGetRequestHost(lua_State *luastate) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - if (tx->request_hostname == NULL) + if (htp_tx_request_hostname(tx) == NULL) return LuaCallbackError(luastate, "no request hostname"); - return LuaPushStringBuffer(luastate, - bstr_ptr(tx->request_hostname), bstr_len(tx->request_hostname)); + return LuaPushStringBuffer( + luastate, bstr_ptr(htp_tx_request_hostname(tx)), bstr_len(htp_tx_request_hostname(tx))); } static int HttpGetRequestUriRaw(lua_State *luastate) @@ -79,11 +79,11 @@ static int HttpGetRequestUriRaw(lua_State *luastate) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - if (tx->request_uri == NULL) + if (htp_tx_request_uri(tx) == NULL) return LuaCallbackError(luastate, "no request uri"); - return LuaPushStringBuffer(luastate, - bstr_ptr(tx->request_uri), bstr_len(tx->request_uri)); + return LuaPushStringBuffer( + luastate, bstr_ptr(htp_tx_request_uri(tx)), bstr_len(htp_tx_request_uri(tx))); } static int HttpGetRequestUriNormalized(lua_State *luastate) @@ -95,18 +95,14 @@ static int HttpGetRequestUriNormalized(lua_State *luastate) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); - if (htud == NULL) - return LuaCallbackError(luastate, "no htud in tx"); + bstr *request_uri_normalized = (bstr *)htp_tx_normalized_uri(tx); - if (htud->request_uri_normalized == NULL || - bstr_ptr(htud->request_uri_normalized) == NULL || - bstr_len(htud->request_uri_normalized) == 0) + if (request_uri_normalized == NULL || bstr_ptr(request_uri_normalized) == NULL || + bstr_len(request_uri_normalized) == 0) return LuaCallbackError(luastate, "no normalized uri"); - return LuaPushStringBuffer(luastate, - bstr_ptr(htud->request_uri_normalized), - bstr_len(htud->request_uri_normalized)); + return LuaPushStringBuffer( 
+ luastate, bstr_ptr(request_uri_normalized), bstr_len(request_uri_normalized)); } static int HttpGetRequestLine(lua_State *luastate) @@ -118,11 +114,11 @@ static int HttpGetRequestLine(lua_State *luastate) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - if (tx->request_line == NULL) + if (htp_tx_request_line(tx) == NULL) return LuaCallbackError(luastate, "no request_line"); - return LuaPushStringBuffer(luastate, - bstr_ptr(tx->request_line), bstr_len(tx->request_line)); + return LuaPushStringBuffer( + luastate, bstr_ptr(htp_tx_request_line(tx)), bstr_len(htp_tx_request_line(tx))); } static int HttpGetResponseLine(lua_State *luastate) @@ -134,11 +130,11 @@ static int HttpGetResponseLine(lua_State *luastate) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - if (tx->response_line == NULL) + if (htp_tx_response_line(tx) == NULL) return LuaCallbackError(luastate, "no response_line"); - return LuaPushStringBuffer(luastate, - bstr_ptr(tx->response_line), bstr_len(tx->response_line)); + return LuaPushStringBuffer( + luastate, bstr_ptr(htp_tx_response_line(tx)), bstr_len(htp_tx_response_line(tx))); } static int HttpGetHeader(lua_State *luastate, int dir) @@ -154,18 +150,17 @@ static int HttpGetHeader(lua_State *luastate, int dir) if (name == NULL) return LuaCallbackError(luastate, "1st argument missing, empty or wrong type"); - htp_table_t *headers = tx->request_headers; - if (dir == 1) - headers = tx->response_headers; - if (headers == NULL) - return LuaCallbackError(luastate, "tx has no headers"); + const htp_header_t *h = NULL; + if (dir == 0) { + h = htp_tx_request_header(tx, name); + } else { + h = htp_tx_response_header(tx, name); + } - htp_header_t *h = (htp_header_t *)htp_table_get_c(headers, name); - if (h == NULL || bstr_len(h->value) == 0) + if (h == NULL || htp_header_value_len(h) == 0) return LuaCallbackError(luastate, "header not found"); - return LuaPushStringBuffer(luastate, - bstr_ptr(h->value), 
bstr_len(h->value)); + return LuaPushStringBuffer(luastate, htp_header_value_ptr(h), htp_header_value_len(h)); } static int HttpGetRequestHeader(lua_State *luastate) @@ -187,7 +182,7 @@ static int HttpGetRawHeaders(lua_State *luastate, int dir) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud == NULL) return LuaCallbackError(luastate, "no htud in tx"); @@ -224,20 +219,20 @@ static int HttpGetHeaders(lua_State *luastate, int dir) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - htp_table_t *table = tx->request_headers; + const htp_headers_t *table = htp_tx_request_headers(tx); if (dir == 1) - table = tx->response_headers; - if (tx->request_headers == NULL) + table = htp_tx_response_headers(tx); + if (table == NULL) return LuaCallbackError(luastate, "no headers"); lua_newtable(luastate); - htp_header_t *h = NULL; + const htp_header_t *h = NULL; size_t i = 0; - size_t no_of_headers = htp_table_size(table); + size_t no_of_headers = htp_headers_size(table); for (; i < no_of_headers; i++) { - h = htp_table_get_index(table, i, NULL); - LuaPushStringBuffer(luastate, bstr_ptr(h->name), bstr_len(h->name)); - LuaPushStringBuffer(luastate, bstr_ptr(h->value), bstr_len(h->value)); + h = htp_headers_get_index(table, i); + LuaPushStringBuffer(luastate, htp_header_name_ptr(h), htp_header_name_len(h)); + LuaPushStringBuffer(luastate, htp_header_value_ptr(h), htp_header_value_len(h)); lua_settable(luastate, -3); } return 1; @@ -266,7 +261,7 @@ static int HttpGetBody(lua_State *luastate, int dir) if (tx == NULL) return LuaCallbackError(luastate, "internal error: no tx"); - HtpTxUserData *htud = (HtpTxUserData *) htp_tx_get_user_data(tx); + HtpTxUserData *htud = (HtpTxUserData *)htp_tx_get_user_data(tx); if (htud == NULL) return LuaCallbackError(luastate, "no htud in tx"); diff --git 
a/src/util-print.c b/src/util-print.c index 16b66679f680..77d88faa64dd 100644 --- a/src/util-print.c +++ b/src/util-print.c @@ -66,7 +66,7 @@ void PrintRawLineHexBuf(char *retbuf, uint32_t retbuflen, const uint8_t *buf, ui } } -void PrintRawUriFp(FILE *fp, uint8_t *buf, uint32_t buflen) +void PrintRawUriFp(FILE *fp, const uint8_t *buf, uint32_t buflen) { #define BUFFER_LENGTH 2048 char nbuf[BUFFER_LENGTH] = ""; @@ -90,7 +90,8 @@ void PrintRawUriFp(FILE *fp, uint8_t *buf, uint32_t buflen) fprintf(fp, "%s", nbuf); } -void PrintRawUriBuf(char *retbuf, uint32_t *offset, uint32_t retbuflen, uint8_t *buf, size_t buflen) +void PrintRawUriBuf( + char *retbuf, uint32_t *offset, uint32_t retbuflen, const uint8_t *buf, size_t buflen) { for (size_t u = 0; u < buflen; u++) { if (isprint(buf[u]) && buf[u] != '\"') { diff --git a/src/util-print.h b/src/util-print.h index c9f19b4cdb2d..e589a0a2ddba 100644 --- a/src/util-print.h +++ b/src/util-print.h @@ -40,8 +40,8 @@ } while (0) void PrintBufferRawLineHex(char *, int *,int, const uint8_t *, uint32_t); -void PrintRawUriFp(FILE *, uint8_t *, uint32_t); -void PrintRawUriBuf(char *, uint32_t *, uint32_t, uint8_t *, size_t); +void PrintRawUriFp(FILE *, const uint8_t *, uint32_t); +void PrintRawUriBuf(char *, uint32_t *, uint32_t, const uint8_t *, size_t); void PrintRawDataFp(FILE *, const uint8_t *, uint32_t); void PrintRawDataToBuffer(uint8_t *dst_buf, uint32_t *dst_buf_offset_ptr, uint32_t dst_buf_size, const uint8_t *src_buf, uint32_t src_buf_len); diff --git a/src/util-unittest.h b/src/util-unittest.h index 749ce21c93bd..ea30d4cb4f75 100644 --- a/src/util-unittest.h +++ b/src/util-unittest.h @@ -68,12 +68,14 @@ extern int unittests_fatal; /** * \brief Fail a test if expression evaluates to true. 
*/ -#define FAIL_IF(expr) do { \ - if (unittests_fatal) { \ - BUG_ON(expr); \ - } else if (expr) { \ - return 0; \ - } \ +#define FAIL_IF(expr) \ + do { \ + printf("failed %s:%d\n", __FILE__, __LINE__); \ + if (unittests_fatal) { \ + BUG_ON(expr); \ + } else if (expr) { \ + return 0; \ + } \ } while (0) /**