Skip to content

Commit

Permalink
feat(httpclientx): allow configuring max-response-body size (#1588)
Browse files Browse the repository at this point in the history
While removing `httpapi` in #1560,
I noticed that I actually liked limiting the max body size, which is
particularly useful in case of gzip bombs. So, add tests for gzip bombs
and allow configuring the maximum body size. We detect truncation by
reading one more byte than needed and then checking the body length.

Part of ooni/probe#2729.
  • Loading branch information
bassosimone authored May 3, 2024
1 parent 0232df5 commit 7ab645d
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 14 deletions.
12 changes: 4 additions & 8 deletions internal/httpclientx/DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ We compare to `httpapi.Call` and `httpx.GetJSONWithQuery`.
| handle gzip encoding | yes | yes | NO |
| limit io.Reader | yes | yes | yes |
| netxlite.ReadAllContext() | yes | yes | yes |
| handle truncated body | NO | yes | NO |
| handle truncated body | yes | yes | NO |
| log response body | yes | yes | yes |
| handle non-200 response | ️ yes | yes* | yes* |
| unmarshal JSON | yes | yes | yes |
Expand All @@ -232,10 +232,6 @@ introducing the new `./internal/urlx` package to handle this situation).
3. Setting the `Accept` header does not seem to matter in out context because we mostly
call API for which there's no need for content negotiation.

4. It's difficult to say whether a body size was exactly the amount specified for truncation
or the body has been truncated. While this is a corner case, it seems perhaps wiser to let
the caller try parsing the body and failing if it is indeed truncated.

#### GetRaw

Here we're comparing to `httpapi.Call` and `httpx.FetchResource`.
Expand All @@ -257,7 +253,7 @@ Here we're comparing to `httpapi.Call` and `httpx.FetchResource`.
| handle gzip encoding | yes | yes | NO |
| limit io.Reader | yes | yes | yes |
| netxlite.ReadAllContext() | yes | yes | yes |
| handle truncated body | NO | yes | NO |
| handle truncated body | yes | yes | NO |
| log response body | yes | yes | yes |
| handle non-200 response | ️ yes | yes* | yes |

Expand Down Expand Up @@ -285,7 +281,7 @@ two APIs, the caller would need to fetch a raw body and then manually parse XML.
| handle gzip encoding | yes | N/A | N/A |
| limit io.Reader | yes | N/A | N/A |
| netxlite.ReadAllContext() | yes | N/A | N/A |
| handle truncated body | NO | N/A | N/A |
| handle truncated body | yes | N/A | N/A |
| log response body | yes | N/A | N/A |
| handle non-200 response | ️ yes | N/A | N/A |
| unmarshal XML | yes | N/A | N/A |
Expand Down Expand Up @@ -316,7 +312,7 @@ Here we're comparing to `httpapi.Call` and `httpx.PostJSON`.
| handle gzip encoding | yes | yes | NO |
| limit io.Reader | yes | yes | yes |
| netxlite.ReadAllContext() | yes | yes | yes |
| handle truncated body | NO | yes | NO |
| handle truncated body | yes | yes | NO |
| log response body | yes | yes | yes |
| handle non-200 response | ️ yes | yes* | yes* |
| unmarshal JSON | yes | yes | yes |
Expand Down
15 changes: 15 additions & 0 deletions internal/httpclientx/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,21 @@ type Config struct {
// Logger is the MANDATORY [model.Logger] to use.
Logger model.Logger

// MaxResponseBodySize OPTIONALLY limits the maximum body size. If not set, we
// use the [DefaultMaxResponseBodySize] value.
MaxResponseBodySize int64

// UserAgent is the MANDATORY User-Agent header value to use.
UserAgent string
}

// DefaultMaxResponseBodySize is the default maximum response body size.
const DefaultMaxResponseBodySize = 1 << 24

func (c *Config) maxResponseBodySize() (value int64) {
value = c.MaxResponseBodySize
if value <= 0 {
value = DefaultMaxResponseBodySize
}
return
}
21 changes: 21 additions & 0 deletions internal/httpclientx/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package httpclientx

import "testing"

func TestConfigMaxResponseBodySize(t *testing.T) {
t.Run("the default returned value corresponds to the constant default", func(t *testing.T) {
config := &Config{}
if value := config.maxResponseBodySize(); value != DefaultMaxResponseBodySize {
t.Fatal("unexpected maxResponseBodySize()", value)
}
})

t.Run("we can override the default", func(t *testing.T) {
config := &Config{}
const expectedValue = DefaultMaxResponseBodySize / 2
config.MaxResponseBodySize = expectedValue
if value := config.maxResponseBodySize(); value != expectedValue {
t.Fatal("unexpected maxResponseBodySize()", value)
}
})
}
21 changes: 15 additions & 6 deletions internal/httpclientx/httpclientx.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package httpclientx
import (
"compress/gzip"
"context"
"errors"
"io"
"net/http"

Expand All @@ -34,11 +35,11 @@ func zeroValue[T any]() T {
return *new(T)
}

// newLimitReader is a wrapper for [io.LimitReader] that automatically
// sets the maximum readable amount of bytes.
func newLimitReader(r io.Reader) io.Reader {
return io.LimitReader(r, 1<<24)
}
// ErrTruncated indicates we truncated the response body.
//
// Note: we SHOULD NOT change the error string because this error string was previously
// used by the httpapi package and it's better to keep the same strings.
var ErrTruncated = errors.New("httpapi: truncated response body")

// do is the internal function to finish preparing the request and getting a raw response.
func do(ctx context.Context, req *http.Request, epnt *Endpoint, config *Config) ([]byte, error) {
Expand Down Expand Up @@ -84,7 +85,10 @@ func do(ctx context.Context, req *http.Request, epnt *Endpoint, config *Config)
}

// protect against unreasonably large response bodies
limitReader := newLimitReader(baseReader)
//
// read one more byte than the maximum allowed size so we can
// always tell whether it was truncated here
limitReader := io.LimitReader(baseReader, config.maxResponseBodySize()+1)

// read the raw body
rawrespbody, err := netxlite.ReadAllContext(ctx, limitReader)
Expand All @@ -94,6 +98,11 @@ func do(ctx context.Context, req *http.Request, epnt *Endpoint, config *Config)
return nil, err
}

// handle the case of truncated body
if int64(len(rawrespbody)) > config.maxResponseBodySize() {
return nil, ErrTruncated
}

// log the response body for debugging purposes
config.Logger.Debugf("%s %s: raw response body: %s", req.Method, req.URL.String(), string(rawrespbody))

Expand Down
111 changes: 111 additions & 0 deletions internal/httpclientx/httpclientx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@ import (
"github.com/ooni/probe-cli/v3/internal/testingx"
)

// createGzipBomb creates a gzip bomb with the given size.
func createGzipBomb(size int) []byte {
input := make([]byte, size)
runtimex.Assert(len(input) == size, "unexpected input length")
var buf bytes.Buffer
gz := runtimex.Try1(gzip.NewWriterLevel(&buf, gzip.BestCompression))
_ = runtimex.Try1(gz.Write(input))
runtimex.Try0(gz.Close())
return buf.Bytes()
}

// gzipBomb is a gzip bomb containing 1 megabyte of zeroes
var gzipBomb = createGzipBomb(1 << 20)

func TestGzipDecompression(t *testing.T) {
t.Run("we correctly handle gzip encoding", func(t *testing.T) {
expected := []byte(`Bonsoir, Elliot!!!`)
Expand Down Expand Up @@ -83,6 +97,36 @@ func TestGzipDecompression(t *testing.T) {
t.Fatal("expected nil response body")
}
})

t.Run("we can correctly decode a large body", func(t *testing.T) {
// create a server returning compressed content
server := testingx.MustNewHTTPServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Encoding", "gzip")
w.Write(gzipBomb)
}))
defer server.Close()

// make sure we can read it
respbody, err := GetRaw(
context.Background(),
NewEndpoint(server.URL),
&Config{
Client: http.DefaultClient,
Logger: model.DiscardLogger,
UserAgent: model.HTTPHeaderUserAgent,
})

//t.Log(respbody) // maybe this operation is a bit expensive to be the default
t.Log(err)

if err != nil {
t.Fatal(err)
}

if length := len(respbody); length != 1<<20 {
t.Fatal("unexpected response body length", length)
}
})
}

func TestHTTPStatusCodeHandling(t *testing.T) {
Expand Down Expand Up @@ -142,3 +186,70 @@ func TestHTTPReadBodyErrorsHandling(t *testing.T) {
t.Fatal("expected nil response body")
}
}

func TestLimitMaximumBodySize(t *testing.T) {
t.Run("we can correctly avoid receiving a large body when uncompressed", func(t *testing.T) {
// create a server returning uncompressed content
server := testingx.MustNewHTTPServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write(make([]byte, 1<<20))
}))
defer server.Close()

// make sure we can read it
//
// note: here we're using a small max body size, definitely smaller than what we send
respbody, err := GetRaw(
context.Background(),
NewEndpoint(server.URL),
&Config{
Client: http.DefaultClient,
Logger: model.DiscardLogger,
MaxResponseBodySize: 1 << 10,
UserAgent: model.HTTPHeaderUserAgent,
})

t.Log(respbody)
t.Log(err)

if !errors.Is(err, ErrTruncated) {
t.Fatal("unexpected error", err)
}

if len(respbody) != 0 {
t.Fatal("expected zero length response body length")
}
})

t.Run("we can correctly avoid receiving a large body when compressed", func(t *testing.T) {
// create a server returning compressed content
server := testingx.MustNewHTTPServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Encoding", "gzip")
w.Write(gzipBomb)
}))
defer server.Close()

// make sure we can read it
//
// note: here we're using a small max body size, definitely smaller than the gzip bomb
respbody, err := GetRaw(
context.Background(),
NewEndpoint(server.URL),
&Config{
Client: http.DefaultClient,
Logger: model.DiscardLogger,
MaxResponseBodySize: 1 << 10,
UserAgent: model.HTTPHeaderUserAgent,
})

t.Log(respbody)
t.Log(err)

if !errors.Is(err, ErrTruncated) {
t.Fatal("unexpected error", err)
}

if len(respbody) != 0 {
t.Fatal("expected zero length response body length")
}
})
}

0 comments on commit 7ab645d

Please sign in to comment.