From 7ab645d3fc080fb6a24408a0a76f3c931e2fbb44 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 3 May 2024 15:38:39 +0200 Subject: [PATCH] feat(httpclientx): allow configuring max-response-body size (#1588) While removing `httpapi` in https://github.com/ooni/probe-cli/pull/1560, I noticed that I actually liked limiting the max body size, which is particularly useful in case of gzip bombs. So, add tests for gzip bombs and allow configuring the maximum body size. We detect truncation by reading one more byte than needed and then checking the body length. Part of https://github.com/ooni/probe/issues/2729. --- internal/httpclientx/DESIGN.md | 12 +-- internal/httpclientx/config.go | 15 +++ internal/httpclientx/config_test.go | 21 +++++ internal/httpclientx/httpclientx.go | 21 +++-- internal/httpclientx/httpclientx_test.go | 111 +++++++++++++++++++++++ 5 files changed, 166 insertions(+), 14 deletions(-) create mode 100644 internal/httpclientx/config_test.go diff --git a/internal/httpclientx/DESIGN.md b/internal/httpclientx/DESIGN.md index fe91bbe37..dd3c396f4 100644 --- a/internal/httpclientx/DESIGN.md +++ b/internal/httpclientx/DESIGN.md @@ -212,7 +212,7 @@ We compare to `httpapi.Call` and `httpx.GetJSONWithQuery`. | handle gzip encoding | yes | yes | NO | | limit io.Reader | yes | yes | yes | | netxlite.ReadAllContext() | yes | yes | yes | -| handle truncated body | NO | yes | NO | +| handle truncated body | yes | yes | NO | | log response body | yes | yes | yes | | handle non-200 response | ️ yes | yes* | yes* | | unmarshal JSON | yes | yes | yes | @@ -232,10 +232,6 @@ introducing the new `./internal/urlx` package to handle this situation). 3. Setting the `Accept` header does not seem to matter in out context because we mostly call API for which there's no need for content negotiation. -4. It's difficult to say whether a body size was exactly the amount specified for truncation -or the body has been truncated. While this is a corner case, it seems perhaps wiser to let -the caller try parsing the body and failing if it is indeed truncated. - #### GetRaw Here we're comparing to `httpapi.Call` and `httpx.FetchResource`. @@ -257,7 +253,7 @@ Here we're comparing to `httpapi.Call` and `httpx.FetchResource`. | handle gzip encoding | yes | yes | NO | | limit io.Reader | yes | yes | yes | | netxlite.ReadAllContext() | yes | yes | yes | -| handle truncated body | NO | yes | NO | +| handle truncated body | yes | yes | NO | | log response body | yes | yes | yes | | handle non-200 response | ️ yes | yes* | yes | @@ -285,7 +281,7 @@ two APIs, the caller would need to fetch a raw body and then manually parse XML. | handle gzip encoding | yes | N/A | N/A | | limit io.Reader | yes | N/A | N/A | | netxlite.ReadAllContext() | yes | N/A | N/A | -| handle truncated body | NO | N/A | N/A | +| handle truncated body | yes | N/A | N/A | | log response body | yes | N/A | N/A | | handle non-200 response | ️ yes | N/A | N/A | | unmarshal XML | yes | N/A | N/A | @@ -316,7 +312,7 @@ Here we're comparing to `httpapi.Call` and `httpx.PostJSON`. | handle gzip encoding | yes | yes | NO | | limit io.Reader | yes | yes | yes | | netxlite.ReadAllContext() | yes | yes | yes | -| handle truncated body | NO | yes | NO | +| handle truncated body | yes | yes | NO | | log response body | yes | yes | yes | | handle non-200 response | ️ yes | yes* | yes* | | unmarshal JSON | yes | yes | yes | diff --git a/internal/httpclientx/config.go b/internal/httpclientx/config.go index 71bdeb616..43ba8206f 100644 --- a/internal/httpclientx/config.go +++ b/internal/httpclientx/config.go @@ -15,6 +15,21 @@ type Config struct { // Logger is the MANDATORY [model.Logger] to use. Logger model.Logger + // MaxResponseBodySize OPTIONALLY limits the maximum body size. If not set, we + // use the [DefaultMaxResponseBodySize] value. + MaxResponseBodySize int64 + // UserAgent is the MANDATORY User-Agent header value to use. UserAgent string } + +// DefaultMaxResponseBodySize is the default maximum response body size. +const DefaultMaxResponseBodySize = 1 << 24 + +func (c *Config) maxResponseBodySize() (value int64) { + value = c.MaxResponseBodySize + if value <= 0 { + value = DefaultMaxResponseBodySize + } + return +} diff --git a/internal/httpclientx/config_test.go b/internal/httpclientx/config_test.go new file mode 100644 index 000000000..2c2674317 --- /dev/null +++ b/internal/httpclientx/config_test.go @@ -0,0 +1,21 @@ +package httpclientx + +import "testing" + +func TestConfigMaxResponseBodySize(t *testing.T) { + t.Run("the default returned value corresponds to the constant default", func(t *testing.T) { + config := &Config{} + if value := config.maxResponseBodySize(); value != DefaultMaxResponseBodySize { + t.Fatal("unexpected maxResponseBodySize()", value) + } + }) + + t.Run("we can override the default", func(t *testing.T) { + config := &Config{} + const expectedValue = DefaultMaxResponseBodySize / 2 + config.MaxResponseBodySize = expectedValue + if value := config.maxResponseBodySize(); value != expectedValue { + t.Fatal("unexpected maxResponseBodySize()", value) + } + }) +} diff --git a/internal/httpclientx/httpclientx.go b/internal/httpclientx/httpclientx.go index 30e95cbae..97514b353 100644 --- a/internal/httpclientx/httpclientx.go +++ b/internal/httpclientx/httpclientx.go @@ -8,6 +8,7 @@ package httpclientx import ( "compress/gzip" "context" + "errors" "io" "net/http" @@ -34,11 +35,11 @@ func zeroValue[T any]() T { return *new(T) } -// newLimitReader is a wrapper for [io.LimitReader] that automatically -// sets the maximum readable amount of bytes. -func newLimitReader(r io.Reader) io.Reader { - return io.LimitReader(r, 1<<24) -} +// ErrTruncated indicates we truncated the response body. +// +// Note: we SHOULD NOT change the error string because this error string was previously +// used by the httpapi package and it's better to keep the same strings. +var ErrTruncated = errors.New("httpapi: truncated response body") // do is the internal function to finish preparing the request and getting a raw response. func do(ctx context.Context, req *http.Request, epnt *Endpoint, config *Config) ([]byte, error) { @@ -84,7 +85,10 @@ func do(ctx context.Context, req *http.Request, epnt *Endpoint, config *Config) } // protect against unreasonably large response bodies - limitReader := newLimitReader(baseReader) + // + // read one more byte than the maximum allowed size so we can + // always tell whether it was truncated here + limitReader := io.LimitReader(baseReader, config.maxResponseBodySize()+1) // read the raw body rawrespbody, err := netxlite.ReadAllContext(ctx, limitReader) @@ -94,6 +98,11 @@ func do(ctx context.Context, req *http.Request, epnt *Endpoint, config *Config) return nil, err } + // handle the case of truncated body + if int64(len(rawrespbody)) > config.maxResponseBodySize() { + return nil, ErrTruncated + } + // log the response body for debugging purposes config.Logger.Debugf("%s %s: raw response body: %s", req.Method, req.URL.String(), string(rawrespbody)) diff --git a/internal/httpclientx/httpclientx_test.go b/internal/httpclientx/httpclientx_test.go index 61f727fa9..41c5faca2 100644 --- a/internal/httpclientx/httpclientx_test.go +++ b/internal/httpclientx/httpclientx_test.go @@ -15,6 +15,20 @@ import ( "github.com/ooni/probe-cli/v3/internal/testingx" ) +// createGzipBomb creates a gzip bomb with the given size. +func createGzipBomb(size int) []byte { + input := make([]byte, size) + runtimex.Assert(len(input) == size, "unexpected input length") + var buf bytes.Buffer + gz := runtimex.Try1(gzip.NewWriterLevel(&buf, gzip.BestCompression)) + _ = runtimex.Try1(gz.Write(input)) + runtimex.Try0(gz.Close()) + return buf.Bytes() +} + +// gzipBomb is a gzip bomb containing 1 megabyte of zeroes +var gzipBomb = createGzipBomb(1 << 20) + func TestGzipDecompression(t *testing.T) { t.Run("we correctly handle gzip encoding", func(t *testing.T) { expected := []byte(`Bonsoir, Elliot!!!`) @@ -83,6 +97,36 @@ func TestGzipDecompression(t *testing.T) { t.Fatal("expected nil response body") } }) + + t.Run("we can correctly decode a large body", func(t *testing.T) { + // create a server returning compressed content + server := testingx.MustNewHTTPServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Add("Content-Encoding", "gzip") + w.Write(gzipBomb) + })) + defer server.Close() + + // make sure we can read it + respbody, err := GetRaw( + context.Background(), + NewEndpoint(server.URL), + &Config{ + Client: http.DefaultClient, + Logger: model.DiscardLogger, + UserAgent: model.HTTPHeaderUserAgent, + }) + + //t.Log(respbody) // maybe this operation is a bit expensive to be the default + t.Log(err) + + if err != nil { + t.Fatal(err) + } + + if length := len(respbody); length != 1<<20 { + t.Fatal("unexpected response body length", length) + } + }) } func TestHTTPStatusCodeHandling(t *testing.T) { @@ -142,3 +186,70 @@ func TestHTTPReadBodyErrorsHandling(t *testing.T) { t.Fatal("expected nil response body") } } + +func TestLimitMaximumBodySize(t *testing.T) { + t.Run("we can correctly avoid receiving a large body when uncompressed", func(t *testing.T) { + // create a server returning uncompressed content + server := testingx.MustNewHTTPServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write(make([]byte, 1<<20)) + })) + defer server.Close() + + // make sure we can read it + // + // note: here we're using a small max body size, definitely smaller than what we send + respbody, err := GetRaw( + context.Background(), + NewEndpoint(server.URL), + &Config{ + Client: http.DefaultClient, + Logger: model.DiscardLogger, + MaxResponseBodySize: 1 << 10, + UserAgent: model.HTTPHeaderUserAgent, + }) + + t.Log(respbody) + t.Log(err) + + if !errors.Is(err, ErrTruncated) { + t.Fatal("unexpected error", err) + } + + if len(respbody) != 0 { + t.Fatal("expected zero length response body length") + } + }) + + t.Run("we can correctly avoid receiving a large body when compressed", func(t *testing.T) { + // create a server returning compressed content + server := testingx.MustNewHTTPServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Add("Content-Encoding", "gzip") + w.Write(gzipBomb) + })) + defer server.Close() + + // make sure we can read it + // + // note: here we're using a small max body size, definitely smaller than the gzip bomb + respbody, err := GetRaw( + context.Background(), + NewEndpoint(server.URL), + &Config{ + Client: http.DefaultClient, + Logger: model.DiscardLogger, + MaxResponseBodySize: 1 << 10, + UserAgent: model.HTTPHeaderUserAgent, + }) + + t.Log(respbody) + t.Log(err) + + if !errors.Is(err, ErrTruncated) { + t.Fatal("unexpected error", err) + } + + if len(respbody) != 0 { + t.Fatal("expected zero length response body length") + } + }) +}