From bd9f657a9136df617e99c7b266a1c9f13389b560 Mon Sep 17 00:00:00 2001 From: Joaquin Date: Wed, 18 Sep 2024 15:57:34 -0300 Subject: [PATCH] add time groupping --- README.md | 61 ++++++--- lib/boom_notifier/error_storage.ex | 25 ++-- lib/boom_notifier/notification_sender.ex | 71 +++++++---- .../example_app/lib/example_app_web/router.ex | 2 +- test/unit/error_storage_test.exs | 52 ++++---- test/unit/notification_sender_test.exs | 116 ++++++++++++++---- 6 files changed, 222 insertions(+), 105 deletions(-) diff --git a/README.md b/README.md index 0c70eee..7d94352 100644 --- a/README.md +++ b/README.md @@ -91,34 +91,38 @@ defmodule YourApp.Router do options: # ... ] ] + + #... +end ``` -## Notification Trigger +## Throttling notifications By default, `BoomNotifier` will send a notification every time an exception is raised. -However, there are different strategies to decide when to send the -notifications using the `:notification_trigger` option with one of the -following values: `:always` and `:exponential`. - -### Always +There are two throttling mechanisms you can use to reduce notification rate. Counting +based and time based throttling. The former allows notifications to be delivered +on exponential error counting and the latter based on time throttling, that is +if a predefined amount of time has passed from the last error. -This option is the default one. It will trigger a notification for every -exception. +### Count Based Throttle ```elixir defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: :always, notifiers: [ - # ... - ] + # ... + ], + groupping: :count, + count: :exponential, + time_limit: :timer.minutes(30) +end ``` -### Exponential +#### Exponential It uses a formula of `log2(errors_count)` to determine whether to send a notification, based on the accumulated error count for each specific @@ -132,7 +136,7 @@ defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: :exponential, + count: :exponential, notifiers: [ # ... ] @@ -143,24 +147,42 @@ defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: [exponential: [limit: 100]] + count: [exponential: [limit: 100]] notifiers: [ # ... ] ``` -### Notification trigger time limit +### Time Based Throttle + +```elixir +defmodule YourApp.Router do + use Phoenix.Router + + use BoomNotifier, + notifiers: [ + # ... + ], + groupping: :time, + throttle: :timer.minutes(1) +end +``` + +### Time Limit -If you've defined a triggering strategy which holds off notification delivering you can define a time limit value -which will be used to deliver the notification after a time limit milliseconds have passed from the last error. The -time counter is reset on new errors and only applies for cases where notifications are not sent. +Both groupping strategies allow you to define a time limit that tells boom to deliver a notification if the amount +of time has passed from the last time a notification was sent or error groupping started. + +If specified, a notification will be triggered even though the groupping strategy does not met the criteria. For +time based throttling this is usefull if errors keep appearing before the throttle time and for count based throttle +to reset (and notify) the grupping after a certain time period. ```elixir defmodule YourApp.Router do use Phoenix.Router use BoomNotifier, - notification_trigger: [:exponential], + count: [:exponential], time_limit: :timer.minutes(30), notifier: CustomNotifier @@ -168,7 +190,6 @@ defmodule YourApp.Router do end ``` - ## Custom data or Metadata By default, `BoomNotifier` will **not** include any custom data from your diff --git a/lib/boom_notifier/error_storage.ex b/lib/boom_notifier/error_storage.ex index 547b850..d6fc49b 100644 --- a/lib/boom_notifier/error_storage.ex +++ b/lib/boom_notifier/error_storage.ex @@ -15,7 +15,8 @@ defmodule BoomNotifier.ErrorStorage do ] @type t :: %__MODULE__{} - @type error_strategy :: :always | :exponential | [exponential: [limit: non_neg_integer()]] + @type error_strategy :: + :always | :none | :exponential | [exponential: [limit: non_neg_integer()]] use Agent, start: {__MODULE__, :start_link, []} @@ -32,8 +33,8 @@ defmodule BoomNotifier.ErrorStorage do occurrences is increased and it also updates the first and last time it happened. """ - @spec store_error(ErrorInfo.t()) :: :ok - def store_error(error_info) do + @spec accumulate(ErrorInfo.t()) :: :ok + def accumulate(error_info) do %{key: error_hash_key} = error_info timestamp = error_info.timestamp || DateTime.utc_now() @@ -65,8 +66,8 @@ defmodule BoomNotifier.ErrorStorage do @doc """ Given an error info, it returns the aggregated info stored in the agent. """ - @spec get_error_stats(ErrorInfo.t()) :: %__MODULE__{} - def get_error_stats(error_info) do + @spec get_stats(ErrorInfo.t()) :: %__MODULE__{} + def get_stats(error_info) do %{key: error_hash_key} = error_info Agent.get(:boom_notifier, fn state -> state end) @@ -94,7 +95,7 @@ defmodule BoomNotifier.ErrorStorage do Reset the accumulated_occurrences for the given error info to zero. It also increments the max storage capacity based on the notification strategy. """ - @spec reset_accumulated_errors(error_strategy, ErrorInfo.t()) :: :ok + @spec reset_accumulated_errors(error_strategy | nil, ErrorInfo.t()) :: :ok def reset_accumulated_errors(:exponential, error_info) do %{key: error_hash_key} = error_info @@ -119,7 +120,7 @@ defmodule BoomNotifier.ErrorStorage do ) end - def reset_accumulated_errors(:always, error_info) do + def reset_accumulated_errors(value, error_info) when value in [nil, :none, :always] do %{key: error_hash_key} = error_info Agent.update( @@ -138,6 +139,16 @@ defmodule BoomNotifier.ErrorStorage do |> Map.replace!(:last_occurrence, nil) end + def eleapsed(nil), do: 0 + + def eleapsed(%__MODULE__{} = error_info) do + DateTime.diff( + error_info.last_occurrence, + error_info.first_occurrence, + :millisecond + ) + end + @spec do_send_notification?(ErrorInfo.t() | nil) :: boolean() defp do_send_notification?(nil), do: false diff --git a/lib/boom_notifier/notification_sender.ex b/lib/boom_notifier/notification_sender.ex index 4ccd991..2042d60 100644 --- a/lib/boom_notifier/notification_sender.ex +++ b/lib/boom_notifier/notification_sender.ex @@ -14,49 +14,49 @@ defmodule BoomNotifier.NotificationSender do GenServer.start_link(__MODULE__, :ok, name: __MODULE__) end - def async_notify(notifier, occurrences, options) do - GenServer.cast(__MODULE__, {:notify, notifier, occurrences, options}) - end - def async_trigger_notify(settings, error_info) do GenServer.cast(__MODULE__, {:trigger_notify, settings, error_info}) end - def notify(notifier, occurrences, options) do - spawn_link(fn -> - notifier.notify(occurrences, options) - end) - end - - def notify_all(settings, error_info) do - notification_trigger = Keyword.get(settings, :notification_trigger, :always) - occurrences = Map.put(error_info, :occurrences, ErrorStorage.get_error_stats(error_info)) - - ErrorStorage.reset_accumulated_errors(notification_trigger, error_info) + def trigger_notify(settings, error_info) do + ErrorStorage.accumulate(error_info) - BoomNotifier.walkthrough_notifiers( + do_trigger_notify( + Keyword.get(settings, :groupping, :count), settings, - fn notifier, options -> notify(notifier, occurrences, options) end + error_info ) end - def trigger_notify(settings, error_info) do - timeout = Keyword.get(settings, :time_limit) - - ErrorStorage.store_error(error_info) + defp do_trigger_notify(:count, settings, error_info) do + time_limit = Keyword.get(settings, :time_limit) if ErrorStorage.send_notification?(error_info) do notify_all(settings, error_info) :ok else - if timeout do - {:schedule, timeout} + if time_limit do + {:schedule, time_limit} else - :ok + :ignored end end end + defp do_trigger_notify(:time, settings, error_info) do + throttle = Keyword.get(settings, :throttle, 100) + time_limit = Keyword.get(settings, :time_limit) + + stats = ErrorStorage.get_stats(error_info) + + if ErrorStorage.eleapsed(stats) >= time_limit do + notify_all(settings, error_info) + :ok + else + {:schedule, throttle} + end + end + # Server callbacks @impl true @@ -79,6 +79,9 @@ defmodule BoomNotifier.NotificationSender do cancel_timer(timer) case trigger_notify(settings, error_info) do + :ignored -> + {:noreply, state} + :ok -> {:noreply, state} @@ -116,6 +119,26 @@ defmodule BoomNotifier.NotificationSender do {:noreply, state |> Map.delete(error_info.key)} end + # Private methods + + defp notify(notifier, occurrences, options) do + spawn_link(fn -> + notifier.notify(occurrences, options) + end) + end + + defp notify_all(settings, error_info) do + count_strategy = Keyword.get(settings, :count) + + occurrences = Map.put(error_info, :occurrences, ErrorStorage.get_stats(error_info)) + ErrorStorage.reset_accumulated_errors(count_strategy, error_info) + + BoomNotifier.walkthrough_notifiers( + settings, + fn notifier, options -> notify(notifier, occurrences, options) end + ) + end + defp cancel_timer(nil), do: nil defp cancel_timer(timer), do: Process.cancel_timer(timer) end diff --git a/test/example_app/lib/example_app_web/router.ex b/test/example_app/lib/example_app_web/router.ex index 4a8b8f2..59ece0a 100644 --- a/test/example_app/lib/example_app_web/router.ex +++ b/test/example_app/lib/example_app_web/router.ex @@ -2,7 +2,7 @@ defmodule ExampleAppWeb.Router do use ExampleAppWeb, :router use BoomNotifier, - notification_trigger: [exponential: [limit: 8]], + count: [exponential: [limit: 8]], custom_data: [:assigns, :logger], ignore_exceptions: [IgnoreExceptionError], notifiers: [ diff --git a/test/unit/error_storage_test.exs b/test/unit/error_storage_test.exs index 7d56bd5..91f7a65 100644 --- a/test/unit/error_storage_test.exs +++ b/test/unit/error_storage_test.exs @@ -24,7 +24,7 @@ defmodule ErrorStorageTest do clear_error_storage() end - describe "store_error/1" do + describe "accumulate/1" do test "groups errors by type" do another_timestamp = DateTime.utc_now() @@ -41,9 +41,9 @@ defmodule ErrorStorageTest do %{key: another_error_hash} = another_error_info - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(another_error_info) %{@error_hash => error_stat_1, ^another_error_hash => error_stat_2} = Agent.get(:boom_notifier, & &1) @@ -64,12 +64,12 @@ defmodule ErrorStorageTest do end end - describe "get_error_stats/1" do + describe "get_stats/1" do test "returns the errors for the proper error kind" do - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(@error_info) - assert ErrorStorage.get_error_stats(@error_info) == + assert ErrorStorage.get_stats(@error_info) == %ErrorStorage{ __max_storage_capacity__: 1, accumulated_occurrences: 2, @@ -85,9 +85,9 @@ defmodule ErrorStorageTest do timestamp: another_timestamp } - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(another_error_info) - assert ErrorStorage.get_error_stats(another_error_info) == + assert ErrorStorage.get_stats(another_error_info) == %ErrorStorage{ __max_storage_capacity__: 1, accumulated_occurrences: 1, @@ -97,27 +97,27 @@ defmodule ErrorStorageTest do end test "returns nil if error info does not exist" do - assert ErrorStorage.get_error_stats(@error_info) == nil + assert ErrorStorage.get_stats(@error_info) == nil end end describe "send_notification?/1" do test "returns false when count is smaller than the error length" do # increase the max capacity to 2 - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_accumulated_errors(:exponential, @error_info) - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) refute ErrorStorage.send_notification?(@error_info) end test "returns true when error length is greater or equal than count" do # creates the error key - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) # increase the max capacity to 2 ErrorStorage.reset_accumulated_errors(:exponential, @error_info) - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(@error_info) another_error_info = %ErrorInfo{ reason: "Another error information", @@ -126,12 +126,12 @@ defmodule ErrorStorageTest do } # creates the error key - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(another_error_info) # increase the max capacity to 2 ErrorStorage.reset_accumulated_errors(:exponential, another_error_info) - ErrorStorage.store_error(another_error_info) - ErrorStorage.store_error(another_error_info) - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(another_error_info) + ErrorStorage.accumulate(another_error_info) + ErrorStorage.accumulate(another_error_info) assert ErrorStorage.send_notification?(@error_info) assert ErrorStorage.send_notification?(another_error_info) @@ -144,7 +144,7 @@ defmodule ErrorStorageTest do describe "reset_accumulated_errors/2" do test "increases the counter when notification trigger is :exponential" do - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_accumulated_errors(:exponential, @error_info) [error_stat] = Agent.get(:boom_notifier, fn state -> state end) |> Map.values() @@ -177,8 +177,8 @@ defmodule ErrorStorageTest do } end - test "increases the counter when notification trigger is :exponential and :limit is set" do - ErrorStorage.store_error(@error_info) + test "increases the counter when count is :exponential and :limit is set" do + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_accumulated_errors([exponential: [limit: 5]], @error_info) [error_stat] = Agent.get(:boom_notifier, fn state -> state end) |> Map.values() @@ -212,7 +212,7 @@ defmodule ErrorStorageTest do end test "does not increase the counter when notification_trigger is :always" do - ErrorStorage.store_error(@error_info) + ErrorStorage.accumulate(@error_info) ErrorStorage.reset_accumulated_errors(:always, @error_info) [error_stat] = Agent.get(:boom_notifier, fn state -> state end) |> Map.values() @@ -251,8 +251,8 @@ defmodule ErrorStorageTest do %{key: another_error_hash} = another_error_info - ErrorStorage.store_error(@error_info) - ErrorStorage.store_error(another_error_info) + ErrorStorage.accumulate(@error_info) + ErrorStorage.accumulate(another_error_info) ErrorStorage.reset_accumulated_errors(:exponential, @error_info) diff --git a/test/unit/notification_sender_test.exs b/test/unit/notification_sender_test.exs index 8f0592b..40a7da5 100644 --- a/test/unit/notification_sender_test.exs +++ b/test/unit/notification_sender_test.exs @@ -10,6 +10,7 @@ defmodule BoomNotifier.NotificationSenderTest do } @time_limit 500 + @throttle 500 @receive_timeout 100 @settings_basic [ @@ -17,11 +18,16 @@ defmodule BoomNotifier.NotificationSenderTest do options: [pid_name: BoomNotifier.TestMessageProxy] ] - @settings_groupping @settings_basic ++ - [ - time_limit: @time_limit, - notification_trigger: :exponential - ] + @settings_groupping_count @settings_basic ++ + [ + time_limit: @time_limit, + count: :exponential + ] + @settings_groupping_time @settings_basic ++ + [ + groupping: :time, + throttle: @throttle + ] defmodule NotificationSenderTestNotifier do def notify(error_info, opts) do @@ -45,6 +51,11 @@ defmodule BoomNotifier.NotificationSenderTest do %{error_info: error_info} end + def notification_sent(error_info) do + ErrorStorage.accumulate(error_info) + ErrorStorage.reset_accumulated_errors(:exponential, error_info) + end + setup do clear_error_storage() @@ -56,7 +67,7 @@ defmodule BoomNotifier.NotificationSenderTest do setup :build_error_info - describe "with default notification trigger (always)" do + describe "with default notification count (none)" do test "sends a notification", %{error_info: error_info} do NotificationSender.trigger_notify(@settings_basic, error_info) @@ -65,19 +76,19 @@ defmodule BoomNotifier.NotificationSenderTest do end end - describe "sync call with exponential notification trigger" do + describe "sync call with exponential notification count" do test "sends a notification", %{error_info: error_info} do - NotificationSender.trigger_notify(@settings_groupping, error_info) + NotificationSender.trigger_notify(@settings_groupping_count, error_info) {_, rcv_error_info} = assert_receive({:notify_called, _}, @receive_timeout) assert Map.delete(rcv_error_info, :occurrences) == Map.delete(error_info, :occurrences) end test "does not send a second notification", %{error_info: error_info} do - ErrorStorage.store_error(error_info) - ErrorStorage.reset_accumulated_errors(:exponential, error_info) + notification_sent(error_info) - trigger_notify_resp = NotificationSender.trigger_notify(@settings_groupping, error_info) + trigger_notify_resp = + NotificationSender.trigger_notify(@settings_groupping_count, error_info) refute_receive({:notify_called, _}, @receive_timeout) assert {:schedule, @time_limit} = trigger_notify_resp @@ -85,7 +96,7 @@ defmodule BoomNotifier.NotificationSenderTest do test "sends notification occurrences along error info", %{error_info: error_info} do for _ <- 1..7 do - NotificationSender.trigger_notify(@settings_groupping, error_info) + NotificationSender.trigger_notify(@settings_groupping_count, error_info) end assert_receive({:notify_called, %{occurrences: %{accumulated_occurrences: 1}}}) @@ -94,44 +105,41 @@ defmodule BoomNotifier.NotificationSenderTest do end end - describe "async call with exponential notification trigger" do + describe "async call with exponential notification count" do test "sends a notification", %{error_info: error_info} do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) assert_receive({:notify_called, _}, @receive_timeout) end end - describe "repeated async call with exponential notification trigger" do + describe "repeated async call with exponential notification count" do setup(%{error_info: error_info}) do - ErrorStorage.store_error(error_info) - ErrorStorage.reset_accumulated_errors(:exponential, error_info) + notification_sent(error_info) end test "sends a second notification after a timeout", %{error_info: error_info} do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) assert_receive({:notify_called, _}, @time_limit + @receive_timeout) - - assert error_info |> ErrorStorage.get_error_stats() |> Map.get(:accumulated_occurrences) == - 0 + assert error_info |> ErrorStorage.get_stats() |> Map.get(:accumulated_occurrences) == 0 end test "does not send a second notification before a timeout", %{error_info: error_info} do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) refute_receive({:notify_called, _}, @time_limit - 50) - assert ErrorStorage.get_error_stats(error_info) |> Map.get(:accumulated_occurrences) > 0 + assert ErrorStorage.get_stats(error_info) |> Map.get(:accumulated_occurrences) > 0 end test( "it does not sends a scheduled notification if another error happens", %{error_info: error_info} ) do - NotificationSender.async_trigger_notify(@settings_groupping, error_info) - NotificationSender.async_trigger_notify(@settings_groupping, error_info) - NotificationSender.async_trigger_notify(@settings_groupping, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) + NotificationSender.async_trigger_notify(@settings_groupping_count, error_info) notification_sender_state = NotificationSender @@ -148,7 +156,7 @@ defmodule BoomNotifier.NotificationSenderTest do "it does not schedule a notification if time_limit is not specified", %{error_info: error_info} ) do - settings = Keyword.delete(@settings_groupping, :time_limit) + settings = Keyword.delete(@settings_groupping_count, :time_limit) NotificationSender.async_trigger_notify(settings, error_info) NotificationSender.async_trigger_notify(settings, error_info) @@ -159,4 +167,58 @@ defmodule BoomNotifier.NotificationSenderTest do assert notification_sender_state |> Map.keys() |> length() == 0 end end + + describe "with time groupping" do + setup :build_error_info + + test "it returns a schedule action", %{error_info: error_info} do + result = NotificationSender.trigger_notify(@settings_groupping_time, error_info) + + assert {:schedule, @throttle} = result + end + + test "it does not send the notification before throttle", %{error_info: error_info} do + NotificationSender.async_trigger_notify(@settings_groupping_time, error_info) + + refute_receive({:notify_called, _}, @throttle - 50) + end + + test "it sends the notification after throttle", %{error_info: error_info} do + NotificationSender.async_trigger_notify(@settings_groupping_time, error_info) + + assert_receive({:notify_called, _}, @throttle + 50) + end + + test "it throttles repeated error notifications", %{error_info: error_info} do + for _ <- 1..20 do + NotificationSender.async_trigger_notify(@settings_groupping_time, error_info) + end + + state = :sys.get_state(NotificationSender) + assert_receive({:notify_called, _}, @throttle + 50) + error_key = error_info.key + assert %{^error_key => _} = state + assert state |> Map.keys() |> length() == 1 + end + + test "it sends a throttle notification if time limit is present", %{error_info: error_info} do + settings = + @settings_groupping_time + |> Keyword.merge( + throttle: 400, + time_limit: 100 + ) + + for _ <- 1..3 do + NotificationSender.async_trigger_notify( + settings, + error_info |> Map.put(:timestamp, DateTime.utc_now()) + ) + + :timer.sleep(100) + end + + assert_received({:notify_called, _}) + end + end end