Skip to content

Commit

Permalink
dashboard: add panels for expirationd module statistics
Browse files Browse the repository at this point in the history
Statistics for the expirationd module were introduced in
expirationd 1.2.0 [1]. The statistics are integrated with metrics and
enabled by default.

To disable statistics integrated with metrics, call:

expirationd.cfg({
    metrics = false
})

This patch adds panels for counters:
- expirationd_checked_count
- expirationd_expired_count
- expirationd_restarts
- expirationd_working_time

The meaning of the counters is the same as for expirationd.stats() [2].

The expirationd panels are stored in the "expirationd module statistics"
section. They are mostly copied from TDG's dashboard [3][4].

1. https://github.com/tarantool/expirationd/releases/tag/1.2.0
2. https://tarantool.github.io/expirationd/#stats
3. ca6e0e1
4. https://github.com/tarantool/grafana-dashboard/blob/0c623e0fae8e526976ed70da5e5f5a6640856275/dashboard/panels/tdg/expirationd.libsonnet

Closes #149
  • Loading branch information
oleg-jukovec committed Jun 29, 2022
1 parent 0c623e0 commit dee2252
Show file tree
Hide file tree
Showing 11 changed files with 2,159 additions and 12 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Added
- expirationd dashboard

## [1.2.1] - 2022-06-24
Grafana revisions: [InfluxDB revision 12](https://grafana.com/api/dashboards/12567/revisions/12/download), [Prometheus revision 12](https://grafana.com/api/dashboards/13054/revisions/12/download), [InfluxDB TDG revision 2](https://grafana.com/api/dashboards/16405/revisions/2/download), [Prometheus TDG revision 2](https://grafana.com/api/dashboards/16406/revisions/2/download).

Expand Down
6 changes: 6 additions & 0 deletions dashboard/influxdb_dashboard.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,10 @@ dashboard.new(
policy=variable.influxdb.policy,
measurement=variable.influxdb.measurement,
)
).addPanels(
section.expirationd(
datasource=variable.datasource.influxdb,
policy=variable.influxdb.policy,
measurement=variable.influxdb.measurement,
)
)
158 changes: 158 additions & 0 deletions dashboard/panels/expirationd.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
local common_utils = import 'common.libsonnet';
local grafana = import 'grafonnet/grafana.libsonnet';

local influxdb = grafana.influxdb;
local prometheus = grafana.prometheus;

{
// Header for this group of panels, built by common_utils.row with the
// title 'expirationd module statistics'.
row:: common_utils.row('expirationd module statistics'),

// Builds a query target that reads `metric_name` as-is (no rate applied).
//
// datasource:  '${DS_PROMETHEUS}' or '${DS_INFLUXDB}'; any other value
//              falls through both branches and yields null.
// metric_name: name of the metric to query.
// job:         Prometheus-only, substituted into the `job=~"..."` matcher.
// policy, measurement: InfluxDB-only, forwarded to influxdb.target.
local target(
  datasource,
  metric_name,
  job=null,
  policy=null,
  measurement=null,
) =
  // Both locals are lazy: each is evaluated only by the branch that uses it.
  local prometheus_query = std.format('%s{job=~"%s"}', [metric_name, job]);
  local influx_group_tags = ['label_pairs_alias', 'label_pairs_name'];

  if datasource == '${DS_PROMETHEUS}' then
    prometheus.target(
      expr=prometheus_query,
      legendFormat='{{name}} — {{alias}}',
    )
  else if datasource == '${DS_INFLUXDB}' then
    influxdb.target(
      policy=policy,
      measurement=measurement,
      group_tags=influx_group_tags,
      alias='$tag_label_pairs_name — $tag_label_pairs_alias',
    )
    .where('metric_name', '=', metric_name)
    .selectField('value')
    .addConverter('mean'),

// Builds a query target that shows the per-second rate of `metric_name`.
//
// Prometheus: wraps the selector in rate(...[rate_time_range]).
// InfluxDB:   takes the mean of `value` and applies
//             non_negative_derivative with a '1s' unit.
//
// datasource:  '${DS_PROMETHEUS}' or '${DS_INFLUXDB}'; any other value
//              falls through both branches and yields null.
// metric_name: name of the counter to query.
// job, rate_time_range: Prometheus-only.
// policy, measurement:  InfluxDB-only, forwarded to influxdb.target.
local rps_target(
  datasource,
  metric_name,
  job=null,
  rate_time_range=null,
  policy=null,
  measurement=null,
) =
  // Both locals are lazy: each is evaluated only by the branch that uses it.
  local prometheus_query = std.format(
    'rate(%s{job=~"%s"}[%s])',
    [metric_name, job, rate_time_range]
  );
  local influx_group_tags = ['label_pairs_alias', 'label_pairs_name'];

  if datasource == '${DS_PROMETHEUS}' then
    prometheus.target(
      expr=prometheus_query,
      legendFormat='{{name}} — {{alias}}',
    )
  else if datasource == '${DS_INFLUXDB}' then
    influxdb.target(
      policy=policy,
      measurement=measurement,
      group_tags=influx_group_tags,
      alias='$tag_label_pairs_name — $tag_label_pairs_alias',
    )
    .where('metric_name', '=', metric_name)
    .selectField('value')
    .addConverter('mean')
    .addConverter('non_negative_derivative', ['1s']),

// Graph panel: per-second rate of the `expirationd_checked_count` counter
// (tuples checked for expiration, i.e. expired + skipped).
//
// title, description: panel texts; the default description is passed
//                     through common_utils.rate_warning.
// datasource:         datasource variable, also selects the query flavour.
// policy, measurement:  InfluxDB-only query parameters.
// job, rate_time_range: Prometheus-only query parameters.
tuples_checked(
  title='Tuples checked',
  // `datasource` is forwarded to rate_warning, exactly as tuples_expired
  // does, so the warning text can take the datasource into account.
  description=common_utils.rate_warning(|||
    A number of task tuples checked for expiration (expired + skipped).
    Graph shows mean tuples per second.
  |||, datasource),
  datasource=null,
  policy=null,
  measurement=null,
  job=null,
  rate_time_range=null,
):: common_utils.default_graph(
  title=title,
  description=description,
  datasource=datasource,
  labelY1='tuples per second',
  panel_width=12,
).addTarget(rps_target(
  datasource,
  'expirationd_checked_count',
  job,
  rate_time_range,
  policy,
  measurement,
)),

// Graph panel: per-second rate of the `expirationd_expired_count` counter.
//
// title, description: panel texts; the default description is passed
//                     through common_utils.rate_warning together with
//                     the datasource.
// datasource:         datasource variable, also selects the query flavour.
// policy, measurement:  InfluxDB-only query parameters.
// job, rate_time_range: Prometheus-only query parameters.
tuples_expired(
  title='Tuples expired',
  description=common_utils.rate_warning(|||
    A number of task expired tuples.
    Graph shows mean tuples per second.
  |||, datasource),
  datasource=null,
  policy=null,
  measurement=null,
  job=null,
  rate_time_range=null,
)::
  local expired_rate = rps_target(
    datasource=datasource,
    metric_name='expirationd_expired_count',
    job=job,
    rate_time_range=rate_time_range,
    policy=policy,
    measurement=measurement,
  );
  local graph = common_utils.default_graph(
    title=title,
    description=description,
    datasource=datasource,
    labelY1='tuples per second',
    panel_width=12,
  );
  graph.addTarget(expired_rate),

// Graph panel: current value of the `expirationd_restarts` metric,
// displayed without decimals.
//
// title, description: panel texts.
// datasource:         datasource variable, also selects the query flavour.
// policy, measurement: InfluxDB-only query parameters.
// job:                 Prometheus-only query parameter.
restarts(
  title='Restart count',
  description=|||
    A number of task restarts since start.
    From the start is equal to 1.
  |||,
  datasource=null,
  policy=null,
  measurement=null,
  job=null,
)::
  local restarts_value = target(
    datasource=datasource,
    metric_name='expirationd_restarts',
    job=job,
    policy=policy,
    measurement=measurement,
  );
  local graph = common_utils.default_graph(
    title=title,
    description=description,
    datasource=datasource,
    decimals=0,
    panel_width=12,
  );
  graph.addTarget(restarts_value),

// Graph panel: current value of the `expirationd_working_time` metric.
// The axis format is 's' (presumably seconds; confirm against
// common_utils.default_graph).
//
// title, description: panel texts.
// datasource:         datasource variable, also selects the query flavour.
// policy, measurement: InfluxDB-only query parameters.
// job:                 Prometheus-only query parameter.
operation_time(
  title='Operation time',
  description=|||
    A task's operation time.
  |||,
  datasource=null,
  policy=null,
  measurement=null,
  job=null,
)::
  local working_time = target(
    datasource=datasource,
    metric_name='expirationd_working_time',
    job=job,
    policy=policy,
    measurement=measurement,
  );
  local graph = common_utils.default_graph(
    title=title,
    description=description,
    datasource=datasource,
    format='s',
    panel_width=12,
  );
  graph.addTarget(working_time),
}
6 changes: 6 additions & 0 deletions dashboard/prometheus_dashboard.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,10 @@ dashboard.new(
job=variable.prometheus.job,
rate_time_range=variable.prometheus.rate_time_range,
)
).addPanels(
section.expirationd(
datasource=variable.datasource.prometheus,
job=variable.prometheus.job,
rate_time_range=variable.prometheus.rate_time_range,
)
)
35 changes: 35 additions & 0 deletions dashboard/section.libsonnet
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
local cluster = import 'panels/cluster.libsonnet';
local cpu = import 'panels/cpu.libsonnet';
local crud = import 'panels/crud.libsonnet';
local expirationd = import 'panels/expirationd.libsonnet';
local http = import 'panels/http.libsonnet';
local luajit = import 'panels/luajit.libsonnet';
local net = import 'panels/net.libsonnet';
Expand Down Expand Up @@ -1227,6 +1228,40 @@ local tdg_tuples = import 'panels/tdg/tuples.libsonnet';
),
],

// "expirationd module statistics" section: the row header followed by the
// four expirationd panels, in display order.
//
// datasource:           datasource variable shared by every panel.
// policy, measurement:  InfluxDB-only query parameters.
// job, rate_time_range: Prometheus-only query parameters; rate_time_range
//                       is used only by the two rate panels.
expirationd(datasource, policy=null, measurement=null, job=null, rate_time_range=null)::
  local checked_panel = expirationd.tuples_checked(
    datasource=datasource,
    job=job,
    rate_time_range=rate_time_range,
    policy=policy,
    measurement=measurement,
  );
  local expired_panel = expirationd.tuples_expired(
    datasource=datasource,
    job=job,
    rate_time_range=rate_time_range,
    policy=policy,
    measurement=measurement,
  );
  local restarts_panel = expirationd.restarts(
    datasource=datasource,
    job=job,
    policy=policy,
    measurement=measurement,
  );
  local operation_time_panel = expirationd.operation_time(
    datasource=datasource,
    job=job,
    policy=policy,
    measurement=measurement,
  );
  [
    expirationd.row,
    checked_panel,
    expired_panel,
    restarts_panel,
    operation_time_panel,
  ],

tdg_kafka_common(datasource, policy=null, measurement=null, job=null, rate_time_range=null):: [
tdg_kafka_common.row,

Expand Down
18 changes: 18 additions & 0 deletions example_cluster/project/generate_load.lua
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,24 @@ local load_generators = {
generate_crud_load,
}

-- Start one expirationd task per space on every instance whose name
-- contains "router", so that the expirationd metrics have data to report.
--
-- NOTE(review): only instances matching 'router' are selected. Confirm that
-- these instances actually own MY_SPACE / MY_VINYL_SPACE; if they do not,
-- no task is started at all.
for name, instance in pairs(instances) do
    if name:match('router') ~= nil then
        -- Look the spaces up one by one. Building {space_a, space_b}
        -- directly leaves a nil hole when the first space is missing, and
        -- ipairs would then silently skip the second space as well.
        for _, space_name in ipairs({'MY_SPACE', 'MY_VINYL_SPACE'}) do
            local space = instance.net_box.space[space_name]
            if space ~= nil then
                local task_name = name .. "_" .. space.name
                -- The task runs on the remote instance:
                --  * half_true reports a tuple as expired at random,
                --    with a 50% chance per call;
                --  * always_true is passed as process_expired_tuple, so
                --    the expired-tuple handler does nothing except return
                --    true;
                --  * force = true: see the expirationd.start options
                --    (presumably starts the task regardless of the
                --    instance's role; confirm against the docs).
                local eval_str = string.format([[
                    local expirationd = require('expirationd')
                    local half_true = function() return math.random(0, 1) == 0 end
                    local always_true = function() return true end
                    expirationd.start("%s", %d, half_true, {
                        process_expired_tuple = always_true,
                        force = true })
                ]], task_name, space.id)
                instance.net_box:eval(eval_str)
            end
        end
    end
end

while true do
for name, instance in pairs(instances) do
for _, load_generator in ipairs(load_generators) do
Expand Down
1 change: 1 addition & 0 deletions example_cluster/project/project-scm-1.rockspec
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependencies = {
'metrics == 0.13.0-1',
'cartridge-cli-extensions == 1.1.1-1',
'crud == 0.11.1',
'expirationd == 1.2.0',
}
build = {
type = 'none';
Expand Down
Loading

0 comments on commit dee2252

Please sign in to comment.