Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

glb-director-xdp: Add statsd metrics. #113

Merged
merged 5 commits into from
Sep 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions script/cibuild
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ script/cibuild-prepare
script/test-glb-redirect
script/test-glb-healthcheck
script/test-glb-director
script/test-glb-director-xdp
67 changes: 53 additions & 14 deletions src/glb-director-xdp/bpf/glb_encap.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <string.h>

#include "bpf_helpers.h"
#include "glb_stats.h"

#include <glb-hashing/glb_gue.h>
#include <glb-hashing/pdnet.h>
Expand Down Expand Up @@ -95,6 +96,14 @@ struct bpf_map_def SEC("maps") glb_table_secrets = {
.max_entries = 4096,
};

/*
 * Global packet counters for the XDP pipeline.
 *
 * A per-CPU array map holding a single `struct glb_global_stats` value, keyed
 * by a uint32_t index. The XDP program looks up key 0 and increments fields
 * of the returned struct; userspace opens the map by the name
 * "glb_global_packet_counters" and reads the per-CPU values of that key.
 */
struct bpf_map_def SEC("maps") glb_global_packet_counters = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(uint32_t),
.value_size = sizeof(struct glb_global_stats),
/* Only one entry (key 0) is needed; an array type is used because the
 * PERCPU_* map types only come in multi-element flavours. */
.max_entries = 1,
};

static __always_inline uint16_t compute_ipv4_checksum(void *iph) {
uint16_t *iph16 = (uint16_t *)iph;

Expand All @@ -117,7 +126,7 @@ static __always_inline uint16_t compute_ipv4_checksum(void *iph) {
* Expects that `eth_hdr` points to ROUTE_CONTEXT_ENCAP_SIZE(ctx) bytes of free space
* before the inner/original IP packet header begins.
*/
static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth_hdr, void *data_end, glb_route_context *route_context)
static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth_hdr, void *data_end, glb_route_context *route_context, struct glb_global_stats *g_stats)
{
if (route_context == NULL)
return XDP_DROP;
Expand All @@ -136,8 +145,10 @@ static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth

uint32_t config_bit = 0;
struct pdnet_mac_addr *gw_mac = (struct pdnet_mac_addr *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (gw_mac == NULL)
if (gw_mac == NULL) {
g_stats->ErrorMissingGatewayMAC++;
return XDP_DROP;
}
eth_hdr->src_addr = route_context->orig_dst_mac;
eth_hdr->dst_addr = *gw_mac;
eth_hdr->ether_type = htons(PDNET_ETHER_TYPE_IPV4);
Expand All @@ -149,8 +160,10 @@ static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth

config_bit = 1;
uint32_t *src_ip = (uint32_t *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (src_ip == NULL)
if (src_ip == NULL) {
g_stats->ErrorMissingSourceAddress++;
return XDP_DROP;
}

glb_bpf_printk(" src_ip: 0x%x\n", *src_ip);

Expand Down Expand Up @@ -224,6 +237,7 @@ static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth

glb_bpf_printk(" encaped!\n");

g_stats->Encapsulated++;
return XDP_TX;
}

Expand All @@ -233,6 +247,11 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

// cat /sys/kernel/debug/tracing/trace_pipe
glb_bpf_printk("Greetings\n");

uint32_t stat = 0;
struct glb_global_stats *g_stats = bpf_map_lookup_elem(&glb_global_packet_counters, &stat);
if (g_stats == NULL) return XDP_PASS; /* this should always succeed, but we must bail if not for eBPF verifier */
g_stats->Processed++;

int rc = XDP_PASS;

Expand All @@ -242,8 +261,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {
route_context.packet_end = data_end;
rc = glb_extract_packet_fields(&route_context);
glb_bpf_printk(" parse rc = %d\n", rc);
if (rc != 0)
if (rc != 0) {
g_stats->UnknownFormat++;
return XDP_PASS;
}

glb_bpf_printk(" dst_addr: 0x%x\n", route_context.dst_addr.ipv4);
glb_bpf_printk(" src_addr: 0x%x\n", route_context.src_addr.ipv4);
Expand All @@ -264,26 +285,36 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {
glb_bpf_printk(" bind proto: 0x%x\n", bind.proto);

uint32_t *table_id_ptr = (uint32_t *)bpf_map_lookup_elem(&glb_binds, &bind);
if (table_id_ptr == NULL)
if (table_id_ptr == NULL) {
g_stats->NoMatchingBind++;
return XDP_PASS;
}

g_stats->Matched++;

uint32_t table_id = *table_id_ptr;
glb_bpf_printk(" bind maps to table id: %d\n", table_id);

struct bpf_map_def *table = (struct bpf_map_def *)bpf_map_lookup_elem(&glb_tables, &table_id);
glb_bpf_printk(" bind maps to table fd: 0x%p\n", table);
if (table == NULL)
if (table == NULL) {
g_stats->ErrorTable++;
return XDP_PASS; // we don't know
}

uint8_t *secret = (uint8_t *)bpf_map_lookup_elem(&glb_table_secrets, &table_id);
glb_bpf_printk(" table secret: 0x%p\n", secret);
if (secret == NULL)
if (secret == NULL) {
g_stats->ErrorSecret++;
return XDP_PASS; // we don't have a valid secret, bail
}

uint32_t config_bit = 3;
glb_director_hash_fields *hf_cfg_ptr = (glb_director_hash_fields *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (hf_cfg_ptr == NULL)
if (hf_cfg_ptr == NULL) {
g_stats->ErrorHashConfig++;
return XDP_PASS;
}

// glb_bpf_printk(" dst_addr: 0x%x\n", route_context.dst_addr.ipv4);
// glb_bpf_printk(" src_addr: 0x%x\n", route_context.src_addr.ipv4);
Expand All @@ -298,8 +329,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

glb_bpf_printk(" which is tableRow %d: 0x%p\n", tableRowIndex, tableRow);

if (tableRow == NULL)
if (tableRow == NULL) {
g_stats->ErrorMissingRow++;
return XDP_PASS; // we don't know
}

glb_bpf_printk(" table primary: %d\n", tableRow[0]);
glb_bpf_printk(" table secondary: %d\n", tableRow[1]);
Expand All @@ -313,8 +346,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

config_bit = 4;
glb_director_hash_fields *hf_cfg_alt_ptr = (glb_director_hash_fields *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (hf_cfg_alt_ptr == NULL)
if (hf_cfg_alt_ptr == NULL) {
g_stats->ErrorHashConfig++;
return XDP_PASS;
}

if (hf_cfg_alt_ptr->dst_addr || hf_cfg_alt_ptr->dst_port || hf_cfg_alt_ptr->src_addr || hf_cfg_alt_ptr->src_port) {
uint64_t hash = glb_compute_hash(&route_context, secret, hf_cfg_alt_ptr);
Expand All @@ -325,8 +360,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

glb_bpf_printk(" which is tableRow (alt) %d: 0x%p\n", tableRowIndex, tableRow);

if (tableRow == NULL)
if (tableRow == NULL) {
g_stats->ErrorMissingRow++;
return XDP_PASS; // we don't know
}

glb_bpf_printk(" table (alt) primary: %d\n", tableRow[0]);
glb_bpf_printk(" table (alt) secondary: %d\n", tableRow[1]);
Expand All @@ -340,16 +377,18 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

// encapsulate!
// we want to essentially remove (add to our start) an eth and add (subtract from our start) all the bits we need.
if (bpf_xdp_adjust_head(ctx, (int)sizeof(struct pdnet_ethernet_hdr) - (int)ROUTE_CONTEXT_ENCAP_SIZE(&route_context)))
if (bpf_xdp_adjust_head(ctx, (int)sizeof(struct pdnet_ethernet_hdr) - (int)ROUTE_CONTEXT_ENCAP_SIZE(&route_context))) {
g_stats->ErrorCreatingSpace++;
return XDP_DROP;
}

/* these must be retrieved again after the adjust_head */
data = (void*)(long)ctx->data;
data_end = (void *)(long)ctx->data_end;
if (data + ROUTE_CONTEXT_ENCAP_SIZE(&route_context) > data_end)
if (data + ROUTE_CONTEXT_ENCAP_SIZE(&route_context) > data_end) /* this is just to let the compiler know we checked for safety */
return XDP_DROP;

return glb_encapsulate_packet(data, data_end, &route_context);
return glb_encapsulate_packet(data, data_end, &route_context, g_stats);
}

SEC("xdp/xdp_glb_director")
Expand Down
45 changes: 45 additions & 0 deletions src/glb-director-xdp/bpf/glb_stats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#ifndef _GLB_STATS_H_
#define _GLB_STATS_H_

/* uint64_t is used below; include it here so this header is self-contained
 * and does not depend on the include order of whoever pulls it in.
 */
#include <stdint.h>

/*
 * Global (not per-table) packet counters maintained by the XDP program.
 *
 * NOTE: these fields follow Go naming conventions, because they are an interface with
 * cgo which needs the fields to be capitalised to be exported and binary-unmarshal-able.
 *
 * The struct is read from userspace as raw bytes, so the order, count and
 * width of the fields form a binary contract: do not reorder or resize
 * existing fields.
 */
typedef struct glb_global_stats {
	/* The number of packets entering the XDP pipeline. */
	uint64_t Processed;
	/* The number of packets that couldn't be parsed because they were not one of the
	 * protocols we know how to parse.
	 * This isn't always an error, since we listen for any packets on the host.
	 */
	uint64_t UnknownFormat;
	/* The number of packets that we could successfully parse, but which didn't match a bind.
	 * This is also expected in production.
	 */
	uint64_t NoMatchingBind;
	/* The number of processed packets that matched a bind and should be included in the table stats. */
	uint64_t Matched;

	/* The number of packets that made it all the way through to encapsulation and transmit. */
	uint64_t Encapsulated;

	/* The errors below are unexpected, and we generally expect none of them to occur.
	 * They might be useful to debug why the system isn't behaving as expected.
	 */

	/* Internal Error: A bind referenced a table that we then couldn't look up. */
	uint64_t ErrorTable;
	/* Internal Error: A bind referenced a table with no hashing secret. */
	uint64_t ErrorSecret;
	/* Internal Error: The hash field configuration couldn't be retrieved. */
	uint64_t ErrorHashConfig;
	/* Internal Error: We looked up a table, but the table didn't have a row where we expected one. */
	uint64_t ErrorMissingRow;
	/* Internal Error: We tried to create space (at the front) to encapsulate the packet, but this failed. */
	uint64_t ErrorCreatingSpace;
	/* Internal Error: The outbound gateway MAC address could not be read from configuration. */
	uint64_t ErrorMissingGatewayMAC;
	/* Internal Error: The local machine's source IP address could not be read from configuration. */
	uint64_t ErrorMissingSourceAddress;
} glb_global_stats;

#endif
1 change: 1 addition & 0 deletions src/glb-director-xdp/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/github/glb-director/src/glb-director-xdp
go 1.14

require (
github.com/DataDog/datadog-go v4.0.0+incompatible // indirect
github.com/cilium/ebpf v0.0.0-20200901135951-4048cd641690
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815
Expand Down
2 changes: 2 additions & 0 deletions src/glb-director-xdp/go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
github.com/DataDog/datadog-go v4.0.0+incompatible h1:Dq8Dr+4sV1gBO1sHDWdW+4G+PdsA+YSJOK925MxrrCY=
github.com/DataDog/datadog-go v4.0.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/cilium/ebpf v0.0.0-20200901135951-4048cd641690 h1:GpWvisoWNFzz/RB0fMgyYUcFmF4G5MlYD9uUHoofayo=
github.com/cilium/ebpf v0.0.0-20200901135951-4048cd641690/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
Expand Down
85 changes: 84 additions & 1 deletion src/glb-director-xdp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package main

import (
"github.com/DataDog/datadog-go/statsd"
"github.com/cilium/ebpf"
"github.com/coreos/go-systemd/daemon"
"github.com/docopt/docopt-go"
Expand All @@ -58,6 +59,7 @@ import (
#include <stdint.h>
#include "../glb-hashing/pdnet.h"

#include "bpf/glb_stats.h"

// meh
#include <sys/resource.h>
Expand Down Expand Up @@ -155,6 +157,8 @@ type GLBDirectorConfig struct {
DstPort bool `json:"dst_port"`
} `json:"alt_hash_fields"`

StatsdPort uint16 `json:"statsd_port"`

// unused by XDP version: num_worker_queues, flow_paths, lcores
}

Expand All @@ -180,6 +184,8 @@ type Application struct {
TableSpec *ebpf.MapSpec

ForwardingTablePath string

StatsClient *statsd.Client
}

func boolToC(a bool) C.uchar {
Expand Down Expand Up @@ -342,6 +348,82 @@ func (app *Application) ReloadForwardingTable() {
}
}

// InitStatsCollection sets up statsd reporting when a statsd port is
// configured. It creates a client pointed at 127.0.0.1 on that port, stores
// it on the application, and starts a background goroutine that reports the
// global packet counters. With a zero StatsdPort it does nothing.
//
// Failure to create the client, or a missing glb_global_packet_counters map
// in the loaded collection, is fatal.
func (app *Application) InitStatsCollection() {
	port := app.Config.StatsdPort
	if port == 0 {
		// Stats reporting is opt-in via configuration.
		return
	}

	addr := fmt.Sprintf("127.0.0.1:%d", port)
	statsClient, err := statsd.New(addr)
	if err != nil {
		log.Fatal(err)
	}

	// Every metric is prefixed and tagged so it is identifiable as coming
	// from the XDP engine.
	statsClient.Namespace = "glb.director."
	statsClient.Tags = append(statsClient.Tags, "glb_engine:xdp")
	app.StatsClient = statsClient

	counters := app.Collection.Maps["glb_global_packet_counters"]
	if counters == nil {
		log.Fatal("Could not load map glb_global_packet_counters")
	}

	go app.runStatsCollection(counters)
}

// diffAndSumGlobalStats computes, for every counter, how much it grew between
// two per-CPU snapshots and adds those deltas up across CPUs.
//
// last and curr are indexed by CPU; element i of each describes the same CPU.
// The result holds, per field, the sum over CPUs of curr[i] - last[i].
//
// Only CPUs present in both snapshots are compared. If the slices differ in
// length the extra entries are ignored instead of causing an out-of-range
// panic.
func diffAndSumGlobalStats(last []C.glb_global_stats, curr []C.glb_global_stats) C.glb_global_stats {
	sum := C.glb_global_stats{}

	cpuCount := len(last)
	if len(curr) < cpuCount {
		cpuCount = len(curr)
	}

	for cpuIndex := 0; cpuIndex < cpuCount; cpuIndex++ {
		prev, now := &last[cpuIndex], &curr[cpuIndex]

		sum.Processed += now.Processed - prev.Processed
		sum.Encapsulated += now.Encapsulated - prev.Encapsulated

		sum.UnknownFormat += now.UnknownFormat - prev.UnknownFormat
		sum.NoMatchingBind += now.NoMatchingBind - prev.NoMatchingBind
		sum.Matched += now.Matched - prev.Matched

		sum.ErrorTable += now.ErrorTable - prev.ErrorTable
		sum.ErrorSecret += now.ErrorSecret - prev.ErrorSecret
		sum.ErrorHashConfig += now.ErrorHashConfig - prev.ErrorHashConfig
		sum.ErrorMissingRow += now.ErrorMissingRow - prev.ErrorMissingRow
		sum.ErrorCreatingSpace += now.ErrorCreatingSpace - prev.ErrorCreatingSpace
		sum.ErrorMissingGatewayMAC += now.ErrorMissingGatewayMAC - prev.ErrorMissingGatewayMAC
		sum.ErrorMissingSourceAddress += now.ErrorMissingSourceAddress - prev.ErrorMissingSourceAddress
	}

	return sum
}

// runStatsCollection periodically reads the per-CPU global packet counters
// from globalCounters (key 0) and reports the growth since the previous read
// to statsd as counts. It loops forever and is meant to run in its own
// goroutine.
//
// Map lookup failures are logged and that interval is skipped; the next
// successful read is then diffed against the last successful snapshot.
// Errors from the statsd client are deliberately ignored (best-effort
// metrics).
func (app *Application) runStatsCollection(globalCounters *ebpf.Map) {
	var lastGlobalValues []C.struct_glb_global_stats
	// grab the data at the start, this will ensure that at the first tick we can immediately emit data.
	if err := globalCounters.Lookup(uint32(0), &lastGlobalValues); err != nil {
		log.Printf("glb stats: initial read of global packet counters failed: %v", err)
		// Make sure a failed read never leaves a partial baseline behind;
		// the first successful tick will establish it instead.
		lastGlobalValues = nil
	}

	for range time.Tick(10 * time.Second) {
		var globalValues []C.struct_glb_global_stats

		if err := globalCounters.Lookup(uint32(0), &globalValues); err != nil {
			log.Printf("glb stats: reading global packet counters failed: %v", err)
			continue
		}

		// Without a baseline there is nothing to diff against yet.
		if len(lastGlobalValues) > 0 {
			sum := diffAndSumGlobalStats(lastGlobalValues, globalValues)
			app.StatsClient.Count("packets.processed", int64(sum.Processed), nil, 1)
			app.StatsClient.Count("packets.encapsulated", int64(sum.Encapsulated), nil, 1)

			app.StatsClient.Count("packets.results", int64(sum.UnknownFormat), []string{"result:UnknownFormat"}, 1)
			app.StatsClient.Count("packets.results", int64(sum.NoMatchingBind), []string{"result:NoMatchingBind"}, 1)
			app.StatsClient.Count("packets.results", int64(sum.Matched), []string{"result:Matched"}, 1)

			app.StatsClient.Count("packets.internal_errors", int64(sum.ErrorTable), []string{"error:ErrorTable"}, 1)
			app.StatsClient.Count("packets.internal_errors", int64(sum.ErrorSecret), []string{"error:ErrorSecret"}, 1)
			app.StatsClient.Count("packets.internal_errors", int64(sum.ErrorHashConfig), []string{"error:ErrorHashConfig"}, 1)
			app.StatsClient.Count("packets.internal_errors", int64(sum.ErrorMissingRow), []string{"error:ErrorMissingRow"}, 1)
			app.StatsClient.Count("packets.internal_errors", int64(sum.ErrorCreatingSpace), []string{"error:ErrorCreatingSpace"}, 1)
			app.StatsClient.Count("packets.internal_errors", int64(sum.ErrorMissingGatewayMAC), []string{"error:ErrorMissingGatewayMAC"}, 1)
			app.StatsClient.Count("packets.internal_errors", int64(sum.ErrorMissingSourceAddress), []string{"error:ErrorMissingSourceAddress"}, 1)
		}

		lastGlobalValues = globalValues
	}
}

func gracefullReloadByExec() {
fmt.Printf("Reloading by exec-ing a new version of glb-director-xdp\n")

Expand Down Expand Up @@ -372,7 +454,7 @@ func gracefullReloadByExec() {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Env = append(os.Environ(),
"NOTIFY_SOCKET=" + readySock,
"NOTIFY_SOCKET="+readySock,
)

err = cmd.Start()
Expand Down Expand Up @@ -506,6 +588,7 @@ func main() {

// load up our entire config/forwarding table before we attach
// this makes the attach itself the atomic cut-over between reloads.
app.InitStatsCollection()
app.SyncConfigMap()
app.ReloadForwardingTable()

Expand Down
2 changes: 1 addition & 1 deletion src/glb-director/packaging/version.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
GLB_DIRECTOR_VERSION="1.0.6"
GLB_DIRECTOR_VERSION="1.0.7"
Loading