Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make ECMP hashing for GTP-U traffic TEID-based #232

Merged
merged 13 commits into from
Apr 20, 2021
6 changes: 5 additions & 1 deletion .jenkins/pr_verify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ echo "Build all profiles using SDE ${SDE_P4C_DOCKER_IMG}..."
# Pull first to avoid pulling multiple times in parallel by the make jobs
docker pull "${SDE_P4C_DOCKER_IMG}"
# Jenkins uses 8 cores 15G VM
make -j8 all
# The 'all' target is commented out because the job exceeded the 45-minute limit on Jenkins.
# TODO: revert once the PTF tests execution time is optimized
# make -j8 all
make -j8 fabric-int
make -j8 fabric-spgw-int

echo "Build and verify Java pipeconf"
make constants pipeconf MVN_FLAGS="-Pci-verify -Pcoverage"
Expand Down
44 changes: 41 additions & 3 deletions p4src/include/control/hasher.p4
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,54 @@
#include "../define.p4"
#include "../header.p4"

// Used for ECMP hashing.
// Set of fields fed to Hasher's gtp_flow_hasher to derive fabric_md.ecmp_hash
// for GTP-U traffic. Hasher fills it either from the parsed headers (when
// hdr.gtpu is valid) or, under WITH_SPGW, from the bridged SPGW tunnel
// metadata (when needs_gtpu_encap is set).
struct gtp_flow_t {
// IPv4 source address of the GTP-U tunnel
// (hdr.ipv4.src_addr or spgw.gtpu_tunnel_sip).
bit<32> ipv4_src;
// IPv4 destination address of the GTP-U tunnel
// (hdr.ipv4.dst_addr or spgw.gtpu_tunnel_dip).
bit<32> ipv4_dst;
// GTP-U Tunnel Endpoint Identifier (hdr.gtpu.teid or spgw.gtpu_teid).
teid_t gtpu_teid;
}

control Hasher(
in parsed_headers_t hdr,
inout fabric_ingress_metadata_t fabric_md) {

Hash<flow_hash_t>(HashAlgorithm_t.CRC32) ipv4_hasher;
Hash<flow_hash_t>(HashAlgorithm_t.CRC32) ip_hasher;
Hash<flow_hash_t>(HashAlgorithm_t.CRC32) gtp_flow_hasher;
ccascone marked this conversation as resolved.
Show resolved Hide resolved
Hash<flow_hash_t>(HashAlgorithm_t.CRC32) non_ip_hasher;

apply {
if (fabric_md.acl_lkp.is_ipv4) {
fabric_md.bridged.base.flow_hash = ipv4_hasher.get(fabric_md.acl_lkp);
gtp_flow_t to_hash;
bool calc_gtp_hash = false;

// we always need to calculate hash from the inner IPv4 header for the INT reporter.
fabric_md.bridged.base.inner_hash = ip_hasher.get(fabric_md.acl_lkp);

// use inner hash by default
fabric_md.ecmp_hash = fabric_md.bridged.base.inner_hash;

// if an outer GTP header exists, use it to perform GTP-aware ECMP
if (hdr.gtpu.isValid()) {
ccascone marked this conversation as resolved.
Show resolved Hide resolved
to_hash.ipv4_src = hdr.ipv4.src_addr;
to_hash.ipv4_dst = hdr.ipv4.dst_addr;
to_hash.gtpu_teid = hdr.gtpu.teid;
calc_gtp_hash = true;
}

#ifdef WITH_SPGW
// enable GTP-aware ECMP for downlink packets.
if (fabric_md.bridged.spgw.needs_gtpu_encap) {
to_hash.ipv4_src = fabric_md.bridged.spgw.gtpu_tunnel_sip;
to_hash.ipv4_dst = fabric_md.bridged.spgw.gtpu_tunnel_dip;
to_hash.gtpu_teid = fabric_md.bridged.spgw.gtpu_teid;
calc_gtp_hash = true;
}
#endif // WITH_SPGW

if (calc_gtp_hash) {
fabric_md.ecmp_hash = gtp_flow_hasher.get(to_hash);
}

}
// FIXME: remove ipv6 support or test it
// https://github.com/stratum/fabric-tna/pull/227
Expand All @@ -28,7 +66,7 @@ control Hasher(
// }
else {
// Not an IP packet
fabric_md.bridged.base.flow_hash = non_ip_hasher.get({
fabric_md.bridged.base.inner_hash = non_ip_hasher.get({
hdr.ethernet.dst_addr,
hdr.ethernet.src_addr,
hdr.eth_type.value
Expand Down
10 changes: 5 additions & 5 deletions p4src/include/control/int.p4
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ control FlowReportFilter(
fabric_md.bridged.base.ig_port,
eg_intr_md.egress_port,
fabric_md.int_md.hop_latency,
fabric_md.bridged.base.flow_hash,
fabric_md.bridged.base.inner_hash,
fabric_md.int_md.timestamp
});
flag = filter_get_and_set1.execute(fabric_md.bridged.base.flow_hash[31:16]);
flag = flag | filter_get_and_set2.execute(fabric_md.bridged.base.flow_hash[15:0]);
flag = filter_get_and_set1.execute(fabric_md.bridged.base.inner_hash[31:16]);
flag = flag | filter_get_and_set2.execute(fabric_md.bridged.base.inner_hash[15:0]);
// Generate report only when ALL register actions detect a change.
if (flag == 1) {
eg_dprsr_md.mirror_type = (bit<3>)FabricMirrorType_t.INVALID;
Expand Down Expand Up @@ -187,7 +187,7 @@ control IntIngress (
fabric_md.int_mirror_md.ip_eth_type = fabric_md.bridged.base.ip_eth_type;
fabric_md.int_mirror_md.eg_port = (bit<16>)ig_tm_md.ucast_egress_port;
fabric_md.int_mirror_md.queue_id = (bit<8>)ig_tm_md.qid;
fabric_md.int_mirror_md.flow_hash = fabric_md.bridged.base.flow_hash;
fabric_md.int_mirror_md.flow_hash = fabric_md.bridged.base.inner_hash;
ig_dprsr_md.drop_ctl = 1;
#ifdef WITH_DEBUG
drop_report_counter.count();
Expand Down Expand Up @@ -406,7 +406,7 @@ control IntEgress (
fabric_md.int_mirror_md.ig_tstamp = fabric_md.bridged.base.ig_tstamp[31:0];
fabric_md.int_mirror_md.eg_tstamp = eg_prsr_md.global_tstamp[31:0];
fabric_md.int_mirror_md.ip_eth_type = fabric_md.bridged.base.ip_eth_type;
fabric_md.int_mirror_md.flow_hash = fabric_md.bridged.base.flow_hash;
fabric_md.int_mirror_md.flow_hash = fabric_md.bridged.base.inner_hash;
// fabric_md.int_mirror_md.vlan_stripped set by egress_vlan table
// fabric_md.int_mirror_md.strip_gtpu will be initialized by the parser
}
Expand Down
2 changes: 1 addition & 1 deletion p4src/include/control/next.p4
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ control Next (inout parsed_headers_t hdr,
table hashed {
key = {
fabric_md.next_id : exact @name("next_id");
fabric_md.bridged.base.flow_hash : selector;
fabric_md.ecmp_hash : selector;
}
actions = {
output_hashed;
Expand Down
3 changes: 2 additions & 1 deletion p4src/include/header.p4
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ struct int_metadata_t {
// See: https://community.intel.com/t5/Intel-Connectivity-Research/Compiler-stuck-when-compiling-P4-code/m-p/1258087
// @flexible
struct bridged_metadata_base_t {
flow_hash_t inner_hash;
mpls_label_t mpls_label;
@padding bit<11> _pad0;
PortId_t ig_port;
Expand All @@ -262,7 +263,6 @@ struct bridged_metadata_base_t {
bit<8> mpls_ttl;
bit<48> ig_tstamp;
bit<16> ip_eth_type;
flow_hash_t flow_hash;
#ifdef WITH_DOUBLE_VLAN_TERMINATION
@padding bit<7> _pad1;
bool push_double_vlan;
Expand Down Expand Up @@ -303,6 +303,7 @@ struct acl_lookup_t {
@pa_auto_init_metadata
struct fabric_ingress_metadata_t {
bridged_metadata_t bridged;
flow_hash_t ecmp_hash;
acl_lookup_t acl_lkp;
bit<32> routing_ipv4_dst; // Outermost
bool skip_forwarding;
Expand Down
248 changes: 248 additions & 0 deletions ptf/tests/ptf/fabric.ptf/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,254 @@ def runTest(self):
self.verify_no_other_packets()


class FabricGtpUnicastEcmpBasedOnTeid(FabricTest):
"""
This test case verifies if the GTP encapsulated traffic
is distributed over next hops by hashing on the TEID.
"""

@tvsetup
@autocleanup
def doRunTest(self, pkt_type):
# In this test we check that packets are forwarded to all ports when we
# change one of the values used for hash calculation and we have an ECMP-like
# distribution.
# In this case, we change TEID for GTP-encapsulated packets
vlan_id = 10
self.set_ingress_port_vlan(self.port1, False, 0, vlan_id)
self.set_forwarding_type(
self.port1,
SWITCH_MAC,
ethertype=ETH_TYPE_IPV4,
fwd_type=FORWARDING_TYPE_UNICAST_IPV4,
)
self.add_forwarding_routing_v4_entry(S1U_SGW_IPV4, 24, 300)
grp_id = 66
mbrs = [
(self.port2, SWITCH_MAC, HOST2_MAC),
(self.port3, SWITCH_MAC, HOST3_MAC),
]
self.add_next_routing_group(300, grp_id, mbrs)
self.set_egress_vlan(self.port2, vlan_id, False)
self.set_egress_vlan(self.port3, vlan_id, False)

pkt = getattr(testutils, "simple_%s_packet" % pkt_type)(
eth_src=HOST1_MAC,
eth_dst=SWITCH_MAC,
ip_src=HOST1_IPV4,
ip_dst=HOST2_IPV4,
ip_ttl=64,
)

# teid_toport list is used to learn the teid that causes the packet
# to be forwarded for each port
teid_toport = [None, None]
for i in range(50):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ccascone do you remember why we need to run 50 times?

Copy link
Member

@ccascone ccascone Apr 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure. I believe the goal was to generate enough packet header combinations to hash on all buckets. This is consistent with other ECMP-related tests, but I agree it can be optimized. If you agree, please add this to #238, and unless you have other concerns I suggest we merge this change as-is.

test_teid = i

pkt_from1 = pkt_add_gtp(
pkt,
out_ipv4_src=S1U_ENB_IPV4,
out_ipv4_dst=S1U_SGW_IPV4,
teid=test_teid,
)

exp_pkt_to2 = pkt_from1.copy()
exp_pkt_to2[Ether].src = SWITCH_MAC
exp_pkt_to2[Ether].dst = HOST2_MAC
exp_pkt_to2[IP].ttl = 63

exp_pkt_to3 = pkt_from1.copy()
exp_pkt_to3[Ether].src = SWITCH_MAC
exp_pkt_to3[Ether].dst = HOST3_MAC
exp_pkt_to3[IP].ttl = 63

self.send_packet(self.port1, pkt_from1)
out_port_indx = self.verify_any_packet_any_port(
[exp_pkt_to2, exp_pkt_to3], [self.port2, self.port3]
)
teid_toport[out_port_indx] = test_teid

pkt_toport2 = pkt_add_gtp(
pkt,
out_ipv4_src=S1U_ENB_IPV4,
out_ipv4_dst=S1U_SGW_IPV4,
teid=teid_toport[0],
)

pkt_toport3 = pkt_add_gtp(
pkt,
out_ipv4_src=S1U_ENB_IPV4,
out_ipv4_dst=S1U_SGW_IPV4,
teid=teid_toport[1],
)

exp_pkt_to2 = pkt_toport2.copy()
exp_pkt_to2[Ether].src = SWITCH_MAC
exp_pkt_to2[Ether].dst = HOST2_MAC
exp_pkt_to2[IP].ttl = 63

exp_pkt_to3 = pkt_toport3.copy()
exp_pkt_to3[Ether].src = SWITCH_MAC
exp_pkt_to3[Ether].dst = HOST3_MAC
exp_pkt_to3[IP].ttl = 63

self.send_packet(self.port1, pkt_toport2)
self.send_packet(self.port1, pkt_toport3)
# In this assertion we are verifying:
# 1) all ports of the same group are used at least once
# 2) consistency of the forwarding decision, i.e. packets with the
# same hashed fields are always forwarded out of the same port
self.verify_each_packet_on_each_port(
[exp_pkt_to2, exp_pkt_to3], [self.port2, self.port3]
)

def runTest(self):
for pkt_type in PKT_TYPES_UNDER_TEST:
self.doRunTest(pkt_type)


@group("spgw")
class FabricSpgwDownlinkEcmpTest(SpgwSimpleTest):
    """
    This test case verifies if traffic from PDN to UEs (downlink) served by the same
    base station is distributed over next hops using GTP-aware load balancing.

    The test installs one downlink flow per UE, each with its own TEID, sends a
    packet for every UE and records which ECMP member forwarded it. It then
    replays one learned flow per member and checks that each of them egresses
    from the same port as before.
    """

    @staticmethod
    def _downlink_pkt(pkt_type, ue_ipv4):
        """Build the un-encapsulated downlink packet sent to the UE `ue_ipv4`."""
        return getattr(testutils, "simple_%s_packet" % pkt_type)(
            eth_src=HOST1_MAC,
            eth_dst=SWITCH_MAC,
            ip_src=UE2_IPV4,
            ip_dst=ue_ipv4,
            ip_ttl=64,
        )

    @staticmethod
    def _expected_encap_pkt(pkt, teid, nexthop_mac):
        """Build the packet expected on egress for `pkt`.

        The result is a copy of `pkt` that is routed (TTL decremented),
        GTP-U encapsulated with `teid`, and rewritten towards `nexthop_mac`.
        """
        exp_pkt = pkt.copy()
        # The inner TTL is decremented before the GTP-U encapsulation is added.
        exp_pkt[IP].ttl = 63
        exp_pkt = pkt_add_gtp(
            exp_pkt,
            out_ipv4_src=S1U_SGW_IPV4,
            out_ipv4_dst=S1U_ENB_IPV4,
            teid=teid,
        )
        exp_pkt[Ether].src = SWITCH_MAC
        exp_pkt[Ether].dst = nexthop_mac
        return exp_pkt

    @tvsetup
    @autocleanup
    def doRunTest(self, pkt_type):
        """Run the downlink ECMP check for packets built with `pkt_type`."""
        vlan_id = 10
        self.set_ingress_port_vlan(self.port1, False, 0, vlan_id)
        self.set_forwarding_type(
            self.port1,
            SWITCH_MAC,
            ethertype=ETH_TYPE_IPV4,
            fwd_type=FORWARDING_TYPE_UNICAST_IPV4,
        )
        self.add_forwarding_routing_v4_entry(S1U_ENB_IPV4, 24, 300)
        grp_id = 66

        # used for this test only
        S1U_ENB_NEXTHOP1_MAC = "00:00:00:00:00:ee"
        S1U_ENB_NEXTHOP2_MAC = "00:00:00:00:00:ff"
        out_ports = [self.port2, self.port3]
        nexthop_macs = [S1U_ENB_NEXTHOP1_MAC, S1U_ENB_NEXTHOP2_MAC]
        mbrs = [
            (self.port2, SWITCH_MAC, S1U_ENB_NEXTHOP1_MAC),
            (self.port3, SWITCH_MAC, S1U_ENB_NEXTHOP2_MAC),
        ]
        self.add_next_routing_group(300, grp_id, mbrs)
        self.set_egress_vlan(self.port2, vlan_id, False)
        self.set_egress_vlan(self.port3, vlan_id, False)

        # ue_ipv4_toport list is used to learn the ue_ipv4 address for a given packet.
        ue_ipv4_toport = [None, None]
        # teid_toport list is used to learn the teid
        # assigned by SPGW for a downlink packet.
        teid_toport = [None, None]
        # Number of distinct (UE, TEID) flows tried in order to hit every
        # member of the ECMP group at least once.
        num_flows = 50
        for i in range(num_flows):
            ue_ipv4 = "10.0.0." + str(i)
            far_id = i
            test_teid = i * 3

            self.setup_downlink(
                s1u_sgw_addr=S1U_SGW_IPV4,
                s1u_enb_addr=S1U_ENB_IPV4,
                teid=test_teid,
                ue_addr=ue_ipv4,
                ctr_id=DOWNLINK_PDR_CTR_IDX,
                far_id=far_id,
            )

            pkt_from1 = self._downlink_pkt(pkt_type, ue_ipv4)
            # The packet may leave from either member; only the destination
            # MAC of the expected packet differs between the two candidates.
            exp_pkts = [
                self._expected_encap_pkt(pkt_from1, test_teid, mac)
                for mac in nexthop_macs
            ]

            self.send_packet(self.port1, pkt_from1)
            out_port_indx = self.verify_any_packet_any_port(exp_pkts, out_ports)
            ue_ipv4_toport[out_port_indx] = ue_ipv4
            teid_toport[out_port_indx] = test_teid

        # Fail with a clear message instead of building packets from None
        # when one of the group members was never selected.
        unused = [idx for idx, teid in enumerate(teid_toport) if teid is None]
        if unused:
            raise AssertionError(
                "No downlink flow out of %d was forwarded to ECMP member(s) "
                "with index %s" % (num_flows, unused)
            )

        # Replay one learned flow per member, in member order.
        replay_pkts = [
            self._downlink_pkt(pkt_type, ue_ipv4) for ue_ipv4 in ue_ipv4_toport
        ]
        exp_pkts = [
            self._expected_encap_pkt(pkt, teid, mac)
            for pkt, teid, mac in zip(replay_pkts, teid_toport, nexthop_macs)
        ]

        for pkt in replay_pkts:
            self.send_packet(self.port1, pkt)
        # In this assertion we are verifying:
        # 1) all ports of the same group are used at least once
        # 2) consistency of the forwarding decision, i.e. packets mapped to
        #    the same GTP tunnel (same tunnel endpoints and TEID) are always
        #    forwarded out of the same port
        self.verify_each_packet_on_each_port(exp_pkts, out_ports)

    def runTest(self):
        """Run doRunTest once for every packet type under test."""
        for pkt_type in PKT_TYPES_UNDER_TEST:
            self.doRunTest(pkt_type)


@group("spgw")
class FabricSpgwDownlinkTest(SpgwSimpleTest):
@tvsetup
Expand Down