[feature] ASIC-focused multicast replication and dendrite API #14


Merged Jul 16, 2025 (28 commits)

Commits (changes shown below are from 4 of the 28 commits)
df62508
[feature] ASIC-focused 1st draft of multicast PRE
zeeshanlakhani Mar 4, 2025
4a945a8
[fix] change stage max
zeeshanlakhani Apr 15, 2025
bb876fa
update chaos tables, revert any p4 formatting for review
zeeshanlakhani Apr 16, 2025
2dcb98e
[minor] comments/align
zeeshanlakhani Apr 16, 2025
3ef78f0
Merge remote-tracking branch 'origin/multicast' into zl/p4-mcast-3
zeeshanlakhani Apr 21, 2025
37498e4
[review] address first pass of review
zeeshanlakhani Apr 21, 2025
d4953df
[review+] meta -> metadata, fix test issues
zeeshanlakhani Apr 21, 2025
dbe684f
[stages] test back to 14
zeeshanlakhani Apr 21, 2025
5537794
..
zeeshanlakhani Apr 22, 2025
1e29a70
[review] validation and underlying api transactions
zeeshanlakhani Apr 23, 2025
8a9fd45
[major changes] Rework groups+dataplane to handle external/underlay/b…
zeeshanlakhani Apr 23, 2025
105a296
Merge remote-tracking branch 'origin/multicast' into zl/p4-mcast
zeeshanlakhani May 22, 2025
1c0e660
[minor] chaos and remove pub
zeeshanlakhani May 22, 2025
57804d1
[minor] openapi update
zeeshanlakhani May 22, 2025
e55ddbc
[update] link-local hop limit handling + counter categories
zeeshanlakhani Jun 3, 2025
09fcec0
Change API into internal/replication vs external
zeeshanlakhani Jun 19, 2025
e3dda26
Merge remote-tracking branch 'origin/multicast' into zl/p4-mcast
zeeshanlakhani Jun 21, 2025
da0c3b3
..
zeeshanlakhani Jun 22, 2025
7158e91
..
zeeshanlakhani Jun 23, 2025
5a26a07
[review] updates on lock handling, ipv6 use, more
zeeshanlakhani Jun 29, 2025
5c096e9
..
zeeshanlakhani Jun 30, 2025
8167280
..
zeeshanlakhani Jun 30, 2025
91cd396
..
zeeshanlakhani Jul 1, 2025
4d6d726
..
zeeshanlakhani Jul 1, 2025
21d9274
[review] updates: locking cleanup and scoped free id(s)
zeeshanlakhani Jul 3, 2025
6f4082e
[review] keep mcast group lock for entire action
zeeshanlakhani Jul 4, 2025
9660726
[review] last bits++
zeeshanlakhani Jul 10, 2025
a3a8abb
minor: squiggly
zeeshanlakhani Jul 10, 2025
2,419 changes: 1,605 additions & 814 deletions dpd-client/tests/integration_tests/mcast.rs

Large diffs are not rendered by default.

93 changes: 25 additions & 68 deletions dpd-client/tests/integration_tests/table_tests.rs
@@ -45,12 +45,9 @@ const IPV4_NAT_TABLE_SIZE: usize = 1024; // nat routing table
const IPV6_NAT_TABLE_SIZE: usize = 1024; // nat routing table
const IPV4_ARP_SIZE: usize = 512; // arp cache
const IPV6_NEIGHBOR_SIZE: usize = 512; // ipv6 neighbor cache
/// Multicast routing tables add two entries for each entry in the
/// replication table, one for each direction (ingress and egress).
///
/// We alternate between IPv4 and IPv6 multicast addresses, so it's
/// 512 entries for each type of address.
const MULTICAST_TABLE_SIZE: usize = 2048;
/// The size of the multicast table related to replication on
/// admin-scoped (internal) multicast groups.
const MULTICAST_TABLE_SIZE: usize = 1024;
const MCAST_TAG: &str = "mcast_table_test"; // multicast group tag

// The result of a table insert or delete API operation.
@@ -76,30 +73,16 @@ fn gen_ipv6_cidr(idx: usize) -> Ipv6Net {
Ipv6Net::new(gen_ipv6_addr(idx), 128).unwrap()
}

/// Generates valid IPv4 multicast addresses that avoid special-purpose ranges
fn gen_ipv4_multicast_addr(idx: usize) -> Ipv4Addr {
// Start with 224.1.0.0 to avoid the 224.0.0.0/24 range
// (which contains link-local multicast)
// 224.0.0.0/24 is reserved for local network control use
let base: u32 = 0xE0010000u32; // hex for 224.1.0.0

// Avoid special-purpose ranges:
// - 232.0.0.0/8 (Source-Specific Multicast)
// - 233.0.0.0/8 (GLOP addressing)
// - 239.0.0.0/8 (Administratively Scoped)
//
// Keep within 224.1.0.0 - 231.255.255.255
let addr: u32 = base + (idx as u32 % 0x00FFFFFF);

// Convert to Ipv4Addr
addr.into()
}

/// Generates valid IPv6 multicast addresses that avoid reserved ranges
// Generates valid IPv6 multicast addresses that are admin-scoped.
fn gen_ipv6_multicast_addr(idx: usize) -> Ipv6Addr {
// Use ff0e::/16 (global scope) to avoid link-local and other reserved scopes
// FF0E is global scope multicast (avoid ff00, ff01, ff02 which are reserved)
Ipv6Addr::new(0xFF0E, 0, 0, 0, 0, 0, 0, (1000 + idx) as u16)
// Use admin-scoped multicast addresses (ff04::/16, ff05::/16, ff08::/16)
// This ensures they will be created as internal groups
let scope = match idx % 3 {
0 => 0xFF04, // admin-scoped
1 => 0xFF05, // admin-scoped
_ => 0xFF08, // admin-scoped
};
Ipv6Addr::new(scope, 0, 0, 0, 0, 0, 0, (1000 + idx) as u16)
}

// For each table we want to test, we define functions to insert, delete, and
@@ -474,8 +457,10 @@ async fn test_routev6_full() -> TestResult {
test_table_capacity::<RouteV6, (), ()>(IPV6_LPM_SIZE).await
}

struct MulticastReplicationTableTest {}

impl TableTest<types::MulticastGroupResponse, ()>
for types::MulticastGroupCreateEntry
for MulticastReplicationTableTest
{
async fn insert_entry(
switch: &Switch,
@@ -484,64 +469,36 @@ impl TableTest<types::MulticastGroupResponse, ()>
let (port_id1, link_id1) = switch.link_id(PhysPort(11)).unwrap();
let (port_id2, link_id2) = switch.link_id(PhysPort(12)).unwrap();

// Alternate between IPv4 and IPv6 based on whether idx is even or odd
let group_ip = if idx % 2 == 0 {
IpAddr::V4(gen_ipv4_multicast_addr(idx))
} else {
IpAddr::V6(gen_ipv6_multicast_addr(idx))
};
// Only IPv6 admin-scoped multicast addresses for replication table testing
let group_ip = IpAddr::V6(gen_ipv6_multicast_addr(idx));

// Create a NAT target
let nat_target = types::NatTarget {
internal_ip: Ipv6Addr::new(0xff05, 0, 0, 0, 0, 0, 0, 1),
inner_mac: MacAddr::new(0xe1, 0xd5, 0x5e, 0x67, 0x89, 0xab).into(),
vni: (100 + idx as u32).into(),
};

// Alternate having a vlan_id based on whether idx is even or odd
let vlan_id = if idx % 2 == 0 {
Some(10 + (idx % 4000) as u16)
} else {
None
};

// Create the multicast group
let group_entry = types::MulticastGroupCreateEntry {
// Admin-scoped IPv6 groups are internal with replication info and members
let internal_entry = types::MulticastGroupCreateEntry {
group_ip,
tag: Some(MCAST_TAG.to_string()),
nat_target: Some(nat_target),
vlan_id,
sources: None,
replication_info: types::MulticastReplicationEntry {
level1_excl_id: Some(10),
level2_excl_id: Some(20),
},
members: vec![
types::MulticastGroupMember {
port_id: port_id1,
port_id: port_id1.clone(),
link_id: link_id1,
direction: types::Direction::External,
},
types::MulticastGroupMember {
port_id: port_id2,
port_id: port_id2.clone(),
link_id: link_id2,
direction: types::Direction::External,
},
],
};

switch.client.multicast_group_create(&group_entry).await
switch.client.multicast_group_create(&internal_entry).await
}

async fn delete_entry(switch: &Switch, idx: usize) -> OpResult<()> {
// Find the IP with the matching index
let ip = if idx % 2 == 0 {
IpAddr::V4(gen_ipv4_multicast_addr(idx))
} else {
IpAddr::V6(gen_ipv6_multicast_addr(idx))
};

// Delete the route entry
let ip = IpAddr::V6(gen_ipv6_multicast_addr(idx));
switch.client.multicast_group_delete(&ip).await
}

@@ -559,9 +516,9 @@ impl TableTest<types::MulticastGroupResponse, ()>

#[tokio::test]
#[ignore]
async fn test_multicast_full() -> TestResult {
async fn test_multicast_replication_table_full() -> TestResult {
test_table_capacity::<
types::MulticastGroupCreateEntry,
MulticastReplicationTableTest,
types::MulticastGroupResponse,
(),
>(MULTICAST_TABLE_SIZE)
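For readers following the API rework, a minimal sketch of creating an admin-scoped (internal) group outside the capacity-test harness is shown below. It reuses only the types and calls visible in this diff (`MulticastGroupCreateEntry`, `MulticastReplicationEntry`, `MulticastGroupMember`, `multicast_group_create`); the `Switch`/`PhysPort` fixtures come from the test harness, and the exact field set is an assumption rather than a definitive picture of the client API.

```rust
use std::net::{IpAddr, Ipv6Addr};

// Sketch only: `Switch`, `PhysPort`, `OpResult`, and `types` are the test
// harness and generated client used elsewhere in this PR.
async fn create_internal_group(
    switch: &Switch,
) -> OpResult<types::MulticastGroupResponse> {
    let (port_id, link_id) = switch.link_id(PhysPort(11)).unwrap();

    // ff04::/16 is admin-local scope, so dpd treats the group as internal
    // and accepts replication info plus members directly.
    let entry = types::MulticastGroupCreateEntry {
        group_ip: IpAddr::V6(Ipv6Addr::new(0xff04, 0, 0, 0, 0, 0, 0, 0x10)),
        tag: Some("example-group".to_string()),
        sources: None,
        replication_info: types::MulticastReplicationEntry {
            level1_excl_id: Some(10),
            level2_excl_id: Some(20),
        },
        members: vec![types::MulticastGroupMember {
            port_id,
            link_id,
            direction: types::Direction::External,
        }],
    };

    switch.client.multicast_group_create(&entry).await
}
```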
12 changes: 6 additions & 6 deletions dpd/p4/constants.p4
@@ -53,14 +53,14 @@ const bit<2> MULTICAST_TAG_UNDERLAY = 1;
const bit<2> MULTICAST_TAG_UNDERLAY_EXTERNAL = 2;

/* IPv6 Address Mask Constants */
const bit<128> IPV6_SCOPE_MASK = 0xfff00000000000000000000000000000; // Match ff0X::/16
const bit<128> IPV6_ULA_MASK = 0xff00000000000000000000000000000; // Match fd00::/8
const bit<128> IPV6_SCOPE_MASK = 0xffff0000000000000000000000000000; // Match ff00::/16
const bit<128> IPV6_ULA_MASK = 0xff000000000000000000000000000000; // Match fd00::/8

/* IPv6 Address Pattern Constants */
const bit<128> IPV6_ADMIN_LOCAL_PATTERN = 0xff040000000000000000000000000000 & IPV6_SCOPE_MASK; // ff04::/16
const bit<128> IPV6_SITE_LOCAL_PATTERN = 0xff050000000000000000000000000000 & IPV6_SCOPE_MASK; // ff05::/16
const bit<128> IPV6_ORG_SCOPE_PATTERN = 0xff080000000000000000000000000000 & IPV6_SCOPE_MASK; // ff08::/16
const bit<128> IPV6_ULA_PATTERN = 0xFfd00000000000000000000000000000 & IPV6_ULA_MASK; // fd00::/8
const bit<128> IPV6_ADMIN_LOCAL_PATTERN = 0xff040000000000000000000000000000; // ff04::/16
const bit<128> IPV6_SITE_LOCAL_PATTERN = 0xff050000000000000000000000000000; // ff05::/16
const bit<128> IPV6_ORG_SCOPE_PATTERN = 0xff080000000000000000000000000000; // ff08::/16
const bit<128> IPV6_ULA_PATTERN = 0xfd000000000000000000000000000000; // fd00::/8

/* Reasons a packet may be dropped by the p4 pipeline */
const bit<8> DROP_IPV4_SWITCH_ADDR_MISS = 0x01;
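The corrected constants matter because the previous `IPV6_SCOPE_MASK` kept only the top 12 bits (`fff0…`) and the scope patterns were additionally AND-ed with that mask, so the nibble distinguishing ff04/ff05/ff08 was effectively discarded. With the full 16-bit mask, each scope compares exactly. Below is a host-side sketch of the same classification, written in Rust purely for illustration (the P4 pipeline performs the equivalent masking on the wire):

```rust
use std::net::Ipv6Addr;

// Mirror of the corrected P4 constants as u128 values.
const IPV6_SCOPE_MASK: u128 = 0xffff_0000_0000_0000_0000_0000_0000_0000;
const IPV6_ADMIN_LOCAL: u128 = 0xff04_0000_0000_0000_0000_0000_0000_0000;
const IPV6_SITE_LOCAL: u128 = 0xff05_0000_0000_0000_0000_0000_0000_0000;
const IPV6_ORG_SCOPE: u128 = 0xff08_0000_0000_0000_0000_0000_0000_0000;

fn is_admin_scoped_mcast(addr: Ipv6Addr) -> bool {
    // Compare the full 16-bit prefix against the three admin scopes.
    let prefix = u128::from(addr) & IPV6_SCOPE_MASK;
    prefix == IPV6_ADMIN_LOCAL || prefix == IPV6_SITE_LOCAL || prefix == IPV6_ORG_SCOPE
}

fn main() {
    // ff05::1 is site-local (admin-scoped); ff0e::1 is global scope.
    assert!(is_admin_scoped_mcast("ff05::1".parse().unwrap()));
    assert!(!is_admin_scoped_mcast("ff0e::1".parse().unwrap()));
}
```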
127 changes: 47 additions & 80 deletions dpd/p4/sidecar.p4
@@ -232,7 +232,7 @@ control Filter(
// that follow the format 33:33:xxxx:xxxx where the last 32 bits
// are taken directly from the last 32 bits of the IPv6 address.
//
// Sadly, the first two conditions cannot e checked properly by
// Sadly, the first two conditions cannot be checked properly by
// the parser, as we reach the total available parser match
// registers on the device.
if (hdr.ethernet.dst_mac[47:40] != 8w0x33 ||
@@ -689,7 +689,7 @@ control NatIngress (
if (hdr.ipv4.isValid() && meta.is_valid) {
if (meta.is_mcast) {
ingress_ipv4_mcast.apply();
} else {
} else {
ingress_ipv4.apply();
}
} else if (hdr.ipv6.isValid() && meta.is_valid) {
@@ -1468,27 +1468,36 @@ control MulticastIngress (
in ingress_intrinsic_metadata_t ig_intr_md,
inout ingress_intrinsic_metadata_for_tm_t ig_tm_md)
{
DirectCounter<bit<32>>(CounterType_t.PACKETS_AND_BYTES) mcast_ipv4_ctr;
DirectCounter<bit<32>>(CounterType_t.PACKETS_AND_BYTES) mcast_ipv6_ctr;
DirectCounter<bit<32>>(CounterType_t.PACKETS_AND_BYTES) mcast_ipv4_ssm_ctr;
DirectCounter<bit<32>>(CounterType_t.PACKETS_AND_BYTES) mcast_ipv6_ssm_ctr;

Hash<bit<13>>(HashAlgorithm_t.CRC16) mcast_hashv4_level1;
Hash<bit<13>>(HashAlgorithm_t.CRC16) mcast_hashv4_level2;
Hash<bit<13>>(HashAlgorithm_t.CRC16) mcast_hashv6_level1;
Hash<bit<13>>(HashAlgorithm_t.CRC16) mcast_hashv6_level2;

// Drop action for IPv4 multicast packets with no group.
//
// At this point, we should only allow replication for IPv6 packets that
// are admin-scoped before possible decapping.
action drop_mcastv4_no_group() {
ig_dprsr_md.drop_ctl = 1;
meta.drop_reason = DROP_MULTICAST_NO_GROUP;
mcast_ipv4_ctr.count();
}

// Drop action for IPv6 multicast packets with no group.
//
// At this point, we should only allow replication for IPv6 packets that
// are admin-scoped before possible decapping.
action drop_mcastv6_no_group() {
ig_dprsr_md.drop_ctl = 1;
meta.drop_reason = DROP_MULTICAST_NO_GROUP;
}

// Drop action for IPv6 multicast packets with no group
// that is a valid admin-scoped multicast group.
action drop_mcastv6_admin_scoped_no_group() {
ig_dprsr_md.drop_ctl = 1;
meta.drop_reason = DROP_MULTICAST_NO_GROUP;
mcast_ipv6_ctr.count();
}

@@ -1520,35 +1529,9 @@ control MulticastIngress (
mcast_ipv6_ssm_ctr.count();
}

action configure_mcastv4(
MulticastGroupId_t mcast_grp_a,
bit<16> rid,
bit<16> level1_excl_id,
bit<9> level2_excl_id
) {
ig_tm_md.mcast_grp_a = mcast_grp_a;
ig_tm_md.rid = rid;
ig_tm_md.level1_exclusion_id = level1_excl_id;
ig_tm_md.level2_exclusion_id = level2_excl_id;

// Set multicast hash based on IPv4 packet fields
ig_tm_md.level1_mcast_hash = (bit<13>)mcast_hashv4_level1.get({
hdr.ipv4.src_addr,
hdr.ipv4.dst_addr,
hdr.ipv4.protocol,
meta.l4_src_port,
meta.l4_dst_port
});

// Set secondary multicast hash based on IPv4 packet fields
ig_tm_md.level2_mcast_hash = (bit<13>)mcast_hashv4_level2.get({
(bit<16>)hdr.ipv4.identification,
ig_intr_md.ingress_port
});

mcast_ipv4_ctr.count();
}

// Configure IPv6 multicast replication with bifurcated design:
// mcast_grp_a: external/customer replication group
// mcast_grp_b: underlay/infrastructure replication group
action configure_mcastv6(
MulticastGroupId_t mcast_grp_a,
MulticastGroupId_t mcast_grp_b,
@@ -1580,21 +1563,10 @@ control MulticastIngress (
mcast_ipv6_ctr.count();
}

table mcast_replication_ipv4 {
key = { hdr.ipv4.dst_addr: exact; }
actions = {
configure_mcastv4;
drop_mcastv4_no_group;
}
default_action = drop_mcastv4_no_group;
const size = IPV4_MULTICAST_TABLE_SIZE;
counters = mcast_ipv4_ctr;
}

table mcast_source_filter_ipv4 {
key = {
hdr.ipv4.src_addr: lpm;
hdr.ipv4.dst_addr: exact;
hdr.inner_ipv4.src_addr: lpm;
hdr.inner_ipv4.dst_addr: exact;
}
actions = {
allow_source_mcastv4;
@@ -1609,17 +1581,17 @@ control MulticastIngress (
key = { hdr.ipv6.dst_addr: exact; }
actions = {
configure_mcastv6;
drop_mcastv6_no_group;
drop_mcastv6_admin_scoped_no_group;
}
default_action = drop_mcastv6_no_group;
default_action = drop_mcastv6_admin_scoped_no_group;
const size = IPV6_MULTICAST_TABLE_SIZE;
counters = mcast_ipv6_ctr;
}

table mcast_source_filter_ipv6 {
key = {
hdr.ipv6.src_addr: exact;
hdr.ipv6.dst_addr: exact;
hdr.inner_ipv6.src_addr: exact;
hdr.inner_ipv6.dst_addr: exact;
}
actions = {
allow_source_mcastv6;
@@ -1650,7 +1622,6 @@ control MulticastIngress (

table mcast_tag_check {
key = {
hdr.ipv6.isValid() : ternary;
ig_tm_md.mcast_grp_a : ternary;
ig_tm_md.mcast_grp_b : ternary;
hdr.geneve.isValid() : ternary;
@@ -1666,50 +1637,46 @@ control MulticastIngress (
}

const entries = {
( true, _, _, true, true, MULTICAST_TAG_EXTERNAL ) : invalidate_underlay_grp_and_set_decap;
( true, _, _, true, true, MULTICAST_TAG_UNDERLAY ) : invalidate_external_grp;
( true, _, _, true, true, MULTICAST_TAG_UNDERLAY_EXTERNAL ) : NoAction;
( _, 0, _, _, _, _ ) : invalidate_external_grp;
( _, _, 0, _, _, _ ) : invalidate_underlay_grp;
( _, 0, 0, _, _, _ ) : invalidate_grps;
( _, _, true, true, MULTICAST_TAG_EXTERNAL ) : invalidate_underlay_grp_and_set_decap;
( _, _, true, true, MULTICAST_TAG_UNDERLAY ) : invalidate_external_grp;
( _, _, true, true, MULTICAST_TAG_UNDERLAY_EXTERNAL ) : NoAction;
( 0, _, _, _, _ ) : invalidate_external_grp;
( _, 0, _, _, _ ) : invalidate_underlay_grp;
( 0, 0, _, _, _ ) : invalidate_grps;
}

const size = 6;
}

// Note: SSM tables currently take one extra stage in the pipeline (17->18).
apply {
if (hdr.ipv4.isValid()) {
// Check if the destination address is an IPv4 SSM multicast
if (hdr.geneve.isValid() && hdr.inner_ipv4.isValid()) {
// Check if the inner destination address is an IPv4 SSM multicast
// address.
if (hdr.ipv4.dst_addr[31:24] == 8w0xe8) {
if (hdr.inner_ipv4.dst_addr[31:24] == 8w0xe8) {
mcast_source_filter_ipv4.apply();
if (meta.allow_source_mcast) {
mcast_replication_ipv4.apply();
}
} else {
// Otherwise, apply the multicast replication table for
// non-SSM multicast addresses.
mcast_replication_ipv4.apply();
meta.allow_source_mcast = true;
}
} else if (hdr.ipv6.isValid()) {
// Check if the destination address is an IPv6 SSM multicast
} else if (hdr.geneve.isValid() && hdr.inner_ipv6.isValid()) {
// Check if the inner destination address is an IPv6 SSM multicast
// address.
if ((hdr.ipv6.dst_addr[127:120] == 8w0xff)
&& ((hdr.ipv6.dst_addr[119:116] == 4w0x3))) {
if ((hdr.inner_ipv6.dst_addr[127:120] == 8w0xff)
&& ((hdr.inner_ipv6.dst_addr[119:116] == 4w0x3))) {
mcast_source_filter_ipv6.apply();
if (meta.allow_source_mcast) {
// Then, apply the multicast replication table.
mcast_replication_ipv6.apply();
}
} else {
// Otherwise, apply the multicast replication table for
// non-SSM multicast addresses.
mcast_replication_ipv6.apply();
meta.allow_source_mcast = true;
}
} else if (hdr.ipv4.isValid()) {
drop_mcastv4_no_group();
} else if (hdr.ipv6.isValid()) {
drop_mcastv6_no_group();
}

mcast_tag_check.apply();
if (hdr.ipv6.isValid() && meta.allow_source_mcast) {
mcast_replication_ipv6.apply();
mcast_tag_check.apply();
}
}
}

Expand Down
Loading