bluefield nic double arp packets

1. Prepare

1.1 Enable hash mode

mlxconfig -d /dev/mst/<device-name> s LAG_RESOURCE_ALLOCATION=1

Add or edit the following field in /etc/mellanox/mlnx-bf.conf:

LAG_HASH_MODE="yes"

1.2 Delete the default OVS bridges and SF interfaces

Delete the default OVS bridges:

ovs-vsctl del-br ovsbr1
ovs-vsctl del-br ovsbr2

Delete the SF interfaces:

# Collect the index of every SF listed by mlnx-sf: keep the "SF Index" lines
# of the listing and take their third space-separated field.
sf_indexes=$(
  mlnx-sf -a show \
    | grep "SF Index" \
    | cut -d" " -f3
)

# Word splitting of the list is intentional: one SF index per iteration.
for sf_index in $sf_indexes
do
  mlnx-sf -a delete -i "$sf_index"
done

Edit /etc/mellanox/mlnx-sf.conf so that the `mlnx-sf --action create` lines are commented out:

root@localhost:/home/ubuntu# cat /etc/mellanox/mlnx-sf.conf

#/sbin/mlnx-sf --action create --device 0000:03:00.0 --sfnum 0 --hwaddr 02:2a:e6:4d:e1:ad
#/sbin/mlnx-sf --action create --device 0000:03:00.1 --sfnum 0 --hwaddr 02:06:f3:ce:ad:5a

1.3 Hide the second PF on the host

# Hide the second PF on the host, unless the HIDE_PORT2_PF query output
# already contains "True".
# NOTE(review): $mst_dev is not set in this snippet; it must already hold the
# MST device path when this runs.
if ! mlxconfig -d "$mst_dev" q HIDE_PORT2_PF | grep -q True; then
  # "y" is fed on stdin, presumably to answer mlxconfig's confirmation prompt.
  printf 'y\n' | mlxconfig -d "$mst_dev" s HIDE_PORT2_PF=True
  echo "Please power cycle host server to hide the second unused host representative"
fi

Reboot the bluefield NIC

2. Configure the bond interface

2.1 Commands:

# Create bond0 and set its parameters while the link is down.
# "mode 4" is 802.3ad (LACP) per the kernel bonding documentation.
ip link add bond0 type bond
ip link set bond0 down
ip link set bond0 type bond miimon 100 mode 4

# Enslave both uplink ports: take each one down, attach it to bond0, then
# bring it back up. The per-step loops keep the original command order.
for port in p0 p1; do
  ip link set "$port" down
done
for port in p0 p1; do
  ip link set "$port" master bond0
done
for port in p0 p1; do
  ip link set "$port" up
done
ip link set bond0 up

# Create the OVS bridge and attach the bond and the host PF representor.
ovs-vsctl add-br bf-lag
for member in bond0 pf0hpf; do
  ovs-vsctl add-port bf-lag "$member"
done
ip link set bf-lag up

2.2 Netplan configuration:

# Netplan configuration for the BlueField: management interfaces, the two
# uplink ports (p0/p1) and the LACP bond built on top of them.
network:
  ethernets:
    oob_net0:
      addresses: [100.64.31.11/24]
      gateway4: 100.64.31.1
      nameservers:
        addresses: [114.114.114.114]
    tmfifo_net0:
      addresses:
        - 192.168.100.2/30
      dhcp4: false
      routes:
        - metric: 1025
          to: 0.0.0.0/0
          via: 192.168.100.1
    p0:
      dhcp4: false
    p1:
      dhcp4: false
    pf0hpf:
      dhcp4: false
  bonds:
    bond0:
      dhcp4: false
      interfaces:
        - p0
        - p1
      parameters:
        mode: 802.3ad
        lacp-rate: fast
        transmit-hash-policy: layer3+4
        mii-monitor-interval: 1
  #renderer: NetworkManager
  version: 2

2.3 Check the bond interface status

root@localhost:/home/ubuntu# cat /sys/class/net/bond0/bonding/slaves
p0 p1

root@localhost:/home/ubuntu# cat /proc/net/bonding/bond0
Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011)

Bonding Mode: IEEE 802.3ad Dynamic link aggregation
Transmit Hash Policy: layer3+4 (1)
MII Status: up
MII Polling Interval (ms): 1
Up Delay (ms): 0
Down Delay (ms): 0
Peer Notification Delay (ms): 0

802.3ad info
LACP rate: fast
Min links: 0
Aggregator selection policy (ad_select): stable
System priority: 65535
System MAC address: 7a:3f:79:7b:96:fb
Active Aggregator Info:
Aggregator ID: 1
Number of ports: 2
Actor Key: 15
Partner Key: 32
Partner Mac Address: c0:42:d0:08:9a:80

Slave Interface: p1
MII Status: up
Speed: 10000 Mbps
Duplex: full
Link Failure Count: 1
Permanent HW addr: 08:c0:eb:2b:18:11
Slave queue ID: 0
Aggregator ID: 1
Actor Churn State: none
Partner Churn State: none
Actor Churned Count: 0
Partner Churned Count: 0
details actor lacp pdu:
system priority: 65535
system mac address: 7a:3f:79:7b:96:fb
port key: 15
port priority: 255
port number: 1
port state: 63
details partner lacp pdu:
system priority: 127
system mac address: c0:42:d0:08:9a:80
oper key: 32
port priority: 127
port number: 16
port state: 63

Slave Interface: p0
MII Status: up
Speed: 10000 Mbps
Duplex: full
Link Failure Count: 1
Permanent HW addr: 08:c0:eb:2b:18:10
Slave queue ID: 0
Aggregator ID: 1
Actor Churn State: none
Partner Churn State: none
Actor Churned Count: 0
Partner Churned Count: 0
details actor lacp pdu:
system priority: 65535
system mac address: 7a:3f:79:7b:96:fb
port key: 15
port priority: 255
port number: 2
port state: 63
details partner lacp pdu:
system priority: 127
system mac address: c0:42:d0:08:9a:80
oper key: 32
port priority: 127
port number: 2
port state: 63

2.4 Remove a slave interface from the bond, or add one to it

Remove:

echo "-p0" > /sys/class/net/bond0/bonding/slaves

Add:

echo "+p0" > /sys/class/net/bond0/bonding/slaves

3. Use tc to copy ARP packets

tc qdisc add dev pf0hpf handle ffff: ingress

tc filter add dev pf0hpf parent ffff: protocol arp prio 1 flower skip_hw dst_mac ff:ff:ff:ff:ff:ff action mirred egress mirror dev p0 pipe action mirred egress mirror dev p1

Or match only ARP request packets:

tc filter add dev pf0hpf parent ffff: protocol arp prio 1 flower skip_hw arp_op request action mirred egress mirror dev p0 pipe action mirred egress mirror dev p1

For 802.1Q VLAN-tagged ARP packets:

tc filter add dev pf0hpf parent ffff: protocol 802.1Q prio 1 flower skip_hw vlan_ethtype arp action mirred egress mirror dev p0 pipe action mirred egress mirror dev p1

ref: https://man7.org/linux/man-pages/man8/tc-flower.8.html

3.1 Check the tc filter rules

root@localhost:/home/ubuntu# tc -s -p filter ls dev pf0hpf parent ffff:
filter ingress protocol arp pref 1 flower chain 0
filter ingress protocol arp pref 1 flower chain 0 handle 0x1
dst_mac ff:ff:ff:ff:ff:ff
eth_type arp
not_in_hw
action order 1: mirred (Egress Mirror to device p0) pipe
index 3 ref 1 bind 1 installed 1514 sec used 0 sec
Action statistics:
Sent 69644 bytes 1514 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0

action order 2: mirred (Egress Mirror to device p1) pipe
index 5 ref 1 bind 1 installed 1514 sec used 0 sec
Action statistics:
Sent 69644 bytes 1514 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0

filter ingress protocol LLDP pref 2 flower chain 0
filter ingress protocol LLDP pref 2 flower chain 0 handle 0x1
dst_mac 01:80:c2:00:00:0e
src_mac 00:50:56:50:86:1d
eth_type 88cc
in_hw in_hw_count 1
action order 1: gact action drop
random type none pass val 0
index 1 ref 1 bind 1 installed 9 sec used 9 sec
Action statistics:
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
cookie 45b15fb785436a48db7baabfebadfc89
no_percpu

filter ingress protocol arp pref 5 flower chain 0
filter ingress protocol arp pref 5 flower chain 0 handle 0x1
dst_mac ff:ff:ff:ff:ff:ff
src_mac 00:0c:29:72:e7:c4
eth_type arp
arp_sip 11.1.230.10
arp_tip 11.1.230.1
arp_op request
not_in_hw
action order 1: skbedit ptype host pipe
index 1 ref 1 bind 1 installed 7 sec used 7 sec
Action statistics:
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0

action order 2: mirred (Ingress Mirror to device bf-lag) pipe
index 1 ref 1 bind 1 installed 7 sec used 7 sec
Action statistics:
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
cookie 688501ce534a508863c24fbf6f995a36
no_percpu

action order 3: mirred (Egress Redirect to device bond0) stolen
index 2 ref 1 bind 1 installed 7 sec used 7 sec
Action statistics:
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
cookie 688501ce534a508863c24fbf6f995a36
no_percpu

3.2 Delete the tc filter

tc filter del dev pf0hpf parent ffff: protocol arp prio 1

4. Interface up/down scripts in networkd-dispatcher

4.1 networkd-dispatcher service

networkd-dispatcher.service - Dispatcher daemon for systemd-networkd
Loaded: loaded (/lib/systemd/system/networkd-dispatcher.service; enabled; vendor preset: enabled)
Active: active (running) since Thu 2021-12-30 01:59:25 UTC; 1h 46min ago
Main PID: 1402437 (networkd-dispat)
Tasks: 1 (limit: 19076)
Memory: 12.4M
CGroup: /system.slice/networkd-dispatcher.service
└─1402437 /usr/bin/python3 /usr/bin/networkd-dispatcher --run-startup-triggers --script-dir /etc/networkd-dispatcher

Dec 30 03:24:14 localhost.localdomain networkd-dispatcher[1449900]: p1 change to enslaved, add tc filter rule
Dec 30 03:31:35 localhost.localdomain networkd-dispatcher[1402437]: INFO:Invoking '/etc/networkd-dispatcher/no-carrier.d/50arp-mirror.sh' for interface p0
Dec 30 03:31:51 localhost.localdomain networkd-dispatcher[1402437]: INFO:Invoking '/etc/networkd-dispatcher/enslaved.d/50arp-mirror.sh' for interface p0
Dec 30 03:31:51 localhost.localdomain networkd-dispatcher[1454738]: p0 change to enslaved, add tc filter rule
Dec 30 03:32:23 localhost.localdomain networkd-dispatcher[1402437]: INFO:Invoking '/etc/networkd-dispatcher/no-carrier.d/50arp-mirror.sh' for interface p0
Dec 30 03:32:32 localhost.localdomain networkd-dispatcher[1402437]: INFO:Invoking '/etc/networkd-dispatcher/enslaved.d/50arp-mirror.sh' for interface p0
Dec 30 03:32:32 localhost.localdomain networkd-dispatcher[1455161]: p0 change to enslaved, add tc filter rule
Dec 30 03:32:41 localhost.localdomain networkd-dispatcher[1402437]: INFO:Invoking '/etc/networkd-dispatcher/no-carrier.d/50arp-mirror.sh' for interface p0
Dec 30 03:32:47 localhost.localdomain networkd-dispatcher[1402437]: INFO:Invoking '/etc/networkd-dispatcher/enslaved.d/50arp-mirror.sh' for interface p0
Dec 30 03:32:47 localhost.localdomain networkd-dispatcher[1455339]: p0 change to enslaved, add tc filter rule

4.2 Modify the networkd-dispatcher configuration

cat /etc/default/networkd-dispatcher
# Specify command line options here. This config file is used
# by the included systemd service file.
networkd_dispatcher_args="--run-startup-triggers --script-dir /etc/networkd-dispatcher"

# -v for debug information

4.3 Add the script directories

mkdir -p /etc/networkd-dispatcher/{enslaved.d,no-carrier.d}

4.4 Add the scripts

enslaved.d/50arp-mirror.sh

#!/bin/bash
# networkd-dispatcher hook (enslaved.d): when p0 or p1 becomes enslaved,
# install the tc filters that mirror ARP traffic arriving on pf0hpf to both
# p0 and p1 (VLAN-tagged ARP at prio 2, ARP requests at prio 3).
#
# Reads: IFACE - name of the interface whose state changed.
if [[ $IFACE == "p0" || $IFACE == "p1" ]]; then
  echo "$IFACE change to enslaved, add tc filter rule" | tee -a /var/log/syslog

  # The filters attach to the ingress qdisc (ffff:). Create it if it is
  # missing; the command fails harmlessly when a qdisc is already present.
  tc qdisc add dev pf0hpf handle ffff: ingress 2>/dev/null || true

  # This hook fires once for each bond port, so remove rules left by an
  # earlier run first. Otherwise every run stacks another copy of the
  # filters and each ARP packet is mirrored several times. Deleting a
  # non-existent filter is an expected, ignorable error.
  tc filter del dev pf0hpf parent ffff: protocol 802.1Q prio 2 2>/dev/null || true
  tc filter del dev pf0hpf parent ffff: protocol arp prio 3 2>/dev/null || true

  tc filter add dev pf0hpf parent ffff: protocol 802.1Q prio 2 flower skip_hw vlan_ethtype arp action mirred egress mirror dev p0 pipe action mirred egress mirror dev p1
  tc filter add dev pf0hpf parent ffff: protocol arp prio 3 flower skip_hw arp_op request action mirred egress mirror dev p0 pipe action mirred egress mirror dev p1
fi

no-carrier.d/50arp-mirror.sh

 #!/bin/bash
if [[ $IFACE == "p0" || $IFACE == "p1" ]]; then
echo "$IFACE change to no-carrie, delete tc filter rule" | tee -a /var/log/syslog;
tc filter del dev pf0hpf parent ffff: protocol 802.1Q prio 2;
tc filter del dev pf0hpf parent ffff: protocol arp prio 3;
fi

Make the scripts executable (the paths are relative to /etc/networkd-dispatcher):

chmod +x enslaved.d/50arp-mirror.sh
chmod +x no-carrier.d/50arp-mirror.sh

ref: networkd-dispatcher

5. Log

/var/log/syslog

Sep  7 18:52:14 localhost kernel: [  898.282891] tc mirred to Houston: device bf-lag is down
Sep 7 18:52:15 localhost kernel: [ 899.283762] tc mirred to Houston: device bf-lag is down
Sep 7 18:52:16 localhost kernel: [ 900.284601] tc mirred to Houston: device bf-lag is down
Sep 7 18:52:17 localhost systemd[1]: Starting Cleanup of Temporary Directories...
Sep 7 18:52:17 localhost systemd[1]: systemd-tmpfiles-clean.service: Succeeded.
Sep 7 18:52:17 localhost systemd[1]: Finished Cleanup of Temporary Directories.
Sep 7 18:52:17 localhost kernel: [ 901.285391] tc mirred to Houston: device bf-lag is down
Sep 7 18:52:18 localhost kernel: [ 902.286219] tc mirred to Houston: device bf-lag is down