# Host side: create Linux bridge br0 and attach the bond0 interface to it.
brctl addbr br0
brctl addif br0 bond0
# Bring the bridge interface up.
ifconfig br0 up
# Start the VM: import the existing CentOS 7 qcow2 cloud image (no installer run),
# with 2048 MiB RAM and 2 vCPUs, a virtio NIC attached to br0, and a VNC console
# listening on 0.0.0.0:5911. --noautoconsole returns without opening a console.
virt-install --name VM1 --ram 2048 --vcpus 2 \
--disk path=/opt/CentOS-7-x86_64-GenericCloud.qcow2,size=10,format=qcow2 --accelerate \
--vnc --vncport=5911 --vnclisten=0.0.0.0 \
--network bridge=br0,model=virtio \
--import \
--noautoconsole
# Inside the VM (not on the host): create the VLAN interfaces and configure their IPs.
# Add 802.1Q sub-interfaces for VLAN 100 and VLAN 200 on top of eth0.
ip link add link eth0 name eth0.100 type vlan id 100
ip link add link eth0 name eth0.200 type vlan id 200
# Assign one /24 address per VLAN.
ip addr add 100.0.0.2/24 dev eth0.100
ip addr add 200.0.0.2/24 dev eth0.200
# Bring both VLAN interfaces up.
ip link set dev eth0.100 up
ip link set dev eth0.200 up
# Give each VLAN interface its own MAC address.
ifconfig eth0.100 hw ether 52:54:00:34:c8:d9
ifconfig eth0.200 hw ether 52:54:00:34:c8:d0
# Host side: create Linux bridge br0 and attach the bond0 interface to it.
brctl addbr br0
brctl addif br0 bond0
# Bring the bridge interface up.
ifconfig br0 up
# Start the VM: import the existing CentOS 7 qcow2 cloud image (no installer run),
# with 2048 MiB RAM and 2 vCPUs, a virtio NIC attached to br0, and a VNC console
# listening on 0.0.0.0:5911. --noautoconsole returns without opening a console.
# NOTE(review): the VM name (VM1) and VNC port (5911) are the same as in the
# earlier section of this file; presumably this is run on a different host — confirm.
virt-install --name VM1 --ram 2048 --vcpus 2 \
--disk path=/opt/CentOS-7-x86_64-GenericCloud.qcow2,size=10,format=qcow2 --accelerate \
--vnc --vncport=5911 --vnclisten=0.0.0.0 \
--network bridge=br0,model=virtio \
--import \
--noautoconsole
# Inside the VM (not on the host): create the VLAN interfaces and configure their IPs.
# Add 802.1Q sub-interfaces for VLAN 100 and VLAN 200 on top of eth0.
ip link add link eth0 name eth0.100 type vlan id 100
ip link add link eth0 name eth0.200 type vlan id 200
# Assign one /24 address per VLAN.
ip addr add 100.0.0.3/24 dev eth0.100
ip addr add 200.0.0.3/24 dev eth0.200
# Bring both VLAN interfaces up.
ip link set dev eth0.100 up
ip link set dev eth0.200 up
# Host side: create Linux bridge br0 and attach the bond0 interface to it.
brctl addbr br0
brctl addif br0 bond0
# Bring the bridge interface up.
ifconfig br0 up
# Start the VM: import the existing CentOS 7 qcow2 cloud image (no installer run),
# with 2048 MiB RAM and 2 vCPUs, a virtio NIC attached to br0, and a VNC console
# listening on 0.0.0.0:5911. --noautoconsole returns without opening a console.
# NOTE(review): the VM name (VM1) and VNC port (5911) are the same as in the
# earlier sections of this file; presumably this is run on a different host — confirm.
virt-install --name VM1 --ram 2048 --vcpus 2 \
--disk path=/opt/CentOS-7-x86_64-GenericCloud.qcow2,size=10,format=qcow2 --accelerate \
--vnc --vncport=5911 --vnclisten=0.0.0.0 \
--network bridge=br0,model=virtio \
--import \
--noautoconsole
# Inside the VM (not on the host): create the VLAN interfaces and configure their IPs.
# Add 802.1Q sub-interfaces for VLAN 100 and VLAN 200 on top of eth0.
ip link add link eth0 name eth0.100 type vlan id 100
ip link add link eth0 name eth0.200 type vlan id 200
# Assign one /24 address per VLAN.
ip addr add 100.0.0.4/24 dev eth0.100
ip addr add 200.0.0.4/24 dev eth0.200
# Bring both VLAN interfaces up.
ip link set dev eth0.100 up
ip link set dev eth0.200 up
RADIUS(Remote Authentication Dial-In User Service)是用于拨号用户接入认证和服务请求认证的网络协议。RADIUS提供集中式的认证、授权和计费(AAA)能力,用于管理接入用户对网络资源的使用。RADIUS允许使用集中式的数据库来保存所有用户的配置信息,以供所有用户共享使用。
# yum install -y freeradius freeradius-utils freeradius-mysql
# systemctl start radiusd.service
# systemctl enable radiusd.service
# mysql -u root -p radius < /etc/raddb/mods-config/sql/main/mysql/schema.sql
# vim /etc/raddb/mods-available/sql
# cat /etc/raddb/mods-available/sql | grep -v ^#
sql {
# The sub-module to use to execute queries. This should match
# the database you're attempting to connect to.
#
# * rlm_sql_mysql
# * rlm_sql_mssql
# * rlm_sql_oracle
# * rlm_sql_postgresql
# * rlm_sql_sqlite
# * rlm_sql_null (log queries to disk)
#
driver = "rlm_sql_mysql"
# The dialect of SQL you want to use, this should usually match
# the driver you selected above.
#
# If you're using rlm_sql_null, then it should be the type of
# database the logged queries are going to be executed against.
dialect = "mysql"
# Connection info:
#
server = "localhost"
port = 3306
login = "radius"
password = "radius"
# Database table configuration for everything except Oracle
radius_db = "radius"
# chgrp -h radiusd /etc/raddb/mods-available/sql
# systemctl restart radiusd.service
## NIC map
bm-2204kzq:252982:252982 [*] NCCL INFO NCCL_SOCKET_IFNAME set by environment to bond0
bm-2204kzq:252982:252982 [*] NCCL INFO Bootstrap : Using bond0:172.17.0.215<0>
bm-2204kzq:252982:252982 [*] NCCL INFO NCCL version 2.22.3+cuda12.6
bm-2204kzq:252985:253055 [*] NCCL INFO NET/IB : Using [0]mlx5_2:1/RoCE [1]mlx5_3:1/RoCE [2]mlx5_4:1/RoCE [3]mlx5_0:1/RoCE [RO]; OOB bond0:172.17.0.215<0>
bm-2204qhn:253837:253837 [*] NCCL INFO NCCL_SOCKET_IFNAME set by environment to bond0
bm-2204qhn:253837:253837 [*] NCCL INFO Bootstrap : Using bond0:172.17.0.81<0>
bm-2204qhn:253837:253837 [*] NCCL INFO NCCL version 2.22.3+cuda12.6
bm-2204qhn:253840:253908 [*] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [1]mlx5_2:1/RoCE [2]mlx5_3:1/RoCE [3]mlx5_4:1/RoCE [RO]; OOB bond0:172.17.0.81<0>
### ChannelNum:16
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 00/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 01/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 02/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 03/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 04/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 05/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 06/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 07/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 08/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 09/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 10/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 11/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 12/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 13/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 14/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 15/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
## Channel C0
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 00/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 00/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 00/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 00/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 00/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 00/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 00/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 00/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 00/0 : 2[2] -> 8[0] [send] via NET/IB/3(0)/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 00/0 : 2[2] -> 8[0] [receive] via NET/IB/0/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 00/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 00/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 00/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 00/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 00/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 00/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 00/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 00/0 : 10[2] -> 0[0] [send] via NET/IB/0(8)/GDRDMA mlx5_0:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 00/0 : 10[2] -> 0[0] [receive] via NET/IB/3/GDRDMA mlx5_0:1/RoCE
## Channel C1
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 01/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 01/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 01/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 01/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 01/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 01/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 01/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 01/0 : 1[1] -> 10[2] [send] via NET/IB/0(2)/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 01/0 : 1[1] -> 10[2] [receive] via NET/IB/1/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 01/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 01/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 01/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 01/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 01/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 01/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 01/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 01/0 : 9[1] -> 2[2] [send] via NET/IB/1(10)/GDRDMA mlx5_2:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 01/0 : 9[1] -> 2[2] [receive] via NET/IB/0/GDRDMA mlx5_2:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 01/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C2
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 02/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 02/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 02/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 02/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 02/0 : 6[6] -> 12[4] [send] via NET/IB/1(4)/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 02/0 : 6[6] -> 12[4] [receive] via NET/IB/2/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 02/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 02/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 02/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 02/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 02/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 02/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 02/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 02/0 : 14[6] -> 4[4] [send] via NET/IB/2(12)/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 02/0 : 14[6] -> 4[4] [receive] via NET/IB/1/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 02/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 02/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 02/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 02/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C3
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 03/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 03/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 03/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 03/0 : 5[5] -> 14[6] [send] via NET/IB/2(6)/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 03/0 : 5[5] -> 14[6] [receive] via NET/IB/3/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 03/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 03/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 03/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 03/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 03/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 03/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 03/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 03/0 : 13[5] -> 6[6] [send] via NET/IB/3(14)/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 03/0 : 13[5] -> 6[6] [receive] via NET/IB/2/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 03/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 03/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 03/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 03/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 03/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C4
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 04/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 04/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 04/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 04/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 04/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 04/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 04/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 04/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 04/0 : 2[2] -> 8[0] [send] via NET/IB/3(0)/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 04/0 : 2[2] -> 8[0] [receive] via NET/IB/0/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 04/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 04/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 04/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 04/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 04/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 04/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 04/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 04/0 : 10[2] -> 0[0] [send] via NET/IB/0(8)/GDRDMA mlx5_0:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 04/0 : 10[2] -> 0[0] [receive] via NET/IB/3/GDRDMA mlx5_0:1/RoCE
## Channel C5
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 05/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 05/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 05/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 05/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 05/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 05/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 05/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 05/0 : 1[1] -> 10[2] [send] via NET/IB/0(2)/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 05/0 : 1[1] -> 10[2] [receive] via NET/IB/1/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 05/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 05/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 05/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 05/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 05/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 05/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 05/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 05/0 : 9[1] -> 2[2] [send] via NET/IB/1(10)/GDRDMA mlx5_2:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 05/0 : 9[1] -> 2[2] [receive] via NET/IB/0/GDRDMA mlx5_2:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 05/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C6
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 06/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 06/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 06/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 06/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 06/0 : 6[6] -> 12[4] [send] via NET/IB/1(4)/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 06/0 : 6[6] -> 12[4] [receive] via NET/IB/2/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 06/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 06/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 06/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 06/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 06/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 06/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 06/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 06/0 : 14[6] -> 4[4] [send] via NET/IB/2(12)/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 06/0 : 14[6] -> 4[4] [receive] via NET/IB/1/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 06/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 06/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 06/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 06/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C7
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 07/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 07/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 07/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 07/0 : 5[5] -> 14[6] [send] via NET/IB/2(6)/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 07/0 : 5[5] -> 14[6] [receive] via NET/IB/3/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 07/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 07/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 07/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 07/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 07/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 07/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 07/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 07/0 : 13[5] -> 6[6] [send] via NET/IB/3(14)/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 07/0 : 13[5] -> 6[6] [receive] via NET/IB/2/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 07/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 07/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 07/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 07/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 07/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C8
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 08/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 08/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 08/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 08/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 08/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 08/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 08/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 08/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 08/0 : 2[2] -> 8[0] [send] via NET/IB/3(0)/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 08/0 : 2[2] -> 8[0] [receive] via NET/IB/0/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 08/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 08/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 08/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 08/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 08/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 08/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 08/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 08/0 : 10[2] -> 0[0] [send] via NET/IB/0(8)/GDRDMA mlx5_0:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 08/0 : 10[2] -> 0[0] [receive] via NET/IB/3/GDRDMA mlx5_0:1/RoCE
## Channel C9
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 09/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 09/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 09/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 09/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 09/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 09/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 09/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 09/0 : 1[1] -> 10[2] [send] via NET/IB/0(2)/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 09/0 : 1[1] -> 10[2] [receive] via NET/IB/1/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 09/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 09/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 09/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 09/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 09/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 09/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 09/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 09/0 : 9[1] -> 2[2] [send] via NET/IB/1(10)/GDRDMA mlx5_2:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 09/0 : 9[1] -> 2[2] [receive] via NET/IB/0/GDRDMA mlx5_2:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 09/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C10
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 10/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 10/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 10/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 10/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 10/0 : 6[6] -> 12[4] [send] via NET/IB/1(4)/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 10/0 : 6[6] -> 12[4] [receive] via NET/IB/2/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 10/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 10/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 10/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 10/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 10/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 10/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 10/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 10/0 : 14[6] -> 4[4] [send] via NET/IB/2(12)/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 10/0 : 14[6] -> 4[4] [receive] via NET/IB/1/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 10/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 10/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 10/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 10/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C11
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 11/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 11/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 11/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 11/0 : 5[5] -> 14[6] [send] via NET/IB/2(6)/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 11/0 : 5[5] -> 14[6] [receive] via NET/IB/3/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 11/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 11/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 11/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 11/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 11/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 11/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 11/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 11/0 : 13[5] -> 6[6] [send] via NET/IB/3(14)/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 11/0 : 13[5] -> 6[6] [receive] via NET/IB/2/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 11/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 11/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 11/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 11/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 11/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C12
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 12/16 : 0 7 5 6 4 3 1 2 8 15 13 14 12 11 9 10
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 12/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 12/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 12/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 12/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 12/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 12/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 12/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 12/0 : 2[2] -> 8[0] [send] via NET/IB/3(0)/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 12/0 : 2[2] -> 8[0] [receive] via NET/IB/0/GDRDMA mlx5_0:1/RoCE
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 12/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 12/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 12/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 12/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 12/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 12/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 12/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 12/0 : 10[2] -> 0[0] [send] via NET/IB/0(8)/GDRDMA mlx5_0:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 12/0 : 10[2] -> 0[0] [receive] via NET/IB/3/GDRDMA mlx5_0:1/RoCE
## Channel C13
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 13/16 : 0 7 5 6 4 3 1 10 8 15 13 14 12 11 9 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 13/0 : 2[2] -> 0[0] via P2P/CUMEM
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 13/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 13/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 13/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 13/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 13/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 13/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 13/0 : 1[1] -> 10[2] [send] via NET/IB/0(2)/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 13/0 : 1[1] -> 10[2] [receive] via NET/IB/1/GDRDMA mlx5_2:1/RoCE
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 13/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 13/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 13/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 13/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 13/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 13/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 13/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 13/0 : 9[1] -> 2[2] [send] via NET/IB/1(10)/GDRDMA mlx5_2:1/RoCE
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 13/0 : 9[1] -> 2[2] [receive] via NET/IB/0/GDRDMA mlx5_2:1/RoCE
## Channel C14
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 14/16 : 0 7 5 6 12 11 9 10 8 15 13 14 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 14/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 14/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 14/0 : 5[5] -> 6[6] via P2P/CUMEM
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 14/0 : 6[6] -> 12[4] [send] via NET/IB/1(4)/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 14/0 : 6[6] -> 12[4] [receive] via NET/IB/2/GDRDMA mlx5_3:1/RoCE
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 14/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 14/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 14/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 14/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 14/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 14/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 14/0 : 13[5] -> 14[6] via P2P/CUMEM
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 14/0 : 14[6] -> 4[4] [send] via NET/IB/2(12)/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 14/0 : 14[6] -> 4[4] [receive] via NET/IB/1/GDRDMA mlx5_3:1/RoCE
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 14/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 14/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 14/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 14/0 : 2[2] -> 0[0] via P2P/CUMEM
## Channel C15
bm-2204kzq:252978:253054 [0] NCCL INFO Channel 15/16 : 0 7 5 14 12 11 9 10 8 15 13 6 4 3 1 2
bm-2204kzq:
[0]mlx5_2:1/RoCE
[1]mlx5_3:1/RoCE
[2]mlx5_4:1/RoCE
[3]mlx5_0:1/RoCE
bm-2204qhn:
[0]mlx5_0:1/RoCE
[1]mlx5_2:1/RoCE
[2]mlx5_3:1/RoCE
[3]mlx5_4:1/RoCE
bm-2204kzq:252978:253115 [0] NCCL INFO Channel 15/0 : 0[0] -> 7[7] via P2P/CUMEM
bm-2204kzq:252985:253113 [7] NCCL INFO Channel 15/0 : 7[7] -> 5[5] via P2P/CUMEM
bm-2204kzq:252983:253114 [5] NCCL INFO Channel 15/0 : 5[5] -> 14[6] [send] via NET/IB/2(6)/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 15/0 : 5[5] -> 14[6] [receive] via NET/IB/3/GDRDMA mlx5_4:1/RoCE
bm-2204qhn:253840:253973 [6] NCCL INFO Channel 15/0 : 14[6] -> 12[4] via P2P/CUMEM
bm-2204qhn:253838:253972 [4] NCCL INFO Channel 15/0 : 12[4] -> 11[3] via P2P/CUMEM
bm-2204qhn:253837:253967 [3] NCCL INFO Channel 15/0 : 11[3] -> 9[1] via P2P/CUMEM
bm-2204qhn:253835:253971 [1] NCCL INFO Channel 15/0 : 9[1] -> 10[2] via P2P/CUMEM
bm-2204qhn:253836:253974 [2] NCCL INFO Channel 15/0 : 10[2] -> 8[0] via P2P/CUMEM
bm-2204qhn:253834:253970 [0] NCCL INFO Channel 15/0 : 8[0] -> 15[7] via P2P/CUMEM
bm-2204qhn:253841:253968 [7] NCCL INFO Channel 15/0 : 15[7] -> 13[5] via P2P/CUMEM
bm-2204qhn:253839:253969 [5] NCCL INFO Channel 15/0 : 13[5] -> 6[6] [send] via NET/IB/3(14)/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 15/0 : 13[5] -> 6[6] [receive] via NET/IB/2/GDRDMA mlx5_4:1/RoCE
bm-2204kzq:252984:253117 [6] NCCL INFO Channel 15/0 : 6[6] -> 4[4] via P2P/CUMEM
bm-2204kzq:252982:253118 [4] NCCL INFO Channel 15/0 : 4[4] -> 3[3] via P2P/CUMEM
bm-2204kzq:252981:253116 [3] NCCL INFO Channel 15/0 : 3[3] -> 1[1] via P2P/CUMEM
bm-2204kzq:252979:253119 [1] NCCL INFO Channel 15/0 : 1[1] -> 2[2] via P2P/CUMEM
bm-2204kzq:252980:253120 [2] NCCL INFO Channel 15/0 : 2[2] -> 0[0] via P2P/CUMEM