sudo docker build -t rochpl -f Dockerfile .
- Node A
docker save -o rochpl_image.tar rochpl
scp rochpl_image.tar user@10.0.0.12:~
- Node B
docker load -i ~/rochpl_image.tar
- Both nodes: start the container
sudo docker run --rm -it \
--device /dev/kfd \
--device /dev/dri \
--security-opt seccomp=unconfined \
--network=host \
--name=rochpl_node \
rochpl /bin/bash
- Set up SSH keys
# Both Nodes
ssh-keygen -t rsa -f ~/.ssh/id_rsa -q -N ""
# Both Nodes
vim /etc/ssh/sshd_config
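# change this line to --- PasswordAuthentication yes
# add this line --- PermitRootLogin yes
# NOTE: the steps below connect on port 2222, so sshd must be listening on that port (e.g. "Port 2222" in this file) -- confirm for your image
# (re)start sshd after editing so the changes take effect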
# Node A
ssh-copy-id -p 2222 root@10.0.0.12
# Node B
ssh-copy-id -p 2222 root@10.0.0.14
- Add the following to ~/.ssh/config on both nodes
vim ~/.ssh/config
Host 10.0.0.14
Port 2222
User root
Host 10.0.0.12
Port 2222
User root
- Test that it works (this should print the remote hostname without asking for a password)
ssh 10.0.0.14 hostname
- Create the rochpl_hostfile on both nodes
10.0.0.14 slots=4
10.0.0.12 slots=4
- Run HPL using this command (modify the arguments to suit your environment; keep -np equal to P x Q and no larger than the total slots in rochpl_hostfile)
export OMPI_MCA_pmix=pmix
mpirun --hostfile rochpl_hostfile -np 8 --bind-to none -x HIP_VISIBLE_DEVICES=0,1,2,3 --mca pml ucx --mca btl ^vader,tcp,openib,uct ./run_rochpl -P 2 -Q 4 -N 256000 --NB 512