# Base image: Ubuntu release codename, overridable with --build-arg.
ARG UBUNTU_VERSION=jammy
FROM ubuntu:${UBUNTU_VERSION}

# Build-time knobs (override with --build-arg):
#   ROCM_URL      amdgpu-install .deb that is downloaded and installed
#   GPU_TARGET    comma-separated GPU architectures written to target.lst
#   UCX_BRANCH / UCC_BRANCH / OMPI_BRANCH   git refs cloned for each build
#   APT_GET_APPS  extra apt packages appended to the base install list
ARG ROCM_URL=https://repo.radeon.com/amdgpu-install/6.1.1/ubuntu/jammy/amdgpu-install_6.1.60101-1_all.deb
ARG GPU_TARGET=gfx908,gfx90a,gfx942
ARG UCX_BRANCH=v1.16.0
ARG UCC_BRANCH=v1.3.0
ARG OMPI_BRANCH=v5.0.3
ARG APT_GET_APPS=""
# Update and install basic Linux development tools.
# Every continued line ends in " \" (space before the backslash) so package
# names remain separate words regardless of how the next line is indented;
# a bare "name\" fuses with the following token after line joining.
# ${APT_GET_APPS} is intentionally unquoted so several extra packages can be
# passed in one build argument.
# The apt package lists are kept (only the .deb cache is cleaned) because
# later steps in this file still install packages with apt.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        curl \
        flex \
        gfortran \
        git \
        hwloc \
        initramfs-tools \
        libelf-dev \
        libnuma-dev \
        libstdc++-12-dev \
        libtinfo-dev \
        libtool \
        libxml2-dev \
        make \
        nano \
        numactl \
        openssh-client \
        openssh-server \
        pkg-config \
        python3-dev \
        python3-distutils \
        python3-pip \
        ssh \
        tmux \
        unzip \
        vim \
        wget \
        ${APT_GET_APPS} \
    && apt-get clean
# Register the ROCm signing key, install the amdgpu-install helper package
# from ROCM_URL, then use it to install the "rocm" and "hiplibsdk" use cases
# with --no-dkms.
# gnupg is installed in this step because `gpg --dearmor` needs it and the
# base package list does not include it; previously the missing binary was
# masked by the trailing `tee`, which wrote an empty keyring and returned 0.
# Downloads go to /tmp and are removed in the same layer so they do not end up
# in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gnupg \
    && wget -qO /tmp/rocm.gpg.key https://repo.radeon.com/rocm/rocm.gpg.key \
    && gpg --batch --yes --dearmor -o /etc/apt/trusted.gpg.d/rocm.gpg /tmp/rocm.gpg.key \
    && wget -O /tmp/rocm.deb ${ROCM_URL} \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y /tmp/rocm.deb \
    && amdgpu-install --usecase=rocm,hiplibsdk --no-dkms -y \
    && rm -f /tmp/rocm.deb /tmp/rocm.gpg.key \
    && apt-get clean
# Split the comma-separated GPU_TARGET build argument and append one
# architecture per line to /opt/rocm/bin/target.lst, then make the file
# world-readable. The script is single-quoted, so bash itself expands
# GPU_TARGET (build ARGs are present in the RUN environment).
RUN bash -c 'IFS="," read -r -a gpu_list <<<${GPU_TARGET} \
    && for arch in ${gpu_list[@]}; do \
         echo $arch >> /opt/rocm/bin/target.lst; \
       done' \
    && chmod a+r /opt/rocm/bin/target.lst
# Requires cmake > 3.22, so a pinned upstream binary release is unpacked into
# /opt/cmake.
# The archive is written to disk before extraction so that a failed download
# fails the build (a `wget | tar` pipeline only reports tar's exit status).
# TLS certificate verification is left enabled; ca-certificates is part of the
# base package install.
RUN mkdir -p /opt/cmake \
    && wget --quiet -O /tmp/cmake.tar.gz https://cmake.org/files/v3.27/cmake-3.27.7-linux-x86_64.tar.gz \
    && tar --strip-components=1 -xzf /tmp/cmake.tar.gz -C /opt/cmake \
    && rm -f /tmp/cmake.tar.gz
# Install prefixes used by the build steps in this file. GPU_TARGET is copied
# from the build argument into the image environment.
ENV ROCM_PATH=/opt/rocm \
    UCX_PATH=/opt/ucx \
    UCC_PATH=/opt/ucc \
    OMPI_PATH=/opt/ompi \
    GPU_TARGET=${GPU_TARGET}
# Adding rocm/cmake to the Environment.
# ${VAR:+:$VAR} appends the previous value (with its separator) only when one
# exists. Appending an unset variable directly leaves a trailing ":", i.e. an
# empty path element, which the dynamic loader and GCC interpret as the
# current working directory.
ENV PATH=$ROCM_PATH/bin:/opt/cmake/bin:$PATH \
    LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/lib64:$ROCM_PATH/llvm/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} \
    LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/lib64${LIBRARY_PATH:+:$LIBRARY_PATH} \
    C_INCLUDE_PATH=$ROCM_PATH/include${C_INCLUDE_PATH:+:$C_INCLUDE_PATH} \
    CPLUS_INCLUDE_PATH=$ROCM_PATH/include${CPLUS_INCLUDE_PATH:+:$CPLUS_INCLUDE_PATH} \
    CMAKE_PREFIX_PATH=$ROCM_PATH/lib/cmake${CMAKE_PREFIX_PATH:+:$CMAKE_PREFIX_PATH}
# SSH server setup: runtime directory, root password, sshd options and port.
#
# The sed expressions are anchored on the directive name and accept an optional
# leading "#", so they replace the whole line whether the stock sshd_config
# ships the option commented out or not. Substring replacements such as
# "PermitRootLogin prohibit-password" -> "PermitRootLogin yes" leave a
# commented default ("#PermitRootLogin ...") commented, so the setting never
# takes effect.
#
# NOTE(review): the root password is hard-coded in the image and root password
# login is enabled with StrictModes off. This is only appropriate on trusted,
# isolated networks; consider key-based login or a build secret instead.
RUN mkdir -p /var/run/sshd \
    && echo 'root:redhat' | chpasswd \
    && sed -i -E \
         -e 's/^#?PasswordAuthentication .*/PasswordAuthentication yes/' \
         -e 's/^#?PermitRootLogin .*/PermitRootLogin yes/' \
         -e 's/^#?Port .*/Port 2222/' \
         /etc/ssh/sshd_config \
    && echo "StrictModes no" >> /etc/ssh/sshd_config
# Document the SSH port configured above (sshd listens on 2222, not 22).
EXPOSE 2222
# Start the SSH service, then replace the shell with the container command.
# Exec (JSON) form is used so that CMD and `docker run <image> <cmd>` arguments
# are passed through as "$@"; a shell-form ENTRYPOINT ignores them. When no
# command is supplied at all, "${@:-bash}" falls back to an interactive bash,
# which matches the previous default behaviour. The "--" element fills $0.
ENTRYPOINT ["/bin/bash", "-c", "service ssh restart && exec \"${@:-bash}\"", "--"]
# Source trees are cloned and built under /tmp.
WORKDIR /tmp
# Install UCX: clone UCX_BRANCH, configure a release build with ROCm support
# into $UCX_PATH, build and install.
# The clone and build tree are deleted in the same RUN so they are not stored
# in the image layer; later steps only reference the install prefix.
RUN git clone https://github.com/openucx/ucx.git -b ${UCX_BRANCH} \
    && cd ucx \
    && ./autogen.sh \
    && mkdir build \
    && cd build \
    && ../contrib/configure-release --prefix=$UCX_PATH \
        --with-rocm=$ROCM_PATH \
        --without-knem \
        --without-xpmem \
        --without-cuda \
        --enable-optimizations \
        --disable-logging \
        --disable-debug \
        --disable-examples \
    && make -j $(nproc) \
    && make install \
    && cd /tmp \
    && rm -rf /tmp/ucx
# Install UCC: clone UCC_BRANCH, patch the sources, configure against ROCm and
# the UCX install prefix (RCCL disabled), build and install into $UCC_PATH.
# Two source patches are applied before configuring:
#   - mc_rocm.c: every "memoryType" is rewritten to "type".
#   - cuda_lt.sh: "--offload-arch=native" is removed. That flag builds for the
#     local GPU architecture, which may not be present at container build time.
# The clone and build tree are deleted in the same RUN so they are not stored
# in the image layer.
RUN git clone -b ${UCC_BRANCH} https://github.com/openucx/ucc \
    && cd ucc \
    && ./autogen.sh \
    && sed -i 's/memoryType/type/g' ./src/components/mc/rocm/mc_rocm.c \
    && sed -i 's/--offload-arch=native//g' ./cuda_lt.sh \
    && mkdir build \
    && cd build \
    && ../configure --prefix=${UCC_PATH} --with-rocm=${ROCM_PATH} --with-ucx=${UCX_PATH} --with-rccl=no \
    && make -j $(nproc) \
    && make install \
    && cd /tmp \
    && rm -rf /tmp/ucc
# Install OpenMPI: clone OMPI_BRANCH with its submodules, configure against
# the UCX and UCC install prefixes with the internal PMIx and Fortran bindings
# enabled, build and install into $OMPI_PATH.
# The clone and build tree are deleted in the same RUN so they are not stored
# in the image layer.
RUN git clone --recursive https://github.com/open-mpi/ompi.git -b ${OMPI_BRANCH} \
    && cd ompi \
    && ./autogen.pl \
    && mkdir build \
    && cd build \
    && ../configure --prefix=$OMPI_PATH --with-ucx=$UCX_PATH \
        --with-ucc=${UCC_PATH} \
        --enable-mca-no-build=btl-uct \
        --without-verbs \
        --with-pmix=internal \
        --enable-mpi \
        --enable-mpi-fortran=yes \
        --disable-man-pages \
        --disable-debug \
    && make -j $(nproc) \
    && make install \
    && cd /tmp \
    && rm -rf /tmp/ompi
# Put the OpenMPI, UCX and UCC install prefixes in front of the executable,
# library, header and pkg-config search paths.
ENV PATH=${OMPI_PATH}/bin:${UCX_PATH}/bin:${UCC_PATH}/bin:${PATH} \
    LD_LIBRARY_PATH=${OMPI_PATH}/lib:${UCX_PATH}/lib:${UCC_PATH}/lib:${LD_LIBRARY_PATH} \
    LIBRARY_PATH=${OMPI_PATH}/lib:${UCX_PATH}/lib:${UCC_PATH}/lib:${LIBRARY_PATH} \
    C_INCLUDE_PATH=${OMPI_PATH}/include:${UCX_PATH}/include:${UCC_PATH}/include:${C_INCLUDE_PATH} \
    CPLUS_INCLUDE_PATH=${OMPI_PATH}/include:${UCX_PATH}/include:${UCC_PATH}/include:${CPLUS_INCLUDE_PATH} \
    PKG_CONFIG_PATH=${OMPI_PATH}/lib/pkgconfig:${UCX_PATH}/lib/pkgconfig/:${PKG_CONFIG_PATH}
# Runtime switches: going by their names, the OMPI_* pair lets Open MPI run as
# root (the image has no other user) and the UCX one turns off warnings about
# unused UCX environment variables.
ENV OMPI_ALLOW_RUN_AS_ROOT=1 \
    OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
    UCX_WARN_UNUSED_ENV_VARS=n
# Install Additional Apps Below
# HPL_BRANCH selects the rocHPL git ref to build.
ARG HPL_BRANCH="main"
WORKDIR /opt
# Installing rocHPL: clone HPL_BRANCH into /opt/rocHPL and run its install.sh
# against the ROCm and Open MPI prefixes, installing into /opt/rochpl.
# The clone lives under /opt (see WORKDIR), so that is the path removed after
# installation; the earlier cleanup pointed at /tmp/rocHPL, which never
# existed, and left the source tree in the image.
# NOTE(review): assumes /opt/rochpl is self-contained after install.sh - confirm
# that nothing in it links back into the source tree.
RUN git clone -b ${HPL_BRANCH} https://github.com/ROCmSoftwarePlatform/rocHPL.git \
    && cd rocHPL \
    && ./install.sh \
        --prefix=/opt/rochpl \
        --with-rocm=/opt/rocm/ \
        --with-mpi=/opt/ompi \
    && cd /opt \
    && rm -rf /opt/rocHPL
# Runtime defaults: append the rocHPL install directories to PATH and restrict
# HIP to device indices 0-3.
ENV PATH=${PATH}:/opt/rochpl:/opt/rochpl/bin \
    HIP_VISIBLE_DEVICES=0,1,2,3
# Alternative default kept for reference: run sshd in the foreground.
#CMD ["/usr/sbin/sshd", "-D"]
# Default command: an interactive shell.
CMD ["/bin/bash"]