#!/bin/bash
# 
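## Slurm batch job: purges and loads the cuda, openmpi and arm (Forge) modules,
## then runs `ddt --connect` with one GPU requested.
## usage:  sbatch ./submit_gpu.sh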
#
#SBATCH -J gpus 
#SBATCH -p zen3_0512_a100x2
#SBATCH --qos zen3_0512_a100x2
#SBATCH --gres gpu:1
##SBATCH --reservation training_gpu
#SBATCH --time 0-00:10:00 

# Start from a clean module environment, then load the required modules one
# after another in a fixed order (cuda, openmpi, arm/..._FORGE).
module purge
for required_module in \
  cuda/11.8.0-gcc-9.5.0-ananl33 \
  openmpi/4.1.4-gcc-9.5.0-rbertc2 \
  arm/25.0.4_FORGE; do
  module load "$required_module"
done
unset required_module

# Locations of the training examples, both rooted at $FORGE_TRAINING.
# NOTE(review): FORGE_TRAINING is not assigned anywhere in this script;
# presumably it comes from a loaded module or the submitting environment
# -- confirm.
SIMPLEPATH="${FORGE_TRAINING}/correctness/debug/simple"
MMULTPATH="${FORGE_TRAINING}/performance/mmult.py"

# Run ddt with --connect and no target program on the command line.
ddt --connect

# Profile the matrix multiplication example (uncomment the two lines below):
# cd "$(dirname "$MMULTPATH")"
# map --profile mpirun -n 8 python3 "$MMULTPATH" -s 3072
