Add more algorithms

This commit is contained in:
D-X-Y
2019-09-28 18:24:47 +10:00
parent bfd6b648fd
commit cfb462e463
286 changed files with 10557 additions and 122955 deletions

View File

@@ -0,0 +1,14 @@
# Commands on Cluster
## RNN
```
bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 WT2-GDAS 1 "bash ./scripts-rnn/train-WT2.sh GDAS"
bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 PTB-GDAS 1 "bash ./scripts-rnn/train-PTB.sh GDAS"
```
## CNN
```
bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 CIFAR10-CUT-GDAS-F1 1 "bash ./scripts-cnn/train-cifar.sh GDAS_F1 cifar10 cut"
bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 IMAGENET-GDAS-F1 1 "bash ./scripts-cnn/train-imagenet.sh GDAS_F1 52 14"
bash scripts-cluster/submit.sh yq01-v100-box-idl-2-8 IMAGENET-GDAS-V1 1 "bash ./scripts-cnn/train-imagenet.sh GDAS_V1 50 14"
```

View File

@@ -0,0 +1,36 @@
#!/bin/bash
#
# Cluster job bootstrap script: mounts the AFS data share, unpacks the CIFAR
# dataset, downloads/unpacks the Python environment, and logs job context.
# submit.sh appends the real training command to a copy of this file, so
# everything here must leave the shell in a ready-to-run state.
set -e
echo "CHECK-DATA-DIR START"
# Mount the shared AFS dataset volume into ./hadoop-data.
sh /home/HGCP_Program/software-install/afs_mount/bin/afs_mount.sh \
    COMM_KM_Data COMM_km_2018 \
    "$(pwd)/hadoop-data" \
    afs://xingtian.afs.baidu.com:9902/user/COMM_KM_Data/dongxuanyi/datasets
export TORCH_HOME="./data/data/"
# Unpack CIFAR into ${TORCH_HOME}; abort loudly if the result is missing.
tar -xf ./hadoop-data/cifar.python.tar -C "${TORCH_HOME}"
cifar_dir="${TORCH_HOME}/cifar.python"
if [ -d "${cifar_dir}" ]; then
  echo "Find cifar-dir: ${cifar_dir}"
else
  echo "Can not find cifar-dir: ${cifar_dir}"
  exit 1
fi
echo "CHECK-DATA-DIR DONE"
PID=$$
# config python: fetch the prebuilt conda-style env through the internal proxy.
PYTHON_ENV=py36_pytorch1.0_env0.1.3.tar.gz
wget -e "http_proxy=cp01-sys-hic-gpu-02.cp01:8888" "http://cp01-sys-hic-gpu-02.cp01/HGCP_DEMO/${PYTHON_ENV}" > screen.log 2>&1
tar xzf "${PYTHON_ENV}"
echo "JOB-PID : ${PID}"
echo "JOB-PWD : $(pwd)"
echo "JOB-files : $(ls)"
echo "JOB-CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES}"
# Sanity-check the unpacked interpreter before the appended command runs.
./env/bin/python --version
echo "JOB-TORCH_HOME: ${TORCH_HOME}"
# real commands

View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Submit a job to the HGCP cluster.
# Usage: bash ./scripts-cluster/submit.sh ${QUEUE} ${JOB-NAME} ${GPUs} ${COMMAND}
#find -name "._*" | xargs rm -rf
ODIR=$(pwd)
FDIR=$(cd "$(dirname "$0")"; pwd)
echo "Bash-Dir : ${ODIR}"
echo "File-Dir : ${FDIR}"
echo "File-Name : ${0}"
# Four arguments are required: queue, job name, GPU count, and the command
# that will be appended to the generated job script.
if [ "$#" -ne 4 ]; then
  echo "Input illegal number of parameters " $#
  echo "Need 4 parameters for the queue-name, the job-name, the number-of-GPUs, and the command"
  exit 1
fi
# Drop stale Python bytecode caches before the file dir is uploaded.
find . -name "__pycache__" | xargs rm -rf
QUEUE=$1
NAME=$2
GPUs=$3
CMD=$4
TIME=$(date +"%Y-%h-%d--%T")
TIME="${TIME//:/-}"   # ':' from %T is unsafe in file/dir names
JOB_SCRIPT="${FDIR}/tmps/job-${TIME}.sh"
HDFS_DIR="/user/COMM_KM_Data/${USER}/logs/alljobs/${NAME}-${TIME}"
echo "JOB-SCRIPT: ${JOB_SCRIPT}"
# Build the per-job script: bootstrap template + the user command.
mkdir -p "${FDIR}/tmps"   # ensure the output directory exists on a fresh checkout
cat "${FDIR}/job-script.sh" > "${JOB_SCRIPT}"
echo "${CMD}" >> "${JOB_SCRIPT}"
# NOTE(review): ${HDP} is assumed to be an hdfs/afs client command exported by
# the calling environment — it is never set in this script; confirm.
${HDP} -mkdir "${HDFS_DIR}"
echo "Create ${HDFS_DIR} done!"
sleep 1s
HGCP_CLIENT_BIN="${HOME}/.hgcp/software-install/HGCP_client/bin"
"${HGCP_CLIENT_BIN}/submit" \
  --hdfs afs://xingtian.afs.baidu.com:9902 \
  --hdfs-user COMM_KM_Data \
  --hdfs-passwd COMM_km_2018 \
  --hdfs-path "${HDFS_DIR}" \
  --file-dir ./ \
  --job-name "${NAME}" \
  --queue-name "${QUEUE}" \
  --num-nodes 1 \
  --num-task-pernode 1 \
  --gpu-pnode "${GPUs}" \
  --time-limit 0 \
  --job-script "${JOB_SCRIPT}"
#--job-script ${FDIR}/job-script.sh
#echo "JOB-SCRIPT: " ${JOB_SCRIPT}