Commit 8113881f by Riccardo Vicedomini

attempt to fix macOS problem with getopt

parent aaa40ffe
...@@ -34,14 +34,8 @@ HHBLITS_ARG_EVALUE=1e-10 ...@@ -34,14 +34,8 @@ HHBLITS_ARG_EVALUE=1e-10
PVLIB_FORCE=false PVLIB_FORCE=false
PVLIB_PATH="" PVLIB_PATH=""
PVLIB_DOMID="" PVLIB_DOMID=""
NTHREADS=2 NTHREADS=4
NJOBS=8 NJOBS=1
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
function print_usage() { function print_usage() {
echo -en "\n USAGE: ${CMD_NAME} -i <input_fasta> -d <hhblits_db> -n <lib_name> [options]\n" echo -en "\n USAGE: ${CMD_NAME} -i <input_fasta> -d <hhblits_db> -n <lib_name> [options]\n"
...@@ -55,20 +49,27 @@ function print_usage() { ...@@ -55,20 +49,27 @@ function print_usage() {
-n, --lib-name <name>\tName of ProfileView library\n" | column -t -s $'\t' -n, --lib-name <name>\tName of ProfileView library\n" | column -t -s $'\t'
echo -en "\n" echo -en "\n"
echo -en " OTHER OPTIONS:\n echo -en " OTHER OPTIONS:\n
--force\tForce the construction of ProfileView's library even if the directory already exists\n -f, --force\tForce the construction of ProfileView's library even if the directory already exists\n
\tpossibly overwriting previously generated files\n \tpossibly overwriting previously generated files\n
-t, --threads <num>\tNumber of threads for each hhblits job (default:2)\n -t, --threads <num>\tNumber of threads for each hhblits job (default:${NTHREADS})\n
-j, --max-jobs <num>\tNumber of parallel jobs (default:8)\n -j, --max-jobs <num>\tNumber of parallel jobs (default:${NJOBS})\n
-h, --help\tPrint this help message\n -h, --help\tPrint this help message\n
-V, --version\tPrint version\n" | column -t -s $'\t' -V, --version\tPrint version\n" | column -t -s $'\t'
echo -en "\n" echo -en "\n"
echo -en " Long options (e.g., --input) are supported only on Linux systems\n\n"
} }
# retrieve provided arguments # retrieve provided arguments
opts="i:D:d:n:t:j:hV" opts="i:D:d:n:ft:j:hV"
longopts="input:,domain-id:,db:,lib-name:,force,threads:,max-jobs:,help,version,cov:,qid:,max-id:,evalue:,iter:" longopts="input:,domain-id:,db:,lib-name:,force,threads:,max-jobs:,help,version,cov:,qid:,max-id:,evalue:,iter:"
ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}") if [ check_system = "Linux" ]; then
if [ $? -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test! ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
retcode=$?
else
ARGS=$(getopt "${opts}" $*)
retcode=$?
fi
if [ ${retcode} -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test!
print_usage print_usage
exit 1 exit 1
fi fi
...@@ -93,7 +94,7 @@ while [ -n "${1}" ]; do ...@@ -93,7 +94,7 @@ while [ -n "${1}" ]; do
shift shift
PVLIB_PATH=${1} PVLIB_PATH=${1}
;; ;;
--force) -f|--force)
PVLIB_FORCE=true PVLIB_FORCE=true
;; ;;
-t|--threads) -t|--threads)
...@@ -184,6 +185,12 @@ fi ...@@ -184,6 +185,12 @@ fi
check_cmds "hhblits" "reformat.pl" "hmmbuild" "python3" check_cmds "hhblits" "reformat.pl" "hmmbuild" "python3"
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
# Create ProfileView library directory # Create ProfileView library directory
PVLIB_NAME=$(basename "${PVLIB_PATH}") PVLIB_NAME=$(basename "${PVLIB_PATH}")
PVLIB_PREFIX="$(dirname "${PVLIB_PATH}")"/"${PVLIB_NAME}" PVLIB_PREFIX="$(dirname "${PVLIB_PATH}")"/"${PVLIB_NAME}"
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
# Common functions and definitions to be used in ProfileView # Common functions and definitions to be used in ProfileView
PV_NAME="ProfileView" PV_NAME="ProfileView"
PV_VERSION='1.0' PV_VERSION='1.0'
PV_DATE='20190726' PV_DATE='20200513'
function abspath() { echo "$(cd "$(dirname "$1")"; pwd -P)/$(basename "$1")"; } function abspath() { echo "$(cd "$(dirname "$1")"; pwd -P)/$(basename "$1")"; }
...@@ -72,4 +72,16 @@ function check_files() { ...@@ -72,4 +72,16 @@ function check_files() {
return 0 return 0
} }
# Print a short label for the host operating system.
#
# The kernel name reported by `uname -s` is mapped as follows:
#   Linux*  -> Linux
#   Darwin* -> Mac
#   CYGWIN* -> Cygwin
#   MINGW*  -> MinGw
#   other   -> UNKNOWN:<kernel name>
#
# Globals:   machine (written) -- still assigned globally, as before;
#            NOTE(review): make it local once no caller is confirmed to read it.
# Arguments: none
# Outputs:   the label, followed by a newline, on stdout
function check_system() {
  # Query the kernel name once instead of once per use.
  local kernel
  kernel="$(uname -s)"

  case "${kernel}" in
    Linux*)  machine="Linux" ;;
    Darwin*) machine="Mac" ;;
    CYGWIN*) machine="Cygwin" ;;
    MINGW*)  machine="MinGw" ;;
    *)       machine="UNKNOWN:${kernel}" ;;
  esac

  # Quoted so that an unusual kernel name is printed verbatim rather than
  # being word-split or glob-expanded by the shell.
  printf '%s\n' "${machine}"
}
...@@ -33,13 +33,7 @@ PV_OUTTREE="" ...@@ -33,13 +33,7 @@ PV_OUTTREE=""
PV_SEQDESC="" PV_SEQDESC=""
PV_OUTDIR="out_$(date +%Y%m%d_%H%M%S)" PV_OUTDIR="out_$(date +%Y%m%d_%H%M%S)"
PV_TMPDIR="" PV_TMPDIR=""
NJOBS=8 NJOBS=4
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
function print_usage() { function print_usage() {
echo -en "\n USAGE: ${CMD_NAME} -l <profileview_lib> -m <model-list> [options]\n" echo -en "\n USAGE: ${CMD_NAME} -l <profileview_lib> -m <model-list> [options]\n"
...@@ -51,22 +45,29 @@ function print_usage() { ...@@ -51,22 +45,29 @@ function print_usage() {
echo -en "\n" echo -en "\n"
echo -en " OTHER OPTIONS:\n echo -en " OTHER OPTIONS:\n
-o, --out-dir <name>\tPrefix of output files (default: out_<current-date>)\n -o, --out-dir <name>\tPrefix of output files (default: out_<current-date>)\n
--temp-dir <name>\tTemporary result directory\n -W, --temp-dir <name>\tTemporary result directory\n
-j, --max-jobs <num>\tNumber of parallel jobs (default:8)\n -j, --max-jobs <num>\tNumber of parallel jobs (default:${NJOBS})\n
-h, --help\tPrint this help message and exit\n -h, --help\tPrint this help message and exit\n
-V, --version\tPrint version and exit\n -V, --version\tPrint version and exit\n
" | column -t -s $'\t' " | column -t -s $'\t'
echo -en "\n" echo -en "\n"
echo -en " Long options (e.g., --input) are supported only on Linux systems\n\n"
} }
# retrieve provided arguments # retrieve provided arguments
opts="l:m:o:j:hV" opts="l:m:o:W:t:hV"
longopts="lib:,models:,out-dir:,temp-dir:,max-jobs:,help,version" longopts="lib:,models:,out-dir:,temp-dir:,threads:,help,version"
ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}") if [ check_system = "Linux" ]; then
if [ $? -ne 0 ] || [ $# -eq 0 ]; then # the order of this tests is important! ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
retcode=$?
else
ARGS=$(getopt "${opts}" $*)
retcode=$?
fi
if [ ${retcode} -ne 0 ] || [ $# -eq 0 ]; then
print_usage print_usage
exit 2 exit 1
fi fi
eval set -- "${ARGS}" eval set -- "${ARGS}"
...@@ -85,11 +86,11 @@ while [ -n "${1}" ]; do ...@@ -85,11 +86,11 @@ while [ -n "${1}" ]; do
shift shift
PV_OUTDIR=${1} PV_OUTDIR=${1}
;; ;;
--temp-dir) -W|--temp-dir)
shift shift
PV_TMPDIR=${1} PV_TMPDIR=${1}
;; ;;
-j|--max-jobs) -t|--threads)
shift shift
NJOBS=${1} NJOBS=${1}
;; ;;
...@@ -114,6 +115,12 @@ done ...@@ -114,6 +115,12 @@ done
check_cmds "hhalign" "python3" check_cmds "hhalign" "python3"
check_pymodules "Bio" "weblogolib" "BitVector" check_pymodules "Bio" "weblogolib" "BitVector"
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
# Input parameters validation # Input parameters validation
if [ -z "${PV_LIBPATH}" ]; then if [ -z "${PV_LIBPATH}" ]; then
......
...@@ -34,13 +34,7 @@ PV_OUTPREFIX="out" ...@@ -34,13 +34,7 @@ PV_OUTPREFIX="out"
PV_TMPDIR="" PV_TMPDIR=""
PV_CVAR=0.99 PV_CVAR=0.99
PV_KBEST=3 PV_KBEST=3
NJOBS=8 NJOBS=4
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
function print_usage() { function print_usage() {
echo -en "\n USAGE: ${CMD_NAME} -i <input_fasta> -l <profileview_lib> [options]\n" echo -en "\n USAGE: ${CMD_NAME} -i <input_fasta> -l <profileview_lib> [options]\n"
...@@ -56,21 +50,28 @@ function print_usage() { ...@@ -56,21 +50,28 @@ function print_usage() {
-s, --seq-desc <name>\tInput sequence descriptor file, that is a CSV file containing the follwing fileds:\n -s, --seq-desc <name>\tInput sequence descriptor file, that is a CSV file containing the follwing fileds:\n
\t<sequence_id>,<function_id>,<family_id>,<sequence_length>\n \t<sequence_id>,<function_id>,<family_id>,<sequence_length>\n
-p, --prefix <name>\tPrefix of output files (default: ${PV_OUTPREFIX})\n -p, --prefix <name>\tPrefix of output files (default: ${PV_OUTPREFIX})\n
--temp-dir <name>\tTemporary result directory\n -W, --temp-dir <name>\tTemporary result directory\n
-j, --max-jobs <num>\tNumber of parallel jobs (default: ${NJOBS})\n -t, --threads <num>\tNumber of parallel jobs (default: ${NJOBS})\n
-h, --help\tPrint this help message\n -h, --help\tPrint this help message\n
-V, --version\tPrint version\n" | column -t -s $'\t' -V, --version\tPrint version\n" | column -t -s $'\t'
echo -en "\n" echo -en "\n"
echo -en " Long options (e.g., --input) are supported only on Linux systems\n\n"
} }
# retrieve provided arguments # retrieve provided arguments
opts="i:l:k:s:p:j:hV" opts="i:l:k:s:p:W:t:hV"
longopts="input:,lib:,k-best:,seq-desc:,prefix:,temp-dir:,max-jobs:,cvar:,help,version" longopts="input:,lib:,k-best:,seq-desc:,prefix:,temp-dir:,threads:,cvar:,help,version"
ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}") if [ check_system = "Linux" ]; then
if [ $? -ne 0 ] || [ $# -eq 0 ]; then # the order of this tests is important! ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
retcode=$?
else
ARGS=$(getopt "${opts}" $*)
retcode=$?
fi
if [ ${retcode} -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test!
print_usage print_usage
exit 2 exit 1
fi fi
eval set -- "${ARGS}" eval set -- "${ARGS}"
...@@ -93,14 +94,18 @@ while [ -n "${1}" ]; do ...@@ -93,14 +94,18 @@ while [ -n "${1}" ]; do
shift shift
PV_OUTPREFIX=${1} PV_OUTPREFIX=${1}
;; ;;
--temp-dir) -W|--temp-dir)
shift shift
PV_TMPDIR=${1} PV_TMPDIR=${1}
;; ;;
-j|--max-jobs) -t|--threads)
shift shift
NJOBS=${1} NJOBS=${1}
;; ;;
-k|--k-best)
shift
PV_KBEST=${1}
;;
--cvar) --cvar)
shift shift
PV_CVAR=${1} PV_CVAR=${1}
...@@ -165,10 +170,14 @@ if ! [[ "${PV_CVAR}" =~ ^(0(\.[0-9]+)?|\.[0-9]+|1(\.0+)?)$ ]] ; then ...@@ -165,10 +170,14 @@ if ! [[ "${PV_CVAR}" =~ ^(0(\.[0-9]+)?|\.[0-9]+|1(\.0+)?)$ ]] ; then
PV_CVAR=0.99 PV_CVAR=0.99
fi fi
check_cmds "hmmsearch" "python3" "Rscript" check_cmds "hmmsearch" "python3" "Rscript"
check_pymodules "ete3" "numpy" check_pymodules "ete3" "numpy"
#check_files "${SCRIPTS_DIR}"/{createHHdict.py,createHmmerDict.py,hh_utils.py,pv_utils.py}
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
# Create temp working directory # Create temp working directory
if [ -z "${PV_TMPDIR}" ]; then if [ -z "${PV_TMPDIR}" ]; then
...@@ -244,7 +253,7 @@ print_status "filtering sequences" ...@@ -244,7 +253,7 @@ print_status "filtering sequences"
PV_SEQDESC="${PV_TMPDIR}/sequences.filtered.csv" PV_SEQDESC="${PV_TMPDIR}/sequences.filtered.csv"
awk '/^#/{next} !x[$3]++{OFS=",";print $3,$6,$5,$4}' "${PV_SCOREFILE}" >"${PV_SEQDESC}" 2>/dev/null awk '/^#/{next} !x[$3]++{OFS=",";print $3,$6,$5,$4}' "${PV_SCOREFILE}" >"${PV_SEQDESC}" 2>/dev/null
print_status "building representation space" print_status "building representation space (using k=${PV_KBEST})"
python3 "${SCRIPTS_DIR}"/generateFeatures.py --seq-list "${PV_SEQDESC}" --hmm-list "${PV_LIBDIR}/${PV_LIBNAME}.models.list" --scores "${PV_SCOREFILE}" --prefix "${PV_TMPDIR}"/out -n 20 -k "${PV_KBEST}" 2>>"${PV_LOGFILE}" python3 "${SCRIPTS_DIR}"/generateFeatures.py --seq-list "${PV_SEQDESC}" --hmm-list "${PV_LIBDIR}/${PV_LIBNAME}.models.list" --scores "${PV_SCOREFILE}" --prefix "${PV_TMPDIR}"/out -n 20 -k "${PV_KBEST}" 2>>"${PV_LOGFILE}"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
print_error "error during feature generation, see log: ${PV_LOGFILE}" print_error "error during feature generation, see log: ${PV_LOGFILE}"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment