Commit 8113881f by Riccardo Vicedomini

attempt to fix macOS problem with getopt

parent aaa40ffe
......@@ -34,14 +34,8 @@ HHBLITS_ARG_EVALUE=1e-10
PVLIB_FORCE=false
PVLIB_PATH=""
PVLIB_DOMID=""
NTHREADS=2
NJOBS=8
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
NTHREADS=4
NJOBS=1
function print_usage() {
echo -en "\n USAGE: ${CMD_NAME} -i <input_fasta> -d <hhblits_db> -n <lib_name> [options]\n"
......@@ -55,20 +49,27 @@ function print_usage() {
-n, --lib-name <name>\tName of ProfileView library\n" | column -t -s $'\t'
echo -en "\n"
echo -en " OTHER OPTIONS:\n
--force\tForce the construction of ProfileView's library even if the directory already exists\n
-f, --force\tForce the construction of ProfileView's library even if the directory already exists\n
\tpossibly overwriting previously generated files\n
-t, --threads <num>\tNumber of threads for each hhblits job (default:2)\n
-j, --max-jobs <num>\tNumber of parallel jobs (default:8)\n
-t, --threads <num>\tNumber of threads for each hhblits job (default:${NTHREADS})\n
-j, --max-jobs <num>\tNumber of parallel jobs (default:${NJOBS})\n
-h, --help\tPrint this help message\n
-V, --version\tPrint version\n" | column -t -s $'\t'
echo -en "\n"
echo -en " Long options (e.g., --input) are supported only on Linux systems\n\n"
}
# retrieve provided arguments
opts="i:D:d:n:t:j:hV"
opts="i:D:d:n:ft:j:hV"
longopts="input:,domain-id:,db:,lib-name:,force,threads:,max-jobs:,help,version,cov:,qid:,max-id:,evalue:,iter:"
ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
if [ $? -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test!
# Parse the command line with the getopt flavour available on this platform.
# check_system is a function and must be *called* through command
# substitution: the bare word `check_system` is just a string that never
# equals "Linux", so the long-option branch below was unreachable.
if [ "$(check_system)" = "Linux" ]; then
  # getopt invoked with -o/-l so that long options are accepted.
  ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
  retcode=$?
else
  # Short-options-only invocation for non-Linux systems (e.g. macOS).
  # Arguments are forwarded as separate words so they are not glob-expanded
  # by the shell before getopt sees them.
  ARGS=$(getopt "${opts}" "${@}")
  retcode=$?
fi
if [ ${retcode} -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test!
print_usage
exit 1
fi
......@@ -93,7 +94,7 @@ while [ -n "${1}" ]; do
shift
PVLIB_PATH=${1}
;;
--force)
-f|--force)
PVLIB_FORCE=true
;;
-t|--threads)
......@@ -184,6 +185,12 @@ fi
check_cmds "hhblits" "reformat.pl" "hmmbuild" "python3"
# Choose the job runner: GNU parallel when it is installed, otherwise a plain
# bash invocation (with a warning that jobs will run one after another).
if command -v parallel >/dev/null 2>&1; then
  PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
else
  print_warning "cannot find GNU parallel, all jobs will be run sequentially"
  PEXEC_CMD="/usr/bin/env bash --"
fi
# Create ProfileView library directory
PVLIB_NAME=$(basename "${PVLIB_PATH}")
PVLIB_PREFIX="$(dirname "${PVLIB_PATH}")"/"${PVLIB_NAME}"
......
......@@ -17,7 +17,7 @@
# Common functions and definitions to be used in ProfileView
PV_NAME="ProfileView"
PV_VERSION='1.0'
PV_DATE='20190726'
PV_DATE='20200513'
# Print the physical absolute path of $1 (directory part resolved with
# `pwd -P`, file name appended unchanged).
# Arguments: $1 - path whose parent directory must exist
# Outputs:   the absolute path on stdout
# Returns:   1 (printing nothing on stdout) if the parent directory cannot be
#            entered, instead of silently reporting the current directory.
function abspath() {
  local dir
  # CDPATH is cleared for this cd so that it cannot echo a directory name to
  # stdout and pollute the captured result.
  dir="$(CDPATH= cd -- "$(dirname "$1")" && pwd -P)" || return 1
  echo "${dir}/$(basename "$1")"
}
......@@ -72,4 +72,16 @@ function check_files() {
return 0
}
# Print a short name for the host operating system, derived from `uname -s`.
# Outputs: "Linux", "Mac", "Cygwin", "MinGw", or "UNKNOWN:<uname -s output>"
#          on stdout.
# Note: callers must capture the output, e.g. [ "$(check_system)" = "Linux" ].
function check_system() {
  # Locals keep this helper from leaking variables into the sourcing script.
  local kernel machine
  kernel="$(uname -s)"
  case "${kernel}" in
    Linux*)  machine="Linux" ;;
    Darwin*) machine="Mac" ;;
    CYGWIN*) machine="Cygwin" ;;
    MINGW*)  machine="MinGw" ;;
    *)       machine="UNKNOWN:${kernel}" ;;
  esac
  echo "${machine}"
}
......@@ -33,13 +33,7 @@ PV_OUTTREE=""
PV_SEQDESC=""
PV_OUTDIR="out_$(date +%Y%m%d_%H%M%S)"
PV_TMPDIR=""
NJOBS=8
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
NJOBS=4
function print_usage() {
echo -en "\n USAGE: ${CMD_NAME} -l <profileview_lib> -m <model-list> [options]\n"
......@@ -51,22 +45,29 @@ function print_usage() {
echo -en "\n"
echo -en " OTHER OPTIONS:\n
-o, --out-dir <name>\tPrefix of output files (default: out_<current-date>)\n
--temp-dir <name>\tTemporary result directory\n
-j, --max-jobs <num>\tNumber of parallel jobs (default:8)\n
-W, --temp-dir <name>\tTemporary result directory\n
-j, --max-jobs <num>\tNumber of parallel jobs (default:${NJOBS})\n
-h, --help\tPrint this help message and exit\n
-V, --version\tPrint version and exit\n
" | column -t -s $'\t'
echo -en "\n"
echo -en " Long options (e.g., --input) are supported only on Linux systems\n\n"
}
# retrieve provided arguments
opts="l:m:o:j:hV"
longopts="lib:,models:,out-dir:,temp-dir:,max-jobs:,help,version"
ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
if [ $? -ne 0 ] || [ $# -eq 0 ]; then # the order of this tests is important!
opts="l:m:o:W:t:hV"
longopts="lib:,models:,out-dir:,temp-dir:,threads:,help,version"
# Parse the command line with the getopt flavour available on this platform.
# check_system is a function and must be *called* through command
# substitution: the bare word `check_system` is just a string that never
# equals "Linux", so the long-option branch below was unreachable.
if [ "$(check_system)" = "Linux" ]; then
  # getopt invoked with -o/-l so that long options are accepted.
  ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
  retcode=$?
else
  # Short-options-only invocation for non-Linux systems (e.g. macOS).
  # Arguments are forwarded as separate words so they are not glob-expanded
  # by the shell before getopt sees them.
  ARGS=$(getopt "${opts}" "${@}")
  retcode=$?
fi
if [ ${retcode} -ne 0 ] || [ $# -eq 0 ]; then
print_usage
exit 2
exit 1
fi
eval set -- "${ARGS}"
......@@ -85,11 +86,11 @@ while [ -n "${1}" ]; do
shift
PV_OUTDIR=${1}
;;
--temp-dir)
-W|--temp-dir)
shift
PV_TMPDIR=${1}
;;
-j|--max-jobs)
-t|--threads)
shift
NJOBS=${1}
;;
......@@ -114,6 +115,12 @@ done
check_cmds "hhalign" "python3"
check_pymodules "Bio" "weblogolib" "BitVector"
# Choose the job runner: GNU parallel when it is installed, otherwise a plain
# bash invocation (with a warning that jobs will run one after another).
if command -v parallel >/dev/null 2>&1; then
  PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
else
  print_warning "cannot find GNU parallel, all jobs will be run sequentially"
  PEXEC_CMD="/usr/bin/env bash --"
fi
# Input parameters validation
if [ -z "${PV_LIBPATH}" ]; then
......
......@@ -34,13 +34,7 @@ PV_OUTPREFIX="out"
PV_TMPDIR=""
PV_CVAR=0.99
PV_KBEST=3
NJOBS=8
PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
if ! command -v parallel >/dev/null 2>&1; then
print_warning "cannot find GNU parallel, all jobs will be run sequentially"
PEXEC_CMD="/usr/bin/env bash --"
fi
NJOBS=4
function print_usage() {
echo -en "\n USAGE: ${CMD_NAME} -i <input_fasta> -l <profileview_lib> [options]\n"
......@@ -56,21 +50,28 @@ function print_usage() {
-s, --seq-desc <name>\tInput sequence descriptor file, that is a CSV file containing the follwing fileds:\n
\t<sequence_id>,<function_id>,<family_id>,<sequence_length>\n
-p, --prefix <name>\tPrefix of output files (default: ${PV_OUTPREFIX})\n
--temp-dir <name>\tTemporary result directory\n
-j, --max-jobs <num>\tNumber of parallel jobs (default: ${NJOBS})\n
-W, --temp-dir <name>\tTemporary result directory\n
-t, --threads <num>\tNumber of parallel jobs (default: ${NJOBS})\n
-h, --help\tPrint this help message\n
-V, --version\tPrint version\n" | column -t -s $'\t'
echo -en "\n"
echo -en " Long options (e.g., --input) are supported only on Linux systems\n\n"
}
# retrieve provided arguments
opts="i:l:k:s:p:j:hV"
longopts="input:,lib:,k-best:,seq-desc:,prefix:,temp-dir:,max-jobs:,cvar:,help,version"
ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
if [ $? -ne 0 ] || [ $# -eq 0 ]; then # the order of this tests is important!
opts="i:l:k:s:p:W:t:hV"
longopts="input:,lib:,k-best:,seq-desc:,prefix:,temp-dir:,threads:,cvar:,help,version"
# Parse the command line with the getopt flavour available on this platform.
# check_system is a function and must be *called* through command
# substitution: the bare word `check_system` is just a string that never
# equals "Linux", so the long-option branch below was unreachable.
if [ "$(check_system)" = "Linux" ]; then
  # getopt invoked with -o/-l so that long options are accepted.
  ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
  retcode=$?
else
  # Short-options-only invocation for non-Linux systems (e.g. macOS).
  # Arguments are forwarded as separate words so they are not glob-expanded
  # by the shell before getopt sees them.
  ARGS=$(getopt "${opts}" "${@}")
  retcode=$?
fi
if [ ${retcode} -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test!
print_usage
exit 2
exit 1
fi
eval set -- "${ARGS}"
......@@ -93,14 +94,18 @@ while [ -n "${1}" ]; do
shift
PV_OUTPREFIX=${1}
;;
--temp-dir)
-W|--temp-dir)
shift
PV_TMPDIR=${1}
;;
-j|--max-jobs)
-t|--threads)
shift
NJOBS=${1}
;;
-k|--k-best)
shift
PV_KBEST=${1}
;;
--cvar)
shift
PV_CVAR=${1}
......@@ -165,10 +170,14 @@ if ! [[ "${PV_CVAR}" =~ ^(0(\.[0-9]+)?|\.[0-9]+|1(\.0+)?)$ ]] ; then
PV_CVAR=0.99
fi
check_cmds "hmmsearch" "python3" "Rscript"
check_pymodules "ete3" "numpy"
#check_files "${SCRIPTS_DIR}"/{createHHdict.py,createHmmerDict.py,hh_utils.py,pv_utils.py}
# Choose the job runner: GNU parallel when it is installed, otherwise a plain
# bash invocation (with a warning that jobs will run one after another).
if command -v parallel >/dev/null 2>&1; then
  PEXEC_CMD="parallel --halt now,fail=1 -j ${NJOBS}"
else
  print_warning "cannot find GNU parallel, all jobs will be run sequentially"
  PEXEC_CMD="/usr/bin/env bash --"
fi
# Create temp working directory
if [ -z "${PV_TMPDIR}" ]; then
......@@ -244,7 +253,7 @@ print_status "filtering sequences"
PV_SEQDESC="${PV_TMPDIR}/sequences.filtered.csv"
awk '/^#/{next} !x[$3]++{OFS=",";print $3,$6,$5,$4}' "${PV_SCOREFILE}" >"${PV_SEQDESC}" 2>/dev/null
print_status "building representation space"
print_status "building representation space (using k=${PV_KBEST})"
python3 "${SCRIPTS_DIR}"/generateFeatures.py --seq-list "${PV_SEQDESC}" --hmm-list "${PV_LIBDIR}/${PV_LIBNAME}.models.list" --scores "${PV_SCOREFILE}" --prefix "${PV_TMPDIR}"/out -n 20 -k "${PV_KBEST}" 2>>"${PV_LOGFILE}"
if [ $? -ne 0 ]; then
print_error "error during feature generation, see log: ${PV_LOGFILE}"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment