Commit ba540c6f by Riccardo Vicedomini

added possibility to set DAMA's overlapping thresholds

parent 9b8c05d5
...@@ -29,6 +29,8 @@ source "${CMD_DIR}/metaclade2-common" ...@@ -29,6 +29,8 @@ source "${CMD_DIR}/metaclade2-common"
MCLADE_LIB_PATH="" MCLADE_LIB_PATH=""
MCLADE_EVALUECUTOFF=1e-3 MCLADE_EVALUECUTOFF=1e-3
MCLADE_EVALUECUTCONF=1e-10 MCLADE_EVALUECUTCONF=1e-10
DAMA_OLPAA=30
DAMA_OLPMAXDOM=50
MCLADE_USEDAMA=false MCLADE_USEDAMA=false
MCLADE_USESGE=false MCLADE_USESGE=false
MCLADE_WORKDIR=${PWD} MCLADE_WORKDIR=${PWD}
...@@ -51,12 +53,19 @@ function print_usage() { ...@@ -51,12 +53,19 @@ function print_usage() {
\t(e.g., \"PF00875,PF03441\")\n \t(e.g., \"PF00875,PF03441\")\n
-D, --domain-file <path>\tFile that contains the Pfam accession numbers\n -D, --domain-file <path>\tFile that contains the Pfam accession numbers\n
\tof the domains to be considered (one per line)\n \tof the domains to be considered (one per line)\n
-e, --evalue-cutoff <float>\tE-value cutoff (default:${MCLADE_EVALUECUTOFF})\n
-E, --evalue-cutconf <float>\tConfidence threshold used by DAMA to add new domains
\tin the architecture. (default:${MCLADE_EVALUECUTCONF})\n
-W, --work-dir <path>\tWorking directory, where jobs and results are saved\n -W, --work-dir <path>\tWorking directory, where jobs and results are saved\n
" | column -t -s $'\t' " | column -t -s $'\t'
echo -en "\n" echo -en "\n"
echo -en " DAMA OPTIONS:\n
-e, --evalue-cutoff <float>\tE-value cutoff (default:${MCLADE_EVALUECUTOFF})\n
-E, --evalue-cutconf <float>\tConfidence threshold to add new domains in the architecture.\n
\t(default:${MCLADE_EVALUECUTCONF})\n
--overlappingAA <num>\tnumber (>=0) of amino acids allowed in the domain overlapping.\n
\t(default:${DAMA_OLPAA})\n
--overlappingMaxDomain <num>\tdomain overlapping is allowed for at most <num>% of the match\n
\t(default:${DAMA_OLPMAXDOM})\n
" | column -t -s $'\t'
echo -en "\n"
echo -en " OTHER OPTIONS:\n echo -en " OTHER OPTIONS:\n
-j, --jobs <num>\tNumber of jobs to be created (default:${NJOBS})\n -j, --jobs <num>\tNumber of jobs to be created (default:${NJOBS})\n
-t, --threads <num>\tNumber of threads for each job (default:${NTHREADS})\n -t, --threads <num>\tNumber of threads for each job (default:${NTHREADS})\n
...@@ -77,7 +86,7 @@ function print_usage() { ...@@ -77,7 +86,7 @@ function print_usage() {
# retrieve provided arguments # retrieve provided arguments
opts="i:N:ad:D:e:E:W:t:j:hV" opts="i:N:ad:D:e:E:W:t:j:hV"
longopts="input:,name:,arch,domain-list:,domain-file:,evalue-cutoff:,evalue-cutconf:,work-dir,threads:,jobs:,help,version,sge,pe:,queue:,time-limit:" longopts="input:,name:,arch,domain-list:,domain-file:,evalue-cutoff:,evalue-cutconf:,overlappingAA:,overlappingMaxDomain:,work-dir,threads:,jobs:,help,version,sge,pe:,queue:,time-limit:"
ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}") ARGS=$(getopt -o "${opts}" -l "${longopts}" -n "${CMD_NAME}" -- "${@}")
if [ $? -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test! if [ $? -ne 0 ] || [ $# -eq 0 ]; then # do not change the order of this test!
print_usage print_usage
...@@ -106,6 +115,14 @@ while [ -n "${1}" ]; do ...@@ -106,6 +115,14 @@ while [ -n "${1}" ]; do
shift shift
MCLADE_EVALUECUTCONF=${1} MCLADE_EVALUECUTCONF=${1}
;; ;;
--overlappingAA)
shift
DAMA_OLPAA=${1}
;;
--overlappingMaxDomain)
shift
DAMA_OLPMAXDOM=${1}
;;
-d|--domain-list) -d|--domain-list)
shift shift
MCLADE_DOMLIST=${1} MCLADE_DOMLIST=${1}
...@@ -234,7 +251,10 @@ print_status "MetaCLADE working directory: ${MCLADE_WORKDIR}" ...@@ -234,7 +251,10 @@ print_status "MetaCLADE working directory: ${MCLADE_WORKDIR}"
# Create MetaCLADE scripts # Create MetaCLADE scripts
print_status "creating MetaCLADE script/job files" print_status "creating MetaCLADE script/job files"
python3 "${SCRIPTS_DIR}/mclade_create_jobs.py" -i "${INPUT_FASTA}" -N "${MCLADE_JOBNAME}" ${MCLADE_DOMARG} -W "${MCLADE_WORKDIR}" -e "${MCLADE_EVALUECUTOFF}" -E "${MCLADE_EVALUECUTCONF}" -j "${NJOBS}" ${MCLADE_DAMAARG} python3 "${SCRIPTS_DIR}/mclade_create_jobs.py" \
-i "${INPUT_FASTA}" -N "${MCLADE_JOBNAME}" ${MCLADE_DOMARG} -W "${MCLADE_WORKDIR}" \
-e "${MCLADE_EVALUECUTOFF}" -E "${MCLADE_EVALUECUTCONF}" --overlappingAA ${DAMA_OLPAA} --overlappingMaxDomain ${DAMA_OLPMAXDOM} ${MCLADE_DAMAARG} \
-j "${NJOBS}"
# Run MetaCLADE scripts (possibly using SGE) # Run MetaCLADE scripts (possibly using SGE)
......
...@@ -17,13 +17,13 @@ ...@@ -17,13 +17,13 @@
# Common functions and definitions # Common functions and definitions
PV_NAME="MetaCLADE" PV_NAME="MetaCLADE"
PV_VERSION='2.0' PV_VERSION='2.0'
PV_DATE='20200425' PV_DATE='20200508'
function abspath() { echo "$(cd "$(dirname "$1")"; pwd -P)/$(basename "$1")"; } function abspath() { echo "$(cd "$(dirname "$1")"; pwd -P)/$(basename "$1")"; }
function print_error() { echo "[error] ${@}" >&2; } function print_error() { echo "[$(date +"%F %T")] error: ${@}" >&2; }
function print_warning() { echo "[warning] ${@}" >&2; } function print_warning() { echo "[$(date +"%F %T")] warning: ${@}" >&2; }
function print_status() { echo "[main] ${@}" >&2; } function print_status() { echo "[$(date +"%F %T")] ${@}" >&2; }
function print_version() { echo "${PV_NAME} ${PV_VERSION}-${PV_DATE}"; } function print_version() { echo "${PV_NAME} ${PV_VERSION}-${PV_DATE}"; }
function check_cmds() { function check_cmds() {
......
...@@ -19,6 +19,8 @@ def main(): ...@@ -19,6 +19,8 @@ def main():
parser.add_argument('--domainsInfoFile', dest='domainsInfoFile', help='.domains file of MetaCLADE library', type=str) parser.add_argument('--domainsInfoFile', dest='domainsInfoFile', help='.domains file of MetaCLADE library', type=str)
parser.add_argument('--knownArchFile', dest='knownArchFile', help='.knownArch file of MetaCLADE library', type=str) parser.add_argument('--knownArchFile', dest='knownArchFile', help='.knownArch file of MetaCLADE library', type=str)
parser.add_argument('--overlappingDomainFile', dest='overlappingDomainFile', help='.overlapping file of MetaCLADE library', type=str) parser.add_argument('--overlappingDomainFile', dest='overlappingDomainFile', help='.overlapping file of MetaCLADE library', type=str)
parser.add_argument('--overlappingAA', dest='overlappingAA', help='max aa overlap in DAMA architecture', type=int, default=30)
parser.add_argument('--overlappingMaxDomain', dest='overlappingMaxDomain', help='max percentage overlap in DAMA architecture', type=int, default=50)
parser.add_argument('--evalueCutOff', dest='evalueCutOff', help='e-value cutoff for DAMA', type=str) parser.add_argument('--evalueCutOff', dest='evalueCutOff', help='e-value cutoff for DAMA', type=str)
parser.add_argument('--evalueCutOffConf', dest='evalueCutOffConf', help='e-value confidence threshold for DAMA', type=str) parser.add_argument('--evalueCutOffConf', dest='evalueCutOffConf', help='e-value confidence threshold for DAMA', type=str)
parser.add_argument('--filterResDir', dest='filterResDir', help='results directory of MetaCLADE filter step', type=str) parser.add_argument('--filterResDir', dest='filterResDir', help='results directory of MetaCLADE filter step', type=str)
...@@ -55,6 +57,8 @@ def main(): ...@@ -55,6 +57,8 @@ def main():
'-overlappingDomainFile', args.overlappingDomainFile, '-overlappingDomainFile', args.overlappingDomainFile,
'-evalueCutOff', args.evalueCutOff, '-evalueCutOff', args.evalueCutOff,
'-evalueCutOffConf', args.evalueCutOffConf, '-evalueCutOffConf', args.evalueCutOffConf,
'-overlappingAA', args.overlappingAA,
'-overlappingMaxDomain', args.overlappingMaxDomain,
'-domainsHitFile', domains_hit_file, '-domainsHitFile', domains_hit_file,
'-outputFile', dama_temp_out ] '-outputFile', dama_temp_out ]
dama_cmd = ' '.join(dama_args) dama_cmd = ' '.join(dama_args)
......
...@@ -35,6 +35,8 @@ class CladeController: ...@@ -35,6 +35,8 @@ class CladeController:
self.usr_domain_file = args.usr_domain_file self.usr_domain_file = args.usr_domain_file
self.evalue_cutoff = args.evalue_cutoff self.evalue_cutoff = args.evalue_cutoff
self.evalue_cutconf = args.evalue_cutconf self.evalue_cutconf = args.evalue_cutconf
self.olp_aa = args.olp_aa
self.olp_maxdomain = args.olp_maxdomain
self.working_dir = os.path.abspath(args.work_dir) self.working_dir = os.path.abspath(args.work_dir)
self.temp_out_path = f'{self.working_dir}/temp' self.temp_out_path = f'{self.working_dir}/temp'
self.jobs_out_path = f'{self.working_dir}/jobs' self.jobs_out_path = f'{self.working_dir}/jobs'
...@@ -267,7 +269,7 @@ class CladeController: ...@@ -267,7 +269,7 @@ class CladeController:
bd_dir = f'{self.filter_results_path}' bd_dir = f'{self.filter_results_path}'
out_file = f'{self.arch_results_path}/{self.name}.arch.txt' out_file = f'{self.arch_results_path}/{self.name}.arch.txt'
taxdb_file = f'{self.taxid2name}' taxdb_file = f'{self.taxid2name}'
arch_cmd = f'{self.python_cmd} {self.dama_arch} --damaCmd {self.dama_cmd} --domainsInfoFile {self.domain_list} --knownArchFile {self.knownarchs} --overlappingDomainFile {self.overlapping} --evalueCutOff {self.evalue_cutoff} --evalueCutOffConf {self.evalue_cutconf} --filterResDir {bd_dir} --tempDir {self.temp_out_path} --outputFile {out_file} --taxid2name {taxdb_file}' arch_cmd = f'{self.python_cmd} {self.dama_arch} --damaCmd {self.dama_cmd} --domainsInfoFile {self.domain_list} --knownArchFile {self.knownarchs} --overlappingDomainFile {self.overlapping} --evalueCutOff {self.evalue_cutoff} --evalueCutOffConf {self.evalue_cutconf} --overlappingAA {self.olp_aa} --overlappingMaxDomain {self.olp_maxdomain} --filterResDir {bd_dir} --tempDir {self.temp_out_path} --outputFile {out_file} --taxid2name {taxdb_file}'
fh.write(f'{arch_cmd}\n') fh.write(f'{arch_cmd}\n')
# ~/software/DAMA/Release/DAMA -domainsHitFile /home/vicedomini/projects/metaclade2/test/pippo/results/domainsHitFile.best.res -knownArchFile /home/vicedomini/projects/metaclade2/data/pfamLists/pfam32/pfam32.knownArch -domainsInfoFile /home/vicedomini/projects/metaclade2/data/pfamLists/pfam32/pfam32.domains -outputFile /home/vicedomini/projects/metaclade2/test/pippo/results/3_arch/pippo.arch.txt -overlappingDomainFile /home/vicedomini/projects/metaclade2/data/pfamLists/pfam32/pfam32.overlapping # ~/software/DAMA/Release/DAMA -domainsHitFile /home/vicedomini/projects/metaclade2/test/pippo/results/domainsHitFile.best.res -knownArchFile /home/vicedomini/projects/metaclade2/data/pfamLists/pfam32/pfam32.knownArch -domainsInfoFile /home/vicedomini/projects/metaclade2/data/pfamLists/pfam32/pfam32.domains -outputFile /home/vicedomini/projects/metaclade2/test/pippo/results/3_arch/pippo.arch.txt -overlappingDomainFile /home/vicedomini/projects/metaclade2/data/pfamLists/pfam32/pfam32.overlapping
...@@ -323,6 +325,8 @@ def main(): ...@@ -323,6 +325,8 @@ def main():
parser.add_argument('-D','--domain-file', dest='usr_domain_file', help='file containing Pfam accession numbers (one per line)', type=str) parser.add_argument('-D','--domain-file', dest='usr_domain_file', help='file containing Pfam accession numbers (one per line)', type=str)
parser.add_argument('-e','--evalue-cutoff', dest='evalue_cutoff', help='e-value cutoff', type=float, default=float(1e-3)) parser.add_argument('-e','--evalue-cutoff', dest='evalue_cutoff', help='e-value cutoff', type=float, default=float(1e-3))
parser.add_argument('-E','--evalue-cutconf', dest='evalue_cutconf', help='confidence threshold used by DAMA to add domains into the architecture', type=float, default=float(1e-10)) parser.add_argument('-E','--evalue-cutconf', dest='evalue_cutconf', help='confidence threshold used by DAMA to add domains into the architecture', type=float, default=float(1e-10))
parser.add_argument('--overlappingAA', dest='olp_aa', help='number of amino acids allowed by DAMA in the domain overlapping.', type=int, default=30)
parser.add_argument('--overlappingMaxDomain', dest='olp_maxdomain', help='domain overlapping percentage allowed by DAMA', type=int, default=50)
parser.add_argument('-W','--work-dir', dest='work_dir', help='working directory for MetaCLADE jobs/results', type=str) parser.add_argument('-W','--work-dir', dest='work_dir', help='working directory for MetaCLADE jobs/results', type=str)
parser.add_argument('-j','--jobs', dest='jobs_num', help='number of job scripts to create', type=int, default=int(16)) parser.add_argument('-j','--jobs', dest='jobs_num', help='number of job scripts to create', type=int, default=int(16))
args = parser.parse_args() args = parser.parse_args()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment