Commit faafebd8 by Mustafa Tekpinar

Added a timer function to sgemme.py to show total compute time.

parent a53a653e
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -9,27 +9,33 @@ then
# rm -rf BLAT.fasta
rm -rf BLAT*
rm -f default.conf caracTest.dat
rm -rf bin*.fasta
elif [ "$1" == "jetoff" ]
then
# If you have your own JET2 score file, you can turn off JET2 as follows:
# In ../tests folder, you can find a sample JET2 file called BLAT_jet.res
echo "Running GEMME with a user-provided alignment file."
echo "Running SGEMME with a user-provided alignment file."
echo "Using a previously produced prot_jet.res file to check reproducibility!"
cp ../tests/BLAT_jet.res .
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --jetfile BLAT_jet.res
python $SGEMME_PATH/sgemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --jetfile BLAT_jet.res
elif [ "$1" == "withpdb" ]
then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-curated.pdb for the structural feature calculations!"
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-curated.pdb --normweightmode dfi
echo "Using blat-af2.pdb for the structural feature calculations!"
python $SGEMME_PATH/sgemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-af2.pdb --normweightmode maxhalftracepchalftracecvhalfcvpc
else
echo "Running GEMME with a user-provided alignment file."
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta
elif [ "$1" == "withpdb-withmutfile" ]
then
fi
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the structural feature calculations!"
python $SGEMME_PATH/sgemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-af2.pdb --normweightmode maxhalftracepchalftracecvhalfcvpc -m Stiffler_2015_BLAT_ECOLX.mut
else
echo "Running SGEMME with a user-provided alignment file."
python $SGEMME_PATH/sgemme.py aliBLAT.fasta -r input -f aliBLAT.fasta
fi
......@@ -43,8 +43,10 @@ def extractQuerySeq(filename):
def getNbSeq(filename):
#TODO: Remove bash dependency and count the sequences within Python!
"""
# Get the number of sequences in a multi-fasta file
"""
if filename!='':
proc=subprocess.Popen("grep -c '^>' "+filename,stdout=subprocess.PIPE,shell=True)
......@@ -131,7 +133,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
chainID = chains[0]
subprocess.call("cp $GEMME_PATH/default.conf .",shell=True)
subprocess.call("cp $SGEMME_PATH/default.conf .",shell=True)
if retMet=="input":
if bFile!='':
#TODO: I think these two lines must be here as well but I am not sure.
......@@ -184,42 +186,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
print("\nRunning for SC1:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc1.res")
dir_name = prot+"/"
if os.path.isdir(dir_name):
for f in os.listdir(dir_name):
f_path = os.path.join(dir_name, f)
if os.path.isfile(f_path):
os.remove(f_path)
os.rmdir(dir_name)
# Calculate SC2
jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 4"+" > "+prot+".out"
#One can also add: -g 'trace,pc,cv,clusters,axs'
print("\nRunning for SC2:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc2.res")
dir_name = prot+"/"
if os.path.isdir(dir_name):
for f in os.listdir(dir_name):
f_path = os.path.join(dir_name, f)
if os.path.isfile(f_path):
os.remove(f_path)
os.rmdir(dir_name)
# Calculate SC3
jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 5"+" > "+prot+".out"
#One can also add: -g 'trace,pc,cv,clusters,axs'
print("\nRunning for SC3:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc3.res")
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
dir_name = prot+"/"
if os.path.isdir(dir_name):
......@@ -228,118 +195,153 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
if os.path.isfile(f_path):
os.remove(f_path)
os.rmdir(dir_name)
# # Calculate SC2
# jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
# prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 4"+" > "+prot+".out"
# #One can also add: -g 'trace,pc,cv,clusters,axs'
dfSC1 = pd.read_table(prot+"_jet_sc1.res", delimiter=r"\s+")
print(dfSC1)
# print("\nRunning for SC2:\n"+jetcmd)
# reCode=subprocess.call(jetcmd,shell=True)
# if os.path.isfile(prot+"/"+prot+"_jet.res"):
# os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc2.res")
dfSC2 = pd.read_table(prot+"_jet_sc2.res", delimiter=r"\s+")
print(dfSC2)
# dir_name = prot+"/"
# if os.path.isdir(dir_name):
# for f in os.listdir(dir_name):
# f_path = os.path.join(dir_name, f)
# if os.path.isfile(f_path):
# os.remove(f_path)
# os.rmdir(dir_name)
dfSC3 = pd.read_table(prot+"_jet_sc3.res", delimiter=r"\s+")
print(dfSC3)
dfSC1['sc1'] =dfSC1['clusters']
dfSC1['sc2'] =dfSC2['clusters']
dfSC1['sc3'] =dfSC3['clusters']
# # Calculate SC3
# jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
# prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 5"+" > "+prot+".out"
# #One can also add: -g 'trace,pc,cv,clusters,axs'
print(dfSC1)
# print("\nRunning for SC3:\n"+jetcmd)
# reCode=subprocess.call(jetcmd,shell=True)
# if os.path.isfile(prot+"/"+prot+"_jet.res"):
# os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc3.res")
# dir_name = prot+"/"
# if os.path.isdir(dir_name):
# for f in os.listdir(dir_name):
# f_path = os.path.join(dir_name, f)
# if os.path.isfile(f_path):
# os.remove(f_path)
# os.rmdir(dir_name)
# dfSC1 = pd.read_table(prot+"_jet_sc1.res", delimiter=r"\s+")
# print(dfSC1)
# # dfSC2 = pd.read_table(prot+"_jet_sc2.res", delimiter=r"\s+")
# # print(dfSC2)
# # dfSC3 = pd.read_table(prot+"_jet_sc3.res", delimiter=r"\s+")
# # print(dfSC3)
# dfSC1['sc1'] =dfSC1['clusters']
# # dfSC1['sc2'] =dfSC2['clusters']
# # dfSC1['sc3'] =dfSC3['clusters']
# print(dfSC1)
# #Calculate combined values of SC1, SC2 and SC3 and normalize them.
# combinedValuesV1 = []
# combinedValuesV2 = []
# value1List = []
# value2List = []
# value3List = []
# value4List = []
# value5List = []
# value6List = []
# temp1List = []
# temp2List = []
# temp3List = []
#Calculate combined values of SC1, SC2 and SC3 and normalize them.
combinedValuesV1 = []
combinedValuesV2 = []
value1List = []
value2List = []
value3List = []
value4List = []
value5List = []
value6List = []
temp1List = []
temp2List = []
temp3List = []
# for i in range(len(dfSC1)):
for i in range(len(dfSC1)):
# value1=(dfSC1.loc[i, "sc1"]*dfSC1.loc[i, "trace"]) + dfSC1.loc[i, "trace"]
# value1List.append(value1)
value1=(dfSC1.loc[i, "sc1"]*dfSC1.loc[i, "trace"]) + dfSC1.loc[i, "trace"]
value1List.append(value1)
# value2=(dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)**2 + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)
# value2List.append(value2)
value2=(dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)**2 + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)
value2List.append(value2)
# value3=(dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)**2 + ((dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)
# value3List.append(value3)
value3=(dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)**2 + ((dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)
value3List.append(value3)
# value4=(dfSC1.loc[i, "sc1"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)**2 + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
# value4List.append(value4)
value4=(dfSC1.loc[i, "sc1"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)**2 + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
value4List.append(value4)
# value5=(dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
# value5List.append(value5)
value5=(dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
value5List.append(value5)
# value6=(dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
# value6List.append(value6)
value6=(dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
value6List.append(value6)
# temp1 = (dfSC1.loc[i, "sc1"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)**2
# temp1List.append(temp1)
# temp2 = (dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)**2
# temp2List.append(temp2)
# temp3 = (dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)**2
# temp3List.append(temp3)
temp1 = (dfSC1.loc[i, "sc1"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)**2
temp1List.append(temp1)
temp2 = (dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)**2
temp2List.append(temp2)
temp3 = (dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)**2
temp3List.append(temp3)
# combinedValuesV1.append(max([value1, value2, value3]))
# combinedValuesV2.append(max([value2, value3, value4]))
combinedValuesV1.append(max([value1, value2, value3]))
combinedValuesV2.append(max([value2, value3, value4]))
# #MinMax normalize the list
# combinedValuesV1MinMaxed = minMaxNormalization(combinedValuesV1)
# combinedValuesV2MinMaxed = minMaxNormalization(combinedValuesV2)
#MinMax normalize the list
combinedValuesV1MinMaxed = minMaxNormalization(combinedValuesV1)
combinedValuesV2MinMaxed = minMaxNormalization(combinedValuesV2)
# # value1ListMinMaxed = minMaxNormalization(value1List)
# # value2ListMinMaxed = minMaxNormalization(value2List)
# # value3ListMinMaxed = minMaxNormalization(value3List)
# # value4ListMinMaxed = minMaxNormalization(value4List)
# # combinedValuesV3MinMaxed = []
# # for i in range(len(dfSC1)):
# # combinedValuesV3MinMaxed.append(max([value2ListMinMaxed[i], value3ListMinMaxed[i], value4ListMinMaxed[i]]))
# value1ListMinMaxed = minMaxNormalization(value1List)
# value2ListMinMaxed = minMaxNormalization(value2List)
# value3ListMinMaxed = minMaxNormalization(value3List)
# value4ListMinMaxed = minMaxNormalization(value4List)
# #Let's skip that minmax normalization part
# combinedValuesV3MinMaxed = []
# for i in range(len(dfSC1)):
# combinedValuesV3MinMaxed.append(max([value2ListMinMaxed[i], value3ListMinMaxed[i], value4ListMinMaxed[i]]))
#Let's skip that minmax normalization part
combinedValuesV3MinMaxed = []
for i in range(len(dfSC1)):
maxTemp = max([value2List[i], value3List[i], value4List[i]])
if(maxTemp > 1.0):
combinedValuesV3MinMaxed.append(1.0)
else:
combinedValuesV3MinMaxed.append(maxTemp)
# maxTemp = max([value2List[i], value3List[i], value4List[i]])
# if(maxTemp > 1.0):
# combinedValuesV3MinMaxed.append(1.0)
# else:
# combinedValuesV3MinMaxed.append(maxTemp)
combinedValuesV4MinMaxed = []
for i in range(len(dfSC1)):
maxTemp = max([temp1List[i], temp2List[i], temp3List[i]])
if(maxTemp > 1.0):
combinedValuesV4MinMaxed.append(1.0)
else:
combinedValuesV4MinMaxed.append(maxTemp)
# combinedValuesV4MinMaxed = []
# for i in range(len(dfSC1)):
# maxTemp = max([temp1List[i], temp2List[i], temp3List[i]])
# if(maxTemp > 1.0):
# combinedValuesV4MinMaxed.append(1.0)
# else:
# combinedValuesV4MinMaxed.append(maxTemp)
dfSC1['combinedv1'] = combinedValuesV1MinMaxed.round(4)
dfSC1['combinedv2'] = combinedValuesV2MinMaxed.round(4)
dfSC1['combinedv3'] = list(np.around(np.array(combinedValuesV3MinMaxed), 4))
dfSC1['combinedv4'] = list(np.around(np.array(combinedValuesV4MinMaxed), 4))
#dfSC1['combinedv3'] = combinedValuesV3MinMaxed.round(4)
# dfSC1['combinedv1'] = combinedValuesV1MinMaxed.round(4)
# dfSC1['combinedv2'] = combinedValuesV2MinMaxed.round(4)
# dfSC1['combinedv3'] = list(np.around(np.array(combinedValuesV3MinMaxed), 4))
# dfSC1['combinedv4'] = list(np.around(np.array(combinedValuesV4MinMaxed), 4))
# #dfSC1['combinedv3'] = combinedValuesV3MinMaxed.round(4)
# dfSC1['cv'] = dfSC1['cv'].round(4)
# dfSC1['pc'] = dfSC1['pc'].round(4)
# dfSC1['tr'] = dfSC1['tr'].round(4)
# dfSC1['freq'] = dfSC1['freq'].round(1)
# dfSC1['trace'] = dfSC1['trace'].round(4)
# dfSC1['clusters'] = dfSC1['clusters'].round(4)
# dfSC1['clusnumber'] = dfSC1['clusnumber'].round(1)
# dfSC1['sc1'] = dfSC1['sc1'].round(4)
# dfSC1['sc2'] = dfSC1['sc2'].round(4)
# dfSC1['sc3'] = dfSC1['sc3'].round(4)
# # dfSC1['cv'] = dfSC1['cv'].round(4)
# # dfSC1['pc'] = dfSC1['pc'].round(4)
# # dfSC1['tr'] = dfSC1['tr'].round(4)
# # dfSC1['freq'] = dfSC1['freq'].round(1)
# # dfSC1['trace'] = dfSC1['trace'].round(4)
# # dfSC1['clusters'] = dfSC1['clusters'].round(4)
# # dfSC1['clusnumber'] = dfSC1['clusnumber'].round(1)
# # dfSC1['sc1'] = dfSC1['sc1'].round(4)
# # dfSC1['sc2'] = dfSC1['sc2'].round(4)
# # dfSC1['sc3'] = dfSC1['sc3'].round(4)
dfSC1.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
# dfSC1.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
#dfSC1.to_csv(prot+"_jet.res", header=True, index=None, mode='w')
#sys.exit(-1)
......@@ -363,9 +365,9 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
def launchPred(prot,inAli,mutFile, normWeightMode, alphabet):
if mutFile!='':
rcmd="Rscript --save $GEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet
rcmd="Rscript --save $SGEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet
else:
rcmd="Rscript --save $GEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none "+normWeightMode+" "+alphabet
rcmd="Rscript --save $SGEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none "+normWeightMode+" "+alphabet
print("\nRunning: \n"+rcmd)
reCode=subprocess.call(rcmd,shell=True)
......
# -*- coding: utf-8 -*-
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# Copyright (c) 2022-2023: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
import sys
import os
import time
import argparse
import re
import subprocess
......@@ -672,11 +673,17 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
def main():
tic = time.perf_counter()
args = parse_command_line()
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,\
args.fastaFile,args.nIter,args.NSeqs, args.jetfile, args.pdbfile,\
args.normweightmode, args.alphabet)
toc = time.perf_counter()
print(f"S-GEMME computation finished in {toc - tic:0.4f} seconds!")
if (__name__ == '__main__'):
main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment