Commit b20bee53 by Mustafa Tekpinar

Removed dynamics related functions: calcPercentDFI, calcBfactors and getBfactors.

parent 5ffaa0e3
...@@ -21,6 +21,10 @@ from prody import * ...@@ -21,6 +21,10 @@ from prody import *
from scipy.stats import rankdata from scipy.stats import rankdata
from Bio import AlignIO from Bio import AlignIO
import biotite.structure.io as strucio
import biotite.application.dssp as dssp
import biotite.structure.io.pdb as pdb
#from gemmeAnal import * #from gemmeAnal import *
import pandas as pd import pandas as pd
...@@ -264,9 +268,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): ...@@ -264,9 +268,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
# Run Rscript to compute predictions # Run Rscript to compute predictions
def launchPred(prot,inAli,mutFile, normWeightMode, alphabet): def launchPred(prot,inAli,mutFile, normWeightMode, alphabet):
if (mutFile!=''): if (mutFile!=''):
rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet
#TODO: remember that there is a problem here!!!!
else: else:
rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none "+normWeightMode+" "+alphabet rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none "+normWeightMode+" "+alphabet
...@@ -331,114 +333,114 @@ def rankSortProteinData(dataArray, inverted=True): ...@@ -331,114 +333,114 @@ def rankSortProteinData(dataArray, inverted=True):
else: else:
return (normalizedRankedDataArray) return (normalizedRankedDataArray)
def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"): # def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
""" # """
Calculate rank normalized dynamic flexibility index. # Calculate rank normalized dynamic flexibility index.
""" # """
#confProDy(auto_show=False) # #confProDy(auto_show=False)
structure = parsePDB(pdbfile) # structure = parsePDB(pdbfile)
calphas = structure.select('name CA') # calphas = structure.select('name CA')
anm = ANM('ANM') # anm = ANM('ANM')
anm.buildHessian(calphas) # anm.buildHessian(calphas)
if(nmodes==None): # if(nmodes==None):
anm.calcModes(n_modes='all') # anm.calcModes(n_modes='all')
else: # else:
anm.calcModes(n_modes=int(nmodes)) # anm.calcModes(n_modes=int(nmodes))
dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True) # dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
percentDfi = rankSortProteinData(dfi) # percentDfi = rankSortProteinData(dfi)
if(attenuate.lower()=='true'): # if(attenuate.lower()=='true'):
percentDfi = attenuateEndPoints(percentDfi) # percentDfi = attenuateEndPoints(percentDfi)
# if(outfile != None): # # if(outfile != None):
# np.savetxt(outfile, percentDfi) # # np.savetxt(outfile, percentDfi)
# else: # # else:
# np.savetxt(outfile, dfi) # # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'): # if(ranksorted.lower()=='true'):
return percentDfi # return percentDfi
else: # else:
return dfi # return dfi
def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true", # def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
ranksorted="true", inverted=False): # ranksorted="true", inverted=False):
""" # """
Calculate rank normalized mean squared fluctuations, # Calculate rank normalized mean squared fluctuations,
which correlate well with Bfactors. # which correlate well with Bfactors.
""" # """
#confProDy(auto_show=False) # #confProDy(auto_show=False)
structure = parsePDB(pdbfile) # structure = parsePDB(pdbfile)
calphas = structure.select('name CA') # calphas = structure.select('name CA')
anm = ANM('ANM') # anm = ANM('ANM')
anm.buildHessian(calphas) # anm.buildHessian(calphas)
if(nmodes==None): # if(nmodes==None):
anm.calcModes(n_modes='all') # anm.calcModes(n_modes='all')
else: # else:
anm.calcModes(n_modes=int(nmodes)) # anm.calcModes(n_modes=int(nmodes))
bfactors = calcSqFlucts(anm) # bfactors = calcSqFlucts(anm)
percentBfactors = rankSortProteinData(bfactors, inverted=False) # percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'): # if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors) # percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None): # # if(outfile != None):
# np.savetxt(outfile, percentBfactors) # # np.savetxt(outfile, percentBfactors)
# else: # # else:
# np.savetxt(outfile, dfi) # # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'): # if(ranksorted.lower()=='true'):
if(inverted==True): # if(inverted==True):
return (1.0 - percentBfactors) # return (1.0 - percentBfactors)
else: # else:
return percentBfactors # return percentBfactors
else: # else:
if(inverted==True): # if(inverted==True):
return (np.max(bfactors) - bfactors) # return (np.max(bfactors) - bfactors)
else: # else:
return bfactors # return bfactors
def getBfactors(pdbfile, outfile, attenuate="true", \ # def getBfactors(pdbfile, outfile, attenuate="true", \
ranksorted="true", inverted=True): # ranksorted="true", inverted=True):
""" # """
Calculate rank normalized dynamic flexibility index. # Calculate rank normalized dynamic flexibility index.
""" # """
#confProDy(auto_show=False) # #confProDy(auto_show=False)
structure = parsePDB(pdbfile) # structure = parsePDB(pdbfile)
calphas = structure.select('name CA') # calphas = structure.select('name CA')
bfactors = calphas.getBetas() # bfactors = calphas.getBetas()
percentBfactors = rankSortProteinData(bfactors, inverted=False) # percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'): # if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors) # percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None): # # if(outfile != None):
# np.savetxt(outfile, percentBfactors) # # np.savetxt(outfile, percentBfactors)
# else: # # else:
# np.savetxt(outfile, dfi) # # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'): # if(ranksorted.lower()=='true'):
if(inverted==True): # if(inverted==True):
return (1.0 - percentBfactors) # return (1.0 - percentBfactors)
else: # else:
return percentBfactors # return percentBfactors
else: # else:
if(inverted==True): # if(inverted==True):
return (np.max(bfactors) - bfactors) # return (np.max(bfactors) - bfactors)
else: # else:
return bfactors # return bfactors
def attenuateEndPoints(dfi): def attenuateEndPoints(dfi):
""" """
...@@ -646,17 +648,17 @@ def check_argument_groups(parser, arg_dict, group, argument): ...@@ -646,17 +648,17 @@ def check_argument_groups(parser, arg_dict, group, argument):
""" """
c = 0 c = 0
for myArg in argument: for myArg in argument:
arg_name = myArg.replace("-", "") arg_name = myArg.replace("-", "")
if arg_dict[arg_name]!='': if arg_dict[arg_name]!='':
c = c+1 c = c+1
group_name = group.replace("-", "") group_name = group.replace("-", "")
if arg_dict[group_name]=="input": if arg_dict[group_name]=="input":
if c!=1: if c!=1:
parser.error("gemme requires " + str(argument) + parser.error("esgemme requires " + str(argument) + \
" if " + group + " is set to input.") " if " + group + " is set to input.")
else: else:
if c>0: if c>0:
parser.error("gemme requires " + group + parser.error("esgemme requires " + group +
" to be set to input if " + str(argument) + " is used.") " to be set to input if " + str(argument) + " is used.")
return None return None
def calculateSecondaryStructure(pdbfile): def calculateSecondaryStructure(pdbfile):
...@@ -674,8 +676,7 @@ def calculateSecondaryStructure(pdbfile): ...@@ -674,8 +676,7 @@ def calculateSecondaryStructure(pdbfile):
A structure file in PDB format is the main input. A structure file in PDB format is the main input.
""" """
import biotite.structure.io as strucio
import biotite.application.dssp as dssp
#pdbfile: Name of the input pdb file #pdbfile: Name of the input pdb file
#outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp" #outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp"
...@@ -724,23 +725,23 @@ def countCoilSegments(inputfile): ...@@ -724,23 +725,23 @@ def countCoilSegments(inputfile):
def parse_command_line(): def parse_command_line():
""" """
Parse command line. Parse command line.
It uses argparse to parse phylosofs' command line arguments and returns the It uses argparse to parse esgemme' command line arguments and returns the
argparse parser. argparse parser.
""" """
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="gemme", prog="gemme",
description=""" description="""
GEMME (Global Epistasis Model for predicting Mutational Effects) ESGEMME (Evolutionary and Structural Global Epistasis Model for predicting Mutational Effects)
is a tool to predict mutational outcomes based on sequence analysis is a tool to predict mutational outcomes based on sequence and structure analysis
""", """,
epilog=""" epilog="""
It has been developed at LCQB (Laboratory of Computational and It has been developed at LCQB (Laboratory of Computational and
Quantitative Biology), UMR 7238 CNRS, Sorbonne Université. Quantitative Biology), UMR 7238 CNRS, Sorbonne Université.
If you use it, please cite: If you use it, please cite:
Laine E, Karami Y, Carbone A. Predicting Protein Mutational Landscapes Laine E, Karami Y, Carbone A. Predicting Protein Mutational Landscapes
using Evolutionary Conservation and Global Epistasis using Evolutionary Conservation and Global Epistasis
""", """,
) )
parser.add_argument( parser.add_argument(
...@@ -814,7 +815,7 @@ def parse_command_line(): ...@@ -814,7 +815,7 @@ def parse_command_line():
# Check flag arguments # Check flag arguments
if args.input is None: if args.input is None:
parser.error("GEMME requires an input alignment.") parser.error("ESGEMME requires an input alignment.")
return args return args
...@@ -855,11 +856,13 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -855,11 +856,13 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
structure = parsePDB(prot+".pdb") structure = parsePDB(prot+".pdb")
calphas = structure.select('name CA') calphas = structure.select('name CA')
chains = list(set(calphas.getChids())) chains = list(set(calphas.getChids()))
#print(chains) #print(chains)
# pdb_file = pdb.PDBFile.read(prot+".pdb")
# chains = list(set(pdb_file.get_chains()))
if((jetfile) == None): if((jetfile) == None):
#I intend to run JET2 completely externally!! #I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies. #It is too much buggy and it has too many dependencies.
...@@ -878,22 +881,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -878,22 +881,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
shutil.copy2(jetfile, prot+"_jet.res") shutil.copy2(jetfile, prot+"_jet.res")
print("done") print("done")
#If a real pdb file is given, calculate dfi for the residues. # #If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))): # if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
if (pdbfile == None): # if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.") # print("ERROR: There is not any pdb file to calculate DFI.")
sys.exit(-1) # sys.exit(-1)
else: # else:
print("Calculating %DFI data per residue!") # print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true") # dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi # dfi = dfi
df = pd.read_table(prot+"_jet.res") # df = pd.read_table(prot+"_jet.res")
print(df) # print(df)
df['dfi'] = dfi.round(4) # df['dfi'] = dfi.round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w') # df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df) # print(df)
if((normWeightMode=='ssjetormax')): if((normWeightMode=='ssjetormax')):
if (pdbfile == None): if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.") print("ERROR: There is not any pdb file to calculate DFI.")
...@@ -1034,6 +1037,6 @@ if (__name__ == '__main__'): ...@@ -1034,6 +1037,6 @@ if (__name__ == '__main__'):
toc = time.perf_counter() toc = time.perf_counter()
print(f"S-GEMME computation finished in {toc - tic:0.4f} seconds!") print(f"ESGEMME computation finished in {toc - tic:0.4f} seconds!")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment