Commit b20bee53 by Mustafa Tekpinar

Removed dynamics related functions: calcPercentDFI, calcBfactors and getBfactors.

parent 5ffaa0e3
......@@ -21,6 +21,10 @@ from prody import *
from scipy.stats import rankdata
from Bio import AlignIO
import biotite.structure.io as strucio
import biotite.application.dssp as dssp
import biotite.structure.io.pdb as pdb
#from gemmeAnal import *
import pandas as pd
......@@ -264,9 +268,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
# Run Rscript to compute predictions
def launchPred(prot,inAli,mutFile, normWeightMode, alphabet):
if (mutFile!=''):
rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet
#TODO: remember that there is a problem here!!!!
rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet
else:
rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none "+normWeightMode+" "+alphabet
......@@ -331,114 +333,114 @@ def rankSortProteinData(dataArray, inverted=True):
else:
return (normalizedRankedDataArray)
def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
"""
Calculate rank normalized dynamic flexibility index.
"""
# def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
# """
# Calculate rank normalized dynamic flexibility index.
# """
#confProDy(auto_show=False)
# #confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
# structure = parsePDB(pdbfile)
# calphas = structure.select('name CA')
anm = ANM('ANM')
anm.buildHessian(calphas)
if(nmodes==None):
anm.calcModes(n_modes='all')
else:
anm.calcModes(n_modes=int(nmodes))
# anm = ANM('ANM')
# anm.buildHessian(calphas)
# if(nmodes==None):
# anm.calcModes(n_modes='all')
# else:
# anm.calcModes(n_modes=int(nmodes))
dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
percentDfi = rankSortProteinData(dfi)
# dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
# percentDfi = rankSortProteinData(dfi)
if(attenuate.lower()=='true'):
percentDfi = attenuateEndPoints(percentDfi)
# if(attenuate.lower()=='true'):
# percentDfi = attenuateEndPoints(percentDfi)
# if(outfile != None):
# np.savetxt(outfile, percentDfi)
# else:
# np.savetxt(outfile, dfi)
# # if(outfile != None):
# # np.savetxt(outfile, percentDfi)
# # else:
# # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
return percentDfi
else:
return dfi
# if(ranksorted.lower()=='true'):
# return percentDfi
# else:
# return dfi
def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
ranksorted="true", inverted=False):
"""
Calculate rank normalized mean squared fluctuations,
which correlate well with Bfactors.
"""
# def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
# ranksorted="true", inverted=False):
# """
# Calculate rank normalized mean squared fluctuations,
# which correlate well with Bfactors.
# """
#confProDy(auto_show=False)
# #confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
# structure = parsePDB(pdbfile)
# calphas = structure.select('name CA')
anm = ANM('ANM')
anm.buildHessian(calphas)
if(nmodes==None):
anm.calcModes(n_modes='all')
else:
anm.calcModes(n_modes=int(nmodes))
# anm = ANM('ANM')
# anm.buildHessian(calphas)
# if(nmodes==None):
# anm.calcModes(n_modes='all')
# else:
# anm.calcModes(n_modes=int(nmodes))
bfactors = calcSqFlucts(anm)
# bfactors = calcSqFlucts(anm)
percentBfactors = rankSortProteinData(bfactors, inverted=False)
# percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors)
# if(attenuate.lower()=='true'):
# percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
# # if(outfile != None):
# # np.savetxt(outfile, percentBfactors)
# # else:
# # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
if(inverted==True):
return (1.0 - percentBfactors)
else:
return percentBfactors
else:
if(inverted==True):
return (np.max(bfactors) - bfactors)
else:
return bfactors
# if(ranksorted.lower()=='true'):
# if(inverted==True):
# return (1.0 - percentBfactors)
# else:
# return percentBfactors
# else:
# if(inverted==True):
# return (np.max(bfactors) - bfactors)
# else:
# return bfactors
def getBfactors(pdbfile, outfile, attenuate="true", \
ranksorted="true", inverted=True):
"""
Calculate rank normalized dynamic flexibility index.
"""
# def getBfactors(pdbfile, outfile, attenuate="true", \
# ranksorted="true", inverted=True):
# """
# Calculate rank normalized dynamic flexibility index.
# """
#confProDy(auto_show=False)
# #confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
# structure = parsePDB(pdbfile)
# calphas = structure.select('name CA')
bfactors = calphas.getBetas()
# bfactors = calphas.getBetas()
percentBfactors = rankSortProteinData(bfactors, inverted=False)
# percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors)
# if(attenuate.lower()=='true'):
# percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
# # if(outfile != None):
# # np.savetxt(outfile, percentBfactors)
# # else:
# # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
if(inverted==True):
return (1.0 - percentBfactors)
else:
return percentBfactors
else:
if(inverted==True):
return (np.max(bfactors) - bfactors)
else:
return bfactors
# if(ranksorted.lower()=='true'):
# if(inverted==True):
# return (1.0 - percentBfactors)
# else:
# return percentBfactors
# else:
# if(inverted==True):
# return (np.max(bfactors) - bfactors)
# else:
# return bfactors
def attenuateEndPoints(dfi):
"""
......@@ -646,17 +648,17 @@ def check_argument_groups(parser, arg_dict, group, argument):
"""
c = 0
for myArg in argument:
arg_name = myArg.replace("-", "")
if arg_dict[arg_name]!='':
c = c+1
arg_name = myArg.replace("-", "")
if arg_dict[arg_name]!='':
c = c+1
group_name = group.replace("-", "")
if arg_dict[group_name]=="input":
if c!=1:
parser.error("gemme requires " + str(argument) +
parser.error("esgemme requires " + str(argument) + \
" if " + group + " is set to input.")
else:
if c>0:
parser.error("gemme requires " + group +
parser.error("esgemme requires " + group +
" to be set to input if " + str(argument) + " is used.")
return None
def calculateSecondaryStructure(pdbfile):
......@@ -674,8 +676,7 @@ def calculateSecondaryStructure(pdbfile):
A structure file in PDB format is the main input.
"""
import biotite.structure.io as strucio
import biotite.application.dssp as dssp
#pdbfile: Name of the input pdb file
#outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp"
......@@ -724,23 +725,23 @@ def countCoilSegments(inputfile):
def parse_command_line():
"""
Parse command line.
It uses argparse to parse phylosofs' command line arguments and returns the
argparse parser.
Parse command line.
It uses argparse to parse esgemme' command line arguments and returns the
argparse parser.
"""
parser = argparse.ArgumentParser(
prog="gemme",
description="""
GEMME (Global Epistasis Model for predicting Mutational Effects)
is a tool to predict mutational outcomes based on sequence analysis
""",
ESGEMME (Evolutionary and Structural Global Epistasis Model for predicting Mutational Effects)
is a tool to predict mutational outcomes based on sequence and structure analysis
""",
epilog="""
It has been developed at LCQB (Laboratory of Computational and
Quantitative Biology), UMR 7238 CNRS, Sorbonne Université.
If you use it, please cite:
Laine E, Karami Y, Carbone A. Predicting Protein Mutational Landscapes
using Evolutionary Conservation and Global Epistasis
""",
It has been developed at LCQB (Laboratory of Computational and
Quantitative Biology), UMR 7238 CNRS, Sorbonne Université.
If you use it, please cite:
Laine E, Karami Y, Carbone A. Predicting Protein Mutational Landscapes
using Evolutionary Conservation and Global Epistasis
""",
)
parser.add_argument(
......@@ -814,7 +815,7 @@ def parse_command_line():
# Check flag arguments
if args.input is None:
parser.error("GEMME requires an input alignment.")
parser.error("ESGEMME requires an input alignment.")
return args
......@@ -855,11 +856,13 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
structure = parsePDB(prot+".pdb")
calphas = structure.select('name CA')
chains = list(set(calphas.getChids()))
#print(chains)
# pdb_file = pdb.PDBFile.read(prot+".pdb")
# chains = list(set(pdb_file.get_chains()))
if((jetfile) == None):
#I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies.
......@@ -878,22 +881,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
shutil.copy2(jetfile, prot+"_jet.res")
print("done")
#If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.")
sys.exit(-1)
else:
print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi
df = pd.read_table(prot+"_jet.res")
print(df)
df['dfi'] = dfi.round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
# #If a real pdb file is given, calculate dfi for the residues.
# if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
# if (pdbfile == None):
# print("ERROR: There is not any pdb file to calculate DFI.")
# sys.exit(-1)
# else:
# print("Calculating %DFI data per residue!")
# dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
# dfi = dfi
# df = pd.read_table(prot+"_jet.res")
# print(df)
# df['dfi'] = dfi.round(4)
# df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
# print(df)
if((normWeightMode=='ssjetormax')):
if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.")
......@@ -1034,6 +1037,6 @@ if (__name__ == '__main__'):
toc = time.perf_counter()
print(f"S-GEMME computation finished in {toc - tic:0.4f} seconds!")
print(f"ESGEMME computation finished in {toc - tic:0.4f} seconds!")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment