Commit b20bee53 by Mustafa Tekpinar

Removed dynamics related functions: calcPercentDFI, calcBfactors and getBfactors.

parent 5ffaa0e3
......@@ -21,6 +21,10 @@ from prody import *
from scipy.stats import rankdata
from Bio import AlignIO
import biotite.structure.io as strucio
import biotite.application.dssp as dssp
import biotite.structure.io.pdb as pdb
#from gemmeAnal import *
import pandas as pd
......@@ -265,8 +269,6 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
def launchPred(prot,inAli,mutFile, normWeightMode, alphabet):
if (mutFile!=''):
rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" FALSE "+mutFile+" "+normWeightMode+" "+alphabet
#TODO: remember that there is a problem here!!!!
else:
rcmd="Rscript --save $ESGEMME_PATH/computePred.R "+prot+" "+inAli+" TRUE none "+normWeightMode+" "+alphabet
......@@ -331,114 +333,114 @@ def rankSortProteinData(dataArray, inverted=True):
else:
return (normalizedRankedDataArray)
def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
"""
Calculate rank normalized dynamic flexibility index.
"""
# def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
# """
# Calculate rank normalized dynamic flexibility index.
# """
#confProDy(auto_show=False)
# #confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
# structure = parsePDB(pdbfile)
# calphas = structure.select('name CA')
anm = ANM('ANM')
anm.buildHessian(calphas)
if(nmodes==None):
anm.calcModes(n_modes='all')
else:
anm.calcModes(n_modes=int(nmodes))
# anm = ANM('ANM')
# anm.buildHessian(calphas)
# if(nmodes==None):
# anm.calcModes(n_modes='all')
# else:
# anm.calcModes(n_modes=int(nmodes))
dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
percentDfi = rankSortProteinData(dfi)
# dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
# percentDfi = rankSortProteinData(dfi)
if(attenuate.lower()=='true'):
percentDfi = attenuateEndPoints(percentDfi)
# if(attenuate.lower()=='true'):
# percentDfi = attenuateEndPoints(percentDfi)
# if(outfile != None):
# np.savetxt(outfile, percentDfi)
# else:
# np.savetxt(outfile, dfi)
# # if(outfile != None):
# # np.savetxt(outfile, percentDfi)
# # else:
# # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
return percentDfi
else:
return dfi
# if(ranksorted.lower()=='true'):
# return percentDfi
# else:
# return dfi
def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
ranksorted="true", inverted=False):
"""
Calculate rank normalized mean squared fluctuations,
which correlate well with Bfactors.
"""
# def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
# ranksorted="true", inverted=False):
# """
# Calculate rank normalized mean squared fluctuations,
# which correlate well with Bfactors.
# """
#confProDy(auto_show=False)
# #confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
# structure = parsePDB(pdbfile)
# calphas = structure.select('name CA')
anm = ANM('ANM')
anm.buildHessian(calphas)
if(nmodes==None):
anm.calcModes(n_modes='all')
else:
anm.calcModes(n_modes=int(nmodes))
# anm = ANM('ANM')
# anm.buildHessian(calphas)
# if(nmodes==None):
# anm.calcModes(n_modes='all')
# else:
# anm.calcModes(n_modes=int(nmodes))
bfactors = calcSqFlucts(anm)
# bfactors = calcSqFlucts(anm)
percentBfactors = rankSortProteinData(bfactors, inverted=False)
# percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors)
# if(attenuate.lower()=='true'):
# percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
# # if(outfile != None):
# # np.savetxt(outfile, percentBfactors)
# # else:
# # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
if(inverted==True):
return (1.0 - percentBfactors)
else:
return percentBfactors
else:
if(inverted==True):
return (np.max(bfactors) - bfactors)
else:
return bfactors
# if(ranksorted.lower()=='true'):
# if(inverted==True):
# return (1.0 - percentBfactors)
# else:
# return percentBfactors
# else:
# if(inverted==True):
# return (np.max(bfactors) - bfactors)
# else:
# return bfactors
def getBfactors(pdbfile, outfile, attenuate="true", \
ranksorted="true", inverted=True):
"""
Calculate rank normalized dynamic flexibility index.
"""
# def getBfactors(pdbfile, outfile, attenuate="true", \
# ranksorted="true", inverted=True):
# """
# Calculate rank normalized dynamic flexibility index.
# """
#confProDy(auto_show=False)
# #confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
# structure = parsePDB(pdbfile)
# calphas = structure.select('name CA')
bfactors = calphas.getBetas()
# bfactors = calphas.getBetas()
percentBfactors = rankSortProteinData(bfactors, inverted=False)
# percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors)
# if(attenuate.lower()=='true'):
# percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
# # if(outfile != None):
# # np.savetxt(outfile, percentBfactors)
# # else:
# # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
if(inverted==True):
return (1.0 - percentBfactors)
else:
return percentBfactors
else:
if(inverted==True):
return (np.max(bfactors) - bfactors)
else:
return bfactors
# if(ranksorted.lower()=='true'):
# if(inverted==True):
# return (1.0 - percentBfactors)
# else:
# return percentBfactors
# else:
# if(inverted==True):
# return (np.max(bfactors) - bfactors)
# else:
# return bfactors
def attenuateEndPoints(dfi):
"""
......@@ -652,11 +654,11 @@ def check_argument_groups(parser, arg_dict, group, argument):
group_name = group.replace("-", "")
if arg_dict[group_name]=="input":
if c!=1:
parser.error("gemme requires " + str(argument) +
parser.error("esgemme requires " + str(argument) + \
" if " + group + " is set to input.")
else:
if c>0:
parser.error("gemme requires " + group +
parser.error("esgemme requires " + group +
" to be set to input if " + str(argument) + " is used.")
return None
def calculateSecondaryStructure(pdbfile):
......@@ -674,8 +676,7 @@ def calculateSecondaryStructure(pdbfile):
A structure file in PDB format is the main input.
"""
import biotite.structure.io as strucio
import biotite.application.dssp as dssp
#pdbfile: Name of the input pdb file
#outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp"
......@@ -725,14 +726,14 @@ def countCoilSegments(inputfile):
def parse_command_line():
"""
Parse command line.
It uses argparse to parse phylosofs' command line arguments and returns the
It uses argparse to parse esgemme' command line arguments and returns the
argparse parser.
"""
parser = argparse.ArgumentParser(
prog="gemme",
description="""
GEMME (Global Epistasis Model for predicting Mutational Effects)
is a tool to predict mutational outcomes based on sequence analysis
ESGEMME (Evolutionary and Structural Global Epistasis Model for predicting Mutational Effects)
is a tool to predict mutational outcomes based on sequence and structure analysis
""",
epilog="""
It has been developed at LCQB (Laboratory of Computational and
......@@ -814,7 +815,7 @@ def parse_command_line():
# Check flag arguments
if args.input is None:
parser.error("GEMME requires an input alignment.")
parser.error("ESGEMME requires an input alignment.")
return args
......@@ -860,6 +861,8 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
#print(chains)
# pdb_file = pdb.PDBFile.read(prot+".pdb")
# chains = list(set(pdb_file.get_chains()))
if((jetfile) == None):
#I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies.
......@@ -878,22 +881,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
shutil.copy2(jetfile, prot+"_jet.res")
print("done")
#If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.")
sys.exit(-1)
else:
print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi
df = pd.read_table(prot+"_jet.res")
print(df)
# #If a real pdb file is given, calculate dfi for the residues.
# if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
# if (pdbfile == None):
# print("ERROR: There is not any pdb file to calculate DFI.")
# sys.exit(-1)
# else:
# print("Calculating %DFI data per residue!")
# dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
# dfi = dfi
# df = pd.read_table(prot+"_jet.res")
# print(df)
df['dfi'] = dfi.round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
# df['dfi'] = dfi.round(4)
# df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
# print(df)
if((normWeightMode=='ssjetormax')):
if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.")
......@@ -1034,6 +1037,6 @@ if (__name__ == '__main__'):
toc = time.perf_counter()
print(f"S-GEMME computation finished in {toc - tic:0.4f} seconds!")
print(f"ESGEMME computation finished in {toc - tic:0.4f} seconds!")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment