Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
PRESCOTT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Mustafa Tekpinar
PRESCOTT
Commits
b20bee53
Commit
b20bee53
authored
Mar 15, 2023
by
Mustafa Tekpinar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Removed dynamics related functions: calcPercentDFI, calcBfactors and getBfactors.
parent
5ffaa0e3
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
128 additions
and
125 deletions
+128
-125
esgemme.py
esgemme.py
+128
-125
example-esgemme-script.sh
example/example-esgemme-script.sh
+0
-0
No files found.
esgemme.py
View file @
b20bee53
...
...
@@ -21,6 +21,10 @@ from prody import *
from
scipy.stats
import
rankdata
from
Bio
import
AlignIO
import
biotite.structure.io
as
strucio
import
biotite.application.dssp
as
dssp
import
biotite.structure.io.pdb
as
pdb
#from gemmeAnal import *
import
pandas
as
pd
...
...
@@ -264,9 +268,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
# Run Rscript to compute predictions
def
launchPred
(
prot
,
inAli
,
mutFile
,
normWeightMode
,
alphabet
):
if
(
mutFile
!=
''
):
rcmd
=
"Rscript --save $ESGEMME_PATH/computePred.R "
+
prot
+
" "
+
inAli
+
" FALSE "
+
mutFile
+
" "
+
normWeightMode
+
" "
+
alphabet
#TODO: remember that there is a problem here!!!!
rcmd
=
"Rscript --save $ESGEMME_PATH/computePred.R "
+
prot
+
" "
+
inAli
+
" FALSE "
+
mutFile
+
" "
+
normWeightMode
+
" "
+
alphabet
else
:
rcmd
=
"Rscript --save $ESGEMME_PATH/computePred.R "
+
prot
+
" "
+
inAli
+
" TRUE none "
+
normWeightMode
+
" "
+
alphabet
...
...
@@ -331,114 +333,114 @@ def rankSortProteinData(dataArray, inverted=True):
else
:
return
(
normalizedRankedDataArray
)
def
calcPercentDFI
(
pdbfile
,
outfile
,
nmodes
=
None
,
attenuate
=
"true"
,
ranksorted
=
"true"
):
"""
Calculate rank normalized dynamic flexibility index.
"""
#
def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
#
"""
#
Calculate rank normalized dynamic flexibility index.
#
"""
#confProDy(auto_show=False)
#
#confProDy(auto_show=False)
structure
=
parsePDB
(
pdbfile
)
calphas
=
structure
.
select
(
'name CA'
)
#
structure = parsePDB(pdbfile)
#
calphas = structure.select('name CA')
anm
=
ANM
(
'ANM'
)
anm
.
buildHessian
(
calphas
)
if
(
nmodes
==
None
):
anm
.
calcModes
(
n_modes
=
'all'
)
else
:
anm
.
calcModes
(
n_modes
=
int
(
nmodes
))
#
anm = ANM('ANM')
#
anm.buildHessian(calphas)
#
if(nmodes==None):
#
anm.calcModes(n_modes='all')
#
else:
#
anm.calcModes(n_modes=int(nmodes))
dfi
=
calcDynamicFlexibilityIndex
(
anm
,
calphas
,
select
=
'ca'
,
norm
=
True
)
percentDfi
=
rankSortProteinData
(
dfi
)
#
dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
#
percentDfi = rankSortProteinData(dfi)
if
(
attenuate
.
lower
()
==
'true'
):
percentDfi
=
attenuateEndPoints
(
percentDfi
)
#
if(attenuate.lower()=='true'):
#
percentDfi = attenuateEndPoints(percentDfi)
# if(outfile != None):
# np.savetxt(outfile, percentDfi)
# else:
# np.savetxt(outfile, dfi)
#
# if(outfile != None):
#
# np.savetxt(outfile, percentDfi)
#
# else:
#
# np.savetxt(outfile, dfi)
if
(
ranksorted
.
lower
()
==
'true'
):
return
percentDfi
else
:
return
dfi
#
if(ranksorted.lower()=='true'):
#
return percentDfi
#
else:
#
return dfi
def
calcBfactors
(
pdbfile
,
outfile
,
nmodes
=
None
,
attenuate
=
"true"
,
ranksorted
=
"true"
,
inverted
=
False
):
"""
Calculate rank normalized mean squared fluctuations,
which correlate well with Bfactors.
"""
#
def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
#
ranksorted="true", inverted=False):
#
"""
#
Calculate rank normalized mean squared fluctuations,
#
which correlate well with Bfactors.
#
"""
#confProDy(auto_show=False)
#
#confProDy(auto_show=False)
structure
=
parsePDB
(
pdbfile
)
calphas
=
structure
.
select
(
'name CA'
)
#
structure = parsePDB(pdbfile)
#
calphas = structure.select('name CA')
anm
=
ANM
(
'ANM'
)
anm
.
buildHessian
(
calphas
)
if
(
nmodes
==
None
):
anm
.
calcModes
(
n_modes
=
'all'
)
else
:
anm
.
calcModes
(
n_modes
=
int
(
nmodes
))
#
anm = ANM('ANM')
#
anm.buildHessian(calphas)
#
if(nmodes==None):
#
anm.calcModes(n_modes='all')
#
else:
#
anm.calcModes(n_modes=int(nmodes))
bfactors
=
calcSqFlucts
(
anm
)
#
bfactors = calcSqFlucts(anm)
percentBfactors
=
rankSortProteinData
(
bfactors
,
inverted
=
False
)
#
percentBfactors = rankSortProteinData(bfactors, inverted=False)
if
(
attenuate
.
lower
()
==
'true'
):
percentBfactors
=
attenuateEndPoints
(
percentBfactors
)
#
if(attenuate.lower()=='true'):
#
percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
#
# if(outfile != None):
#
# np.savetxt(outfile, percentBfactors)
#
# else:
#
# np.savetxt(outfile, dfi)
if
(
ranksorted
.
lower
()
==
'true'
):
if
(
inverted
==
True
):
return
(
1.0
-
percentBfactors
)
else
:
return
percentBfactors
else
:
if
(
inverted
==
True
):
return
(
np
.
max
(
bfactors
)
-
bfactors
)
else
:
return
bfactors
#
if(ranksorted.lower()=='true'):
#
if(inverted==True):
#
return (1.0 - percentBfactors)
#
else:
#
return percentBfactors
#
else:
#
if(inverted==True):
#
return (np.max(bfactors) - bfactors)
#
else:
#
return bfactors
def
getBfactors
(
pdbfile
,
outfile
,
attenuate
=
"true"
,
\
ranksorted
=
"true"
,
inverted
=
True
):
"""
Calculate rank normalized dynamic flexibility index.
"""
#
def getBfactors(pdbfile, outfile, attenuate="true", \
#
ranksorted="true", inverted=True):
#
"""
#
Calculate rank normalized dynamic flexibility index.
#
"""
#confProDy(auto_show=False)
#
#confProDy(auto_show=False)
structure
=
parsePDB
(
pdbfile
)
calphas
=
structure
.
select
(
'name CA'
)
#
structure = parsePDB(pdbfile)
#
calphas = structure.select('name CA')
bfactors
=
calphas
.
getBetas
()
#
bfactors = calphas.getBetas()
percentBfactors
=
rankSortProteinData
(
bfactors
,
inverted
=
False
)
#
percentBfactors = rankSortProteinData(bfactors, inverted=False)
if
(
attenuate
.
lower
()
==
'true'
):
percentBfactors
=
attenuateEndPoints
(
percentBfactors
)
#
if(attenuate.lower()=='true'):
#
percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
#
# if(outfile != None):
#
# np.savetxt(outfile, percentBfactors)
#
# else:
#
# np.savetxt(outfile, dfi)
if
(
ranksorted
.
lower
()
==
'true'
):
if
(
inverted
==
True
):
return
(
1.0
-
percentBfactors
)
else
:
return
percentBfactors
else
:
if
(
inverted
==
True
):
return
(
np
.
max
(
bfactors
)
-
bfactors
)
else
:
return
bfactors
#
if(ranksorted.lower()=='true'):
#
if(inverted==True):
#
return (1.0 - percentBfactors)
#
else:
#
return percentBfactors
#
else:
#
if(inverted==True):
#
return (np.max(bfactors) - bfactors)
#
else:
#
return bfactors
def
attenuateEndPoints
(
dfi
):
"""
...
...
@@ -646,17 +648,17 @@ def check_argument_groups(parser, arg_dict, group, argument):
"""
c
=
0
for
myArg
in
argument
:
arg_name
=
myArg
.
replace
(
"-"
,
""
)
if
arg_dict
[
arg_name
]
!=
''
:
c
=
c
+
1
arg_name
=
myArg
.
replace
(
"-"
,
""
)
if
arg_dict
[
arg_name
]
!=
''
:
c
=
c
+
1
group_name
=
group
.
replace
(
"-"
,
""
)
if
arg_dict
[
group_name
]
==
"input"
:
if
c
!=
1
:
parser
.
error
(
"
gemme requires "
+
str
(
argument
)
+
parser
.
error
(
"
esgemme requires "
+
str
(
argument
)
+
\
" if "
+
group
+
" is set to input."
)
else
:
if
c
>
0
:
parser
.
error
(
"gemme requires "
+
group
+
parser
.
error
(
"
es
gemme requires "
+
group
+
" to be set to input if "
+
str
(
argument
)
+
" is used."
)
return
None
def
calculateSecondaryStructure
(
pdbfile
):
...
...
@@ -674,8 +676,7 @@ def calculateSecondaryStructure(pdbfile):
A structure file in PDB format is the main input.
"""
import
biotite.structure.io
as
strucio
import
biotite.application.dssp
as
dssp
#pdbfile: Name of the input pdb file
#outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp"
...
...
@@ -724,23 +725,23 @@ def countCoilSegments(inputfile):
def
parse_command_line
():
"""
Parse command line.
It uses argparse to parse phylosofs
' command line arguments and returns the
argparse parser.
Parse command line.
It uses argparse to parse esgemme
' command line arguments and returns the
argparse parser.
"""
parser
=
argparse
.
ArgumentParser
(
prog
=
"gemme"
,
description
=
"""
GEMME (
Global Epistasis Model for predicting Mutational Effects)
is a tool to predict mutational outcomes based on sequenc
e analysis
"""
,
ESGEMME (Evolutionary and Structural
Global Epistasis Model for predicting Mutational Effects)
is a tool to predict mutational outcomes based on sequence and structur
e analysis
"""
,
epilog
=
"""
It has been developed at LCQB (Laboratory of Computational and
Quantitative Biology), UMR 7238 CNRS, Sorbonne Université.
If you use it, please cite:
Laine E, Karami Y, Carbone A. Predicting Protein Mutational Landscapes
using Evolutionary Conservation and Global Epistasis
"""
,
It has been developed at LCQB (Laboratory of Computational and
Quantitative Biology), UMR 7238 CNRS, Sorbonne Université.
If you use it, please cite:
Laine E, Karami Y, Carbone A. Predicting Protein Mutational Landscapes
using Evolutionary Conservation and Global Epistasis
"""
,
)
parser
.
add_argument
(
...
...
@@ -814,7 +815,7 @@ def parse_command_line():
# Check flag arguments
if
args
.
input
is
None
:
parser
.
error
(
"
GEMME requires an input alignment."
)
parser
.
error
(
"ES
GEMME requires an input alignment."
)
return
args
...
...
@@ -855,11 +856,13 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
structure
=
parsePDB
(
prot
+
".pdb"
)
calphas
=
structure
.
select
(
'name CA'
)
chains
=
list
(
set
(
calphas
.
getChids
()))
#print(chains)
# pdb_file = pdb.PDBFile.read(prot+".pdb")
# chains = list(set(pdb_file.get_chains()))
if
((
jetfile
)
==
None
):
#I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies.
...
...
@@ -878,22 +881,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
shutil
.
copy2
(
jetfile
,
prot
+
"_jet.res"
)
print
(
"done"
)
#If a real pdb file is given, calculate dfi for the residues.
if
(((
normWeightMode
==
'dfi'
)
or
(
normWeightMode
==
'maxtracedfi'
)
or
(
normWeightMode
==
'maxdfitrace'
))):
if
(
pdbfile
==
None
):
print
(
"ERROR: There is not any pdb file to calculate DFI."
)
sys
.
exit
(
-
1
)
else
:
print
(
"Calculating
%
DFI data per residue!"
)
dfi
=
calcPercentDFI
(
pdbfile
,
outfile
=
None
,
nmodes
=
None
,
attenuate
=
"true"
,
ranksorted
=
"true"
)
dfi
=
dfi
df
=
pd
.
read_table
(
prot
+
"_jet.res"
)
print
(
df
)
df
[
'dfi'
]
=
dfi
.
round
(
4
)
df
.
to_csv
(
prot
+
"_jet.res"
,
header
=
True
,
index
=
None
,
sep
=
'
\t
'
,
mode
=
'w'
)
print
(
df
)
#
#
If a real pdb file is given, calculate dfi for the residues.
#
if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
#
if (pdbfile == None):
#
print("ERROR: There is not any pdb file to calculate DFI.")
#
sys.exit(-1)
#
else:
#
print("Calculating %DFI data per residue!")
#
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
#
dfi = dfi
#
df = pd.read_table(prot+"_jet.res")
#
print(df)
#
df['dfi'] = dfi.round(4)
#
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
#
print(df)
if
((
normWeightMode
==
'ssjetormax'
)):
if
(
pdbfile
==
None
):
print
(
"ERROR: There is not any pdb file to calculate DFI."
)
...
...
@@ -1034,6 +1037,6 @@ if (__name__ == '__main__'):
toc
=
time
.
perf_counter
()
print
(
f
"
S-
GEMME computation finished in {toc - tic:0.4f} seconds!"
)
print
(
f
"
ES
GEMME computation finished in {toc - tic:0.4f} seconds!"
)
example/example-sgemme-script.sh
→
example/example-
e
sgemme-script.sh
View file @
b20bee53
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment