Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
PRESCOTT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Mustafa Tekpinar
PRESCOTT
Commits
2af63caf
Commit
2af63caf
authored
Apr 29, 2022
by
Mustafa Tekpinar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented --isjet2on option for reproducibility checks.
parent
05fb8c57
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
62 additions
and
35 deletions
+62
-35
computePred.R
computePred.R
+30
-23
example-gemme-script.sh
example/example-gemme-script.sh
+5
-0
gemme.py
gemme.py
+26
-12
pred.R
pred.R
+1
-0
No files found.
computePred.R
View file @
2af63caf
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
...
...
@@ -43,12 +44,6 @@ write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
jet
=
read.table
(
paste
(
prot
,
"_jet.res"
,
sep
=
""
),
head
=
TRUE
)
if
(
sum
(
colnames
(
jet
)
==
"traceMax"
)
==
1
){
trace
=
jet
[,
"traceMax"
]}
else
{
trace
=
jet
[,
"trace"
]}
#You should comment out line 44 to use this functionality. Or maybe it should go into the normalize functions.
#That was what we originally decided with Alessandra.
#To get max values of PC, CV or Trace
#trace = c()
#for (row in 1:nrow(jet)) { trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) }
#traceAli = sweep(binAli, MARGIN=2, trace, `*`)
# compute evolutionary distances of all sequences with respect to the query
distTrace
=
binAli
[
2
:
N
[
1
],]
%*%
trace
^
2
...
...
@@ -75,44 +70,56 @@ nbGaps = N[1] - apply(nbSeqs,2,sum)
# output the conservation values
dat
=
rbind
(
trace
,
KL
=
res
[[
1
]][[
2
]],
SE
=
res
[[
1
]][[
1
]],
gap
=
nbGaps
/
N
[
1
],
KL60
=
res
[[
2
]][[
2
]],
SE60
=
res
[[
2
]][[
1
]],
KL80
=
res
[[
3
]][[
2
]],
SE80
=
res
[[
3
]][[
1
]])
write.table
(
dat
,
paste0
(
prot
,
"_conservation.txt"
))
# compute log-odd ratios between mutated and wt sequence counts
predInd
=
computePredNbSeqs
(
wt
,
nbSeqs
)
# output the sequence counts log-odd ratios
write.table
(
predInd
,
paste0
(
prot
,
"_pred_evolInd.txt"
))
# Do the normalization for the independent component here!
rownames
(
predInd
)
=
aa
if
(
simple
){
normPredInd
=
normalizePredWithNbSeqsZero
(
predInd
,
trace
,
wt
)
rownames
(
normPredInd
)
=
aa
}
else
{
normPredInd
=
normalizePredWithNbSeqsZeroSelMult
(
predInd
,
trace
,
wt
,
list
(
pos
,
subsaa
))
names
(
normPredInd
)
=
rawMut
}
# output the predicted mutational effects based on sequence counts (conservation at the bottom)
write.table
(
normPredInd
,
paste0
(
prot
,
"_normP
red_evolInd.txt"
))
# output the sequence counts log-odd ratios
write.table
(
predInd
,
paste0
(
prot
,
"_p
red_evolInd.txt"
))
print
(
"done"
)
print
(
"running global epistatic model..."
)
pred
=
computePredSimple
(
ali
,
distTrace
,
wt
,
5
)
rownames
(
pred
)
=
aa
# output the evolutionary distances between the query and the closest variants
evolDist
=
pred
/
sum
(
trace
^
2
)
evolDist
[
is.na
(
evolDist
)]
=
1
write.table
(
evolDist
,
paste0
(
prot
,
"_pred_evolEpi.txt"
))
print
(
"done"
)
print
(
"running normalization..."
)
#You should comment out line 44 to use this functionality. Or maybe it should go into the normalize functions.
#That was what we originally decided with Alessandra.
#To get max values of PC or Trace
# trace = c()
# print(trace)
# for (row in 1:nrow(jet)) { trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) }
# print(trace)
# Do the normalization for the epistatic component here!
rownames
(
pred
)
=
aa
if
(
simple
){
#Independent model normalization
normPredInd
=
normalizePredWithNbSeqsZero
(
predInd
,
trace
,
wt
)
rownames
(
normPredInd
)
=
aa
# Epistatic model normalization
normPred
=
normalizePred
(
pred
,
trace
,
wt
)
rownames
(
normPred
)
=
aa
}
else
{
#Independent model normalization
normPredInd
=
normalizePredWithNbSeqsZeroSelMult
(
predInd
,
trace
,
wt
,
list
(
pos
,
subsaa
))
names
(
normPredInd
)
=
rawMut
# Epistatic model normalization
normPred
=
normalizePredSelMult
(
pred
,
trace
,
wt
,
list
(
pos
,
subsaa
))
names
(
normPred
)
=
rawMut
}
# output the predicted mutational effects based on evolutionary distance (conservation at the bottom)
# output the normalized predicted mutational effects based on sequence counts (conservation at the bottom)
write.table
(
normPredInd
,
paste0
(
prot
,
"_normPred_evolInd.txt"
))
# output the predicted normalized mutational effects based on evolutionary distance (conservation at the bottom)
write.table
(
normPred
,
paste0
(
prot
,
"_normPred_evolEpi.txt"
))
print
(
"done"
)
...
...
example/example-gemme-script.sh
View file @
2af63caf
...
...
@@ -9,3 +9,8 @@ else
echo
"Running GEMME with a user-provided alignment file."
python
$GEMME_PATH
/gemme.py aliBLAT.fasta
-r
input
-f
aliBLAT.fasta
fi
# If you have your own JET2 score file, you can turn off JET2 as follows:
# python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --isjet2on false
# This option can be useful for testing reproducibility!
gemme.py
View file @
2af63caf
...
...
@@ -11,6 +11,7 @@ import argparse
import
re
import
subprocess
import
math
from
xmlrpc.client
import
boolean
import
numpy
as
np
import
matplotlib.pylab
as
plt
...
...
@@ -254,6 +255,9 @@ def parse_command_line():
help
=
'fasta file containing related sequences'
,
default
=
''
)
retMet_args
.
add_argument
(
'--isjet2on'
,
dest
=
'isjet2on'
,
type
=
str
,
\
help
=
"If false, it will skip JET2 calculation and use a precalculated JET2 file. Default is true"
,
required
=
False
,
default
=
"True"
)
args
=
parser
.
parse_args
()
...
...
@@ -268,22 +272,32 @@ def parse_command_line():
return
args
def
doit
(
inAli
,
mutFile
,
retMet
,
bFile
,
fFile
,
n
,
N
):
def
doit
(
inAli
,
mutFile
,
retMet
,
bFile
,
fFile
,
n
,
N
,
isjet2on
):
"""
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile)
It is great that the function description and its definition are the same!
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile, args.isjet2on)
"""
simple
=
True
prot
,
seq
,
nl
=
extractQuerySeq
(
inAli
)
createPDB
(
prot
,
seq
)
print
(
"query protein: "
+
prot
)
print
(
"computing conservation levels..."
)
#I intend to run JET2 completely externally!!
#It is too buggy and it has too many dependencies.
#Using it with Docker or Singularity may be the best solution!
launchJET
(
prot
,
retMet
,
bFile
,
fFile
,
n
,
N
,
nl
)
print
(
"done"
)
if
((
isjet2on
.
lower
())
==
"true"
):
#I intend to run JET2 completely externally!!
#It is too buggy and it has too many dependencies.
#Using it with Docker or Singularity may be the best solution!
print
(
"computing conservation levels..."
)
launchJET
(
prot
,
retMet
,
bFile
,
fFile
,
n
,
N
,
nl
)
print
(
"done"
)
elif
((
isjet2on
.
lower
())
==
"false"
):
print
(
"using previously calculated JET2 conservation levels..."
)
print
(
"done"
)
else
:
print
(
"ERROR: You can only use true or false after --isjet2on!"
)
sys
.
exit
(
-
1
)
launchPred
(
prot
,
inAli
,
mutFile
)
#Do Python plotting here
...
...
@@ -292,9 +306,8 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
#TODO: Mark the original (wildtype) residue locations with a dot or something
# special to show the original amino acid.
#TODO: You can even put letters on the top line like in EVmutation output.
simple
=
True
if
(
simple
):
print
(
"generating the plots..."
)
gemmeData
=
parseGEMMEoutput
(
prot
+
"_normPred_evolEpi.txt"
,
verbose
=
False
)
...
...
@@ -315,7 +328,8 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
def
main
():
args
=
parse_command_line
()
doit
(
args
.
input
,
args
.
mutations
,
args
.
retrievingMethod
,
args
.
blastFile
,
args
.
fastaFile
,
args
.
nIter
,
args
.
NSeqs
)
doit
(
args
.
input
,
args
.
mutations
,
args
.
retrievingMethod
,
args
.
blastFile
,
\
args
.
fastaFile
,
args
.
nIter
,
args
.
NSeqs
,
args
.
isjet2on
)
if
(
__name__
==
'__main__'
):
main
()
...
...
pred.R
View file @
2af63caf
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment