Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
PRESCOTT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Mustafa Tekpinar
PRESCOTT
Commits
f61b87d1
Commit
f61b87d1
authored
Apr 28, 2022
by
Mustafa Tekpinar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Corrected a bug of overwriting on the fasta file
parent
2d1df3ad
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
76 additions
and
14 deletions
+76
-14
computePred.R
computePred.R
+7
-0
default.conf
default.conf
+9
-7
gemme.py
gemme.py
+6
-0
gemmeAnal.py
gemmeAnal.py
+54
-7
No files found.
computePred.R
View file @
f61b87d1
...
...
@@ -42,6 +42,13 @@ write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
# read evolutionary traces computed by JET
jet
=
read.table
(
paste
(
prot
,
"_jet.res"
,
sep
=
""
),
head
=
TRUE
)
if
(
sum
(
colnames
(
jet
)
==
"traceMax"
)
==
1
){
trace
=
jet
[,
"traceMax"
]}
else
{
trace
=
jet
[,
"trace"
]}
#You should comment out line 44 to use this functionality. Or maybe it should go into the normalize functions.
#That was what we originally decided with Alessandra.
#To get max values of PC, CV or Trace
#trace = c()
#for (row in 1:nrow(jet)) { trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) }
#traceAli = sweep(binAli, MARGIN=2, trace, `*`)
# compute evolutionary distances of all sequences with respect to the query
distTrace
=
binAli
[
2
:
N
[
1
],]
%*%
trace
^
2
...
...
default.conf
View file @
f61b87d1
...
...
@@ -20,6 +20,7 @@ url http://www.rcsb.org/pdb/downloadFile.do URL of PDB server
>
Filter
min_identity
0
.
20
min
sequence
identity
max_identity
0
.
98
max
sequence
identity
*****************************************
>
Sample
length_cutoff
0
.
8
minimum
sequence
length
expressed
in
number
of
residues
...
...
@@ -27,13 +28,14 @@ length_cutoff 0.8 minimum sequence length expressed in number of residues
*****************************************
>
Software
clustalW
/
usr
/
local
/
bin
/
clustalw2
clustalW
system
dependent
command
muscle
/
usr
/
bin
/
muscle
muscle
system
dependent
command
naccess
/
opt
/
JET2
/
naccess2
.
1
.
1
/
naccess
naccess
system
dependent
command
psiblast
/
opt
/
blast
-
2
.
2
.
27
+/
bin
/
psiblast
psiblast
system
dependent
command
muscle
/
usr
/
bin
/
muscle
muscle
system
dependent
command
naccess
/
home
/
tekpinar
/
research
/
carbone
-
lab
-
software
/
naccess2
.
1
.
1
/
naccess
naccess
system
dependent
command
psiblast
/
usr
/
bin
/
psiblast
psiblast
system
dependent
command
*****************************************
>
Data
substMatrix
/
opt
/
JET2
/
matrix
directory
location
of
matrices
used
in
JET
(
Blosum62
,
gonnet
and
hsdm
)
blastDatabases
/
disk1
/
blastdb
/
directory
location
of
databases
used
for
local
blast
(
nr
{
0
-
7
})
substMatrix
/
home
/
tekpinar
/
research
/
carbone
-
lab
-
software
/
JET2
/
matrix
directory
location
of
matrices
used
in
JET
(
Blosum62
,
gonnet
and
hsdm
)
blastDatabases
/
opt
/
blastdb
directory
location
of
databases
used
for
local
blast
(
nr
{
0
-
7
})
*****************************************
>
ET
...
...
@@ -56,9 +58,9 @@ max_dist 20.0 max distance
>
Interface
cutoff
0
minimum
percentage
accessible
surface
variation
of
an
interface
residu
ligand
no
(
yes
|
no
)
keep
contact
of
ligand
(
SUBSTRATE
,
PRODUCT
and
COFACTOR
of
database
ENZYME
)
to
compute
interface
of
protein
enzymeCpd
/
opt
/
JET
/
jet
/
data
/
enzyme
.
txt
location
of
file
containing
database
ENZYME
enzymeCpd
/
home
/
tekpinar
/
research
/
carbone
-
lab
-
software
/
JET2
/
jet
/
data
/
enzyme
.
txt
location
of
file
containing
database
ENZYME
homologousPDB
no
(
yes
|
no
)
add
interface
residues
of
homologous
structures
(
find
in
pdb
database
clustered
at
95
%
of
identities
)
to
interface
of
protein
clusteredPDB
/
opt
/
JET
/
jet
/
data
/
clusters95
.
txt
location
of
pdb
database
clustered
at
95
%
of
identities
clusteredPDB
/
home
/
tekpinar
/
research
/
carbone
-
lab
-
software
/
JET2
/
jet
/
data
/
clusters95
.
txt
location
of
pdb
database
clustered
at
95
%
of
identities
*****************************************
>
Cluster
...
...
gemme.py
View file @
f61b87d1
...
...
@@ -132,9 +132,15 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile)
"""
prot
,
seq
,
nl
=
extractQuerySeq
(
inAli
)
createPDB
(
prot
,
seq
)
print
"query protein: "
+
prot
print
"computing conservation levels..."
#I intend to run JET2 completely externally!!
#It is too buggy and it has too many dependencies.
#Using it with a Docker or Singularity may be the best solution!
launchJET
(
prot
,
retMet
,
bFile
,
fFile
,
n
,
N
,
nl
)
print
"done"
launchPred
(
prot
,
inAli
,
mutFile
)
...
...
gemmeAnal.py
View file @
f61b87d1
...
...
@@ -10,6 +10,7 @@ import argparse
import
re
import
math
import
subprocess
import
shutil
# Extract the query sequence from the input alignment
...
...
@@ -67,21 +68,64 @@ def editConfJET(N):
# Run JET to compute TJET values
def
launchJET
(
prot
,
retMet
,
bFile
,
fFile
,
n
,
N
,
nl
):
"""
Call JET2 and produce prot+"_jet.res" file.
prot+"_jet.res" will be used in the following steps (in launchPred)
to calculate independent and epistatic models.
Ideally, this call to JET2 should be made from a Docker or Singularity container,
because installing all the requirements of JET2 is difficult and error-prone.
Parameters
----------
prot: string ???
Name of the protein ???
retMet: string
Retrieval method for the multiple sequence alignment file.
It can be 'input', 'local' or 'server'. Default is local.
bFile: string
A multiple sequence alignment file obtained with psiblast.
It is used only if the retMet (explained above) is input.
fFile: string
A sequence file in FASTA format; its '>' records are copied into prot+"_A.fasta".
It is used only if the retMet (explained above) is input and bFile is empty.
n: int
Number of JET2 iterations.
N: int
Default 20000
nl: int
Number of lines after > character in the query sequences file.
It is obtained in extractQuerySeq() function.
Returns
-------
Nothing
"""
subprocess
.
call
(
"cp $GEMME_PATH/default.conf ."
,
shell
=
True
)
if
retMet
==
"input"
:
if
bFile
!=
''
:
subprocess
.
call
(
"cp "
+
bFile
+
" "
+
prot
+
"_A.psiblast"
,
shell
=
True
)
jetcmd
=
"java -Xmx1000m -cp $JET
_PATH:$JET
_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "
+
prot
+
".pdb -o `pwd` -p J -r input -b "
+
prot
+
"_A.psiblast -d chain -n "
+
n
+
" > "
+
prot
+
".out"
jetcmd
=
"java -Xmx1000m -cp $JET
2_PATH:$JET2
_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "
+
prot
+
".pdb -o `pwd` -p J -r input -b "
+
prot
+
"_A.psiblast -d chain -n "
+
n
+
" > "
+
prot
+
".out"
else
:
print
N
editConfJET
(
N
)
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
subprocess
.
call
(
"grep -m "
+
str
(
int
(
N
)
+
1
)
+
" -A "
+
str
(
nl
)
+
" '^>' "
+
fFile
+
" > "
+
prot
+
"_A.fasta"
,
shell
=
True
)
jetcmd
=
"java -Xmx1000m -cp $JET_PATH:$JET_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "
+
prot
+
".pdb -o `pwd` -p J -r input -f "
+
prot
+
"_A.fasta -d chain -n "
+
n
+
" > "
+
prot
+
".out"
if
(
fFile
==
prot
+
"_A.fasta"
):
shutil
.
copy2
(
fFile
,
fFile
+
".orig"
)
#I think this subprocess call causes overwriting of the fasta file.
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
grpcmd
=
"grep -m "
+
str
(
int
(
N
)
+
1
)
+
" -A "
+
str
(
nl
)
+
" '^>' "
+
fFile
+
".orig > "
+
prot
+
"_A.fasta"
else
:
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
grpcmd
=
"grep -m "
+
str
(
int
(
N
)
+
1
)
+
" -A "
+
str
(
nl
)
+
" '^>' "
+
fFile
+
" > "
+
prot
+
"_A.fasta"
print
(
"
\n
Running:
\n
"
+
grpcmd
)
subprocess
.
call
(
grpcmd
,
shell
=
True
)
jetcmd
=
"java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "
+
prot
+
".pdb -o `pwd` -p J -r input -f "
+
prot
+
"_A.fasta -d chain -n "
+
n
+
" > "
+
prot
+
".out"
else
:
jetcmd
=
"java -Xmx1000m -cp $JET
_PATH:$JET
_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "
+
prot
+
".pdb -o `pwd` -p J -r "
+
retMet
+
" -d chain -n "
+
n
+
" > "
+
prot
+
".out"
print
jetcmd
jetcmd
=
"java -Xmx1000m -cp $JET
2_PATH:$JET2
_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "
+
prot
+
".pdb -o `pwd` -p J -r "
+
retMet
+
" -d chain -n "
+
n
+
" > "
+
prot
+
".out"
print
(
"
\n
Running:
\n
"
+
jetcmd
)
reCode
=
subprocess
.
call
(
jetcmd
,
shell
=
True
)
if
os
.
path
.
isfile
(
prot
+
"/"
+
prot
+
"_jet.res"
):
os
.
rename
(
prot
+
"/"
+
prot
+
"_jet.res"
,
prot
+
"_jet.res"
)
...
...
@@ -94,8 +138,11 @@ def launchPred(prot,inAli,mutFile):
rcmd
=
"Rscript --save $GEMME_PATH/computePred.R "
+
prot
+
" "
+
inAli
+
" FALSE "
+
mutFile
else
:
rcmd
=
"Rscript --save $GEMME_PATH/computePred.R "
+
prot
+
" "
+
inAli
+
" TRUE none"
print
rcmd
reCode
=
subprocess
.
call
(
rcmd
,
shell
=
True
)
#Add plots here with gemmemore
return
(
reCode
)
# Remove temporary files
...
...
@@ -113,7 +160,7 @@ def cleanTheMess(prot,bFile,fFile):
os
.
remove
(
prot
+
"_A.fasta"
)
# if os.path.isfile(prot+"/"+prot+"_jet.res"):
# os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
os
.
remove
(
prot
+
".pdb"
)
#
os.remove(prot+".pdb")
dir_name
=
prot
+
"/"
if
os
.
path
.
isdir
(
dir_name
):
for
f
in
os
.
listdir
(
dir_name
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment