Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
PRESCOTT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Mustafa Tekpinar
PRESCOTT
Commits
a823960e
Commit
a823960e
authored
Feb 01, 2023
by
Mustafa Tekpinar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added jetormax to the normalization options
parent
54a3c39a
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
52 additions
and
34 deletions
+52
-34
computePred.R
computePred.R
+18
-4
example-sgemme-script.sh
example/example-sgemme-script.sh
+0
-0
gemmeAnal.py
gemmeAnal.py
+25
-25
pred.R
pred.R
+2
-2
sgemme.py
sgemme.py
+7
-3
No files found.
computePred.R
View file @
a823960e
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# Copyright (c) 2022
-2023
: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
# Please see the LICENSE.txt file included as part of this package.
# Usage: Rscript --save computePred.R XXXX
# Usage: Rscript --save computePred.R XXXX
library
(
"seqinr"
)
library
(
"seqinr"
)
source
(
paste
(
Sys.getenv
(
"GEMME_PATH"
),
"/pred.R"
,
sep
=
""
))
source
(
paste
(
Sys.getenv
(
"
S
GEMME_PATH"
),
"/pred.R"
,
sep
=
""
))
# amino acid full alphabet
# amino acid full alphabet
aa
=
c
(
"a"
,
"c"
,
"d"
,
"e"
,
"f"
,
"g"
,
"h"
,
"i"
,
"k"
,
"l"
,
"m"
,
"n"
,
"p"
,
"q"
,
"r"
,
"s"
,
"t"
,
"v"
,
"w"
,
"y"
)
aa
=
c
(
"a"
,
"c"
,
"d"
,
"e"
,
"f"
,
"g"
,
"h"
,
"i"
,
"k"
,
"l"
,
"m"
,
"n"
,
"p"
,
"q"
,
"r"
,
"s"
,
"t"
,
"v"
,
"w"
,
"y"
)
# BLOSUM62 matrix
# BLOSUM62 matrix
blosum62p
=
as.matrix
(
read.table
(
paste0
(
Sys.getenv
(
"GEMME_PATH"
),
"/blosum62p.txt"
),
row.names
=
1
))
blosum62p
=
as.matrix
(
read.table
(
paste0
(
Sys.getenv
(
"
S
GEMME_PATH"
),
"/blosum62p.txt"
),
row.names
=
1
))
bgp
=
blosum62p
[
order
(
rownames
(
blosum62p
)),
order
(
colnames
(
blosum62p
))]
bgp
=
blosum62p
[
order
(
rownames
(
blosum62p
)),
order
(
colnames
(
blosum62p
))]
bg
=
apply
(
bgp
,
1
,
sum
)
bg
=
apply
(
bgp
,
1
,
sum
)
...
@@ -136,6 +136,19 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
...
@@ -136,6 +136,19 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
trace
<-
append
(
trace
,
max
((
jet
[
row
,
"trace"
]
+
jet
[
row
,
"pc"
])
/
2.0
,
max
((
jet
[
row
,
"trace"
]
+
jet
[
row
,
"cv"
])
/
2.0
,
(
jet
[
row
,
"pc"
]
+
jet
[
row
,
"cv"
])
/
2.0
)))
trace
<-
append
(
trace
,
max
((
jet
[
row
,
"trace"
]
+
jet
[
row
,
"pc"
])
/
2.0
,
max
((
jet
[
row
,
"trace"
]
+
jet
[
row
,
"cv"
])
/
2.0
,
(
jet
[
row
,
"pc"
]
+
jet
[
row
,
"cv"
])
/
2.0
)))
}
}
}
}
}
else
if
((
normWeightMode
==
"jetormax"
)){
print
(
paste
(
"Using "
,
normWeightMode
))
for
(
row
in
1
:
nrow
(
jet
))
{
if
(
sum
(
colnames
(
jet
)
==
"traceMax"
)
==
1
){
maxValue
=
max
((
jet
[
row
,
"traceMax"
]
+
jet
[
row
,
"pc"
])
/
2.0
,
max
((
jet
[
row
,
"traceMax"
]
+
jet
[
row
,
"cv"
])
/
2.0
,
(
jet
[
row
,
"pc"
]
+
jet
[
row
,
"cv"
])
/
2.0
))
traceValue
=
jet
[
row
,
"traceMax"
]
trace
<-
append
(
trace
,
max
((
traceValue
),
(
maxValue
)))
}
else
{
maxValue
=
max
((
jet
[
row
,
"trace"
]
+
jet
[
row
,
"pc"
])
/
2.0
,
max
((
jet
[
row
,
"trace"
]
+
jet
[
row
,
"cv"
])
/
2.0
,
(
jet
[
row
,
"pc"
]
+
jet
[
row
,
"cv"
])
/
2.0
))
traceValue
=
jet
[
row
,
"trace"
]
trace
<-
append
(
trace
,
max
((
traceValue
),
(
maxValue
)))
}
}
}
else
if
((
normWeightMode
==
"maxtracepchalfpccv"
)){
}
else
if
((
normWeightMode
==
"maxtracepchalfpccv"
)){
print
(
paste
(
"Using "
,
normWeightMode
))
print
(
paste
(
"Using "
,
normWeightMode
))
for
(
row
in
1
:
nrow
(
jet
))
{
for
(
row
in
1
:
nrow
(
jet
))
{
...
@@ -244,7 +257,8 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
...
@@ -244,7 +257,8 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
print
(
"ERROR: Unknown --normWeightMode selected!"
)
print
(
"ERROR: Unknown --normWeightMode selected!"
)
print
(
"It can only be 'trace', 'tracemovingaverage', 'pc', 'cv', 'dfi', 'bfactor',"
)
print
(
"It can only be 'trace', 'tracemovingaverage', 'pc', 'cv', 'dfi', 'bfactor',"
)
print
(
" 'maxtracepc', 'maxtracecv', 'maxtracepccv', 'halfcvpc', 'halftracepc', 'maxtracepchalfpccv',"
)
print
(
" 'maxtracepc', 'maxtracecv', 'maxtracepccv', 'halfcvpc', 'halftracepc', 'maxtracepchalfpccv',"
)
print
(
" 'maxtracesc', 'maxtracescpc', 'maxtracedfi' or 'maxtracebfactor'!"
)
print
(
" 'maxtracesc', 'maxtracescpc', 'maxtracedfi', 'maxtracebfactor', 'maxhalftracepchalftracecvhalfcvpc'"
)
print
(
" or 'jetormax'!"
)
}
}
print
(
trace
)
print
(
trace
)
#####################################################################################################################
#####################################################################################################################
...
...
example/example-gemme-script.sh
→
example/example-
s
gemme-script.sh
View file @
a823960e
File moved
gemmeAnal.py
View file @
a823960e
...
@@ -16,30 +16,30 @@ import pandas as pd
...
@@ -16,30 +16,30 @@ import pandas as pd
import
numpy
as
np
import
numpy
as
np
# Moved to sgemme.py
def
extractQuerySeq
(
filename
):
#
def extractQuerySeq(filename):
"""
#
"""
# Extract the query sequence from the input alignment
#
# Extract the query sequence from the input alignment
"""
#
"""
fIN
=
open
(
filename
,
"r"
)
#
fIN = open(filename,"r")
lines
=
fIN
.
readlines
()
#
lines = fIN.readlines()
fIN
.
close
()
#
fIN.close()
if
lines
[
0
][
0
]
!=
">"
:
#
if lines[0][0]!=">":
raise
Exception
(
'bad FASTA format'
)
#
raise Exception('bad FASTA format')
else
:
#
else:
prot
=
re
.
compile
(
"[^A-Z0-9a-z]"
)
.
split
(
lines
[
0
][
1
:])[
0
]
#
prot = re.compile("[^A-Z0-9a-z]").split(lines[0][1:])[0]
fOUT
=
open
(
prot
+
".fasta"
,
"w"
)
#
fOUT = open(prot+".fasta","w")
fOUT
.
write
(
">"
+
prot
+
"
\n
"
)
#
fOUT.write(">"+prot+"\n")
seq
=
""
#
seq=""
i
=
1
#
i = 1
while
lines
[
i
][
0
]
!=
">"
:
#
while lines[i][0]!=">":
seq
=
seq
+
lines
[
i
]
.
strip
()
.
strip
(
"."
)
.
strip
(
"-"
)
#
seq = seq + lines[i].strip().strip(".").strip("-")
fOUT
.
write
(
lines
[
i
])
#
fOUT.write(lines[i])
i
=
i
+
1
#
i = i + 1
fOUT
.
close
()
#
fOUT.close()
return
prot
,
seq
,
i
-
1
#
return prot,seq,i-1
def
getNbSeq
(
filename
):
def
getNbSeq
(
filename
):
...
@@ -135,7 +135,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
...
@@ -135,7 +135,7 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
#TODO: Remove Bash dependency here. Make the copying process in Python
#TODO: Remove Bash dependency here. Make the copying process in Python
subprocess
.
call
(
"cp $SGEMME_PATH/default.conf ."
,
shell
=
True
)
subprocess
.
call
(
"cp $SGEMME_PATH/default.conf ."
,
shell
=
True
)
if
retMet
==
"input"
:
if
retMet
==
"input"
:
if
bFile
!=
''
:
if
bFile
!=
''
:
#TODO: I think these two lines must be here as well but I am not sure.
#TODO: I think these two lines must be here as well but I am not sure.
...
...
pred.R
View file @
a823960e
...
@@ -174,7 +174,7 @@ computePSSM2<-function(mat){
...
@@ -174,7 +174,7 @@ computePSSM2<-function(mat){
}
}
computeNbSeqsAlph
<-
function
(
nbSeqs
,
alphabet
){
computeNbSeqsAlph
<-
function
(
nbSeqs
,
alphabet
){
path
=
paste
(
Sys.getenv
(
"GEMME_PATH"
),
"/alphabets/"
,
sep
=
""
)
path
=
paste
(
Sys.getenv
(
"
S
GEMME_PATH"
),
"/alphabets/"
,
sep
=
""
)
AA
=
read.table
(
paste
(
path
,
alphabet
,
".txt"
,
sep
=
""
))
$
V
1
AA
=
read.table
(
paste
(
path
,
alphabet
,
".txt"
,
sep
=
""
))
$
V
1
facAA
=
rep
(
NA
,
20
)
facAA
=
rep
(
NA
,
20
)
for
(
Let
in
AA
){
for
(
Let
in
AA
){
...
@@ -210,7 +210,7 @@ readMut<-function(fname){
...
@@ -210,7 +210,7 @@ readMut<-function(fname){
}
}
which.class
<-
function
(
a
,
alphabet
){
which.class
<-
function
(
a
,
alphabet
){
path
=
paste
(
Sys.getenv
(
"GEMME_PATH"
),
"/alphabets/"
,
sep
=
""
)
path
=
paste
(
Sys.getenv
(
"
S
GEMME_PATH"
),
"/alphabets/"
,
sep
=
""
)
AA
=
read.table
(
paste
(
path
,
alphabet
,
".txt"
,
sep
=
""
))
$
V
1
AA
=
read.table
(
paste
(
path
,
alphabet
,
".txt"
,
sep
=
""
))
$
V
1
for
(
Let
in
AA
){
for
(
Let
in
AA
){
Let
=
toString
(
Let
)
Let
=
toString
(
Let
)
...
...
sgemme.py
View file @
a823960e
...
@@ -22,6 +22,8 @@ from gemmeAnal import *
...
@@ -22,6 +22,8 @@ from gemmeAnal import *
import
pandas
as
pd
import
pandas
as
pd
#############The following part between # signs are moved from gemmeAnal.py to
# make the code more simple.
def
extractQuerySeq
(
filename
):
def
extractQuerySeq
(
filename
):
"""
"""
# Extract the query sequence from the input alignment
# Extract the query sequence from the input alignment
...
@@ -45,7 +47,7 @@ def extractQuerySeq(filename):
...
@@ -45,7 +47,7 @@ def extractQuerySeq(filename):
i
=
i
+
1
i
=
i
+
1
fOUT
.
close
()
fOUT
.
close
()
return
prot
,
seq
,
i
-
1
return
prot
,
seq
,
i
-
1
###############################################################################
def
rankSortProteinData
(
dataArray
,
inverted
=
True
):
def
rankSortProteinData
(
dataArray
,
inverted
=
True
):
"""
"""
This function ranksorts protein data and converts it
This function ranksorts protein data and converts it
...
@@ -477,7 +479,8 @@ def parse_command_line():
...
@@ -477,7 +479,8 @@ def parse_command_line():
retMet_args
.
add_argument
(
'--normweightmode'
,
dest
=
'normweightmode'
,
type
=
str
,
\
retMet_args
.
add_argument
(
'--normweightmode'
,
dest
=
'normweightmode'
,
type
=
str
,
\
help
=
"It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracesc',"
+
\
help
=
"It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracesc',"
+
\
"'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv' or 'halfcvpc'. Default is 'trace'."
,
"'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv', "
+
\
"'halfcvpc', maxhalftracepchalftracecvhalfcvpc or jetormax. Default is 'trace'."
,
required
=
False
,
default
=
"trace"
)
required
=
False
,
default
=
"trace"
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
...
@@ -548,6 +551,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
...
@@ -548,6 +551,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(
normWeightMode
!=
'maxtracehalfcvpc'
)
and
\
(
normWeightMode
!=
'maxtracehalfcvpc'
)
and
\
(
normWeightMode
!=
'maxtracehalftracecvhalfcvpc'
)
and
\
(
normWeightMode
!=
'maxtracehalftracecvhalfcvpc'
)
and
\
(
normWeightMode
!=
'maxhalftracepchalftracecvhalfcvpc'
)
and
\
(
normWeightMode
!=
'maxhalftracepchalftracecvhalfcvpc'
)
and
\
(
normWeightMode
!=
'jetormax'
)
and
\
(
normWeightMode
!=
'tracemovingaverage'
)
and
\
(
normWeightMode
!=
'tracemovingaverage'
)
and
\
(
normWeightMode
!=
'maxtracehalfpccv'
)):
(
normWeightMode
!=
'maxtracehalfpccv'
)):
print
(
"ERROR: normWeightMode can only be 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', bfactor,
\n
"
+
\
print
(
"ERROR: normWeightMode can only be 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', bfactor,
\n
"
+
\
...
@@ -555,7 +559,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
...
@@ -555,7 +559,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
" 'maxtracebfactor', 'maxtracepcsc', 'maxhalftracepchalftracecvhalfcvpc', "
+
\
" 'maxtracebfactor', 'maxtracepcsc', 'maxhalftracepchalftracecvhalfcvpc', "
+
\
" 'maxtracecv', 'maxtracepccv', maxtracepchalfpccv"
+
\
" 'maxtracecv', 'maxtracepccv', maxtracepchalfpccv"
+
\
" 'combinedv1', 'combinedv2', 'combinedv3', 'combinedv4'"
+
\
" 'combinedv1', 'combinedv2', 'combinedv3', 'combinedv4'"
+
\
" 'halfcvpc', 'halftracepc', 'maxtracehalfcvpc'
or maxtracehalftracecvhalfcvpc
!"
)
" 'halfcvpc', 'halftracepc', 'maxtracehalfcvpc'
, 'maxtracehalftracecvhalfcvpc' or 'jetormax'
!"
)
sys
.
exit
(
-
1
)
sys
.
exit
(
-
1
)
structure
=
parsePDB
(
prot
+
".pdb"
)
structure
=
parsePDB
(
prot
+
".pdb"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment