Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
PRESCOTT
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Mustafa Tekpinar
PRESCOTT
Commits
42dbf51a
Commit
42dbf51a
authored
Oct 06, 2023
by
Mustafa Tekpinar
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added escottfile and ranksorted args to prescott UI.
parent
f98b9cb1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
32 deletions
+51
-32
prescott.py
prescott/prescott.py
+50
-30
requirements.txt
requirements.txt
+1
-2
No files found.
prescott/prescott.py
View file @
42dbf51a
...
@@ -328,7 +328,16 @@ def main():
...
@@ -328,7 +328,16 @@ def main():
main_parser
.
add_argument
(
'--usefrequencies'
,
dest
=
'usefrequencies'
,
type
=
str
,
\
main_parser
.
add_argument
(
'--usefrequencies'
,
dest
=
'usefrequencies'
,
type
=
str
,
\
help
=
'Do not touch this if you don
\'
t know what you are doing! Default is true'
,
help
=
'Do not touch this if you don
\'
t know what you are doing! Default is true'
,
required
=
False
,
default
=
'true'
)
required
=
False
,
default
=
'true'
)
main_parser
.
add_argument
(
'--ranksorted'
,
dest
=
'ranksorted'
,
type
=
str
,
\
help
=
'If your data is already ranksorted, change this argument to true. Default is false'
,
required
=
False
,
default
=
'false'
)
main_parser
.
add_argument
(
'--escottformat'
,
dest
=
'escottformat'
,
type
=
str
,
\
help
=
'Main format of escott file. There are two possibilities: gemme or singleline.
\n
'
+
\
'gemme: a horizontal format of 20 rows and N columns.
\n
'
+
\
'singleline: each line contains a mutation and its value separated by a space.
\n
'
+
\
'M1A 0.378
\n
'
,
required
=
False
,
default
=
'gemme'
)
# main_parser.add_argument('--colormap', dest='colormap', type=str, \
# main_parser.add_argument('--colormap', dest='colormap', type=str, \
# help='A colormap as defined in matplotlib',
# help='A colormap as defined in matplotlib',
# required=False, default='coolwarm_r')
# required=False, default='coolwarm_r')
...
@@ -354,7 +363,7 @@ def main():
...
@@ -354,7 +363,7 @@ def main():
print
(
"@> Use population max. freq : {}"
.
format
(
str
(
args
.
usepopmax
)
.
lower
()))
print
(
"@> Use population max. freq : {}"
.
format
(
str
(
args
.
usepopmax
)
.
lower
()))
print
(
"@> Which equation to use (Default=2): {}"
.
format
(
str
(
args
.
equation
)))
print
(
"@> Which equation to use (Default=2): {}"
.
format
(
str
(
args
.
equation
)))
print
(
"@> Scaling coefficient (Default=1.0): {}"
.
format
(
args
.
coefficient
))
print
(
"@> Scaling coefficient (Default=1.0): {}"
.
format
(
args
.
coefficient
))
print
(
"@> Frequency cutoff (Default=-4.0) : {}"
.
format
(
args
.
frequencycutoff
))
print
(
"@> Frequency cutoff (Default=-4.0)
: {}"
.
format
(
args
.
frequencycutoff
))
print
(
"@> Name of the output file : {}"
.
format
(
args
.
outputfile
))
print
(
"@> Name of the output file : {}"
.
format
(
args
.
outputfile
))
# End of argument parsing!
# End of argument parsing!
...
@@ -366,32 +375,45 @@ def main():
...
@@ -366,32 +375,45 @@ def main():
usePopMaxOrNot
=
args
.
usepopmax
.
lower
()
usePopMaxOrNot
=
args
.
usepopmax
.
lower
()
version
=
args
.
equation
version
=
args
.
equation
if
(
os
.
path
.
exists
(
escottDataPath
)):
if
(
os
.
path
.
exists
(
escottDataPath
)):
#Parse the file containing raw ESCOTT scores.
scanningMatrix
=
parseGEMMEoutput
(
args
.
escottfile
,
verbose
=
False
)
if
(
args
.
escottformat
==
'gemme'
):
#Convert the matrix format to singleline format
#Parse the file containing raw ESCOTT scores.
localResidueList
=
None
scanningMatrix
=
parseGEMMEoutput
(
args
.
escottfile
,
verbose
=
False
)
if
(
args
.
sequencefile
!=
None
):
referenceSeq
=
SeqIO
.
read
(
args
.
sequencefile
,
'fasta'
)
#Convert the matrix format to singleline format
localResidueList
=
list
(
referenceSeq
.
seq
)
localResidueList
=
None
aaOrderList
=
list
(
'ACDEFGHIKLMNPQRSTVWY'
)
if
(
args
.
sequencefile
!=
None
):
writeSinglelineFormat
(
scanningMatrix
,
protein
+
'_singleline.txt'
,
residueList
=
localResidueList
,
\
referenceSeq
=
SeqIO
.
read
(
args
.
sequencefile
,
'fasta'
)
beg
=
0
,
end
=
None
,
aaOrder
=
aaOrderList
,
\
localResidueList
=
list
(
referenceSeq
.
seq
)
offSet
=
0
)
aaOrderList
=
list
(
'ACDEFGHIKLMNPQRSTVWY'
)
writeSinglelineFormat
(
scanningMatrix
,
protein
+
'_singleline.txt'
,
residueList
=
localResidueList
,
\
#Mostly, I am using normPred_Combi_singleline as input file and it doesn't have a header.
beg
=
0
,
end
=
None
,
aaOrder
=
aaOrderList
,
\
df
=
pd
.
read_table
(
protein
+
'_singleline.txt'
,
sep
=
"
\
s+"
,
header
=
None
)
offSet
=
0
)
#data = np.genfromtxt(args.input,dtype=None)
#Mostly, I am using normPred_Combi_singleline as input file and it doesn't have a header.
data
=
df
.
to_numpy
()
df
=
pd
.
read_table
(
protein
+
'_singleline.txt'
,
sep
=
"
\
s+"
,
header
=
None
)
rawData
=
data
.
T
[
1
]
processedData
=
1.0
-
rankSortData
(
rawData
)
elif
(
args
.
escottformat
==
'singleline'
):
with
open
(
protein
+
'_singleline_1-ranksort.txt'
,
'w'
)
as
f
:
#f.write("#Resid Value\n")
df
=
pd
.
read_table
(
args
.
escottfile
,
sep
=
"
\
s+"
,
header
=
None
)
for
i
in
range
(
len
(
processedData
)):
else
:
f
.
write
(
"{:} {:6.2f}
\n
"
.
format
(
data
.
T
[
0
][
i
],
processedData
[
i
]))
print
(
'@> ERROR: Unknown escott format. It should be gemme or singleline!'
)
sys
.
exit
(
-
1
)
dfESCOTT
=
pd
.
read_table
(
protein
+
'_singleline_1-ranksort.txt'
,
sep
=
'
\
s+'
,
header
=
None
)
if
(
args
.
ranksorted
==
'false'
):
#data = np.genfromtxt(args.input,dtype=None)
data
=
df
.
to_numpy
()
rawData
=
data
.
T
[
1
]
processedData
=
1.0
-
rankSortData
(
rawData
)
with
open
(
protein
+
'_singleline_1-ranksort.txt'
,
'w'
)
as
f
:
#f.write("#Resid Value\n")
for
i
in
range
(
len
(
processedData
)):
f
.
write
(
"{:} {:6.2f}
\n
"
.
format
(
data
.
T
[
0
][
i
],
processedData
[
i
]))
dfESCOTT
=
pd
.
read_table
(
protein
+
'_singleline_1-ranksort.txt'
,
sep
=
'
\
s+'
,
header
=
None
)
else
:
dfESCOTT
=
df
dfESCOTT
.
columns
=
[
'mutant'
,
'ESCOTT'
]
dfESCOTT
.
columns
=
[
'mutant'
,
'ESCOTT'
]
dfESCOTT
[
'protein'
]
=
protein
dfESCOTT
[
'protein'
]
=
protein
...
@@ -416,10 +438,8 @@ def main():
...
@@ -416,10 +438,8 @@ def main():
(
row
[
'ClinVar Clinical Significance'
]
==
'Pathogenic'
)
or
\
(
row
[
'ClinVar Clinical Significance'
]
==
'Pathogenic'
)
or
\
(
row
[
'ClinVar Clinical Significance'
]
==
'Likely pathogenic'
)):
(
row
[
'ClinVar Clinical Significance'
]
==
'Likely pathogenic'
)):
gnomadDF
.
at
[
index
,
'labels'
]
=
1
gnomadDF
.
at
[
index
,
'labels'
]
=
1
print
(
gnomadDF
.
loc
[(
gnomadDF
[
'labels'
]
==
0
)
|
(
gnomadDF
[
'labels'
]
==
1
)])
print
(
gnomadDF
.
loc
[(
gnomadDF
[
'labels'
]
==
0
)
|
(
gnomadDF
[
'labels'
]
==
1
)])
# print(gnomadDF['ClinVar Clinical Significance'])
# print(gnomadDF['ClinVar Clinical Significance'])
# Add frequency column and a dummy frequency to each row in myBigMergedDF
# Add frequency column and a dummy frequency to each row in myBigMergedDF
myBigMergedDF
[
'frequency'
]
=
999.0
myBigMergedDF
[
'frequency'
]
=
999.0
myBigMergedDF
[
'labels'
]
=
np
.
nan
myBigMergedDF
[
'labels'
]
=
np
.
nan
...
...
requirements.txt
View file @
42dbf51a
...
@@ -5,5 +5,4 @@ scipy
...
@@ -5,5 +5,4 @@ scipy
pandas
pandas
biopython
<=1.79
biopython
<=1.79
biotite
biotite
sklearn
scikit-learn
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment