Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
SENSE-PPI
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Konstantin Volzhenin
SENSE-PPI
Commits
f5945547
Commit
f5945547
authored
Dec 19, 2023
by
Konstantin Volzhenin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
0.6.5 updated names for tmp files for predict and predict string
parent
04ca5c16
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
16 additions
and
6 deletions
+16
-6
__init__.py
senseppi/__init__.py
+1
-1
predict.py
senseppi/commands/predict.py
+3
-1
predict_string.py
senseppi/commands/predict_string.py
+0
-0
network_utils.py
senseppi/network_utils.py
+12
-4
No files found.
senseppi/__init__.py
View file @
f5945547
__version__
=
"0.6.
4
"
__version__
=
"0.6.
5
"
__author__
=
"Konstantin Volzhenin"
__author__
=
"Konstantin Volzhenin"
from
.
import
model
,
commands
,
esm2_model
,
dataset
,
utils
,
network_utils
from
.
import
model
,
commands
,
esm2_model
,
dataset
,
utils
,
network_utils
...
...
senseppi/commands/predict.py
View file @
f5945547
...
@@ -9,6 +9,7 @@ from ..dataset import PairSequenceData
...
@@ -9,6 +9,7 @@ from ..dataset import PairSequenceData
from
..model
import
SensePPIModel
from
..model
import
SensePPIModel
from
..utils
import
*
from
..utils
import
*
from
..esm2_model
import
add_esm_args
,
compute_embeddings
from
..esm2_model
import
add_esm_args
,
compute_embeddings
from
datetime
import
datetime
def
predict
(
params
):
def
predict
(
params
):
...
@@ -120,7 +121,8 @@ def get_protein_names(fasta_file):
...
@@ -120,7 +121,8 @@ def get_protein_names(fasta_file):
def
main
(
params
):
def
main
(
params
):
tmp_pairs
=
'senseppi_pairs_for_prediction.tmp'
current_time
=
str
(
datetime
.
now
())
.
replace
(
' '
,
'_'
)
tmp_pairs
=
current_time
+
'_senseppi_pairs_for_prediction.tsv.tmp'
try
:
try
:
fasta_max_len
=
get_max_len
(
params
.
fasta_file
)
fasta_max_len
=
get_max_len
(
params
.
fasta_file
)
if
params
.
max_len
is
None
:
if
params
.
max_len
is
None
:
...
...
senseppi/commands/predict_string.py
View file @
f5945547
This diff is collapsed.
Click to expand it.
senseppi/network_utils.py
View file @
f5945547
...
@@ -7,11 +7,12 @@ import urllib.request
...
@@ -7,11 +7,12 @@ import urllib.request
import
requests
import
requests
import
gzip
import
gzip
import
shutil
import
shutil
from
datetime
import
datetime
DOWNLOAD_LINK_STRING
=
"https://stringdb-downloads.org/download/"
DOWNLOAD_LINK_STRING
=
"https://stringdb-downloads.org/download/"
def
generate_pairs_string
(
fasta_file
,
output_file
,
delete_proteins
=
None
):
def
generate_pairs_string
(
fasta_file
,
pairs_file
,
output_file
,
delete_proteins
=
None
):
ids
=
[]
ids
=
[]
for
record
in
SeqIO
.
parse
(
fasta_file
,
"fasta"
):
for
record
in
SeqIO
.
parse
(
fasta_file
,
"fasta"
):
ids
.
append
(
record
.
id
)
ids
.
append
(
record
.
id
)
...
@@ -23,7 +24,7 @@ def generate_pairs_string(fasta_file, output_file, delete_proteins=None):
...
@@ -23,7 +24,7 @@ def generate_pairs_string(fasta_file, output_file, delete_proteins=None):
pairs
=
pd
.
DataFrame
(
pairs
,
columns
=
[
'seq1'
,
'seq2'
])
pairs
=
pd
.
DataFrame
(
pairs
,
columns
=
[
'seq1'
,
'seq2'
])
data
=
pd
.
read_csv
(
'string_interactions.tsv'
,
delimiter
=
'
\t
'
)
data
=
pd
.
read_csv
(
pairs_file
,
delimiter
=
'
\t
'
)
# Creating a dictionary of string ids and gene names
# Creating a dictionary of string ids and gene names
ids_dict
=
dict
(
zip
(
data
[
'preferredName_A'
],
data
[
'stringId_A'
]))
ids_dict
=
dict
(
zip
(
data
[
'preferredName_A'
],
data
[
'stringId_A'
]))
...
@@ -70,6 +71,11 @@ def get_string_url():
...
@@ -70,6 +71,11 @@ def get_string_url():
def
get_interactions_from_string
(
gene_names
,
species
=
9606
,
add_nodes
=
10
,
required_score
=
500
,
network_type
=
'physical'
):
def
get_interactions_from_string
(
gene_names
,
species
=
9606
,
add_nodes
=
10
,
required_score
=
500
,
network_type
=
'physical'
):
current_time
=
str
(
datetime
.
now
())
.
replace
(
' '
,
'_'
)
pairs_file
=
current_time
+
'_protein.pairs_string.tsv'
fasta_file
=
current_time
+
'_sequences.fasta'
string_api_url
,
version
=
get_string_url
()
string_api_url
,
version
=
get_string_url
()
output_format
=
"tsv"
output_format
=
"tsv"
method
=
"network"
method
=
"network"
...
@@ -134,11 +140,13 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
...
@@ -134,11 +140,13 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
string_names_input_genes
[
'stringId'
]
.
to_list
()
string_names_input_genes
[
'stringId'
]
.
to_list
()
ids
=
set
(
ids
)
ids
=
set
(
ids
)
with
open
(
'sequences.fasta'
,
'w'
)
as
f
:
with
open
(
fasta_file
,
'w'
)
as
f
:
for
record
in
SeqIO
.
parse
(
'{}.protein.sequences.v{}.fa'
.
format
(
species
,
version
),
"fasta"
):
for
record
in
SeqIO
.
parse
(
'{}.protein.sequences.v{}.fa'
.
format
(
species
,
version
),
"fasta"
):
if
record
.
id
in
ids
:
if
record
.
id
in
ids
:
SeqIO
.
write
(
record
,
f
,
"fasta"
)
SeqIO
.
write
(
record
,
f
,
"fasta"
)
string_interactions
.
to_csv
(
'string_interactions.tsv'
,
sep
=
'
\t
'
,
index
=
False
)
string_interactions
.
to_csv
(
pairs_file
,
sep
=
'
\t
'
,
index
=
False
)
return
pairs_file
,
fasta_file
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment