Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
SENSE-PPI
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Konstantin Volzhenin
SENSE-PPI
Commits
b8fdcb75
Commit
b8fdcb75
authored
Sep 22, 2023
by
Konstantin Volzhenin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
0.5.5 docs updated
parent
5b93c030
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
42 additions
and
21 deletions
+42
-21
README.md
README.md
+9
-3
__init__.py
senseppi/__init__.py
+1
-1
__main__.py
senseppi/__main__.py
+7
-4
test.py
senseppi/commands/test.py
+2
-1
train.py
senseppi/commands/train.py
+1
-1
network_utils.py
senseppi/network_utils.py
+5
-5
utils.py
senseppi/utils.py
+17
-6
No files found.
README.md
View file @
b8fdcb75
SENSE-PPI
=======================================
========================================
[
![DOI - 10.1101/2023.09.19.558413
](
https://img.shields.io/badge/DOI-10.1101%2F2023.09.19.558413-blue
)
](https://doi.org/10.1101/2023.09.19.558413)
[
![PyPI
](
https://img.shields.io/pypi/v/senseppi?logo=PyPi
)
](https://pypi.org/project/senseppi/)
[
![Licence - MIT
](
https://img.shields.io/badge/Licence-MIT-2ea44f
)
](http://gitlab.lcqb.upmc.fr/Konstvv/SENSE-PPI/blob/master/LICENSE)
SENSE-PPI is a Deep Learning model for predicting physical protein-protein interactions based on amino acid sequences.
It is based on embeddings generated by ESM2 and uses Siamese RNN architecture to perform a binary classification.
...
...
@@ -36,4 +40,6 @@ The original SENSE-PPI repository contains two models (checkpoints with weights)
The package already comes with a preinstalled model
`senseppi.ckpt`
that is used by default if model path is not specified.
**N.B.**
: Both pretrained models were made to work with proteins in range 50-800 amino acids.
\ No newline at end of file
**N.B.**
: Both pretrained models were made to work with proteins in range 50-800 amino acids.
In order to cite the original SENSE-PPI paper, please use the following link: https://doi.org/10.1101/2023.09.19.558413
\ No newline at end of file
senseppi/__init__.py
View file @
b8fdcb75
__version__
=
"0.5.
4
"
__version__
=
"0.5.
5
"
__author__
=
"Konstantin Volzhenin"
from
.
import
model
,
commands
,
esm2_model
,
dataset
,
utils
,
network_utils
...
...
senseppi/__main__.py
View file @
b8fdcb75
...
...
@@ -3,21 +3,21 @@ import logging
import
torch
from
.commands
import
*
from
senseppi
import
__version__
from
senseppi.utils
import
block_mps
from
senseppi.utils
import
ArgumentParserWithDefaults
,
block_mps
,
determine_device
def
main
():
logging
.
basicConfig
(
level
=
logging
.
INFO
)
parser
=
argparse
.
ArgumentParser
(
parser
=
ArgumentParserWithDefaults
(
description
=
"SENSE_PPI: Sequence-based EvolutIoNary ScalE Protein-Protein Interaction prediction"
,
usage
=
"senseppi <command> [<args>]"
,
formatter_class
=
argparse
.
ArgumentDefaults
HelpFormatter
)
formatter_class
=
argparse
.
RawText
HelpFormatter
)
parser
.
add_argument
(
"-v"
,
"--version"
,
action
=
"version"
,
version
=
"SENSE-PPI v{} "
.
format
(
__version__
))
subparsers
=
parser
.
add_subparsers
(
title
=
"The list of SEINE-PPI commands
:
"
,
required
=
True
,
dest
=
"cmd"
)
subparsers
=
parser
.
add_subparsers
(
title
=
"The list of SEINE-PPI commands"
,
required
=
True
,
dest
=
"cmd"
)
modules
=
{
'train'
:
train
,
'predict'
:
predict
,
...
...
@@ -34,6 +34,9 @@ def main():
params
=
parser
.
parse_args
()
if
hasattr
(
params
,
'device'
):
if
params
.
device
==
'auto'
:
params
.
device
=
determine_device
()
if
params
.
device
==
'gpu'
:
torch
.
set_float32_matmul_precision
(
'high'
)
...
...
senseppi/commands/test.py
View file @
b8fdcb75
...
...
@@ -52,7 +52,8 @@ def add_args(parser):
"(.tsv format will be added automatically)"
)
test_args
.
add_argument
(
"--crop_data_to_model_lims"
,
action
=
"store_true"
,
help
=
"If set, the data will be cropped to the limits of the model: "
"evaluations will be done only for proteins >50aa and <800aa."
)
"evaluations will be done only for proteins >50aa and <800aa. WARNING: "
"this will modify the original input files."
)
parser
=
SensePPIModel
.
add_model_specific_args
(
parser
)
remove_argument
(
parser
,
"--lr"
)
...
...
senseppi/commands/train.py
View file @
b8fdcb75
...
...
@@ -45,7 +45,7 @@ def main(params):
def
add_args
(
parser
):
parser
=
add_general_args
(
parser
)
train_args
=
parser
.
add_argument_group
(
title
=
"Training args"
)
train_args
=
parser
.
add_argument_group
(
title
=
"Training args"
,
description
=
"Arguments for training the model."
)
parser
.
_action_groups
[
0
]
.
add_argument
(
"pairs_file"
,
type
=
str
,
help
=
"A path to a .tsv file containing training pairs. "
"Required format: 3 tab separated columns: first protein, "
...
...
senseppi/network_utils.py
View file @
b8fdcb75
...
...
@@ -111,14 +111,13 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
if
len
(
string_interactions
)
==
0
:
raise
Exception
(
'No interactions found. Please revise your input parameters.'
)
# Remov
e
duplicated interactions
# Remov
ing
duplicated interactions
string_interactions
.
drop_duplicates
(
inplace
=
True
)
# Mak
e the interactions symmetric: add
the interactions where the first and second columns are swapped
# Mak
ing the interactions symmetric: adding
the interactions where the first and second columns are swapped
string_interactions
=
pd
.
concat
([
string_interactions
,
string_interactions
.
rename
(
columns
=
{
'stringId_A'
:
'stringId_B'
,
'stringId_B'
:
'stringId_A'
,
'preferredName_A'
:
'preferredName_B'
,
'preferredName_B'
:
'preferredName_A'
})])
# Getting the sequences for hparams.genes in case there are proteins with no connections and add ghost self_connections to keep gene names in the file
string_names_input_genes
=
get_names_from_string
(
gene_names
,
species
)
string_names_input_genes
[
'stringId_A'
]
=
string_names_input_genes
[
'stringId'
]
string_names_input_genes
[
'preferredName_A'
]
=
string_names_input_genes
[
'preferredName'
]
...
...
@@ -128,10 +127,11 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
[
'stringId_A'
,
'preferredName_A'
,
'stringId_B'
,
'preferredName_B'
]]])
string_interactions
.
fillna
(
0
,
inplace
=
True
)
# For all the proteins in the first and second columns extract their sequences from 9606.protein.sequences.v11.5.fasta and write them to sequences.fasta
ids
=
list
(
string_interactions
[
'stringId_A'
]
.
values
)
+
list
(
string_interactions
[
'stringId_B'
]
.
values
)
+
\
ids
=
list
(
string_interactions
[
'stringId_A'
]
.
values
)
+
\
list
(
string_interactions
[
'stringId_B'
]
.
values
)
+
\
string_names_input_genes
[
'stringId'
]
.
to_list
()
ids
=
set
(
ids
)
with
open
(
'sequences.fasta'
,
'w'
)
as
f
:
for
record
in
SeqIO
.
parse
(
'{}.protein.sequences.v{}.fa'
.
format
(
species
,
version
),
"fasta"
):
if
record
.
id
in
ids
:
...
...
senseppi/utils.py
View file @
b8fdcb75
...
...
@@ -3,6 +3,18 @@ import os
from
senseppi
import
__version__
import
torch
import
logging
import
argparse
class ArgumentParserWithDefaults(argparse.ArgumentParser):
    """ArgumentParser that appends each argument's default to its help text.

    Every argument registered directly on the parser through
    ``add_argument`` with both a help string and a non-None default gets
    `` Default: <value>`` appended to that help string.

    NOTE(review): argument groups created with ``add_argument_group`` use
    argparse's own ``add_argument`` and therefore bypass this override.
    """

    def add_argument(self, *args, help=None, default=None, **kwargs):
        """Register an argument, annotating its help with the default value.

        Args:
            *args: Name or flags, forwarded unchanged to argparse. May be
                empty when the argument is declared via ``dest=``.
            help: Help text. Forwarded only when not None.
            default: Default value. Forwarded only when not None, so
                argparse's own per-action defaults stay in effect otherwise.
            **kwargs: Any other ``ArgumentParser.add_argument`` keywords.

        Returns:
            The ``argparse.Action`` created by the base implementation.
        """
        if help is not None:
            kwargs['help'] = help

        if default is not None:
            kwargs['default'] = default
            # SUPPRESS is a sentinel, not a displayable value: it is what the
            # built-in -h/--help option passes as its default, and what marks
            # a hidden argument when used as help. Neither may be annotated.
            annotate = (
                help is not None
                and help is not argparse.SUPPRESS
                and default is not argparse.SUPPRESS
            )
            if annotate:
                # argparse %-formats help strings, so a literal '%' coming
                # from the default value has to be escaped.
                suffix = ' Default: {}'.format(default).replace('%', '%%')
                kwargs['help'] = help + suffix

        return super().add_argument(*args, **kwargs)
def
add_general_args
(
parser
):
...
...
@@ -13,8 +25,8 @@ def add_general_args(parser):
parser
.
add_argument
(
"--max_len"
,
type
=
int
,
default
=
800
,
help
=
"Maximum length of the protein sequence. The sequences with larger length will not be "
"considered and will be deleted from the fasta file."
)
parser
.
add_argument
(
"--device"
,
type
=
str
,
default
=
determine_device
(),
choices
=
[
'cpu'
,
'gpu'
,
'mps
'
],
help
=
"Device to use
d for computations. Options include: cpu, gpu, mps (for MacOS)
."
parser
.
add_argument
(
"--device"
,
type
=
str
,
default
=
'auto'
,
choices
=
[
'cpu'
,
'gpu'
,
'mps'
,
'auto
'
],
help
=
"Device to use
for computations. Options include: cpu, gpu, mps (for MacOS), and auto
."
"If not selected the device is set by torch automatically. WARNING: mps is temporarily "
"disabled, if it is chosen, cpu will be used instead."
)
...
...
@@ -23,12 +35,11 @@ def add_general_args(parser):
def
determine_device
():
if
torch
.
cuda
.
is_available
():
device
=
'gpu'
return
'gpu'
elif
torch
.
backends
.
mps
.
is_available
()
and
torch
.
backends
.
mps
.
is_built
():
device
=
'mps'
return
'mps'
else
:
device
=
'cpu'
return
device
return
'cpu'
def
block_mps
(
params
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment