0.5.5 docs updated

parent 5b93c030
SENSE-PPI
=======================================
========================================
[![DOI - 10.1101/2023.09.19.558413](https://img.shields.io/badge/DOI-10.1101%2F2023.09.19.558413-blue)](https://doi.org/10.1101/2023.09.19.558413)
[![PyPI](https://img.shields.io/pypi/v/senseppi?logo=PyPi)](https://pypi.org/project/senseppi/)
[![Licence - MIT](https://img.shields.io/badge/Licence-MIT-2ea44f)](http://gitlab.lcqb.upmc.fr/Konstvv/SENSE-PPI/blob/master/LICENSE)
SENSE-PPI is a deep learning model for predicting physical protein-protein interactions from amino acid sequences.
It is based on embeddings generated by ESM2 and uses a Siamese RNN architecture to perform binary classification.
......@@ -36,4 +40,6 @@ The original SENSE-PPI repository contains two models (checkpoints with weights)
The package comes with a preinstalled model, `senseppi.ckpt`, which is used by default if no model path is specified.
**N.B.**: Both pretrained models were made to work with proteins in range 50-800 amino acids.
\ No newline at end of file
**N.B.**: Both pretrained models were made to work with proteins in range 50-800 amino acids.
In order to cite the original SENSE-PPI paper, please use the following link: https://doi.org/10.1101/2023.09.19.558413
\ No newline at end of file
__version__ = "0.5.4"
__version__ = "0.5.5"
__author__ = "Konstantin Volzhenin"
from . import model, commands, esm2_model, dataset, utils, network_utils
......
......@@ -3,21 +3,21 @@ import logging
import torch
from .commands import *
from senseppi import __version__
from senseppi.utils import block_mps
from senseppi.utils import ArgumentParserWithDefaults, block_mps, determine_device
def main():
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
parser = ArgumentParserWithDefaults(
description="SENSE_PPI: Sequence-based EvolutIoNary ScalE Protein-Protein Interaction prediction",
usage="senseppi <command> [<args>]",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"-v", "--version", action="version", version="SENSE-PPI v{} ".format(__version__))
subparsers = parser.add_subparsers(title="The list of SEINE-PPI commands:", required=True, dest="cmd")
subparsers = parser.add_subparsers(title="The list of SEINE-PPI commands", required=True, dest="cmd")
modules = {'train': train,
'predict': predict,
......@@ -34,6 +34,9 @@ def main():
params = parser.parse_args()
if hasattr(params, 'device'):
if params.device == 'auto':
params.device = determine_device()
if params.device == 'gpu':
torch.set_float32_matmul_precision('high')
......
......@@ -52,7 +52,8 @@ def add_args(parser):
"(.tsv format will be added automatically)")
test_args.add_argument("--crop_data_to_model_lims", action="store_true",
help="If set, the data will be cropped to the limits of the model: "
"evaluations will be done only for proteins >50aa and <800aa.")
"evaluations will be done only for proteins >50aa and <800aa. WARNING: "
"this will modify the original input files.")
parser = SensePPIModel.add_model_specific_args(parser)
remove_argument(parser, "--lr")
......
......@@ -45,7 +45,7 @@ def main(params):
def add_args(parser):
parser = add_general_args(parser)
train_args = parser.add_argument_group(title="Training args")
train_args = parser.add_argument_group(title="Training args", description="Arguments for training the model.")
parser._action_groups[0].add_argument("pairs_file", type=str,
help="A path to a .tsv file containing training pairs. "
"Required format: 3 tab separated columns: first protein, "
......
......@@ -111,14 +111,13 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
if len(string_interactions) == 0:
raise Exception('No interactions found. Please revise your input parameters.')
# Remove duplicated interactions
# Removing duplicated interactions
string_interactions.drop_duplicates(inplace=True)
# Make the interactions symmetric: add the interactions where the first and second columns are swapped
# Making the interactions symmetric: adding the interactions where the first and second columns are swapped
string_interactions = pd.concat([string_interactions, string_interactions.rename(
columns={'stringId_A': 'stringId_B', 'stringId_B': 'stringId_A', 'preferredName_A': 'preferredName_B',
'preferredName_B': 'preferredName_A'})])
# Getting the sequences for hparams.genes in case there are proteins with no connections, and adding ghost self-connections to keep the gene names in the file
string_names_input_genes = get_names_from_string(gene_names, species)
string_names_input_genes['stringId_A'] = string_names_input_genes['stringId']
string_names_input_genes['preferredName_A'] = string_names_input_genes['preferredName']
......@@ -128,10 +127,11 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
['stringId_A', 'preferredName_A', 'stringId_B', 'preferredName_B']]])
string_interactions.fillna(0, inplace=True)
# For all the proteins in the first and second columns, extract their sequences from the STRING sequence file ({species}.protein.sequences.v{version}.fa) and write them to sequences.fasta
ids = list(string_interactions['stringId_A'].values) + list(string_interactions['stringId_B'].values) + \
ids = list(string_interactions['stringId_A'].values) + \
list(string_interactions['stringId_B'].values) + \
string_names_input_genes['stringId'].to_list()
ids = set(ids)
with open('sequences.fasta', 'w') as f:
for record in SeqIO.parse('{}.protein.sequences.v{}.fa'.format(species, version), "fasta"):
if record.id in ids:
......
......@@ -3,6 +3,18 @@ import os
from senseppi import __version__
import torch
import logging
import argparse
class ArgumentParserWithDefaults(argparse.ArgumentParser):
    """ArgumentParser that appends each argument's default value to its help text.

    Only arguments added directly on the parser go through this override;
    the help text gains a trailing `` Default: <value>`` when both a help
    string and a default are supplied.
    """

    def add_argument(self, *args, help=None, default=None, **kwargs):
        """Register an argument, documenting its default in the help string.

        Args:
            *args: Name or option strings, forwarded to argparse unchanged.
            help: Help text. ``argparse.SUPPRESS`` is forwarded untouched so
                the argument stays hidden.
            default: Default value. ``None`` means "not given" and is not
                forwarded; ``argparse.SUPPRESS`` is forwarded but never
                printed in the help text.
            **kwargs: Any other ``ArgumentParser.add_argument`` keyword.

        Returns:
            The ``argparse.Action`` created by the base class, so callers can
            keep using the standard ``action = parser.add_argument(...)`` form.
        """
        if help is not None:
            kwargs['help'] = help
        if default is not None:
            kwargs['default'] = default
            # Sentinels are compared by identity, as argparse itself does.
            # This also covers the built-in help action (default=SUPPRESS)
            # regardless of the parser's prefix characters.
            printable = (default is not argparse.SUPPRESS
                         and help is not None
                         and help is not argparse.SUPPRESS)
            if printable:
                kwargs['help'] += ' Default: {}'.format(default)
        return super().add_argument(*args, **kwargs)
def add_general_args(parser):
......@@ -13,8 +25,8 @@ def add_general_args(parser):
parser.add_argument("--max_len", type=int, default=800,
help="Maximum length of the protein sequence. The sequences with larger length will not be "
"considered and will be deleted from the fasta file.")
parser.add_argument("--device", type=str, default=determine_device(), choices=['cpu', 'gpu', 'mps'],
help="Device to used for computations. Options include: cpu, gpu, mps (for MacOS)."
parser.add_argument("--device", type=str, default='auto', choices=['cpu', 'gpu', 'mps', 'auto'],
help="Device to use for computations. Options include: cpu, gpu, mps (for MacOS), and auto."
"If not selected the device is set by torch automatically. WARNING: mps is temporarily "
"disabled, if it is chosen, cpu will be used instead.")
......@@ -23,12 +35,11 @@ def add_general_args(parser):
def determine_device():
if torch.cuda.is_available():
device = 'gpu'
return 'gpu'
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
device = 'mps'
return 'mps'
else:
device = 'cpu'
return device
return 'cpu'
def block_mps(params):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment