0.6.5: updated the names of the temporary files used by predict and predict_string

f5945547 · Konstantin Volzhenin · 04ca5c16 · f5945547 · f5945547 · f5945547
Commit f5945547 authored Dec 19, 2023 by Konstantin Volzhenin
Showing 4 changed files with 16 additions and 6 deletions

__init__.py senseppi/__init__.py +1 -1

predict.py senseppi/commands/predict.py +3 -1

predict_string.py senseppi/commands/predict_string.py +0 -0

network_utils.py senseppi/network_utils.py +12 -4

No files found.
--- a/senseppi/__init__.py
+++ b/senseppi/__init__.py
-__version__ = "0.6.4"
+__version__ = "0.6.5"
 __author__ = "Konstantin Volzhenin"

 from . import model, commands, esm2_model, dataset, utils, network_utils

--- a/senseppi/commands/predict.py
+++ b/senseppi/commands/predict.py
@@ -9,6 +9,7 @@ from ..dataset import PairSequenceData
 from ..model import SensePPIModel
 from ..utils import *
 from ..esm2_model import add_esm_args, compute_embeddings
+from datetime import datetime


 def predict(params):
@@ -120,7 +121,8 @@ def get_protein_names(fasta_file):


 def main(params):
-    tmp_pairs = 'senseppi_pairs_for_prediction.tmp'
+    current_time = str(datetime.now()).replace(' ', '_')
+    tmp_pairs = current_time + '_senseppi_pairs_for_prediction.tsv.tmp'
    try:
        fasta_max_len = get_max_len(params.fasta_file)
        if params.max_len is None:

--- a/senseppi/commands/predict_string.py
+++ b/senseppi/commands/predict_string.py
--- a/senseppi/network_utils.py
+++ b/senseppi/network_utils.py
@@ -7,11 +7,12 @@ import urllib.request
 import requests
 import gzip
 import shutil
+from datetime import datetime

 DOWNLOAD_LINK_STRING = "https://stringdb-downloads.org/download/"


-def generate_pairs_string(fasta_file, output_file, delete_proteins=None):
+def generate_pairs_string(fasta_file, pairs_file, output_file, delete_proteins=None):
    ids = []
    for record in SeqIO.parse(fasta_file, "fasta"):
        ids.append(record.id)
@@ -23,7 +24,7 @@ def generate_pairs_string(fasta_file, output_file, delete_proteins=None):

    pairs = pd.DataFrame(pairs, columns=['seq1', 'seq2'])

-    data = pd.read_csv('string_interactions.tsv', delimiter='\t')
+    data = pd.read_csv(pairs_file, delimiter='\t')

    # Creating a dictionary of string ids and gene names
    ids_dict = dict(zip(data['preferredName_A'], data['stringId_A']))
@@ -70,6 +71,11 @@ def get_string_url():


 def get_interactions_from_string(gene_names, species=9606, add_nodes=10, required_score=500, network_type='physical'):
+
+    current_time = str(datetime.now()).replace(' ', '_')
+    pairs_file = current_time + '_protein.pairs_string.tsv'
+    fasta_file = current_time + '_sequences.fasta'
+
    string_api_url, version = get_string_url()
    output_format = "tsv"
    method = "network"
@@ -134,11 +140,13 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
          string_names_input_genes['stringId'].to_list()
    ids = set(ids)

-    with open('sequences.fasta', 'w') as f:
+    with open(fasta_file, 'w') as f:
        for record in SeqIO.parse('{}.protein.sequences.v{}.fa'.format(species, version), "fasta"):
            if record.id in ids:
                SeqIO.write(record, f, "fasta")
-    string_interactions.to_csv('string_interactions.tsv', sep='\t', index=False)
+    string_interactions.to_csv(pairs_file, sep='\t', index=False)
+
+    return pairs_file, fasta_file


 if __name__ == '__main__':