0.6.5: updated the names of the tmp files for predict and predict string

parent 04ca5c16
__version__ = "0.6.4" __version__ = "0.6.5"
__author__ = "Konstantin Volzhenin" __author__ = "Konstantin Volzhenin"
from . import model, commands, esm2_model, dataset, utils, network_utils from . import model, commands, esm2_model, dataset, utils, network_utils
......
...@@ -9,6 +9,7 @@ from ..dataset import PairSequenceData ...@@ -9,6 +9,7 @@ from ..dataset import PairSequenceData
from ..model import SensePPIModel from ..model import SensePPIModel
from ..utils import * from ..utils import *
from ..esm2_model import add_esm_args, compute_embeddings from ..esm2_model import add_esm_args, compute_embeddings
from datetime import datetime
def predict(params): def predict(params):
...@@ -120,7 +121,8 @@ def get_protein_names(fasta_file): ...@@ -120,7 +121,8 @@ def get_protein_names(fasta_file):
def main(params): def main(params):
tmp_pairs = 'senseppi_pairs_for_prediction.tmp' current_time = str(datetime.now()).replace(' ', '_')
tmp_pairs = current_time + '_senseppi_pairs_for_prediction.tsv.tmp'
try: try:
fasta_max_len = get_max_len(params.fasta_file) fasta_max_len = get_max_len(params.fasta_file)
if params.max_len is None: if params.max_len is None:
......
...@@ -7,11 +7,12 @@ import urllib.request ...@@ -7,11 +7,12 @@ import urllib.request
import requests import requests
import gzip import gzip
import shutil import shutil
from datetime import datetime
DOWNLOAD_LINK_STRING = "https://stringdb-downloads.org/download/" DOWNLOAD_LINK_STRING = "https://stringdb-downloads.org/download/"
def generate_pairs_string(fasta_file, output_file, delete_proteins=None): def generate_pairs_string(fasta_file, pairs_file, output_file, delete_proteins=None):
ids = [] ids = []
for record in SeqIO.parse(fasta_file, "fasta"): for record in SeqIO.parse(fasta_file, "fasta"):
ids.append(record.id) ids.append(record.id)
...@@ -23,7 +24,7 @@ def generate_pairs_string(fasta_file, output_file, delete_proteins=None): ...@@ -23,7 +24,7 @@ def generate_pairs_string(fasta_file, output_file, delete_proteins=None):
pairs = pd.DataFrame(pairs, columns=['seq1', 'seq2']) pairs = pd.DataFrame(pairs, columns=['seq1', 'seq2'])
data = pd.read_csv('string_interactions.tsv', delimiter='\t') data = pd.read_csv(pairs_file, delimiter='\t')
# Creating a dictionary of string ids and gene names # Creating a dictionary of string ids and gene names
ids_dict = dict(zip(data['preferredName_A'], data['stringId_A'])) ids_dict = dict(zip(data['preferredName_A'], data['stringId_A']))
...@@ -70,6 +71,11 @@ def get_string_url(): ...@@ -70,6 +71,11 @@ def get_string_url():
def get_interactions_from_string(gene_names, species=9606, add_nodes=10, required_score=500, network_type='physical'): def get_interactions_from_string(gene_names, species=9606, add_nodes=10, required_score=500, network_type='physical'):
current_time = str(datetime.now()).replace(' ', '_')
pairs_file = current_time + '_protein.pairs_string.tsv'
fasta_file = current_time + '_sequences.fasta'
string_api_url, version = get_string_url() string_api_url, version = get_string_url()
output_format = "tsv" output_format = "tsv"
method = "network" method = "network"
...@@ -134,11 +140,13 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require ...@@ -134,11 +140,13 @@ def get_interactions_from_string(gene_names, species=9606, add_nodes=10, require
string_names_input_genes['stringId'].to_list() string_names_input_genes['stringId'].to_list()
ids = set(ids) ids = set(ids)
with open('sequences.fasta', 'w') as f: with open(fasta_file, 'w') as f:
for record in SeqIO.parse('{}.protein.sequences.v{}.fa'.format(species, version), "fasta"): for record in SeqIO.parse('{}.protein.sequences.v{}.fa'.format(species, version), "fasta"):
if record.id in ids: if record.id in ids:
SeqIO.write(record, f, "fasta") SeqIO.write(record, f, "fasta")
string_interactions.to_csv('string_interactions.tsv', sep='\t', index=False) string_interactions.to_csv(pairs_file, sep='\t', index=False)
return pairs_file, fasta_file
if __name__ == '__main__': if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment