0.1.5

output contains gene names instead of STRING IDs

0.1.5
output contains gene names instead of STRING IDs
15fc56cf · Konstantin Volzhenin · a3873ec7 · 15fc56cf · 15fc56cf
Commit 15fc56cf authored Jul 26, 2023 by Konstantin Volzhenin
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 2 deletions

__init__.py senseppi/__init__.py +1 -1

predict_string.py senseppi/commands/predict_string.py +6 -1

No files found.
--- a/senseppi/__init__.py
+++ b/senseppi/__init__.py
-__version__ = "0.1.4"
+__version__ = "0.1.5"
 __author__ = "Konstantin Volzhenin"
 from . import model, commands, esm2_model, dataset, utils, network_utils

--- a/senseppi/commands/predict_string.py
+++ b/senseppi/commands/predict_string.py
@@ -62,7 +62,6 @@ def main(params):
    data['preds'] = preds
    print(data.sort_values(by=['preds'], ascending=False).to_string())
-    data.to_csv(params.output + '.tsv', sep='\t', index=False)
    # Calculate torch metrics based on data['binary_label'] and data['preds']
    torch_labels = torch.tensor(data['binary_label'])
@@ -82,6 +81,12 @@ def main(params):
        string_ids[row['stringId_A']] = row['preferredName_A']
        string_ids[row['stringId_B']] = row['preferredName_B']
+    data_to_save = data.copy()
+    data_to_save['seq1'] = data_to_save['seq1'].apply(lambda x: string_ids[x])
+    data_to_save['seq2'] = data_to_save['seq2'].apply(lambda x: string_ids[x])
+    data_to_save = data_to_save.sort_values(by=['preds'], ascending=False)
+    data_to_save.to_csv(params.output + '.tsv', sep='\t', index=False)
    # This part was needed to color the pairs belonging to the train data, temporarily removed
    # print('Fetching gene names for training set from STRING...')