0.2.2: fixed TSV bug in predict

78f70acf · Konstantin Volzhenin · 4b1847e2 · 78f70acf · 78f70acf · 78f70acf
Commit 78f70acf authored Aug 08, 2023 by Konstantin Volzhenin
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 6 deletions

.gitignore .gitignore +3 -2

__init__.py senseppi/__init__.py +1 -1

predict.py senseppi/commands/predict.py +10 -3

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -126,4 +126,6 @@ dmypy.json
 /esm2_embs_3B
 *.sh
 draft.py
\ No newline at end of file
+/data/string_species/mmseqs_dbs/
+/data/human_virus/all_test_viruses.csv
--- a/senseppi/__init__.py
+++ b/senseppi/__init__.py
-__version__ = "0.2.1"
+__version__ = "0.2.2"
 __author__ = "Konstantin Volzhenin"
 from . import model, commands, esm2_model, dataset, utils, network_utils

--- a/senseppi/commands/predict.py
+++ b/senseppi/commands/predict.py
@@ -99,13 +99,20 @@ def main(params):
    logging.info('Predicting...')
    preds = predict(params)
-    data = pd.read_csv(params.pairs_file, delimiter='\t', names=["seq1", "seq2"])
+    data = pd.read_csv(params.pairs_file, delimiter='\t')
+    # If the file has 3 columns, name them ['seq1', 'seq2', 'label']; if it has 2 columns, name them ['seq1', 'seq2'].
+    if len(data.columns) == 3:
+        data.columns = ['seq1', 'seq2', 'label']
+    elif len(data.columns) == 2:
+        data.columns = ['seq1', 'seq2']
+    else:
+        raise ValueError('The pairs file must have 2 or 3 columns: seq1, seq2 and label(optional)')
    data['preds'] = preds
-    data.to_csv(params.output + '.tsv', sep='\t', index=False, header=False)
+    data.to_csv(params.output + '.tsv', sep='\t', index=False, header=True)
    data_positive = data[data['preds'] >= params.pred_threshold]
-    data_positive.to_csv(params.output + '_positive_interactions.tsv', sep='\t', index=False, header=False)
+    data_positive.to_csv(params.output + '_positive_interactions.tsv', sep='\t', index=False, header=True)
 if __name__ == '__main__':