0.1.5

Output now contains gene names instead of STRING IDs
parent a3873ec7
__version__ = "0.1.4"
__version__ = "0.1.5"
__author__ = "Konstantin Volzhenin"
from . import model, commands, esm2_model, dataset, utils, network_utils
......
......@@ -62,7 +62,6 @@ def main(params):
data['preds'] = preds
print(data.sort_values(by=['preds'], ascending=False).to_string())
data.to_csv(params.output + '.tsv', sep='\t', index=False)
# Calculate torch metrics based on data['binary_label'] and data['preds']
torch_labels = torch.tensor(data['binary_label'])
......@@ -82,6 +81,12 @@ def main(params):
string_ids[row['stringId_A']] = row['preferredName_A']
string_ids[row['stringId_B']] = row['preferredName_B']
data_to_save = data.copy()
data_to_save['seq1'] = data_to_save['seq1'].apply(lambda x: string_ids[x])
data_to_save['seq2'] = data_to_save['seq2'].apply(lambda x: string_ids[x])
data_to_save = data_to_save.sort_values(by=['preds'], ascending=False)
data_to_save.to_csv(params.output + '.tsv', sep='\t', index=False)
# This part was needed to color the pairs belonging to the training data; it is temporarily removed
# print('Fetching gene names for training set from STRING...')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment