First commit of SENSE-PPI (version 0.1.0, unfinished): only "senseppi predict" is working. Everything else still has to be transferred.
parent 5a815a1f
/esm2_embs_3B
/pretrained_models
*.tsv
*.fasta
__version__ = "0.1.0"
__author__ = "Konstantin Volzhenin"
from . import model, commands, esm2_model, dataset, utils
__all__ = [
"model",
"commands",
"esm2_model",
"dataset",
"utils"
]
import argparse
from .commands import *
from senseppi import __version__
def main():
parser = argparse.ArgumentParser(
description="SENSE_PPI: Sequence-based EvolutIoNary ScalE Protein-Protein Interaction prediction",
usage="senseppi <command> [<args>]",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"-v", "--version", action="version", version="SENSE-PPI v{} ".format(__version__))
subparsers = parser.add_subparsers(title="The list of SEINE-PPI commands:", required=True, dest="cmd")
modules = {'train': train, 'predict': predict}
for name, module in modules.items():
sp = subparsers.add_parser(name)
sp = module.add_args(sp)
sp.set_defaults(func=module.main)
params = parser.parse_args()
params.func(params)
if __name__ == "__main__":
main()
__all__ = ['predict', 'train']
from torch.utils.data import DataLoader
import torch
import pytorch_lightning as pl
from itertools import permutations, product
import numpy as np
import pandas as pd
import logging
import argparse
from Bio import SeqIO
from ..dataset import PairSequenceData
from ..model import SensePPIModel
from ..utils import *
from ..esm2_model import add_esm_args, compute_embeddings
def predict(params):
test_data = PairSequenceData(emb_dir=params.output_dir_esm, actions_file=params.pairs,
max_len=params.max_len, labels=False)
pretrained_model = SensePPIModel(params)
if params.device == 'gpu':
checkpoint = torch.load(params.model_path)
elif params.device == 'mps':
checkpoint = torch.load(params.model_path, map_location=torch.device('mps'))
else:
checkpoint = torch.load(params.model_path, map_location=torch.device('cpu'))
pretrained_model.load_state_dict(checkpoint['state_dict'])
trainer = pl.Trainer(accelerator=params.device, logger=False)
test_loader = DataLoader(dataset=test_data,
batch_size=params.batch_size,
num_workers=4)
preds = [pred for batch in trainer.predict(pretrained_model, test_loader) for pred in batch.view(-1).tolist()]  # view(-1) keeps single-element batches iterable
preds = np.asarray(preds)
data = pd.read_csv(params.pairs, delimiter='\t', names=["seq1", "seq2"])
data['preds'] = preds
return data
def generate_pairs(fasta_file, output_path, with_self=False):
ids = []
for record in SeqIO.parse(fasta_file, "fasta"):
ids.append(record.id)
if with_self:
all_pairs = [p for p in product(ids, repeat=2)]
else:
all_pairs = [p for p in permutations(ids, 2)]
pairs = []
for p in all_pairs:
if (p[1], p[0]) not in pairs and (p[0], p[1]) not in pairs:
pairs.append(p)
pairs = pd.DataFrame(pairs, columns=['seq1', 'seq2'])
pairs.to_csv(output_path, sep='\t', index=False, header=False)
def add_args(parser):
parser = add_general_args(parser)
predict_args = parser.add_argument_group(title="Predict args")
parser._action_groups[0].add_argument("model_path", type=str,
help="A path to .ckpt file that contains weights to a pretrained model.")
predict_args.add_argument("--pairs", type=str, default=None,
help="A path to a .tsv file with pairs of proteins to test (Optional). If not provided, all-to-all pairs will be generated.")
predict_args.add_argument("-o", "--output", type=str, default="predictions",
help="A path to a file where the predictions will be saved. (.tsv format will be added automatically)")
predict_args.add_argument("--with_self", action='store_true',
help="Include self-interactions in the predictions."
"By default they are not included since they were not part of training but"
"they can be included by setting this flag to True.")
predict_args.add_argument("-p", "--pred_threshold", type=float, default=0.5,
help="Prediction threshold to determine interacting pairs that will be written to a separate file. Range: (0, 1).")
parser = SensePPIModel.add_model_specific_args(parser)
remove_argument(parser, "--lr")
add_esm_args(parser)
return parser
def main(params):
logging.info("Device used: ", params.device)
process_string_fasta(params.fasta_file, min_len=params.min_len, max_len=params.max_len)
if params.pairs is None:
generate_pairs(params.fasta_file, 'protein.pairs.tsv', with_self=params.with_self)
params.pairs = 'protein.pairs.tsv'
compute_embeddings(params)
logging.info('Predicting...')
data = predict(params)
data.to_csv(params.output + '.tsv', sep='\t', index=False, header=False)
data_positive = data[data['preds'] >= params.pred_threshold]
data_positive.to_csv(params.output + '_positive_interactions.tsv', sep='\t', index=False, header=False)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser = add_args(parser)
params = parser.parse_args()
main(params)
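# Illustrative invocation of the predict command (not part of the original code; the
# file names are placeholders). Positional arguments follow the order in which the
# parsers above define them: the FASTA file first, then the checkpoint path.
#   python -m senseppi predict proteins.fasta senseppi.ckpt --pairs pairs.tsv -o predictions
#   python -m senseppi predict proteins.fasta senseppi.ckpt    # all-vs-all pairs are generated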
import os
from pathlib import Path
import torch
import pytorch_lightning as pl
import sys
from pytorch_lightning.callbacks import TQDMProgressBar, ModelCheckpoint
from ..model import SensePPIModel
from ..dataset import PairSequenceData
from ..utils import *
from ..esm2_model import add_esm_args, compute_embeddings
from .. import esm2_model
def main(params):
if params.seed is not None:
pl.seed_everything(params.seed, workers=True)
dataset = PairSequenceData(emb_dir=params.output_dir_esm, actions_file=params.pairs,
max_len=params.max_len, labels=True)
model = SensePPIModel(params)
model.load_data(dataset=dataset, valid_size=0.1)
train_set = model.train_dataloader()
val_set = model.val_dataloader()
logger = pl.loggers.TensorBoardLogger("logs", name='SENSE-PPI')
callbacks = [
TQDMProgressBar(refresh_rate=250),
ModelCheckpoint(filename='chkpt_loss_based_{epoch}-{val_loss:.3f}-{val_BinaryF1Score:.3f}', verbose=True,
monitor='val_loss', mode='min', save_top_k=1)
]
trainer = pl.Trainer(accelerator="gpu" if torch.cuda.is_available() else "cpu", devices=params.devices, num_nodes=params.num_nodes, max_epochs=100,
logger=logger, callbacks=callbacks, strategy=params.strategy)
trainer.fit(model, train_set, val_set)
def esm_check(fasta_file, output_dir, params):
# Use the attribute names expected by esm2_model.run (model_location_esm, output_dir_esm)
params.model_location_esm = 'esm2_t36_3B_UR50D'
params.fasta_file = fasta_file
params.output_dir_esm = output_dir
with open(params.fasta_file, 'r') as f:
seq_ids = [line.strip().split(' ')[0].replace('>', '') for line in f.readlines() if line.startswith('>')]
if not os.path.exists(params.output_dir_esm):
print('Computing ESM embeddings...')
esm2_model.run(params)
else:
for seq_id in seq_ids:
if not os.path.exists(os.path.join(params.output_dir_esm, seq_id + '.pt')):
print('Computing ESM embeddings...')
esm2_model.run(params)
break
def add_args(parser):
parser = add_general_args(parser)
train_args = parser.add_argument_group(title="Training args")
# NOTE: the arguments below are assumptions added so that main() above can read
# params.pairs, params.seed, params.devices, params.num_nodes and params.strategy
# ("train" was not fully transferred in this commit); --lr is kept since configure_optimizers needs it.
train_args.add_argument("--pairs", type=str, default=None, help="A path to a .tsv file with pairs of proteins (and labels) used for training.")
train_args.add_argument("--seed", type=int, default=None, help="Global seed for reproducibility.")
train_args.add_argument("--devices", type=int, default=1, help="Number of devices (GPUs) per node.")
train_args.add_argument("--num_nodes", type=int, default=1, help="Number of nodes used for training.")
train_args.add_argument("--strategy", type=str, default=None, help="Distributed strategy passed to pytorch_lightning.Trainer.")
parser = SensePPIModel.add_model_specific_args(parser)
add_esm_args(parser)
return parser
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser = add_args(parser)
params = parser.parse_args()
esm_check(Path(os.path.join('Data', 'Dscript', 'human.fasta')),
Path(os.path.join('Data', 'Dscript', 'esm_emb_3B_human')),
params)
main(params)
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
import os
class PairSequenceData(Dataset):
def __init__(self,
actions_file,
emb_dir,
max_len=800,
pad_inputs=True,
labels=True):
super(PairSequenceData, self).__init__()
self.max_len = max_len
self.pad_inputs = pad_inputs
self.emb_dir = emb_dir
self.action_path = actions_file
self.labels = labels
dtypes = {'seq1': str, 'seq2': str}
if self.labels:
dtypes.update({'label': np.float16})
self.actions = pd.read_csv(self.action_path, delimiter='\t', names=["seq1", "seq2", "label"], dtype=dtypes)
else:
self.actions = pd.read_csv(self.action_path, delimiter='\t', names=["seq1", "seq2"], dtype=dtypes)
def get_emb(self, emb_id):
f = os.path.join(self.emb_dir, '{}.pt'.format(emb_id))
try:
emb = torch.load(f)
except FileNotFoundError:
raise FileNotFoundError(
'Embedding file {} not found. Check your FASTA file and make sure it contains '
'all the sequences used in training/testing.'.format(f))
tensor_emb = emb['representations'][36]  # layer 36 for esm2_t36_3B_UR50D (use 33 for esm2_t33_650M_UR50D)
tensor_len = tensor_emb.size(0)
if self.pad_inputs:
if tensor_emb.shape[0] > self.max_len:
tensor_emb = tensor_emb[:self.max_len]
tensor_len = self.max_len  # keep the reported length consistent with the truncated tensor
if tensor_emb.shape[0] < self.max_len:
tensor_emb = F.pad(tensor_emb, (0, 0, 0, self.max_len - tensor_emb.size(0)), "constant", 0)
return tensor_emb, tensor_len
def __len__(self):
return len(self.actions)
def __getitem__(self, idx):
id1 = self.actions["seq1"][idx]
id2 = self.actions["seq2"][idx]
if self.labels:
label = int(self.actions["label"][idx])
else:
label = 0
emb1, len1 = self.get_emb(id1)
emb2, len2 = self.get_emb(id2)
return {"emb1": emb1,
"len1": len1,
"emb2": emb2,
"len2": len2,
"label": label,
"prot1": id1,
"prot2": id2}
if __name__ == '__main__':
pass
#!/usr/bin/env python3 -u
# Copyright (c) Meta Platforms, Inc. and affiliates.
import argparse
import pathlib
import torch
import os
import logging
from esm import FastaBatchedDataset, pretrained
def add_esm_args(parent_parser):
parser = parent_parser.add_argument_group(title="ESM2 model args",
description="ESM2: Extract per-token representations and model "
"outputs for sequences in a FASTA file. "
"If you would like to use the basic version of SENSE-PPI "
"do no edit the default values of the arguments below. ")
parser.add_argument(
"--model_location_esm",
type=str, default="esm2_t36_3B_UR50D",
help="PyTorch model file OR name of pretrained model to download. If not default, "
"the number of encoder_features has to be modified according to the embedding dimensionality. "
)
parser.add_argument(
"--output_dir_esm",
type=pathlib.Path, default=pathlib.Path('esm2_embs_3B'),
help="output directory for extracted representations",
)
parser.add_argument("--toks_per_batch_esm", type=int, default=4096, help="maximum batch size")
parser.add_argument(
"--repr_layers_esm",
type=int,
default=[-1],
nargs="+",
help="layers indices from which to extract representations (0 to num_layers, inclusive)",
)
parser.add_argument(
"--truncation_seq_length_esm",
type=int,
default=1022,
help="truncate sequences longer than the given value",
)
def run(args):
model, alphabet = pretrained.load_model_and_alphabet(args.model_location_esm)
model.eval()
if args.device == 'gpu':
model = model.cuda()
print("Transferred the ESM2 model to GPU")
elif args.device == 'mps':
model = model.to('mps')
print("Transferred the ESM2 model to MPS")
dataset = FastaBatchedDataset.from_file(args.fasta_file)
batches = dataset.get_batch_indices(args.toks_per_batch_esm, extra_toks_per_seq=1)
data_loader = torch.utils.data.DataLoader(
dataset, collate_fn=alphabet.get_batch_converter(args.truncation_seq_length_esm), batch_sampler=batches
)
print(f"Read {args.fasta_file} with {len(dataset)} sequences")
args.output_dir_esm.mkdir(parents=True, exist_ok=True)
assert all(-(model.num_layers + 1) <= i <= model.num_layers for i in args.repr_layers_esm)
repr_layers = [(i + model.num_layers + 1) % (model.num_layers + 1) for i in args.repr_layers_esm]
with torch.no_grad():
for batch_idx, (labels, strs, toks) in enumerate(data_loader):
print(
f"Processing {batch_idx + 1} of {len(batches)} batches ({toks.size(0)} sequences)"
)
if args.device == 'gpu':
toks = toks.to(device="cuda", non_blocking=True)
elif args.device == 'mps':
toks = toks.to(device="mps", non_blocking=True)
out = model(toks, repr_layers=repr_layers, return_contacts=False)
representations = {
layer: t.to(device="cpu") for layer, t in out["representations"].items()
}
for i, label in enumerate(labels):
args.output_file_esm = args.output_dir_esm / f"{label}.pt"
args.output_file_esm.parent.mkdir(parents=True, exist_ok=True)
result = {"label": label}
truncate_len = min(args.truncation_seq_length_esm, len(strs[i]))
# Call clone on tensors to ensure tensors are not views into a larger representation
# See https://github.com/pytorch/pytorch/issues/1995
result["representations"] = {
layer: t[i, 1 : truncate_len + 1].clone()
for layer, t in representations.items()
}
torch.save(
result,
args.output_file_esm,
)
def compute_embeddings(params):
# Compute ESM embeddings
logging.info('Computing ESM embeddings if they are not already computed. '
'If all the files already exist in output_dir_esm, this step will be skipped.')
if not os.path.exists(params.output_dir_esm):
run(params)
else:
with open(params.fasta_file, 'r') as f:
seq_ids = [line.strip().split(' ')[0].replace('>', '') for line in f.readlines() if line.startswith('>')]
for seq_id in seq_ids:
if not os.path.exists(os.path.join(params.output_dir_esm, seq_id + '.pt')):
run(params)
break
if __name__ == "__main__":
parser = argparse.ArgumentParser()
add_esm_args(parser)
args = parser.parse_args()
run(args)
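# Illustrative sketch (not part of the original code): how one of the per-protein
# embedding files written by run() can be inspected. The path below is a placeholder;
# layer index 36 matches the default esm2_t36_3B_UR50D model used by the package.
def _inspect_esm_embedding(path="esm2_embs_3B/P12345.pt"):
    emb = torch.load(path)
    print(emb["label"])                    # FASTA record id
    reps = emb["representations"][36]      # per-token embeddings, shape [seq_len, 2560]
    print(reps.shape)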
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
import pytorch_lightning as pl
import torch.utils.data as data
from torch.utils.data import Subset
from torchmetrics import AUROC, ROC, Accuracy, Precision, Recall, F1Score, MatthewsCorrCoef, AveragePrecision
from torchmetrics.collections import MetricCollection
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.optim as optim
import numpy as np
class DynamicLSTM(pl.LightningModule):
"""
Dynamic recurrent module that can handle variable-length input sequences.
Note: despite the class name, the underlying recurrent layer is a torch.nn.GRU.
Parameters
----------
input_size : input size
hidden_size : hidden size
num_layers : number of hidden layers. Default: 1
dropout : dropout rate. Default: 0
bidirectional : If True, becomes a bidirectional RNN. Default: False.
Inputs
------
input: tensor, shaped [batch, max_step, input_size]
seq_lens: tensor, shaped [batch], sequence lengths of batch
Outputs
-------
output: tensor. If return_sequences is True, shaped
[batch, max_step, num_directions * hidden_size], containing the output
features (h_t) for each t; otherwise shaped
[batch, num_directions * hidden_size], containing only the output at each
sequence's last valid time step.
"""
def __init__(self, input_size, hidden_size=100,
num_layers=1, dropout=0., bidirectional=False, return_sequences=False):
super(DynamicLSTM, self).__init__()
self.lstm = torch.nn.GRU(
input_size, hidden_size, num_layers, bias=True,
batch_first=True, dropout=dropout, bidirectional=bidirectional)
self.return_sequences = return_sequences
def forward(self, x, seq_lens):
# sort input by descending length
_, idx_sort = torch.sort(seq_lens, dim=0, descending=True)
_, idx_unsort = torch.sort(idx_sort, dim=0)
x_sort = torch.index_select(x, dim=0, index=idx_sort)
seq_lens_sort = torch.index_select(seq_lens, dim=0, index=idx_sort).to('cpu')
# pack input
x_packed = pack_padded_sequence(
x_sort, seq_lens_sort, batch_first=True)
# pass through rnn
y_packed, _ = self.lstm(x_packed)
# unpack output
y_sort, length = pad_packed_sequence(y_packed, batch_first=True)
# unsort output to original order
y = torch.index_select(y_sort, dim=0, index=idx_unsort)
if self.return_sequences:
return y
y_new = torch.unsqueeze(y[0, seq_lens[0] - 1, :].squeeze(), 0)
for i in range(1, len(seq_lens)):
y_i = torch.unsqueeze(y[i, seq_lens[i] - 1].squeeze(), 0)
y_new = torch.cat((y_new, y_i), dim=0)
return y_new
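# Minimal shape check for DynamicLSTM (illustrative, not part of the original code):
# feeds a padded batch of two variable-length sequences through the module and shows
# that, with return_sequences=False, the output is one pooled vector per sequence.
def _demo_dynamic_lstm():
    rnn = DynamicLSTM(input_size=8, hidden_size=4, bidirectional=True)
    x = torch.zeros(2, 10, 8)          # [batch, max_step, input_size]
    lens = torch.tensor([10, 6])       # true sequence lengths
    out = rnn(x, lens)
    print(out.shape)                   # torch.Size([2, 8]) == [batch, num_directions * hidden_size]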
class CosineWarmupScheduler(optim.lr_scheduler._LRScheduler):
def __init__(self, optimizer, warmup, max_iters):
self.warmup = warmup
self.max_num_iters = max_iters
super().__init__(optimizer)
def get_lr(self):
lr_factor = self.get_lr_factor(epoch=self.last_epoch)
# print('Current lr: ', [base_lr * lr_factor for base_lr in self.base_lrs])
return [base_lr * lr_factor for base_lr in self.base_lrs]
def get_lr_factor(self, epoch):
lr_factor = 0.5 * (1 + np.cos(np.pi * epoch / self.max_num_iters))
if epoch <= self.warmup:
lr_factor *= (epoch + 1) * 1.0 / self.warmup
return lr_factor
class BaselineModel(pl.LightningModule):
def __init__(self, params):
super(BaselineModel, self).__init__()
self.save_hyperparameters(params)
# Transfer to hyperparameters
self.train_set = None
self.val_set = None
self.test_set = None
# Defining whether to sync the logs or not depending on the number of gpus
if hasattr(self.hparams, 'devices') and int(self.hparams.devices) > 1:
self.hparams.sync_dist = True
else:
self.hparams.sync_dist = False
self.valid_metrics = MetricCollection([
Accuracy(task="binary"),
Precision(task="binary"),
Recall(task="binary"),
F1Score(task="binary"),
MatthewsCorrCoef(task="binary", num_classes=2),
AUROC(task="binary"),
AveragePrecision(task="binary")
], prefix='val_')
self.train_metrics = self.valid_metrics.clone(prefix="train_")
self.test_metrics = self.valid_metrics.clone(prefix="test_")
def _single_step(self, batch):
preds = self.forward(batch)
preds = preds.view(-1)
loss = F.binary_cross_entropy(preds, batch["label"].to(torch.float32))
return batch["label"], preds, loss
def training_step(self, batch, batch_idx):
trues, preds, loss = self._single_step(batch)
self.train_metrics.update(preds, trues)
return loss
def test_step(self, batch, batch_idx):
trues, preds, test_loss = self._single_step(batch)
self.test_metrics.update(preds, trues)
self.log("test_loss", test_loss, batch_size=self.hparams.batch_size, sync_dist=self.hparams.sync_dist)
def validation_step(self, batch, batch_idx):
trues, preds, val_loss = self._single_step(batch)
self.valid_metrics.update(preds, trues)
self.log("val_loss", val_loss, batch_size=self.hparams.batch_size, sync_dist=self.hparams.sync_dist)
def training_epoch_end(self, outputs) -> None:
result = self.train_metrics.compute()
self.train_metrics.reset()
self.log_dict(result, on_epoch=True, sync_dist=self.hparams.sync_dist)
def test_epoch_end(self, outputs) -> None:
result = self.test_metrics.compute()
self.test_metrics.reset()
self.log_dict(result, on_epoch=True, sync_dist=self.hparams.sync_dist)
def validation_epoch_end(self, outputs) -> None:
result = self.valid_metrics.compute()
self.valid_metrics.reset()
self.log_dict(result, on_epoch=True, sync_dist=self.hparams.sync_dist)
def load_data(self, dataset, valid_size=0.2, indices=None):
if indices is None:
dataset_length = len(dataset)
valid_length = int(valid_size * dataset_length)
train_length = dataset_length - valid_length
self.train_set, self.val_set = data.random_split(dataset, [train_length, valid_length]) # , test_size])
print('Data has been randomly divided into train/val sets with sizes {} and {}'.format(len(self.train_set),
len(self.val_set)))
else:
train_indices, val_indices = indices
self.train_set = Subset(dataset, train_indices)
self.val_set = Subset(dataset, val_indices)
print('Data has been divided into train/val sets with sizes {} and {} based on selected indices'.format(
len(self.train_set), len(self.val_set)))
def train_dataloader(self, train_set=None, num_workers=8):
if train_set is not None:
self.train_set = train_set
return DataLoader(dataset=self.train_set,
batch_size=self.hparams.batch_size,
num_workers=num_workers,
shuffle=True)
def test_dataloader(self, test_set=None, num_workers=8):
if test_set is not None:
self.test_set = test_set
return DataLoader(dataset=self.test_set,
batch_size=self.hparams.batch_size,
num_workers=num_workers)
def val_dataloader(self, val_set=None, num_workers=8):
if val_set is not None:
self.val_set = val_set
return DataLoader(dataset=self.val_set,
batch_size=self.hparams.batch_size,
num_workers=num_workers)
@staticmethod
def add_model_specific_args(parent_parser):
parser = parent_parser.add_argument_group("Args_model")
parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate for training.")
parser.add_argument("--batch_size", type=int, default=64, help="Batch size for training/testing.")
parser.add_argument("--encoder_features", type=int, default=2560,
help="Number of features in the encoder "
"(Corresponds to the dimentionality of per-token embedding of ESM2 model.) "
"If not a 3B version of ESM2 is chosen, this parameter needs to be set accordingly.")
return parent_parser
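# For reference: --encoder_features must match the per-token embedding size of the
# ESM2 checkpoint selected via --model_location_esm, e.g. esm2_t33_650M_UR50D produces
# 1280-dimensional embeddings and the default esm2_t36_3B_UR50D produces 2560.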
class SensePPIModel(BaselineModel):
def __init__(self, params):
super(SensePPIModel, self).__init__(params)
self.encoder_features = self.hparams.encoder_features # 2560
self.hidden_dim = 256
self.lstm = DynamicLSTM(self.encoder_features, hidden_size=128, num_layers=3, dropout=0.5, bidirectional=True)
self.dense_head = torch.nn.Sequential(
torch.nn.Dropout(p=0.5),
torch.nn.Linear(self.hidden_dim, 32),
torch.nn.ReLU(),
torch.nn.Dropout(p=0.5),
torch.nn.Linear(32, 1),
torch.nn.Sigmoid()
)
def forward(self, batch):
x1 = batch["emb1"]
x2 = batch["emb2"]
len1 = batch["len1"]
len2 = batch["len2"]
x1 = self.lstm(x1, len1)
x2 = self.lstm(x2, len2)
return self.dense_head(x1 * x2)
def configure_optimizers(self):
optimizer = torch.optim.AdamW(self.parameters(), lr=self.hparams.lr)
# optimizer = torch.optim.RAdam(self.parameters(), lr=self.hparams.lr)
lr_dict = {
"scheduler": CosineWarmupScheduler(optimizer=optimizer, warmup=5, max_iters=200),
"name": 'CosineWarmupScheduler',
}
return [optimizer], [lr_dict]
from Bio import SeqIO
import os
import argparse
from senseppi import __version__
import pathlib
import torch
def add_general_args(parser):
parser.add_argument("-v", "--version", action="version", version="SENSE_PPI v{}".format(__version__))
parser.add_argument(
"fasta_file",
type=pathlib.Path,
help="FASTA file on which to extract the ESM2 representations and then train or test.",
)
parser.add_argument("--min_len", type=int, default=50,
help="Minimum length of the protein sequence. "
"The sequences with smaller length will not be considered. Default: 50")
parser.add_argument("--max_len", type=int, default=800,
help="Maximum length of the protein sequence. "
"The sequences with larger length will not be considered. Default: 800")
parser.add_argument("--device", type=str, default=determine_device(), choices=['cpu', 'gpu', 'mps'],
help="Device to used for computations. Options include: cpu, gpu, mps (for MacOS)."
"If not selected the device is set by torch automatically.")
return parser
def determine_device():
# Return a value that matches the --device choices above and the
# pytorch_lightning accelerator names ('gpu' rather than 'cuda').
if torch.cuda.is_available():
device = 'gpu'
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
device = 'mps'
else:
device = 'cpu'
return device
def process_string_fasta(fasta_file, min_len, max_len):
with open('file.tmp', 'w') as f:
for record in SeqIO.parse(fasta_file, "fasta"):
if len(record.seq) < min_len or len(record.seq) > max_len:
continue
record.id = record.id.split(' ')[0]
record.description = ''
record.name = ''
SeqIO.write(record, f, "fasta")
# Rename the temporary file to the original file
os.remove(fasta_file)
os.rename('file.tmp', fasta_file)
def remove_argument(parser, arg):
for action in parser._actions:
opts = action.option_strings
if (opts and opts[0] == arg) or action.dest == arg:
parser._remove_action(action)
break
for action in parser._action_groups:
for group_action in action._group_actions:
opts = group_action.option_strings
if (opts and opts[0] == arg) or group_action.dest == arg:
action._group_actions.remove(group_action)
return
from setuptools import setup, find_packages
import senseppi
with open("README.md", "r") as fh:
long_description = fh.read()
setup(
name="dscript_data",
version=senseppi.__version__,
description="SENSE_PPI: Sequence-based EvolutIoNary ScalE Protein-Protein Interaction prediction",
author="Konstantin Volzhenin",
author_email="konstantin.volzhenin@sorbonne-universite.fr",
url="",
license="MIT",
packages=find_packages(),
long_description=long_description,
long_description_content_type="text/markdown",
include_package_data=True,
install_requires=[
"numpy",
"pandas",
"torch>=1.12",
"matplotlib",
"tqdm",
"scikit-learn",
"pytorch-lightning",
"torchmetrics",
"biopython",
"fair-esm"
],
)