Commit 06d1659f by Mustafa Tekpinar

Reorganized the code for pip installation.

parent 4223d132
...@@ -3,4 +3,6 @@ ...@@ -3,4 +3,6 @@
.Rhistory .Rhistory
example/.RData example/.RData
gemmeAnal.pyc gemmeAnal.pyc
__pycache__/* __pycache__/*
\ No newline at end of file esgemme.egg-info/*
.RData
\ No newline at end of file
"""
Program Name: esgemme
Author      : Mustafa TEKPINAR
Copyright   : Mustafa TEKPINAR - Alessandra Carbone - 2022-2023
Email       : tekpinar@buffalo.edu
Purpose     : A Python program to predict mutational effects of proteins.
"""
# Names exported when this module is star-imported.
__all__ = ['esgemme']
# Version string of the package.
# NOTE(review): setup.py appears to import this value as the distribution
# version -- confirm before changing its format.
__version__ = '1.3.0'
...@@ -13,7 +13,9 @@ import re ...@@ -13,7 +13,9 @@ import re
import subprocess import subprocess
import math import math
import numpy as np import numpy as np
import matplotlib.pylab as plt import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
import shutil import shutil
import glob import glob
...@@ -944,7 +946,8 @@ def parse_command_line(): ...@@ -944,7 +946,8 @@ def parse_command_line():
return args return args
def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, alphabet, verbosity): def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,\
alphabet, verbosity, offset, colormap):
""" """
Perfect explanation for a function: typing the function call exactly! Perfect explanation for a function: typing the function call exactly!
doit is basically the main function in disguise! doit is basically the main function in disguise!
...@@ -1106,7 +1109,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1106,7 +1109,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
if(simple): if(simple):
print("generating the plots...") print("generating the plots...")
#Check if the normalized data files were created. #Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolEpi.txt") and args.verbose==True): if(os.path.exists(prot+"_normPred_evolEpi.txt") and verbosity==True):
gemmeData = parseGEMMEoutput(prot+"_normPred_evolEpi.txt", verbose=False) gemmeData = parseGEMMEoutput(prot+"_normPred_evolEpi.txt", verbose=False)
sequenceLength = len(gemmeData[0]) sequenceLength = len(gemmeData[0])
beginning = 1 beginning = 1
...@@ -1118,19 +1121,21 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1118,19 +1121,21 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+1), \ plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+1), \
i*rowLength + beginning, \ i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\ (i+1)*rowLength + beginning -1,\
colorMap='Blues_r', offSet=i*rowLength + args.offset, pixelType='square',\ colorMap='Blues_r', offSet=i*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True) sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0): if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+2), \ plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \ (i+1)*rowLength + beginning, \
end,\ end,\
colorMap='Blues_r', offSet=(i+1)*rowLength + args.offset, pixelType='square',\ colorMap='Blues_r', offSet=(i+1)*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
else:
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi", beginning, end,\
colorMap='Blues_r', offSet=offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True) sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi.png", 1, None,\
# colorMap='Blues_r', offSet=0, pixelType='square')
#Check if the normalized data files were created. #Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolInd.txt") and args.verbose==True): if(os.path.exists(prot+"_normPred_evolInd.txt") and verbosity==True):
gemmeData = parseGEMMEoutput(prot+"_normPred_evolInd.txt", verbose=False) gemmeData = parseGEMMEoutput(prot+"_normPred_evolInd.txt", verbose=False)
sequenceLength = len(gemmeData[0]) sequenceLength = len(gemmeData[0])
beginning = 1 beginning = 1
...@@ -1142,16 +1147,18 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1142,16 +1147,18 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+1), \ plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+1), \
i*rowLength + beginning, \ i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\ (i+1)*rowLength + beginning -1,\
colorMap='Greens_r', offSet=i*rowLength + args.offset, pixelType='square',\ colorMap='Greens_r', offSet=i*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True) sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0): if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+2), \ plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \ (i+1)*rowLength + beginning, \
end,\ end,\
colorMap='Greens_r', offSet=(i+1)*rowLength + args.offset, pixelType='square',\ colorMap='Greens_r', offSet=(i+1)*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
else:
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd",beginning, end,\
colorMap='Greens_r', offSet=offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True) sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd.png", 1, None,\
# colorMap='Greens_r', offSet=0, pixelType='square')
#Check if the normalized data files were created. #Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolCombi.txt")): if(os.path.exists(prot+"_normPred_evolCombi.txt")):
...@@ -1166,18 +1173,19 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1166,18 +1173,19 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+1), \ plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+1), \
i*rowLength + beginning, \ i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\ (i+1)*rowLength + beginning -1,\
colorMap=args.colormap, offSet=i*rowLength + args.offset, pixelType='square',\ colorMap=colormap, offSet=i*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True) sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0): if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+2), \ plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \ (i+1)*rowLength + beginning, \
end,\ end,\
colorMap=args.colormap, offSet=(i+1)*rowLength + args.offset, pixelType='square',\ colorMap=colormap, offSet=(i+1)*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True) sequence=prot+".fasta", isColorBarOn=True)
else:
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi.png", 1, None,\ plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi",beginning, end,\
# colorMap='Oranges_r', offSet=0, pixelType='square') colorMap=colormap, offSet=offset, pixelType='square',\
sequence=prot+".fasta", interactive=False, isColorBarOn=True)
#Convert standard combined output to a transposed format to increase #Convert standard combined output to a transposed format to increase
#legibility. #legibility.
...@@ -1188,7 +1196,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1188,7 +1196,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
aaAndPosition = [] aaAndPosition = []
oldNamesList = gemmeDF.columns.tolist() oldNamesList = gemmeDF.columns.tolist()
for i in range(len(list(seq))): for i in range(len(list(seq))):
aaAndPosition.append(list(seq)[i]+str(i+1+args.offset)) aaAndPosition.append(list(seq)[i]+str(i+1+offset))
# gemmeDFtrans = pd.DataFrame(gemmeDFtrans, index=aaAndPosition) # gemmeDFtrans = pd.DataFrame(gemmeDFtrans, index=aaAndPosition)
gemmeDFtrans.rename(index = dict(map(lambda i,j : (i,j) , oldNamesList,aaAndPosition)), inplace=True) gemmeDFtrans.rename(index = dict(map(lambda i,j : (i,j) , oldNamesList,aaAndPosition)), inplace=True)
# gemmeDFtrans.set_axis(aaAndPosition) # gemmeDFtrans.set_axis(aaAndPosition)
...@@ -1205,16 +1213,23 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1205,16 +1213,23 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
cleanTheMess(prot,bFile,fFile, chainID=chains[0], verbosity=verbosity) cleanTheMess(prot,bFile,fFile, chainID=chains[0], verbosity=verbosity)
if (__name__ == '__main__'): def main():
"""
Main function (and yes, I know. The name and the documentation are ingenious :).
"""
tic = time.perf_counter() tic = time.perf_counter()
args = parse_command_line() args = parse_command_line()
doit(args.input, args.mutations, args.retrievingMethod, args.blastFile,\ doit(args.input, args.mutations, args.retrievingMethod, args.blastFile,\
args.fastaFile, args.nIter,args.NSeqs, args.jetfile, args.pdbfile,\ args.fastaFile, args.nIter,args.NSeqs, args.jetfile, args.pdbfile,\
args.normweightmode, args.alphabet, args.verbose) args.normweightmode, args.alphabet, args.verbose, args.offset, args.colormap)
toc = time.perf_counter() toc = time.perf_counter()
print(f"ESGEMME computation finished in {toc - tic:0.4f} seconds!") print(f"ESGEMME computation finished in {toc - tic:0.4f} seconds!")
if (__name__ == '__main__'):
main()
...@@ -18,7 +18,7 @@ then ...@@ -18,7 +18,7 @@ then
echo "Running ESGEMME with a user-provided alignment file." echo "Running ESGEMME with a user-provided alignment file."
echo "Using a previously produced prot_jet.res file to check reproducibility!" echo "Using a previously produced prot_jet.res file to check reproducibility!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta --jetfile ../data/BLAT_jet.res esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta --jetfile ../data/BLAT_jet.res
elif [ "$1" == "withpdb" ] elif [ "$1" == "withpdb" ]
then then
...@@ -26,7 +26,7 @@ then ...@@ -26,7 +26,7 @@ then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file. #Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the structural feature calculations!" echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Entire mutational map of the protein will be calculated!" echo "Entire mutational map of the protein will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \ esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode max --pdbfile ../data/blat-af2.pdb --normweightmode max
elif [ "$1" == "withpdb-withmutfile" ] elif [ "$1" == "withpdb-withmutfile" ]
...@@ -35,7 +35,7 @@ then ...@@ -35,7 +35,7 @@ then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file. #Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the structural feature calculations!" echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!" echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \ esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode max \ --pdbfile ../data/blat-af2.pdb --normweightmode max \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut -m ../data/Stiffler_2015_BLAT_ECOLX.mut
...@@ -45,7 +45,7 @@ then ...@@ -45,7 +45,7 @@ then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file. #Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the secondary structure based calculations!" echo "Using blat-af2.pdb for the secondary structure based calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!" echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \ esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode sstjetormax \ --pdbfile ../data/blat-af2.pdb --normweightmode sstjetormax \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut -m ../data/Stiffler_2015_BLAT_ECOLX.mut
#demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline #demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline
...@@ -55,7 +55,7 @@ then ...@@ -55,7 +55,7 @@ then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file. #Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the secondary structure based calculations!" echo "Using blat-af2.pdb for the secondary structure based calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!" echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \ esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode tjetormax \ --pdbfile ../data/blat-af2.pdb --normweightmode tjetormax \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut -m ../data/Stiffler_2015_BLAT_ECOLX.mut
demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline
...@@ -63,6 +63,6 @@ then ...@@ -63,6 +63,6 @@ then
else else
echo "Running EGEMME with a user-provided alignment file." echo "Running EGEMME with a user-provided alignment file."
echo "Since a pdb file is not provided, only evolutionary information will be used!" echo "Since a pdb file is not provided, only evolutionary information will be used!"
python3 $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta
fi fi
################################################################################
# esgemme - A Python program to predict mutational effects of proteins. #
# Authors: Mustafa Tekpinar #
# Copyright (c) Mustafa Tekpinar - Alessandra Carbone 2023 #
# #
# This file is part of esgemme. #
# #
# License: MIT License #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal#
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in #
# all copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,#
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE#
# SOFTWARE. #
################################################################################
from setuptools import setup, find_packages
from esgemme import __version__ as cp_vers
def _read_text(path):
    """Return the whole content of the text file at *path*, decoded as UTF-8.

    The file is opened in a ``with`` block so its handle is closed as soon as
    the read finishes, instead of being left open until garbage collection.
    An explicit encoding keeps the result independent of the build machine's
    locale.
    """
    with open(path, encoding="utf-8") as handle:
        return handle.read()


# One requirement per non-blank line of requirements.txt, whitespace stripped.
_requirements = [
    line.strip()
    for line in _read_text("requirements.txt").split('\n')
    if line.strip()
]

# Package metadata and the console entry point for the 'esgemme' command.
setup(name='esgemme',
      version=cp_vers,
      description="A Python program to predict mutational effects of proteins.",
      long_description=_read_text('README.md'),
      long_description_content_type="text/markdown",
      author="Mustafa Tekpinar",
      author_email="tekpinar@buffalo.edu",
      url="https://github.com/tekpinar/esgemme",
      download_url="https://github.com/tekpinar/esgemme",
      license="MIT",
      classifiers=[
          'Development Status :: 5 - Production/Stable',
          'Environment :: Console',
          'Operating System :: POSIX',
          'Programming Language :: Python :: 3',
          'Programming Language :: Python :: 3.6',
          'Programming Language :: Python :: 3.7',
          'Programming Language :: Python :: 3.8',
          'Programming Language :: Python :: 3.9',
          'License :: OSI Approved :: MIT License',
          'Intended Audience :: Science/Research',
          'Topic :: Scientific/Engineering :: Bio-Informatics',
          'Topic :: Scientific/Engineering :: Chemistry'
      ],
      python_requires='>=3.6',
      install_requires=_requirements,
      # zip_safe=False,
      # Every discovered package except the top-level 'tests' package.
      packages=[p for p in find_packages() if p != 'tests'],
      # file where some variables must be fixed by install
      entry_points={
          'console_scripts': [
              # Installs an 'esgemme' command that calls main() in esgemme/esgemme.py.
              'esgemme=esgemme.esgemme:main'
          ]
      }
      )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment