Commit 06d1659f by Mustafa Tekpinar

Reorganized the code for pip installation.

parent 4223d132
......@@ -4,3 +4,5 @@
example/.RData
gemmeAnal.pyc
__pycache__/*
esgemme.egg-info/*
.RData
\ No newline at end of file
"""
Program Name: esgemme
Author      : Mustafa TEKPINAR
Copyright   : Mustafa TEKPINAR - Alessandra Carbone - 2022-2023
Email       : tekpinar@buffalo.edu
Purpose     : A Python program to predict mutational effects of proteins.
"""
# Names exported by ``from esgemme import *``.
__all__ = ['esgemme']
# Package version string; setup.py imports it to set the distribution version.
__version__ = '1.3.0'
......@@ -13,7 +13,9 @@ import re
import subprocess
import math
import numpy as np
import matplotlib.pylab as plt
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
import shutil
import glob
......@@ -944,7 +946,8 @@ def parse_command_line():
return args
def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, alphabet, verbosity):
def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,\
alphabet, verbosity, offset, colormap):
"""
Perfect explanation for a function: typing the function call exactly!
doit is basically the main function in disguise!
......@@ -1106,7 +1109,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
if(simple):
print("generating the plots...")
#Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolEpi.txt") and args.verbose==True):
if(os.path.exists(prot+"_normPred_evolEpi.txt") and verbosity==True):
gemmeData = parseGEMMEoutput(prot+"_normPred_evolEpi.txt", verbose=False)
sequenceLength = len(gemmeData[0])
beginning = 1
......@@ -1118,19 +1121,21 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+1), \
i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\
colorMap='Blues_r', offSet=i*rowLength + args.offset, pixelType='square',\
colorMap='Blues_r', offSet=i*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \
end,\
colorMap='Blues_r', offSet=(i+1)*rowLength + args.offset, pixelType='square',\
colorMap='Blues_r', offSet=(i+1)*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
else:
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi", beginning, end,\
colorMap='Blues_r', offSet=offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolEpi.png", 1, None,\
# colorMap='Blues_r', offSet=0, pixelType='square')
#Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolInd.txt") and args.verbose==True):
if(os.path.exists(prot+"_normPred_evolInd.txt") and verbosity==True):
gemmeData = parseGEMMEoutput(prot+"_normPred_evolInd.txt", verbose=False)
sequenceLength = len(gemmeData[0])
beginning = 1
......@@ -1142,16 +1147,18 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+1), \
i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\
colorMap='Greens_r', offSet=i*rowLength + args.offset, pixelType='square',\
colorMap='Greens_r', offSet=i*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \
end,\
colorMap='Greens_r', offSet=(i+1)*rowLength + args.offset, pixelType='square',\
colorMap='Greens_r', offSet=(i+1)*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
else:
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd",beginning, end,\
colorMap='Greens_r', offSet=offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolInd.png", 1, None,\
# colorMap='Greens_r', offSet=0, pixelType='square')
#Check if the normalized data files were created.
if(os.path.exists(prot+"_normPred_evolCombi.txt")):
......@@ -1166,18 +1173,19 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+1), \
i*rowLength + beginning, \
(i+1)*rowLength + beginning -1,\
colorMap=args.colormap, offSet=i*rowLength + args.offset, pixelType='square',\
colorMap=colormap, offSet=i*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
if(sequenceLength%rowLength != 0):
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi"+"_part_"+str(i+2), \
(i+1)*rowLength + beginning, \
end,\
colorMap=args.colormap, offSet=(i+1)*rowLength + args.offset, pixelType='square',\
colorMap=colormap, offSet=(i+1)*rowLength + offset, pixelType='square',\
sequence=prot+".fasta", isColorBarOn=True)
# plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi.png", 1, None,\
# colorMap='Oranges_r', offSet=0, pixelType='square')
else:
plotGEMMEmatrix(gemmeData, prot+"_normPred_evolCombi",beginning, end,\
colorMap=colormap, offSet=offset, pixelType='square',\
sequence=prot+".fasta", interactive=False, isColorBarOn=True)
#Convert standard combined output to a transposed format to increase
#legibility.
......@@ -1188,7 +1196,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
aaAndPosition = []
oldNamesList = gemmeDF.columns.tolist()
for i in range(len(list(seq))):
aaAndPosition.append(list(seq)[i]+str(i+1+args.offset))
aaAndPosition.append(list(seq)[i]+str(i+1+offset))
# gemmeDFtrans = pd.DataFrame(gemmeDFtrans, index=aaAndPosition)
gemmeDFtrans.rename(index = dict(map(lambda i,j : (i,j) , oldNamesList,aaAndPosition)), inplace=True)
# gemmeDFtrans.set_axis(aaAndPosition)
......@@ -1205,16 +1213,23 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
cleanTheMess(prot,bFile,fFile, chainID=chains[0], verbosity=verbosity)
if (__name__ == '__main__'):
def main():
    """
    Command-line entry point of esgemme.

    Parses the command-line arguments, forwards every option to doit() and
    prints the elapsed time once the computation has finished. setup.py
    registers this function as the ``esgemme`` console script.
    """
    tic = time.perf_counter()

    args = parse_command_line()

    # doit() receives offset and colormap as explicit parameters, so they
    # must be forwarded here together with the other options.
    doit(args.input, args.mutations, args.retrievingMethod, args.blastFile,
         args.fastaFile, args.nIter, args.NSeqs, args.jetfile, args.pdbfile,
         args.normweightmode, args.alphabet, args.verbose,
         args.offset, args.colormap)

    toc = time.perf_counter()
    print(f"ESGEMME computation finished in {toc - tic:0.4f} seconds!")
if (__name__ == '__main__'):
main()
......@@ -18,7 +18,7 @@ then
echo "Running ESGEMME with a user-provided alignment file."
echo "Using a previously produced prot_jet.res file to check reproducibility!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta --jetfile ../data/BLAT_jet.res
esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta --jetfile ../data/BLAT_jet.res
elif [ "$1" == "withpdb" ]
then
......@@ -26,7 +26,7 @@ then
#Please note that CV is a structural feature and it cannot be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Entire mutational map of the protein will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode max
elif [ "$1" == "withpdb-withmutfile" ]
......@@ -35,7 +35,7 @@ then
#Please note that CV is a structural feature and it cannot be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode max \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut
......@@ -45,7 +45,7 @@ then
#Please note that CV is a structural feature and it cannot be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the secondary structure based calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode sstjetormax \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut
#demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline
......@@ -55,7 +55,7 @@ then
#Please note that CV is a structural feature and it cannot be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the secondary structure based calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode tjetormax \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut
demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline
......@@ -63,6 +63,6 @@ then
else
echo "Running EGEMME with a user-provided alignment file."
echo "Since a pdb file is not provided, only evolutionary information will be used!"
python3 $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta
esgemme ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta
fi
################################################################################
# esgemme - A Python program to predict mutational effects of proteins. #
# Authors: Mustafa Tekpinar #
# Copyright (c) Mustafa Tekpinar - Alessandra Carbone 2023 #
# #
# This file is part of esgemme. #
# #
# License: MIT License #
# #
# Permission is hereby granted, free of charge, to any person obtaining a copy #
# of this software and associated documentation files (the "Software"), to deal#
# in the Software without restriction, including without limitation the rights #
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell #
# copies of the Software, and to permit persons to whom the Software is #
# furnished to do so, subject to the following conditions: #
# #
# The above copyright notice and this permission notice shall be included in #
# all copies or substantial portions of the Software. #
# #
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,#
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE#
# SOFTWARE. #
################################################################################
from setuptools import setup, find_packages
from esgemme import __version__ as cp_vers
# Read the long description with an explicit encoding inside a context
# manager: the file handle is closed deterministically and decoding does not
# depend on the locale of the machine running the installation.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

# requirements.txt lists one requirement per line; surrounding whitespace is
# stripped and blank lines are dropped.
with open('requirements.txt', encoding='utf-8') as requirements_file:
    stripped_lines = (line.strip() for line in requirements_file)
    install_requires = [requirement for requirement in stripped_lines if requirement]

setup(name='esgemme',
      version=cp_vers,
      description="A Python program to predict mutational effects of proteins.",
      long_description=long_description,
      long_description_content_type="text/markdown",
      author="Mustafa Tekpinar",
      author_email="tekpinar@buffalo.edu",
      url="https://github.com/tekpinar/esgemme",
      download_url="https://github.com/tekpinar/esgemme",
      license="MIT",
      classifiers=[
          'Development Status :: 5 - Production/Stable',
          'Environment :: Console',
          'Operating System :: POSIX',
          'Programming Language :: Python :: 3',
          'Programming Language :: Python :: 3.6',
          'Programming Language :: Python :: 3.7',
          'Programming Language :: Python :: 3.8',
          'Programming Language :: Python :: 3.9',
          'License :: OSI Approved :: MIT License',
          'Intended Audience :: Science/Research',
          'Topic :: Scientific/Engineering :: Bio-Informatics',
          'Topic :: Scientific/Engineering :: Chemistry'
      ],
      python_requires='>=3.6',
      install_requires=install_requires,
      # zip_safe=False,
      packages=[p for p in find_packages() if p != 'tests'],
      # Installs an ``esgemme`` command that calls main() in esgemme/esgemme.py.
      entry_points={
          'console_scripts': [
              'esgemme=esgemme.esgemme:main'
          ]
      }
      )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment