Commit dca69ed1 by Mustafa Tekpinar

Changed CV cutoff radius from 20 to 6.

parent 6187ee8a
...@@ -98,21 +98,21 @@ RUN pip3 install https://github.com/debbiemarkslab/EVcouplings/archive/develop.z ...@@ -98,21 +98,21 @@ RUN pip3 install https://github.com/debbiemarkslab/EVcouplings/archive/develop.z
# If run command on M1 Mac with macOS Monterey, you can remove sudo # If run command on M1 Mac with macOS Monterey, you can remove sudo
# Build command # Build command
# sudo docker build -t tekpinar/prescott-docker:v1.5.0 . # sudo docker build -t tekpinar/prescott-docker:v1.5.1 .
# sudo docker login # sudo docker login
# sudo docker push tekpinar/prescott-docker:v1.5.0 # sudo docker push tekpinar/prescott-docker:v1.5.1
# Pull command on M1 Mac with macOS Monterey # Pull command on M1 Mac with macOS Monterey
# sudo docker pull tekpinar/prescott-docker:v1.5.0 # sudo docker pull tekpinar/prescott-docker:v1.5.1
# Run commands # Run commands
# sudo docker run -i -t -v $(pwd) --platform linux/amd64 tekpinar/prescott-docker:v1.5.0 /bin/bash # sudo docker run -i -t -v $(pwd) --platform linux/amd64 tekpinar/prescott-docker:v1.5.1 /bin/bash
# Bind your current folder to a particular folder in docker. # Bind your current folder to a particular folder in docker.
# In this way, you fall into the bash terminal of the docker image. # In this way, you fall into the bash terminal of the docker image.
# docker run -ti --rm --mount type=bind,source=$PWD,target=/home/tekpinar/research/lcqb tekpinar/prescott-docker:v1.5.0 # docker run -ti --rm --mount type=bind,source=$PWD,target=/home/tekpinar/research/lcqb tekpinar/prescott-docker:v1.5.1
#If you want to use the docker image like an executable file: #If you want to use the docker image like an executable file:
# Please note that you must have aliBLAT.fasta and blat-af2.pdb inside your current folder and run this command! # Please note that you must have aliBLAT.fasta and blat-af2.pdb inside your current folder and run this command!
# sudo docker run --rm -v $PWD:/home/tekpinar/research/lcqb tekpinar/prescott-docker:v1.5.0 escott aliBLAT.fasta --pdbfile blat-af2.pdb # sudo docker run --rm -v $PWD:/home/tekpinar/research/lcqb tekpinar/prescott-docker:v1.5.1 escott aliBLAT.fasta --pdbfile blat-af2.pdb
...@@ -13,17 +13,17 @@ ...@@ -13,17 +13,17 @@
# https://github.com/sylabs/singularity/releases # https://github.com/sylabs/singularity/releases
#STEP 1: Building singularity from the docker image online: #STEP 1: Building singularity from the docker image online:
# Please note that you will need to change (v1.5.0) to the version you want # Please note that you will need to change (v1.5.1) to the version you want
# if there is an update in the version. # if there is an update in the version.
singularity build prescott-docker-v1.5.0.sif docker://tekpinar/prescott-docker:v1.5.0 singularity build prescott-docker-v1.5.1.sif docker://tekpinar/prescott-docker:v1.5.1
#STEP 2: Running escott with the sif file: #STEP 2: Running escott with the sif file:
singularity exec prescott-docker-v1.5.0.sif escott -h singularity exec prescott-docker-v1.5.1.sif escott -h
#STEP 3: Running prescott with the sif file: #STEP 3: Running prescott with the sif file:
singularity exec prescott-docker-v1.5.0.sif prescott -h singularity exec prescott-docker-v1.5.1.sif prescott -h
#STEP 4: Go to the folder where both aliBLAT.fasta and blat-af2.pdb are located. #STEP 4: Go to the folder where both aliBLAT.fasta and blat-af2.pdb are located.
# These two files are provided in the data folder of this repository. # These two files are provided in the data folder of this repository.
# YOUR_SINGULARITY_DIR is where prescott-docker-v1.5.0.sif file located. # YOUR_SINGULARITY_DIR is where prescott-docker-v1.5.1.sif file located.
singularity exec --home `pwd` $YOUR_SINGULARITY_DIR/prescott-docker-v1.5.0.sif escott aliBLAT.fasta --pdbfile blat-af2.pdb singularity exec --home `pwd` $YOUR_SINGULARITY_DIR/prescott-docker-v1.5.1.sif escott aliBLAT.fasta --pdbfile blat-af2.pdb
...@@ -23,7 +23,7 @@ copyright = '2023, Mustafa Tekpinar' ...@@ -23,7 +23,7 @@ copyright = '2023, Mustafa Tekpinar'
author = 'Mustafa Tekpinar' author = 'Mustafa Tekpinar'
# The full version, including alpha/beta/rc tags # The full version, including alpha/beta/rc tags
release = '1.5.0' release = '1.5.1'
# -- General configuration --------------------------------------------------- # -- General configuration ---------------------------------------------------
......
...@@ -39,6 +39,16 @@ then ...@@ -39,6 +39,16 @@ then
escott ../data/aliBLAT.fasta --pdbfile ../data/blat-af2.pdb -m ../data/Stiffler_2015_BLAT_ECOLX.mut escott ../data/aliBLAT.fasta --pdbfile ../data/blat-af2.pdb -m ../data/Stiffler_2015_BLAT_ECOLX.mut
demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline
elif [ "$1" == "cvrc-test" ]
then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
escott ../data/aliBLAT.fasta --pdbfile ../data/blat-af2.pdb -m ../data/Stiffler_2015_BLAT_ECOLX.mut --cvrc 9
demust compare -i ../data/BLAT_ECOLX_Stiffler_2015_experimental.dat --itype singleline -j BLAT_normPred_evolCombi.txt --jtype singleline
else else
echo "Running ESCOTT with a user-provided alignment file." echo "Running ESCOTT with a user-provided alignment file."
echo "Since a pdb file is not provided, only evolutionary information will be used!" echo "Since a pdb file is not provided, only evolutionary information will be used!"
......
#!/bin/bash #!/bin/bash
#If you have gnomad data from GnomAD version 2.1.1 or version 3.1.2 #If you have gnomad data from GnomAD version 2.1.1 or version 3.1.2
prescott -e ../data/MLH1_normPred_evolCombi.txt -g ../data/gnomAD_v2.1.1_MLH1_HUMAN_ENSG00000076242.csv -s ../data/MLH1.fasta --gnomadversion 2 #prescott -e ../data/MLH1_normPred_evolCombi.txt -g ../data/gnomAD_v2.1.1_MLH1_HUMAN_ENSG00000076242.csv -s ../data/MLH1.fasta --gnomadversion 2
#If you are using GnomAD v4.0.0 data. #If you are using GnomAD v4.0.0 data.
prescott -e ../data/MLH1_normPred_evolCombi.txt -g ../data/gnomAD_v4.0.0_MLH1_HUMAN_ENSG00000076242.csv -s ../data/MLH1.fasta prescott -e ../data/MLH1_normPred_evolCombi.txt -g ../data/gnomAD_v4.0.0_MLH1_HUMAN_ENSG00000076242.csv -s ../data/MLH1.fasta
...@@ -8,4 +8,4 @@ Purpose : A Python program to predict mutational effects of proteins. ...@@ -8,4 +8,4 @@ Purpose : A Python program to predict mutational effects of proteins.
__all__ = ['prescott'] __all__ = ['prescott']
__version__ = '1.5.0' __version__ = '1.5.1'
...@@ -53,7 +53,7 @@ accessType chain change this parameter with -d option of JET. See usage of ...@@ -53,7 +53,7 @@ accessType chain change this parameter with -d option of JET. See usage of
***************************************** *****************************************
>CV >CV
max_dist 20.0 max distance max_dist 6.0 max distance
***************************************** *****************************************
>Interface >Interface
cutoff 0 minimum percentage accessible surface variation of an interface residu cutoff 0 minimum percentage accessible surface variation of an interface residu
......
...@@ -149,6 +149,23 @@ def editConfJETpython(N): ...@@ -149,6 +149,23 @@ def editConfJETpython(N):
#reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True) #reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True)
return 0 return 0
def editDefaultConfChangeCVRC(CVRc):
"""
Edit JET configuration file to modify cutoff radius
when calculating CV (circular variance).
"""
with open("default.conf", "r+" ) as file:
fileContents = file.read()
textPattern = re.compile( re.escape(">CV\nmax_dist 6.0"))
fileContents = textPattern.sub(">CV\nmax_dist {:.1f}".format(CVRc), fileContents)
file.seek( 0 )
file.truncate()
file.write(fileContents)
#reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True)
return 0
def minMaxNormalization(data): def minMaxNormalization(data):
""" """
Min-max normalization of a data array. Min-max normalization of a data array.
...@@ -156,7 +173,7 @@ def minMaxNormalization(data): ...@@ -156,7 +173,7 @@ def minMaxNormalization(data):
return (data - np.min(data)) / (np.max(data) - np.min(data)) return (data - np.min(data)) / (np.max(data) - np.min(data))
# Run JET to compute TJET values # Run JET to compute TJET values
def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl, CVRc):
""" """
Call JET2 and produce prot+"_jet.res" file. Call JET2 and produce prot+"_jet.res" file.
...@@ -194,6 +211,8 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): ...@@ -194,6 +211,8 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
nl: int nl: int
Number of lines after > character in the query sequences file. Number of lines after > character in the query sequences file.
It is obtained in extractQuerySeq() function. It is obtained in extractQuerySeq() function.
CVRc: float or None
Cutoff radius for CV (Circular Variance) calculation.
Returns Returns
------- -------
...@@ -211,6 +230,10 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): ...@@ -211,6 +230,10 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
#third_party_api_requiring_file_system_path(eml) #third_party_api_requiring_file_system_path(eml)
shutil.copy2(prescott_default_conf_path, os.getcwd()) shutil.copy2(prescott_default_conf_path, os.getcwd())
#If you specified a different cutoff radius for CV calculation, it will take effect here
#by changing the default value in default.conf file.
if (CVRc != None):
editDefaultConfChangeCVRC(CVRc)
if retMet=="input": if retMet=="input":
if bFile!='': if bFile!='':
...@@ -381,8 +404,8 @@ def cleanTheMess(prot,bFile,fFile, chainID, verbosity=False): ...@@ -381,8 +404,8 @@ def cleanTheMess(prot,bFile,fFile, chainID, verbosity=False):
if os.path.isfile("caracTest.dat"): if os.path.isfile("caracTest.dat"):
os.remove("caracTest.dat") os.remove("caracTest.dat")
if os.path.isfile("default.conf"): # if os.path.isfile("default.conf"):
os.remove("default.conf") # os.remove("default.conf")
if verbosity == False: if verbosity == False:
if os.path.isfile(prot+"_normPred_evolInd.txt"): if os.path.isfile(prot+"_normPred_evolInd.txt"):
...@@ -674,7 +697,7 @@ def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \ ...@@ -674,7 +697,7 @@ def plotGEMMEmatrix(scanningMatrix, outFile, beg, end, \
subMatrix = scanningMatrix[:, (beg-1):end] subMatrix = scanningMatrix[:, (beg-1):end]
#print(subMatrix) #print(subMatrix)
########################################################################## ###############################################################################
# Set plotting parameters # Set plotting parameters
nres_shown = len(subMatrix[0]) nres_shown = len(subMatrix[0])
# print(nres_shown) # print(nres_shown)
...@@ -992,6 +1015,10 @@ def parse_command_line(): ...@@ -992,6 +1015,10 @@ def parse_command_line():
help="If coil length is > maxcoillength, it will use JET values.", help="If coil length is > maxcoillength, it will use JET values.",
required=False, default=5) required=False, default=5)
parser.add_argument('--cvrc', dest='cvrc', type=float, \
help="Cutoff radius for CV (Circular Variance) calculation. Default is 6.0 Angstrom in default.conf file.)",
required=False, default=None)
# parser.add_argument('--coilbreakerlength', dest='coilbreakerlength', type=int, \ # parser.add_argument('--coilbreakerlength', dest='coilbreakerlength', type=int, \
# help="Length of coil breaking S or T residues between C(S|T)C pattern.", # help="Length of coil breaking S or T residues between C(S|T)C pattern.",
# required=False, default=1) # required=False, default=1)
...@@ -1020,11 +1047,11 @@ def parse_command_line(): ...@@ -1020,11 +1047,11 @@ def parse_command_line():
def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,\ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,\
alphabet, verbosity, offset, colormap, maxCoilLength = 5): alphabet, verbosity, offset, colormap, CVRc, maxCoilLength = 5):
""" """
Perfect explanation for a function: typing the function call exactly! Perfect explanation for a function: typing the function call exactly!
doit is basically the main function in disguise! doit is basically the main function in disguise!
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile, args.jetfile)
""" """
debug = 0 debug = 0
if(mutFile != ''): if(mutFile != ''):
...@@ -1073,7 +1100,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1073,7 +1100,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
#It is too much buggy and it has too many dependencies. #It is too much buggy and it has too many dependencies.
#Using it with a Docker or Singularity may be the best solution! #Using it with a Docker or Singularity may be the best solution!
print("computing conservation levels...") print("computing conservation levels...")
launchJET(prot,retMet,bFile,fFile, pdbfile, chains, n,N,nl) launchJET(prot,retMet,bFile,fFile, pdbfile, chains, n,N,nl, CVRc)
print("done") print("done")
else: else:
print("using previously calculated JET2 data from "+jetfile+"...") print("using previously calculated JET2 data from "+jetfile+"...")
...@@ -1484,7 +1511,7 @@ def main(): ...@@ -1484,7 +1511,7 @@ def main():
doit(args.input, args.mutations, args.retrievingMethod, args.blastFile,\ doit(args.input, args.mutations, args.retrievingMethod, args.blastFile,\
args.fastaFile, args.nIter,args.NSeqs, args.jetfile, args.pdbfile,\ args.fastaFile, args.nIter,args.NSeqs, args.jetfile, args.pdbfile,\
args.normweightmode, args.alphabet, args.verbose, args.offset, \ args.normweightmode, args.alphabet, args.verbose, args.offset, \
args.colormap, maxCoilLength=args.maxcoillength) args.colormap, args.cvrc, maxCoilLength=args.maxcoillength)
toc = time.perf_counter() toc = time.perf_counter()
......
...@@ -926,10 +926,10 @@ def main(): ...@@ -926,10 +926,10 @@ def main():
my_file.write("{:.2f},".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0]))) my_file.write("{:.2f},".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])))
# if(os.path.exists(protein+'_singleline.txt')): if(os.path.exists(protein+'_singleline.txt')):
# os.remove(protein+'_singleline.txt') os.remove(protein+'_singleline.txt')
# if(os.path.exists(protein+'_singleline_1-ranksort.txt')): if(os.path.exists(protein+'_singleline_1-ranksort.txt')):
# os.remove(protein+'_singleline_1-ranksort.txt') os.remove(protein+'_singleline_1-ranksort.txt')
if __name__ == "__main__": if __name__ == "__main__":
main() main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment