Commit 1383201b by Mustafa Tekpinar

Added DFI calculation and weighting option.

parent e82bf1a9
...@@ -118,7 +118,16 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -118,7 +118,16 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"])) trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"]))
} }
} }
} else if ((normWeightMode=="max-trace-pc-cv")|(normWeightMode=="max-trace-cv-pc")){ } else if ((normWeightMode=="max-trace-dfi") | (normWeightMode=="max-dfi-trace")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
trace<-append(trace, max(jet[row, "traceMax"], jet[row, "dfi"]))
}else{
trace<-append(trace, max(jet[row, "trace"], jet[row, "dfi"]))
}
}
}else if ((normWeightMode=="max-trace-pc-cv")|(normWeightMode=="max-trace-cv-pc")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -165,6 +174,11 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -165,6 +174,11 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "pc"]) trace<-append(trace, jet[row, "pc"])
} }
}else if (normWeightMode=="dfi"){
print("Using only DFI values")
for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "dfi"])
}
}else{ }else{
print("ERROR: Unknown --normWeightMode selected!") print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!") print("It can only be 'trace', 'pc', 'cv', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
......
...@@ -24,7 +24,7 @@ then ...@@ -24,7 +24,7 @@ then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file. #Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-curated.pdb for the structural feature calculations!" echo "Using blat-curated.pdb for the structural feature calculations!"
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-curated.pdb --normweightmode max-trace-pc-cv python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-curated.pdb --normweightmode dfi
else else
echo "Running GEMME with a user-provided alignment file." echo "Running GEMME with a user-provided alignment file."
......
...@@ -11,12 +11,115 @@ import argparse ...@@ -11,12 +11,115 @@ import argparse
import re import re
import subprocess import subprocess
import math import math
from xmlrpc.client import boolean
import numpy as np import numpy as np
import matplotlib.pylab as plt import matplotlib.pylab as plt
from prody import *
from scipy.stats import rankdata
from gemmeAnal import * from gemmeAnal import *
import pandas as pd
def rankSortProteinData(dataArray):
"""
This function ranksorts protein data and converts it
to values [0,1.0].
Parameters:
----------
dataArray: numpy array of arrays
data read by numpy.loadtxt
Returns:
-------
normalizedRankedDataArray: numpy array
"""
normalizedRankedDataArray = rankdata(dataArray)/float(len(dataArray))
# np.savetxt(outfile, np.array([dataArray, normalizedRankedDataArray]).T)
return (1.0 - normalizedRankedDataArray)
def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
"""
Calculate rank normalized dynamic flexibility index.
"""
#confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
anm = ANM('ANM')
anm.buildHessian(calphas)
if(nmodes==None):
anm.calcModes(n_modes='all')
else:
anm.calcModes(n_modes=int(nmodes))
dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
percentDfi = rankSortProteinData(dfi)
if(attenuate.lower()=='true'):
percentDfi = attenuateEndPoints(percentDfi)
# if(outfile != None):
# np.savetxt(outfile, percentDfi)
# else:
# np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
return percentDfi
else:
return dfi
def attenuateEndPoints(dfi):
"""
Reduce DFI on N and C terminals with a sigmoid function.
"""
debug = 0
n = len(dfi)
if(debug):
#Check array lengths
print(n/2)
print(len(dfi))
if(n%2 == 0):
x1 = np.linspace(-0, 10, int(n/2))
x2 = np.linspace(-10, 0, int(n/2))
else:
x1 = np.linspace(-0, 10, int((n-1)/2))
x2 = np.linspace(-10, 0, int((n+1)/2))
z1 = 1/(1 + np.exp(-15.0*x1))
z2 = 1.0 - (1/(1 + np.exp(-15.0*x2)))
z = np.append(z1, z2)
if(debug):
#Check array lengths
print(len(z1))
print(len(z2))
print(len(z))
#Plot the arrays
plt.plot(z)
plt.plot(dfi)
plt.plot(dfi*z)
plt.xlabel("x")
plt.ylabel("Sigmoid(X)")
plt.show()
if(len(dfi) == len(z)):
return (dfi*z)
else:
print("ERROR: Can not attenuate N and C terminal dfi signals!")
sys.exit(-1)
alphabeticalAminoAcidsList = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', alphabeticalAminoAcidsList = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'] 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
def parseGEMMEoutput(inputFile, verbose): def parseGEMMEoutput(inputFile, verbose):
...@@ -265,7 +368,7 @@ def parse_command_line(): ...@@ -265,7 +368,7 @@ def parse_command_line():
required=False, default=None) required=False, default=None)
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \ retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'. Default is 'trace'.", help="It can be one of these: 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'. Default is 'trace'.",
required=False, default="trace") required=False, default="trace")
args = parser.parse_args() args = parser.parse_args()
...@@ -306,19 +409,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode) ...@@ -306,19 +409,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
if( (normWeightMode != 'trace') and \ if( (normWeightMode != 'trace') and \
(normWeightMode != 'cv') and \ (normWeightMode != 'cv') and \
(normWeightMode != 'pc') and \ (normWeightMode != 'pc') and \
(normWeightMode != 'dfi') and \
(normWeightMode != 'max-trace-pc') and \ (normWeightMode != 'max-trace-pc') and \
(normWeightMode != 'max-pc-trace') and \ (normWeightMode != 'max-pc-trace') and \
(normWeightMode != 'max-pc-cv') and \ (normWeightMode != 'max-pc-cv') and \
(normWeightMode != 'max-cv-pc') and \ (normWeightMode != 'max-cv-pc') and \
(normWeightMode != 'max-trace-cv') and \ (normWeightMode != 'max-trace-cv') and \
(normWeightMode != 'max-cv-trace') and \ (normWeightMode != 'max-cv-trace') and \
(normWeightMode != 'max-trace-dfi') and \
(normWeightMode != 'max-dfi-trace') and \
(normWeightMode != 'max-trace-pc-cv') and \ (normWeightMode != 'max-trace-pc-cv') and \
(normWeightMode != 'max-trace-cv-pc') and \ (normWeightMode != 'max-trace-cv-pc') and \
(normWeightMode != 'half-pc+cv') and \ (normWeightMode != 'half-pc+cv') and \
(normWeightMode != 'half-cv+pc') and \ (normWeightMode != 'half-cv+pc') and \
(normWeightMode != 'max-trace-half-cv+pc') and \ (normWeightMode != 'max-trace-half-cv+pc') and \
(normWeightMode != 'max-trace-half-pc+cv')): (normWeightMode != 'max-trace-half-pc+cv')):
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'max-trace-pc', 'max-pc-cv', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc' or max-trace-half-cv+pc!") print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-pc-cv', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc' or max-trace-half-cv+pc!")
sys.exit(-1) sys.exit(-1)
if((jetfile) == None): if((jetfile) == None):
...@@ -339,6 +445,19 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode) ...@@ -339,6 +445,19 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
shutil.copy2(jetfile, prot+"_jet.res") shutil.copy2(jetfile, prot+"_jet.res")
print("done") print("done")
#If a real pdb file is given, calculate dfi for the residues.
if((pdbfile != None) and (normWeightMode=='dfi')):
print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi
df = pd.read_table(prot+"_jet.res")
print(df)
df['dfi'] = dfi.round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
launchPred(prot,inAli,mutFile, normWeightMode) launchPred(prot,inAli,mutFile, normWeightMode)
#Do Python plotting here #Do Python plotting here
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment