Commit 1383201b by Mustafa Tekpinar

Added DFI calculation and weighting option.

parent e82bf1a9
......@@ -118,7 +118,16 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"]))
}
}
} else if ((normWeightMode=="max-trace-pc-cv")|(normWeightMode=="max-trace-cv-pc")){
} else if ((normWeightMode=="max-trace-dfi") | (normWeightMode=="max-dfi-trace")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
trace<-append(trace, max(jet[row, "traceMax"], jet[row, "dfi"]))
}else{
trace<-append(trace, max(jet[row, "trace"], jet[row, "dfi"]))
}
}
}else if ((normWeightMode=="max-trace-pc-cv")|(normWeightMode=="max-trace-cv-pc")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
......@@ -165,6 +174,11 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "pc"])
}
}else if (normWeightMode=="dfi"){
print("Using only DFI values")
for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "dfi"])
}
}else{
print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
......
......@@ -24,7 +24,7 @@ then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-curated.pdb for the structural feature calculations!"
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-curated.pdb --normweightmode max-trace-pc-cv
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-curated.pdb --normweightmode dfi
else
echo "Running GEMME with a user-provided alignment file."
......
......@@ -11,12 +11,115 @@ import argparse
import re
import subprocess
import math
from xmlrpc.client import boolean
import numpy as np
import matplotlib.pylab as plt
from prody import *
from scipy.stats import rankdata
from gemmeAnal import *
import pandas as pd
def rankSortProteinData(dataArray):
"""
This function ranksorts protein data and converts it
to values [0,1.0].
Parameters:
----------
dataArray: numpy array of arrays
data read by numpy.loadtxt
Returns:
-------
normalizedRankedDataArray: numpy array
"""
normalizedRankedDataArray = rankdata(dataArray)/float(len(dataArray))
# np.savetxt(outfile, np.array([dataArray, normalizedRankedDataArray]).T)
return (1.0 - normalizedRankedDataArray)
def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
"""
Calculate rank normalized dynamic flexibility index.
"""
#confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
anm = ANM('ANM')
anm.buildHessian(calphas)
if(nmodes==None):
anm.calcModes(n_modes='all')
else:
anm.calcModes(n_modes=int(nmodes))
dfi = calcDynamicFlexibilityIndex(anm, calphas, select='ca', norm=True)
percentDfi = rankSortProteinData(dfi)
if(attenuate.lower()=='true'):
percentDfi = attenuateEndPoints(percentDfi)
# if(outfile != None):
# np.savetxt(outfile, percentDfi)
# else:
# np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
return percentDfi
else:
return dfi
def attenuateEndPoints(dfi):
"""
Reduce DFI on N and C terminals with a sigmoid function.
"""
debug = 0
n = len(dfi)
if(debug):
#Check array lengths
print(n/2)
print(len(dfi))
if(n%2 == 0):
x1 = np.linspace(-0, 10, int(n/2))
x2 = np.linspace(-10, 0, int(n/2))
else:
x1 = np.linspace(-0, 10, int((n-1)/2))
x2 = np.linspace(-10, 0, int((n+1)/2))
z1 = 1/(1 + np.exp(-15.0*x1))
z2 = 1.0 - (1/(1 + np.exp(-15.0*x2)))
z = np.append(z1, z2)
if(debug):
#Check array lengths
print(len(z1))
print(len(z2))
print(len(z))
#Plot the arrays
plt.plot(z)
plt.plot(dfi)
plt.plot(dfi*z)
plt.xlabel("x")
plt.ylabel("Sigmoid(X)")
plt.show()
if(len(dfi) == len(z)):
return (dfi*z)
else:
print("ERROR: Can not attenuate N and C terminal dfi signals!")
sys.exit(-1)
alphabeticalAminoAcidsList = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
def parseGEMMEoutput(inputFile, verbose):
......@@ -265,7 +368,7 @@ def parse_command_line():
required=False, default=None)
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'. Default is 'trace'.",
help="It can be one of these: 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'. Default is 'trace'.",
required=False, default="trace")
args = parser.parse_args()
......@@ -306,19 +409,22 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
if( (normWeightMode != 'trace') and \
(normWeightMode != 'cv') and \
(normWeightMode != 'pc') and \
(normWeightMode != 'dfi') and \
(normWeightMode != 'max-trace-pc') and \
(normWeightMode != 'max-pc-trace') and \
(normWeightMode != 'max-pc-cv') and \
(normWeightMode != 'max-cv-pc') and \
(normWeightMode != 'max-trace-cv') and \
(normWeightMode != 'max-cv-trace') and \
(normWeightMode != 'max-trace-dfi') and \
(normWeightMode != 'max-dfi-trace') and \
(normWeightMode != 'max-trace-pc-cv') and \
(normWeightMode != 'max-trace-cv-pc') and \
(normWeightMode != 'half-pc+cv') and \
(normWeightMode != 'half-cv+pc') and \
(normWeightMode != 'max-trace-half-cv+pc') and \
(normWeightMode != 'max-trace-half-pc+cv')):
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'max-trace-pc', 'max-pc-cv', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc' or max-trace-half-cv+pc!")
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-pc-cv', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc' or max-trace-half-cv+pc!")
sys.exit(-1)
if((jetfile) == None):
......@@ -339,6 +445,19 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
shutil.copy2(jetfile, prot+"_jet.res")
print("done")
#If a real pdb file is given, calculate dfi for the residues.
if((pdbfile != None) and (normWeightMode=='dfi')):
print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi
df = pd.read_table(prot+"_jet.res")
print(df)
df['dfi'] = dfi.round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
launchPred(prot,inAli,mutFile, normWeightMode)
#Do Python plotting here
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment