Commit 282a71e7 by Mustafa Tekpinar

Added bfactor and max-bfactor-trace options to normweightmode

parent 7bd7fcd5
......@@ -128,6 +128,15 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "dfi"]))
}
}
} else if ((normWeightMode=="max-trace-bfactor") | (normWeightMode=="max-bfactor-trace")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
trace<-append(trace, max(jet[row, "traceMax"], jet[row, "bfactor"]))
}else{
trace<-append(trace, max(jet[row, "trace"], jet[row, "bfactor"]))
}
}
}else if ((normWeightMode=="max-trace-pc-cv")|(normWeightMode=="max-trace-cv-pc")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
......@@ -180,9 +189,14 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "dfi"])
}
}else if (normWeightMode=="bfactor"){
print("Using only Bfactor values")
for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "bfactor"])
}
}else{
print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv', 'dfi', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
print("It can only be 'trace', 'pc', 'cv', 'dfi', 'bfactor', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc', 'max-trace-dfi' or 'max-trace-bfactor'!")
}
print(trace)
......@@ -219,7 +233,7 @@ print(paste("Reading frequencies:"))
for (row in 1:nrow(jet)) {
frequencies<-append(frequencies, jet[row, "freq"] )
}
print(frequencies)
#print(frequencies)
freq_mean = mean(frequencies)
#alpha = 0.6
......@@ -238,7 +252,7 @@ for (row in 1:nrow(jet)) {
# alpha<-append(alpha, (1.0 - alpha_val))
# }
# }
print(alpha)
#print(alpha)
......
......@@ -21,7 +21,7 @@ from gemmeAnal import *
import pandas as pd
def rankSortProteinData(dataArray):
def rankSortProteinData(dataArray, inverted=True):
"""
This function ranksorts protein data and converts it
to values [0,1.0].
......@@ -38,7 +38,10 @@ def rankSortProteinData(dataArray):
normalizedRankedDataArray = rankdata(dataArray)/float(len(dataArray))
# np.savetxt(outfile, np.array([dataArray, normalizedRankedDataArray]).T)
if(inverted==True):
return (1.0 - normalizedRankedDataArray)
else:
return (normalizedRankedDataArray)
def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
"""
......@@ -73,6 +76,68 @@ def calcPercentDFI(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="
else:
return dfi
def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
"""
Calculate rank normalized dynamic flexibility index.
"""
#confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
anm = ANM('ANM')
anm.buildHessian(calphas)
if(nmodes==None):
anm.calcModes(n_modes='all')
else:
anm.calcModes(n_modes=int(nmodes))
bfactors = calcSqFlucts(anm)
percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
return percentBfactors
else:
return bfactors
def getBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"):
"""
Calculate rank normalized dynamic flexibility index.
"""
#confProDy(auto_show=False)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
bfactors = calphas.getBetas()
percentBfactors = rankSortProteinData(bfactors, inverted=False)
if(attenuate.lower()=='true'):
percentBfactors = attenuateEndPoints(percentBfactors)
# if(outfile != None):
# np.savetxt(outfile, percentBfactors)
# else:
# np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'):
return percentBfactors
else:
return bfactors
def attenuateEndPoints(dfi):
"""
Reduce DFI on N and C terminals with a sigmoid function.
......@@ -414,6 +479,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(normWeightMode != 'cv') and \
(normWeightMode != 'pc') and \
(normWeightMode != 'dfi') and \
(normWeightMode != 'bfactor') and \
(normWeightMode != 'max-trace-pc') and \
(normWeightMode != 'max-pc-trace') and \
(normWeightMode != 'max-pc-cv') and \
......@@ -421,15 +487,18 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(normWeightMode != 'max-trace-cv') and \
(normWeightMode != 'max-cv-trace') and \
(normWeightMode != 'max-trace-dfi') and \
(normWeightMode != 'max-trace-bfactor') and \
(normWeightMode != 'max-dfi-trace') and \
(normWeightMode != 'max-bfactor-trace') and \
(normWeightMode != 'max-trace-pc-cv') and \
(normWeightMode != 'max-trace-cv-pc') and \
(normWeightMode != 'half-pc+cv') and \
(normWeightMode != 'half-cv+pc') and \
(normWeightMode != 'max-trace-half-cv+pc') and \
(normWeightMode != 'max-trace-half-pc+cv')):
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', \n"+\
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', bfactor,\n"+\
" 'max-trace-pc', 'max-trace-dfi', 'max-pc-cv', "+\
" 'max-trace-bfactor'"+\
" 'max-trace-cv', 'max-trace-pc-cv'"+\
" 'half-cv+pc' or max-trace-half-cv+pc!")
sys.exit(-1)
......@@ -460,7 +529,11 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("done")
#If a real pdb file is given, calculate dfi for the residues.
if((pdbfile != None) and ((normWeightMode=='dfi') or (normWeightMode=='max-trace-dfi') or (normWeightMode=='max-dfi-trace'))):
if(((normWeightMode=='dfi') or (normWeightMode=='max-trace-dfi') or (normWeightMode=='max-dfi-trace'))):
if (pdbfile != None):
print("ERROR: There is not any pdb file to calculate DFI.")
sys.exit(-1)
else:
print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi
......@@ -472,6 +545,29 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print(df)
#If a real pdb file is given, calculate or get Bfactors for the residues.
if(((normWeightMode=='bfactor') or (normWeightMode=='max-trace-bfactor') or (normWeightMode=='max-bfactor-trace'))):
isCalc = False
if (pdbfile != None):
print("ERROR: There is not any pdb file to calculate or get Bfactors!")
sys.exit(-1)
else:
print("Calculating or getting Bfactor data per residue!")
if (isCalc):
bfactors = calcBfactors(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
else:
bfactors = getBfactors(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
bfactors = bfactors
df = pd.read_table(prot+"_jet.res")
print(df)
df['bfactor'] = bfactors.round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
launchPred(prot,inAli,mutFile, normWeightMode, alphabet)
#Do Python plotting here
......@@ -512,7 +608,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print(" Can not generate "+prot+"_normPred_evolCombi.png file!")
print("done")
#cleanTheMess(prot,bFile,fFile, chainID=chains[0])
cleanTheMess(prot,bFile,fFile, chainID=chains[0])
def main():
......
......@@ -354,7 +354,6 @@ computePredSimple<-function(mat, distTrace, wt, thresh){
for(a in aa){
if(a!=wt[i]){
sel = which(mat[,i]==a)
<<<<<<< HEAD
# print(sel)
if(length(sel)>0){
sortedDist=sort(distTrace[sel-1])
......@@ -367,9 +366,7 @@ computePredSimple<-function(mat, distTrace, wt, thresh){
}
}
=======
if(length(sel)>0){sortedDist=sort(distTrace[sel-1])}
>>>>>>> parent of bd075e8... Added mt.20.txt, namely full alphabet!
else{sortedDist = c()}
res = c(res,findMinDist(sortedDist,thresh))
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment