Commit 7db0ac49 by Mustafa Tekpinar

Several interface and computational changes.

1-Moved the normweightmode to the beginning of the program.
In this way, the weight will be applied at the beginning
instead of just the final normalization step. Our tests
show that it improves the results when Max(JET, (PC+CV)/2)
is used at the beginning. Most of the changes in this part
are in computePred.R file.
2-When the user provided a mutation list
as a file, the program raised an error. The error was in the apply(...)
part of the normalizePredWithNbSeqsPCSelMult function. The bug
is corrected now and the program produces results consistent with
the web server.

3-A small bug in gemme.py was also corrected.
If a mutation list file is given, the model is not 'simple'
and therefore, it will not produce a map of single point
mutations.
parent 7f465557
...@@ -118,7 +118,8 @@ def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ...@@ -118,7 +118,8 @@ def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
else: else:
return bfactors return bfactors
def getBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"): def getBfactors(pdbfile, outfile, attenuate="true", \
ranksorted="true", inverted=True):
""" """
Calculate rank normalized dynamic flexibility index. Calculate rank normalized dynamic flexibility index.
""" """
...@@ -141,10 +142,15 @@ def getBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="tru ...@@ -141,10 +142,15 @@ def getBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="tru
# np.savetxt(outfile, dfi) # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'): if(ranksorted.lower()=='true'):
return percentBfactors if(inverted==True):
return (1.0 - percentBfactors)
else:
return percentBfactors
else: else:
return bfactors if(inverted==True):
return (np.max(bfactors) - bfactors)
else:
return bfactors
def attenuateEndPoints(dfi): def attenuateEndPoints(dfi):
""" """
...@@ -437,15 +443,15 @@ def parse_command_line(): ...@@ -437,15 +443,15 @@ def parse_command_line():
required=False, default=None) required=False, default=None)
retMet_args.add_argument('--alphabet', dest='alphabet', type=str, \ retMet_args.add_argument('--alphabet', dest='alphabet', type=str, \
help="Which alphabet to use. Default is lz-bl.7", help="Which alphabet to use. Default is lw-i.7",
required=False, default="lz-bl.7") required=False, default="lw-i.7")
retMet_args.add_argument('-p', '--pdbfile', dest='pdbfile', type=str, \ retMet_args.add_argument('-p', '--pdbfile', dest='pdbfile', type=str, \
help="If a pdb file is provided, it will skip fake pdb file production step and use that file. Default is None", help="If a pdb file is provided, it will skip fake pdb file production step and use that file. Default is None",
required=False, default=None) required=False, default=None)
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \ retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'. Default is 'trace'.", help="It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracepc', 'maxtracecv', 'maxtracepccv' or 'halfcvpc'. Default is 'trace'.",
required=False, default="trace") required=False, default="trace")
args = parser.parse_args() args = parser.parse_args()
...@@ -467,7 +473,10 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -467,7 +473,10 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
doit is basically the main function in disguise! doit is basically the main function in disguise!
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile, args.jetfile) doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile, args.jetfile)
""" """
simple = True if(mutFile != ''):
simple = False
else:
simple = True
prot,seq,nl=extractQuerySeq(inAli) prot,seq,nl=extractQuerySeq(inAli)
...@@ -488,27 +497,28 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -488,27 +497,28 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(normWeightMode != 'pc') and \ (normWeightMode != 'pc') and \
(normWeightMode != 'dfi') and \ (normWeightMode != 'dfi') and \
(normWeightMode != 'bfactor') and \ (normWeightMode != 'bfactor') and \
(normWeightMode != 'max-trace-pc') and \ (normWeightMode != 'maxtracepc') and \
(normWeightMode != 'max-pc-trace') and \ (normWeightMode != 'maxpctrace') and \
(normWeightMode != 'max-pc-cv') and \ (normWeightMode != 'maxpccv') and \
(normWeightMode != 'max-cv-pc') and \ (normWeightMode != 'maxcvpc') and \
(normWeightMode != 'max-trace-cv') and \ (normWeightMode != 'maxtracecv') and \
(normWeightMode != 'max-cv-trace') and \ (normWeightMode != 'maxcvtrace') and \
(normWeightMode != 'max-trace-dfi') and \ (normWeightMode != 'maxtracedfi') and \
(normWeightMode != 'max-trace-bfactor') and \ (normWeightMode != 'maxtracebfactor') and \
(normWeightMode != 'max-dfi-trace') and \ (normWeightMode != 'maxdfitrace') and \
(normWeightMode != 'max-bfactor-trace') and \ (normWeightMode != 'maxbfactortrace') and \
(normWeightMode != 'max-trace-pc-cv') and \ (normWeightMode != 'maxtracepccv') and \
(normWeightMode != 'max-trace-cv-pc') and \ (normWeightMode != 'maxtracecvpc') and \
(normWeightMode != 'half-pc+cv') and \ (normWeightMode != 'halfpccv') and \
(normWeightMode != 'half-cv+pc') and \ (normWeightMode != 'halfcvpc') and \
(normWeightMode != 'max-trace-half-cv+pc') and \ (normWeightMode != 'maxtracehalfcvpc') and \
(normWeightMode != 'max-trace-half-pc+cv')): (normWeightMode != 'tracemovingaverage') and \
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', bfactor,\n"+\ (normWeightMode != 'maxtracehalfpccv')):
" 'max-trace-pc', 'max-trace-dfi', 'max-pc-cv', "+\ print("ERROR: normWeightMode can only be 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', bfactor,\n"+\
" 'max-trace-bfactor'"+\ " 'maxtracepc', 'maxtracedfi', 'maxpccv', "+\
" 'max-trace-cv', 'max-trace-pc-cv'"+\ " 'maxtracebfactor'"+\
" 'half-cv+pc' or max-trace-half-cv+pc!") " 'maxtracecv', 'maxtracepccv'"+\
" 'halfcvpc' or maxtracehalfcvpc!")
sys.exit(-1) sys.exit(-1)
structure = parsePDB(prot+".pdb") structure = parsePDB(prot+".pdb")
...@@ -537,7 +547,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -537,7 +547,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("done") print("done")
#If a real pdb file is given, calculate dfi for the residues. #If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='dfi') or (normWeightMode=='max-trace-dfi') or (normWeightMode=='max-dfi-trace'))): if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
if (pdbfile == None): if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.") print("ERROR: There is not any pdb file to calculate DFI.")
sys.exit(-1) sys.exit(-1)
...@@ -553,21 +563,49 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -553,21 +563,49 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print(df) print(df)
#If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='tracemovingaverage'))):
print("Calculating trace moving average per residue!")
# tma = tracemovingaverage
df = pd.read_csv(prot+"_jet.res", delimiter=r"\s+")
print(df.columns)
# Get the window of series
# of observations of specified window size
window_size = 3
windows = df['trace'].rolling(window_size)
# Create a series of moving
# averages of each window
moving_averages = windows.mean()
tma = moving_averages
print(df)
df['tracemovingaverage'] = tma.round(4)
#The first two elements are just copied bc they don't exist due to the moving average.
df['tracemovingaverage'].iloc[0] = df['trace'].iloc[0].round(4)
df['tracemovingaverage'].iloc[1] = df['trace'].iloc[1].round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
#If a real pdb file is given, calculate or get Bfactors for the residues. #If a real pdb file is given, calculate or get Bfactors for the residues.
if(((normWeightMode=='bfactor') or (normWeightMode=='max-trace-bfactor') or (normWeightMode=='max-bfactor-trace'))): if(((normWeightMode=='bfactor') or (normWeightMode=='maxtracebfactor') or (normWeightMode=='maxbfactortrace'))):
isCalc = False isCalc = True
if (pdbfile == None): if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate or get Bfactors!") print("ERROR: There is not any pdb file to calculate or get Bfactors!")
sys.exit(-1) sys.exit(-1)
else: else:
if (isCalc): if (isCalc):
print("Computing Bfactors from the user-provided pdb file using ANM with all modes.") print("Computing Bfactors from the user-provided pdb file using ANM with all modes.")
bfactors = calcBfactors(pdbfile, outfile=None, nmodes=None, \ #Original
attenuate="true", ranksorted="true", inverted=False) # bfactors = calcBfactors(pdbfile, outfile=None, nmodes=None, \
# attenuate="true", ranksorted="true", inverted=False)
bfactors = calcBfactors(pdbfile, outfile=None, nmodes=10, \
attenuate="false", ranksorted="true", inverted=True)
else: else:
print("Getting Bfactors from the user-provided pdb file.") print("Getting Bfactors from the user-provided pdb file.")
bfactors = getBfactors(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true") bfactors = getBfactors(pdbfile, outfile=None,\
attenuate="true", ranksorted="true", inverted=False)
bfactors = bfactors bfactors = bfactors
df = pd.read_table(prot+"_jet.res") df = pd.read_table(prot+"_jet.res")
......
...@@ -358,12 +358,12 @@ computePredSimple<-function(mat, distTrace, wt, thresh){ ...@@ -358,12 +358,12 @@ computePredSimple<-function(mat, distTrace, wt, thresh){
if(length(sel)>0){ if(length(sel)>0){
sortedDist=sort(distTrace[sel-1]) sortedDist=sort(distTrace[sel-1])
if((i==694) & (a=="v")) { # if((i==694) & (a=="v")) {
print(i) # print(i)
print(sel) # print(sel)
print(distTrace[sel-1]) # print(distTrace[sel-1])
print(sortedDist) # print(sortedDist)
} # }
} }
if(length(sel)>0){sortedDist=sort(distTrace[sel-1])} if(length(sel)>0){sortedDist=sort(distTrace[sel-1])}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment