Commit 4eaa0f37 by Mustafa Tekpinar

Added an amino acid based alpha index called alpha[i].

For now, alpha[i] is still 0.6 for the all residues to produce
similar outputs for all residues. In addition to this change,
corrected a bug related to the chain naming.
parent 33693642
......@@ -212,7 +212,35 @@ write.table(normPred,paste0(prot,"_normPred_evolEpi.txt"))
print("done")
print("running combined model...")
alpha = 0.6
#Read frequencies from the jet output file.
frequencies = c()
print(paste("Reading frequencies:"))
for (row in 1:nrow(jet)) {
frequencies<-append(frequencies, jet[row, "freq"] )
}
print(frequencies)
freq_mean = mean(frequencies)
#alpha = 0.6
alpha = c()
#If we want to assign each aa the same weight of epistatic and independent
#contribution, that's the way to do it.
for (row in 1:nrow(jet)) {
alpha<-append(alpha, 0.6)
}
# alpha_val = 0.6
# for (row in 1:nrow(jet)) {
# if(frequencies[row]>freq_mean){
# alpha<-append(alpha, alpha_val)
# } else{
# alpha<-append(alpha, (1.0 - alpha_val))
# }
# }
print(alpha)
alphabet = "lz-bl.7"
if(simple){
normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet)
......
......@@ -93,8 +93,8 @@ def attenuateEndPoints(dfi):
x1 = np.linspace(-0, 10, int((n-1)/2))
x2 = np.linspace(-10, 0, int((n+1)/2))
z1 = 1/(1 + np.exp(-15.0*x1))
z2 = 1.0 - (1/(1 + np.exp(-15.0*x2)))
z1 = 1/(1 + np.exp(-2.50*x1))
z2 = 1.0 - (1/(1 + np.exp(-2.50*x2)))
z = np.append(z1, z2)
......@@ -424,15 +424,25 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
(normWeightMode != 'half-cv+pc') and \
(normWeightMode != 'max-trace-half-cv+pc') and \
(normWeightMode != 'max-trace-half-pc+cv')):
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-pc-cv', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc' or max-trace-half-cv+pc!")
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', \n"+\
" 'max-trace-pc', 'max-trace-dfi', 'max-pc-cv', "+\
" 'max-trace-cv', 'max-trace-pc-cv'"+\
" 'half-cv+pc' or max-trace-half-cv+pc!")
sys.exit(-1)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
chains = list(set(calphas.getChids()))
#print(chains)
if((jetfile) == None):
#I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies.
#Using it with a Docker or Singularity may be the best solution!
print("computing conservation levels...")
launchJET(prot,retMet,bFile,fFile, pdbfile, n,N,nl)
launchJET(prot,retMet,bFile,fFile, pdbfile, chains, n,N,nl)
print("done")
else:
print("using previously calculated JET2 data from "+jetfile+"...")
......@@ -446,7 +456,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
print("done")
#If a real pdb file is given, calculate dfi for the residues.
if((pdbfile != None) and (normWeightMode=='dfi')):
if((pdbfile != None) and ((normWeightMode=='dfi') or (normWeightMode=='max-trace-dfi') or (normWeightMode=='max-dfi-trace'))):
print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi
......@@ -498,7 +508,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
print(" Can not generate "+prot+"_normPred_evolCombi.png file!")
print("done")
cleanTheMess(prot,bFile,fFile)
cleanTheMess(prot,bFile,fFile, chainID=chains[0])
def main():
......
......@@ -74,7 +74,7 @@ def editConfJET(N):
return(reCode)
# Run JET to compute TJET values
def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl):
def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
"""
Call JET2 and produce prot+"_jet.res" file.
......@@ -102,6 +102,9 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl):
If it is None, only JET and PC scores are calculated.
Otherwise, CV and other structural-dynamical features also
can be calculated.
chains: list
A list of chains available in the pdb file.
Most of the time, it is supposed to be just one!
n: int
Number of JET2 iterations.
N: int
......@@ -115,42 +118,44 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl):
Nothing
"""
chainID = chains[0]
subprocess.call("cp $GEMME_PATH/default.conf .",shell=True)
if retMet=="input":
if bFile!='':
if(bFile == prot+"_A.psiblast"):
if(bFile == prot+"_"+chainID+".psiblast"):
shutil.copy2(bFile, bFile+".orig")
shutil.copy2(bFile+".orig ", prot+"_A.psiblast")
shutil.copy2(bFile+".orig ", prot+"_"+chainID+".psiblast")
else:
shutil.copy2(bFile+" ", prot+"_A.psiblast")
shutil.copy2(bFile+" ", prot+"_"+chainID+".psiblast")
if(pdbfile == None):
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p J -r input -b "+prot+"_A.psiblast -d chain -n "+n+" > "+prot+".out"
prot+".pdb -o `pwd` -p J -r input -b "+prot+"_"+chainID+".psiblast -d chain -n "+n+" > "+prot+".out"
else:
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJ -r input -b "+prot+"_A.psiblast -d chain -n "+n+" > "+prot+".out"
prot+".pdb -o `pwd` -p AVJ -r input -b "+prot+"_"+chainID+".psiblast -d chain -n "+n+" > "+prot+".out"
else:
print(N)
editConfJET(N)
if(fFile == prot+"_A.fasta"):
if(fFile == prot+"_"+chainID+".fasta"):
shutil.copy2(fFile, fFile+".orig")
#I think this subprocess call causes overwriting of the fasta file.
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+".orig > "+prot+"_A.fasta"
#subprocess.call("cp "+fFile+" "+prot+"_"+chainID+".fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+".orig > "+prot+"_"+chainID+".fasta"
else:
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+" > "+prot+"_A.fasta"
#subprocess.call("cp "+fFile+" "+prot+"_"+chainID+".fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+" > "+prot+"_"+chainID+".fasta"
print("\nRunning:\n"+grpcmd)
subprocess.call(grpcmd,shell=True)
if(pdbfile == None):
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p J -r input -f "+prot+"_A.fasta -d chain -n "+n+" > "+prot+".out"
prot+".pdb -o `pwd` -p J -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out"
else:
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJ -r input -f "+prot+"_A.fasta -d chain -n "+n+" > "+prot+".out"
prot+".pdb -o `pwd` -p AVJ -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out"
else:
if(pdbfile == None):
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
......@@ -179,18 +184,18 @@ def launchPred(prot,inAli,mutFile, normWeightMode):
return(reCode)
# Remove temporary files
def cleanTheMess(prot,bFile,fFile):
def cleanTheMess(prot,bFile,fFile, chainID):
if bFile!='':
if bFile!=prot+"_A.psiblast":
os.remove(prot+"_A.psiblast")
if bFile!=prot+"_"+chainID+".psiblast":
os.remove(prot+"_"+chainID+".psiblast")
else:
if os.path.isfile(prot+"/"+prot+"_A.psiblast"):
os.rename(prot+"/"+prot+"_A.psiblast",prot+"_A.psiblast")
if os.path.isfile(prot+"/"+prot+"_"+chainID+".psiblast"):
os.rename(prot+"/"+prot+"_"+chainID+".psiblast",prot+"_"+chainID+".psiblast")
if fFile!='':
if fFile!=prot+"_A.fasta":
if os.path.isfile(prot+"_A.fasta"):
os.remove(prot+"_A.fasta")
if fFile!=prot+"_"+chainID+".fasta":
if os.path.isfile(prot+"_"+chainID+".fasta"):
os.remove(prot+"_"+chainID+".fasta")
# if os.path.isfile(prot+"/"+prot+"_jet.res"):
# os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
# os.remove(prot+".pdb")
......
......@@ -404,7 +404,7 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
B = which.class(a,alphabet)
# I think this is the place I should modify. alpha will become alpha[i]
normPred[a,i] = alpha * normPred[a,i] - (1-alpha) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
normPred[a,i] = alpha[i] * normPred[a,i] - (1-alpha[i]) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
}
normPred[,i] = normPred[,i] * trace[i]
normPred[aa==wt[i],i] = NA
......@@ -435,7 +435,7 @@ normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSe
ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]])
val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1)
if(is.na(val)){val=- log(1/maxRefVal)}
predTMP=c(predTMP, alpha * val - (1-alpha) * ratio)
predTMP=c(predTMP, alpha[i] * val - (1-alpha[i]) * ratio)
}
normPred[i] = sum(predTMP * trace[myPos])
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment