Commit 4eaa0f37 by Mustafa Tekpinar

Added an amino acid based alpha index called alpha[i].

For now, alpha[i] is still 0.6 for the all residues to produce
similar outputs for all residues. In addition to this change,
corrected a bug related to the chain naming.
parent 33693642
...@@ -212,7 +212,35 @@ write.table(normPred,paste0(prot,"_normPred_evolEpi.txt")) ...@@ -212,7 +212,35 @@ write.table(normPred,paste0(prot,"_normPred_evolEpi.txt"))
print("done") print("done")
print("running combined model...") print("running combined model...")
alpha = 0.6 #Read frequencies from the jet output file.
frequencies = c()
print(paste("Reading frequencies:"))
for (row in 1:nrow(jet)) {
frequencies<-append(frequencies, jet[row, "freq"] )
}
print(frequencies)
freq_mean = mean(frequencies)
#alpha = 0.6
alpha = c()
#If we want to assign each aa the same weight of epistatic and independent
#contribution, that's the way to do it.
for (row in 1:nrow(jet)) {
alpha<-append(alpha, 0.6)
}
# alpha_val = 0.6
# for (row in 1:nrow(jet)) {
# if(frequencies[row]>freq_mean){
# alpha<-append(alpha, alpha_val)
# } else{
# alpha<-append(alpha, (1.0 - alpha_val))
# }
# }
print(alpha)
alphabet = "lz-bl.7" alphabet = "lz-bl.7"
if(simple){ if(simple){
normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet) normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet)
......
...@@ -93,8 +93,8 @@ def attenuateEndPoints(dfi): ...@@ -93,8 +93,8 @@ def attenuateEndPoints(dfi):
x1 = np.linspace(-0, 10, int((n-1)/2)) x1 = np.linspace(-0, 10, int((n-1)/2))
x2 = np.linspace(-10, 0, int((n+1)/2)) x2 = np.linspace(-10, 0, int((n+1)/2))
z1 = 1/(1 + np.exp(-15.0*x1)) z1 = 1/(1 + np.exp(-2.50*x1))
z2 = 1.0 - (1/(1 + np.exp(-15.0*x2))) z2 = 1.0 - (1/(1 + np.exp(-2.50*x2)))
z = np.append(z1, z2) z = np.append(z1, z2)
...@@ -424,15 +424,25 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode) ...@@ -424,15 +424,25 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
(normWeightMode != 'half-cv+pc') and \ (normWeightMode != 'half-cv+pc') and \
(normWeightMode != 'max-trace-half-cv+pc') and \ (normWeightMode != 'max-trace-half-cv+pc') and \
(normWeightMode != 'max-trace-half-pc+cv')): (normWeightMode != 'max-trace-half-pc+cv')):
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-pc-cv', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc' or max-trace-half-cv+pc!") print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', \n"+\
" 'max-trace-pc', 'max-trace-dfi', 'max-pc-cv', "+\
" 'max-trace-cv', 'max-trace-pc-cv'"+\
" 'half-cv+pc' or max-trace-half-cv+pc!")
sys.exit(-1) sys.exit(-1)
structure = parsePDB(pdbfile)
calphas = structure.select('name CA')
chains = list(set(calphas.getChids()))
#print(chains)
if((jetfile) == None): if((jetfile) == None):
#I intend to run JET2 completely externally!! #I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies. #It is too much buggy and it has too many dependencies.
#Using it with a Docker or Singularity may be the best solution! #Using it with a Docker or Singularity may be the best solution!
print("computing conservation levels...") print("computing conservation levels...")
launchJET(prot,retMet,bFile,fFile, pdbfile, n,N,nl) launchJET(prot,retMet,bFile,fFile, pdbfile, chains, n,N,nl)
print("done") print("done")
else: else:
print("using previously calculated JET2 data from "+jetfile+"...") print("using previously calculated JET2 data from "+jetfile+"...")
...@@ -446,7 +456,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode) ...@@ -446,7 +456,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
print("done") print("done")
#If a real pdb file is given, calculate dfi for the residues. #If a real pdb file is given, calculate dfi for the residues.
if((pdbfile != None) and (normWeightMode=='dfi')): if((pdbfile != None) and ((normWeightMode=='dfi') or (normWeightMode=='max-trace-dfi') or (normWeightMode=='max-dfi-trace'))):
print("Calculating %DFI data per residue!") print("Calculating %DFI data per residue!")
dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true") dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
dfi = dfi dfi = dfi
...@@ -498,7 +508,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode) ...@@ -498,7 +508,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode)
print(" Can not generate "+prot+"_normPred_evolCombi.png file!") print(" Can not generate "+prot+"_normPred_evolCombi.png file!")
print("done") print("done")
cleanTheMess(prot,bFile,fFile) cleanTheMess(prot,bFile,fFile, chainID=chains[0])
def main(): def main():
......
...@@ -74,7 +74,7 @@ def editConfJET(N): ...@@ -74,7 +74,7 @@ def editConfJET(N):
return(reCode) return(reCode)
# Run JET to compute TJET values # Run JET to compute TJET values
def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl): def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
""" """
Call JET2 and produce prot+"_jet.res" file. Call JET2 and produce prot+"_jet.res" file.
...@@ -102,6 +102,9 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl): ...@@ -102,6 +102,9 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl):
If it is None, only JET and PC scores are calculated. If it is None, only JET and PC scores are calculated.
Otherwise, CV and other structural-dynamical features also Otherwise, CV and other structural-dynamical features also
can be calculated. can be calculated.
chains: list
A list of chains available in the pdb file.
Most of the time, it is supposed to be just one!
n: int n: int
Number of JET2 iterations. Number of JET2 iterations.
N: int N: int
...@@ -115,42 +118,44 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl): ...@@ -115,42 +118,44 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, n, N, nl):
Nothing Nothing
""" """
chainID = chains[0]
subprocess.call("cp $GEMME_PATH/default.conf .",shell=True) subprocess.call("cp $GEMME_PATH/default.conf .",shell=True)
if retMet=="input": if retMet=="input":
if bFile!='': if bFile!='':
if(bFile == prot+"_A.psiblast"): if(bFile == prot+"_"+chainID+".psiblast"):
shutil.copy2(bFile, bFile+".orig") shutil.copy2(bFile, bFile+".orig")
shutil.copy2(bFile+".orig ", prot+"_A.psiblast") shutil.copy2(bFile+".orig ", prot+"_"+chainID+".psiblast")
else: else:
shutil.copy2(bFile+" ", prot+"_A.psiblast") shutil.copy2(bFile+" ", prot+"_"+chainID+".psiblast")
if(pdbfile == None): if(pdbfile == None):
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p J -r input -b "+prot+"_A.psiblast -d chain -n "+n+" > "+prot+".out" prot+".pdb -o `pwd` -p J -r input -b "+prot+"_"+chainID+".psiblast -d chain -n "+n+" > "+prot+".out"
else: else:
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJ -r input -b "+prot+"_A.psiblast -d chain -n "+n+" > "+prot+".out" prot+".pdb -o `pwd` -p AVJ -r input -b "+prot+"_"+chainID+".psiblast -d chain -n "+n+" > "+prot+".out"
else: else:
print(N) print(N)
editConfJET(N) editConfJET(N)
if(fFile == prot+"_A.fasta"): if(fFile == prot+"_"+chainID+".fasta"):
shutil.copy2(fFile, fFile+".orig") shutil.copy2(fFile, fFile+".orig")
#I think this subprocess call causes overwriting of the fasta file. #I think this subprocess call causes overwriting of the fasta file.
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True) #subprocess.call("cp "+fFile+" "+prot+"_"+chainID+".fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+".orig > "+prot+"_A.fasta" grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+".orig > "+prot+"_"+chainID+".fasta"
else: else:
#subprocess.call("cp "+fFile+" "+prot+"_A.fasta",shell=True) #subprocess.call("cp "+fFile+" "+prot+"_"+chainID+".fasta",shell=True)
grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+" > "+prot+"_A.fasta" grpcmd="grep -m "+str(int(N)+1)+" -A "+str(nl)+" '^>' "+fFile+" > "+prot+"_"+chainID+".fasta"
print("\nRunning:\n"+grpcmd) print("\nRunning:\n"+grpcmd)
subprocess.call(grpcmd,shell=True) subprocess.call(grpcmd,shell=True)
if(pdbfile == None): if(pdbfile == None):
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p J -r input -f "+prot+"_A.fasta -d chain -n "+n+" > "+prot+".out" prot+".pdb -o `pwd` -p J -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out"
else: else:
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJ -r input -f "+prot+"_A.fasta -d chain -n "+n+" > "+prot+".out" prot+".pdb -o `pwd` -p AVJ -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out"
else: else:
if(pdbfile == None): if(pdbfile == None):
jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx1000m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
...@@ -179,18 +184,18 @@ def launchPred(prot,inAli,mutFile, normWeightMode): ...@@ -179,18 +184,18 @@ def launchPred(prot,inAli,mutFile, normWeightMode):
return(reCode) return(reCode)
# Remove temporary files # Remove temporary files
def cleanTheMess(prot,bFile,fFile): def cleanTheMess(prot,bFile,fFile, chainID):
if bFile!='': if bFile!='':
if bFile!=prot+"_A.psiblast": if bFile!=prot+"_"+chainID+".psiblast":
os.remove(prot+"_A.psiblast") os.remove(prot+"_"+chainID+".psiblast")
else: else:
if os.path.isfile(prot+"/"+prot+"_A.psiblast"): if os.path.isfile(prot+"/"+prot+"_"+chainID+".psiblast"):
os.rename(prot+"/"+prot+"_A.psiblast",prot+"_A.psiblast") os.rename(prot+"/"+prot+"_"+chainID+".psiblast",prot+"_"+chainID+".psiblast")
if fFile!='': if fFile!='':
if fFile!=prot+"_A.fasta": if fFile!=prot+"_"+chainID+".fasta":
if os.path.isfile(prot+"_A.fasta"): if os.path.isfile(prot+"_"+chainID+".fasta"):
os.remove(prot+"_A.fasta") os.remove(prot+"_"+chainID+".fasta")
# if os.path.isfile(prot+"/"+prot+"_jet.res"): # if os.path.isfile(prot+"/"+prot+"_jet.res"):
# os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res") # os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
# os.remove(prot+".pdb") # os.remove(prot+".pdb")
......
...@@ -404,7 +404,7 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){ ...@@ -404,7 +404,7 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
B = which.class(a,alphabet) B = which.class(a,alphabet)
# I think this is the place I should modify. alpha will become alpha[i] # I think this is the place I should modify. alpha will become alpha[i]
normPred[a,i] = alpha * normPred[a,i] - (1-alpha) * log(max(1,nbseqs[B,i])/nbseqs[A,i]) normPred[a,i] = alpha[i] * normPred[a,i] - (1-alpha[i]) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
} }
normPred[,i] = normPred[,i] * trace[i] normPred[,i] = normPred[,i] * trace[i]
normPred[aa==wt[i],i] = NA normPred[aa==wt[i],i] = NA
...@@ -435,7 +435,7 @@ normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSe ...@@ -435,7 +435,7 @@ normalizePredWithNbSeqsPCSelMult<-function(pred, trace, wt, listMut, alpha, nbSe
ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]]) ratio = log(max(1,nbseqs[B,myPos[k]])/nbseqs[A,myPos[k]])
val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1) val = pred[myAA[k],myPos[k]] / maxVal * log(1/maxRefVal) * (-1)
if(is.na(val)){val=- log(1/maxRefVal)} if(is.na(val)){val=- log(1/maxRefVal)}
predTMP=c(predTMP, alpha * val - (1-alpha) * ratio) predTMP=c(predTMP, alpha[i] * val - (1-alpha[i]) * ratio)
} }
normPred[i] = sum(predTMP * trace[myPos]) normPred[i] = sum(predTMP * trace[myPos])
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment