Commit 33693642 by Mustafa Tekpinar

Some minor code annotations!

parent 1383201b
...@@ -181,7 +181,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -181,7 +181,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
} }
}else{ }else{
print("ERROR: Unknown --normWeightMode selected!") print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!") print("It can only be 'trace', 'pc', 'cv', 'dfi', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
} }
print(trace) print(trace)
......
...@@ -13,8 +13,11 @@ import subprocess ...@@ -13,8 +13,11 @@ import subprocess
import shutil import shutil
# Extract the query sequence from the input alignment
def extractQuerySeq(filename): def extractQuerySeq(filename):
"""
# Extract the query sequence from the input alignment
"""
fIN = open(filename,"r") fIN = open(filename,"r")
lines = fIN.readlines() lines = fIN.readlines()
...@@ -35,21 +38,21 @@ def extractQuerySeq(filename): ...@@ -35,21 +38,21 @@ def extractQuerySeq(filename):
fOUT.close() fOUT.close()
return prot,seq,i-1 return prot,seq,i-1
# Get the number of sequences in a multi-fasta file
def getNbSeq(filename):
def getNbSeq(filename):
"""
# Get the number of sequences in a multi-fasta file
"""
if filename!='': if filename!='':
proc=subprocess.Popen("grep -c '^>' "+filename,stdout=subprocess.PIPE,shell=True) proc=subprocess.Popen("grep -c '^>' "+filename,stdout=subprocess.PIPE,shell=True)
return int(proc.stdout.read()) return int(proc.stdout.read())
else: else:
return 0 return 0
# Create a PDB file with dummy CA atoms based on the query sequence
def createPDB(prot,seq): def createPDB(prot,seq):
""" """
If there is not a real PDB file for a given sequence, If there is not a real PDB file for a given sequence,
create a fake PDB containing only CA atoms. create a fake PDB containing only dummy CA atoms.
""" """
d = {'C': 'CYS', 'D': 'ASP', 'S': 'SER', 'Q': 'GLN', 'K': 'LYS', d = {'C': 'CYS', 'D': 'ASP', 'S': 'SER', 'Q': 'GLN', 'K': 'LYS',
'I': 'ILE', 'P': 'PRO', 'T': 'THR', 'F': 'PHE', 'N': 'ASN', 'I': 'ILE', 'P': 'PRO', 'T': 'THR', 'F': 'PHE', 'N': 'ASN',
...@@ -63,8 +66,10 @@ def createPDB(prot,seq): ...@@ -63,8 +66,10 @@ def createPDB(prot,seq):
i += 1 i += 1
fOUT.close() fOUT.close()
# Edit JET configuration file with correct number of Seqs & MSA
def editConfJET(N): def editConfJET(N):
"""
# Edit JET configuration file with correct number of Seqs & MSA
"""
reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True) reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True)
return(reCode) return(reCode)
......
...@@ -224,6 +224,7 @@ which.class<-function(a,alphabet){ ...@@ -224,6 +224,7 @@ which.class<-function(a,alphabet){
# log-odd ratio between the mutated and wild-type sequence counts # log-odd ratio between the mutated and wild-type sequence counts
computePredNbSeqs<-function(wt, nbseqs){ computePredNbSeqs<-function(wt, nbseqs){
#Equation 7 in https://doi.org/10.1093/molbev/msz179
n = length(wt) n = length(wt)
pred = matrix(nc=n,nr=20) pred = matrix(nc=n,nr=20)
rownames(pred)=aa rownames(pred)=aa
...@@ -385,6 +386,8 @@ normalizePred<-function(pred, trace, wt){ ...@@ -385,6 +386,8 @@ normalizePred<-function(pred, trace, wt){
return(-normPred) return(-normPred)
} }
# Equation 2 in https://doi.org/10.1093/molbev/msz179
normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
n = length(trace) n = length(trace)
normPred = matrix(nc=n,nr=20) normPred = matrix(nc=n,nr=20)
...@@ -399,6 +402,8 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){ ...@@ -399,6 +402,8 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
for(a in aa){ for(a in aa){
A = which.class(wt[i],alphabet) A = which.class(wt[i],alphabet)
B = which.class(a,alphabet) B = which.class(a,alphabet)
# I think this is the place I should modify. alpha will become alpha[i]
normPred[a,i] = alpha * normPred[a,i] - (1-alpha) * log(max(1,nbseqs[B,i])/nbseqs[A,i]) normPred[a,i] = alpha * normPred[a,i] - (1-alpha) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
} }
normPred[,i] = normPred[,i] * trace[i] normPred[,i] = normPred[,i] * trace[i]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment