Commit 33693642 by Mustafa Tekpinar

Some minor code annotations!

parent 1383201b
......@@ -181,7 +181,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
}
}else{
print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
print("It can only be 'trace', 'pc', 'cv', 'dfi', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'!")
}
print(trace)
......
......@@ -13,8 +13,11 @@ import subprocess
import shutil
# Extract the query sequence from the input alignment
def extractQuerySeq(filename):
"""
# Extract the query sequence from the input alignment
"""
fIN = open(filename,"r")
lines = fIN.readlines()
......@@ -35,21 +38,21 @@ def extractQuerySeq(filename):
fOUT.close()
return prot,seq,i-1
# Get the number of sequences in a multi-fasta file
def getNbSeq(filename):
def getNbSeq(filename):
"""
# Get the number of sequences in a multi-fasta file
"""
if filename!='':
proc=subprocess.Popen("grep -c '^>' "+filename,stdout=subprocess.PIPE,shell=True)
return int(proc.stdout.read())
else:
return 0
# Create a PDB file with dummy CA atoms based on the query sequence
def createPDB(prot,seq):
"""
If there is not a real PDB file for a given sequence,
create a fake PDB containing only CA atoms.
create a fake PDB containing only dummy CA atoms.
"""
d = {'C': 'CYS', 'D': 'ASP', 'S': 'SER', 'Q': 'GLN', 'K': 'LYS',
'I': 'ILE', 'P': 'PRO', 'T': 'THR', 'F': 'PHE', 'N': 'ASN',
......@@ -63,8 +66,10 @@ def createPDB(prot,seq):
i += 1
fOUT.close()
# Edit JET configuration file with correct number of Seqs & MSA
def editConfJET(N):
"""
# Edit JET configuration file with correct number of Seqs & MSA
"""
reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True)
return(reCode)
......
......@@ -224,6 +224,7 @@ which.class<-function(a,alphabet){
# log-odd ratio between the mutated and wild-type sequence counts
computePredNbSeqs<-function(wt, nbseqs){
#Equation 7 in https://doi.org/10.1093/molbev/msz179
n = length(wt)
pred = matrix(nc=n,nr=20)
rownames(pred)=aa
......@@ -385,6 +386,8 @@ normalizePred<-function(pred, trace, wt){
return(-normPred)
}
# Equation 2 in https://doi.org/10.1093/molbev/msz179
normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
n = length(trace)
normPred = matrix(nc=n,nr=20)
......@@ -399,6 +402,8 @@ normalizePredWithNbSeqsPC<-function(pred, trace, wt, alpha, nbSeqs, alphabet){
for(a in aa){
A = which.class(wt[i],alphabet)
B = which.class(a,alphabet)
# I think this is the place I should modify. alpha will become alpha[i]
normPred[a,i] = alpha * normPred[a,i] - (1-alpha) * log(max(1,nbseqs[B,i])/nbseqs[A,i])
}
normPred[,i] = normPred[,i] * trace[i]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment