Commit 2af63caf by Mustafa Tekpinar

Implemented --isjet2on option for reproducibility checks.

parent 05fb8c57
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
......@@ -43,12 +44,6 @@ write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE)
if(sum(colnames(jet)=="traceMax")==1){trace=jet[,"traceMax"]}else{trace=jet[,"trace"]}
#You should comment line 44 to use this functionality. Or maybe, it should go into normalize functions
#That was what we originally decided with Alessandra.
#To get max values of PC, CV or Trace
#trace = c()
#for (row in 1:nrow(jet)) { trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) }
#traceAli = sweep(binAli, MARGIN=2, trace, `*`)
# compute evolutionary distances of all sequences with respect to the query
distTrace = binAli[2:N[1],] %*% trace^2
......@@ -75,44 +70,56 @@ nbGaps = N[1] - apply(nbSeqs,2,sum)
# output the conservation values
dat=rbind(trace,KL=res[[1]][[2]],SE=res[[1]][[1]],gap=nbGaps/N[1],KL60=res[[2]][[2]],SE60=res[[2]][[1]],KL80=res[[3]][[2]],SE80=res[[3]][[1]])
write.table(dat,paste0(prot,"_conservation.txt"))
# compute log-odd ratios between mutated and wt sequence counts
predInd = computePredNbSeqs(wt,nbSeqs)
# output the sequence counts log-odd ratios
write.table(predInd,paste0(prot,"_pred_evolInd.txt"))
# Do the normalization for the independent component here!
rownames(predInd)=aa
if(simple){
normPredInd = normalizePredWithNbSeqsZero(predInd,trace,wt)
rownames(normPredInd)=aa
}else{
normPredInd = normalizePredWithNbSeqsZeroSelMult(predInd, trace, wt, list(pos,subsaa))
names(normPredInd)=rawMut
}
# output the predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredInd,paste0(prot,"_normPred_evolInd.txt"))
# output the sequence counts log-odd ratios
write.table(predInd,paste0(prot,"_pred_evolInd.txt"))
print("done")
print("running global epistatic model...")
pred=computePredSimple(ali,distTrace,wt,5)
rownames(pred)=aa
# output the evolutionary distances between the query and the closest variants
evolDist = pred/sum(trace^2)
evolDist[is.na(evolDist)] = 1
write.table(evolDist,paste0(prot,"_pred_evolEpi.txt"))
print("done")
print("running normalization...")
#You should comment line 44 to use this functionality. Or maybe, it should go into normalize functions
#That was what we originally decided with Alessandra.
#To get max values of PC, or Trace
# trace = c()
# print(trace)
# for (row in 1:nrow(jet)) { trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) }
# print(trace)
# Do the normalization for the epistatic component here!
rownames(pred)=aa
if(simple){
#Independent model normalization
normPredInd = normalizePredWithNbSeqsZero(predInd,trace,wt)
rownames(normPredInd)=aa
# Epistatic model normalization
normPred=normalizePred(pred, trace, wt)
rownames(normPred)=aa
}else{
#Independent model normalization
normPredInd = normalizePredWithNbSeqsZeroSelMult(predInd, trace, wt, list(pos,subsaa))
names(normPredInd)=rawMut
# Epistatic model normalization
normPred=normalizePredSelMult(pred, trace, wt, list(pos,subsaa))
names(normPred)=rawMut
}
# output the predicted mutational effects based on evolutionary distance (conservation at the bottom)
# output the normalized predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredInd,paste0(prot,"_normPred_evolInd.txt"))
# output the predicted normalized mutational effects based on evolutionary distance (conservation at the bottom)
write.table(normPred,paste0(prot,"_normPred_evolEpi.txt"))
print("done")
......
......@@ -9,3 +9,8 @@ else
echo "Running GEMME with a user-provided alignment file."
python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta
fi
# If you have your own JET2 score file, you can turn off JET2 as follows:
# python $GEMME_PATH/gemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --isjet2on false
# This option can be useful for testing reproducibility!
......@@ -11,6 +11,7 @@ import argparse
import re
import subprocess
import math
from xmlrpc.client import boolean
import numpy as np
import matplotlib.pylab as plt
......@@ -254,6 +255,9 @@ def parse_command_line():
help='fasta file containing related sequences',
default=''
)
retMet_args.add_argument('--isjet2on', dest='isjet2on', type=str, \
help="If false, it will skip JET2 calculation and use a precalculated JET2 file. Default is true",
required=False, default="True")
args = parser.parse_args()
......@@ -268,22 +272,32 @@ def parse_command_line():
return args
def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
def doit(inAli,mutFile,retMet,bFile,fFile,n,N, isjet2on):
"""
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile)
Fonksiyon aciklamasi ile taniminin ayni olmasi super olmus!
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile, args.isjet2on)
"""
simple = True
prot,seq,nl=extractQuerySeq(inAli)
createPDB(prot,seq)
print("query protein: "+prot)
print("computing conservation levels...")
if((isjet2on.lower()) == "true"):
#I intend to run JET2 completely externally!!
#It is too much buggy and it has too many dependencies.
#Using it with a Docker or Singularity may be the best solution!
print("computing conservation levels...")
launchJET(prot,retMet,bFile,fFile,n,N,nl)
print("done")
elif((isjet2on.lower()) == "false"):
print("using previously calculated JET2 conservation levels...")
print("done")
else:
print("ERROR: You can only use true or false after --isjet2on!")
sys.exit(-1)
launchPred(prot,inAli,mutFile)
#Do Python plotting here
......@@ -294,7 +308,6 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
#TODO: You can even put letters on the top line like in EVmutation output.
simple = True
if(simple):
print("generating the plots...")
gemmeData = parseGEMMEoutput(prot+"_normPred_evolEpi.txt", verbose=False)
......@@ -315,7 +328,8 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N):
def main():
args = parse_command_line()
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile,args.nIter,args.NSeqs)
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,\
args.fastaFile,args.nIter,args.NSeqs, args.isjet2on)
if (__name__ == '__main__'):
main()
......
# Copyright (c) 2018: Elodie Laine
# Copyright (c) 2022: Mustafa Tekpinar
# This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment