Commit 7db0ac49 by Mustafa Tekpinar

Several interface and computational changes.

1-Moved the normweighmode to the beginning of the program.
In this way, the weight will be applied at the beginning
instead of just the final normalization step. Our tests
show that it improves the results when Max(JET, (PC+CV)2)
is used at the beginning. Most of the changes in this part
are in computePred.R file.
2-When the user provides a mutation list
as a file, it was giving an error. The error in apply(...)
part of normalizePredWithNbSeqsPCSelMult function. The bug
is corrected now and it produces results consistent with
the web server.

3-A small bug in gemme.py was also corrected.
If a mutation list file is given, the model is not 'simple'
and therefore, it will not produce a map of single point
mutations.
parent 7f465557
...@@ -44,64 +44,9 @@ write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt")) ...@@ -44,64 +44,9 @@ write.table(res[[3]][[3]],paste0(prot,"_pssm80.txt"))
# read evolutionary traces computed by JET # read evolutionary traces computed by JET
jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE) jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE)
if(sum(colnames(jet)=="traceMax")==1){trace=jet[,"traceMax"]}else{trace=jet[,"trace"]} ############################################Added by MT to test effect of PC, CV, DFI, Bfactor etc.
#traceAli = sweep(binAli, MARGIN=2, trace, `*`)
# compute evolutionary distances of all sequences with respect to the query
distTrace = binAli[2:N[1],] %*% trace^2
wt=read.fasta(paste(prot,".fasta",sep=""))
n = length(wt[[1]])
wt = wt[[1]][1:n]
#indicesQ = getIndicesAli(ali$seq,1)
if(simple==FALSE){
print("reading selected mutations...")
res=readMut(fname)
pos = res[[1]]
subsaa = res[[2]]
rawMut = res[[3]]
print("done")
}
print("running independent model...")
# compute sequence counts for each position and each aa
nbSeqs = computeNbSeqs(ali)
# compute gap counts for each position
nbGaps = N[1] - apply(nbSeqs,2,sum)
# output the conservation values
dat=rbind(trace,KL=res[[1]][[2]],SE=res[[1]][[1]],gap=nbGaps/N[1],KL60=res[[2]][[2]],SE60=res[[2]][[1]],KL80=res[[3]][[2]],SE80=res[[3]][[1]])
write.table(dat,paste0(prot,"_conservation.txt"))
# compute log-odd ratios between mutated and wt sequence counts
predInd = computePredNbSeqs(wt,nbSeqs)
rownames(predInd)=aa
# output the sequence counts log-odd ratios
write.table(predInd,paste0(prot,"_pred_evolInd.txt"))
print("done")
print("running global epistatic model...")
pred=computePredSimple(ali,distTrace,wt,5)
rownames(pred)=aa
# output the evolutionary distances between the query and the closest variants
evolDist = pred/sum(trace^2)
evolDist[is.na(evolDist)] = 1
write.table(evolDist,paste0(prot,"_pred_evolEpi.txt"))
print("done")
print("running normalization...")
#normWeightMode="trace+pc+cv"
#normWeightMode="trace+pc"
#normWeightMode="trace+cv"
#normWeightMode="trace"
#print(normWeightMode)
#In future, you may need to comment line 44 to use this functionality if you do
#weighting not just in normalization.
#This part of the code obtains max values of Trace, PC, or CV for weighting the
#normalized results.
trace = c() trace = c()
if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -110,7 +55,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -110,7 +55,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"])) trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"]))
} }
} }
} else if ((normWeightMode=="max-trace-cv") | (normWeightMode=="max-cv-trace")){ } else if ((normWeightMode=="maxtracecv") | (normWeightMode=="maxcvtrace")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -119,7 +64,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -119,7 +64,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"])) trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"]))
} }
} }
} else if ((normWeightMode=="max-trace-dfi") | (normWeightMode=="max-dfi-trace")){ } else if ((normWeightMode=="maxtracedfi") | (normWeightMode=="maxdfitrace")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -128,7 +73,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -128,7 +73,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "dfi"])) trace<-append(trace, max(jet[row, "trace"], jet[row, "dfi"]))
} }
} }
} else if ((normWeightMode=="max-trace-bfactor") | (normWeightMode=="max-bfactor-trace")){ } else if ((normWeightMode=="maxtracebfactor") | (normWeightMode=="maxbfactortrace")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -137,7 +82,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -137,7 +82,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], jet[row, "bfactor"])) trace<-append(trace, max(jet[row, "trace"], jet[row, "bfactor"]))
} }
} }
}else if ((normWeightMode=="max-trace-pc-cv")|(normWeightMode=="max-trace-cv-pc")){ }else if ((normWeightMode=="maxtracepccv")|(normWeightMode=="maxtracecvpc")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -146,7 +91,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -146,7 +91,7 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], max(jet[row, "pc"], jet[row, "cv"]))) trace<-append(trace, max(jet[row, "trace"], max(jet[row, "pc"], jet[row, "cv"])))
} }
} }
} else if ((normWeightMode=="max-trace-half-pc+cv")|(normWeightMode=="max-trace-half-cv+pc")){ } else if ((normWeightMode=="maxtracehalfpccv")|(normWeightMode=="maxtracehalfcvpc")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){ if(sum(colnames(jet)=="traceMax")==1){
...@@ -155,12 +100,12 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -155,12 +100,12 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, max(jet[row, "trace"], (jet[row, "pc"]+jet[row, "cv"])/2.0 )) trace<-append(trace, max(jet[row, "trace"], (jet[row, "pc"]+jet[row, "cv"])/2.0 ))
} }
} }
}else if ((normWeightMode=="half-cv+pc") | (normWeightMode=="half-pc+cv")){ }else if ((normWeightMode=="halfcvpc") | (normWeightMode=="halfpccv")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
trace<-append(trace, (jet[row, "pc"]+jet[row, "cv"])/2.0) trace<-append(trace, (jet[row, "pc"]+jet[row, "cv"])/2.0)
} }
} else if ((normWeightMode=="max-cv-pc") | (normWeightMode=="max-pc-cv")){ } else if ((normWeightMode=="maxcvpc") | (normWeightMode=="maxpccv")){
print(paste("Using ", normWeightMode)) print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
trace<-append(trace, max(jet[row, "pc"], jet[row, "cv"])) trace<-append(trace, max(jet[row, "pc"], jet[row, "cv"]))
...@@ -174,6 +119,13 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -174,6 +119,13 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
trace<-append(trace, jet[row, "trace"]) trace<-append(trace, jet[row, "trace"])
} }
} }
}else if (normWeightMode=="tracemovingaverage"){
print("Using only tracemovingaverage")
for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "tracemovingaverage"])
}
print("Here is the tracemovingaverage!")
print(trace)
}else if (normWeightMode=="cv"){ }else if (normWeightMode=="cv"){
print("Using only CV traces") print("Using only CV traces")
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
...@@ -196,9 +148,176 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){ ...@@ -196,9 +148,176 @@ if((normWeightMode=="max-trace-pc") | (normWeightMode=="max-pc-trace")){
} }
}else{ }else{
print("ERROR: Unknown --normWeightMode selected!") print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv', 'dfi', 'bfactor', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv', 'half-cv+pc', 'max-trace-dfi' or 'max-trace-bfactor'!") print("It can only be 'trace', 'tracemovingaverage', 'pc', 'cv', 'dfi', 'bfactor', 'maxtracepc', 'maxtracecv', 'maxtracepccv', 'halfcvpc', 'maxtracedfi' or 'maxtracebfactor'!")
} }
print(trace) print(trace)
#####################################################################################################################
#if(sum(colnames(jet)=="traceMax")==1){trace=jet[,"traceMax"]}else{trace=jet[,"trace"]}
#traceAli = sweep(binAli, MARGIN=2, trace, `*`)
# compute evolutionary distances of all sequences with respect to the query
distTrace = binAli[2:N[1],] %*% trace^2
wt=read.fasta(paste(prot,".fasta",sep=""))
n = length(wt[[1]])
wt = wt[[1]][1:n]
#indicesQ = getIndicesAli(ali$seq,1)
if(simple==FALSE){
print("reading selected mutations...")
res=readMut(fname)
pos = res[[1]]
subsaa = res[[2]]
rawMut = res[[3]]
print("done")
}
print("running independent model...")
# compute sequence counts for each position and each aa
nbSeqs = computeNbSeqs(ali)
# compute gap counts for each position
nbGaps = N[1] - apply(nbSeqs,2,sum)
# output the conservation values
dat=rbind(trace,KL=res[[1]][[2]],SE=res[[1]][[1]],gap=nbGaps/N[1],KL60=res[[2]][[2]],SE60=res[[2]][[1]],KL80=res[[3]][[2]],SE80=res[[3]][[1]])
write.table(dat,paste0(prot,"_conservation.txt"))
# compute log-odd ratios between mutated and wt sequence counts
predInd = computePredNbSeqs(wt,nbSeqs)
rownames(predInd)=aa
# output the sequence counts log-odd ratios
write.table(predInd,paste0(prot,"_pred_evolInd.txt"))
print("done")
print("running global epistatic model...")
pred=computePredSimple(ali,distTrace,wt,5)
rownames(pred)=aa
# output the evolutionary distances between the query and the closest variants
evolDist = pred/sum(trace^2)
evolDist[is.na(evolDist)] = 1
write.table(evolDist,paste0(prot,"_pred_evolEpi.txt"))
print("done")
print("running normalization...")
#normWeightMode="trace+pc+cv"
#normWeightMode="trace+pc"
#normWeightMode="trace+cv"
#normWeightMode="trace"
#print(normWeightMode)
#In future, you may need to comment line 44 to use this functionality if you do
#weighting not just in normalization.
#This part of the code obtains max values of Trace, PC, or CV for weighting the
#normalized results.
# trace = c()
# if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# if(sum(colnames(jet)=="traceMax")==1){
# trace<-append(trace, max(jet[row, "traceMax"], jet[row, "pc"]))
# }else{
# trace<-append(trace, max(jet[row, "trace"], jet[row, "pc"]))
# }
# }
# } else if ((normWeightMode=="maxtracecv") | (normWeightMode=="maxcvtrace")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# if(sum(colnames(jet)=="traceMax")==1){
# trace<-append(trace, max(jet[row, "traceMax"], jet[row, "cv"]))
# }else{
# trace<-append(trace, max(jet[row, "trace"], jet[row, "cv"]))
# }
# }
# } else if ((normWeightMode=="maxtracedfi") | (normWeightMode=="maxdfitrace")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# if(sum(colnames(jet)=="traceMax")==1){
# trace<-append(trace, max(jet[row, "traceMax"], jet[row, "dfi"]))
# }else{
# trace<-append(trace, max(jet[row, "trace"], jet[row, "dfi"]))
# }
# }
# } else if ((normWeightMode=="maxtracebfactor") | (normWeightMode=="maxbfactortrace")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# if(sum(colnames(jet)=="traceMax")==1){
# trace<-append(trace, max(jet[row, "traceMax"], jet[row, "bfactor"]))
# }else{
# trace<-append(trace, max(jet[row, "trace"], jet[row, "bfactor"]))
# }
# }
# }else if ((normWeightMode=="maxtracepccv")|(normWeightMode=="maxtracecvpc")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# if(sum(colnames(jet)=="traceMax")==1){
# trace<-append(trace, max(jet[row, "traceMax"], max(jet[row, "pc"], jet[row, "cv"])))
# }else{
# trace<-append(trace, max(jet[row, "trace"], max(jet[row, "pc"], jet[row, "cv"])))
# }
# }
# } else if ((normWeightMode=="maxtracehalfpccv")|(normWeightMode=="maxtracehalfcvpc")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# if(sum(colnames(jet)=="traceMax")==1){
# trace<-append(trace, max(jet[row, "traceMax"], (jet[row, "pc"]+jet[row, "cv"])/2.0 ))
# }else{
# trace<-append(trace, max(jet[row, "trace"], (jet[row, "pc"]+jet[row, "cv"])/2.0 ))
# }
# }
# }else if ((normWeightMode=="halfcvpc") | (normWeightMode=="halfpccv")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# trace<-append(trace, (jet[row, "pc"]+jet[row, "cv"])/2.0)
# }
# } else if ((normWeightMode=="maxcvpc") | (normWeightMode=="maxpccv")){
# print(paste("Using ", normWeightMode))
# for (row in 1:nrow(jet)) {
# trace<-append(trace, max(jet[row, "pc"], jet[row, "cv"]))
# }
# }else if (normWeightMode=="trace"){
# print("Using only JET2 traces")
# for (row in 1:nrow(jet)) {
# if(sum(colnames(jet)=="traceMax")==1){
# trace<-append(trace, jet[row, "traceMax"])
# }else{
# trace<-append(trace, jet[row, "trace"])
# }
# }
# }else if (normWeightMode=="tracemovingaverage"){
# print("Using only tracemovingaverage")
# for (row in 1:nrow(jet)) {
# trace<-append(trace, jet[row, "tracemovingaverage"])
# }
# print("Here is the tracemovingaverage!")
# print(trace)
# }else if (normWeightMode=="cv"){
# print("Using only CV traces")
# for (row in 1:nrow(jet)) {
# trace<-append(trace, jet[row, "cv"])
# }
# }else if (normWeightMode=="pc"){
# print("Using only PC traces")
# for (row in 1:nrow(jet)) {
# trace<-append(trace, jet[row, "pc"])
# }
# }else if (normWeightMode=="dfi"){
# print("Using only DFI values")
# for (row in 1:nrow(jet)) {
# trace<-append(trace, jet[row, "dfi"])
# }
# }else if (normWeightMode=="bfactor"){
# print("Using only Bfactor values")
# for (row in 1:nrow(jet)) {
# trace<-append(trace, jet[row, "bfactor"])
# }
# }else{
# print("ERROR: Unknown --normWeightMode selected!")
# print("It can only be 'trace', 'tracemovingaverage', 'pc', 'cv', 'dfi', 'bfactor', 'maxtracepc', 'maxtracecv', 'maxtracepccv', 'halfcvpc', 'maxtracedfi' or 'maxtracebfactor'!")
# }
#print(trace)
if(simple){ if(simple){
#Independent model normalization #Independent model normalization
...@@ -262,7 +381,9 @@ if(simple){ ...@@ -262,7 +381,9 @@ if(simple){
normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet) normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet)
rownames(normPredCombi)=aa rownames(normPredCombi)=aa
}else{ }else{
normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet) #normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet)
normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs,alphabet)
names(normPredCombi)=rawMut
names(normPredCombi)=rawMut names(normPredCombi)=rawMut
} }
# output the predicted mutational effects based on sequence counts (conservation at the bottom) # output the predicted mutational effects based on sequence counts (conservation at the bottom)
......
...@@ -118,7 +118,8 @@ def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ...@@ -118,7 +118,8 @@ def calcBfactors(pdbfile, outfile, nmodes=None, attenuate="true",
else: else:
return bfactors return bfactors
def getBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="true"): def getBfactors(pdbfile, outfile, attenuate="true", \
ranksorted="true", inverted=True):
""" """
Calculate rank normalized dynamic flexibility index. Calculate rank normalized dynamic flexibility index.
""" """
...@@ -141,10 +142,15 @@ def getBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="tru ...@@ -141,10 +142,15 @@ def getBfactors(pdbfile, outfile, nmodes=None, attenuate="true", ranksorted="tru
# np.savetxt(outfile, dfi) # np.savetxt(outfile, dfi)
if(ranksorted.lower()=='true'): if(ranksorted.lower()=='true'):
return percentBfactors if(inverted==True):
return (1.0 - percentBfactors)
else:
return percentBfactors
else: else:
return bfactors if(inverted==True):
return (np.max(bfactors) - bfactors)
else:
return bfactors
def attenuateEndPoints(dfi): def attenuateEndPoints(dfi):
""" """
...@@ -437,15 +443,15 @@ def parse_command_line(): ...@@ -437,15 +443,15 @@ def parse_command_line():
required=False, default=None) required=False, default=None)
retMet_args.add_argument('--alphabet', dest='alphabet', type=str, \ retMet_args.add_argument('--alphabet', dest='alphabet', type=str, \
help="Which alphabet to use. Default is lz-bl.7", help="Which alphabet to use. Default is lw-i.7",
required=False, default="lz-bl.7") required=False, default="lw-i.7")
retMet_args.add_argument('-p', '--pdbfile', dest='pdbfile', type=str, \ retMet_args.add_argument('-p', '--pdbfile', dest='pdbfile', type=str, \
help="If a pdb file is provided, it will skip fake pdb file production step and use that file. Default is None", help="If a pdb file is provided, it will skip fake pdb file production step and use that file. Default is None",
required=False, default=None) required=False, default=None)
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \ retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'cv', 'pc', 'dfi', 'max-trace-pc', 'max-trace-cv', 'max-trace-pc-cv' or 'half-cv+pc'. Default is 'trace'.", help="It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracepc', 'maxtracecv', 'maxtracepccv' or 'halfcvpc'. Default is 'trace'.",
required=False, default="trace") required=False, default="trace")
args = parser.parse_args() args = parser.parse_args()
...@@ -467,7 +473,10 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -467,7 +473,10 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
doit is basically the main function in disguise! doit is basically the main function in disguise!
doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile, args.jetfile) doit(args.input,args.mutations,args.retrievingMethod,args.blastFile,args.fastaFile, args.jetfile)
""" """
simple = True if(mutFile != ''):
simple = False
else:
simple = True
prot,seq,nl=extractQuerySeq(inAli) prot,seq,nl=extractQuerySeq(inAli)
...@@ -488,27 +497,28 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -488,27 +497,28 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(normWeightMode != 'pc') and \ (normWeightMode != 'pc') and \
(normWeightMode != 'dfi') and \ (normWeightMode != 'dfi') and \
(normWeightMode != 'bfactor') and \ (normWeightMode != 'bfactor') and \
(normWeightMode != 'max-trace-pc') and \ (normWeightMode != 'maxtracepc') and \
(normWeightMode != 'max-pc-trace') and \ (normWeightMode != 'maxpctrace') and \
(normWeightMode != 'max-pc-cv') and \ (normWeightMode != 'maxpccv') and \
(normWeightMode != 'max-cv-pc') and \ (normWeightMode != 'maxcvpc') and \
(normWeightMode != 'max-trace-cv') and \ (normWeightMode != 'maxtracecv') and \
(normWeightMode != 'max-cv-trace') and \ (normWeightMode != 'maxcvtrace') and \
(normWeightMode != 'max-trace-dfi') and \ (normWeightMode != 'maxtracedfi') and \
(normWeightMode != 'max-trace-bfactor') and \ (normWeightMode != 'maxtracebfactor') and \
(normWeightMode != 'max-dfi-trace') and \ (normWeightMode != 'maxdfitrace') and \
(normWeightMode != 'max-bfactor-trace') and \ (normWeightMode != 'maxbfactortrace') and \
(normWeightMode != 'max-trace-pc-cv') and \ (normWeightMode != 'maxtracepccv') and \
(normWeightMode != 'max-trace-cv-pc') and \ (normWeightMode != 'maxtracecvpc') and \
(normWeightMode != 'half-pc+cv') and \ (normWeightMode != 'halfpccv') and \
(normWeightMode != 'half-cv+pc') and \ (normWeightMode != 'halfcvpc') and \
(normWeightMode != 'max-trace-half-cv+pc') and \ (normWeightMode != 'maxtracehalfcvpc') and \
(normWeightMode != 'max-trace-half-pc+cv')): (normWeightMode != 'tracemovingaverage') and \
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', 'dfi', bfactor,\n"+\ (normWeightMode != 'maxtracehalfpccv')):
" 'max-trace-pc', 'max-trace-dfi', 'max-pc-cv', "+\ print("ERROR: normWeightMode can only be 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', bfactor,\n"+\
" 'max-trace-bfactor'"+\ " 'maxtracepc', 'maxtracedfi', 'maxpccv', "+\
" 'max-trace-cv', 'max-trace-pc-cv'"+\ " 'maxtracebfactor'"+\
" 'half-cv+pc' or max-trace-half-cv+pc!") " 'maxtracecv', 'maxtracepccv'"+\
" 'halfcvpc' or maxtracehalfcvpc!")
sys.exit(-1) sys.exit(-1)
structure = parsePDB(prot+".pdb") structure = parsePDB(prot+".pdb")
...@@ -537,7 +547,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -537,7 +547,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("done") print("done")
#If a real pdb file is given, calculate dfi for the residues. #If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='dfi') or (normWeightMode=='max-trace-dfi') or (normWeightMode=='max-dfi-trace'))): if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
if (pdbfile == None): if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.") print("ERROR: There is not any pdb file to calculate DFI.")
sys.exit(-1) sys.exit(-1)
...@@ -553,21 +563,49 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -553,21 +563,49 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print(df) print(df)
#If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='tracemovingaverage'))):
print("Calculating trace moving average per residue!")
# tma = tracemovingaverage
df = pd.read_csv(prot+"_jet.res", delimiter=r"\s+")
print(df.columns)
# Get the window of series
# of observations of specified window size
window_size = 3
windows = df['trace'].rolling(window_size)
# Create a series of moving
# averages of each window
moving_averages = windows.mean()
tma = moving_averages
print(df)
df['tracemovingaverage'] = tma.round(4)
#The first two elements are just copied bc they don't exist due to the moving average.
df['tracemovingaverage'].iloc[0] = df['trace'].iloc[0].round(4)
df['tracemovingaverage'].iloc[1] = df['trace'].iloc[1].round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
#If a real pdb file is given, calculate or get Bfactors for the residues. #If a real pdb file is given, calculate or get Bfactors for the residues.
if(((normWeightMode=='bfactor') or (normWeightMode=='max-trace-bfactor') or (normWeightMode=='max-bfactor-trace'))): if(((normWeightMode=='bfactor') or (normWeightMode=='maxtracebfactor') or (normWeightMode=='maxbfactortrace'))):
isCalc = False isCalc = True
if (pdbfile == None): if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate or get Bfactors!") print("ERROR: There is not any pdb file to calculate or get Bfactors!")
sys.exit(-1) sys.exit(-1)
else: else:
if (isCalc): if (isCalc):
print("Computing Bfactors from the user-provided pdb file using ANM with all modes.") print("Computing Bfactors from the user-provided pdb file using ANM with all modes.")
bfactors = calcBfactors(pdbfile, outfile=None, nmodes=None, \ #Original
attenuate="true", ranksorted="true", inverted=False) # bfactors = calcBfactors(pdbfile, outfile=None, nmodes=None, \
# attenuate="true", ranksorted="true", inverted=False)
bfactors = calcBfactors(pdbfile, outfile=None, nmodes=10, \
attenuate="false", ranksorted="true", inverted=True)
else: else:
print("Getting Bfactors from the user-provided pdb file.") print("Getting Bfactors from the user-provided pdb file.")
bfactors = getBfactors(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true") bfactors = getBfactors(pdbfile, outfile=None,\
attenuate="true", ranksorted="true", inverted=False)
bfactors = bfactors bfactors = bfactors
df = pd.read_table(prot+"_jet.res") df = pd.read_table(prot+"_jet.res")
......
...@@ -358,12 +358,12 @@ computePredSimple<-function(mat, distTrace, wt, thresh){ ...@@ -358,12 +358,12 @@ computePredSimple<-function(mat, distTrace, wt, thresh){
if(length(sel)>0){ if(length(sel)>0){
sortedDist=sort(distTrace[sel-1]) sortedDist=sort(distTrace[sel-1])
if((i==694) & (a=="v")) { # if((i==694) & (a=="v")) {
print(i) # print(i)
print(sel) # print(sel)
print(distTrace[sel-1]) # print(distTrace[sel-1])
print(sortedDist) # print(sortedDist)
} # }
} }
if(length(sel)>0){sortedDist=sort(distTrace[sel-1])} if(length(sel)>0){sortedDist=sort(distTrace[sel-1])}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment