Commit 33bc402b by Mustafa Tekpinar

Added (combined) computations that use SC1, SC2 and/or SC3.

parent ce67d931
...@@ -164,6 +164,46 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){ ...@@ -164,6 +164,46 @@ if((normWeightMode=="maxtracepc") | (normWeightMode=="maxpctrace")){
trace<-append(trace, jet[row, "trace"]) trace<-append(trace, jet[row, "trace"])
} }
} }
}else if (normWeightMode=="combinedv1"){
print("Using only combinedv1")
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="combinedv1")==1){
trace<-append(trace, jet[row, "combinedv1"])
}else{
print("No field called combinedv1 in the JET output!")
quit(status=-1)
}
}
}else if (normWeightMode=="combinedv2"){
print("Using only combinedv2")
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="combinedv2")==1){
trace<-append(trace, jet[row, "combinedv2"])
}else{
print("No field called combinedv2 in the JET output!")
quit(status=-1)
}
}
}else if (normWeightMode=="combinedv3"){
print("Using only combinedv3")
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="combinedv3")==1){
trace<-append(trace, jet[row, "combinedv3"])
}else{
print("No field called combinedv3 in the JET output!")
quit(status=-1)
}
}
}else if (normWeightMode=="combinedv4"){
print("Using only combinedv4")
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="combinedv4")==1){
trace<-append(trace, jet[row, "combinedv4"])
}else{
print("No field called combinedv4 in the JET output!")
quit(status=-1)
}
}
}else if (normWeightMode=="tracemovingaverage"){ }else if (normWeightMode=="tracemovingaverage"){
print("Using only tracemovingaverage") print("Using only tracemovingaverage")
for (row in 1:nrow(jet)) { for (row in 1:nrow(jet)) {
...@@ -374,7 +414,11 @@ if(simple){ ...@@ -374,7 +414,11 @@ if(simple){
# Epistatic model normalization # Epistatic model normalization
normPred=normalizePred(pred, trace, wt) normPred=normalizePred(pred, trace, wt)
rownames(normPred)=aa rownames(normPred)=aa
# output the normalized predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredInd,paste0(prot,"_normPred_evolInd.txt"))
# output the predicted normalized mutational effects based on evolutionary distance (conservation at the bottom)
write.table(normPred,paste0(prot,"_normPred_evolEpi.txt"))
}else{ }else{
#Independent model normalization #Independent model normalization
normPredInd = normalizePredWithNbSeqsZeroSelMult(predInd, trace, wt, list(pos,subsaa)) normPredInd = normalizePredWithNbSeqsZeroSelMult(predInd, trace, wt, list(pos,subsaa))
...@@ -383,15 +427,16 @@ if(simple){ ...@@ -383,15 +427,16 @@ if(simple){
# Epistatic model normalization # Epistatic model normalization
normPred=normalizePredSelMult(pred, trace, wt, list(pos,subsaa)) normPred=normalizePredSelMult(pred, trace, wt, list(pos,subsaa))
names(normPred)=rawMut names(normPred)=rawMut
}
# output the normalized predicted mutational effects based on sequence counts (conservation at the bottom) # output the normalized predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredInd,paste0(prot,"_normPred_evolInd.txt")) write.table(normPredInd,paste0(prot,"_normPred_evolInd.txt"), quote = FALSE, col.names=FALSE)
# output the predicted normalized mutational effects based on evolutionary distance (conservation at the bottom) # output the predicted normalized mutational effects based on evolutionary distance (conservation at the bottom)
write.table(normPred,paste0(prot,"_normPred_evolEpi.txt")) write.table(normPred,paste0(prot,"_normPred_evolEpi.txt"), quote = FALSE, col.names=FALSE)
}
print("done") print("done")
print("running combined model...") print("running combined model...")
#Read frequencies from the jet output file. #Read frequencies from the jet output file.
frequencies = c() frequencies = c()
...@@ -427,14 +472,18 @@ alphabet = selectedAlphabet ...@@ -427,14 +472,18 @@ alphabet = selectedAlphabet
if(simple){ if(simple){
normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet) normPredCombi = normalizePredWithNbSeqsPC(pred,trace,wt,alpha,nbSeqs,alphabet)
rownames(normPredCombi)=aa rownames(normPredCombi)=aa
# output the predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt"))
}else{ }else{
#normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet) #normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs[[1]],alphabet)
normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs,alphabet) normPredCombi = normalizePredWithNbSeqsPCSelMult(pred,trace,wt,list(pos,subsaa),alpha,nbSeqs,alphabet)
# names(normPredCombi)=rawMut # names(normPredCombi)=rawMut
names(normPredCombi)=rawMut names(normPredCombi)=rawMut
# output the predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt"), quote = FALSE, col.names=FALSE)
} }
# output the predicted mutational effects based on sequence counts (conservation at the bottom)
write.table(normPredCombi,paste0(prot,"_normPred_evolCombi.txt"))
print("done") print("done")
# Since I am doing the plots with Python, I am commenting this part # Since I am doing the plots with Python, I am commenting this part
......
...@@ -515,6 +515,10 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -515,6 +515,10 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
(normWeightMode != 'maxtracecvpc') and \ (normWeightMode != 'maxtracecvpc') and \
(normWeightMode != 'halfpccv') and \ (normWeightMode != 'halfpccv') and \
(normWeightMode != 'halfcvpc') and \ (normWeightMode != 'halfcvpc') and \
(normWeightMode != 'combinedv1') and \
(normWeightMode != 'combinedv2') and \
(normWeightMode != 'combinedv3') and \
(normWeightMode != 'combinedv4') and \
(normWeightMode != 'maxtracehalfcvpc') and \ (normWeightMode != 'maxtracehalfcvpc') and \
(normWeightMode != 'maxtracehalftracecvhalfcvpc') and \ (normWeightMode != 'maxtracehalftracecvhalfcvpc') and \
(normWeightMode != 'maxhalftracepchalftracecvhalfcvpc') and \ (normWeightMode != 'maxhalftracepchalftracecvhalfcvpc') and \
...@@ -524,6 +528,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -524,6 +528,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
" 'maxtracepc', 'maxtracedfi', 'maxpccv', 'maxtracesc',"+\ " 'maxtracepc', 'maxtracedfi', 'maxpccv', 'maxtracesc',"+\
" 'maxtracebfactor', 'maxtracepcsc', 'maxhalftracepchalftracecvhalfcvpc', "+\ " 'maxtracebfactor', 'maxtracepcsc', 'maxhalftracepchalftracecvhalfcvpc', "+\
" 'maxtracecv', 'maxtracepccv', maxtracepchalfpccv"+\ " 'maxtracecv', 'maxtracepccv', maxtracepchalfpccv"+\
" 'combinedv1', 'combinedv2', 'combinedv3', 'combinedv4'"+\
" 'halfcvpc', 'maxtracehalfcvpc' or maxtracehalftracecvhalfcvpc!") " 'halfcvpc', 'maxtracehalfcvpc' or maxtracehalftracecvhalfcvpc!")
sys.exit(-1) sys.exit(-1)
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (c) 2018: Elodie Laine # Copyright (c) 2018-2022: Elodie Laine - Mustafa Tekpinar
# This code is part of the gemme package and governed by its license. # This code is part of the gemme package and governed by its license.
# Please see the LICENSE.txt file included as part of this package. # Please see the LICENSE.txt file included as part of this package.
...@@ -12,6 +12,8 @@ import math ...@@ -12,6 +12,8 @@ import math
import subprocess import subprocess
import shutil import shutil
import glob import glob
import pandas as pd
import numpy as np
...@@ -76,6 +78,12 @@ def editConfJET(N): ...@@ -76,6 +78,12 @@ def editConfJET(N):
reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True) reCode=subprocess.call("sed -i 's/results\t\t5000/results\t\t"+str(N)+"/' default.conf",shell=True)
return(reCode) return(reCode)
def minMaxNormalization(data):
"""
Min-max normalization of a data array.
"""
return (data - np.min(data)) / (np.max(data) - np.min(data))
# Run JET to compute TJET values # Run JET to compute TJET values
def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
""" """
...@@ -163,12 +171,179 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): ...@@ -163,12 +171,179 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
if(pdbfile == None): if(pdbfile == None):
jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p J -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out" prot+".pdb -o `pwd` -p J -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out"
print("\nRunning command:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
else: else:
# jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ # Calculate SC1
# prot+".pdb -o `pwd` -p AVJ -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" > "+prot+".out" jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 3"+" > "+prot+".out"
#One can also add: -g 'trace,pc,cv,clusters,axs'
print("\nRunning for SC1:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc1.res")
dir_name = prot+"/"
if os.path.isdir(dir_name):
for f in os.listdir(dir_name):
f_path = os.path.join(dir_name, f)
if os.path.isfile(f_path):
os.remove(f_path)
os.rmdir(dir_name)
# Calculate SC2
jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 4"+" > "+prot+".out"
#One can also add: -g 'trace,pc,cv,clusters,axs'
print("\nRunning for SC2:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc2.res")
dir_name = prot+"/"
if os.path.isdir(dir_name):
for f in os.listdir(dir_name):
f_path = os.path.join(dir_name, f)
if os.path.isfile(f_path):
os.remove(f_path)
os.rmdir(dir_name)
# Calculate SC3
jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 5"+" > "+prot+".out" prot+".pdb -o `pwd` -p AVJCG -r input -f "+prot+"_"+chainID+".fasta -d chain -n "+n+" -a 5"+" > "+prot+".out"
#One can also add: -g 'trace,pc,cv,clusters,axs' #One can also add: -g 'trace,pc,cv,clusters,axs'
print("\nRunning for SC3:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet_sc3.res")
dir_name = prot+"/"
if os.path.isdir(dir_name):
for f in os.listdir(dir_name):
f_path = os.path.join(dir_name, f)
if os.path.isfile(f_path):
os.remove(f_path)
os.rmdir(dir_name)
dfSC1 = pd.read_table(prot+"_jet_sc1.res", delimiter=r"\s+")
print(dfSC1)
dfSC2 = pd.read_table(prot+"_jet_sc2.res", delimiter=r"\s+")
print(dfSC2)
dfSC3 = pd.read_table(prot+"_jet_sc3.res", delimiter=r"\s+")
print(dfSC3)
dfSC1['sc1'] =dfSC1['clusters']
dfSC1['sc2'] =dfSC2['clusters']
dfSC1['sc3'] =dfSC3['clusters']
print(dfSC1)
#Calculate combined values of SC1, SC2 and SC3 and normalize them.
combinedValuesV1 = []
combinedValuesV2 = []
value1List = []
value2List = []
value3List = []
value4List = []
value5List = []
value6List = []
temp1List = []
temp2List = []
temp3List = []
for i in range(len(dfSC1)):
value1=(dfSC1.loc[i, "sc1"]*dfSC1.loc[i, "trace"]) + dfSC1.loc[i, "trace"]
value1List.append(value1)
value2=(dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)**2 + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)
value2List.append(value2)
value3=(dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)**2 + ((dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)
value3List.append(value3)
value4=(dfSC1.loc[i, "sc1"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)**2 + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
value4List.append(value4)
value5=(dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
value5List.append(value5)
value6=(dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)
value6List.append(value6)
temp1 = (dfSC1.loc[i, "sc1"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "pc"])/2)**2
temp1List.append(temp1)
temp2 = (dfSC1.loc[i, "sc2"]*(dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "trace"]+dfSC1.loc[i, "cv"])/2)**2
temp2List.append(temp2)
temp3 = (dfSC1.loc[i, "sc3"]*(dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2) + ((dfSC1.loc[i, "pc"]+dfSC1.loc[i, "cv"])/2)**2
temp3List.append(temp3)
combinedValuesV1.append(max([value1, value2, value3]))
combinedValuesV2.append(max([value2, value3, value4]))
#MinMax normalize the list
combinedValuesV1MinMaxed = minMaxNormalization(combinedValuesV1)
combinedValuesV2MinMaxed = minMaxNormalization(combinedValuesV2)
# value1ListMinMaxed = minMaxNormalization(value1List)
# value2ListMinMaxed = minMaxNormalization(value2List)
# value3ListMinMaxed = minMaxNormalization(value3List)
# value4ListMinMaxed = minMaxNormalization(value4List)
# combinedValuesV3MinMaxed = []
# for i in range(len(dfSC1)):
# combinedValuesV3MinMaxed.append(max([value2ListMinMaxed[i], value3ListMinMaxed[i], value4ListMinMaxed[i]]))
#Let's skip that minmax normalization part
combinedValuesV3MinMaxed = []
for i in range(len(dfSC1)):
maxTemp = max([value2List[i], value3List[i], value4List[i]])
if(maxTemp > 1.0):
combinedValuesV3MinMaxed.append(1.0)
else:
combinedValuesV3MinMaxed.append(maxTemp)
combinedValuesV4MinMaxed = []
for i in range(len(dfSC1)):
maxTemp = max([temp1List[i], temp2List[i], temp3List[i]])
if(maxTemp > 1.0):
combinedValuesV4MinMaxed.append(1.0)
else:
combinedValuesV4MinMaxed.append(maxTemp)
dfSC1['combinedv1'] = combinedValuesV1MinMaxed.round(4)
dfSC1['combinedv2'] = combinedValuesV2MinMaxed.round(4)
dfSC1['combinedv3'] = list(np.around(np.array(combinedValuesV3MinMaxed), 4))
dfSC1['combinedv4'] = list(np.around(np.array(combinedValuesV4MinMaxed), 4))
#dfSC1['combinedv3'] = combinedValuesV3MinMaxed.round(4)
# dfSC1['cv'] = dfSC1['cv'].round(4)
# dfSC1['pc'] = dfSC1['pc'].round(4)
# dfSC1['tr'] = dfSC1['tr'].round(4)
# dfSC1['freq'] = dfSC1['freq'].round(1)
# dfSC1['trace'] = dfSC1['trace'].round(4)
# dfSC1['clusters'] = dfSC1['clusters'].round(4)
# dfSC1['clusnumber'] = dfSC1['clusnumber'].round(1)
# dfSC1['sc1'] = dfSC1['sc1'].round(4)
# dfSC1['sc2'] = dfSC1['sc2'].round(4)
# dfSC1['sc3'] = dfSC1['sc3'].round(4)
dfSC1.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
#dfSC1.to_csv(prot+"_jet.res", header=True, index=None, mode='w')
#sys.exit(-1)
else: else:
if(pdbfile == None): if(pdbfile == None):
jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
...@@ -178,10 +353,10 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl): ...@@ -178,10 +353,10 @@ def launchJET(prot, retMet, bFile, fFile, pdbfile, chains, n, N, nl):
# prot+".pdb -o `pwd` -p AVJ -r "+retMet+" -d chain -n "+n+" > "+prot+".out" # prot+".pdb -o `pwd` -p AVJ -r "+retMet+" -d chain -n "+n+" > "+prot+".out"
jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\ jetcmd = "java -Xmx4096m -cp $JET2_PATH:$JET2_PATH/jet/extLibs/vecmath.jar jet.JET -c default.conf -i "+\
prot+".pdb -o `pwd` -p AVJCG -r "+retMet+" -d chain -n "+n+" -a 5"+" > "+prot+".out" prot+".pdb -o `pwd` -p AVJCG -r "+retMet+" -d chain -n "+n+" -a 5"+" > "+prot+".out"
print("\nRunning:\n"+jetcmd) # print("\nRunning:\n"+jetcmd)
reCode=subprocess.call(jetcmd,shell=True) # reCode=subprocess.call(jetcmd,shell=True)
if os.path.isfile(prot+"/"+prot+"_jet.res"): # if os.path.isfile(prot+"/"+prot+"_jet.res"):
os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res") # os.rename(prot+"/"+prot+"_jet.res",prot+"_jet.res")
return(reCode) return(reCode)
# Run Rscript to compute predictions # Run Rscript to compute predictions
......
...@@ -194,7 +194,8 @@ readMut<-function(fname){ ...@@ -194,7 +194,8 @@ readMut<-function(fname){
pos = list() pos = list()
subsaa = list() subsaa = list()
for(i in 1:n){ for(i in 1:n){
mut = strsplit(rawMut[i],",")[[1]] #Changed this line to handle multiple mutations separated by column(:) as well as comma(,) character.
mut = strsplit(rawMut[i],"\\,|\\:")[[1]]
tmpPos = c() tmpPos = c()
tmpSubsaa = c() tmpSubsaa = c()
for(m in mut){ for(m in mut){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment