Commit e7a7207f by Mustafa Tekpinar

Changed normWeightMode options.

Convert trace->tjet, maxhalftracepchalftracecvhalfcvpc->max
and ssjetormax->sstjetormax in ESGEMME.
parent eb8c519b
......@@ -46,7 +46,7 @@ res = list(computePSSM(ali,N[1],npos),computePSSM(aliCons,N[2],npos),computePSSM
jet=read.table(paste(prot,"_jet.res",sep=""),head=TRUE)
############################################Added by MT to test effect of PC, CV, DFI, Bfactor etc.
trace = c()
if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){
if ((normWeightMode=="max")){
print(paste("Using ", normWeightMode))
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
......@@ -55,8 +55,8 @@ if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){
trace<-append(trace, max((jet[row, "trace"]+jet[row, "pc"])/2.0, max((jet[row, "trace"]+jet[row, "cv"])/2.0, (jet[row, "pc"]+jet[row, "cv"])/2.0 )))
}
}
}else if (normWeightMode=="trace"){
print("Using only JET2 traces")
}else if (normWeightMode=="tjet"){
print("Using only T_JET2 traces")
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="traceMax")==1){
trace<-append(trace, jet[row, "traceMax"])
......@@ -74,22 +74,21 @@ if ((normWeightMode=="maxhalftracepchalftracecvhalfcvpc")){
for (row in 1:nrow(jet)) {
trace<-append(trace, jet[row, "pc"])
}
}else if (normWeightMode=="ssjetormax"){
print("Using only ssjetormax")
}else if (normWeightMode=="sstjetormax"){
print("Using only sstjetormax")
for (row in 1:nrow(jet)) {
if(sum(colnames(jet)=="ssjetormax")==1){
trace<-append(trace, jet[row, "ssjetormax"])
if(sum(colnames(jet)=="sstjetormax")==1){
trace<-append(trace, jet[row, "sstjetormax"])
}else{
print("No field called ssjetormax in the JET output!")
print("No field called sstjetormax in the JET output!")
quit(status=-1)
}
}
}else{
print("ERROR: Unknown --normWeightMode selected!")
print("It can only be 'trace', 'pc', 'cv',")
print("'maxhalftracepchalftracecvhalfcvpc'")
print(" or 'ssjetormax'!")
print("It can only be 'tjet', 'pc', 'cv',")
print("'max' or 'sstjetormax'!")
}
print(trace)
#####################################################################################################################
......
......@@ -296,8 +296,8 @@ def cleanTheMess(prot,bFile,fFile, chainID):
if fFile!=prot+"_"+chainID+".fasta":
if os.path.isfile(prot+"_"+chainID+".fasta"):
os.remove(prot+"_"+chainID+".fasta")
if os.path.isfile(prot+"_jet.res"):
os.remove(prot+"_jet.res")
# if os.path.isfile(prot+"_jet.res"):
# os.remove(prot+"_jet.res")
if os.path.isfile(prot+".pdb"):
os.remove(prot+".pdb")
......@@ -820,10 +820,9 @@ def parse_command_line():
required=False, default=None)
retMet_args.add_argument('--normweightmode', dest='normweightmode', type=str, \
help="It can be one of these: 'trace', 'tracemovingaverage', 'cv', 'pc', 'dfi', 'maxtracesc',"+\
"'maxtracepc', 'maxtracecv', 'maxtracepccv', 'maxtracepcsc', 'maxtracepchalfpccv', "+\
"'halfcvpc', maxhalftracepchalftracecvhalfcvpc or jetormax. Default is 'trace'.",
required=False, default="trace")
help="It can be one of these: 'tjet', 'cv', 'pc',"+\
"max or sstjetormax. Default is 'tjet'.",
required=False, default="tjet")
args = parser.parse_args()
......@@ -863,13 +862,13 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("query protein: "+prot)
if( (normWeightMode != 'trace') and \
if( (normWeightMode != 'tjet') and \
(normWeightMode != 'cv') and \
(normWeightMode != 'pc') and \
(normWeightMode != 'maxhalftracepchalftracecvhalfcvpc') and \
(normWeightMode != 'ssjetormax')):
print("ERROR: normWeightMode can only be 'trace', 'cv', 'pc', "+\
"'maxhalftracepchalftracecvhalfcvpc' or 'ssjetormax'!")
(normWeightMode != 'max') and \
(normWeightMode != 'sstjetormax')):
print("ERROR: normWeightMode can only be 'tjet', 'cv', 'pc', "+\
"'max' or 'sstjetormax'!")
sys.exit(-1)
structure = parsePDB(prot+".pdb")
......@@ -899,31 +898,14 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
shutil.copy2(jetfile, prot+"_jet.res")
print("done")
# #If a real pdb file is given, calculate dfi for the residues.
# if(((normWeightMode=='dfi') or (normWeightMode=='maxtracedfi') or (normWeightMode=='maxdfitrace'))):
# if (pdbfile == None):
# print("ERROR: There is not any pdb file to calculate DFI.")
# sys.exit(-1)
# else:
# print("Calculating %DFI data per residue!")
# dfi = calcPercentDFI(pdbfile, outfile=None, nmodes=None, attenuate="true", ranksorted="true")
# dfi = dfi
# df = pd.read_table(prot+"_jet.res")
# print(df)
# df['dfi'] = dfi.round(4)
# df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
# print(df)
if((normWeightMode=='ssjetormax')):
if((normWeightMode=='sstjetormax')):
if (pdbfile == None):
print("ERROR: There is not any pdb file to calculate DFI.")
print("ERROR: There is not any pdb file.")
sys.exit(-1)
else:
calculateSecondaryStructure(pdbfile)
countCoilSegments(pdbfile+".dssp")
#os.system("python /home/tekpinar/software/scripts/python_scripts/biotite-secondary-structure.py "+pdbfile+" "+pdbfile+".dssp")
#os.system("python /home/tekpinar/software/scripts/python_scripts/count-coil-segments-v2.py "+pdbfile+".dssp >"+pdbfile+".dssp.new")
df = pd.read_table(prot+"_jet.res", sep="\s+")
print(df['pos'])
print(pdbfile+".dssp")
......@@ -935,46 +917,46 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
mergedRes = pd.merge(df, df2, on ='pos', right_index=False)
print(mergedRes)
ssjetormaxList = []
sstjetormaxList = []
maxCoilLength = 5
print("WARNING: Max. coil length = {}".format(maxCoilLength))
for index, row in mergedRes.iterrows():
if(row['ss']=='C') and (row['length']>maxCoilLength):
ssjetormaxList.append(row['trace'])
sstjetormaxList.append(row['trace'])
else:
maxVal = max([((row['trace']+row['pc'])/2.0), ((row['trace']+row['cv'])/2.0), ((row['cv']+row['pc'])/2.0)])
ssjetormaxList.append(maxVal)
#ssjetormaxList=rankSortProteinData(ssjetormaxList, inverted=False)
df['ssjetormax'] = ssjetormaxList
df['ssjetormax'] = df['ssjetormax'].round(decimals = 4)
sstjetormaxList.append(maxVal)
#sstjetormaxList=rankSortProteinData(sstjetormaxList, inverted=False)
df['sstjetormax'] = sstjetormaxList
df['sstjetormax'] = df['sstjetormax'].round(decimals = 4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
#sys.exit(-1)
#If a real pdb file is given, calculate dfi for the residues.
if(((normWeightMode=='tracemovingaverage'))):
print("Calculating trace moving average per residue!")
# tma = tracemovingaverage
df = pd.read_csv(prot+"_jet.res", delimiter=r"\s+")
print(df.columns)
# Get the window of series
# of observations of specified window size
window_size = 3
windows = df['trace'].rolling(window_size)
# #If a real pdb file is given, calculate dfi for the residues.
# if(((normWeightMode=='tracemovingaverage'))):
# print("Calculating trace moving average per residue!")
# # tma = tracemovingaverage
# df = pd.read_csv(prot+"_jet.res", delimiter=r"\s+")
# print(df.columns)
# # Get the window of series
# # of observations of specified window size
# window_size = 3
# windows = df['trace'].rolling(window_size)
# Create a series of moving
# averages of each window
moving_averages = windows.mean()
tma = moving_averages
# # Create a series of moving
# # averages of each window
# moving_averages = windows.mean()
# tma = moving_averages
print(df)
# print(df)
df['tracemovingaverage'] = tma.round(4)
#The first two elements are just copied bc they don't exist due to the moving average.
df['tracemovingaverage'].iloc[0] = df['trace'].iloc[0].round(4)
df['tracemovingaverage'].iloc[1] = df['trace'].iloc[1].round(4)
df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
# df['tracemovingaverage'] = tma.round(4)
# #The first two elements are just copied bc they don't exist due to the moving average.
# df['tracemovingaverage'].iloc[0] = df['trace'].iloc[0].round(4)
# df['tracemovingaverage'].iloc[1] = df['trace'].iloc[1].round(4)
# df.to_csv(prot+"_jet.res", header=True, index=None, sep='\t', mode='w')
print(df)
# print(df)
# #If a real pdb file is given, calculate or get Bfactors for the residues.
# if(((normWeightMode=='bfactor') or (normWeightMode=='maxtracebfactor') or (normWeightMode=='maxbfactortrace'))):
# isCalc = True
......
......@@ -27,7 +27,7 @@ then
echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Entire mutational map of the protein will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode maxhalftracepchalftracecvhalfcvpc
--pdbfile ../data/blat-af2.pdb --normweightmode max
elif [ "$1" == "withpdb-withmutfile" ]
then
......@@ -36,17 +36,17 @@ then
echo "Using blat-af2.pdb for the structural feature calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode maxhalftracepchalftracecvhalfcvpc \
--pdbfile ../data/blat-af2.pdb --normweightmode max \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut
elif [ "$1" == "ssjetormax" ]
elif [ "$1" == "sstjetormax" ]
then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the secondary structure based calculations!"
echo "Only effects of mutations specified in the Stiffler_2015_BLAT_ECOLX.mut file will be calculated!"
python $ESGEMME_PATH/esgemme.py ../data/aliBLAT.fasta -r input -f ../data/aliBLAT.fasta \
--pdbfile ../data/blat-af2.pdb --normweightmode ssjetormax \
--pdbfile ../data/blat-af2.pdb --normweightmode sstjetormax \
-m ../data/Stiffler_2015_BLAT_ECOLX.mut
else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment