Commit 83fcd07e by Mustafa Tekpinar

Correct residue IDs for already rank-sorted scores.

When the MSA file covers only a subset of the full protein
(for example, residues 520-5207 instead of 1-5207), the csv
output was written with incorrect residue IDs because positions
were numbered from 1. The residue numbers are now taken from the
'position' column of the merged data instead. This case is
uncommon, but it is handled correctly now.
parent 09e6079f
...@@ -650,18 +650,20 @@ def main(): ...@@ -650,18 +650,20 @@ def main():
usePopMaxOrNot = args.usepopmax.lower() usePopMaxOrNot = args.usepopmax.lower()
version = args.equation version = args.equation
if (os.path.exists(args.escottfile)): if (os.path.exists(args.escottfile)):
if(args.escottformat=='gemme'):
#Parse the file containing raw ESCOTT scores.
scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False)
#Convert the matrix format to singleline format #Convert the matrix format to singleline format
localResidueList = None localResidueList = None
if(args.sequencefile != None): if(args.sequencefile != None):
referenceSeq = SeqIO.read(args.sequencefile, 'fasta') referenceSeq = SeqIO.read(args.sequencefile, 'fasta')
localResidueList = list(referenceSeq.seq) localResidueList = list(referenceSeq.seq)
aaOrderList = list('ACDEFGHIKLMNPQRSTVWY') aaOrderList = list('ACDEFGHIKLMNPQRSTVWY')
if(args.escottformat=='gemme'):
#Parse the file containing raw ESCOTT scores.
scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False)
writeSinglelineFormat(scanningMatrix, protein+'_singleline.txt', residueList = localResidueList,\ writeSinglelineFormat(scanningMatrix, protein+'_singleline.txt', residueList = localResidueList,\
beg=0, end=None, aaOrder = aaOrderList, \ beg=0, end=None, aaOrder = aaOrderList, \
offSet=0) offSet=0)
...@@ -887,9 +889,11 @@ def main(): ...@@ -887,9 +889,11 @@ def main():
# is actually log10 frequencies. Normally, one can deduce it from the values as well # is actually log10 frequencies. Normally, one can deduce it from the values as well
# but it is always better to be clear. # but it is always better to be clear.
myBigMergedDF = myBigMergedDF.rename(columns={'frequency': 'log10frequency'}) myBigMergedDF = myBigMergedDF.rename(columns={'frequency': 'log10frequency'})
myBigMergedDF['mutant'] = myBigMergedDF['mutant'].str.upper()
# myBigMergedDF = myBigMergedDF['mutant'].apply(lambda x: x.upper())
myBigMergedDF.to_csv(outfile+'-details.csv', index=None) myBigMergedDF.to_csv(outfile+'-details.csv', index=None)
print(localResidueList)
with open(outfile+'.csv', 'w') as my_file: with open(outfile+'.csv', 'w') as my_file:
my_file.write(",") my_file.write(",")
...@@ -900,24 +904,27 @@ def main(): ...@@ -900,24 +904,27 @@ def main():
else: else:
my_file.write(item+",") my_file.write(item+",")
posList = myBigMergedDF['position'].unique().tolist()
# print(posList)
for pos in range(len(localResidueList)): for pos in range(len(localResidueList)):
resAndPos = str(localResidueList[pos])+str(pos+1) resAndPos = str(localResidueList[pos])+str(posList[pos])
my_file.write("{},".format(resAndPos)) my_file.write("{},".format(resAndPos))
for item in alphabeticalAminoAcidsList: for item in alphabeticalAminoAcidsList:
variant = str(localResidueList[pos]).upper()+str(pos+1)+item variant = str(localResidueList[pos]).upper()+str(posList[pos])+item
# print(variant)
if(item=='Y'): if(item=='Y'):
#print(variant)
#print(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0]) #print(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])
my_file.write("{:.2f}\n".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0]))) my_file.write("{:.2f}\n".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])))
else: else:
#print(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0]) # print(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values)
my_file.write("{:.2f},".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0]))) my_file.write("{:.2f},".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])))
if(os.path.exists(protein+'_singleline.txt')): # if(os.path.exists(protein+'_singleline.txt')):
os.remove(protein+'_singleline.txt') # os.remove(protein+'_singleline.txt')
if(os.path.exists(protein+'_singleline_1-ranksort.txt')): # if(os.path.exists(protein+'_singleline_1-ranksort.txt')):
os.remove(protein+'_singleline_1-ranksort.txt') # os.remove(protein+'_singleline_1-ranksort.txt')
if __name__ == "__main__": if __name__ == "__main__":
main() main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment