Commit 83fcd07e by Mustafa Tekpinar

Residue ID correction for already ranksorted scores.

When the MSA file covers only a subset of the full protein
(for example, residues 520-5207 instead of 1-5207), the CSV
output was written with incorrect residue IDs, because rows
were numbered from 1 instead of using the actual positions
found in the scores. This commit corrects the residue IDs,
even though such inputs are not common.
parent 09e6079f
......@@ -650,18 +650,20 @@ def main():
usePopMaxOrNot = args.usepopmax.lower()
version = args.equation
if (os.path.exists(args.escottfile)):
#Convert the matrix format to singleline format
localResidueList = None
if(args.sequencefile != None):
referenceSeq = SeqIO.read(args.sequencefile, 'fasta')
localResidueList = list(referenceSeq.seq)
aaOrderList = list('ACDEFGHIKLMNPQRSTVWY')
if(args.escottformat=='gemme'):
#Parse the file containing raw ESCOTT scores.
scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False)
#Convert the matrix format to singleline format
localResidueList = None
if(args.sequencefile != None):
referenceSeq = SeqIO.read(args.sequencefile, 'fasta')
localResidueList = list(referenceSeq.seq)
aaOrderList = list('ACDEFGHIKLMNPQRSTVWY')
writeSinglelineFormat(scanningMatrix, protein+'_singleline.txt', residueList = localResidueList,\
beg=0, end=None, aaOrder = aaOrderList, \
offSet=0)
......@@ -887,10 +889,12 @@ def main():
# is actually log10 frequencies. Normally, one can deduce it from the values as well
# but it is always better to be clear.
myBigMergedDF = myBigMergedDF.rename(columns={'frequency': 'log10frequency'})
myBigMergedDF['mutant'] = myBigMergedDF['mutant'].str.upper()
# myBigMergedDF = myBigMergedDF['mutant'].apply(lambda x: x.upper())
myBigMergedDF.to_csv(outfile+'-details.csv', index=None)
print(localResidueList)
with open(outfile+'.csv', 'w') as my_file:
my_file.write(",")
......@@ -900,24 +904,27 @@ def main():
else:
my_file.write(item+",")
posList = myBigMergedDF['position'].unique().tolist()
# print(posList)
for pos in range(len(localResidueList)):
resAndPos = str(localResidueList[pos])+str(pos+1)
resAndPos = str(localResidueList[pos])+str(posList[pos])
my_file.write("{},".format(resAndPos))
for item in alphabeticalAminoAcidsList:
variant = str(localResidueList[pos]).upper()+str(pos+1)+item
variant = str(localResidueList[pos]).upper()+str(posList[pos])+item
# print(variant)
if(item=='Y'):
#print(variant)
#print(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])
my_file.write("{:.2f}\n".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])))
else:
#print(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])
# print(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values)
my_file.write("{:.2f},".format(float(myBigMergedDF.loc[myBigMergedDF['mutant']==variant, 'PRESCOTT'].values[0])))
if(os.path.exists(protein+'_singleline.txt')):
os.remove(protein+'_singleline.txt')
if(os.path.exists(protein+'_singleline_1-ranksort.txt')):
os.remove(protein+'_singleline_1-ranksort.txt')
# if(os.path.exists(protein+'_singleline.txt')):
# os.remove(protein+'_singleline.txt')
# if(os.path.exists(protein+'_singleline_1-ranksort.txt')):
# os.remove(protein+'_singleline_1-ranksort.txt')
if __name__ == "__main__":
main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment