Commit 879fb656 by Mustafa Tekpinar

Cleaned/clarified prescott script output.

parent 1329302c
......@@ -449,7 +449,7 @@ def main():
main_parser.add_argument('-o', '--outputfile', dest='outputfile', type=str, \
help='Name of the output file without file extension. Default extension is txt.', \
required=False, default='prescott-scores.txt')
required=False, default='prescott-scores')
main_parser.add_argument('--usepopmax', dest='usepopmax', type=str, \
help='A true or false value to use population max. frequency of one of the eight populations available in GnomAD (Default is true).',
......@@ -508,14 +508,13 @@ def main():
print("@> Name of the output file : {}".format(args.outputfile))
# End of argument parsing!
escottDataPath = args.escottfile
protein = os.path.splitext(os.path.basename(escottDataPath))[0]
# esmVariantsPath="/mnt/data/tekpinar/software/esm-variants/entire-dataset/"
# print(escottDataPath)
protein = os.path.splitext(args.escottfile)[0]
outfile = os.path.splitext(args.outputfile)[0]
# Check if file exists
usePopMaxOrNot = args.usepopmax.lower()
version = args.equation
if (os.path.exists(escottDataPath)):
if (os.path.exists(args.escottfile)):
if(args.escottformat=='gemme'):
......@@ -579,6 +578,7 @@ def main():
(row['ClinVar Clinical Significance']=='Pathogenic') or \
(row['ClinVar Clinical Significance']=='Likely pathogenic')):
gnomadDF.at[index,'labels'] = 1
if (len(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)]) > 0):
print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)])
# print(gnomadDF['ClinVar Clinical Significance'])
# Add frequency column and a dummy frequency to each row in myBigMergedDF
......@@ -688,6 +688,8 @@ def main():
clinvarLabeledDF = myBigMergedDF.loc[(myBigMergedDF['labels']==0) | (myBigMergedDF['labels']==1)]
clinvarLabeledDF['labels'] = clinvarLabeledDF['labels'].astype('int64')
if(len(clinvarLabeledDF)>0):
print("\nMutations with ClinVar labels according to the gnomAD file:\n")
print(clinvarLabeledDF)
#print(myBigMergedDF.loc[(myBigMergedDF['labels']=='0') | (myBigMergedDF['labels']=='1'), 'labels'])
# print(clinvarLabeledDF['labels'].values)
......@@ -733,8 +735,8 @@ def main():
plt.close()
print("@> AUC= {:.3f} {:.3f}".format( AUC_ESCOTT, AUC_PRESCOTT))
myBigMergedDF.to_csv('myBigMergedDF-normalized-asm.csv', index=None)
myBigMergedDF.to_csv(args.outputfile, columns=['mutant', 'PRESCOTT'], index=False, header=None, sep=' ')
myBigMergedDF.to_csv(outfile+'-details.csv', index=None)
myBigMergedDF.to_csv(outfile+'.txt', columns=['mutant', 'PRESCOTT'], index=False, header=None, sep=' ')
if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment