Cleaned/clarified prescott script output.

879fb656 · Mustafa Tekpinar · 1329302c · 879fb656
Commit 879fb656 authored Oct 25, 2023 by Mustafa Tekpinar
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 13 additions and 11 deletions

prescott.py prescott/prescott.py +13 -11

No files found.
--- a/prescott/prescott.py
+++ b/prescott/prescott.py
@@ -449,7 +449,7 @@ def main():
    main_parser.add_argument('-o', '--outputfile', dest='outputfile', type=str, \
        help='Name of the output file without file extension. Default extension is txt.', \
-        required=False, default='prescott-scores.txt')
+        required=False, default='prescott-scores')
    main_parser.add_argument('--usepopmax', dest='usepopmax', type=str, \
        help='A true or false value to use population max. frequency of one of the eight populations available in GnomAD (Default is true).',
@@ -508,14 +508,13 @@ def main():
    print("@> Name of the output file          : {}".format(args.outputfile))
    # End of argument parsing!
-    escottDataPath = args.escottfile
+    protein = os.path.splitext(args.escottfile)[0]
-    protein = os.path.splitext(os.path.basename(escottDataPath))[0]
+    outfile = os.path.splitext(args.outputfile)[0]
-    # esmVariantsPath="/mnt/data/tekpinar/software/esm-variants/entire-dataset/"
-    # print(escottDataPath)
    # Check if file exists
    usePopMaxOrNot = args.usepopmax.lower()
    version = args.equation
-    if (os.path.exists(escottDataPath)):
+    if (os.path.exists(args.escottfile)):
        if(args.escottformat=='gemme'):
@@ -578,8 +577,9 @@ def main():
        if((row['ClinVar Clinical Significance']=='Pathogenic/Likely pathogenic') or \
            (row['ClinVar Clinical Significance']=='Pathogenic') or \
            (row['ClinVar Clinical Significance']=='Likely pathogenic')):
-            gnomadDF.at[index,'labels'] = 1   
+            gnomadDF.at[index,'labels'] = 1
-    print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)])
+    if (len(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)]) > 0):   
+        print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)])
    # print(gnomadDF['ClinVar Clinical Significance'])
    # Add frequency column and a dummy frequency to each row in myBigMergedDF
    myBigMergedDF['frequency'] = 999.0
@@ -688,7 +688,9 @@ def main():
    clinvarLabeledDF = myBigMergedDF.loc[(myBigMergedDF['labels']==0) | (myBigMergedDF['labels']==1)]
    clinvarLabeledDF['labels'] = clinvarLabeledDF['labels'].astype('int64')
-    print(clinvarLabeledDF)
+    if(len(clinvarLabeledDF)>0):
+        print("\nMutations with ClinVar labels according to the gnomAD file:\n")
+        print(clinvarLabeledDF)
    #print(myBigMergedDF.loc[(myBigMergedDF['labels']=='0') | (myBigMergedDF['labels']=='1'), 'labels'])
    # print(clinvarLabeledDF['labels'].values)
    # print(clinvarLabeledDF['ESCOTT'].values)
@@ -733,8 +735,8 @@ def main():
        plt.close()
        print("@> AUC= {:.3f} {:.3f}".format( AUC_ESCOTT, AUC_PRESCOTT))
-    myBigMergedDF.to_csv('myBigMergedDF-normalized-asm.csv', index=None)
+    myBigMergedDF.to_csv(outfile+'-details.csv', index=None)
-    myBigMergedDF.to_csv(args.outputfile, columns=['mutant', 'PRESCOTT'], index=False, header=None, sep=' ')
+    myBigMergedDF.to_csv(outfile+'.txt', columns=['mutant', 'PRESCOTT'], index=False, header=None, sep=' ')
 if __name__ == "__main__":