Commit 42dbf51a by Mustafa Tekpinar

Added escottfile and ranksorted args to prescott UI.

parent f98b9cb1
......@@ -329,6 +329,15 @@ def main():
help='Do not touch this if you don\'t know what you are doing! Default is true',
required=False, default='true')
main_parser.add_argument('--ranksorted', dest='ranksorted', type=str, \
help='If your data is already ranksorted, change this argument to true. Default is false',
required=False, default='false')
main_parser.add_argument('--escottformat', dest='escottformat', type=str, \
help='Main format of escott file. There are two possibilities: gemme or singleline. \n'+\
'gemme: a horizontal format of 20 rows and N columns.\n'+\
'singleline: each line contains a mutation and its value separated by a space.\n'+\
'M1A 0.378\n', required=False, default='gemme')
# main_parser.add_argument('--colormap', dest='colormap', type=str, \
# help='A colormap as defined in matplotlib',
# required=False, default='coolwarm_r')
......@@ -366,6 +375,9 @@ def main():
usePopMaxOrNot = args.usepopmax.lower()
version = args.equation
if (os.path.exists(escottDataPath)):
if(args.escottformat=='gemme'):
#Parse the file containing raw ESCOTT scores.
scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False)
......@@ -382,6 +394,14 @@ def main():
#Mostly, I am using normPred_Combi_singleline as input file and it doesn't have a header.
df = pd.read_table(protein+'_singleline.txt', sep="\s+", header=None)
elif(args.escottformat=='singleline'):
df = pd.read_table(args.escottfile, sep="\s+", header=None)
else:
print('@> ERROR: Unknown escott format. It should be gemme or singleline!')
sys.exit(-1)
if(args.ranksorted == 'false'):
#data = np.genfromtxt(args.input,dtype=None)
data = df.to_numpy()
rawData = data.T[1]
......@@ -392,6 +412,8 @@ def main():
f.write("{:} {:6.2f}\n".format(data.T[0][i], processedData[i]))
dfESCOTT = pd.read_table(protein+'_singleline_1-ranksort.txt', sep='\s+', header=None)
else:
dfESCOTT = df
dfESCOTT.columns = ['mutant', 'ESCOTT']
dfESCOTT['protein']=protein
......@@ -416,10 +438,8 @@ def main():
(row['ClinVar Clinical Significance']=='Pathogenic') or \
(row['ClinVar Clinical Significance']=='Likely pathogenic')):
gnomadDF.at[index,'labels'] = 1
print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)])
# print(gnomadDF['ClinVar Clinical Significance'])
# Add frequency column and a dummy frequency to each row in myBigMergedDF
myBigMergedDF['frequency'] = 999.0
myBigMergedDF['labels'] = np.nan
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment