Commit 42dbf51a by Mustafa Tekpinar

Added escottfile and ranksorted args to prescott UI.

parent f98b9cb1
...@@ -329,6 +329,15 @@ def main(): ...@@ -329,6 +329,15 @@ def main():
help='Do not touch this if you don\'t know what you are doing! Default is true', help='Do not touch this if you don\'t know what you are doing! Default is true',
required=False, default='true') required=False, default='true')
main_parser.add_argument('--ranksorted', dest='ranksorted', type=str, \
help='If your data is already ranksorted, change this argument to true. Default is false',
required=False, default='false')
main_parser.add_argument('--escottformat', dest='escottformat', type=str, \
help='Main format of escott file. There are two possibilities: gemme or singleline. \n'+\
'gemme: a horizontal format of 20 rows and N columns.\n'+\
'singleline: each line contains a mutation and its value separated by a space.\n'+\
'M1A 0.378\n', required=False, default='gemme')
# main_parser.add_argument('--colormap', dest='colormap', type=str, \ # main_parser.add_argument('--colormap', dest='colormap', type=str, \
# help='A colormap as defined in matplotlib', # help='A colormap as defined in matplotlib',
# required=False, default='coolwarm_r') # required=False, default='coolwarm_r')
...@@ -366,6 +375,9 @@ def main(): ...@@ -366,6 +375,9 @@ def main():
usePopMaxOrNot = args.usepopmax.lower() usePopMaxOrNot = args.usepopmax.lower()
version = args.equation version = args.equation
if (os.path.exists(escottDataPath)): if (os.path.exists(escottDataPath)):
if(args.escottformat=='gemme'):
#Parse the file containing raw ESCOTT scores. #Parse the file containing raw ESCOTT scores.
scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False) scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False)
...@@ -382,6 +394,14 @@ def main(): ...@@ -382,6 +394,14 @@ def main():
#Mostly, I am using normPred_Combi_singleline as input file and it doesn't have a header. #Mostly, I am using normPred_Combi_singleline as input file and it doesn't have a header.
df = pd.read_table(protein+'_singleline.txt', sep="\s+", header=None) df = pd.read_table(protein+'_singleline.txt', sep="\s+", header=None)
elif(args.escottformat=='singleline'):
df = pd.read_table(args.escottfile, sep="\s+", header=None)
else:
print('@> ERROR: Unknown escott format. It should be gemme or singleline!')
sys.exit(-1)
if(args.ranksorted == 'false'):
#data = np.genfromtxt(args.input,dtype=None) #data = np.genfromtxt(args.input,dtype=None)
data = df.to_numpy() data = df.to_numpy()
rawData = data.T[1] rawData = data.T[1]
...@@ -392,6 +412,8 @@ def main(): ...@@ -392,6 +412,8 @@ def main():
f.write("{:} {:6.2f}\n".format(data.T[0][i], processedData[i])) f.write("{:} {:6.2f}\n".format(data.T[0][i], processedData[i]))
dfESCOTT = pd.read_table(protein+'_singleline_1-ranksort.txt', sep='\s+', header=None) dfESCOTT = pd.read_table(protein+'_singleline_1-ranksort.txt', sep='\s+', header=None)
else:
dfESCOTT = df
dfESCOTT.columns = ['mutant', 'ESCOTT'] dfESCOTT.columns = ['mutant', 'ESCOTT']
dfESCOTT['protein']=protein dfESCOTT['protein']=protein
...@@ -416,10 +438,8 @@ def main(): ...@@ -416,10 +438,8 @@ def main():
(row['ClinVar Clinical Significance']=='Pathogenic') or \ (row['ClinVar Clinical Significance']=='Pathogenic') or \
(row['ClinVar Clinical Significance']=='Likely pathogenic')): (row['ClinVar Clinical Significance']=='Likely pathogenic')):
gnomadDF.at[index,'labels'] = 1 gnomadDF.at[index,'labels'] = 1
print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)]) print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)])
# print(gnomadDF['ClinVar Clinical Significance']) # print(gnomadDF['ClinVar Clinical Significance'])
# Add frequency column and a dummy frequency to each row in myBigMergedDF # Add frequency column and a dummy frequency to each row in myBigMergedDF
myBigMergedDF['frequency'] = 999.0 myBigMergedDF['frequency'] = 999.0
myBigMergedDF['labels'] = np.nan myBigMergedDF['labels'] = np.nan
......
...@@ -5,5 +5,4 @@ scipy ...@@ -5,5 +5,4 @@ scipy
pandas pandas
biopython<=1.79 biopython<=1.79
biotite biotite
sklearn scikit-learn
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment