Commit 42dbf51a by Mustafa Tekpinar

Added escottformat and ranksorted args to prescott UI.

parent f98b9cb1
...@@ -328,7 +328,16 @@ def main(): ...@@ -328,7 +328,16 @@ def main():
main_parser.add_argument('--usefrequencies', dest='usefrequencies', type=str, \ main_parser.add_argument('--usefrequencies', dest='usefrequencies', type=str, \
help='Do not touch this if you don\'t know what you are doing! Default is true', help='Do not touch this if you don\'t know what you are doing! Default is true',
required=False, default='true') required=False, default='true')
main_parser.add_argument('--ranksorted', dest='ranksorted', type=str, \
help='If your data is already ranksorted, change this argument to true. Default is false',
required=False, default='false')
main_parser.add_argument('--escottformat', dest='escottformat', type=str, \
help='Main format of escott file. There are two possibilities: gemme or singleline. \n'+\
'gemme: a horizontal format of 20 rows and N columns.\n'+\
'singleline: each line contains a mutation and its value separated by a space.\n'+\
'M1A 0.378\n', required=False, default='gemme')
# main_parser.add_argument('--colormap', dest='colormap', type=str, \ # main_parser.add_argument('--colormap', dest='colormap', type=str, \
# help='A colormap as defined in matplotlib', # help='A colormap as defined in matplotlib',
# required=False, default='coolwarm_r') # required=False, default='coolwarm_r')
...@@ -354,7 +363,7 @@ def main(): ...@@ -354,7 +363,7 @@ def main():
print("@> Use population max. freq : {}".format(str(args.usepopmax).lower())) print("@> Use population max. freq : {}".format(str(args.usepopmax).lower()))
print("@> Which equation to use (Default=2): {}".format(str(args.equation))) print("@> Which equation to use (Default=2): {}".format(str(args.equation)))
print("@> Scaling coefficient (Default=1.0): {}".format(args.coefficient)) print("@> Scaling coefficient (Default=1.0): {}".format(args.coefficient))
print("@> Frequency cutoff (Default=-4.0) : {}".format(args.frequencycutoff)) print("@> Frequency cutoff (Default=-4.0) : {}".format(args.frequencycutoff))
print("@> Name of the output file : {}".format(args.outputfile)) print("@> Name of the output file : {}".format(args.outputfile))
# End of argument parsing! # End of argument parsing!
...@@ -366,32 +375,45 @@ def main(): ...@@ -366,32 +375,45 @@ def main():
usePopMaxOrNot = args.usepopmax.lower() usePopMaxOrNot = args.usepopmax.lower()
version = args.equation version = args.equation
if (os.path.exists(escottDataPath)): if (os.path.exists(escottDataPath)):
#Parse the file containing raw ESCOTT scores.
scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False) if(args.escottformat=='gemme'):
#Convert the matrix format to singleline format #Parse the file containing raw ESCOTT scores.
localResidueList = None scanningMatrix = parseGEMMEoutput(args.escottfile, verbose=False)
if(args.sequencefile != None):
referenceSeq = SeqIO.read(args.sequencefile, 'fasta') #Convert the matrix format to singleline format
localResidueList = list(referenceSeq.seq) localResidueList = None
aaOrderList = list('ACDEFGHIKLMNPQRSTVWY') if(args.sequencefile != None):
writeSinglelineFormat(scanningMatrix, protein+'_singleline.txt', residueList = localResidueList,\ referenceSeq = SeqIO.read(args.sequencefile, 'fasta')
beg=0, end=None, aaOrder = aaOrderList, \ localResidueList = list(referenceSeq.seq)
offSet=0) aaOrderList = list('ACDEFGHIKLMNPQRSTVWY')
writeSinglelineFormat(scanningMatrix, protein+'_singleline.txt', residueList = localResidueList,\
#Mostly, I am using normPred_Combi_singleline as input file and it doesn't have a header. beg=0, end=None, aaOrder = aaOrderList, \
df = pd.read_table(protein+'_singleline.txt', sep="\s+", header=None) offSet=0)
#data = np.genfromtxt(args.input,dtype=None) #Mostly, I am using normPred_Combi_singleline as input file and it doesn't have a header.
data = df.to_numpy() df = pd.read_table(protein+'_singleline.txt', sep="\s+", header=None)
rawData = data.T[1]
processedData = 1.0 - rankSortData(rawData) elif(args.escottformat=='singleline'):
with open(protein+'_singleline_1-ranksort.txt', 'w') as f:
#f.write("#Resid Value\n") df = pd.read_table(args.escottfile, sep="\s+", header=None)
for i in range (len(processedData)): else:
f.write("{:} {:6.2f}\n".format(data.T[0][i], processedData[i])) print('@> ERROR: Unknown escott format. It should be gemme or singleline!')
sys.exit(-1)
dfESCOTT = pd.read_table(protein+'_singleline_1-ranksort.txt', sep='\s+', header=None)
if(args.ranksorted == 'false'):
#data = np.genfromtxt(args.input,dtype=None)
data = df.to_numpy()
rawData = data.T[1]
processedData = 1.0 - rankSortData(rawData)
with open(protein+'_singleline_1-ranksort.txt', 'w') as f:
#f.write("#Resid Value\n")
for i in range (len(processedData)):
f.write("{:} {:6.2f}\n".format(data.T[0][i], processedData[i]))
dfESCOTT = pd.read_table(protein+'_singleline_1-ranksort.txt', sep='\s+', header=None)
else:
dfESCOTT = df
dfESCOTT.columns = ['mutant', 'ESCOTT'] dfESCOTT.columns = ['mutant', 'ESCOTT']
dfESCOTT['protein']=protein dfESCOTT['protein']=protein
...@@ -416,10 +438,8 @@ def main(): ...@@ -416,10 +438,8 @@ def main():
(row['ClinVar Clinical Significance']=='Pathogenic') or \ (row['ClinVar Clinical Significance']=='Pathogenic') or \
(row['ClinVar Clinical Significance']=='Likely pathogenic')): (row['ClinVar Clinical Significance']=='Likely pathogenic')):
gnomadDF.at[index,'labels'] = 1 gnomadDF.at[index,'labels'] = 1
print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)]) print(gnomadDF.loc[(gnomadDF['labels']==0) | (gnomadDF['labels']==1)])
# print(gnomadDF['ClinVar Clinical Significance']) # print(gnomadDF['ClinVar Clinical Significance'])
# Add frequency column and a dummy frequency to each row in myBigMergedDF # Add frequency column and a dummy frequency to each row in myBigMergedDF
myBigMergedDF['frequency'] = 999.0 myBigMergedDF['frequency'] = 999.0
myBigMergedDF['labels'] = np.nan myBigMergedDF['labels'] = np.nan
......
...@@ -5,5 +5,4 @@ scipy ...@@ -5,5 +5,4 @@ scipy
pandas pandas
biopython<=1.79 biopython<=1.79
biotite biotite
sklearn scikit-learn
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment