Commit 8603802b by Mustafa Tekpinar

Tested new argument (coilbreakerlength) but gave upon it eventually!

parent e61bdc0c
...@@ -791,7 +791,7 @@ def check_argument_groups(parser, arg_dict, group, argument): ...@@ -791,7 +791,7 @@ def check_argument_groups(parser, arg_dict, group, argument):
parser.error("esgemme requires " + group + parser.error("esgemme requires " + group +
" to be set to input if " + str(argument) + " is used.") " to be set to input if " + str(argument) + " is used.")
return None return None
def calculateSecondaryStructure(pdbfile): def calculateSecondaryStructure(pdbfile, coilBreakerLength):
""" """
Calls dssp, calculates secondary structure and saves the results as a Calls dssp, calculates secondary structure and saves the results as a
text file with .dssp extention. The result contains a residue index and text file with .dssp extention. The result contains a residue index and
...@@ -807,7 +807,6 @@ def calculateSecondaryStructure(pdbfile): ...@@ -807,7 +807,6 @@ def calculateSecondaryStructure(pdbfile):
A structure file in PDB format is the main input. A structure file in PDB format is the main input.
""" """
#pdbfile: Name of the input pdb file #pdbfile: Name of the input pdb file
#outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp" #outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp"
#for each residue in a new line #for each residue in a new line
...@@ -820,11 +819,47 @@ def calculateSecondaryStructure(pdbfile): ...@@ -820,11 +819,47 @@ def calculateSecondaryStructure(pdbfile):
# print(sse) # print(sse)
calphas = array[array.atom_name == "CA"] calphas = array[array.atom_name == "CA"]
residList = (calphas.res_id) residList = (calphas.res_id)
i=0
# #################################################################
# # Check S or T residues that are between coils and convert them to coils.
# # These S or Ts break the continuity of a coil and reduce coil length parameter,
# # which is used later do select Tjet or Max score.
# # We have to do it in a loop because we change S or T to C. When you check ss in a
# # second round, you will capture new C(S|T)+S patterns. We do this until this pattern
# # can not be found!
# debug = True
# myRegex = r'C(S|T){'+str(coilBreakerLength)+',6}C'
# print(myRegex)
# while(1):
# secondaryStructureString=""
# for item in sse:
# secondaryStructureString = secondaryStructureString + item
# # print(secondaryStructureString)
# #allMatcheSum = sum(1 for _ in re.finditer(r'C(S|T)+C', secondaryStructureString))
# allMatcheSum = sum(1 for _ in re.finditer(myRegex, secondaryStructureString))
# # allMatches = re.finditer(r'C(S|T)+C', secondaryStructureString)
# if(allMatcheSum==0):
# break
# else:
# for match in re.finditer(myRegex, secondaryStructureString):
# if(debug):
# print(secondaryStructureString[match.span()[0]:match.span()[1]], match.span()[0], match.span()[1])
# for i in range(match.span()[0], match.span()[1]):
# sse[i]='C'
# secondaryStructureString=""
# for item in sse:
# secondaryStructureString = secondaryStructureString + item
# #print(secondaryStructureString)
# # print(match.span())
# #sys.exit(-1)
# #################################################################
with open(pdbfile+".dssp", 'w') as file: with open(pdbfile+".dssp", 'w') as file:
for item in sse: for j in range(len(sse)):
file.write(str(residList[i])+","+item+"\n") file.write(str(residList[j])+","+sse[j]+"\n")
i +=1
def countCoilSegments(inputfile): def countCoilSegments(inputfile):
""" """
...@@ -959,6 +994,10 @@ def parse_command_line(): ...@@ -959,6 +994,10 @@ def parse_command_line():
help="If coil length is > maxcoillength, it will use JET values.", help="If coil length is > maxcoillength, it will use JET values.",
required=False, default=5) required=False, default=5)
parser.add_argument('--coilbreakerlength', dest='coilbreakerlength', type=int, \
help="Length of coil breaking S or T residues between C(S|T)C pattern.",
required=False, default=1)
args = parser.parse_args() args = parser.parse_args()
arg_dict = vars(args) arg_dict = vars(args)
...@@ -973,7 +1012,7 @@ def parse_command_line(): ...@@ -973,7 +1012,7 @@ def parse_command_line():
def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,\ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,\
alphabet, verbosity, offset, colormap, maxCoilLength = 5): alphabet, verbosity, offset, colormap, maxCoilLength = 5, coilbreakerlength=1):
""" """
Perfect explanation for a function: typing the function call exactly! Perfect explanation for a function: typing the function call exactly!
doit is basically the main function in disguise! doit is basically the main function in disguise!
...@@ -1044,7 +1083,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1044,7 +1083,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("ERROR: There is not any pdb file.") print("ERROR: There is not any pdb file.")
sys.exit(-1) sys.exit(-1)
else: else:
calculateSecondaryStructure(pdbfile) calculateSecondaryStructure(pdbfile, coilbreakerlength)
countCoilSegments(pdbfile+".dssp") countCoilSegments(pdbfile+".dssp")
df = pd.read_table(prot+"_jet.res", sep="\s+") df = pd.read_table(prot+"_jet.res", sep="\s+")
...@@ -1081,7 +1120,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1081,7 +1120,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("ERROR: There is not any pdb file.") print("ERROR: There is not any pdb file.")
sys.exit(-1) sys.exit(-1)
else: else:
calculateSecondaryStructure(pdbfile) calculateSecondaryStructure(pdbfile, coilbreakerlength)
countCoilSegments(pdbfile+".dssp") countCoilSegments(pdbfile+".dssp")
df = pd.read_table(prot+"_jet.res", sep="\s+") df = pd.read_table(prot+"_jet.res", sep="\s+")
...@@ -1118,7 +1157,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1118,7 +1157,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("ERROR: There is not any pdb file.") print("ERROR: There is not any pdb file.")
sys.exit(-1) sys.exit(-1)
else: else:
calculateSecondaryStructure(pdbfile) calculateSecondaryStructure(pdbfile, coilbreakerlength)
countCoilSegments(pdbfile+".dssp") countCoilSegments(pdbfile+".dssp")
df = pd.read_table(prot+"_jet.res", sep="\s+") df = pd.read_table(prot+"_jet.res", sep="\s+")
...@@ -1182,7 +1221,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode, ...@@ -1182,7 +1221,7 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
threeLetters2oneLetter = {v: k for k, v in oneLetter2ThreeLetters.items()} threeLetters2oneLetter = {v: k for k, v in oneLetter2ThreeLetters.items()}
calculateSecondaryStructure(pdbfile) calculateSecondaryStructure(pdbfile, coilbreakerlength)
countCoilSegments(pdbfile+".dssp") countCoilSegments(pdbfile+".dssp")
df = pd.read_table(prot+"_jet.res", sep="\s+") df = pd.read_table(prot+"_jet.res", sep="\s+")
...@@ -1405,7 +1444,8 @@ def main(): ...@@ -1405,7 +1444,8 @@ def main():
doit(args.input, args.mutations, args.retrievingMethod, args.blastFile,\ doit(args.input, args.mutations, args.retrievingMethod, args.blastFile,\
args.fastaFile, args.nIter,args.NSeqs, args.jetfile, args.pdbfile,\ args.fastaFile, args.nIter,args.NSeqs, args.jetfile, args.pdbfile,\
args.normweightmode, args.alphabet, args.verbose, args.offset, \ args.normweightmode, args.alphabet, args.verbose, args.offset, \
args.colormap, maxCoilLength=args.maxcoillength) args.colormap, maxCoilLength=args.maxcoillength, \
coilbreakerlength=args.coilbreakerlength)
toc = time.perf_counter() toc = time.perf_counter()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment