Commit 7a52b1c1 by Mustafa Tekpinar

Added calculateSecondaryStructure and countCoilSegments functions.

parent ecd6817e
......@@ -10,7 +10,7 @@ then
rm -rf BLAT*
rm -f default.conf caracTest.dat
rm -rf bin*.fasta
rm -rf blat-af2.pdb.dssp blat-af2.pdb.dssp.new
elif [ "$1" == "jetoff" ]
then
# If you have your own JET2 score file, you can turn off JET2 as follows:
......@@ -34,6 +34,13 @@ then
echo "Using blat-af2.pdb for the structural feature calculations!"
python $SGEMME_PATH/sgemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-af2.pdb --normweightmode maxhalftracepchalftracecvhalfcvpc -m Stiffler_2015_BLAT_ECOLX.mut
elif [ "$1" == "ssjetormax" ]
then
#Please note that CV isa structural feature and it can not be calculated if you don't specify a pdb file.
echo "Using blat-af2.pdb for the secondary structure based calculations!"
python $SGEMME_PATH/sgemme.py aliBLAT.fasta -r input -f aliBLAT.fasta --pdbfile blat-af2.pdb --normweightmode ssjetormax -m Stiffler_2015_BLAT_ECOLX.mut
else
echo "Running SGEMME with a user-provided alignment file."
python $SGEMME_PATH/sgemme.py aliBLAT.fasta -r input -f aliBLAT.fasta
......
......@@ -631,6 +631,68 @@ def check_argument_groups(parser, arg_dict, group, argument):
parser.error("gemme requires " + group +
" to be set to input if " + str(argument) + " is used.")
return None
def calculateSecondaryStructure(pdbfile):
"""
Calls dssp, calculates secondary structure and saves the results as a
text file with .dssp extention. The result contains a residue index and
dssp assignment for that amino acid on each line:
0,H
1,E
2,E
.
.
.
100,C
A structure file in PDB format is the main input.
"""
import biotite.structure.io as strucio
import biotite.application.dssp as dssp
#pdbfile: Name of the input pdb file
#outfile=Name of outputfile that contain secondary structure information= pdbfile+".dssp"
#for each residue in a new line
array = strucio.load_structure(pdbfile)
app = dssp.DsspApp(array)
app.start()
app.join()
sse = app.get_sse()
# print(sse)
calphas = array[array.atom_name == "CA"]
residList = (calphas.res_id)
i=0
with open(pdbfile+".dssp", 'w') as file:
for item in sse:
file.write(str(residList[i])+","+item+"\n")
i +=1
def countCoilSegments(inputfile):
"""
Count length of each coil segment and write it to outputfile.
"""
df = pd.read_csv(inputfile, header=None)
df.columns = ["pos", "ss"]
# numRows = (len(df))
# internalCounter = 0
from itertools import groupby
groups = groupby(df['ss'])
result = [(label, sum(1 for _ in group)) for label, group in groups]
#print(result)
beg = 0
end = 0
outputfile=inputfile+".new"
with open(outputfile, 'w') as file:
for item in result:
beg = end
end = end + item[1]
#print(item[0], item[1])
for i in range(beg, end):
file.write("{},{},{}\n".format(df.iloc[i]['pos'], item[0], item[1]))
# if(item[0]=='C'):
# print(item[1])
def parse_command_line():
"""
......@@ -842,9 +904,10 @@ def doit(inAli,mutFile,retMet,bFile,fFile,n,N, jetfile, pdbfile, normWeightMode,
print("ERROR: There is not any pdb file to calculate DFI.")
sys.exit(-1)
else:
#proc=subprocess.Popen("python /home/tekpinar/software/scripts/python_scripts/biotite-secondary-structure.py "+pdbfile+" "+pdbfile+".dssp",stdout=subprocess.PIPE,shell=True)
os.system("python /home/tekpinar/software/scripts/python_scripts/biotite-secondary-structure.py "+pdbfile+" "+pdbfile+".dssp")
os.system("python /home/tekpinar/software/scripts/python_scripts/count-coil-segments-v2.py "+pdbfile+".dssp >"+pdbfile+".dssp.new")
calculateSecondaryStructure(pdbfile)
countCoilSegments(pdbfile+".dssp")
#os.system("python /home/tekpinar/software/scripts/python_scripts/biotite-secondary-structure.py "+pdbfile+" "+pdbfile+".dssp")
#os.system("python /home/tekpinar/software/scripts/python_scripts/count-coil-segments-v2.py "+pdbfile+".dssp >"+pdbfile+".dssp.new")
df = pd.read_table(prot+"_jet.res", sep="\s+")
print(df['pos'])
print(pdbfile+".dssp")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment