Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
DLA-Ranker
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
DLA-Ranker
DLA-Ranker
Commits
e07912dd
Commit
e07912dd
authored
Apr 09, 2022
by
DLA-Ranker
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Updates
parent
1e0c8874
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
160 additions
and
2 deletions
+160
-2
generate_cubes_reduce_channels_multiproc.py
Representation/generate_cubes_reduce_channels_multiproc.py
+117
-0
train.py
Train/train.py
+43
-2
No files found.
Representation/generate_cubes_reduce_channels_multiproc.py
0 → 100755
View file @
e07912dd
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 28 16:00:07 2022
@author: mohseni
"""
import
glob
import
numpy
as
np
from
os
import
path
,
mkdir
,
getenv
,
listdir
,
remove
,
system
,
stat
from
subprocess
import
CalledProcessError
,
check_call
import
pickle
import
sys
sys
.
path
.
insert
(
1
,
'../lib/'
)
import
tools
as
tl
# Atom names per residue, keyed by three-letter residue code.
# Insertion order is significant: the per-residue lists are concatenated in
# this order further down to build the flat atom-channel table.
channels = {
    'ALA': ['C', 'N', 'O', 'CA', 'CB'],
    'ARG': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2'],
    'ASN': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'ND2', 'OD1'],
    'ASP': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'OD1', 'OD2'],
    'CYS': ['C', 'N', 'O', 'CA', 'CB', 'SG'],
    'GLN': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD', 'NE2', 'OE1'],
    'GLU': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD', 'OE1', 'OE2'],
    'GLY': ['C', 'N', 'O', 'CA'],
    'HIS': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD2', 'ND1', 'CE1', 'NE2'],
    'ILE': ['C', 'N', 'O', 'CA', 'CB', 'CG1', 'CG2', 'CD1'],
    'LEU': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD1', 'CD2'],
    'LYS': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD', 'CE', 'NZ'],
    'MET': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'SD', 'CE'],
    'PHE': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ'],
    'PRO': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD'],
    'SER': ['C', 'N', 'O', 'CA', 'CB', 'OG'],
    'THR': ['C', 'N', 'O', 'CA', 'CB', 'CG2', 'OG1'],
    'TRP': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE2', 'CE3',
            'NE1', 'CZ2', 'CZ3', 'CH2'],
    'TYR': ['C', 'N', 'O', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2',
            'CZ', 'OH'],
    'VAL': ['C', 'N', 'O', 'CA', 'CB', 'CG1', 'CG2'],
}
def save_obj(obj, name):
    """Pickle ``obj`` into the file ``name + '.pkl'``.

    The file is opened in binary write mode (an existing file is overwritten)
    and the highest available pickle protocol is used.
    """
    target = name + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Return the object unpickled from the file ``name + '.pkl'``."""
    source = name + '.pkl'
    with open(source, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
def load_map(file_path):
    """Decompress an lz4-compressed pickle and return the object stored in it.

    ``file_path`` is expected to end in ``.pkl.lz4``. The external ``lz4``
    tool is run to decompress it (overwriting any stale output), the
    resulting ``.pkl`` file is unpickled and then deleted again.

    Args:
        file_path: path of the ``.pkl.lz4`` archive.

    Returns:
        The unpickled object.

    Raises:
        subprocess.CalledProcessError: if ``lz4`` exits with a non-zero status.
    """
    check_call(['lz4', '-d', '-f', file_path], stdout=sys.stdout)
    try:
        return load_obj(file_path.replace('.pkl.lz4', ''))
    finally:
        # Remove the temporary decompressed pickle even when unpickling
        # fails, so a corrupt archive does not leave stray files behind.
        remove(file_path.replace('.lz4', ''))
def save_map(tuple_obj, file_path):
    """Pickle ``tuple_obj`` to ``file_path + '.pkl'`` and compress it with lz4.

    The intermediate uncompressed pickle is deleted once the ``lz4`` call has
    succeeded. A failing ``lz4`` call raises ``CalledProcessError`` and leaves
    the pickle in place.
    """
    pickled_path = file_path + '.pkl'
    save_obj(tuple_obj, file_path)
    # '--rm' is deliberately not passed to lz4 because of inconsistencies
    # between lz4 versions; the pickle is removed explicitly instead.
    check_call(['lz4', '-f', pickled_path], stdout=sys.stdout)
    remove(pickled_path)
v_dim = 24
# Number of channels in the reduced maps (10 in total).
n_channels = 4 + 4 + 2

# Flat table of atom names: the per-residue lists of `channels`, concatenated
# in dictionary order. A position in this table is an atom-channel index.
all_channels = [atom for atoms in channels.values() for atom in atoms]

# Positions in `all_channels` grouped by the first letter of the atom name.
C_index = [pos for pos, atom in enumerate(all_channels) if atom[0] == "C"]
O_index = [pos for pos, atom in enumerate(all_channels) if atom[0] == "O"]
N_index = [pos for pos, atom in enumerate(all_channels) if atom[0] == "N"]
S_index = [pos for pos, atom in enumerate(all_channels) if atom[0] == "S"]

# Every lz4 file found two directory levels below the example map directory.
samples = glob.glob(path.join('../Examples/map_dir', '*', '*', '*.lz4'))
def process_sample(sample, report_dict):
    """Reduce the atom channels of one compressed map file to element channels.

    The tuple stored in ``sample`` is loaded and its first element ``X`` is
    replaced by an array with ``n_channels`` channels on the last axis:

    * channels 0-3 are the sums over the atom channels whose atom name
      starts with ``C``, ``N``, ``O`` and ``S`` respectively;
    * the remaining channels are copied from the input channels that
      directly follow the ``len(all_channels)`` atom channels.

    All other tuple elements are kept unchanged. The result is written next
    to the input, with the ``.pkl.lz4`` suffix replaced by ``_.pkl.lz4``.

    The input file is deleted only after the reduced map has been written.
    If anything fails, the error is reported on stderr and the input is left
    in place, so a failed conversion never destroys the only copy of a sample.

    Args:
        sample: path of a ``.pkl.lz4`` map file.
        report_dict: not used by this function; kept so the signature stays
            unchanged for the caller (presumably supplied by
            ``tl.do_processing`` - TODO confirm).

    Returns:
        None.
    """
    # Offset of the first non-atom channel; 167 for the table in this file.
    n_atom_channels = len(all_channels)
    # Channels left after the four per-element sums.
    n_extra = n_channels - 4
    try:
        tuple_obj = load_map(sample)
        X = tuple_obj[0]
        X_new = np.zeros(X.shape[:-1] + (n_channels,))
        element_groups = (C_index, N_index, O_index, S_index)
        for out_idx, atom_idx in enumerate(element_groups):
            X_new[:, :, :, :, out_idx] = X[:, :, :, :, atom_idx].sum(axis=4)
        # Index one channel at a time so a too-short input raises IndexError
        # instead of being silently broadcast.
        for k in range(n_extra):
            X_new[:, :, :, :, 4 + k] = X[:, :, :, :, n_atom_channels + k]
        tuple_obj_new = (X_new,) + tuple(tuple_obj[1:])
        save_map(tuple_obj_new, sample.replace('.pkl.lz4', '_'))
    except Exception as err:
        # Best effort per sample: report and continue, but keep the input.
        print('process_sample: failed on %s: %r' % (sample, err),
              file=sys.stderr)
        return
    remove(sample)
    return
# One single-element argument tuple per sample file.
cases = [(sample_path,) for sample_path in samples]
# Hand every case to process_sample through the project's processing helper.
report_dict = tl.do_processing(cases, process_sample, True)
Train/train.py
View file @
e07912dd
...
@@ -68,6 +68,9 @@ hidden_size1 = 200
...
@@ -68,6 +68,9 @@ hidden_size1 = 200
hidden_size2
=
20
hidden_size2
=
20
v_dim
=
24
v_dim
=
24
atom_channels
=
167
#atom_channels = 4
logging
.
basicConfig
(
filename
=
'manager.log'
,
filemode
=
'w'
,
format
=
'
%(levelname)
s:
%(message)
s'
,
level
=
logging
.
DEBUG
)
logging
.
basicConfig
(
filename
=
'manager.log'
,
filemode
=
'w'
,
format
=
'
%(levelname)
s:
%(message)
s'
,
level
=
logging
.
DEBUG
)
mainlog
=
logging
.
getLogger
(
'main'
)
mainlog
=
logging
.
getLogger
(
'main'
)
logging
.
Logger
logging
.
Logger
...
@@ -102,9 +105,42 @@ else:
...
@@ -102,9 +105,42 @@ else:
encoder
=
OneHotEncoder
(
sparse
=
False
,
handle_unknown
=
'ignore'
)
encoder
=
OneHotEncoder
(
sparse
=
False
,
handle_unknown
=
'ignore'
)
onehot
=
encoder
.
fit
(
np
.
asarray
([[
'S'
],
[
'C'
],
[
'R'
]]))
onehot
=
encoder
.
fit
(
np
.
asarray
([[
'S'
],
[
'C'
],
[
'R'
]]))
def Conv_3D_model(input_shape, input_shape_aux):
    """Build, compile and return the 3D-convolutional model with an auxiliary input.

    The volumetric input passes through four Conv3D + BatchNormalization
    stages, average pooling, flattening and dropout. It is then concatenated
    with the auxiliary input and fed through two constrained dense layers
    ('layer1', 'layer2') to a single sigmoid output. The model is compiled
    with binary cross-entropy and Adam, and its summary is printed.

    Args:
        input_shape: shape passed to the main ``Input`` layer.
        input_shape_aux: shape passed to the auxiliary ``Input`` layer.

    Returns:
        The compiled model.
    """
    X_in = Input(shape=input_shape)
    aux_input = Input(shape=input_shape_aux)

    # (filters, cubic kernel edge, activation) of each convolutional stage.
    conv_stages = (
        (20, 1, 'linear'),
        (20, 3, 'elu'),
        (30, 4, 'elu'),
        (20, 4, 'elu'),
    )
    H = X_in
    for n_filters, edge, act in conv_stages:
        H = Conv3D(n_filters, kernel_size=(edge, edge, edge), use_bias=True,
                   padding='valid', activation=act,
                   kernel_initializer='he_uniform', input_shape=H.shape)(H)
        H = BatchNormalization()(H)

    H = AveragePooling3D(pool_size=(4, 4, 4), strides=(4, 4, 4))(H)
    H = Flatten()(H)
    H = Dropout(0.4)(H)

    # Join the convolutional features with the auxiliary input.
    H = Concatenate()([H, aux_input])
    H = Dense(hidden_size1, activation='elu', name='layer1',
              kernel_constraint=max_norm(4), bias_constraint=max_norm(4))(H)
    H = Dropout(0.2)(H)
    H = Dense(hidden_size2, activation='elu', name='layer2',
              kernel_constraint=max_norm(4), bias_constraint=max_norm(4))(H)
    H = Dropout(0.1)(H)
    Y = Dense(1, activation='sigmoid')(H)

    _model = Model(inputs=[X_in, aux_input], outputs=Y)
    _model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.001))
    _model.summary()
    return _model
def
Conv_3D_model_4channels
(
input_shape
,
input_shape_aux
):
X_in
=
Input
(
shape
=
input_shape
)
aux_input
=
Input
(
shape
=
input_shape_aux
)
H
=
Conv3D
(
10
,
kernel_size
=
(
3
,
3
,
3
),
use_bias
=
True
,
padding
=
'valid'
,
activation
=
'elu'
,
kernel_initializer
=
'he_uniform'
,
input_shape
=
X_in
.
shape
)(
X_in
)
H
=
Conv3D
(
10
,
kernel_size
=
(
3
,
3
,
3
),
use_bias
=
True
,
padding
=
'valid'
,
activation
=
'elu'
,
kernel_initializer
=
'he_uniform'
,
input_shape
=
X_in
.
shape
)(
X_in
)
H
=
BatchNormalization
()(
H
)
H
=
BatchNormalization
()(
H
)
...
@@ -164,8 +200,13 @@ d_class_weights = dict(enumerate(class_weights))
...
@@ -164,8 +200,13 @@ d_class_weights = dict(enumerate(class_weights))
for
foldk
in
[
'Total'
]:
for
foldk
in
[
'Total'
]:
seed
(
int
(
np
.
round
(
np
.
random
.
random
()
*
10
)))
seed
(
int
(
np
.
round
(
np
.
random
.
random
()
*
10
)))
input_shape
=
(
v_dim
,
v_dim
,
v_dim
,
4
+
6
)
input_shape
=
(
v_dim
,
v_dim
,
v_dim
,
atom_channels
+
6
)
model
=
Conv_3D_model
(
input_shape
,
3
)
if
atom_channels
==
4
:
model
=
Conv_3D_model_4channels
(
input_shape
,
3
)
else
:
model
=
Conv_3D_model
(
input_shape
,
3
)
#model = load_model('Total_0_model')
#model = load_model('Total_0_model')
with
open
(
str
(
foldk
)
+
'_train_interfaces.txt'
,
'w'
)
as
f_handler_trainlist
:
with
open
(
str
(
foldk
)
+
'_train_interfaces.txt'
,
'w'
)
as
f_handler_trainlist
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment