Commit 102d58ee by Gianluca LOMBARDI

Update types and imports, add train script and utils and example input files

parent f3163134
@@ -52,7 +52,7 @@ We provide several command line interfaces for quick usage of MuLAN different ap
- `mulan-predict` for $\Delta \Delta G$ prediction of single- and multiple-point mutations;
- `mulan-att` to extract residue weights related to interface regions;
- `mulan-landscape` to produce a full mutational landscape for a given complex;
-- `mulan-train` to re-train the model on a custom dataset or to run cross validation (not supported yet);
+- `mulan-train` to re-train the model on a custom dataset or to run cross validation;
- `plm-embed` to extract embeddings from protein language models.
Since the script uses the `transformers` interface, only models that are saved on HuggingFace 🤗 Hub can be loaded.
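For programmatic use, the `plm-embed` step can also be driven from Python. A minimal sketch (the model name and paths are placeholders; valid names are the keys of `mulan.constants.PLM_ENCODERS`):

# programmatic equivalent of `plm-embed` (illustrative)
from mulan import utils

plm_model, plm_tokenizer = utils.load_pretrained_plm("esm2_t33_650M")  # assumed model name
embedding = utils.embed_sequence(plm_model, plm_tokenizer, "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ")
utils.save_embedding(embedding, "embeddings", "my_sequence")  # writes embeddings/my_sequence.pt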
......
1A22_A 1A22_B SA51A 0.3480601775656975
1A22_A 1A22_B FA165A 0.4104498198934685
1A22_A 1A22_B SA62A 0.1553600275920779
1A22_A 1A22_B IA168A 0.8059076005509755
1A22_A 1A22_B QA22A -0.2197268133916914
1A22_A 1A22_B NA63A 0.3142135987668766
1A22_A 1A22_B RA172A 0.5425851484586328
1A22_A 1A22_B FA180A 0.1911951368906681
1A22_A 1A22_B EA56A 0.4104498198934685
1A22_A 1A22_B TA164A 1.906069936704197
1A22_A 1A22_B RA167A 2.4244821810979293
1A22_A 1A22_B HA18A -0.4861468519007541
1A22_A 1A22_B QA68A 0.5881579306311284
1A22_A 1A22_B PA48A 0.4104498198934685
1A22_A 1A22_B KA161A 2.014032361204464
1A22_A 1A22_B LA45A 1.2239008844916306
1A22_A 1A22_B EA65A -0.472839462221236
1A22_A 1A22_B DA160A 0.7905261144613256
1A22_A 1A22_B CA171A 1.0098644391267992
1A22_A 1A22_B HA21A 0.1553600275920779
1A22_A 1A22_B KA157A -0.1547681695521046
1A22_A 1A22_B EA163A -0.9241436813966502
1A22_A 1A22_B DA26A -0.2112064661099832
1A22_A 1A22_B YA153A 0.3480601775656975
1A22_A 1A22_B QA46A 0.1079624245002666
1A22_A 1A22_B PA61A 1.20871661869346
1A22_A 1A22_B RA64A 1.641799279573867
1A22_A 1A22_B RA156A 0.2783144913283042
1A22_A 1A22_B YA42A 0.1992433537834852
1A22_A 1A22_B RB11M 0.999896442915034
1A22_A 1A22_B SB174A 0.0338465787988226
1A22_A 1A22_B RB11L 0.5213078683529346
1A22_A 1A22_B SB58A -0.5066860410200569
1A22_A 1A22_B CB81A 0.0
1A22_A 1A22_B WB63F 2.783407873049008
1A22_A 1A22_B CB67A 0.0
1A4Y_A 1A4Y_B WA261A 0.1006063805906443
1A4Y_A 1A4Y_B WA375A 1.0348412327905765
1A4Y_A 1A4Y_B EA344A 0.178519836594301
1A4Y_A 1A4Y_B YA434F 0.5575771876945907
1A4Y_A 1A4Y_B WA263A 1.170784638271947
1A4Y_A 1A4Y_B YA434A 3.2603738601208216
1A4Y_A 1A4Y_B DA435A 3.4837901179692
1A4Y_A 1A4Y_B WA318A 1.4999902762747832
1A4Y_A 1A4Y_B RA457A -0.2238733431043407
1A4Y_A 1A4Y_B YA437A 0.8358916790228896
1A4Y_A 1A4Y_B SA289A 0.0423147056193045
1A4Y_A 1A4Y_B EA401A 0.8832892821147027
1A4Y_A 1A4Y_B KA320A -0.3098434393028242
1A4Y_A 1A4Y_B IA459A 0.6790906364878461
1A4Y_A 1A4Y_B YA437F 0.2451373913115802
1A4Y_A 1A4Y_B HB84A 0.1703520668280376
1A4Y_A 1A4Y_B KB40G 3.2351720176220624
1A4Y_A 1A4Y_B RB5A 2.3081202693877465
1A4Y_A 1A4Y_B HB13A -0.296536049623306
1A4Y_A 1A4Y_B EB108A -0.3225623321016648
1A4Y_A 1A4Y_B RB31A 0.2503963416825741
1A4Y_A 1A4Y_B WB89A 0.2400977530654273
1A4Y_A 1A4Y_B KB40Q 4.250288323419703
1A4Y_A 1A4Y_B RB32A 0.9097928484168456
1A4Y_A 1A4Y_B QB12A 0.3001140298124305
1A4Y_A 1A4Y_B HB114A 0.6564989187287935
1A4Y_A 1A4Y_B NB68A 0.1177503154404853
1A4Y_A 1A4Y_B HB8A 0.9036602217884386
1ACB_A 1ACB_B LB38S 4.950962020246454
1ACB_A 1ACB_B LB38P 6.852342536090567
1ACB_A 1ACB_B LB38G 6.0329104263395
1ACB_A 1ACB_B LB38E 6.553915374192577
1ACB_A 1ACB_B LB38I 4.23043510099434
1ACB_A 1ACB_B LB38D 6.771166322627187
1AK4_A 1AK4_B PB93A 2.0460484782066217
1AK4_A 1AK4_B HB87R 3.0083601989953013
1AK4_A 1AK4_B HB87A 2.37295805315554
1AK4_A 1AK4_B AB88V 2.13286030009011
1AK4_A 1AK4_B PB85A 2.4486550851628266
1AK4_A 1AK4_B AB92V 1.7224104801966431
1AK4_A 1AK4_B IB91A 1.6035825413109936
1AK4_A 1AK4_B AB88G 4.016996102578333
1AK4_A 1AK4_B GB89V 4.392941760129896
1AK4_A 1AK4_B AB92G 1.9442866749670664
1AK4_A 1AK4_B VB86A 2.3547364948605334
1AK4_A 1AK4_B HB87Q 2.3359363787083702
1AK4_A 1AK4_B PB90A 3.5354023574986417
1AK4_A 1AK4_B IB91V 1.3634847882455636
1AK4_A 1AK4_B GB89A 3.439906791777797
1B2S_A 1B2S_B AA27K -3.864350158924305
1B2S_A 1B2S_B AB43T -4.166151396743981
1B2U_A 1B2U_B AA27K -5.043489333088793
1B2U_A 1B2U_B AB36D -4.166151396743981
1B3S_A 1B3S_B AA102H -5.679044866811731
1B3S_A 1B3S_B FB30Y 0.5957939259574623
1BRS_A 1BRS_B EA58A 0.0944460365287085
1BRS_A 1BRS_B EA71C 2.527489313754817
1BRS_A 1BRS_B HA100G 6.817423941227826
1BRS_A 1BRS_B RA57K 2.4868718234257017
1BRS_A 1BRS_B HA100D 4.548301787721975
1BRS_A 1BRS_B EA71W 1.6564211069067412
1BRS_A 1BRS_B HA100L 7.662496485079659
1BRS_A 1BRS_B RA81Q 5.4172993901379405
1BRS_A 1BRS_B EA71F 2.2340551775614266
1BRS_A 1BRS_B WA33F 1.2596283269819324
1BRS_A 1BRS_B EA71Y 2.412519068771428
1BRS_A 1BRS_B EA71S 3.0089735122761176
1BRS_A 1BRS_B HA100Q 4.548301787721975
1BRS_A 1BRS_B EA71Q 1.4513722760373606
1BRS_A 1BRS_B NA56A 3.089507324075856
1BRS_A 1BRS_B YB29A 3.4687980787332435
1BRS_A 1BRS_B TB42A 1.8566951901405384
1BRS_A 1BRS_B WB38F 1.641799279573867
1BRS_A 1BRS_B WB44F 0.0564382965578786
1BRS_A 1BRS_B YB29F -0.132135328565166
1CSE_A 1CSE_B LB38D 4.350433681449669
1CSE_A 1CSE_B LB38G 2.2488334837936126
1CSE_A 1CSE_B LB38I 2.9409875695104866
1CSE_A 1CSE_B LB38E 2.349579985314381
1CSE_A 1CSE_B LB38S 1.1722287392442985
1CSE_A 1CSE_B LB38P 6.67127645335119
1CSO_A 1CSO_B IB13L -4.4188804360352485
1CT0_A 1CT0_B SB13L -4.100355769895383
1CT2_A 1CT2_B TB13L -3.1601132172258635
1CT4_A 1CT4_B VB13L -2.997860793780033
1E96_A 1E96_B NA26H 1.0854474195615502
1E96_A 1E96_B IA33N 2.0077513733569488
1EAW_A 1EAW_B TA144A 0.0894056741917026
1EAW_A 1EAW_B DA214A 2.2280123870440565
1EAW_A 1EAW_B FA92A 0.8915712901218367
1EAW_A 1EAW_B YA141A 0.5019009328018527
1EAW_A 1EAW_B FA89A 0.7282270437769274
1EAW_A 1EAW_B DA91A 0.6541111980754817
1EAW_A 1EAW_B RA48A 0.5873189040934879
1EAW_A 1EAW_B IA45A -0.1942602619391777
1EAW_A 1EAW_B FA50A -0.4285969168341221
1EAW_A 1EAW_B YA52A -0.0789009539196516
1EAW_A 1EAW_B QA169A -0.1330295959181544
1EAW_A 1EAW_B IA26A -0.8220922966245396
1EAW_A 1EAW_B DA47A 1.5020192808795692
1EAW_A 1EAW_B HA138A -0.0144729139862604
1EAW_A 1EAW_B QA23A -0.5186505836683111
1EFN_A 1EFN_B TA13H 1.2429228688538476
1EFN_A 1EFN_B IA12A 1.4502966101290538
1EMV_A 1EMV_B IA51A 0.8476985078878343
1EMV_A 1EMV_B TA36A 0.8995136920757965
1EMV_A 1EMV_B SA26A 0.1730871937216989
1EMV_A 1EMV_B TA25A 0.7280937188813291
1EMV_A 1EMV_B VA35A 1.6637749495047736
1EMV_A 1EMV_B SA46A 0.0072657834232536
1EMV_A 1EMV_B EA28A 1.4159220295835304
1EMV_A 1EMV_B VA32A 2.5781527527089203
1EMV_A 1EMV_B NA22A 0.1394011119884197
1EMV_A 1EMV_B DA49A 5.915151095926328
1EMV_A 1EMV_B SA27A 0.9559519886336716
1EMV_A 1EMV_B YA53A 4.634499819532684
1EMV_A 1EMV_B EA39A 2.0829652073420992
1EMV_A 1EMV_B SA48A 2.18711955492616
1EMV_A 1EMV_B YA52A 4.834367918146082
1EMV_A 1EMV_B HA44A 0.8317651541723166
1EMV_A 1EMV_B CA21A 0.9215060203775778
1EMV_A 1EMV_B PA54A 1.242214974065785
1EMV_A 1EMV_B PA45A 0.4372486879943693
1EMV_A 1EMV_B LA31A 3.417558708632216
1EMV_A 1EMV_B GA47A 1.4852958766126645
1EMV_A 1EMV_B NB75A 2.3344109640223465
1EMV_A 1EMV_B QB92A -0.2775883674853894
1EMV_A 1EMV_B SB78A -0.5398667630280478
1EMV_A 1EMV_B FB86A 3.8787286930522846
1EMV_A 1EMV_B SB77A -0.2328762743011872
1EMV_A 1EMV_B KB97A 1.959528419718087
1EMV_A 1EMV_B TB87A 0.1584260231997625
1EMV_A 1EMV_B VB98A 1.0887920447193764
1EMV_A 1EMV_B RB54A 1.6656087875428582
1EMV_A 1EMV_B SB84A -0.1094173488678116
1EMV_A 1EMV_B NB72A 1.1646046076839518
1EMV_A 1EMV_B SB74A -0.2410066617929089
1F47_A 1F47_B KA14A -0.0426193899170961
1F47_A 1F47_B YA5A 0.8689295482454922
1F47_A 1F47_B DA7G 1.1387685212697871
1F47_A 1F47_B FA11A 2.444252433508604
1F47_A 1F47_B DA7A 1.732786408841286
1F47_A 1F47_B IA8A 2.514979319506033
1F47_A 1F47_B LA12A 2.2941716734520803
1F47_A 1F47_B DA7S 2.063822508640781
1F47_A 1F47_B DA4A 0.6911582373455776
1F47_A 1F47_B PA9A -0.0575358944989963
1F47_A 1F47_B QA15A -0.0455727821724947
1F47_A 1F47_B LA6A 0.9249655467883962
1FC2_A 1FC2_B YA10W 0.4104498198934649
1FCC_A 1FCC_B EB42A 0.3852479773947088
1FCC_A 1FCC_B WB43A 3.771285474126445
1FCC_A 1FCC_B DB40A 0.2721138700675905
1FCC_A 1FCC_B KB31A 3.4758058992163714
1FCC_A 1FCC_B TB25A 0.2400977530654309
1FCC_A 1FCC_B KB28A 1.2555223637452997
1FCC_A 1FCC_B NB35A 2.3639173596894745
1FFW_A 1FFW_B YA105W 0.7129372152866669
1FFW_A 1FFW_B AA89V 0.0912809292832159
1FFW_A 1FFW_B DB49A 0.0962362211265936
1FFW_A 1FFW_B DB44A -0.0741158457014421
1FFW_A 1FFW_B CB55A 0.2041986456268584
1FFW_A 1FFW_B EB20A 0.6388213695852247
1FFW_A 1FFW_B HB23A 0.0338465787988226
1FFW_A 1FFW_B EB13A 0.7164102687598799
1FFW_A 1FFW_B IB58A 0.4276149034752397
1FFW_A 1FFW_B FB56A 3.644105437404656
1FR2_A 1FR2_B AA39E -2.0829652073420992
1GC1_A 1GC1_B QB33A 0.1052146055570144
1GC1_A 1GC1_B KB46A 1.4302990134480849
1GC1_A 1GC1_B SB60A -0.0885458023895573
1GC1_A 1GC1_B KB35A 0.3219040175039112
1GC1_A 1GC1_B QB40A -0.4104498198934649
1GC1_A 1GC1_B NB32A 0.1825618585664354
1GC1_A 1GC1_B TB45A -0.1488168237822105
1GC1_A 1GC1_B SB42A 0.0
1GC1_A 1GC1_B RB59A 1.1753222384043518
1GC1_A 1GC1_B LB44A 1.055488944574842
1GC1_A 1GC1_B QB25A 0.0320161170021613
1GC1_A 1GC1_B KB29A 0.5359688347135325
1GC1_A 1GC1_B DB63A -0.3191688906102499
1GC1_A 1GC1_B NB52A 0.7079819234432936
1GC1_A 1GC1_B EB85A 1.3226303889636168
1GC1_A 1GC1_B HB27A 0.2824124586847354
1GC1_A 1GC1_B QB64A 0.4424659368956298
1GC1_A 1GC1_B SB23A 0.292534994403395
1GL0_A 1GL0_B MB28K -0.5693033910682619
1GL1_A 1GL1_B LB29V 4.257093855643765
1H9D_A 1H9D_B VB3A 1.284109116566453
1H9D_A 1H9D_B LB93A 0.8608913724081031
1H9D_A 1H9D_B RB2A 1.0641537924494653
1H9D_A 1H9D_B QB66A 1.2490984805068432
1H9D_A 1H9D_B GB60A 1.9022255267258377
1H9D_A 1H9D_B NB94A 2.1099898529149463
1HE8_A 1HE8_B KA80V 0.4671409205936161
1IAR_A 1IAR_B RA53Q 0.8353431343614393
1IAR_A 1IAR_B QA8A -0.0223480831455837
1IAR_A 1IAR_B FA82D -0.5803147176938648
1IAR_A 1IAR_B WA91D 1.3059488937465673
1IAR_A 1IAR_B QA78A 0.124779284655542
1IAR_A 1IAR_B TA6D 1.3920278517745128
1IAR_A 1IAR_B RA85E 1.223993415770776
1IAR_A 1IAR_B RA88A 3.7528288874815807
1IAR_A 1IAR_B WA91A 0.7291616252477553
1IAR_A 1IAR_B QA78E 0.2449515008942047
1IAR_A 1IAR_B IA5R 0.7956977972881756
1IAR_A 1IAR_B KA84A 0.3448021010125046
1IAR_A 1IAR_B KA12E 0.1394011119884162
1IAR_A 1IAR_B IA5A 1.1707846382719451
1IAR_A 1IAR_B KA84D 1.8791492136960457
1IAR_A 1IAR_B QA8R 0.0389017052793203
1IAR_A 1IAR_B TA13A 0.9780844882983252
1IAR_A 1IAR_B FA82A -0.0864271814544448
1IAR_A 1IAR_B KA12S -0.0148046190983937
1IAR_A 1IAR_B RA85A 0.4266742298545552
1IAR_A 1IAR_B TA6A -0.1035922650362177
1IAR_A 1IAR_B RA81A 0.4793826660310998
1IAR_A 1IAR_B RA81E 1.4609989742393452
1IAR_A 1IAR_B NA89A 1.5580097591384998
1IAR_A 1IAR_B TA13D -0.2185625100196091
1IAR_A 1IAR_B EA9Q 3.111263236322809
1IAR_A 1IAR_B RA88Q 2.827575957081772
1JCK_A 1JCK_B YB90A 2.5948342479259647
1JCK_A 1JCK_B KB103A 0.6762974524423493
1JCK_A 1JCK_B FB176A 2.132860300090111
1JCK_A 1JCK_B YB26A 1.7739346081390313
1JCK_A 1JCK_B TB20A 1.6540090713122657
1JCK_A 1JCK_B VB91A 2.2317820311243093
1JCK_A 1JCK_B NB60A 1.641799279573867
1JTG_A 1JTG_B QA74A 0.4293857000092753
1JTG_A 1JTG_B VA78A 1.9097825126251613
1JTG_A 1JTG_B MA104A 0.7384493296917629
1JTG_A 1JTG_B VA190A -0.4067372439725005
1JTG_A 1JTG_B GA212S -1.6311295509134285
1JTG_A 1JTG_B EA79K 4.233259421962298
1JTG_A 1JTG_B SA209A 1.2384592701602788
1JTG_A 1JTG_B PA82A -0.3825643399076011
1JTG_A 1JTG_B EA143A -0.0725454735794386
1JTG_A 1JTG_B YA80A -0.1682866516901349
1JTG_A 1JTG_B NA75A -0.4555758620072812
1JTG_A 1JTG_B EA85A 4.059884794410223
1JTG_A 1JTG_B TB140K -0.0143850131246843
1JTG_A 1JTG_B WB112A 3.0089966437403977
1JTG_A 1JTG_B YB143A 0.3818377849685177
1JTG_A 1JTG_B YB50A -0.4067372439725005
1JTG_A 1JTG_B DB163A -1.3402600892186527
1JTG_A 1JTG_B HB148A 2.746506323142203
1JTG_A 1JTG_B RB160A 2.2210862582653434
1JTG_A 1JTG_B DB163K -1.9817669687114847
1JTG_A 1JTG_B SB113A -0.1682866516901349
1JTG_A 1JTG_B WB150A 4.251421862409736
1JTG_A 1JTG_B SB71A 0.3579091184487169
1JTG_A 1JTG_B WB162A 2.3396489546293378
1JTG_A 1JTG_B YB53A 2.076422003532233
1JTG_A 1JTG_B FB36A 3.1998090387123685
1JTG_A 1JTG_B HB41A 3.2480654583719684
1KAC_A 1KAC_B SA87Y -1.252128922253883
1KAC_A 1KAC_B PA15S -0.7929814901412069
1KTZ_A 1KTZ_B RA64A 2.882953252292189
1KTZ_A 1KTZ_B RA64K 2.200389562331132
1KTZ_A 1KTZ_B RA13A 1.4808360332993633
1KTZ_A 1KTZ_B VA62I 0.242795497684547
1KTZ_A 1KTZ_B RA13K 1.1511183728524426
1KTZ_A 1KTZ_B IB29A 1.815898256424136
1KTZ_A 1KTZ_B TB27A 1.9591341277245409
1KTZ_A 1KTZ_B SB25A 0.7727305005342533
1KTZ_A 1KTZ_B EB95Q 2.0727094276112656
1KTZ_A 1KTZ_B DB94A 1.2606811735209271
1KTZ_A 1KTZ_B DB8N 2.445686888874384
1KTZ_A 1KTZ_B LB3A 2.2703545242309806
1KTZ_A 1KTZ_B VB53A 0.8611368905818617
1KTZ_A 1KTZ_B SB28A 0.6627573646972937
1KTZ_A 1KTZ_B SB28L 4.482104068477997
1KTZ_A 1KTZ_B EB31A 1.6622596077177991
1KTZ_A 1KTZ_B EB95A 1.939875392275132
1KTZ_A 1KTZ_B FB6A 3.424914752541837
1KTZ_A 1KTZ_B IB26A 2.3418757857708226
1KTZ_A 1KTZ_B DB8A 1.9671999130830256
1LFD_A 1LFD_B KA39A 1.178714348448343
1LFD_A 1LFD_B RA7A 1.1354722285130876
1LFD_A 1LFD_B KA19A 1.3256748888305028
1LFD_A 1LFD_B DA38K -1.0859338739665807
1LFD_A 1LFD_B DA43A -0.2795366876748657
1LFD_A 1LFD_B NA14K 0.4027693760390596
1LFD_A 1LFD_B DA38A -0.5787704819172914
1LFD_A 1LFD_B MA17K -0.9203766019163782
1LFD_A 1LFD_B NA41K -1.1677401384050592
1M9E_A 1M9E_B AB76H -2.37295805315554
1MAH_A 1MAH_B FA287Y 0.7919900197272476
1MAH_A 1MAH_B YA69N 5.2174270026893605
1MAH_A 1MAH_B FA287I 2.06036958390061
1MAH_A 1MAH_B FA285L 1.244947005000947
1MAH_A 1MAH_B DA71N 1.87946870323982
1MAH_A 1MAH_B YA121Q 3.004103816360712
1MAH_A 1MAH_B WA276R 8.808838044698076
1P69_A 1P69_B SA15P 0.0
1P6A_A 1P6A_B YA87S 0.0
1PPF_A 1PPF_B NB36H -0.525587222200782
1PPF_A 1PPF_B AB15G 1.1223859378254897
1PPF_A 1PPF_B LB18V -0.4858069628552393
1PPF_A 1PPF_B EB19Q 0.6512382651313118
1PPF_A 1PPF_B NB36I 0.8593760161521313
1PPF_A 1PPF_B KB13V 0.8593760161521313
1PPF_A 1PPF_B RB21P 6.612233374468008
1PPF_A 1PPF_B TB17P 3.031374527610504
1PPF_A 1PPF_B KB13G 1.231501458438531
1PPF_A 1PPF_B EB19C 1.3831093378131278
1PPF_A 1PPF_B LB18F 5.097540666949608
1PPF_A 1PPF_B GB32P 0.2601531651498519
1PPF_A 1PPF_B NB36M 1.0790916495695375
1PPF_A 1PPF_B NB36V 0.3410085695957665
1PPF_A 1PPF_B LB18H 6.381171448492668
1PPF_A 1PPF_B TB17Q 1.9413244222451755
1PPF_A 1PPF_B AB15M -0.3957512809449515
1PPF_A 1PPF_B KB13R -0.6319253388219295
1PPF_A 1PPF_B EB19Y 1.231501458438531
1PPF_A 1PPF_B TB17M 2.4675689168328567
1PPF_A 1PPF_B RB21C -0.0800488939137569
1PPF_A 1PPF_B YB20N 3.593094703431716
1PPF_A 1PPF_B AB15T 0.9494316980624192
1PPF_A 1PPF_B NB36S -1.3276379503571842
1PPF_A 1PPF_B AB15S 0.745948995933615
1PPF_A 1PPF_B YB20C 3.403623598200179
1PPF_A 1PPF_B EB19G 2.0911319749409856
1PPF_A 1PPF_B NB36W 1.7038071206990306
1PPF_A 1PPF_B LB18R 7.084539036728509
1PPF_A 1PPF_B GB32R 4.781497932955725
1PPF_A 1PPF_B LB18I -0.7226819272889191
1PPF_A 1PPF_B GB32I 4.311461974288624
1PPF_A 1PPF_B PB14A -0.1224011681871513
1PPF_A 1PPF_B NB36E -1.004174409411604
1PPF_A 1PPF_B KB13D 0.6225010517184497
1PPF_A 1PPF_B EB19N 1.195813690056745
1PPF_A 1PPF_B PB14F -1.8309899418626097
1PPF_A 1PPF_B KB13N 0.6512382651313118
1PPF_A 1PPF_B YB20P 5.29245463897787
1PPF_A 1PPF_B TB17G 3.549741974166869
1PPF_A 1PPF_B RB21H -0.4858069628552393
1PPF_A 1PPF_B EB19D 0.5698805931962312
1PPF_A 1PPF_B AB15F -0.071934797575297
1PPF_A 1PPF_B AB15N 0.902728745416976
1PPF_A 1PPF_B EB19A 1.186540391462982
1PPF_A 1PPF_B NB36C 0.5961414432378067
1PPF_A 1PPF_B EB19F 1.3451829790073706
1PPF_A 1PPF_B EB19K 2.0911319749409856
1PPF_A 1PPF_B YB20V 4.12763664026779
1PPF_A 1PPF_B YB20S 3.593094703431716
1PPF_A 1PPF_B KB13H 1.00043953246319
1PPF_A 1PPF_B RB21Q -0.0180883038813046
1PPF_A 1PPF_B TB17D 4.855144693828697
1PPF_A 1PPF_B NB36F 1.8205139793267249
1PPF_A 1PPF_B YB20I 3.8769063494777214
1PPF_A 1PPF_B AB15R -0.2605008030922313
1PPF_A 1PPF_B RB21V -0.3447434465441788
1PPF_A 1PPF_B RB21A 0.2051566446635693
1PPF_A 1PPF_B GB32Y 1.186540391462982
1PPF_A 1PPF_B NB36P -2.9184025459312224
1PPF_A 1PPF_B YB20G 4.12763664026779
1PPF_A 1PPF_B LB18P 6.061585379633843
1PPF_A 1PPF_B GB32M 2.1647787358139574
1PPF_A 1PPF_B GB32L 3.086332236383951
1PPF_A 1PPF_B KB13C 0.9494316980624192
1PPF_A 1PPF_B PB14V -1.5038255665517577
1PPF_A 1PPF_B NB36Y 1.653500192854958
1PPF_A 1PPF_B GB32N 1.5914808178008322
1PPF_A 1PPF_B AB15P 1.9676840307258203
1PPF_A 1PPF_B YB20K 4.431515215391321
1PPF_A 1PPF_B GB32E 1.4372706822530503
1PPF_A 1PPF_B YB20T 4.984980635084527
1PPF_A 1PPF_B TB17W 2.1647787358139574
1PPF_A 1PPF_B KB13M 0.2051566446635693
1PPF_A 1PPF_B KB13W 0.8195957568065886
1PPF_A 1PPF_B KB13A 0.745948995933615
1PPF_A 1PPF_B RB21T -0.0180883038813046
1PPF_A 1PPF_B KB13E 1.117146391090886
1PPF_A 1PPF_B KB13S 0.4544355898142811
1PPF_A 1PPF_B RB21F -0.9675894583346132
1PPF_A 1PPF_B TB17C 2.058440619192808
1PPF_A 1PPF_B PB14L -2.882675792827865
1PPF_A 1PPF_B KB13F 0.9494316980624192
1PPF_A 1PPF_B GB32D 2.462329370098253
1PPF_A 1PPF_B RB21E 0.4544355898142811
1PPF_A 1PPF_B NB36K 2.585275850295563
1PPF_A 1PPF_B RB21L -0.8466354583335054
1PPF_A 1PPF_B EB19S 1.8205139793267249
1PPF_A 1PPF_B LB18Y 6.543423871938499
1PPF_A 1PPF_B RB21D 0.2051566446635693
1PPF_A 1PPF_B PB14D -0.4858069628552393
1PPF_A 1PPF_B TB17Y 2.44632273745208
1PPF_A 1PPF_B PB14T -0.2605008030922313
1PPF_A 1PPF_B EB19W 1.508778598563552
1PPF_A 1PPF_B RB21G 0.5698805931962312
1PPF_A 1PPF_B AB15Y -0.1824910230983487
1PPF_A 1PPF_B AB15K 1.111859426999418
1PPF_A 1PPF_B GB32H 1.4840320759073862
1PPF_A 1PPF_B GB32W 1.621096016273885
1PPF_A 1PPF_B KB13Y 0.4985475206738652
1PPF_A 1PPF_B YB20R 4.311461974288624
1PPF_A 1PPF_B YB20W 0.232105913093271
1PPF_A 1PPF_B KB13P 1.2485597665171095
1PPF_A 1PPF_B PB14G 0.0920877032456797
1PPF_A 1PPF_B YB20M 2.773253383796048
1PPF_A 1PPF_B RB21K 0.5961414432378067
1PPF_A 1PPF_B YB20F 0.4753310003193558
1PPF_A 1PPF_B YB20A 3.165696958334093
1PPF_A 1PPF_B LB18C -0.088277277051608
1PPF_A 1PPF_B LB18N 5.152695328112994
1PPF_A 1PPF_B KB13L 0.3959662783692117
1PPF_A 1PPF_B TB17N 2.661149593010588
1PPF_A 1PPF_B EB19V 1.1428864814382518
1PPF_A 1PPF_B AB15E 0.1041336051620867
1PPF_A 1PPF_B YB20Q 4.631690380259915
1PPF_A 1PPF_B NB36G -0.5635830138822371
1PPF_A 1PPF_B PB14M -1.5525375648393942
1PPF_A 1PPF_B TB17I 1.4726879224776823
1PPF_A 1PPF_B TB17L 2.2479117244243447
1PPF_A 1PPF_B PB14H -1.7409342599523203
1PPF_A 1PPF_B EB19R 1.4493165841694555
1PPF_A 1PPF_B RB21Y 0.2175606253806012
1PPF_A 1PPF_B PB14I -1.6283742280872389
1PPF_A 1PPF_B RB21M -0.7490415357695621
1PPF_A 1PPF_B AB15W -0.6319253388219295
1PPF_A 1PPF_B PB14K -0.4424542335903929
1PPF_A 1PPF_B LB18S 3.0489900997063994
1PPF_A 1PPF_B GB32S 0.902728745416976
1PPF_A 1PPF_B KB13T 0.1635956195561831
1PPF_A 1PPF_B LB18D 7.507514860161978
1PPF_A 1PPF_B GB32Q 2.760756531996245
1PPF_A 1PPF_B AB15C -0.664329515403006
1PPF_A 1PPF_B EB19H 0.6808534636043628
1PPF_A 1PPF_B PB14N -0.6319253388219295
1PPF_A 1PPF_B AB15I 0.3248915324240951
1PPF_A 1PPF_B YB20E 6.330163614091897
1PPF_A 1PPF_B PB14Y -0.9492167006381572
1PPF_A 1PPF_B NB36Q 0.3775935206727574
1PPF_A 1PPF_B AB15Q 0.2462978387934616
1PPF_A 1PPF_B AB15L 0.0175450286501863
1PPF_A 1PPF_B PB14Q -0.7999908007261016
1PPF_A 1PPF_B TB17H 1.670074511431462
1PPF_A 1PPF_B PB14E -1.4091148357494525
1PPF_A 1PPF_B TB17K 3.341604223146051
1PPF_A 1PPF_B AB15H 0.545192178281269
1PPF_A 1PPF_B RB21I -0.7999908007261016
1PPF_A 1PPF_B TB17F 1.6861915486031354
1PPF_A 1PPF_B PB14C -1.9771083178293
1PPF_A 1PPF_B YB20D 6.330163614091897
1PPF_A 1PPF_B LB18G 3.286507401252546
1PPF_A 1PPF_B NB36L 2.6103170641009843
1PPF_A 1PPF_B EB19L 1.056178691469162
1PPF_A 1PPF_B KB13Q 0.2462978387934616
1PPF_A 1PPF_B RB21S 0.4137788028550453
1PPF_A 1PPF_B RB21W -0.525587222200782
1PPF_A 1PPF_B NB36R 1.843730499681236
1PPF_A 1PPF_B EB19I 0.7132576401854376
1PPF_A 1PPF_B LB18W 7.439172535222286
1PPF_A 1PPF_B PB14W -1.9771083178293
1PPF_A 1PPF_B RB21N 0.3248915324240951
1PPF_A 1PPF_B LB18T 0.902728745416976
1PPF_A 1PPF_B TB17R 3.403623598200179
1PPF_A 1PPF_B EB19M 1.132726128909928
1PPF_A 1PPF_B GB32F 1.4611191178070104
1PPF_A 1PPF_B PB14S -0.5635830138822371
1PPF_A 1PPF_B EB19P 3.165696958334093
1PPF_A 1PPF_B NB36T 0.545192178281269
1PPF_A 1PPF_B NB36D -3.0351094045589195
1PPF_A 1PPF_B GB32K 3.2602465512109724
1PPF_A 1PPF_B GB32C 1.162691955909022
1PPF_A 1PPF_B PB14R -0.1357296294440111
1PPF_A 1PPF_B EB19T 1.4840320759073862
1PPF_A 1PPF_B KB13I 0.6225010517184497
1PPF_A 1PPF_B NB36A -1.622618461874907
1PPF_A 1PPF_B YB20L 1.3557730976014764
1PPF_A 1PPF_B AB15D 1.1223859378254897
1PPF_A 1PPF_B GB32T 2.773253383796048
1R0R_A 1R0R_B NB31V 0.6594309628674253
1R0R_A 1R0R_B NB31A -0.0326913557481756
1R0R_A 1R0R_B EB14A 2.060353100880766
1R0R_A 1R0R_B EB14C 2.338571748937099
1R0R_A 1R0R_B KB8D -0.5850517901584239
1R0R_A 1R0R_B PB9L 1.1677700289677873
1R0R_A 1R0R_B GB27D 2.84790490909284
1R0R_A 1R0R_B YB15M 3.0953063843525896
1R0R_A 1R0R_B TB12L 0.6594309628674253
1R0R_A 1R0R_B LB13H 0.5617201758212111
1R0R_A 1R0R_B LB13N 1.47950540973096
1R0R_A 1R0R_B RB16C 0.1134270202185145
1R0R_A 1R0R_B KB8P 1.3290659418356991
1R0R_A 1R0R_B RB16H 0.5183674465563648
1R0R_A 1R0R_B RB16Y 0.8118645745984789
1R0R_A 1R0R_B RB16T 0.3398448940085981
1R0R_A 1R0R_B AB10C -0.4767381990397759
1R0R_A 1R0R_B EB14S 3.1309573535441046
1R0R_A 1R0R_B EB14N 2.088856585326744
1R0R_A 1R0R_B GB27C 1.4001406877808158
1R0R_A 1R0R_B KB8Y -0.6857520161399435
1R0R_A 1R0R_B PB9V 1.3124916232591932
1R0R_A 1R0R_B PB9E -2.2117787708039143
1R0R_A 1R0R_B EB14K 2.6742489208430658
1R0R_A 1R0R_B AB10Q 1.7857743745367376
1R0R_A 1R0R_B KB8R 0.1343224307235893
1R0R_A 1R0R_B KB8T 0.8773641218435113
1R0R_A 1R0R_B RB16I 0.2041836086855024
1R0R_A 1R0R_B PB9H -0.213503626125453
1R0R_A 1R0R_B AB10G 1.0962621126572838
1R0R_A 1R0R_B TB12F -0.0650955323292521
1R0R_A 1R0R_B LB13F 0.6084231284666526
1R0R_A 1R0R_B GB27W 1.3817679300843615
1R0R_A 1R0R_B NB31T 0.5617201758212111
1R0R_A 1R0R_B EB14W 1.8237701662181929
1R0R_A 1R0R_B YB15A 5.398347488125372
1R0R_A 1R0R_B TB12G 3.0953063843525896
1R0R_A 1R0R_B RB16Q 0.1798679955417252
1R0R_A 1R0R_B TB12D 2.84790490909284
1R0R_A 1R0R_B RB16A -0.0947107308023032
1R0R_A 1R0R_B EB14M 1.5493665876928733
1R0R_A 1R0R_B LB13P 7.580720015497826
1R0R_A 1R0R_B PB9S 0.1343224307235893
1R0R_A 1R0R_B LB13S 1.5493665876928733
1R0R_A 1R0R_B GB27S 0.8990843016924277
1R0R_A 1R0R_B KB8W -1.0900501053653286
1R0R_A 1R0R_B GB27Y 0.4785871872108221
1R0R_A 1R0R_B YB15L 2.539273024063462
1R0R_A 1R0R_B RB16D -0.1774129500395815
1R0R_A 1R0R_B TB12C 0.8018779118424888
1R0R_A 1R0R_B PB9Q -1.3949326428341244
1R0R_A 1R0R_B EB14V 0.0934522353437419
1R0R_A 1R0R_B NB31R 1.4386352143511107
1R0R_A 1R0R_B AB10E 4.51413612423293
1R0R_A 1R0R_B GB27R 3.422804496059225
1R0R_A 1R0R_B KB8V 0.404940426337852
1R0R_A 1R0R_B AB10F -2.2117787708039143
1R0R_A 1R0R_B EB14D 0.255132873642042
1R0R_A 1R0R_B AB10M -1.7154079136072866
1R0R_A 1R0R_B LB13C -1.3275674069114751
1R0R_A 1R0R_B YB15K 5.435689624802924
1R0R_A 1R0R_B TB12M 1.3124916232591932
1R0R_A 1R0R_B NB31P 1.4586099992258852
1R0R_A 1R0R_B TB12P 4.8810807588893255
1R0R_A 1R0R_B AB10S 1.7174320495970417
1R0R_A 1R0R_B AB10N 2.0661661393391064
1R0R_A 1R0R_B GB27H 2.161977587461724
1R0R_A 1R0R_B KB8I 1.1083080145736908
1R0R_A 1R0R_B RB16N -0.0497496638267538
1R0R_A 1R0R_B RB16M 0.440591395529367
1R0R_A 1R0R_B PB9N -0.9045915834780036
1R0R_A 1R0R_B KB8A -0.6015093726879961
1R0R_A 1R0R_B LB13V 2.1265603472370938
1R0R_A 1R0R_B NB31G 0.2814924821226849
1R0R_A 1R0R_B AB10T 1.2504722482050656
1R0R_A 1R0R_B NB31F -0.1774129500395815
1R0R_A 1R0R_B KB8G -0.8268155324510058
1R0R_A 1R0R_B AB10R 5.453502149288758
1R0R_A 1R0R_B PB9Y 0.8455318218672154
1R0R_A 1R0R_B TB12R 3.2987890864813925
1R0R_A 1R0R_B YB15P 6.561810163862448
1R0R_A 1R0R_B RB16L 0.2288720236004682
1R0R_A 1R0R_B NB31H 0.2814924821226849
1R0R_A 1R0R_B AB10H 1.750123405345219
1R0R_A 1R0R_B LB13Y 0.440591395529367
1R0R_A 1R0R_B LB13T 0.1575389510780986
1R0R_A 1R0R_B EB14F 3.567730079858249
1R0R_A 1R0R_B AB10L -1.7305829367964058
1R0R_A 1R0R_B GB27I 1.9536061074740232
1R0R_A 1R0R_B PB9R 1.0870618351929124
1R0R_A 1R0R_B AB10K 3.030210852023337
1R0R_A 1R0R_B KB8L -0.3234635409455801
1R0R_A 1R0R_B AB10Y -0.8268155324510058
1R0R_A 1R0R_B LB13W 1.5027219300854693
1R0R_A 1R0R_B EB14T 4.193087888100207
1R0R_A 1R0R_B NB31L 1.525050974549096
1R0R_A 1R0R_B GB27L 2.3771819587328427
1R0R_A 1R0R_B NB31S 0.4785871872108221
1R0R_A 1R0R_B EB14I 0.7267389265440674
1R0R_A 1R0R_B GB27E 1.9536061074740232
1R0R_A 1R0R_B RB16S -0.0161170371716714
1R0R_A 1R0R_B RB16W 0.9322396118363072
1R0R_A 1R0R_B RB16F 0.440591395529367
1R0R_A 1R0R_B YB15F 0.6084231284666526
1R0R_A 1R0R_B YB15T 5.398347488125372
1R0R_A 1R0R_B TB12I 0.255132873642042
1R0R_A 1R0R_B EB14H 1.685027873015967
1R0R_A 1R0R_B NB31M 0.3102296955355469
1R0R_A 1R0R_B KB8C -0.6070998990335958
1R0R_A 1R0R_B GB27F 0.1134270202185145
1R0R_A 1R0R_B PB9T 1.1810782478329696
1R0R_A 1R0R_B PB9A -0.6300128571339751
1R0R_A 1R0R_B TB12N 0.0176155720958952
1R0R_A 1R0R_B TB12K 1.90690315482858
1R0R_A 1R0R_B YB15E 4.553916383578473
1R0R_A 1R0R_B LB13I 3.208733404571104
1R0R_A 1R0R_B AB10I -1.7744688256547416
1R0R_A 1R0R_B YB15N 5.5382708671075775
1R0R_A 1R0R_B YB15W -0.3590968734770712
1R0R_A 1R0R_B NB31D 0.5617201758212111
1R0R_A 1R0R_B GB27Q 2.004613941874794
1R0R_A 1R0R_B YB15R 4.407798007611783
1R0R_A 1R0R_B EB14Q 1.264327574561456
1R0R_A 1R0R_B GB27P 1.0650137263177406
1R0R_A 1R0R_B NB31W 0.0549577087734469
1R0R_A 1R0R_B NB31Q -0.2706179956142591
1R0R_A 1R0R_B TB12H 0.5183674465563648
1R0R_A 1R0R_B KB8E -0.1089026565024937
1R0R_A 1R0R_B EB14P 3.712085396076529
1R0R_A 1R0R_B PB9D -2.4861823493292365
1R0R_A 1R0R_B KB8Q -0.5234995926941153
1R0R_A 1R0R_B EB14Y 1.863550425563736
1R0R_A 1R0R_B EB14G 2.6742489208430658
1R0R_A 1R0R_B YB15C 3.208733404571104
1R0R_A 1R0R_B KB8M -0.5534654196932056
1R0R_A 1R0R_B KB8S -0.3030822107900057
1R0R_A 1R0R_B NB31K 1.1316793528819176
1R0R_A 1R0R_B NB31I 0.3102296955355469
1R0R_A 1R0R_B NB31E -0.2021594726957474
1R0R_A 1R0R_B LB13G 2.3201410234148216
1R0R_A 1R0R_B AB10W 0.2041836086855024
1R0R_A 1R0R_B PB9C -0.972933908417696
1R0R_A 1R0R_B KB8N -0.3853999577890352
1R0R_A 1R0R_B LB13R 2.894549566700244
1R0R_A 1R0R_B KB8H -0.5850517901584239
1R0R_A 1R0R_B TB12Q 2.1907148008745856
1R0R_A 1R0R_B PB9M -0.0326913557481756
1R0R_A 1R0R_B YB15V 3.4612168154183887
1R0R_A 1R0R_B EB14R 2.0661661393391064
1R0R_A 1R0R_B LB13D 4.476140332551474
1R0R_A 1R0R_B PB9K 0.3398448940085981
1R0R_A 1R0R_B RB16G 2.060353100880766
1R0R_A 1R0R_B YB15S 5.0908526523600806
1R0R_A 1R0R_B PB9I 1.7174320495970417
1R0R_A 1R0R_B YB15G 6.312971527865785
1R0R_A 1R0R_B GB27T 1.655412674542916
1R0R_A 1R0R_B PB9F 1.362798551103266
1R0R_A 1R0R_B AB10P 3.2987890864813925
1R0R_A 1R0R_B GB27N 1.264327574561456
1R0R_A 1R0R_B KB8F -0.7834628031861595
1R0R_A 1R0R_B NB31Y 0.1798679955417252
1R0R_A 1R0R_B RB16E 0.3398448940085981
1R0R_A 1R0R_B TB12W 2.894549566700244
1R0R_A 1R0R_B PB9G -0.3410085695957647
1R0R_A 1R0R_B GB27K 2.894549566700244
1R0R_A 1R0R_B RB16K -0.0947107308023032
1R0R_A 1R0R_B AB10D 5.155659485233353
1R0R_A 1R0R_B YB15Q 4.440489363359959
1R0R_A 1R0R_B PB9W 0.4785871872108221
1R0R_A 1R0R_B GB27M 2.1265603472370938
1R0R_A 1R0R_B NB31C 0.5617201758212111
1R0R_A 1R0R_B RB16V 0.1134270202185145
1R0R_A 1R0R_B EB14L 0.5617201758212111
1R0R_A 1R0R_B RB16P 7.287635070858058
1R0R_A 1R0R_B YB15I 2.8246883887383287
1R0R_A 1R0R_B TB12Y -0.8665957917965468
1S0W_A 1S0W_B AB142F -2.1019341179373274
1S1Q_A 1S1Q_B FA86A 0.7618593974133088
1S1Q_A 1S1Q_B NA43A 1.210689233846836
1S1Q_A 1S1Q_B WA73A 0.2759324833310703
1S1Q_A 1S1Q_B FA42A 0.1959003444918128
1S1Q_A 1S1Q_B DA44A 0.9488435117371492
1S1Q_A 1S1Q_B VA41A 0.6591666674866832
1SBB_A 1SBB_B VB26Y -1.4547657175287825
1SBB_A 1SBB_B LB20T -0.0912809292832177
1SBB_A 1SBB_B YB91V 0.0790711375448189
1SBN_A 1SBN_B RB38K -0.3098434393028242
1SGD_A 1SGD_B DB13L -5.589755775252961
1SGE_A 1SGE_B EB13L -5.899483203299344
1SGN_A 1SGN_B NB13L -3.3524942922738106
1SGP_A 1SGP_B AB13L -2.9475538659359604
1SGQ_A 1SGQ_B GB13L -4.933851329158159
1SGY_A 1SGY_B YB13L -1.6526778147726642
1SIB_A 1SIB_B KB46R -0.7311587735816741
1TM1_A 1TM1_B MB40A 1.0271508140535437
1TM1_A 1TM1_B MB40F 1.0271508140535437
1TM1_A 1TM1_B TB39D 2.0980176673647684
1TM1_A 1TM1_B RB48C 3.234173525639145
1TM1_A 1TM1_B MB40K 1.093013509854524
1TM1_A 1TM1_B YB42G 4.678612295367822
1TM1_A 1TM1_B EB41S 2.74638619860184
1TM1_A 1TM1_B MB40Y 0.0564382965578786
1TM1_A 1TM1_B TB39P 3.754120390544673
1TM1_A 1TM1_B RB43A 1.2555223637452997
1TM1_A 1TM1_B MB40G 2.231782031124311
1TM3_A 1TM3_B KB40M -1.093013509854524
1TM4_A 1TM4_B GB40M -2.231782031124311
1TM5_A 1TM5_B AB40M -1.0271508140535437
1TM7_A 1TM7_B YB40M -0.0564382965578786
1TMG_A 1TMG_B FB40M -1.0271508140535437
1TO1_A 1TO1_B AB42Y -2.980082225320676
1UUZ_A 1UUZ_B HA62D 3.3775171494500267
1UUZ_A 1UUZ_B HA62N 1.5188448158376406
1UUZ_A 1UUZ_B HA62A 1.7739346081390313
1UUZ_A 1UUZ_B CA64A 0.6505475729588959
1UUZ_A 1UUZ_B HA62Q 1.5188448158376406
1X1X_A 1X1X_B AB76E -0.8231384096358614
1XD3_A 1XD3_B RB42L -0.8819965836107677
1XD3_A 1XD3_B IB44A 0.2720923755953315
1XD3_A 1XD3_B KB11R 1.4346274012921905
1XD3_A 1XD3_B LB8A 2.738291482575054
1XD3_A 1XD3_B DB39A -0.4200912586157965
1XD3_A 1XD3_B KB6A 1.37496645878797
1XD3_A 1XD3_B RB72L 1.33165778357358
1XD3_A 1XD3_B KB27R 0.2720923755953315
1XD3_A 1XD3_B HB68N 0.0
1XD3_A 1XD3_B KB27A -0.0638551708388259
1XD3_A 1XD3_B RB74L 2.4323607052688656
1XD3_A 1XD3_B KB6R 0.309592804009819
1Y1K_A 1Y1K_B AB39T -2.726969576491131
1Y33_A 1Y33_B PB39T -3.754120390544673
1Y34_A 1Y34_B AB41E -3.0526816709112428
1Y3B_A 1Y3B_B SB41E -2.74638619860184
1Z7X_A 1Z7X_B WA263A 2.211305151040648
1Z7X_A 1Z7X_B YA434A 5.952934775265284
1Z7X_A 1Z7X_B EA287A 1.320659825016321
1Z7X_A 1Z7X_B SA289A 0.8139737839962642
1Z7X_A 1Z7X_B IA459A 0.3372513313386136
1Z7X_A 1Z7X_B RA457A 0.8478203627950833
1Z7X_A 1Z7X_B YA437F 2.1649678059217905
1Z7X_A 1Z7X_B YA437A 2.6228152289070703
1Z7X_A 1Z7X_B WA261A 1.3352816523491953
1Z7X_A 1Z7X_B EA206A 1.0181724296231174
1Z7X_A 1Z7X_B WA318A 0.9929705871243611
1Z7X_A 1Z7X_B YA434F 0.1205619193917186
1Z7X_A 1Z7X_B WA375A 1.6687200025820168
1Z7X_A 1Z7X_B DA435A 3.660587922732521
1Z7X_A 1Z7X_B EA344A 1.560757578081752
1Z7X_A 1Z7X_B KA320A 1.320659825016321
1Z7X_A 1Z7X_B EA401A 1.3056677857803614
2A9K_A 2A9K_B GB55D 2.409802431359984
2AJF_A 2AJF_B TB159S 1.8230168607872856
2AJF_A 2AJF_B NB151K 2.0115904859103293
2B0Z_A 2B0Z_B IB87F -2.527726222707648
2B10_A 2B10_B SB87F -1.1642414344620822
2B11_A 2B11_B WB87F -0.7537916145686152
2B12_A 2B12_B YB87F -1.574691254355549
2B42_A 2B42_B HA357K 2.5406589081690485
2B42_A 2B42_B HA357A 1.6211478042605787
2B42_A 2B42_B HA357Q 1.064746296952702
2BTF_A 2BTF_B GB120F 1.7200653130929708
2BTF_A 2BTF_B KB125A 0.4598245664571241
2BTF_A 2BTF_B FB59A 1.5949378050109324
2BTF_A 2BTF_B VB60E 0.9782368108508566
2FTL_A 2FTL_B KB15D 11.38118981571144
2FTL_A 2FTL_B GB36A 2.190994415843768
2FTL_A 2FTL_B KB15S 7.64231393280437
2FTL_A 2FTL_B KB15G 12.221723141911871
2FTL_A 2FTL_B KB15N 7.948233485706734
2FTL_A 2FTL_B KB15H 8.689000456471591
2FTL_A 2FTL_B KB15E 9.324441184036544
2FTL_A 2FTL_B KB15Q 9.271641744362888
2FTL_A 2FTL_B IB18A 4.968786397859223
2FTL_A 2FTL_B KB15M 7.611102390346371
2FTL_A 2FTL_B KB15Y 6.7845643875760775
2FTL_A 2FTL_B KB15F 6.952966976747844
2FTL_A 2FTL_B KB15I 11.053423601134748
2FTL_A 2FTL_B KB15L 8.770452779504572
2FTL_A 2FTL_B GB12A 4.346368041583864
2FTL_A 2FTL_B KB15T 10.4855197758984
2FTL_A 2FTL_B KB15V 11.632235033424028
2FTL_A 2FTL_B KB15W 8.572581166725215
2G2U_A 2G2U_B DA79K -0.4526592630612107
2G2U_A 2G2U_B RB160A 0.6694568519687198
2G2U_A 2G2U_B GB48A -0.4263831736089845
2G2U_A 2G2U_B FB142A 0.2756885272923011
2G2U_A 2G2U_B EB31A 0.6505475729588968
2G2U_A 2G2U_B WB112A 0.9582522172533592
2G2U_A 2G2U_B HB148A 1.1183877055633626
2G2U_A 2G2U_B SB39A -0.9556609323880992
2G2U_A 2G2U_B EB73M -3.5342558829494286
2G2U_A 2G2U_B HB41A 1.7161847851985126
2G2U_A 2G2U_B YB143A -1.846306258412426
2G2U_A 2G2U_B KB74A -0.2172258716374013
2G2U_A 2G2U_B FB36A 2.762560350718
2G2U_A 2G2U_B YB50A -2.0746778081041817
2G2U_A 2G2U_B WB162A 0.5309427839523906
2G2U_A 2G2U_B SB35A -0.9504205980140484
2G2U_A 2G2U_B GB141A -0.4136023770452688
2G2U_A 2G2U_B SB113A -0.6120034064766262
2G2U_A 2G2U_B YB51A -0.6284324378571107
2G2U_A 2G2U_B SB71A -0.5119496660751359
2G2U_A 2G2U_B YB53A 2.3005864028821463
2G2U_A 2G2U_B RB144A -0.3422199365820422
2G2U_A 2G2U_B WB150A 1.7843235398671444
2G2U_A 2G2U_B EB73A -1.978441586977592
2G2W_A 2G2W_B KA79D 0.4526592630612107
2GYK_A 2GYK_B AA49D -5.915151095926328
2HLE_A 2HLE_B KA141Q -0.4104498198934649
2HLE_A 2HLE_B LA87R 2.286146231272053
2HRK_A 2HRK_B TA108V 0.7207069189544306
2HRK_A 2HRK_B KA140A 0.9530349683520996
2HRK_A 2HRK_B TB55V 0.6238866235542542
2I26_A 2I26_B AA29V 0.0303735253256078
2I9B_A 2I9B_B RB129A -0.287495356157244
2I9B_A 2I9B_B KB131A 0.6739390994047731
2J0T_A 2J0T_B SB68A 2.105313290487679
2J0T_A 2J0T_B VB69I 0.8090955198345746
2J0T_A 2J0T_B SB68R 2.834932000991396
2J0T_A 2J0T_B SB68Y 2.976855220496777
2J0T_A 2J0T_B VB4K 1.8236053576679616
2J0T_A 2J0T_B TB2A 4.387569519641223
2J0T_A 2J0T_B CB70S 4.407535772449172
2J0T_A 2J0T_B VB4I 1.641799279573867
2J0T_A 2J0T_B TB2L 2.6808043441244465
2J0T_A 2J0T_B SB68E 2.1843844280325
2J0T_A 2J0T_B VB4A 0.0
2J0T_A 2J0T_B MB66A 1.6803650344631844
2J0T_A 2J0T_B VB4S 0.9819262553075436
2J0T_A 2J0T_B VB69T -0.0774751502088015
2J0T_A 2J0T_B TB2R 5.043489333088793
2J0T_A 2J0T_B TB2S 1.5936301403211883
2J1K_A 2J1K_B GA10D 0.1836594565075522
2J1K_A 2J1K_B RA24A 0.7644613432290566
2NU0_A 2NU0_B WB13L -1.839245851362272
2NU1_A 2NU1_B HB13L -1.690019951450214
2NU2_A 2NU2_B RB13L -3.3200901156927367
2NU4_A 2NU4_B KB13L -3.137784172762238
2O3B_A 2O3B_B DB75N 5.900288080314299
2O3B_A 2O3B_B EB24A 5.472160711277267
2O3B_A 2O3B_B EB24Q 5.395647478010924
2O3B_A 2O3B_B EB24D 0.5990234450165097
2O3B_A 2O3B_B DB75E 5.435139036830098
2O3B_A 2O3B_B QB74A 3.231337986686892
2O3B_A 2O3B_B WB76A 4.071654248584531
2OOB_A 2OOB_B GA13S 0.2352455349568441
2PCB_A 2PCB_B EA35Q 0.6756671661458079
2PCB_A 2PCB_B AA193F 0.7333081549604046
2PCB_A 2PCB_B EA290N 0.8705703893158621
2PCB_A 2PCB_B DA34N 0.8208996397869335
2PCB_A 2PCB_B EA32Q 0.6054483245339028
2PCC_A 2PCC_B EA290A 6.19943425467731
2PCC_A 2PCC_B DA34A -0.8965966717942191
2PCC_A 2PCC_B VA197A 2.101623846030991
2PCC_A 2PCC_B AB86G 1.900898253877345
2PCC_A 2PCC_B KB92A 0.9014172118349733
2PCC_A 2PCC_B KB77A 0.3035163338270603
2SGP_A 2SGP_B PB13L -8.328285781965437
2SGQ_A 2SGQ_B QB13L -2.567301854513074
2SIC_A 2SIC_B MB67K 0.0
2SIC_A 2SIC_B MB67L -0.2400977530654327
2SIC_A 2SIC_B MB67V 0.7129372152866669
2SIC_A 2SIC_B MB67G 0.1451502243292779
2SIC_A 2SIC_B MB67E 0.7956977972881719
2SIC_A 2SIC_B MB67I 1.6035825413109936
2SIC_A 2SIC_B MB67A 0.2177496699198453
2SIC_A 2SIC_B MB67H 0.2177496699198453
2SIC_A 2SIC_B MB67D 0.7511539535495384
2SIC_A 2SIC_B MB67R 0.0
2VLN_A 2VLN_B AB75N -2.3344109640223465
2VLO_A 2VLO_B AB95K -1.959528419718087
2VLQ_A 2VLQ_B AB86F -3.8787286930522846
2WPT_A 2WPT_B VA34A 3.8073835914542
2WPT_A 2WPT_B RA35T -1.0377726938254526
2WPT_A 2WPT_B SA47A 2.4244821810979267
2WPT_A 2WPT_B PA53A 2.926212930274612
2WPT_A 2WPT_B NA31A -0.3796816568193666
2WPT_A 2WPT_B NA31V -0.8965966717942226
2WPT_A 2WPT_B EA27A 1.7330802088570818
2WPT_A 2WPT_B DA30L -3.4016900535149284
2WPT_A 2WPT_B EA38A 4.500904184630159
2WPT_A 2WPT_B DA30A -0.132135328565166
2WPT_A 2WPT_B RA35A -1.1103721394160235
3BK3_A 3BK3_B LB2R 0.02632228544363
3BK3_A 3BK3_B IB22R 2.1204975562858497
3BK3_A 3BK3_B TB6P 1.7421427493935546
3BK3_A 3BK3_B LB2A 0.0
3BK3_A 3BK3_B IB28R 1.1749111631225233
3BK3_A 3BK3_B IB28A 1.2608812593210048
3BK3_A 3BK3_B IB19R 0.4237572095729831
3BK3_A 3BK3_B IB3A 1.0381314662809549
3BK3_A 3BK3_B IB3R 0.6505475729588959
3BK3_A 3BK3_B AB37R 1.3820325122991317
3BK3_A 3BK3_B IB19A 0.4861468519007541
3BK3_A 3BK3_B TB4P 0.4861468519007541
3BK3_A 3BK3_B IB22A 1.3070464916876876
3BP8_A 3BP8_B FA115A 0.7086306223429606
3BP8_A 3BP8_B AB51F 0.615184594049202
3BTD_A 3BTD_B DB13K -11.38118981571144
3BTE_A 3BTE_B EB13K -9.324441184036544
3BTF_A 3BTF_B FB13K -6.952966976747844
3BTG_A 3BTG_B GB13K -12.221723141911871
3BTH_A 3BTH_B HB13K -8.689000456471591
3BTM_A 3BTM_B MB13K -7.611102390346371
3BTQ_A 3BTQ_B QB13K -9.271641744362888
3BTT_A 3BTT_B TB13K -10.4855197758984
3BTW_A 3BTW_B WB13K -8.572581166725215
3SGB_A 3SGB_B NB30T 0.0097100109797683
3SGB_A 3SGB_B YB14F 0.166431319454233
3SGB_A 3SGB_B LB12H 1.690019951450214
3SGB_A 3SGB_B NB30S 0.4033062838879129
3SGB_A 3SGB_B NB30E 0.9493102265368236
3SGB_A 3SGB_B GB26T 2.997860793780033
3SGB_A 3SGB_B AB9E 0.6621283342590729
3SGB_A 3SGB_B TB11Q 2.899389817238223
3SGB_A 3SGB_B LB12G 4.933851329158159
3SGB_A 3SGB_B GB26Q 2.9151496893548874
3SGB_A 3SGB_B YB14R 2.7221240778696814
3SGB_A 3SGB_B LB12K 3.137784172762238
3SGB_A 3SGB_B AB9Y -1.62434887285022
3SGB_A 3SGB_B NB30I 0.1951685328670951
3SGB_A 3SGB_B TB11N 3.209117245284604
3SGB_A 3SGB_B YB14K 3.3851856480219884
3SGB_A 3SGB_B LB12V 2.997860793780033
3SGB_A 3SGB_B EB13S 1.916554724799452
3SGB_A 3SGB_B RB15W 0.3074948357652936
3SGB_A 3SGB_B YB14Q 3.235378095326178
3SGB_A 3SGB_B AB9L 0.166431319454233
3SGB_A 3SGB_B LB12R 3.3200901156927367
3SGB_A 3SGB_B KB7T -1.9218995071345155
3SGB_A 3SGB_B TB11K 2.00731131326644
3SGB_A 3SGB_B LB12I 4.4188804360352485
3SGB_A 3SGB_B YB14S 2.00731131326644
3SGB_A 3SGB_B RB15L 0.2247837313401461
3SGB_A 3SGB_B AB9F -1.1642063718404645
3SGB_A 3SGB_B YB14T 4.804015387902329
3SGB_A 3SGB_B EB13P 2.243485371143421
3SGB_A 3SGB_B LB12P 8.328285781965437
3SGB_A 3SGB_B PB8M -0.4935835395167612
3SGB_A 3SGB_B KB7Q -0.6147123198086053
3SGB_A 3SGB_B YB14P 6.18897862625524
3SGB_A 3SGB_B EB13D 0.5450121373114403
3SGB_A 3SGB_B NB30K 0.5713717457920833
3SGB_A 3SGB_B GB26W 3.8576093435276473
3SGB_A 3SGB_B PB8K 0.1124663136298167
3SGB_A 3SGB_B KB7H -0.3958727524705452
3SGB_A 3SGB_B KB7C -0.5767165281271485
3SGB_A 3SGB_B AB9I -0.4468805868713179
3SGB_A 3SGB_B NB30M 0.1387863297181226
3SGB_A 3SGB_B KB7F -0.7154588213293724
3SGB_A 3SGB_B LB12F 1.3548929899871391
3SGB_A 3SGB_B EB13A 1.0050493855427938
3SGB_A 3SGB_B RB15T 0.447418214747497
3SGB_A 3SGB_B GB26S 1.5853125788500115
3SGB_A 3SGB_B EB13M -0.1868589353703776
3SGB_A 3SGB_B EB13L 0.7684664508802204
3SGB_A 3SGB_B KB7V -0.941876695119456
3SGB_A 3SGB_B PB8V -0.3060925742413527
3SGB_A 3SGB_B AB9P 0.5450121373114403
3SGB_A 3SGB_B AB9Q 0.4242016943929876
3SGB_A 3SGB_B EB13R 1.2572990674231974
3SGB_A 3SGB_B GB26N 2.243485371143421
3SGB_A 3SGB_B KB7I -1.3259217109522314
3SGB_A 3SGB_B YB14G 2.8028322716445544
3SGB_A 3SGB_B YB14A 1.916554724799452
3SGB_A 3SGB_B AB9D 0.4697472592111236
3SGB_A 3SGB_B RB15A 0.0530042992357202
3SGB_A 3SGB_B RB15S 0.2737622264977269
3SGB_A 3SGB_B NB30D 0.8983023921360509
3SGB_A 3SGB_B RB15V -0.0233047356125002
3SGB_A 3SGB_B PB8D -1.0187187642609796
3SGB_A 3SGB_B AB9W -1.5320419143777482
3SGB_A 3SGB_B PB8N -0.1230641035016635
3SGB_A 3SGB_B YB14C 1.6716471937537598
3SGB_A 3SGB_B PB8L -0.1394065829779744
3SGB_A 3SGB_B GB26P 1.0815968229835615
3SGB_A 3SGB_B EB13V 0.125774532494999
3SGB_A 3SGB_B YB14L 1.5542068382308545
3SGB_A 3SGB_B KB7R -0.6830546447482977
3SGB_A 3SGB_B KB7L -1.7920635658786883
3SGB_A 3SGB_B TB11P 3.137784172762238
3SGB_A 3SGB_B YB14N 3.114567652407727
3SGB_A 3SGB_B EB13I -0.6147123198086053
3SGB_A 3SGB_B LB12T 3.1601132172258635
3SGB_A 3SGB_B LB12N 3.3524942922738106
3SGB_A 3SGB_B GB26K 2.489867363137748
3SGB_A 3SGB_B GB26I 4.035548937022108
3SGB_A 3SGB_B GB26Y 3.0530154549434165
3SGB_A 3SGB_B AB9K 2.456745628990024
3SGB_A 3SGB_B PB8I -0.0233047356125002
3SGB_A 3SGB_B EB13W 1.5261595861742734
3SGB_A 3SGB_B AB9M 0.0877197909736509
3SGB_A 3SGB_B RB15H 0.447418214747497
3SGB_A 3SGB_B RB15Y 0.2898792636693983
3SGB_A 3SGB_B RB15D 0.6297241576779964
3SGB_A 3SGB_B TB11H 3.45883240889496
3SGB_A 3SGB_B NB30H -0.1230641035016635
3SGB_A 3SGB_B RB15C 0.5450121373114403
3SGB_A 3SGB_B NB30P 0.5713717457920833
3SGB_A 3SGB_B TB11I 1.7285144780205088
3SGB_A 3SGB_B TB11D 4.887148376512718
3SGB_A 3SGB_B YB14I 3.5419653975053467
3SGB_A 3SGB_B NB30V -0.2385091068836153
3SGB_A 3SGB_B EB13G 2.075653638206136
3SGB_A 3SGB_B PB8S 0.0648068328732751
3SGB_A 3SGB_B KB7S -2.5515796282966186
3SGB_A 3SGB_B RB15P 7.577514334527455
3SGB_A 3SGB_B TB11C 2.8855344908818346
3SGB_A 3SGB_B PB8Y -0.3958727524705452
3SGB_A 3SGB_B TB11R 1.9452919382123144
3SGB_A 3SGB_B RB15E 0.6621283342590729
3SGB_A 3SGB_B NB30L 0.7304706591987653
3SGB_A 3SGB_B RB15G 1.0917571755118871
3SGB_A 3SGB_B PB8T -0.1071663729985825
3SGB_A 3SGB_B PB8Q -0.3116301090185978
3SGB_A 3SGB_B KB7N -0.6503632890001203
3SGB_A 3SGB_B GB26C 2.5753047953462893
3SGB_A 3SGB_B AB9G 2.4518568511311223
3SGB_A 3SGB_B AB9T 0.3264642147463892
3SGB_A 3SGB_B GB26M 2.7667415955586847
3SGB_A 3SGB_B AB9H -0.4468805868713179
3SGB_A 3SGB_B GB26F 2.9802452216841377
3SGB_A 3SGB_B AB9S 0.8983023921360509
3SGB_A 3SGB_B RB15F 0.2247837313401461
3SGB_A 3SGB_B PB8C -0.3228651772934849
3SGB_A 3SGB_B TB11Y 3.3200901156927367
3SGB_A 3SGB_B NB30F -0.1394065829779744
3SGB_A 3SGB_B EB13N 1.086699087248979
3SGB_A 3SGB_B LB12D 5.589755775252961
3SGB_A 3SGB_B KB7D -0.6147123198086053
3SGB_A 3SGB_B TB11L 2.436940154519254
3SGB_A 3SGB_B KB7Y -0.5767165281271485
3SGB_A 3SGB_B KB7G -0.7154588213293724
3SGB_A 3SGB_B PB8E -0.6147123198086053
3SGB_A 3SGB_B PB8F -0.3401335934645768
3SGB_A 3SGB_B RB15N 0.3264642147463892
3SGB_A 3SGB_B PB8H 0.2737622264977269
3SGB_A 3SGB_B AB9C 0.040958397319315
3SGB_A 3SGB_B RB15K -0.2739263471082491
3SGB_A 3SGB_B GB26D 1.6023708869285915
3SGB_A 3SGB_B TB11W 2.997860793780033
3SGB_A 3SGB_B NB30R 0.5713717457920833
3SGB_A 3SGB_B PB8W -0.4935835395167612
3SGB_A 3SGB_B KB7A -2.536338619277537
3SGB_A 3SGB_B GB26R 3.45883240889496
3SGB_A 3SGB_B YB14W 0.3833314990131402
3SGB_A 3SGB_B KB7W -0.0132029471206074
3SGB_A 3SGB_B GB26L 2.6211483482070648
3SGB_A 3SGB_B LB12Y 1.6526778147726642
3SGB_A 3SGB_B NB30A 0.3264642147463892
3SGB_A 3SGB_B PB8A -0.1868589353703776
3SGB_A 3SGB_B TB11G 5.506506448524476
3SGB_A 3SGB_B YB14M 1.9749071366853648
3SGB_A 3SGB_B NB30Y -0.1071663729985825
3SGB_A 3SGB_B GB26E 1.9452919382123144
3SGB_A 3SGB_B TB11M 2.00731131326644
3SGB_A 3SGB_B RB15M 0.125774532494999
3SGB_A 3SGB_B EB13Y 0.7684664508802204
3SGB_A 3SGB_B LB12S 4.100355769895383
3SGB_A 3SGB_B RB15Q 0.040958397319315
3SGB_A 3SGB_B TB11F 3.5886683501507903
3SGB_A 3SGB_B AB9N -0.0878839115841731
3SGB_A 3SGB_B EB13T 2.1136494298875905
3SGB_A 3SGB_B RB15I 0.2401295998426445
3SGB_A 3SGB_B PB8G 0.0530042992357202
3SGB_A 3SGB_B EB13Q 0.1809766071669045
3SGB_A 3SGB_B YB14E 2.00731131326644
3SGB_A 3SGB_B EB13F 1.916554724799452
3SGB_A 3SGB_B EB13H 0.5187512872698665
3SGB_A 3SGB_B KB7E 0.0317581198549401
3SGB_A 3SGB_B AB9R 2.00731131326644
3SGB_A 3SGB_B GB26H 2.700075968994508
3SGB_A 3SGB_B NB30C 0.3448369724428453
3SGB_A 3SGB_B LB12W 1.839245851362272
3SGB_A 3SGB_B NB30Q 0.2401295998426445
3SGB_A 3SGB_B YB14V 4.360143692429439
3SGB_A 3SGB_B NB30W 0.5187512872698665
3SGB_A 3SGB_B PB8R 0.3074948357652936
3SGB_A 3SGB_B EB13C 1.1583240436369913
3SGB_A 3SGB_B KB7P 0.0
3SGB_A 3SGB_B EB13K 0.166431319454233
3SGB_A 3SGB_B KB7M -1.1500144461402757
3SGB_A 3SGB_B NB30G 0.3074948357652936
3SGB_A 3SGB_B LB12C -0.0132029471206074
4CPA_A 4CPA_B YB36G 0.5017307491766836
4CPA_A 4CPA_B VB37I 0.1992433537834816
4CPA_A 4CPA_B PB35G 2.5948342479259647
4CPA_A 4CPA_B VB37A 2.324362969534924
4CPA_A 4CPA_B YB36F 0.0
4CPA_A 4CPA_B VB37F 0.0
4CPA_A 4CPA_B VB37L 1.2787470627722115
4CPA_A 4CPA_B VB37G 3.7328431104389743
1A22 FPTIPLSRLFDNAMLRAHRLHQLAFDTYQEFEEAYIPKEQKYSFLQNPQTSLCFSESIPTPSNREETQQKSNLELLRISLLLIQSWLEPVQFLRSVFANSLVYGASDSNVYDLLKDLEERIQTLMGRLEGQIFKQTYSKFDTDALLKNYGLLYCFRKDMDKVETFLRIVQCRSVEGSCGF PKFTKCRSPERETFSCHWTLGPIQLFYTRRNTQEWTQEWKECPDYVSAGENSCYFNSSFTSIWIPYCIKLTSNGGTVDEKCFSVDEIVQPDPPIALNWTLLNGIHADIQVRWEAPRNADIQKGWMVLEYELQYKEVNETKWKMMDPILTTSVPVYSLKVDKEYEVRVRSKQRNSGNYGEFSEVLYVTLPQMS CA171A,CB67A,EB12A,FA25A,PA61A,RB11M
\ No newline at end of file
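The score table above is whitespace-separated with four columns: the labels of the two partners, one or more comma-separated mutation codes, and a numeric score; the final example line instead pairs the raw chain sequences with a mutation list. A parsing sketch mirroring `MulanDataset.from_table` below (the file name is illustrative):

import pandas as pd

data = pd.read_table("example_scores.tsv", sep=r"\s+", header=None)
for seq_a, seq_b, mutations, score in data.itertuples(index=False):
    # "SA51A" reads: wild-type S at position 51 of chain A, mutated to A
    print(seq_a, seq_b, tuple(mutations.split(",")), float(score))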
-from .config import MulanConfig  # noqa
-from .modules import LightAttModel  # noqa
+from .config import MulanConfig
+from .modules import LightAttModel
from .utils import (
    load_pretrained,
    load_pretrained_plm,
    get_available_models,
    get_available_plms,
-)  # noqa
\ No newline at end of file
+)
\ No newline at end of file
from typing import Dict, List, NamedTuple, Tuple, Optional
import os
from tqdm import tqdm
import pandas as pd
import torch
from torch.utils.data import Dataset, default_collate
from torch.nn.utils.rnn import pad_sequence
import numpy as np
from mulan import utils
class MutatedComplex(NamedTuple):
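    # fields hold the *labels* of the two partner sequences (e.g. "1A22_A") as used by
    # MulanDataset.from_table, plus mutation codes like "SA51A" (<wt><chain><pos><mut>)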
sequence_A: str
sequence_B: str
    mutations: Tuple[str, ...]
class MutatedComplexEmbeds(NamedTuple):
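    # per-residue embeddings of the two wild-type chains followed by the two mutated chains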
seq1: torch.Tensor
seq2: torch.Tensor
mut_seq1: torch.Tensor
mut_seq2: torch.Tensor
class MulanDataset(Dataset):
def __init__(
self,
mutated_complexes: List[MutatedComplex],
wt_sequences: Dict[str, str],
embeddings_dir: str,
        plm_model_name: Optional[str] = None,
        scores: Optional[List[float]] = None,
        zs_scores: Optional[List[float]] = None,
):
self.sequences = wt_sequences
self.embeddings_dir = embeddings_dir
self.mutated_complexes = mutated_complexes
self.zs_scores = zs_scores
self.scores = scores
self._sequences_ids = []
self._fill_metadata(mutated_complexes)
# generate embeddings if not provided
all_ids = set([id_ for ids in self._sequences_ids for id_ in ids])
provided_embeddings_ids = (
[os.path.splitext(file)[0] for file in os.listdir(self.embeddings_dir)]
if os.path.exists(self.embeddings_dir)
else []
)
missing_ids = all_ids - set(provided_embeddings_ids)
if missing_ids:
if not plm_model_name:
raise ValueError(
"`plm_model_name` must be provided if embeddings were not pre-computed."
)
self._generate_missing_embeddings(plm_model_name, missing_ids)
def __len__(self):
return len(self.mutated_complexes)
def __getitem__(self, index):
return {
"data": self.mutated_complexes[index],
"inputs_embeds": self._load_embeddings(index),
"zs_scores": (
torch.tensor(self.zs_scores[index], dtype=torch.float32)
if self.zs_scores
else None
),
"labels": (
torch.tensor(self.scores[index], dtype=torch.float32) if self.scores else None
),
}
@classmethod
def from_table(
cls,
mutated_complexes_file: str,
wt_sequences_file: str,
embeddings_dir: str,
        plm_model_name: Optional[str] = None,
):
wt_sequences = utils.parse_fasta(wt_sequences_file)
# parse table file
data = pd.read_table(mutated_complexes_file, sep=r"\s+", header=None)
mutated_complexes = [
MutatedComplex(row[0], row[1], tuple(row[2].split(",")))
for row in data.itertuples(index=False)
]
scores, zs_scores = None, None
if len(data.columns) > 3:
scores = data[3].astype(float).tolist()
if len(data.columns) > 4:
zs_scores = data[4].astype(float).tolist()
return cls(
mutated_complexes, wt_sequences, embeddings_dir, plm_model_name, scores, zs_scores
)
def _fill_metadata(self, mutated_complexes):
for seq1_label, seq2_label, mutations in mutated_complexes:
seq1 = self.sequences[seq1_label]
seq2 = self.sequences[seq2_label]
mut_seq1, mut_seq2 = utils.parse_mutations(mutations, seq1, seq2)
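            # index 1 of a mutation code is the chain identifier: "SA51A" targets chain A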
mut_seq1_label = (
f"{seq1_label}_{'-'.join([mut for mut in mutations if mut[1] == 'A'])}"
)
mut_seq2_label = (
f"{seq2_label}_{'-'.join([mut for mut in mutations if mut[1] == 'B'])}"
)
self.sequences.update({mut_seq1_label: mut_seq1, mut_seq2_label: mut_seq2})
self._sequences_ids.append((seq1_label, seq2_label, mut_seq1_label, mut_seq2_label))
return
def _generate_missing_embeddings(self, plm_model_name, missing_ids):
plm_model, plm_tokenizer = utils.load_pretrained_plm(plm_model_name)
os.makedirs(self.embeddings_dir, exist_ok=True)
for id_ in tqdm(missing_ids, desc="Generating embeddings"):
seq = self.sequences[id_]
embedding = utils.embed_sequence(plm_model, plm_tokenizer, seq)
utils.save_embedding(embedding, self.embeddings_dir, id_)
        # free the language model once all missing embeddings are stored
        del plm_model, plm_tokenizer
return
def _load_embeddings(self, index):
return MutatedComplexEmbeds(
*[
torch.load(os.path.join(self.embeddings_dir, f"{id_}.pt"), weights_only=True)
for id_ in self._sequences_ids[index]
]
)
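A construction sketch for the dataset (paths and the pLM name are assumptions; the pLM is only needed when some embeddings are not yet on disk):

from mulan.data import MulanDataset

dataset = MulanDataset.from_table(
    "example_scores.tsv",   # table in the format shown above
    "wt_sequences.fasta",   # wild-type sequences keyed by the table labels
    "embeddings",           # missing embeddings are generated here on the fly
    plm_model_name="esm2_t33_650M",  # assumed model name
)
item = dataset[0]  # dict with keys "data", "inputs_embeds", "zs_scores", "labels"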
class MulanDataCollator:
def __init__(self, padding_value: float = 0.0):
self.padding_value = padding_value
def __call__(self, batch):
return self._collate_fn(batch)
def _collate_fn(self, batch):
elem = batch[0]
if isinstance(elem, dict):
return {key: self._collate_fn([d[key] for d in batch]) for key in elem}
if isinstance(elem, MutatedComplexEmbeds):
return MutatedComplexEmbeds(
*[
pad_sequence(embeds, batch_first=True, padding_value=self.padding_value)
for embeds in (zip(*batch))
]
)
elif elem is None:
return None
else:
return default_collate(batch)
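Given the `dataset` built above, the collator pads each of the four embedding tensors across the batch (a batching sketch):

from torch.utils.data import DataLoader
from mulan.data import MulanDataCollator

loader = DataLoader(dataset, batch_size=8, collate_fn=MulanDataCollator(padding_value=0.0))
batch = next(iter(loader))
print(batch["inputs_embeds"].seq1.shape)  # (batch_size, max_len, embedding_dim)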
def split_data(
mutated_complexes_file: str,
output_dir: Optional[str] = None,
add_validation_set: bool = True,
validation_size: float = 0.15,
test_size: float = 0.15,
num_folds: int = 1,
random_state: int = 42,
):
"""Split data into train, validation and test sets for training or cross-validation."""
def _save_data(data, output_file):
data.to_csv(output_file, sep="\t", index=False, header=False)
train_data_all, test_data_all = [], []
val_data_all = [] if add_validation_set else None
files_basename = os.path.splitext(os.path.basename(mutated_complexes_file))[0]
data = pd.read_table(mutated_complexes_file, sep=r"\s+", header=None)
rng = np.random.default_rng(random_state)
if num_folds <= 0:
raise ValueError("`num_folds` must be greater than 0.")
elif num_folds == 2 and add_validation_set:
raise ValueError("`num_folds` must be greater than 2 to add a validation set.")
elif num_folds == 1:
split_index = rng.choice(
[0, 1, 2],
size=len(data),
p=[test_size, validation_size, 1 - test_size - validation_size],
)
test_data = data[split_index == 0]
if add_validation_set:
val_data = data[split_index == 1]
train_data = data[split_index == 2]
val_data_all.append(val_data)
else:
            # no validation set: fold the validation share back into training
            train_data = data[(split_index == 1) | (split_index == 2)]
train_data_all.append(train_data)
test_data_all.append(test_data)
if output_dir:
os.makedirs(output_dir, exist_ok=True)
_save_data(train_data, os.path.join(output_dir, f"{files_basename}_train.tsv"))
_save_data(test_data, os.path.join(output_dir, f"{files_basename}_test.tsv"))
if add_validation_set:
_save_data(val_data, os.path.join(output_dir, f"{files_basename}_val.tsv"))
else:
fold_index = rng.integers(low=0, high=num_folds, size=len(data))
for test_fold_index in range(num_folds):
test_data = data[fold_index == test_fold_index]
if add_validation_set:
val_fold_index = (test_fold_index - 1) % num_folds
val_data = data[fold_index == val_fold_index]
val_data_all.append(val_data)
train_data = data[(fold_index != test_fold_index) & (fold_index != val_fold_index)]
else:
train_data = data[fold_index != test_fold_index]
train_data_all.append(train_data)
test_data_all.append(test_data)
if output_dir:
os.makedirs(os.path.join(output_dir, f"fold_{test_fold_index}"), exist_ok=True)
_save_data(
train_data,
os.path.join(
output_dir, f"fold_{test_fold_index}", f"{files_basename}_train.tsv"
),
)
_save_data(
test_data,
os.path.join(
output_dir, f"fold_{test_fold_index}", f"{files_basename}_test.tsv"
),
)
if add_validation_set:
_save_data(
val_data,
os.path.join(
output_dir, f"fold_{test_fold_index}", f"{files_basename}_val.tsv"
),
)
return train_data_all, test_data_all, val_data_all
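A usage sketch for `split_data` (the file name is illustrative):

from mulan.data import split_data

# 5-fold cross-validation; fold k is tested on fold k and validated on fold (k - 1) % 5
train_folds, test_folds, val_folds = split_data(
    "example_scores.tsv",
    output_dir="splits",  # writes splits/fold_k/example_scores_{train,test,val}.tsv
    num_folds=5,
)
# num_folds=1 instead draws a single random train/val/test split (default 0.70/0.15/0.15)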
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
from dataclasses import dataclass, field
import torch
import torch.nn as nn
from transformers import Trainer
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from scipy.stats import pearsonr, spearmanr
from mulan.data import MutatedComplexEmbeds, MutatedComplex
def _metric_spearmanr(y_true, y_pred):
return spearmanr(y_true, y_pred, nan_policy="omit")[0]
def _metric_pearsonr(y_true, y_pred):
return pearsonr(y_true, y_pred)[0]
_DEFAULT_METRICS = {
"mae": mean_absolute_error,
"rmse": root_mean_squared_error,
"pcc": _metric_pearsonr,
"scc": _metric_spearmanr,
}
def default_compute_metrics(eval_pred):
predictions, labels = eval_pred
predictions = predictions.flatten()
labels = labels.flatten()
res = {}
for name, metric in _DEFAULT_METRICS.items():
res[name] = metric(labels, predictions)
return res
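A quick sanity check of the metric bundle on toy values:

import numpy as np

preds = np.array([[0.1], [1.9], [3.2]])  # model outputs, one column
labels = np.array([0.0, 2.0, 3.0])
print(default_compute_metrics((preds, labels)))  # keys: mae, rmse, pcc, scc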
@dataclass
class DatasetArguments:
train_data: str = field(
metadata={
"help": (
"Training data in TSV format. Must contain columns for sequence_A, sequence_B,"
" mutations (separated with comma if multiple), score and (optionally) zero-shot"
" score."
)
}
)
train_fasta_file: str = field(
metadata={
"help": (
"Fasta file containing wild-type sequences for training data. Identifiers must"
" match the training data and, if present, the evaluation data."
)
}
)
embeddings_dir: str = field(
metadata={
"help": (
"Directory containing pre-computed embeddings in PT format, or where new"
" embeddings will be stored. In the latter case, `plm_model_name` must be"
" provided."
)
}
)
eval_data: Optional[str] = field(
default=None,
metadata={"help": "Evaluation data file, with the same format of training data."},
)
test_data: Optional[str] = field(
default=None,
metadata={"help": "Test data file, with the same format of training data."},
)
test_fasta_file: Optional[str] = field(
default=None,
metadata={
"help": (
"Fasta file containing wild-type sequences. Identifiers must match the test data."
)
},
)
plm_model_name: Optional[str] = field(
default=None,
metadata={
"help": (
"Name of the pre-trained protein language model to use for embedding generation."
)
},
)
@dataclass
class ModelArguments:
model_name_or_config_path: str = field(
metadata={
"help": (
"Name of the pre-trained model to fine-tune, or path to config file in JSON"
" format."
)
}
)
save_model: bool = field(
default=False,
metadata={"help": "Whether to save the model after training."},
)
@dataclass
class CustomisableTrainingArguments:
output_dir: str = field(metadata={"help": "Directory where the trained model will be saved."})
num_epochs: int = field(default=30, metadata={"help": "Number of training epochs."})
batch_size: int = field(default=8, metadata={"help": "Batch size."})
learning_rate: float = field(default=5e-4, metadata={"help": "Learning rate."})
disable_tqdm: bool = field(
default=False, metadata={"help": "Whether to disable tqdm progress bars."}
)
report_to: Union[None, str, List[str]] = field(
default="none",
metadata={"help": "The list of integrations to report the results and logs to."},
)
early_stopping_patience: Optional[int] = field(
default=None,
metadata={
"help": (
"Number of epochs without improvement before early stopping. If not set, early"
" stopping is disabled."
)
},
)
class MulanTrainer(Trainer):
"""Custom Trainer class adapted for Mulan model training"""
def compute_loss(self, model, inputs, return_outputs=False):
"""
        Computes the loss for MulanDataset inputs for a model that does not return loss values.
"""
inputs.pop("data")
outputs = model(inputs["inputs_embeds"], inputs.get("zs_scores"))
labels = inputs.get("labels")
loss = torch.nn.functional.mse_loss(outputs.view(-1), labels.view(-1))
return (loss, outputs) if return_outputs else loss
def _prepare_input(self, data: Union[torch.Tensor, Any]) -> Union[torch.Tensor, Any]:
"""
Prepares one `data` before feeding it to the model, be it a tensor or a nested list/dictionary of tensors.
Adapted from the parent class to handle the case where the input is a custom type.
"""
if isinstance(data, Mapping):
return type(data)({k: self._prepare_input(v) for k, v in data.items()})
elif isinstance(data, (MutatedComplexEmbeds, MutatedComplex)):
return type(data)(*[self._prepare_input(v) for v in data])
elif isinstance(data, (tuple, list)):
return type(data)(self._prepare_input(v) for v in data)
elif isinstance(data, torch.Tensor):
kwargs = {"device": self.args.device}
if self.is_deepspeed_enabled and (
torch.is_floating_point(data) or torch.is_complex(data)
):
# NLP models inputs are int/uint and those get adjusted to the right dtype of the
# embedding. Other models such as wav2vec2's inputs are already float and thus
# may need special handling to match the dtypes of the model
kwargs.update(
{"dtype": self.accelerator.state.deepspeed_plugin.hf_ds_config.dtype()}
)
return data.to(**kwargs)
return data
def prediction_step(
self,
model: nn.Module,
inputs: Dict[str, Union[torch.Tensor, Any]],
prediction_loss_only: bool,
ignore_keys: Optional[List[str]] = None,
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
"""
Perform an evaluation step on `model` using `inputs`.
Overridden from the parent class to handle the case where the model does not return loss values.
        Support for SageMaker was removed.
Args:
model (`nn.Module`):
The model to evaluate.
inputs (`Dict[str, Union[torch.Tensor, Any]]`):
The inputs and targets of the model.
The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
argument `labels`. Check your model's documentation for all accepted arguments.
prediction_loss_only (`bool`):
Whether or not to return the loss only.
ignore_keys (`List[str]`, *optional*):
A list of keys in the output of your model (if it is a dictionary) that should be ignored when
gathering predictions.
Return:
Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss,
logits and labels (each being optional).
"""
has_labels = (
False
if len(self.label_names) == 0
else all(inputs.get(k) is not None for k in self.label_names)
)
# For CLIP-like models capable of returning loss values.
# If `return_loss` is not specified or being `None` in `inputs`, we check if the default value of `return_loss`
# is `True` in `model.forward`.
return_loss = inputs.get("return_loss", None)
if return_loss is None:
return_loss = self.can_return_loss
# print("return_loss", return_loss, "has_labels", has_labels)
loss_without_labels = True if len(self.label_names) == 0 and return_loss else False
inputs = self._prepare_inputs(inputs)
if ignore_keys is None:
if hasattr(self.model, "config"):
ignore_keys = getattr(self.model.config, "keys_to_ignore_at_inference", [])
else:
ignore_keys = []
# labels may be popped when computing the loss (label smoothing for instance) so we grab them first.
if has_labels or loss_without_labels:
labels = inputs.get("labels")
else:
labels = None
with torch.no_grad():
if has_labels or loss_without_labels:
with self.compute_loss_context_manager():
loss, outputs = self.compute_loss(model, inputs, return_outputs=True)
loss = loss.mean().detach()
logits = outputs
else:
loss = None
with self.compute_loss_context_manager():
outputs = model(inputs["inputs_embeds"], inputs.get("zs_scores"))
logits = outputs
# TODO: this needs to be fixed and made cleaner later.
if self.args.past_index >= 0:
self._past = outputs[self.args.past_index - 1]
if prediction_loss_only:
return (loss, None, None)
return (loss, logits, labels)
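As a sanity check, `compute_loss` above reduces to a plain mean-squared error between flattened predictions and labels; a standalone illustration with hypothetical tensors:

```python
import torch

preds = torch.tensor([[0.35], [1.91]])  # model outputs, shape (batch, 1)
labels = torch.tensor([0.34, 1.90])     # target ddG values
loss = torch.nn.functional.mse_loss(preds.view(-1), labels.view(-1))
# identical to ((preds.view(-1) - labels) ** 2).mean()
```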
......@@ -9,7 +9,7 @@ import numpy as np
from scipy.stats import rankdata
import mulan.constants as C
from mulan.constants import AAs, aa2idx, idx2aa, one2three, three2one # noqa
from mulan.constants import AAs, aa2idx, idx2aa, one2three, three2one
def mutation_generator(sequence):
......@@ -83,7 +83,9 @@ def get_available_models():
return list(C.MODELS.keys())
def load_pretrained_plm(model_name, device="cpu"):
def load_pretrained_plm(model_name, device=None):
if device is None:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = C.PLM_ENCODERS.get(model_name)
if model_id is None:
raise ValueError(
......
......@@ -3,6 +3,8 @@ scipy
pandas
tqdm
h5py
scikit-learn
torch>=2.0
transformers<4.45,>=4.27
accelerate
sentencepiece
\ No newline at end of file
"""Train Mulan model on custom data. Not implemented yet!"""
"""Train Mulan model on custom data using HuggingFace Trainer API"""
import os
import pandas as pd
import torch
from transformers import (
HfArgumentParser,
TrainingArguments,
logging,
EarlyStoppingCallback,
set_seed,
)
import mulan
from mulan.data import MulanDataset, MulanDataCollator
from mulan.train_utils import (
DatasetArguments,
ModelArguments,
CustomisableTrainingArguments,
MulanTrainer,
default_compute_metrics,
)
logging.set_verbosity_info()
logger = logging.get_logger(__name__)
from argparse import ArgumentParser
def get_args():
parser = ArgumentParser(
parser = HfArgumentParser(
dataclass_types=[DatasetArguments, ModelArguments, CustomisableTrainingArguments],
prog="mulan-train",
description=__doc__,
)
args = parser.parse_args()
return args
data_args, model_args, custom_training_args = parser.parse_args_into_dataclasses()
return (data_args, model_args, custom_training_args)
def load_data(data_args):
logger.info("Loading training data")
train_dataset = MulanDataset.from_table(
data_args.train_data,
data_args.train_fasta_file,
data_args.embeddings_dir,
data_args.plm_model_name,
)
eval_dataset, test_dataset = None, None
if data_args.eval_data:
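# note: the eval split reuses train_fasta_file; no separate eval FASTA argument is defined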
eval_dataset = MulanDataset.from_table(
data_args.eval_data,
data_args.train_fasta_file,
data_args.embeddings_dir,
data_args.plm_model_name,
)
if data_args.test_data:
logger.info("Loading test data...")
test_dataset = MulanDataset.from_table(
data_args.test_data,
data_args.test_fasta_file,
data_args.embeddings_dir,
data_args.plm_model_name,
)
return train_dataset, eval_dataset, test_dataset
def dummy_forward_call(model, dataset, data_collator):
inputs = data_collator([dataset[0]])
return model(inputs["inputs_embeds"], inputs.get("zs_scores"))
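`dummy_forward_call` exists so that lazily-initialized submodules can materialize their parameter shapes before training (see the `# to initialize lazy modules` comment at its call site below). A generic PyTorch illustration of the mechanism, unrelated to the Mulan model itself:

```python
import torch

lazy = torch.nn.LazyLinear(out_features=1)  # in_features not known yet
x = torch.randn(4, 16)
lazy(x)                                     # first call materializes the weight
print(lazy.weight.shape)                    # torch.Size([1, 16])
```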
def save_predictions(output_dir, dataset, predictions):
df = pd.DataFrame(dataset.mutated_complexes)
df["mutations"] = df["mutations"].apply(lambda x: ",".join(x))
df["score"] = predictions
df.to_csv(
os.path.join(output_dir, "test_predictions.tsv"), sep="\t", index=False, header=False
)
return
def save_model_ckpt(model, output_dir):
torch.save(
{"state_dict": model.state_dict(), "config": model.config.__dict__},
os.path.join(output_dir, "model.ckpt"),
)
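`save_model_ckpt` stores the state dict next to the config as a plain dict. A matching load sketch; note that reconstructing the config with `MulanConfig(**...)` is an assumption about its constructor, not something shown in this diff:

```python
import os
import torch
import mulan

def load_model_ckpt(output_dir):
    ckpt = torch.load(os.path.join(output_dir, "model.ckpt"), map_location="cpu")
    config = mulan.MulanConfig(**ckpt["config"])  # hypothetical constructor usage
    model = mulan.LightAttModel(config)
    model.load_state_dict(ckpt["state_dict"])
    return model
```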
def train(data_args, model_args, custom_training_args):
# set global seed
set_seed(42)
# load data
train_dataset, eval_dataset, test_dataset = load_data(data_args)
# load model
if model_args.model_name_or_config_path in mulan.get_available_models():
model = mulan.load_pretrained(model_args.model_name_or_config_path, device="cpu")
else:
config = mulan.MulanConfig.from_json(model_args.model_name_or_config_path)
model = mulan.LightAttModel(config)
# training arguments
training_args = TrainingArguments(
output_dir=custom_training_args.output_dir,
num_train_epochs=custom_training_args.num_epochs,
per_device_train_batch_size=custom_training_args.batch_size,
per_device_eval_batch_size=custom_training_args.batch_size,
logging_dir=custom_training_args.output_dir,
report_to=custom_training_args.report_to,
remove_unused_columns=False,
label_names=["labels"],
logging_strategy="epoch",
eval_strategy="epoch" if eval_dataset else "no",
save_strategy="epoch" if model_args.save_model else "no",
load_best_model_at_end=bool(eval_dataset and model_args.save_model),  # to be tested
metric_for_best_model="loss",
save_total_limit=2,
)
data_collator = MulanDataCollator(padding_value=model.config.padding_value)
optimizer = torch.optim.AdamW(model.parameters(), lr=custom_training_args.learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optimizer, mode="min", factor=0.5, patience=5
)
early_stopping = (
EarlyStoppingCallback(custom_training_args.early_stopping_patience)
if custom_training_args.early_stopping_patience
else None
)
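# NB: transformers' EarlyStoppingCallback asserts load_best_model_at_end=True at train start, so early stopping presumably also requires --save_model together with an eval set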
dummy_forward_call(model, train_dataset, data_collator) # to initialize lazy modules
# instantiate Trainer
trainer = MulanTrainer(
model=model,
args=training_args,
data_collator=data_collator,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
compute_metrics=default_compute_metrics,
optimizers=(optimizer, scheduler),
callbacks=[early_stopping] if early_stopping else None,
)
# train model
train_results = trainer.train()
metrics = train_results.metrics
if test_dataset:
prediction_results = trainer.predict(test_dataset)
save_predictions(
custom_training_args.output_dir, test_dataset, prediction_results.predictions
)
metrics.update(prediction_results.metrics)
trainer.save_metrics("all", metrics)
# TODO
# remove the logging message "Trainer.model is not a `PreTrainedModel`, only saving its state dict."
if model_args.save_model:
save_model_ckpt(model, custom_training_args.output_dir)
return
def main():
get_args()
raise NotImplementedError("This script has not been implemented yet.")
data_args, model_args, custom_training_args = get_args()
train(data_args, model_args, custom_training_args)
if __name__ == "__main__":
main()
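A typical invocation of the installed console script (flag names are generated by `HfArgumentParser` from the dataclass fields; all file paths below are placeholders): `mulan-train --train_data train.tsv --train_fasta_file sequences.fasta --model_name_or_config_path config.json --output_dir runs/exp1 --num_epochs 30 --batch_size 8`.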