Skip to content

Commit 7ff3278

Browse files
authored
Merge pull request #21 from BioinfoMachineLearning/new-features
Add new structure-based splits, graph featurization methods, and disorder propensities
2 parents b406199 + a1421b2 commit 7ff3278

11 files changed

+1393
-89
lines changed

.gitignore

+3-1
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,7 @@ venv.tar.gz
115115
tb_logs/
116116

117117
# Feature Processing
118+
*idr_annotation*.txt
118119
*work_filenames*.csv
119120

120121
# DIPS
@@ -124,7 +125,8 @@ project/datasets/DIPS/pairs/**
124125
project/datasets/DIPS/parsed/**
125126
project/datasets/DIPS/raw/**
126127
project/datasets/DIPS/final/raw/**
127-
project/datasets/DIPS/final/final_raw_dips.tar.gz*
128+
project/datasets/DIPS/final/load_pair_example.py
129+
project/datasets/DIPS/final/final_raw_dips*.tar.gz*
128130
project/datasets/DIPS/final/processed/**
129131

130132
# DB5

README.md

+2-2
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
The Enhanced Database of Interacting Protein Structures for Interface Prediction
66

7-
[![Paper](http://img.shields.io/badge/paper-arxiv.2106.04362-B31B1B.svg)](https://arxiv.org/abs/2106.04362) [![CC BY 4.0][cc-by-shield]][cc-by] [![Primary Data DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5134732.svg)](https://doi.org/10.5281/zenodo.5134732) [![Supplementary Data DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8071136.svg)](https://doi.org/10.5281/zenodo.8071136)
7+
[![Paper](http://img.shields.io/badge/paper-arxiv.2106.04362-B31B1B.svg)](https://arxiv.org/abs/2106.04362) [![CC BY 4.0][cc-by-shield]][cc-by] [![Primary Data DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5134732.svg)](https://doi.org/10.5281/zenodo.5134732) [![Supplementary Data DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8140981.svg)](https://doi.org/10.5281/zenodo.8140981)
88

99
[cc-by]: http://creativecommons.org/licenses/by/4.0/
1010
[cc-by-image]: https://i.creativecommons.org/l/by/4.0/88x31.png
@@ -219,7 +219,7 @@ python3 project/datasets/builder/generate_hhsuite_features.py "$PROJDIR"/project
219219
# (1) Pull down the Docker image for `flDPnn`
220220
docker pull docker.io/sinaghadermarzi/fldpnn
221221
# (2) For all sequences in the dataset, predict which interface residues reside within IDRs
222-
python3 project/datasets/builder/annotate_idr_interfaces.py "$PROJDIR"/project/datasets/DIPS/final/raw
222+
python3 project/datasets/builder/annotate_idr_interfaces.py "$PROJDIR"/project/datasets/DIPS/final/raw --num_cpus 16
223223

224224
# Add new features to the filtered pairs, ensuring that the pruned pairs' original PDB files are stored locally for DSSP:
225225
python3 project/datasets/builder/download_missing_pruned_pair_pdbs.py "$PROJDIR"/project/datasets/DIPS/raw/pdb "$PROJDIR"/project/datasets/DIPS/interim/pairs-pruned --num_cpus 32 --rank "$1" --size "$2"

environment.yml

+69-16
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,17 @@
11
name: DIPS-Plus
22
channels:
3-
- bioconda
43
- pytorch
54
- salilab
65
- dglteam/label/cu116
76
- nvidia
7+
- bioconda
88
- defaults
99
- conda-forge
1010
dependencies:
1111
- _libgcc_mutex=0.1=conda_forge
1212
- _openmp_mutex=4.5=2_kmp_llvm
1313
- appdirs=1.4.4=pyhd3eb1b0_0
14+
- aria2=1.23.0=0
1415
- asttokens=2.2.1=pyhd8ed1ab_0
1516
- backcall=0.2.0=pyh9f0ad1d_0
1617
- backports=1.0=pyhd8ed1ab_3
@@ -21,13 +22,14 @@ dependencies:
2122
- bottleneck=1.3.5=py38h7deecbd_0
2223
- brotlipy=0.7.0=py38h27cfd23_1003
2324
- bzip2=1.0.8=h7b6447c_0
25+
- c-ares=1.19.1=hd590300_0
2426
- ca-certificates=2023.5.7=hbcca054_0
2527
- certifi=2023.5.7=py38h06a4308_0
2628
- cffi=1.15.1=py38h5eee18b_3
2729
- charset-normalizer=2.0.4=pyhd3eb1b0_0
2830
- colorama=0.4.6=pyhd8ed1ab_0
2931
- comm=0.1.3=pyhd8ed1ab_0
30-
- cryptography=40.0.2=py38h3d167d9_0
32+
- cryptography=39.0.0=py38h1724139_0
3133
- cuda=11.6.1=0
3234
- cuda-cccl=11.6.55=hf6102b2_0
3335
- cuda-command-line-tools=11.6.2=0
@@ -62,16 +64,20 @@ dependencies:
6264
- cuda-visual-tools=11.6.1=0
6365
- cudatoolkit=11.7.0=hd8887f6_10
6466
- cudnn=8.8.0.121=h0800d71_0
67+
- cycler=0.11.0=pyhd8ed1ab_0
6568
- debugpy=1.6.7=py38h8dc9893_0
6669
- decorator=5.1.1=pyhd8ed1ab_0
6770
- dgl=1.1.0.cu116=py38_0
6871
- dssp=3.0.0=h3fd9d12_4
6972
- executing=1.2.0=pyhd8ed1ab_0
7073
- ffmpeg=4.3=hf484d3e_0
74+
- foldseek=7.04e0ec8=pl5321hb365157_0
7175
- freetype=2.12.1=hca18f0e_1
76+
- gawk=5.1.0=h7f98852_0
7277
- gcc=10.3.0=he2824d0_10
7378
- gcc_impl_linux-64=10.3.0=hf2f2afa_16
7479
- gds-tools=1.6.1.9=0
80+
- gettext=0.21.1=h27087fc_0
7581
- gmp=6.2.1=h58526e2_0
7682
- gnutls=3.6.13=h85f3911_1
7783
- gxx=10.3.0=he2824d0_10
@@ -90,6 +96,7 @@ dependencies:
9096
- jupyter_client=8.2.0=pyhd8ed1ab_0
9197
- jupyter_core=4.12.0=py38h578d9bd_0
9298
- kernel-headers_linux-64=2.6.32=he073ed8_15
99+
- kiwisolver=1.4.4=py38h43d8883_1
93100
- lame=3.100=h166bdaf_1003
94101
- lcms2=2.15=hfd0df8a_0
95102
- ld_impl_linux-64=2.36.1=hea4e1c9_2
@@ -110,12 +117,14 @@ dependencies:
110117
- libcusparse-dev=11.7.2.124=hbbe9722_0
111118
- libdeflate=1.17=h0b41bf4_0
112119
- libffi=3.4.4=h6a678d5_0
120+
- libgcc=7.2.0=h69d50b8_2
113121
- libgcc-devel_linux-64=10.3.0=he6cfe16_16
114122
- libgcc-ng=12.2.0=h65d4601_19
115123
- libgfortran-ng=12.2.0=h69a702a_19
116124
- libgfortran5=12.2.0=h337968e_19
117125
- libgomp=12.2.0=h65d4601_19
118126
- libiconv=1.17=h166bdaf_0
127+
- libidn2=2.3.4=h166bdaf_0
119128
- liblapack=3.9.0=16_linux64_openblas
120129
- libnpp=11.6.3.124=hd2722f0_0
121130
- libnpp-dev=11.6.3.124=h3c42840_0
@@ -128,12 +137,15 @@ dependencies:
128137
- libsanitizer=10.3.0=h26c7422_16
129138
- libsodium=1.0.18=h36c2ea0_1
130139
- libsqlite=3.42.0=h2797004_0
140+
- libssh2=1.10.0=haa6b8db_3
131141
- libstdcxx-devel_linux-64=10.3.0=he6cfe16_16
132142
- libstdcxx-ng=12.2.0=h46fd767_19
133143
- libtiff=4.5.0=h6adf6a1_2
144+
- libunistring=0.9.10=h7f98852_0
134145
- libuuid=2.38.1=h0b41bf4_0
135146
- libwebp-base=1.3.0=h0b41bf4_0
136147
- libxcb=1.13=h7f98852_1004
148+
- libxml2=2.9.9=h13577e0_2
137149
- libzlib=1.2.13=h166bdaf_4
138150
- llvm-openmp=16.0.4=h4dfa4b3_0
139151
- lz4-c=1.9.4=h6a678d5_0
@@ -150,19 +162,18 @@ dependencies:
150162
- ninja=1.11.1=h924138e_0
151163
- nsight-compute=2023.1.1.4=0
152164
- numexpr=2.8.4=py38hd2a5715_1
153-
- numpy=1.24.3=py38hf838250_0
154-
- numpy-base=1.24.3=py38h1e6e340_0
155165
- openh264=2.1.1=h780b84a_0
156166
- openjpeg=2.5.0=hfec8fc6_2
157167
- openmpi=4.1.5=h414af15_101
158-
- openssl=3.1.1=hd590300_1
168+
- openssl=1.1.1u=hd590300_0
159169
- packaging=23.0=py38h06a4308_0
160170
- pandas=1.5.3=py38h417a72b_0
161171
- parso=0.8.3=pyhd8ed1ab_0
162172
- perl=5.32.1=0_h5eee18b_perl5
163173
- pexpect=4.8.0=pyh1a96a4e_2
164174
- pickleshare=0.7.5=py_1003
165175
- pillow=9.4.0=py38hde6dc18_1
176+
- pip=23.1.2=py38h06a4308_0
166177
- pooch=1.4.0=pyhd3eb1b0_0
167178
- prompt-toolkit=3.0.38=pyha770c72_0
168179
- psutil=5.9.5=py38h1de0b5d_0
@@ -185,6 +196,7 @@ dependencies:
185196
- requests=2.29.0=py38h06a4308_0
186197
- scikit-learn=1.2.2=py38h6a678d5_0
187198
- scipy=1.10.1=py38h32ae08f_1
199+
- setuptools=67.8.0=py38h06a4308_0
188200
- six=1.16.0=pyhd3eb1b0_1
189201
- sleef=3.5.1=h9b69904_2
190202
- sqlite=3.41.2=h5eee18b_0
@@ -196,18 +208,19 @@ dependencies:
196208
- torchaudio=0.13.1=py38_cu116
197209
- torchvision=0.14.1=py38_cu116
198210
- tornado=6.3.2=py38h01eb140_0
211+
- tqdm=4.65.0=py38hb070fc8_0
199212
- traitlets=5.9.0=pyhd8ed1ab_0
200213
- typing_extensions=4.6.0=pyha770c72_0
201214
- urllib3=1.26.15=py38h06a4308_0
202215
- wcwidth=0.2.6=pyhd8ed1ab_0
203216
- wheel=0.38.4=py38h06a4308_0
204217
- xorg-libxau=1.0.11=hd590300_0
205218
- xorg-libxdmcp=1.1.3=h7f98852_0
206-
- xz=5.4.2=h5eee18b_0
219+
- xz=5.2.6=h166bdaf_0
207220
- zeromq=4.3.4=h9c3ff4c_1
208221
- zipp=3.15.0=pyhd8ed1ab_0
209222
- zlib=1.2.13=h166bdaf_4
210-
- zstd=1.5.5=hc292b87_0
223+
- zstd=1.5.2=h3eb15da_6
211224
- pip:
212225
- absl-py==1.4.0
213226
- aiohttp==3.8.4
@@ -217,63 +230,103 @@ dependencies:
217230
- git+https://github.com/amorehead/atom3.git@83987404ceed38a1f5a5abd517aa38128d0a4f2c
218231
- attrs==23.1.0
219232
- babel==2.12.1
233+
- beautifulsoup4==4.12.2
234+
- biopandas==0.5.0.dev0
235+
- bioservices==1.11.2
220236
- cachetools==5.3.1
237+
- cattrs==23.1.2
221238
- click==7.0
239+
- colorlog==6.7.0
222240
- configparser==5.3.0
241+
- contourpy==1.1.0
242+
- deepdiff==6.3.1
223243
- dill==0.3.3
224244
- docker-pycreds==0.4.0
225245
- docutils==0.17.1
226246
- easy-parallel-py3==0.1.6.4
247+
- easydev==0.12.1
248+
- exceptiongroup==1.1.2
227249
- fairscale==0.4.0
250+
- fonttools==4.40.0
228251
- frozenlist==1.3.3
229252
- fsspec==2023.5.0
230253
- future==0.18.3
254+
- gevent==22.10.2
231255
- gitdb==4.0.10
232256
- gitpython==3.1.31
233257
- google-auth==2.19.0
234258
- google-auth-oauthlib==1.0.0
259+
- git+https://github.com/a-r-j/graphein.git@371ce9a462b610529488e87a712484328a89de36
260+
- greenlet==2.0.2
261+
- grequests==0.7.0
235262
- grpcio==1.54.2
236263
- h5py==3.8.0
237264
- hickle==5.0.2
238265
- imagesize==1.4.1
266+
- importlib-resources==6.0.0
239267
- install==1.3.5
268+
- jaxtyping==0.2.19
240269
- jinja2==2.11.3
270+
- loguru==0.7.0
271+
- looseversion==1.1.2
272+
- lxml==4.9.3
241273
- markdown==3.4.3
274+
- markdown-it-py==3.0.0
242275
- markupsafe==1.1.1
276+
- matplotlib==3.7.2
277+
- mdurl==0.1.2
278+
- mmtf-python==1.1.3
243279
- mpi4py==3.0.3
280+
- msgpack==1.0.5
244281
- multidict==6.0.4
282+
- multipledispatch==1.0.0
245283
- multiprocess==0.70.11.1
284+
- numpy==1.23.5
246285
- oauthlib==3.2.2
286+
- ordered-set==4.1.0
247287
- pathos==0.2.7
248288
- pathtools==0.1.2
289+
- pdb-tools==2.5.0
290+
- platformdirs==3.8.1
291+
- plotly==5.15.0
249292
- pox==0.3.2
250293
- ppft==1.7.6.6
251294
- promise==2.3
252295
- protobuf==3.20.3
253296
- pyasn1==0.5.0
254297
- pyasn1-modules==0.3.0
298+
- pydantic==1.10.11
255299
- pydeprecate==0.3.1
300+
- pyparsing==3.0.9
256301
- pytorch-lightning==1.4.8
257-
- pyyaml==6.0
302+
- pyyaml==5.4.1
303+
- requests-cache==1.1.0
258304
- requests-oauthlib==1.3.1
305+
- rich==13.4.2
306+
- rich-click==1.6.1
259307
- rsa==4.9
308+
- seaborn==0.12.2
260309
- sentry-sdk==1.24.0
261310
- shortuuid==1.0.11
262311
- smmap==5.0.0
263312
- snowballstemmer==2.2.0
264-
- sphinx==4.0.1
265-
- sphinxcontrib-applehelp==1.0.4
266-
- sphinxcontrib-devhelp==1.0.2
267-
- sphinxcontrib-htmlhelp==2.0.1
268-
- sphinxcontrib-jsmath==1.0.1
269-
- sphinxcontrib-qthelp==1.0.3
270-
- sphinxcontrib-serializinghtml==1.1.5
313+
- soupsieve==2.4.1
271314
- subprocess32==3.5.4
315+
- suds-community==1.1.2
316+
- tenacity==8.2.2
272317
- tensorboard==2.13.0
273318
- tensorboard-data-server==0.7.0
274319
- termcolor==2.3.0
275320
- torchmetrics==0.5.1
321+
- typeguard==4.0.0
322+
- url-normalize==1.4.3
276323
- wandb==0.12.2
277-
- werkzeug==2.3.4
324+
- werkzeug==2.3.6
325+
- wget==3.2
326+
- wrapt==1.15.0
327+
- xarray==2023.1.0
328+
- xmltodict==0.13.0
278329
- yarl==1.9.2
279330
- yaspin==2.3.0
331+
- zope-event==5.0
332+
- zope-interface==6.0

0 commit comments

Comments
 (0)