keep the same version as Deepmind

kiharalab · Aug 21, 2024 · 60daeff · 60daeff
2 parents 6571c82 + f251de6
commit 60daeff
Show file tree

Hide file tree

Showing 7 changed files with 422 additions and 16 deletions.
diff --git a/alphafold/data/mmcif_parsing.py b/alphafold/data/mmcif_parsing.py
@@ -315,6 +315,7 @@ def _get_header(parsed_info: MmCIFDict) -> PdbHeader:
       try:
         raw_resolution = parsed_info[res_key][0]
         header['resolution'] = float(raw_resolution)
+        break
       except ValueError:
         logging.debug('Invalid resolution format: %s', parsed_info[res_key])
 

diff --git a/alphafold/model/utils.py b/alphafold/model/utils.py
@@ -163,7 +163,7 @@ def inner(key, shape, **kwargs):
     keys = grid_keys(key, shape)
     signature = (
         '()->()'
-        if isinstance(keys, jax.random.PRNGKeyArray)
+        if jax.dtypes.issubdtype(keys.dtype, jax.dtypes.prng_key)
         else '(2)->()'
     )
     return jnp.vectorize(

diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ARG CUDA=11.1.1
-FROM nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu18.04
+ARG CUDA=12.2.2
+FROM nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu20.04
 # FROM directive resets ARGS, so we specify again (the value is retained if
 # previously set).
 ARG CUDA
@@ -54,14 +54,10 @@ RUN wget -q -P /tmp \
 # Install conda packages.
 ENV PATH="/opt/conda/bin:$PATH"
 ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH"
-RUN conda install -qy conda==24.1.2 \
-    && conda install -y -c conda-forge \
-      openmm=7.7.0 \
-      cudatoolkit==${CUDA_VERSION} \
-      pdbfixer \
-      pip \
-      python=3.10 \
-      && conda clean --all --force-pkgs-dirs --yes
+RUN conda install -qy conda==24.1.2 pip python=3.11 \
+    && conda install -y -c nvidia cuda=${CUDA_VERSION} \
+    && conda install -y -c conda-forge openmm=8.0.0 pdbfixer \
+    && conda clean --all --force-pkgs-dirs --yes
 
 COPY . /app/alphafold
 RUN wget -q -P /app/alphafold/alphafold/common/ \
@@ -71,13 +67,16 @@ RUN wget -q -P /app/alphafold/alphafold/common/ \
 RUN pip3 install --upgrade pip --no-cache-dir \
     && pip3 install -r /app/alphafold/requirements.txt --no-cache-dir \
     && pip3 install --upgrade --no-cache-dir \
-      jax==0.3.25 \
-      jaxlib==0.3.25+cuda11.cudnn805 \
+      jax==0.4.26 \
+      jaxlib==0.4.26+cuda12.cudnn89 \
       -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 
 # Add SETUID bit to the ldconfig binary so that non-root users can run it.
 RUN chmod u+s /sbin/ldconfig.real
 
+# Currently needed to avoid undefined_symbol error.
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libffi.so.7 /opt/conda/lib/libffi.so.7
+
 # We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk
 # with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for
 # details.

diff --git a/notebooks/AlphaFold.ipynb b/notebooks/AlphaFold.ipynb
@@ -112,7 +112,7 @@
         "      %shell conda install -qy conda==24.1.2 \\\n",
         "          \u0026\u0026 conda install -qy -c conda-forge \\\n",
         "            python=3.10 \\\n",
-        "            openmm=7.7.0 \\\n",
+        "            openmm=8.0.0 \\\n",
         "            pdbfixer\n",
         "      pbar.update(80)\n",
         "\n",

diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 absl-py==1.0.0
 biopython==1.79
-chex==0.0.7
+chex==0.1.86
 dm-haiku==0.0.12
 dm-tree==0.1.8
 docker==5.0.0
@@ -10,4 +10,4 @@ ml-collections==0.1.0
 numpy==1.24.3
 pandas==2.0.3
 scipy==1.11.1
-tensorflow-cpu==2.13.0
+tensorflow-cpu==2.16.1
diff --git a/server/README.md b/server/README.md
@@ -0,0 +1,286 @@
+# JSON file format for AlphaFold Server jobs
+
+You can
+[download an example JSON file here](https://github.com/google-deepmind/alphafold/blob/main/server/example.json);
+here we describe the contents of this example JSON file.
+
+This JSON file consists of a list of dictionaries (even in the case of a single
+dictionary, a single-element list must be used), with each dictionary containing
+a job description. Therefore, you can specify multiple jobs in one JSON file.
+
+Each job description contains a job name, a list of PRNG seeds (which can be an
+empty list for automated random seed assignment), and a list of entities
+(molecules) to be modeled.
+
+AlphaFold Server JSON files are especially useful for automation of repetitive
+modeling jobs (e.g. to screen interactions of one protein with a small number of
+others). The easiest way to construct an initial JSON file is to run a modeling
+job via the AlphaFold Server GUI and use it as a template. AlphaFold Server will
+produce a zip file containing modeling results. Inside the zip file you will
+find a JSON file named `<job_name>_job_request.json` containing the job inputs.
+These files offer a convenient starting point for generating new jobs as they
+are easily editable in standard text editors or in programming environments like
+Google Colab notebooks.
+
+Note that comments are not allowed in JSON files.
+
+## Job name, seeds and sequences
+
+*   `name` is a string with the job name. This is how the job will appear in
+    the job history table.
+*   `modelSeeds` is a list of strings of uint32 seed values (e.g.
+    `["1593933729", "4273"]`). Seeds are used to run the modeling. We recommend
+    providing an empty list, in which case a single random seed will be used.
+*   `sequences` is a list of dictionaries that carry descriptions of the
+    entities (molecules) for modeling.
+
+```json
+{
+  "name": "Test Fold Job Number One",
+  "modelSeeds": [],
+  "sequences": [...]
+}
+```
+
+## Entity types
+
+Valid entity types mirror those available in the AlphaFold Server web interface:
+
+*   `proteinChain` – used for proteins
+*   `dnaSequence` – used for DNA (single strand)
+*   `rnaSequence` – used for RNA (single strand)
+*   `ligand` – used for allowed ligands
+*   `ion` – used for allowed ions
+
+### Protein chains
+
+`sequence` is a string containing the protein sequence; the same limitations as
+in the UI are in place, e.g. only letters corresponding to amino acids are
+allowed, as defined by IUPAC. Only the 20 standard amino acid types are
+supported.
+
+`count` is the number of copies of this protein chain (integer).
+
+`glycans` is an optional list of dictionaries that carries descriptions of the
+protein glycosylation.
+
+*   `residues` is a string defining the glycan. Please refer to the
+    [FAQ](https://alphafoldserver.com/faq) for the format description and
+    allowed glycans.
+*   `position` is a position of the amino acid to which the glycan is attached
+    (integer, 1-based indexing).
+
+`modifications` is an optional list of dictionaries that carries descriptions of
+the post-translational modifications.
+
+*   `ptmType` is a string containing the
+    [CCD code](https://www.wwpdb.org/data/ccd) of the modification; the same
+    codes are allowed as in the UI.
+*   `ptmPosition` is the position of the modified amino acid (integer).
+*   Allowed modifications: `CCD_SEP`, `CCD_TPO`, `CCD_PTR`, `CCD_NEP`,
+    `CCD_HIP`, `CCD_ALY`, `CCD_MLY`, `CCD_M3L`, `CCD_MLZ`, `CCD_2MR`, `CCD_AGM`,
+    `CCD_MCS`, `CCD_HYP`, `CCD_HY3`, `CCD_LYZ`, `CCD_AHB`, `CCD_P1L`, `CCD_SNN`,
+    `CCD_SNC`, `CCD_TRF`, `CCD_KCR`, `CCD_CIR`, `CCD_YHA`
+
+```json
+{
+  "proteinChain": {
+    "sequence": "PREACHINGS",
+
+    "glycans": [
+      {
+        "residues": "NAG(NAG)(BMA)",
+        "position": 8
+      },
+      {
+        "residues": "BMA",
+        "position": 10
+      }
+    ],
+
+    "modifications": [
+      {
+        "ptmType": "CCD_HY3",
+        "ptmPosition": 1
+      },
+      {
+        "ptmType": "CCD_P1L",
+        "ptmPosition": 5
+      }
+    ],
+
+    "count": 1
+  }
+},
+{
+  "proteinChain": {
+    "sequence": "REACHER",
+    "count": 1
+  }
+}
+```
+
+### DNA chains
+
+Please note that the `dnaSequence` type refers to single-stranded DNA. If you
+wish to model double-stranded DNA, please add a second `dnaSequence` entry
+carrying the sequence of the reverse complement strand.
+
+`sequence` is a string containing a DNA sequence; the same limitations as in the
+UI are in place, i.e. only letters A, T, G, C are allowed.
+
+`count` is the number of copies of this DNA chain (integer).
+
+`modifications` is an optional list of dictionaries that carries descriptions of
+the DNA chemical modifications.
+
+*   `modificationType` is a string containing the
+    [CCD code](https://www.wwpdb.org/data/ccd) of the modification; the same
+    codes are allowed as in the UI.
+*   `basePosition` is a position of the modified nucleotide (integer).
+*   Allowed modifications: `CCD_5CM`, `CCD_C34`, `CCD_5HC`, `CCD_6OG`,
+    `CCD_6MA`, `CCD_1CC`, `CCD_8OG`, `CCD_5FC`, `CCD_3DR`
+
+```json
+{
+  "dnaSequence": {
+    "sequence": "GATTACA",
+
+    "modifications": [
+      {
+        "modificationType": "CCD_6OG",
+        "basePosition": 1
+      },
+      {
+        "modificationType": "CCD_6MA",
+        "basePosition": 2
+      }
+    ],
+
+    "count": 1
+  }
+},
+{
+  "dnaSequence": {
+    "sequence": "TGTAATC",
+    "count": 1
+  }
+}
+```
+
+### RNA chains
+
+`sequence` is a string containing an RNA sequence (single strand); the same
+limitations as in the UI are in place, i.e. only letters A, U, G, C are allowed.
+
+`count` is the number of copies of this RNA chain (integer).
+
+`modifications` is an optional list of dictionaries that carries descriptions of
+the RNA chemical modifications.
+
+*   `modificationType` is a string containing the
+    [CCD code](https://www.wwpdb.org/data/ccd) of the modification; the same
+    codes are allowed as in the UI.
+*   `basePosition` is a position of the modified nucleotide (integer).
+*   Allowed modifications: `CCD_PSU`, `CCD_5MC`, `CCD_OMC`, `CCD_4OC`,
+    `CCD_5MU`, `CCD_OMU`, `CCD_UR3`, `CCD_A2M`, `CCD_MA6`, `CCD_6MZ`, `CCD_2MG`,
+    `CCD_OMG`, `CCD_7MG`, `CCD_RSQ`
+
+```json
+{
+  "rnaSequence": {
+    "sequence": "GUAC",
+
+    "modifications": [
+      {
+        "modificationType": "CCD_2MG",
+        "basePosition": 1
+      },
+      {
+        "modificationType": "CCD_5MC",
+        "basePosition": 4
+      }
+    ],
+
+    "count": 1
+  }
+}
+```
+
+### Ligands
+
+`ligand` is a string containing the [CCD code](https://www.wwpdb.org/data/ccd)
+of the ligand; the same codes are allowed as in the UI.
+
+`count` is the number of copies of this ligand (integer).
+
+Allowed ligands: `CCD_ADP`, `CCD_ATP`, `CCD_AMP`, `CCD_GTP`, `CCD_GDP`,
+`CCD_FAD`, `CCD_NAD`, `CCD_NAP`, `CCD_NDP`, `CCD_HEM`, `CCD_HEC`, `CCD_PLM`,
+`CCD_OLA`, `CCD_MYR`, `CCD_CIT`, `CCD_CLA`, `CCD_CHL`, `CCD_BCL`, `CCD_BCB`
+
+```json
+{
+  "ligand": {
+    "ligand": "CCD_ATP",
+    "count": 1
+  }
+},
+{
+  "ligand": {
+    "ligand": "CCD_HEM",
+    "count": 2
+  }
+}
+```
+
+### Ions
+
+`ion` is a string containing the [CCD code](https://www.wwpdb.org/data/ccd) of
+the ion; the same codes are allowed as in the UI. The ion charge is implicitly
+specified by the CCD code.
+
+`count` is the number of copies of this ion (integer).
+
+Allowed ions: `MG`, `ZN`, `CL`, `CA`, `NA`, `MN`, `K`, `FE`, `CU`, `CO`
+
+```json
+{
+  "ion": {
+    "ion": "MG",
+    "count": 2
+  }
+},
+{
+  "ion": {
+    "ion": "NA",
+    "count": 3
+  }
+}
+```
+
+## Additional modeling jobs
+
+You may specify multiple jobs in one JSON file. This is an example of a simple
+job request for one protein chain and two copies of the palindromic DNA
+sequence:
+
+```json
+{
+  "name": "Test Fold Job Number Two",
+  "modelSeeds": [],
+  "sequences": [
+    {
+      "proteinChain": {
+        "sequence": "TEACHINGS",
+        "count": 1
+      }
+    },
+    {
+      "dnaSequence": {
+        "sequence": "TAGCTA",
+        "count": 2
+      }
+    }
+  ]
+}
+```