diff --git a/.env.example b/.env.example index 95909141..ec2f2f2a 100644 --- a/.env.example +++ b/.env.example @@ -40,4 +40,9 @@ NERSC_USERNAME=replaceme ORCID_NMDC_CLIENT_ID=replaceme ORCID_NMDC_CLIENT_SECRET=replaceme +# Base URL (without a trailing slash) at which the Runtime can access an instance of ORCID. +# Note: For the production instance of ORCID, use: https://orcid.org (default) +# For the sandbox instance of ORCID, use: https://sandbox.orcid.org +ORCID_BASE_URL=https://orcid.org + INFO_BANNER_INNERHTML='Announcement: Something important is about to happen. If you have questions, please contact support@microbiomedata.org.' \ No newline at end of file diff --git a/.github/workflows/build-and-release-to-spin-berkeley.yml b/.github/workflows/build-and-release-to-spin-berkeley.yml deleted file mode 100644 index e38416b1..00000000 --- a/.github/workflows/build-and-release-to-spin-berkeley.yml +++ /dev/null @@ -1,101 +0,0 @@ -# Note: This GitHub Actions workflow was initialized by copy/pasting the contents of `build-and-release-to-spin.yml`. -# Changes made here since then include: -# - Changed the triggering branch to `berkeley` (was `main`) -# - Excluded Git tag creation from triggering criteria -# - Hard-coded the Spin namespace as `nmdc-berkeley` for deployment -# - Disabled pushing to Docker Hub (only push to GHCR) -# - Changed tagging rules to, effectively, "always tag as :berkeley" - -name: Build Docker images and release to Spin (nmdc-berkeley) - -on: - push: - branches: - - berkeley # the `berkeley` branch, not the `main` branch - paths: - - '.github/workflows/build-and-release-to-spin-berkeley.yml' - - 'Makefile' - - '**.Dockerfile' - - '**.py' - - 'requirements/main.txt' - -env: - # We don't want to do certain steps if this is running in a fork - IS_ORIGINAL_REPO: ${{ github.repository == 'microbiomedata/nmdc-runtime' }} - - # Used when sending redeploy action requests to Rancher - RANCHER_NAMESPACE: 'nmdc-berkeley' - -jobs: - build: - runs-on: ubuntu-latest - - strategy: - matrix: - image: [ fastapi, dagster ] - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - # history for all branches and tags is needed for setuptools-scm (part of build and push step) - fetch-depth: 0 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: | - ghcr.io/microbiomedata/nmdc-runtime-${{ matrix.image }} - flavor: | - latest=false - tags: | - type=raw,value=berkeley - - - name: Login to DockerHub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - # Reference: https://docs.docker.com/build/ci/github-actions/push-multi-registries/ - # Reference: https://docs.github.com/en/actions/learn-github-actions/contexts#github-context - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push - uses: docker/build-push-action@v5 - with: - context: . 
- push: ${{ env.IS_ORIGINAL_REPO }} - file: nmdc_runtime/${{ matrix.image }}.Dockerfile - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - - release: - needs: build - - runs-on: ubuntu-latest - - strategy: - matrix: - deployment: [ runtime-api, dagster-dagit, dagster-daemon ] - - steps: - - name: Redeploy ${{ env.RANCHER_NAMESPACE }}:${{ matrix.deployment }} - if: ${{ env.IS_ORIGINAL_REPO }} - uses: fjogeleit/http-request-action@v1 - with: - url: ${{ secrets.RANCHER_URL }}/v3/project/${{ secrets.RANCHER_CONTEXT }}/workloads/deployment:${{ env.RANCHER_NAMESPACE }}:${{ matrix.deployment }}?action=redeploy - method: POST - bearerToken: ${{ secrets.RANCHER_TOKEN }} diff --git a/demo/metadata_migration/notebooks/migrate_10_5_6_to_10_8_0.ipynb b/demo/metadata_migration/notebooks/migrate_10_4_0_to_10_9_1.ipynb similarity index 92% rename from demo/metadata_migration/notebooks/migrate_10_5_6_to_10_8_0.ipynb rename to demo/metadata_migration/notebooks/migrate_10_4_0_to_10_9_1.ipynb index ceb9b9a3..8026cbb3 100644 --- a/demo/metadata_migration/notebooks/migrate_10_5_6_to_10_8_0.ipynb +++ b/demo/metadata_migration/notebooks/migrate_10_4_0_to_10_9_1.ipynb @@ -3,13 +3,13 @@ { "metadata": {}, "cell_type": "markdown", - "source": "# Migrate MongoDB database from `nmdc-schema` `v10.5.6` to `v10.8.0`", + "source": "# Migrate MongoDB database from `nmdc-schema` `v10.4.0` to `v10.9.1`", "id": "d05efc6327778f9c" }, { "metadata": {}, "cell_type": "markdown", - "source": "There are no migrators associated with any schema changes between schema versions `v10.5.6` and `v10.8.0`. So, this notebook is a \"no op\" (i.e. \"no operation\").", + "source": "There are no migrators associated with any schema changes between schema versions `v10.4.0` and `v10.9.1`. So, this notebook is a \"no op\" (i.e. \"no operation\").", "id": "b99d5924e825b9a2" }, { diff --git a/demo/metadata_migration/notebooks/migrate_10_8_0_to_11_0_0.ipynb b/demo/metadata_migration/notebooks/migrate_10_9_1_to_11_0_0.ipynb similarity index 95% rename from demo/metadata_migration/notebooks/migrate_10_8_0_to_11_0_0.ipynb rename to demo/metadata_migration/notebooks/migrate_10_9_1_to_11_0_0.ipynb index c19345d3..118b566f 100644 --- a/demo/metadata_migration/notebooks/migrate_10_8_0_to_11_0_0.ipynb +++ b/demo/metadata_migration/notebooks/migrate_10_9_1_to_11_0_0.ipynb @@ -4,10 +4,13 @@ "cell_type": "markdown", "id": "initial_id", "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "source": [ - "# Migrate MongoDB database from `nmdc-schema` `v10.8.0` to `v11.0.0`" + "# Migrate MongoDB database from `nmdc-schema` `v10.9.1` to `v11.0.0`" ] }, { @@ -17,7 +20,7 @@ "source": [ "## Introduction\n", "\n", - "This notebook will be used to migrate the database from `nmdc-schema` `v10.8.0` ([released](https://github.com/microbiomedata/nmdc-schema/releases/tag/v10.8.0) August 21, 2024) to `v11.0.0` (i.e. the initial version of the so-called \"Berkeley schema\").\n", + "This notebook will be used to migrate the database from `nmdc-schema` `v10.9.1` ([released](https://github.com/microbiomedata/nmdc-schema/releases/tag/v10.9.1) October 7, 2024) to `v11.0.0` (i.e. the initial version of the so-called \"Berkeley schema\").\n", "\n", "Unlike previous migrators, this one does not pick and choose which collections it will dump. 
There are two reasons for this: (1) migrators no longer have a dedicated `self.agenda` dictionary that indicates all the collections involved in the migration; and (2) this migration is the first one that involves creating, renaming, and dropping any collections; none of which are things that the old `self.agenda`-based system was designed to handle. So, instead of picking and choosing collections, this migrator **dumps them all.**" ] @@ -106,12 +109,16 @@ "cell_type": "code", "id": "e25a0af308c3185b", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "scrolled": true }, "source": [ "%pip install --upgrade pip\n", "%pip install -r requirements.txt\n", - "%pip install nmdc-schema==11.0.0rc22" + "%pip install nmdc-schema==11.0.0" ], "outputs": [], "execution_count": null @@ -273,7 +280,10 @@ "cell_type": "markdown", "id": "bc387abc62686091", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Create JSON Schema validator\n", @@ -285,7 +295,10 @@ "cell_type": "code", "id": "5c982eb0c04e606d", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "nmdc_jsonschema: dict = get_nmdc_jsonschema_dict(variant=SchemaVariantIdentifier.nmdc_materialized_patterns)\n", @@ -367,23 +380,23 @@ "execution_count": null }, { - "metadata": {}, "cell_type": "markdown", + "id": "7f9c87de6fb8530c", + "metadata": {}, "source": [ "### Delete obsolete dumps from previous migrations\n", "\n", "Delete any existing dumps before we create new ones in this notebook. This is so the dumps you generate with this notebook do not get merged with any unrelated ones." - ], - "id": "7f9c87de6fb8530c" + ] }, { - "metadata": {}, "cell_type": "code", + "id": "6a949d0fcb4b6fa0", + "metadata": {}, "source": [ "!rm -rf {cfg.origin_dump_folder_path}\n", "!rm -rf {cfg.transformer_dump_folder_path}" ], - "id": "6a949d0fcb4b6fa0", "outputs": [], "execution_count": null }, @@ -402,7 +415,9 @@ { "cell_type": "code", "id": "da530d6754c4f6fe", - "metadata": {}, + "metadata": { + "scrolled": true + }, "source": [ "# Dump all collections from the \"origin\" database.\n", "shell_command = f\"\"\"\n", @@ -435,7 +450,9 @@ { "cell_type": "code", "id": "79bd888e82d52a93", - "metadata": {}, + "metadata": { + "scrolled": true + }, "source": [ "# Restore the dumped collections to the \"transformer\" MongoDB server.\n", "shell_command = f\"\"\"\n", @@ -474,7 +491,9 @@ { "cell_type": "code", "id": "9c89c9dd3afe64e2", - "metadata": {}, + "metadata": { + "scrolled": true + }, "source": [ "# Instantiate a MongoAdapter bound to the \"transformer\" database.\n", "adapter = MongoAdapter(\n", @@ -524,7 +543,7 @@ "for collection_name in ordered_collection_names:\n", " collection = transformer_mongo_client[\"nmdc\"][collection_name]\n", " num_documents_in_collection = collection.count_documents({})\n", - " print(f\"Validating collection {collection_name} ({num_documents_in_collection} documents)\")\n", + " print(f\"Validating collection {collection_name} ({num_documents_in_collection} documents)\", end=\"\\t\") # no newline\n", "\n", " for document in collection.find():\n", " # Validate the transformed document.\n", @@ -541,7 +560,9 @@ " #\n", " document_without_underscore_id_key = {key: value for key, value in document.items() if key != \"_id\"}\n", " root_to_validate = dict([(collection_name, [document_without_underscore_id_key])])\n", - " 
nmdc_jsonschema_validator.validate(root_to_validate) # raises exception if invalid" + " nmdc_jsonschema_validator.validate(root_to_validate) # raises exception if invalid\n", + "\n", + " print(f\"Done\")" ], "outputs": [], "execution_count": null @@ -559,7 +580,9 @@ { "cell_type": "code", "id": "db6e432d", - "metadata": {}, + "metadata": { + "scrolled": true + }, "source": [ "# Dump the database from the \"transformer\" MongoDB server.\n", "shell_command = f\"\"\"\n", @@ -583,7 +606,10 @@ "cell_type": "markdown", "id": "997fcb281d9d3222", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Create a bookkeeper\n", @@ -664,7 +690,9 @@ { "cell_type": "code", "id": "1dfbcf0a", - "metadata": {}, + "metadata": { + "scrolled": true + }, "source": [ "# Load the transformed collections into the origin server, replacing any same-named ones that are there.\n", "shell_command = f\"\"\"\n", @@ -691,7 +719,10 @@ "cell_type": "markdown", "id": "ca5ee89a79148499", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "### Indicate that the migration is complete\n", @@ -703,7 +734,10 @@ "cell_type": "code", "id": "d1eaa6c92789c4f3", "metadata": { - "collapsed": false + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } }, "source": [ "bookkeeper.record_migration_event(migrator=migrator, event=MigrationEvent.MIGRATION_COMPLETED)" @@ -740,11 +774,19 @@ ], "outputs": [], "execution_count": null + }, + { + "cell_type": "code", + "id": "037db214-ea76-46bf-bb6a-bf1ff9b28a72", + "metadata": {}, + "source": [], + "outputs": [], + "execution_count": null } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/docs/nb/bulk_validation_referential_integrity_check.ipynb b/docs/nb/bulk_validation_referential_integrity_check.ipynb index b1ab4ef4..06a01ec8 100644 --- a/docs/nb/bulk_validation_referential_integrity_check.ipynb +++ b/docs/nb/bulk_validation_referential_integrity_check.ipynb @@ -37,7 +37,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "localhost:27018\n" + "mongodb://localhost:27018\n" ] } ], @@ -93,7 +93,7 @@ { "data": { "text/plain": [ - "'10.7.0'" + "'11.0.0rc22'" ] }, "execution_count": 3, @@ -126,8 +126,8 @@ "from tqdm.notebook import tqdm\n", "\n", "from nmdc_runtime.api.core.util import pick\n", - "from nmdc_runtime.api.db.mongo import get_mongo_db, nmdc_schema_collection_names, get_collection_names_from_schema\n", - "from nmdc_runtime.util import collection_name_to_class_names, nmdc_schema_view, nmdc_database_collection_instance_class_names, get_nmdc_jsonschema_dict\n", + "from nmdc_runtime.api.db.mongo import get_mongo_db, get_nonempty_nmdc_schema_collection_names, get_collection_names_from_schema\n", + "from nmdc_runtime.util import collection_name_to_class_names, populated_schema_collection_names_with_id_field, nmdc_schema_view, nmdc_database_collection_instance_class_names, get_nmdc_jsonschema_dict\n", "from nmdc_schema.nmdc import Database as NMDCDatabase \n", "from nmdc_schema.get_nmdc_view import ViewGetter\n", "\n", @@ -156,9 +156,18 @@ "execution_count": 5, "id": "1d76b70e-4412-4b17-9db9-322ac791859a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'study_set', 'workflow_execution_set', 'material_processing_set', 'instrument_set', 'data_object_set', 
'configuration_set', 'biosample_set', 'functional_annotation_agg', 'calibration_set', 'processed_sample_set', 'field_research_site_set', 'data_generation_set'}\n" + ] + } + ], "source": [ - "collection_names = sorted(nmdc_schema_collection_names(mdb))" + "collection_names = get_nonempty_nmdc_schema_collection_names(mdb)\n", + "print(collection_names)" ] }, { @@ -279,20 +288,28 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "95817da9de0a4934b5e3683f2f81893e", + "model_id": "6c88577a3a9342808d3bbc0e3707a95a", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/9601505 [00:00 AsyncIOMotorDatabase: return _client[os.getenv("MONGO_DBNAME")] -def nmdc_schema_collection_names(mdb: MongoDatabase) -> Set[str]: +def get_nonempty_nmdc_schema_collection_names(mdb: MongoDatabase) -> Set[str]: + """Returns the names of schema collections in the database that have at least one document.""" names = set(mdb.list_collection_names()) & set(get_collection_names_from_schema()) return {name for name in names if mdb[name].estimated_document_count() > 0} @@ -92,7 +95,7 @@ def get_collection_names_from_schema() -> list[str]: @lru_cache def activity_collection_names(mdb: MongoDatabase) -> Set[str]: - return nmdc_schema_collection_names(mdb) - { + return get_nonempty_nmdc_schema_collection_names(mdb) - { "biosample_set", "study_set", "data_object_set", @@ -101,6 +104,26 @@ def activity_collection_names(mdb: MongoDatabase) -> Set[str]: } +@lru_cache +def get_planned_process_collection_names() -> Set[str]: + r""" + Returns the names of all collections that the schema says can contain documents + that represent instances of the `PlannedProcess` class or any of its subclasses. + """ + schema_view = nmdc_schema_view() + collection_names = set() + planned_process_descendants = set(schema_view.class_descendants("PlannedProcess")) + + for collection_name, class_names in collection_name_to_class_names.items(): + for class_name in class_names: + # If the name of this class is the name of the `PlannedProcess` class + # or any of its subclasses, add it to the result set. 
+ if class_name in planned_process_descendants: + collection_names.add(collection_name) + + return collection_names + + def mongodump_excluded_collections(): _mdb = get_mongo_db() excluded_collections = " ".join( diff --git a/nmdc_runtime/api/endpoints/find.py b/nmdc_runtime/api/endpoints/find.py index 72960715..b40947f5 100644 --- a/nmdc_runtime/api/endpoints/find.py +++ b/nmdc_runtime/api/endpoints/find.py @@ -1,7 +1,7 @@ from operator import itemgetter -from typing import List +from typing import List, Annotated -from fastapi import APIRouter, Depends, Form +from fastapi import APIRouter, Depends, Form, Path from jinja2 import Environment, PackageLoader, select_autoescape from nmdc_runtime.minter.config import typecodes from nmdc_runtime.util import get_nmdc_jsonschema_dict @@ -10,7 +10,12 @@ from toolz import merge, assoc_in from nmdc_runtime.api.core.util import raise404_if_none -from nmdc_runtime.api.db.mongo import get_mongo_db, activity_collection_names +from nmdc_runtime.api.db.mongo import ( + get_mongo_db, + activity_collection_names, + get_planned_process_collection_names, + get_nonempty_nmdc_schema_collection_names, +) from nmdc_runtime.api.endpoints.util import ( find_resources, strip_oid, @@ -134,21 +139,25 @@ def find_data_objects_for_study( study_id: str, mdb: MongoDatabase = Depends(get_mongo_db), ): - """This API endpoint is used to retrieve data object ids associated with - all the biosamples that are part of a given study. This endpoint makes + """This API endpoint is used to retrieve data objects associated with + all the biosamples associated with a given study. This endpoint makes use of the `alldocs` collection for its implementation. :param study_id: NMDC study id for which data objects are to be retrieved :param mdb: PyMongo connection, defaults to Depends(get_mongo_db) - :return: List of dictionaries where each dictionary contains biosample id as key, - and another dictionary with key 'data_object_set' containing list of data object ids as value + :return: List of dictionaries, each of which has a `biosample_id` entry + and a `data_object_set` entry. The value of the `biosample_id` entry + is the `Biosample`'s `id`. The value of the `data_object_set` entry + is a list of the `DataObject`s associated with that `Biosample`. """ biosample_data_objects = [] study = raise404_if_none( mdb.study_set.find_one({"id": study_id}, ["id"]), detail="Study not found" ) - biosamples = mdb.biosample_set.find({"part_of": study["id"]}, ["id"]) + # Note: With nmdc-schema v10 (legacy schema), we used the field named `part_of` here. + # With nmdc-schema v11 (Berkeley schema), we use the field named `associated_studies` here. + biosamples = mdb.biosample_set.find({"associated_studies": study["id"]}, ["id"]) biosample_ids = [biosample["id"] for biosample in biosamples] for biosample_id in biosample_ids: @@ -210,47 +219,70 @@ def find_data_object_by_id( @router.get( - "/activities", + "/planned_processes", response_model=FindResponse, response_model_exclude_unset=True, ) -def find_activities( +def find_planned_processes( req: FindRequest = Depends(), mdb: MongoDatabase = Depends(get_mongo_db), ): + # TODO: Add w3id URL links for classes (e.g. ) when they resolve + # to Berkeley schema definitions. """ - The GET /activities endpoint is a general way to fetch metadata about various activities (e.g. metagenome assembly, - natural organic matter analysis, library preparation, etc.). Any "slot" (a.k.a. 
attribute) for - [WorkflowExecutionActivity](https://microbiomedata.github.io/nmdc-schema/WorkflowExecutionActivity/) - or [PlannedProcess](https://microbiomedata.github.io/nmdc-schema/PlannedProcess/) classes may be used in the filter - and sort parameters, including attributes of subclasses of *WorkflowExecutionActivity* and *PlannedProcess*. - - For example, attributes used in subclasses such as MetabolomicsAnalysisActivity (subclass of *WorkflowExecutionActivity*) - or [Extraction](https://microbiomedata.github.io/nmdc-schema/Extraction/) (subclass of *PlannedProcess*), + The GET /planned_processes endpoint is a general way to fetch metadata about various planned processes (e.g. + workflow execution, material processing, etc.). Any "slot" (a.k.a. attribute) for + `PlannedProcess` may be used in the filter + and sort parameters, including attributes of subclasses of *PlannedProcess*. + + For example, attributes used in subclasses such as `Extraction` (subclass of *PlannedProcess*), can be used as input criteria for the filter and sort parameters of this endpoint. """ - return find_resources_spanning(req, mdb, activity_collection_names(mdb)) + return find_resources_spanning( + req, + mdb, + get_planned_process_collection_names() + & get_nonempty_nmdc_schema_collection_names(mdb), + ) @router.get( - "/activities/{activity_id}", + "/planned_processes/{planned_process_id}", response_model=Doc, response_model_exclude_unset=True, ) -def find_activity_by_id( - activity_id: str, +def find_planned_process_by_id( + planned_process_id: Annotated[ + str, + Path( + title="PlannedProcess ID", + description="The `id` of the document that represents an instance of " + "the `PlannedProcess` class or any of its subclasses", + example=r"nmdc:wfmag-11-00jn7876.1", + ), + ], mdb: MongoDatabase = Depends(get_mongo_db), ): - """ - If the activity identifier is known, the activity metadata can be retrieved using the GET /activities/activity_id endpoint. - \n Note that only one metadata record for an activity may be returned at a time using this method. + r""" + Returns the document that has the specified `id` and represents an instance of the `PlannedProcess` class + or any of its subclasses. If no such document exists, returns an HTTP 404 response. """ doc = None - for name in activity_collection_names(mdb): - doc = mdb[name].find_one({"id": activity_id}) + + # Note: We exclude empty collections as a performance optimization + # (we already know they don't contain the document). + collection_names = ( + get_planned_process_collection_names() + & get_nonempty_nmdc_schema_collection_names(mdb) + ) + + # For each collection, search it for a document having the specified `id`. + for name in collection_names: + doc = mdb[name].find_one({"id": planned_process_id}) if doc is not None: return strip_oid(doc) + # Note: If execution gets to this point, it means we didn't find the document. 
return raise404_if_none(doc) diff --git a/nmdc_runtime/api/endpoints/nmdcschema.py b/nmdc_runtime/api/endpoints/nmdcschema.py index 420991db..0d83d048 100644 --- a/nmdc_runtime/api/endpoints/nmdcschema.py +++ b/nmdc_runtime/api/endpoints/nmdcschema.py @@ -15,7 +15,11 @@ from nmdc_runtime.api.core.metadata import map_id_to_collection, get_collection_for_id from nmdc_runtime.api.core.util import raise404_if_none -from nmdc_runtime.api.db.mongo import get_mongo_db, nmdc_schema_collection_names +from nmdc_runtime.api.db.mongo import ( + get_mongo_db, + get_nonempty_nmdc_schema_collection_names, + get_collection_names_from_schema, +) from nmdc_runtime.api.endpoints.util import list_resources from nmdc_runtime.api.models.metadata import Doc from nmdc_runtime.api.models.util import ListRequest, ListResponse @@ -23,10 +27,8 @@ router = APIRouter() -def verify_collection_name( - collection_name: str, mdb: MongoDatabase = Depends(get_mongo_db) -): - names = nmdc_schema_collection_names(mdb) +def ensure_collection_name_is_known_to_schema(collection_name: str): + names = get_collection_names_from_schema() if collection_name not in names: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, @@ -96,7 +98,7 @@ def get_nmdc_database_collection_stats( "/nmdcschema/{collection_name}", response_model=ListResponse[Doc], response_model_exclude_unset=True, - dependencies=[Depends(verify_collection_name)], + dependencies=[Depends(ensure_collection_name_is_known_to_schema)], ) def list_from_collection( collection_name: str, @@ -235,7 +237,7 @@ def get_collection_name_by_doc_id( "/nmdcschema/{collection_name}/{doc_id}", response_model=Doc, response_model_exclude_unset=True, - dependencies=[Depends(verify_collection_name)], + dependencies=[Depends(ensure_collection_name_is_known_to_schema)], ) def get_from_collection_by_id( collection_name: str, diff --git a/nmdc_runtime/api/endpoints/queries.py b/nmdc_runtime/api/endpoints/queries.py index 8c56fd5a..79aa5488 100644 --- a/nmdc_runtime/api/endpoints/queries.py +++ b/nmdc_runtime/api/endpoints/queries.py @@ -7,7 +7,10 @@ from nmdc_runtime.api.core.idgen import generate_one_id from nmdc_runtime.api.core.util import now, raise404_if_none -from nmdc_runtime.api.db.mongo import get_mongo_db, nmdc_schema_collection_names +from nmdc_runtime.api.db.mongo import ( + get_mongo_db, + get_nonempty_nmdc_schema_collection_names, +) from nmdc_runtime.api.endpoints.util import permitted, users_allowed from nmdc_runtime.api.models.query import ( Query, @@ -130,7 +133,7 @@ def _run_query(query, mdb) -> CommandResponse: ran_at = now() if q_type is DeleteCommand: collection_name = query.cmd.delete - if collection_name not in nmdc_schema_collection_names(mdb): + if collection_name not in get_nonempty_nmdc_schema_collection_names(mdb): raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Can only delete documents in nmdc-schema collections.", @@ -153,7 +156,7 @@ def _run_query(query, mdb) -> CommandResponse: ) elif q_type is UpdateCommand: collection_name = query.cmd.update - if collection_name not in nmdc_schema_collection_names(mdb): + if collection_name not in get_nonempty_nmdc_schema_collection_names(mdb): raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail="Can only update documents in nmdc-schema collections.", diff --git a/nmdc_runtime/api/endpoints/users.py b/nmdc_runtime/api/endpoints/users.py index 87ada8a6..1357c214 100644 --- a/nmdc_runtime/api/endpoints/users.py +++ b/nmdc_runtime/api/endpoints/users.py @@ -20,6 
+20,7 @@ ORCID_JWS_VERITY_ALGORITHM, credentials_exception, ORCID_NMDC_CLIENT_SECRET, + ORCID_BASE_URL, ) from nmdc_runtime.api.core.auth import get_password_hash from nmdc_runtime.api.core.util import generate_secret @@ -39,7 +40,7 @@ @router.get("/orcid_code", response_class=RedirectResponse, include_in_schema=False) async def receive_orcid_code(request: Request, code: str, state: str | None = None): rv = requests.post( - "https://orcid.org/oauth/token", + f"{ORCID_BASE_URL}/oauth/token", data=( f"client_id={ORCID_NMDC_CLIENT_ID}&client_secret={ORCID_NMDC_CLIENT_SECRET}&" f"grant_type=authorization_code&code={code}&redirect_uri={BASE_URL_EXTERNAL}/orcid_code" @@ -98,7 +99,7 @@ async def login_for_access_token( ) payload = json.loads(payload.decode()) issuer: str = payload.get("iss") - if issuer != "https://orcid.org": + if issuer != ORCID_BASE_URL: raise credentials_exception subject: str = payload.get("sub") user = get_user(mdb, subject) diff --git a/nmdc_runtime/api/endpoints/util.py b/nmdc_runtime/api/endpoints/util.py index fd389f1d..e7895d71 100644 --- a/nmdc_runtime/api/endpoints/util.py +++ b/nmdc_runtime/api/endpoints/util.py @@ -343,6 +343,19 @@ def find_resources_spanning( detail="This resource only supports page-based pagination", ) + if len(collection_names) == 0: + return { + "meta": { + "mongo_filter_dict": get_mongo_filter(req.filter), + "count": 0, + "db_response_time_ms": 0, + "page": req.page, + "per_page": req.per_page, + }, + "results": [], + "group_by": [], + } + responses = {name: find_resources(req, mdb, name) for name in collection_names} rv = { "meta": { diff --git a/nmdc_runtime/api/endpoints/workflows.py b/nmdc_runtime/api/endpoints/workflows.py index 8d9029eb..267cd202 100644 --- a/nmdc_runtime/api/endpoints/workflows.py +++ b/nmdc_runtime/api/endpoints/workflows.py @@ -4,13 +4,12 @@ import pymongo from fastapi import APIRouter, Depends, HTTPException -from motor.motor_asyncio import AsyncIOMotorDatabase from pymongo.database import Database as MongoDatabase from pymongo.errors import BulkWriteError from starlette import status from nmdc_runtime.api.core.util import raise404_if_none -from nmdc_runtime.api.db.mongo import get_mongo_db, activity_collection_names +from nmdc_runtime.api.db.mongo import get_mongo_db from nmdc_runtime.api.models.capability import Capability from nmdc_runtime.api.models.object_type import ObjectType from nmdc_runtime.api.models.site import Site, get_current_client_site @@ -54,24 +53,36 @@ def list_workflow_capabilities( return list(mdb.capabilities.find({"id": {"$in": doc.get("capability_ids", [])}})) -# TODO: Create activity.py in ../models -@router.post("/workflows/activities") +@router.post("/workflows/activities", status_code=410, deprecated=True) async def post_activity( activity_set: dict[str, Any], site: Site = Depends(get_current_client_site), mdb: MongoDatabase = Depends(get_mongo_db), ): """ - Please migrate all workflows from `v1/workflows/activities` to this endpoint. - ------- - Post activity set to database and claim job. + DEPRECATED: migrate all workflows from this endpoint to `/workflows/workflow_executions`. + """ + return f"DEPRECATED: POST your request to `/workflows/workflow_executions` instead." + + +@router.post("/workflows/workflow_executions") +async def post_workflow_execution( + workflow_execution_set: dict[str, Any], + site: Site = Depends(get_current_client_site), + mdb: MongoDatabase = Depends(get_mongo_db), +): + """ + Post workflow execution set to database and claim job. 
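For orientation, the sketch below shows how a site client might call this endpoint. The base URL and bearer token are placeholders (any Runtime deployment works), and the payload contents are illustrative only; real documents must be schema-valid, since the endpoint runs `validate_json` before inserting anything.

```python
# Hedged sketch: POST a workflow execution set to the renamed endpoint.
# API_BASE and TOKEN are placeholders, not values taken from this changeset.
import requests

API_BASE = "https://api.microbiomedata.org"
TOKEN = "..."  # site-client access token (placeholder)

payload = {
    "workflow_execution_set": [
        # one or more schema-valid WorkflowExecution documents
    ],
    "data_object_set": [
        # DataObjects associated with the workflow executions (optional)
    ],
}

resp = requests.post(
    f"{API_BASE}/workflows/workflow_executions",
    json=payload,
    headers={"Authorization": f"Bearer {TOKEN}"},
)
resp.raise_for_status()
print(resp.json())  # {"message": "jobs accepted"} on success
```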
Parameters ------- - activity_set: dict[str,Any] - Set of activities for specific workflows, in the form of a nmdc:Database. + workflow_execution_set: dict[str,Any] + Set of workflow executions for specific workflows, in the form of a nmdc:Database. Other collections (such as data_object_set) are allowed, as they may be associated - with the activities submitted. + with the workflow executions submitted. + + site: Site + mdb: MongoDatabase Returns ------- @@ -81,7 +92,7 @@ async def post_activity( _ = site # must be authenticated try: # validate request JSON - rv = validate_json(activity_set, mdb) + rv = validate_json(workflow_execution_set, mdb) if rv["result"] == "errors": raise HTTPException( status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, @@ -94,7 +105,7 @@ async def post_activity( username=os.getenv("MONGO_USERNAME"), password=os.getenv("MONGO_PASSWORD"), ) - mongo_resource.add_docs(activity_set, validate=False, replace=True) + mongo_resource.add_docs(workflow_execution_set, validate=False, replace=True) return {"message": "jobs accepted"} except BulkWriteError as e: raise HTTPException(status_code=409, detail=str(e)) diff --git a/nmdc_runtime/api/main.py b/nmdc_runtime/api/main.py index f4c8203e..67c6379c 100644 --- a/nmdc_runtime/api/main.py +++ b/nmdc_runtime/api/main.py @@ -22,7 +22,11 @@ ensure_unique_id_indexes, REPO_ROOT_DIR, ) -from nmdc_runtime.api.core.auth import get_password_hash, ORCID_NMDC_CLIENT_ID +from nmdc_runtime.api.core.auth import ( + get_password_hash, + ORCID_NMDC_CLIENT_ID, + ORCID_BASE_URL, +) from nmdc_runtime.api.db.mongo import ( get_mongo_db, ) @@ -218,51 +222,45 @@ { "name": "metadata", "description": """ -The [metadata endpoints](https://api.microbiomedata.org/docs#/metadata) can be used to get and filter metadata from -collection set types (including [studies](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/Study.html), -[biosamples](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/Biosample.html), -[data objects](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/DataObject.html), and -[activities](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/Activity.html)).
+The [metadata endpoints](https://api.microbiomedata.org/docs#/metadata) can be used to get and filter metadata from collection set types (including
+[studies](https://w3id.org/nmdc/Study/),
+[biosamples](https://w3id.org/nmdc/Biosample/),
+[planned processes](https://w3id.org/nmdc/PlannedProcess/), and
+[data objects](https://w3id.org/nmdc/DataObject/),
+as discussed in the __find__ section).
+
The __metadata__ endpoints allow users to retrieve metadata from the data portal using the various GET endpoints
-that are slightly different than the __find__ endpoints, but some can be used similarly. As with the __find__ endpoints,
+that are slightly different from the __find__ endpoints, but some can be used similarly. As with the __find__ endpoints,
parameters for the __metadata__ endpoints that do not have a red ___* required___ next to them are optional.
Unlike the compact syntax used in the __find__ endpoints, the syntax for the filter parameter of the metadata endpoints uses [MongoDB-like language querying](https://www.mongodb.com/docs/manual/tutorial/query-documents/). The applicable parameters of the __metadata__ endpoints, with acceptable syntax and examples, are in the table below. -
-More Details - | Parameter | Description | Syntax | Example | | :---: | :-----------: | :-------: | :---: | -| collection_name | The name of the collection to be queried. For a list of collection names please see the [Database class](https://microbiomedata.github.io/nmdc-schema/Database/) of the NMDC Schema | String | `biosample_set` | +| collection_name | The name of the collection to be queried. For a list of collection names please see the [Database class](https://w3id.org/nmdc/Database/) of the NMDC Schema | String | `biosample_set` | | filter | Allows conditions to be set as part of the query, returning only results that satisfy the conditions | [MongoDB-like query language](https://www.mongodb.com/docs/manual/tutorial/query-documents/). All strings should be in double quotation marks. | `{"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}` | | max_page_size | Specifies the maximum number of documents returned at a time | Integer | `25` -| page_token | Specifies the token of the page to return. If unspecified, the first page is returned. To retrieve a subsequent page, the value received as the `next_page_token` from the bottom of the previous results can be provided as a `page_token`. ![next_page_token](../_static/images/howto_guides/api_gui/metadata_page_token_param.png) | String | `nmdc:sys0ae1sh583` +| page_token | Specifies the token of the page to return. If unspecified, the first page is returned. To retrieve a subsequent page, the value received as the `next_page_token` from the bottom of the previous results can be provided as a `page_token`. | String | `nmdc:sys0ae1sh583` | projection | Indicates the desired attributes to be included in the response. Helpful for trimming down the returned results | Comma-separated list of attributes that belong to the documents in the collection being queried | `name, ecosystem_type` | | doc_id | The unique identifier of the item being requested. For example, the identifier of a biosample or an extraction | Curie e.g. `prefix:identifier` | `nmdc:bsm-11-ha3vfb58` |
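For example, the `filter`, `max_page_size`, and `projection` parameters from the table can be combined as in the sketch below. It assumes the public deployment base URL and that the response follows the Runtime's `ListResponse` shape (a `resources` list plus an optional `next_page_token`).

```python
# Hedged sketch: a MongoDB-style filter against GET /nmdcschema/{collection_name}.
import json
import requests

API_BASE = "https://api.microbiomedata.org"  # assumed deployment base URL

params = {
    # Biosamples north of 45 degrees latitude in the "Plants" ecosystem category.
    "filter": json.dumps({"lat_lon.latitude": {"$gt": 45.0}, "ecosystem_category": "Plants"}),
    "max_page_size": 25,
    "projection": "name,ecosystem_type",
}
resp = requests.get(f"{API_BASE}/nmdcschema/biosample_set", params=params)
resp.raise_for_status()
page = resp.json()
for doc in page["resources"]:
    print(doc)
# Pass page.get("next_page_token") as `page_token` to fetch the next page.
```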

-
+ """, }, { "name": "find", "description": """ -The [find endpoints](https://api.microbiomedata.org/docs#/find:~:text=Find%20NMDC-,metadata,-entities.) are provided with -NMDC metadata entities already specified - where metadata about [studies](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/Study.html), -[biosamples](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/Biosample.html), -[data objects](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/DataObject.html), and -[activities](https://nmdc-documentation.readthedocs.io/en/latest/reference/metadata/Activity.html) can be retrieved using GET requests. +The [find endpoints](https://api.microbiomedata.org/docs#/find:~:text=Find%20NMDC-,metadata,-entities.) are provided with NMDC metadata entities already specified - where metadata about [studies](https://w3id.org/nmdc/Study), [biosamples](https://w3id.org/nmdc/Biosample), [data objects](https://w3id.org/nmdc/DataObject/), and [planned processes](https://w3id.org/nmdc/PlannedProcess/) can be retrieved using GET requests. +
Each endpoint is unique and requires the applicable attribute names to be known in order to structure a query in a meaningful way. Please note that endpoints with parameters that do not have a red ___* required___ label next to them are optional.
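For example, the compact filter syntax detailed in the table below can be exercised against the biosample find endpoint as in this sketch, which assumes the public deployment base URL; the `meta` and `results` keys follow the `FindResponse` shape used in `nmdc_runtime/api/endpoints/util.py`.

```python
# Hedged sketch: compact attribute:value filter syntax against a find endpoint.
import requests

API_BASE = "https://api.microbiomedata.org"  # assumed deployment base URL

params = {
    # Comma-separated attribute:value pairs; comparison operators such as > are allowed.
    "filter": "ecosystem_category:Plants, lat_lon.latitude:>35.0",
    "per_page": 25,
}
resp = requests.get(f"{API_BASE}/biosamples", params=params)
resp.raise_for_status()
body = resp.json()
print(body["meta"]["count"], "matching biosamples")
for result in body["results"]:
    print(result["id"])
```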
The applicable parameters of the ___find___ endpoints, with acceptable syntax and examples, are in the table below. -
More Details - | Parameter | Description | Syntax | Example | | :---: | :-----------: | :-------: | :---: | | filter | Allows conditions to be set as part of the query, returning only results that satisfy the conditions | Comma separated string of attribute:value pairs. Can include comparison operators like >=, <=, <, and >. May use a `.search` after the attribute name to conduct a full text search of the field that are of type string. e.g. `attribute:value,attribute.search:value` | `ecosystem_category:Plants, lat_lon.latitude:>35.0` | @@ -276,9 +274,9 @@ | study_id | The unique identifier of a study | Curie e.g. `prefix:identifier` | `nmdc:sty-11-34xj1150` | | sample_id | The unique identifier of a biosample | Curie e.g. `prefix:identifier` | `nmdc:bsm-11-w43vsm21` | | data_object_id | The unique identifier of a data object | Curie e.g. `prefix:identifier` | `nmdc:dobj-11-7c6np651` | -| activity_id | The unique identifier for an NMDC workflow execution activity | Curie e.g. `prefix:identifier` | `nmdc:wfmgan-11-hvcnga50.1`|
+| planned_process_id | The unique identifier for an NMDC planned process | Curie e.g. `prefix:identifier` | `nmdc:wfmgan-11-hvcnga50.1`| +
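For example, a single planned process can be fetched by `id` as in the sketch below; the base URL is assumed to be the public deployment, and the identifier is the example given in the endpoint's own parameter documentation.

```python
# Hedged sketch: GET /planned_processes/{planned_process_id}.
import requests

API_BASE = "https://api.microbiomedata.org"  # assumed deployment base URL
planned_process_id = "nmdc:wfmag-11-00jn7876.1"  # example id from the endpoint docs

resp = requests.get(f"{API_BASE}/planned_processes/{planned_process_id}")
if resp.status_code == 404:
    print("No document with that id in any PlannedProcess collection.")
else:
    resp.raise_for_status()
    print(resp.json())
```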
-
""", }, @@ -420,13 +418,13 @@ async def get_versions(): "The NMDC Runtime API, via on-demand functions " "and via schedule-based and sensor-based automation, " "supports validation and submission of metadata, as well as " - "orchestration of workflow execution activities." + "orchestration of workflow executions." "\n\n" "Dependency versions:\n\n" f'nmdc-schema={version("nmdc_schema")}\n\n' "Documentation\n\n" ' ' - f'Login with ORCiD' " (note: this link is static; if you are logged in, you will see a 'locked' lock icon" diff --git a/nmdc_runtime/api/v1/models/ingest.py b/nmdc_runtime/api/v1/models/ingest.py deleted file mode 100644 index a0e384f3..00000000 --- a/nmdc_runtime/api/v1/models/ingest.py +++ /dev/null @@ -1,11 +0,0 @@ -from typing import List, Optional - -from components.workflow.workflow.core import DataObject, ReadsQCSequencingActivity -from pydantic import BaseModel - - -class Ingest(BaseModel): - data_object_set: List[DataObject] = [] - read_qc_analysis_activity_set: Optional[List[ReadsQCSequencingActivity]] = None - metagenome_assembly_activity_set: Optional[List[ReadsQCSequencingActivity]] = None - metagenome_annotation_activity_set: Optional[List[ReadsQCSequencingActivity]] = None diff --git a/nmdc_runtime/api/v1/models/users.py b/nmdc_runtime/api/v1/models/users.py deleted file mode 100644 index 2af337bd..00000000 --- a/nmdc_runtime/api/v1/models/users.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Optional, List - -from pydantic import BaseModel - - -from nmdc_runtime.domain.users.userSchema import UserOut - - -class Response(BaseModel): - query: str - limit: int - - -class UserResponse(Response): - users: List[UserOut] diff --git a/nmdc_runtime/api/v1/models/workflow_execution_activity.py b/nmdc_runtime/api/v1/models/workflow_execution_activity.py deleted file mode 100644 index 91cd3265..00000000 --- a/nmdc_runtime/api/v1/models/workflow_execution_activity.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Beans.""" - -from typing import List - -from nmdc_runtime.workflow_execution_activity import ( - DataObject, - WorkflowExecutionActivity, - init_activity_service, -) -from pydantic import BaseModel - - -class ActivitySet(BaseModel): - """More thought.""" - - activity_set: List[WorkflowExecutionActivity] - data_object_set: List[DataObject] diff --git a/nmdc_runtime/api/v1/outputs.py b/nmdc_runtime/api/v1/outputs.py deleted file mode 100644 index 9150c0ec..00000000 --- a/nmdc_runtime/api/v1/outputs.py +++ /dev/null @@ -1,52 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException - -from nmdc_runtime.api.endpoints.util import ( - _claim_job, - _request_dagster_run, - permitted, - persist_content_and_get_drs_object, - users_allowed, -) -from nmdc_runtime.api.models.site import Site, get_current_client_site -from pymongo import ReturnDocument -from pymongo.database import Database as MongoDatabase -from pymongo.errors import DuplicateKeyError -from starlette import status - -router = APIRouter(prefix="/outputs", tags=["outputs"]) - - -# @router.post( -# "", -# status_code=status.HTTP_201_CREATED, -# ) -# async def ingest( -# # ingest: Ingest, -# mdb: MongoDatabase = Depends(get_mongo_db), -# # site: Site = Depends(get_current_client_site), -# ) -> bool: -# pass -# # try: - -# # if site is None: -# # raise HTTPException(status_code=401, detail="Client site not found") - -# # drs_obj_doc = persist_content_and_get_drs_object( -# # content=ingest.json(), -# # filename=None, -# # content_type="application/json", -# # description="input metadata for readqc-in wf", -# # 
id_ns="json-readqc-in", -# # ) - -# # doc_after = mdb.objects.find_one_and_update( -# # {"id": drs_obj_doc["id"]}, -# # {"$set": {"types": ["readqc-in"]}}, -# # return_document=ReturnDocument.AFTER, -# # ) -# # return doc_after - -# # except DuplicateKeyError as e: -# # raise HTTPException(status_code=409, detail=e.details) -# if site is None: -# raise HTTPException(status_code=401, detail="Client site not found") diff --git a/nmdc_runtime/api/v1/router.py b/nmdc_runtime/api/v1/router.py index a0209e30..76ba3266 100644 --- a/nmdc_runtime/api/v1/router.py +++ b/nmdc_runtime/api/v1/router.py @@ -1,9 +1,3 @@ from fastapi import APIRouter -# from . import users -from . import outputs -from .workflows import activities - router_v1 = APIRouter(prefix="/v1", responses={404: {"description": "Not found"}}) - -router_v1.include_router(activities.router) diff --git a/nmdc_runtime/api/v1/users.py b/nmdc_runtime/api/v1/users.py deleted file mode 100644 index 45b38d08..00000000 --- a/nmdc_runtime/api/v1/users.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Endpoints module.""" - -from typing import List, Optional - -from fastapi import APIRouter, HTTPException, Depends, Response, status -from dependency_injector.wiring import inject, Provide - -from nmdc_runtime.containers import Container - -from nmdc_runtime.domain.users.userService import UserService -from nmdc_runtime.domain.users.userSchema import UserAuth, UserOut - - -router = APIRouter(prefix="/users", tags=["users"]) - - -# @router.get("", response_model=Response) -# @inject -# async def index( -# query: Optional[str] = None, -# limit: Optional[str] = None, -# user_service: UserService = Depends(Provide[Container.user_service]), -# ) -> List[UserOut]: -# query = query -# limit = limit - -# users = await user_service.search(query, limit) - -# return {"query": query, "limit": limit, "users": users} - - -@router.post("", response_model=Response, status_code=status.HTTP_201_CREATED) -@inject -async def add( - user: UserAuth, - user_service: UserService = Depends(Provide[Container.user_service]), -) -> UserOut: - new_user = await user_service.create_user(user) - return new_user diff --git a/nmdc_runtime/api/v1/workflows/__init__.py b/nmdc_runtime/api/v1/workflows/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/nmdc_runtime/api/v1/workflows/activities.py b/nmdc_runtime/api/v1/workflows/activities.py deleted file mode 100644 index 4c490a14..00000000 --- a/nmdc_runtime/api/v1/workflows/activities.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Module.""" - -import os -from typing import Any - -from fastapi import APIRouter, Depends, HTTPException -from motor.motor_asyncio import AsyncIOMotorDatabase -from pymongo.database import Database as MongoDatabase -from pymongo.errors import BulkWriteError -from starlette import status - -from nmdc_runtime.api.db.mongo import ( - get_mongo_db, - activity_collection_names, -) -from nmdc_runtime.api.models.site import Site, get_current_client_site -from nmdc_runtime.site.resources import MongoDB -from nmdc_runtime.util import validate_json - -router = APIRouter( - prefix="/workflows/activities", tags=["workflow_execution_activities"] -) - - -async def job_to_db(job_spec: dict[str, Any], mdb: AsyncIOMotorDatabase) -> None: - return await mdb["jobs"].insert_one(job_spec) - - -@router.post("", status_code=status.HTTP_201_CREATED) -async def post_activity( - activity_set: dict[str, Any], - site: Site = Depends(get_current_client_site), - mdb: MongoDatabase = Depends(get_mongo_db), -) -> dict[str, str]: - """ - 
**NOTE: This endpoint is DEPRECATED. Please migrate to `~/workflows/activities`.** - ---------- - The `v1/workflows/activities` endpoint will be removed in an upcoming release. - -- - Post activity set to database and claim job. - - Parameters: activity_set: dict[str,Any] - Set of activities for specific workflows. - - Returns: dict[str,str] - """ - _ = site # must be authenticated - try: - # validate request JSON - rv = validate_json(activity_set, mdb) - if rv["result"] == "errors": - raise HTTPException( - status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, - detail=str(rv), - ) - # create mongodb instance for dagster - mongo_resource = MongoDB( - host=os.getenv("MONGO_HOST"), - dbname=os.getenv("MONGO_DBNAME"), - username=os.getenv("MONGO_USERNAME"), - password=os.getenv("MONGO_PASSWORD"), - ) - mongo_resource.add_docs(activity_set, validate=False, replace=True) - return {"message": "jobs accepted"} - except BulkWriteError as e: - raise HTTPException(status_code=409, detail=str(e)) - except ValueError as e: - raise HTTPException(status_code=409, detail=str(e)) diff --git a/nmdc_runtime/api/v1/workflows/activities/router.py b/nmdc_runtime/api/v1/workflows/activities/router.py deleted file mode 100644 index 66fed736..00000000 --- a/nmdc_runtime/api/v1/workflows/activities/router.py +++ /dev/null @@ -1,43 +0,0 @@ -"""Under embargo due to E999 SyntaxError""" - -# """Module""" -# from fastapi import APIRouter, Depends, HTTPException -# from nmdc_runtime.api.models.site import Site, get_current_client_site -# from pymongo.errors import DuplicateKeyError -# from starlette import status -# -# from components.nmdc_runtime.workflow_execution_activity import ActivitySet -# -# router = APIRouter(prefix="/activities", tags=["workflow_execution_activities"]) -# -# -# @router.post( -# activity_set: ActivitySet, -# status_code=status.HTTP_201_CREATED, -# ) -# async def post_l( -# site: Site = Depends(get_current_client_site), -# ) -> None: -# """Docs""" -# try: -# -# if site is None: -# raise HTTPException(status_code=401, detail="Client site not found") -# -# # drs_obj_doc = persist_content_and_get_drs_object( -# # content=ingest.json(), -# # filename=None, -# # content_type="application/json", -# # description="input metadata for readqc-in wf", -# # id_ns="json-readqc-in", -# # ) -# -# # doc_after = mdb.objects.find_one_and_update( -# # {"id": drs_obj_doc["id"]}, -# # {"$set": {"types": ["readqc-in"]}}, -# # return_document=ReturnDocument.AFTER, -# # ) -# # return doc_after -# -# except DuplicateKeyError as e: -# raise HTTPException(status_code=409, detail=e.details) diff --git a/nmdc_runtime/minter/config.py b/nmdc_runtime/minter/config.py index 3883fca0..b1a5ac0e 100644 --- a/nmdc_runtime/minter/config.py +++ b/nmdc_runtime/minter/config.py @@ -1,5 +1,6 @@ import os from functools import lru_cache +from typing import List from nmdc_runtime.util import get_nmdc_jsonschema_dict @@ -11,18 +12,73 @@ def minting_service_id() -> str | None: return os.getenv("MINTING_SERVICE_ID") +def extract_typecode_from_pattern(pattern: str) -> str: + r""" + Returns the typecode portion of the specified string. 
+ + >>> extract_typecode_from_pattern("foo-123-456$") # original behavior + 'foo' + >>> extract_typecode_from_pattern("(foo)-123-456$") # returns first and only typecode + 'foo' + >>> extract_typecode_from_pattern("(foo|bar)-123-456$") # returns first of 2 typecodes + 'foo' + >>> extract_typecode_from_pattern("(foo|bar|baz)-123-456$") # returns first of > 2 typecodes + 'foo' + """ + + # Get the portion of the pattern preceding the first hyphen. + # e.g. "foo-bar-baz" → ["foo", "bar-baz"] → "foo" + typecode_sub_pattern = pattern.split("-", maxsplit=1)[0] + + # If that portion of the pattern is enclosed in parentheses, get the portion between the parentheses. + # e.g. "(apple|banana|carrot)" → "apple|banana|carrot" + if typecode_sub_pattern.startswith("(") and typecode_sub_pattern.endswith(")"): + inner_pattern = typecode_sub_pattern[1:-1] + + # Finally, get everything before the first `|`, if any. + # e.g. "apple|banana|carrot" → "apple" + # e.g. "apple" → "apple" + typecode = inner_pattern.split("|", maxsplit=1)[0] + else: + # Note: This is the original behavior, before we added support for multi-typecode patterns. + # e.g. "apple" → "apple" + typecode = typecode_sub_pattern + + return typecode + + @lru_cache() -def typecodes(): +def typecodes() -> List[dict]: + r""" + Returns a list of dictionaries containing typecodes and associated information derived from the schema. + + Preconditions about the schema: + - The typecode portion of the pattern is between the pattern prefix and the first subsequent hyphen. + - The typecode portion of the pattern either consists of a single typecode verbatim (e.g. "foo"); + or consists of multiple typecodes in a pipe-delimited list enclosed in parentheses (e.g. "(foo|bar|baz)"). + - The typecode portion of the pattern does not, itself, contain any hyphens. + + TODO: Get the typecodes in a different way than by extracting them from a larger string, which seems brittle to me. + Getting them a different way may require schema authors to _define_ them a different way (e.g. defining them + in a dedicated property of a class; for example, one named `typecode`). + """ + id_pattern_prefix = r"^(nmdc):" + rv = [] schema_dict = get_nmdc_jsonschema_dict() for cls_name, defn in schema_dict["$defs"].items(): match defn.get("properties"): - case {"id": {"pattern": p}} if p.startswith("^(nmdc):"): + case {"id": {"pattern": p}} if p.startswith(id_pattern_prefix): + # Get the portion of the pattern following the prefix. + # e.g. 
"^(nmdc):foo-bar-baz" → "foo-bar-baz" + index_of_first_character_following_prefix = len(id_pattern_prefix) + pattern_without_prefix = p[index_of_first_character_following_prefix:] + rv.append( { "id": "nmdc:" + cls_name + "_" + "typecode", "schema_class": "nmdc:" + cls_name, - "name": p.split(":", maxsplit=1)[-1].split("-", maxsplit=1)[0], + "name": extract_typecode_from_pattern(pattern_without_prefix), } ) case _: diff --git a/nmdc_runtime/site/graphs.py b/nmdc_runtime/site/graphs.py index 8d0cb9bb..6626df09 100644 --- a/nmdc_runtime/site/graphs.py +++ b/nmdc_runtime/site/graphs.py @@ -126,15 +126,23 @@ def apply_metadata_in(): @graph def gold_study_to_database(): - study_id = get_gold_study_pipeline_inputs() + (study_id, study_type, gold_nmdc_instrument_mapping_file_url) = ( + get_gold_study_pipeline_inputs() + ) projects = gold_projects_by_study(study_id) biosamples = gold_biosamples_by_study(study_id) analysis_projects = gold_analysis_projects_by_study(study_id) study = gold_study(study_id) + gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url) database = nmdc_schema_database_from_gold_study( - study, projects, biosamples, analysis_projects + study, + study_type, + projects, + biosamples, + analysis_projects, + gold_nmdc_instrument_map_df, ) database_dict = nmdc_schema_object_to_dict(database) filename = nmdc_schema_database_export_filename(study) @@ -147,14 +155,16 @@ def gold_study_to_database(): def translate_metadata_submission_to_nmdc_schema_database(): ( submission_id, - omics_processing_mapping_file_url, + nucleotide_sequencing_mapping_file_url, data_object_mapping_file_url, biosample_extras_file_url, biosample_extras_slot_mapping_file_url, ) = get_submission_portal_pipeline_inputs() metadata_submission = fetch_nmdc_portal_submission_by_id(submission_id) - omics_processing_mapping = get_csv_rows_from_url(omics_processing_mapping_file_url) + nucleotide_sequencing_mapping = get_csv_rows_from_url( + nucleotide_sequencing_mapping_file_url + ) data_object_mapping = get_csv_rows_from_url(data_object_mapping_file_url) biosample_extras = get_csv_rows_from_url(biosample_extras_file_url) biosample_extras_slot_mapping = get_csv_rows_from_url( @@ -163,8 +173,8 @@ def translate_metadata_submission_to_nmdc_schema_database(): database = translate_portal_submission_to_nmdc_schema_database( metadata_submission, - omics_processing_mapping, - data_object_mapping, + nucleotide_sequencing_mapping=nucleotide_sequencing_mapping, + data_object_mapping=data_object_mapping, biosample_extras=biosample_extras, biosample_extras_slot_mapping=biosample_extras_slot_mapping, ) @@ -181,14 +191,16 @@ def translate_metadata_submission_to_nmdc_schema_database(): def ingest_metadata_submission(): ( submission_id, - omics_processing_mapping_file_url, + nucleotide_sequencing_mapping_file_url, data_object_mapping_file_url, biosample_extras_file_url, biosample_extras_slot_mapping_file_url, ) = get_submission_portal_pipeline_inputs() metadata_submission = fetch_nmdc_portal_submission_by_id(submission_id) - omics_processing_mapping = get_csv_rows_from_url(omics_processing_mapping_file_url) + nucleotide_sequencing_mapping = get_csv_rows_from_url( + nucleotide_sequencing_mapping_file_url + ) data_object_mapping = get_csv_rows_from_url(data_object_mapping_file_url) biosample_extras = get_csv_rows_from_url(biosample_extras_file_url) biosample_extras_slot_mapping = get_csv_rows_from_url( @@ -197,8 +209,8 @@ def ingest_metadata_submission(): database = 
translate_portal_submission_to_nmdc_schema_database( metadata_submission, - omics_processing_mapping, - data_object_mapping, + nucleotide_sequencing_mapping=nucleotide_sequencing_mapping, + data_object_mapping=data_object_mapping, biosample_extras=biosample_extras, biosample_extras_slot_mapping=biosample_extras_slot_mapping, ) @@ -217,6 +229,7 @@ def translate_neon_api_soil_metadata_to_nmdc_schema_database(): ( neon_envo_mappings_file_url, neon_raw_data_file_mappings_file_url, + neon_nmdc_instrument_mapping_file_url, ) = get_neon_pipeline_inputs() neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url) @@ -225,8 +238,16 @@ def translate_neon_api_soil_metadata_to_nmdc_schema_database(): neon_raw_data_file_mappings_file_url ) + neon_nmdc_instrument_mapping_file = get_df_from_url( + neon_nmdc_instrument_mapping_file_url + ) + database = nmdc_schema_database_from_neon_soil_data( - mms_data, sls_data, neon_envo_mappings_file, neon_raw_data_file_mappings_file + mms_data, + sls_data, + neon_envo_mappings_file, + neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, ) database_dict = nmdc_schema_object_to_dict(database) @@ -247,6 +268,7 @@ def ingest_neon_soil_metadata(): ( neon_envo_mappings_file_url, neon_raw_data_file_mappings_file_url, + neon_nmdc_instrument_mapping_file_url, ) = get_neon_pipeline_inputs() neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url) @@ -255,8 +277,16 @@ def ingest_neon_soil_metadata(): neon_raw_data_file_mappings_file_url ) + neon_nmdc_instrument_mapping_file = get_df_from_url( + neon_nmdc_instrument_mapping_file_url + ) + database = nmdc_schema_database_from_neon_soil_data( - mms_data, sls_data, neon_envo_mappings_file, neon_raw_data_file_mappings_file + mms_data, + sls_data, + neon_envo_mappings_file, + neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, ) run_id = submit_metadata_to_db(database) poll_for_run_completion(run_id) @@ -267,6 +297,7 @@ def translate_neon_api_benthic_metadata_to_nmdc_schema_database(): ( neon_envo_mappings_file_url, neon_raw_data_file_mappings_file_url, + neon_nmdc_instrument_mapping_file_url, ) = get_neon_pipeline_inputs() mms_benthic_data_product = get_neon_pipeline_benthic_data_product() @@ -280,11 +311,16 @@ def translate_neon_api_benthic_metadata_to_nmdc_schema_database(): neon_raw_data_file_mappings_file_url ) + neon_nmdc_instrument_mapping_file = get_df_from_url( + neon_nmdc_instrument_mapping_file_url + ) + database = nmdc_schema_database_from_neon_benthic_data( mms_benthic, sites_mapping_dict, neon_envo_mappings_file, neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, ) database_dict = nmdc_schema_object_to_dict(database) @@ -305,6 +341,7 @@ def ingest_neon_benthic_metadata(): ( neon_envo_mappings_file_url, neon_raw_data_file_mappings_file_url, + neon_nmdc_instrument_mapping_file_url, ) = get_neon_pipeline_inputs() neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url) @@ -313,11 +350,16 @@ def ingest_neon_benthic_metadata(): neon_raw_data_file_mappings_file_url ) + neon_nmdc_instrument_mapping_file = get_df_from_url( + neon_nmdc_instrument_mapping_file_url + ) + database = nmdc_schema_database_from_neon_benthic_data( mms_benthic, sites_mapping_dict, neon_envo_mappings_file, neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, ) run_id = submit_metadata_to_db(database) poll_for_run_completion(run_id) @@ -334,6 +376,7 @@ def translate_neon_api_surface_water_metadata_to_nmdc_schema_database(): ( 
neon_envo_mappings_file_url, neon_raw_data_file_mappings_file_url, + neon_nmdc_instrument_mapping_file_url, ) = get_neon_pipeline_inputs() neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url) @@ -342,11 +385,16 @@ def translate_neon_api_surface_water_metadata_to_nmdc_schema_database(): neon_raw_data_file_mappings_file_url ) + neon_nmdc_instrument_mapping_file = get_df_from_url( + neon_nmdc_instrument_mapping_file_url + ) + database = nmdc_schema_database_from_neon_surface_water_data( mms_surface_water, sites_mapping_dict, neon_envo_mappings_file, neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, ) database_dict = nmdc_schema_object_to_dict(database) @@ -367,6 +415,7 @@ def ingest_neon_surface_water_metadata(): ( neon_envo_mappings_file_url, neon_raw_data_file_mappings_file_url, + neon_nmdc_instrument_mapping_file_url, ) = get_neon_pipeline_inputs() neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url) @@ -375,11 +424,16 @@ def ingest_neon_surface_water_metadata(): neon_raw_data_file_mappings_file_url ) + neon_nmdc_instrument_mapping_file = get_df_from_url( + neon_nmdc_instrument_mapping_file_url + ) + database = nmdc_schema_database_from_neon_benthic_data( mms_surface_water, sites_mapping_dict, neon_envo_mappings_file, neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, ) run_id = submit_metadata_to_db(database) poll_for_run_completion(run_id) diff --git a/nmdc_runtime/site/ops.py b/nmdc_runtime/site/ops.py index 5a82519b..eb2a5a57 100644 --- a/nmdc_runtime/site/ops.py +++ b/nmdc_runtime/site/ops.py @@ -9,6 +9,7 @@ from io import BytesIO, StringIO from typing import Tuple from zipfile import ZipFile +from itertools import chain import pandas as pd import requests @@ -582,9 +583,24 @@ def add_output_run_event(context: OpExecutionContext, outputs: List[str]): context.log.info(f"No NMDC RunEvent doc for Dagster Run {context.run_id}") -@op(config_schema={"study_id": str}) -def get_gold_study_pipeline_inputs(context: OpExecutionContext) -> str: - return context.op_config["study_id"] +@op( + config_schema={ + "study_id": str, + "study_type": str, + "gold_nmdc_instrument_mapping_file_url": str, + }, + out={ + "study_id": Out(str), + "study_type": Out(str), + "gold_nmdc_instrument_mapping_file_url": Out(str), + }, +) +def get_gold_study_pipeline_inputs(context: OpExecutionContext) -> Tuple[str, str, str]: + return ( + context.op_config["study_id"], + context.op_config["study_type"], + context.op_config["gold_nmdc_instrument_mapping_file_url"], + ) @op(required_resource_keys={"gold_api_client"}) @@ -621,9 +637,11 @@ def gold_study(context: OpExecutionContext, study_id: str) -> Dict[str, Any]: def nmdc_schema_database_from_gold_study( context: OpExecutionContext, study: Dict[str, Any], + study_type: str, projects: List[Dict[str, Any]], biosamples: List[Dict[str, Any]], analysis_projects: List[Dict[str, Any]], + gold_nmdc_instrument_map_df: pd.DataFrame, ) -> nmdc.Database: client: RuntimeApiSiteClient = context.resources.runtime_api_site_client @@ -632,7 +650,13 @@ def id_minter(*args, **kwargs): return response.json() translator = GoldStudyTranslator( - study, biosamples, projects, analysis_projects, id_minter=id_minter + study, + study_type, + biosamples, + projects, + analysis_projects, + gold_nmdc_instrument_map_df, + id_minter=id_minter, ) database = translator.get_database() return database @@ -641,7 +665,7 @@ def id_minter(*args, **kwargs): @op( out={ "submission_id": Out(), - "omics_processing_mapping_file_url": 
Out(Optional[str]), + "nucleotide_sequencing_mapping_file_url": Out(Optional[str]), "data_object_mapping_file_url": Out(Optional[str]), "biosample_extras_file_url": Out(Optional[str]), "biosample_extras_slot_mapping_file_url": Out(Optional[str]), @@ -649,14 +673,14 @@ def id_minter(*args, **kwargs): ) def get_submission_portal_pipeline_inputs( submission_id: str, - omics_processing_mapping_file_url: Optional[str], + nucleotide_sequencing_mapping_file_url: Optional[str], data_object_mapping_file_url: Optional[str], biosample_extras_file_url: Optional[str], biosample_extras_slot_mapping_file_url: Optional[str], ) -> Tuple[str, str | None, str | None, str | None, str | None]: return ( submission_id, - omics_processing_mapping_file_url, + nucleotide_sequencing_mapping_file_url, data_object_mapping_file_url, biosample_extras_file_url, biosample_extras_slot_mapping_file_url, @@ -677,7 +701,7 @@ def fetch_nmdc_portal_submission_by_id( def translate_portal_submission_to_nmdc_schema_database( context: OpExecutionContext, metadata_submission: Dict[str, Any], - omics_processing_mapping: List, + nucleotide_sequencing_mapping: List, data_object_mapping: List, study_category: Optional[str], study_doi_category: Optional[str], @@ -694,8 +718,8 @@ def id_minter(*args, **kwargs): translator = SubmissionPortalTranslator( metadata_submission, - omics_processing_mapping, - data_object_mapping, + nucleotide_sequencing_mapping=nucleotide_sequencing_mapping, + data_object_mapping=data_object_mapping, id_minter=id_minter, study_category=study_category, study_doi_category=study_doi_category, @@ -840,6 +864,7 @@ def nmdc_schema_database_from_neon_soil_data( sls_data: Dict[str, pd.DataFrame], neon_envo_mappings_file: pd.DataFrame, neon_raw_data_file_mappings_file: pd.DataFrame, + neon_nmdc_instrument_mapping_file: pd.DataFrame, ) -> nmdc.Database: client: RuntimeApiSiteClient = context.resources.runtime_api_site_client @@ -852,6 +877,7 @@ def id_minter(*args, **kwargs): sls_data, neon_envo_mappings_file, neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, id_minter=id_minter, ) @@ -866,6 +892,7 @@ def nmdc_schema_database_from_neon_benthic_data( site_code_mapping: Dict[str, str], neon_envo_mappings_file: pd.DataFrame, neon_raw_data_file_mappings_file: pd.DataFrame, + neon_nmdc_instrument_mapping_file: pd.DataFrame, ) -> nmdc.Database: client: RuntimeApiSiteClient = context.resources.runtime_api_site_client @@ -878,6 +905,7 @@ def id_minter(*args, **kwargs): site_code_mapping, neon_envo_mappings_file, neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, id_minter=id_minter, ) @@ -892,6 +920,7 @@ def nmdc_schema_database_from_neon_surface_water_data( site_code_mapping: Dict[str, str], neon_envo_mappings_file: pd.DataFrame, neon_raw_data_file_mappings_file: pd.DataFrame, + neon_nmdc_instrument_mapping_file: pd.DataFrame, ) -> nmdc.Database: client: RuntimeApiSiteClient = context.resources.runtime_api_site_client @@ -904,6 +933,7 @@ def id_minter(*args, **kwargs): site_code_mapping, neon_envo_mappings_file, neon_raw_data_file_mappings_file, + neon_nmdc_instrument_mapping_file, id_minter=id_minter, ) @@ -915,15 +945,18 @@ def id_minter(*args, **kwargs): out={ "neon_envo_mappings_file_url": Out(), "neon_raw_data_file_mappings_file_url": Out(), + "neon_nmdc_instrument_mapping_file_url": Out(), } ) def get_neon_pipeline_inputs( neon_envo_mappings_file_url: str, neon_raw_data_file_mappings_file_url: str, -) -> Tuple[str, str]: + neon_nmdc_instrument_mapping_file_url: str, +) -> Tuple[str, 
str, str]: return ( neon_envo_mappings_file_url, neon_raw_data_file_mappings_file_url, + neon_nmdc_instrument_mapping_file_url, ) @@ -999,47 +1032,101 @@ def materialize_alldocs(context) -> int: mdb = context.resources.mongo.db collection_names = populated_schema_collection_names_with_id_field(mdb) - for name in collection_names: - assert ( - len(collection_name_to_class_names[name]) == 1 - ), f"{name} collection has class name of {collection_name_to_class_names[name]} and len {len(collection_name_to_class_names[name])}" + # Insert a no-op as an anchor point for this comment. + # + # Note: There used to be code here that `assert`-ed that each collection could only contain documents of a single + # type. With the legacy schema, that assertion was true. With the Berkeley schema, it is false. That code was + # in place because subsequent code (further below) used a single document in a collection as the source of the + # class ancestry information of _all_ documents in that collection; an optimization that spared us from + # having to do the same for every single document in that collection. With the Berkeley schema, we have + # eliminated that optimization (since it is inadequate; it would produce some incorrect class ancestries + # for descendants of `PlannedProcess`, for example). + # + pass context.log.info(f"{collection_names=}") # Drop any existing `alldocs` collection (e.g. from previous use of this op). + # + # FIXME: This "nuke and pave" approach introduces a race condition. + # For example, if someone were to visit an API endpoint that uses the "alldocs" collection, + # the endpoint would fail to perform its job since the "alldocs" collection is temporarily missing. + # mdb.alldocs.drop() # Build alldocs context.log.info("constructing `alldocs` collection") - for collection in collection_names: - # Calculate class_hierarchy_as_list once per collection, using the first document in list - try: - nmdcdb = NMDCDatabase( - **{collection: [dissoc(mdb[collection].find_one(), "_id")]} - ) - exemplar = getattr(nmdcdb, collection)[0] - newdoc_type: list[str] = class_hierarchy_as_list(exemplar) - except ValueError as e: - context.log.info(f"Collection {collection} does not exist.") - raise e - + # For each collection, group its documents by their `type` value, transform them, and load them into `alldocs`. + for collection_name in collection_names: context.log.info( - f"Found {mdb[collection].estimated_document_count()} estimated documents for {collection=}." - ) - # For each document in this collection, replace the value of the `type` field with - # a _list_ of the document's own class and ancestor classes, remove the `_id` field, - # and insert the resulting document into the `alldocs` collection. - - inserted_many_result = mdb.alldocs.insert_many( - [ - assoc(dissoc(doc, "type", "_id"), "type", newdoc_type) - for doc in mdb[collection].find() - ] + f"Found {mdb[collection_name].estimated_document_count()} estimated documents for {collection_name=}." ) + + # Process all the distinct `type` values (i.e. value in the `type` field) of the documents in this collection. + # + # References: + # - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.distinct + # + distinct_type_values = mdb[collection_name].distinct(key="type") context.log.info( - f"Inserted {len(inserted_many_result.inserted_ids)} documents for {collection=}." 
+ f"Found {len(distinct_type_values)} distinct `type` values in {collection_name=}: {distinct_type_values=}" ) + for type_value in distinct_type_values: + + # Process all the documents in this collection that have this value in their `type` field. + # + # References: + # - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.count_documents + # - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find + # + filter_ = {"type": type_value} + num_docs_having_type = mdb[collection_name].count_documents(filter=filter_) + docs_having_type = mdb[collection_name].find(filter=filter_) + context.log.info( + f"Found {num_docs_having_type} documents having {type_value=} in {collection_name=}." + ) + + # Get a "representative" document from the result. + # + # Note: Since all of the documents in this batch have the same class ancestry, we will save time by + # determining the class ancestry of only _one_ of them (we call this the "representative") and then + # (later) attributing that class ancestry to all of them. + # + representative_doc = next(docs_having_type) + + # Instantiate the Python class represented by the "representative" document. + db_dict = { + # Shed the `_id` attribute, since the constructor doesn't allow it. + collection_name: [dissoc(representative_doc, "_id")] + } + nmdc_db = NMDCDatabase(**db_dict) + representative_instance = getattr(nmdc_db, collection_name)[0] + + # Get the class ancestry of that instance, as a list of class names (including its own class name). + ancestor_class_names = class_hierarchy_as_list(representative_instance) + + # Store the documents belonging to this group, in the `alldocs` collection, setting their `type` field + # to the list of class names obtained from the "representative" document above. + # + # TODO: Document why clobbering the existing contents of the `type` field is OK. + # + # Note: The reason we `chain()` our "representative" document (in an iterable) with the `docs_having_type` + # iterator here is that, when we called `next(docs_having_type)` above, we "consumed" our + # "representative" document from that iterator. We use `chain()` here so that that document gets + # inserted alongside its cousins (i.e. the documents _still_ accessible via `docs_having_type`). + # Reference: https://docs.python.org/3/library/itertools.html#itertools.chain + # + inserted_many_result = mdb.alldocs.insert_many( + [ + assoc(dissoc(doc, "type", "_id"), "type", ancestor_class_names) + for doc in chain([representative_doc], docs_having_type) + ] + ) + context.log.info( + f"Inserted {len(inserted_many_result.inserted_ids)} documents from {collection_name=} " + f"originally having {type_value=}." 
+ ) # Re-idx for `alldocs` collection mdb.alldocs.create_index("id", unique=True) diff --git a/nmdc_runtime/site/repository.py b/nmdc_runtime/site/repository.py index 80dd26a2..5d7f1987 100644 --- a/nmdc_runtime/site/repository.py +++ b/nmdc_runtime/site/repository.py @@ -501,7 +501,13 @@ def biosample_submission_ingest(): }, ), "ops": { - "get_gold_study_pipeline_inputs": {"config": {"study_id": ""}}, + "get_gold_study_pipeline_inputs": { + "config": { + "study_id": "", + "study_type": "research_study", + "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv", + }, + }, "export_json_to_drs": {"config": {"username": ""}}, }, }, @@ -528,7 +534,7 @@ def biosample_submission_ingest(): "get_submission_portal_pipeline_inputs": { "inputs": { "submission_id": "", - "omics_processing_mapping_file_url": None, + "nucleotide_sequencing_mapping_file_url": None, "data_object_mapping_file_url": None, "biosample_extras_file_url": None, "biosample_extras_slot_mapping_file_url": None, @@ -536,7 +542,7 @@ def biosample_submission_ingest(): }, "translate_portal_submission_to_nmdc_schema_database": { "inputs": { - "study_category": None, + "study_category": "research_study", "study_doi_category": None, "study_doi_provider": None, "study_pi_image_url": None, @@ -566,7 +572,7 @@ def biosample_submission_ingest(): "get_submission_portal_pipeline_inputs": { "inputs": { "submission_id": "", - "omics_processing_mapping_file_url": None, + "nucleotide_sequencing_mapping_file_url": None, "data_object_mapping_file_url": None, "biosample_extras_file_url": None, "biosample_extras_slot_mapping_file_url": None, @@ -636,6 +642,7 @@ def biosample_submission_ingest(): "inputs": { "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv", "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv", + "neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv", } }, }, @@ -677,6 +684,7 @@ def biosample_submission_ingest(): "inputs": { "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv", "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv", + "neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv", } }, }, @@ -719,6 +727,7 @@ def biosample_submission_ingest(): "inputs": { "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv", "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv", + "neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv", } }, "get_neon_pipeline_benthic_data_product": { @@ -760,6 
+769,7 @@ def biosample_submission_ingest(): "inputs": { "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv", "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv", + "neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv", } }, }, @@ -802,6 +812,7 @@ def biosample_submission_ingest(): "inputs": { "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv", "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv", + "neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv", } }, "get_neon_pipeline_surface_water_data_product": { @@ -843,6 +854,7 @@ def biosample_submission_ingest(): "inputs": { "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv", "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv", + "neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv", } }, }, diff --git a/nmdc_runtime/site/translation/gold_translator.py b/nmdc_runtime/site/translation/gold_translator.py index 42d3fe6e..1d312e1f 100644 --- a/nmdc_runtime/site/translation/gold_translator.py +++ b/nmdc_runtime/site/translation/gold_translator.py @@ -1,7 +1,9 @@ import collections +import csv import re from typing import List, Tuple, Union from nmdc_schema import nmdc +import pandas as pd from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator @@ -10,18 +12,22 @@ class GoldStudyTranslator(Translator): def __init__( self, study: JSON_OBJECT = {}, + study_type: str = "research_study", biosamples: List[JSON_OBJECT] = [], projects: List[JSON_OBJECT] = [], analysis_projects: List[JSON_OBJECT] = [], + gold_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(), *args, **kwargs, ) -> None: super().__init__(*args, **kwargs) self.study = study + self.study_type = nmdc.StudyCategoryEnum(study_type) self.biosamples = biosamples self.projects = projects self.analysis_projects = analysis_projects + self.gold_nmdc_instrument_map_df = gold_nmdc_instrument_map_df self._projects_by_id = self._index_by_id(self.projects, "projectGoldId") self._analysis_projects_by_id = self._index_by_id( @@ -69,6 +75,7 @@ def _get_pi(self, gold_entity: JSON_OBJECT) -> Union[nmdc.PersonValue, None]: has_raw_value=pi_dict.get("name"), name=pi_dict.get("name"), email=pi_dict.get("email"), + type="nmdc:PersonValue", ) def _get_mod_date(self, gold_entity: JSON_OBJECT) -> Union[str, None]: @@ -108,22 +115,58 @@ def _get_insdc_biosample_identifiers(self, gold_biosample_id: str) -> List[str]: def _get_samp_taxon_id( self, gold_biosample: JSON_OBJECT - ) -> Union[nmdc.TextValue, None]: - """Get a TextValue representing the NCBI 
taxon for a GOLD biosample + ) -> Union[nmdc.ControlledIdentifiedTermValue, None]: + """Get a ControlledIdentifiedTermValue representing the NCBI taxon + for a GOLD biosample This method gets the `ncbiTaxName` and `ncbiTaxId` from a GOLD biosample object. - If both are not `None`, it constructs a TextValue of the format + If neither is `None`, it constructs a ControlledIdentifiedTermValue of the format `{ncbiTaxName} [NCBITaxon:{ncbiTaxId}]`. Otherwise, it returns `None` :param gold_biosample: GOLD biosample object - :return: TextValue object + :return: ControlledIdentifiedTermValue object """ ncbi_tax_name = gold_biosample.get("ncbiTaxName") ncbi_tax_id = gold_biosample.get("ncbiTaxId") if ncbi_tax_name is None or ncbi_tax_id is None: return None - return nmdc.TextValue(f"{ncbi_tax_name} [NCBITaxon:{ncbi_tax_id}]") + raw_value = f"{ncbi_tax_name} [NCBITaxon:{ncbi_tax_id}]" + + return nmdc.ControlledIdentifiedTermValue( + has_raw_value=raw_value, + term=nmdc.OntologyClass( + id=f"NCBITaxon:{ncbi_tax_id}", + name=ncbi_tax_name, + type="nmdc:OntologyClass", + ), + type="nmdc:ControlledIdentifiedTermValue", + ) + + def _get_host_taxid( + self, gold_biosample: JSON_OBJECT + ) -> Union[nmdc.ControlledIdentifiedTermValue, None]: + """Get a ControlledIdentifiedTermValue representing the NCBI host taxon id + for a GOLD biosample + + This method gets the `hostNcbiTaxid` from a GOLD biosample object. + If that value is not `None`, it constructs a ControlledIdentifiedTermValue whose + raw value has the format `NCBITaxon:{hostNcbiTaxid}`. Otherwise, it returns `None`. + + :param gold_biosample: GOLD biosample object + :return: ControlledIdentifiedTermValue object + """ + host_taxid = gold_biosample.get("hostNcbiTaxid") + if host_taxid is None: + return None + return nmdc.ControlledIdentifiedTermValue( + has_raw_value=f"NCBITaxon:{host_taxid}", + term=nmdc.OntologyClass( + id=f"NCBITaxon:{host_taxid}", + type="nmdc:OntologyClass", + ), + type="nmdc:ControlledIdentifiedTermValue", + ) def _get_samp_name(self, gold_biosample: JSON_OBJECT) -> Union[str, None]: """Get a sample name for a GOLD biosample object @@ -183,7 +226,9 @@ def _get_collection_date( date_collected = gold_biosample.get("dateCollected") if date_collected is None: return None - return nmdc.TimestampValue(has_raw_value=date_collected) + return nmdc.TimestampValue( + has_raw_value=date_collected, type="nmdc:TimestampValue" + ) def _get_quantity_value( self, @@ -215,12 +260,14 @@ def _get_quantity_value( has_raw_value=minimum_numeric_value, has_numeric_value=nmdc.Double(minimum_numeric_value), has_unit=unit, + type="nmdc:QuantityValue", ) else: return nmdc.QuantityValue( has_minimum_numeric_value=nmdc.Double(minimum_numeric_value), has_maximum_numeric_value=nmdc.Double(maximum_numeric_value), has_unit=unit, + type="nmdc:QuantityValue", ) field_value = gold_entity.get(gold_field) @@ -231,6 +278,7 @@ def _get_quantity_value( has_raw_value=field_value, has_numeric_value=nmdc.Double(field_value), has_unit=unit, + type="nmdc:QuantityValue", ) def _get_text_value( @@ -249,7 +297,7 @@ def _get_text_value( field_value = gold_entity.get(gold_field) if field_value is None: return None - return nmdc.TextValue(has_raw_value=field_value) + return nmdc.TextValue(has_raw_value=field_value, type="nmdc:TextValue") def _get_controlled_term_value( self, gold_entity: JSON_OBJECT, gold_field: str @@ -267,7 +315,9 @@ def _get_controlled_term_value( field_value = gold_entity.get(gold_field) if field_value is None: return None - return nmdc.ControlledTermValue(has_raw_value=field_value) + return nmdc.ControlledTermValue( + has_raw_value=field_value, type="nmdc:ControlledTermValue" + )
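To make the new taxon handling concrete, here is a usage sketch of `_get_samp_taxon_id` as defined above; the GOLD biosample fragment and the `translator` instance are invented for illustration:

```python
# Invented GOLD biosample fragment; 410658 is the NCBI taxon id for "soil metagenome".
gold_biosample = {"ncbiTaxName": "soil metagenome", "ncbiTaxId": 410658}

value = translator._get_samp_taxon_id(gold_biosample)

# `value` is now a ControlledIdentifiedTermValue (previously a plain TextValue):
#   value.has_raw_value == "soil metagenome [NCBITaxon:410658]"
#   value.term.id       == "NCBITaxon:410658"
#   value.term.name     == "soil metagenome"
```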
def _get_env_term_value( self, gold_biosample: JSON_OBJECT, gold_field: str @@ -277,8 +327,8 @@ def _get_env_term_value( In GOLD entities ENVO terms are represented as a nested object with `id` and `label` fields. This method extracts this type of nested object by the given field name, and returns it as an `nmdc:ControlledIdentifiedTermValue` object. The `id` in the original - GOLD object be reformatted by replacing `_` with `:` (e.g. `ENVO_00005801` to - `ENVO:00005801`). If the value of the given field is `None` or if does not contain + GOLD object should be reformatted by replacing `_` with `:` (e.g. `ENVO_00005801` to + `ENVO:00005801`). If the value of the given field is `None` or if it does not contain a nested object with an `id` field, `None` is returned. :param gold_biosample: GOLD biosample object @@ -292,8 +342,10 @@ def _get_env_term_value( term=nmdc.OntologyClass( id=env_field["id"].replace("_", ":"), name=env_field.get("label"), + type="nmdc:OntologyClass", ), has_raw_value=env_field["id"], + type="nmdc:ControlledIdentifiedTermValue", ) def _get_lat_lon( @@ -316,22 +368,40 @@ def _get_lat_lon( has_raw_value=f"{latitude} {longitude}", latitude=nmdc.DecimalDegree(latitude), longitude=nmdc.DecimalDegree(longitude), + type="nmdc:GeolocationValue", ) - def _get_instrument_name(self, gold_project: JSON_OBJECT) -> Union[str, None]: - """Get instrument name used in a GOLD project + def _get_instrument(self, gold_project: JSON_OBJECT) -> Union[str, None]: + """Get instrument id referenced in instrument_set collection in Mongo. + Note: The instrument id is not retrieved by making a call to the database, + but rather looked up in the GOLD-to-NMDC instrument mapping TSV that has + been loaded into self.gold_nmdc_instrument_map_df. - This method gets the `seqMethod` field from a GOLD project object. If - that value is not `None` it should be a list and the first element of that - list is returned. If the value of the field is `None`, `None` is returned. + This method gets the seqMethod field from a GOLD project object. If + that value is not None and appears in the mapping's GOLD SeqMethod column, + the corresponding id from the NMDC instrument_set id column is returned. + If the value of the field is None, None is returned; if the value is + present but absent from the mapping, a ValueError is raised. :param gold_project: GOLD project object - :return: Instrument name + :return: id corresponding to an Instrument from instrument_set collection """ seq_method = gold_project.get("seqMethod") if not seq_method: return None - return seq_method[0] + + seq_method = seq_method[0].strip() + df = self.gold_nmdc_instrument_map_df + + matching_row = df[df["GOLD SeqMethod"] == seq_method] + + if not matching_row.empty: + instrument_id = matching_row["NMDC instrument_set id"].values[0] + return instrument_id + + raise ValueError( + f"seqMethod '{seq_method}' could not be found in the GOLD-NMDC instrument mapping TSV file." + )
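The lookup in `_get_instrument` is a plain pandas equality filter over the mapping table. A self-contained sketch of the same pattern, with invented rows and instrument ids standing in for the real gold_seqMethod_to_nmdc_instrument_set.tsv contents:

```python
import pandas as pd

# Invented stand-in for self.gold_nmdc_instrument_map_df.
df = pd.DataFrame(
    {
        "GOLD SeqMethod": ["Illumina NovaSeq 6000", "Illumina HiSeq 2500"],
        "NMDC instrument_set id": ["nmdc:inst-00-000001", "nmdc:inst-00-000002"],
    }
)

seq_method = "Illumina NovaSeq 6000"
matching_row = df[df["GOLD SeqMethod"] == seq_method]

if matching_row.empty:
    raise ValueError(f"seqMethod '{seq_method}' is not in the mapping TSV.")

print(matching_row["NMDC instrument_set id"].values[0])  # nmdc:inst-00-000001
```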
def _get_processing_institution( self, gold_project: JSON_OBJECT @@ -407,6 +477,7 @@ def _translate_study( principal_investigator=self._get_pi(gold_study), title=gold_study.get("studyName"), type="nmdc:Study", + study_category=self.study_type, ) def _translate_biosample( @@ -454,7 +525,7 @@ def _translate_biosample( gold_biosample_identifiers=self._get_curie("gold", gold_biosample_id), habitat=gold_biosample.get("habitat"), host_name=gold_biosample.get("hostName"), - host_taxid=self._get_text_value(gold_biosample, "hostNcbiTaxid"), + host_taxid=self._get_host_taxid(gold_biosample), id=nmdc_biosample_id, img_identifiers=self._get_img_identifiers(gold_biosample_id), insdc_biosample_identifiers=self._get_insdc_biosample_identifiers( @@ -466,7 +537,6 @@ def _translate_biosample( name=gold_biosample.get("biosampleName"), ncbi_taxonomy_name=gold_biosample.get("ncbiTaxName"), nitrite=self._get_quantity_value(gold_biosample, "nitrateConcentration"), - part_of=nmdc_study_id, ph=gold_biosample.get("ph"), pressure=self._get_quantity_value(gold_biosample, "pressure"), samp_name=self._get_samp_name(gold_biosample), @@ -482,47 +552,47 @@ def _translate_biosample( gold_biosample, "sampleCollectionTemperature" ), type="nmdc:Biosample", + associated_studies=[nmdc_study_id], ) - def _translate_omics_processing( + def _translate_nucleotide_sequencing( self, gold_project: JSON_OBJECT, - nmdc_omics_processing_id: str, + nmdc_nucleotide_sequencing_id: str, nmdc_biosample_id: str, nmdc_study_id: str, - ) -> nmdc.OmicsProcessing: - """Translate a GOLD project object into an `nmdc:OmicsProcessing` object. + ): + """Translate a GOLD project object into an `nmdc:NucleotideSequencing` object. - This method translates a GOLD project object into an equivalent `nmdc:OmicsProcessing` + This method translates a GOLD project object into an equivalent `nmdc:NucleotideSequencing` object. Any minted NMDC IDs must be passed to this method. Internally, each - slot of the `nmdc:OmicsProcessing` is either directly pulled from the GOLD object or + slot of the `nmdc:NucleotideSequencing` is either directly pulled from the GOLD object or one of the `_get_*` methods is used.
:param gold_project: GOLD project object - :param nmdc_omics_processing_id: Minted nmdc:OmicsProcessing identifier for the translated object + :param nmdc_nucleotide_sequencing_id: Minted nmdc:NucleotideSequencing identifier for the translated object :param nmdc_biosample_id: Minted nmdc:Biosample identifier for the related Biosample :param nmdc_study_id: Minted nmdc:Study identifier for the related Study - :return: nmdc:OmicsProcessing object + :return: nmdc:NucleotideSequencing object """ gold_project_id = gold_project["projectGoldId"] - return nmdc.OmicsProcessing( - id=nmdc_omics_processing_id, + return nmdc.NucleotideSequencing( + id=nmdc_nucleotide_sequencing_id, name=gold_project.get("projectName"), gold_sequencing_project_identifiers=self._get_curie( "gold", gold_project_id ), ncbi_project_name=gold_project.get("projectName"), - type="nmdc:OmicsProcessing", + type="nmdc:NucleotideSequencing", has_input=nmdc_biosample_id, part_of=nmdc_study_id, add_date=gold_project.get("addDate"), mod_date=self._get_mod_date(gold_project), principal_investigator=self._get_pi(gold_project), - omics_type=self._get_controlled_term_value( - gold_project, "sequencingStrategy" - ), - instrument_name=self._get_instrument_name(gold_project), processing_institution=self._get_processing_institution(gold_project), + instrument_used=self._get_instrument(gold_project), + analyte_category="metagenome", + associated_studies=[nmdc_study_id], ) def get_database(self) -> nmdc.Database: @@ -563,11 +633,11 @@ def get_database(self) -> nmdc.Database: } gold_project_ids = [project["projectGoldId"] for project in self.projects] - nmdc_omics_processing_ids = self._id_minter( - "nmdc:OmicsProcessing", len(gold_project_ids) + nmdc_nucleotide_sequencing_ids = self._id_minter( + "nmdc:NucleotideSequencing", len(gold_project_ids) ) - gold_project_to_nmdc_omics_processing_ids = dict( - zip(gold_project_ids, nmdc_omics_processing_ids) + gold_project_to_nmdc_nucleotide_sequencing_ids = dict( + zip(gold_project_ids, nmdc_nucleotide_sequencing_ids) ) database.study_set = [self._translate_study(self.study, nmdc_study_id)] @@ -585,13 +655,13 @@ def get_database(self) -> nmdc.Database: for biosample in self.biosamples ] database.field_research_site_set = [ - nmdc.FieldResearchSite(id=id, name=name) + nmdc.FieldResearchSite(id=id, name=name, type="nmdc:FieldResearchSite") for name, id in gold_name_to_nmdc_field_site_ids.items() ] - database.omics_processing_set = [ - self._translate_omics_processing( + database.data_generation_set = [ + self._translate_nucleotide_sequencing( project, - nmdc_omics_processing_id=gold_project_to_nmdc_omics_processing_ids[ + nmdc_nucleotide_sequencing_id=gold_project_to_nmdc_nucleotide_sequencing_ids[ project["projectGoldId"] ], nmdc_biosample_id=gold_to_nmdc_biosample_ids[ diff --git a/nmdc_runtime/site/translation/neon_benthic_translator.py b/nmdc_runtime/site/translation/neon_benthic_translator.py index 65c9fdfa..efbd9e7e 100644 --- a/nmdc_runtime/site/translation/neon_benthic_translator.py +++ b/nmdc_runtime/site/translation/neon_benthic_translator.py @@ -1,5 +1,6 @@ import re import sqlite3 +from typing import Union import pandas as pd import requests_cache @@ -47,6 +48,7 @@ def __init__( site_code_mapping: dict, neon_envo_mappings_file: pd.DataFrame, neon_raw_data_file_mappings_file: pd.DataFrame, + neon_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(), *args, **kwargs, ) -> None: @@ -92,13 +94,13 @@ def __init__( ) self.site_code_mapping = site_code_mapping + self.neon_nmdc_instrument_map_df = 
neon_nmdc_instrument_map_df def _translate_biosample( self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame ) -> nmdc.Biosample: return nmdc.Biosample( id=nmdc_id, - part_of="nmdc:sty-11-pzmd0x14", env_broad_scale=_create_controlled_identified_term_value( BENTHIC_BROAD_SCALE_MAPPINGS.get( biosample_row["aquaticSiteType"].values[0] @@ -146,8 +148,10 @@ def _translate_biosample( depth=nmdc.QuantityValue( has_minimum_numeric_value=nmdc.Float("0"), has_maximum_numeric_value=nmdc.Float("1"), - has_unit="meters", + has_unit="m", + type="nmdc:QuantityValue", ), + associated_studies=["nmdc:sty-11-pzmd0x14"], ) def _translate_extraction_process( @@ -187,6 +191,7 @@ ), qc_status=_get_value_or_none(extraction_row, "qaqcStatus"), processing_institution=processing_institution, + type="nmdc:Extraction", ) def _translate_library_preparation( @@ -199,13 +204,13 @@ """ Create LibraryPreparation process object. The input to LibraryPreparation process is the output ProcessedSample from an Extraction process. The output of LibraryPreparation - process is fed as input to an OmicsProcessing object. + process is fed as input to a NucleotideSequencing object. :param library_preparation_id: Minted id for LibraryPreparation process. :param library_preparation_input: Input to LibraryPreparation process is output from Extraction process. :param processed_sample_id: Minted ProcessedSample id which is output of LibraryPreparation - is also input to OmicsProcessing. + is also input to NucleotideSequencing. :param library_preparation_row: Metadata required to populate LibraryPreparation. :return: Object that using LibraryPreparation process model. """ @@ -224,31 +229,47 @@ def _translate_library_preparation( start_date=_get_value_or_none(library_preparation_row, "collectDate"), end_date=_get_value_or_none(library_preparation_row, "processedDate"), processing_institution=processing_institution, + type="nmdc:LibraryPreparation", ) - def _translate_omics_processing( + def _get_instrument_id(self, instrument_model: Union[str, None]) -> str: + if not instrument_model: + raise ValueError( + "instrument_model is missing, so it cannot be looked up in the NEON-NMDC instrument mapping TSV file." + ) + + df = self.neon_nmdc_instrument_map_df + matching_row = df[ + df["NEON sequencingMethod"].str.contains(instrument_model, case=False) + ] + + if matching_row.empty: + raise ValueError( + f"instrument_model '{instrument_model}' could not be found in the NEON-NMDC instrument mapping TSV file." + ) + + return matching_row["NMDC instrument_set id"].values[0] + + def _translate_nucleotide_sequencing( self, - omics_processing_id: str, + nucleotide_sequencing_id: str, processed_sample_id: str, raw_data_file_data: str, - omics_processing_row: pd.DataFrame, - ) -> nmdc.OmicsProcessing: - """Create nmdc OmicsProcessing object. This class typically models the run of a - Bioinformatics workflow on sequence data from a biosample. The input to an OmicsProcessing - process is the output from a LibraryPreparation process, and the output of OmicsProcessing + nucleotide_sequencing_row: pd.DataFrame, + ): + """Create nmdc NucleotideSequencing object. This class typically models the run of a + Bioinformatics workflow on sequence data from a biosample. The input to a NucleotideSequencing + process is the output from a LibraryPreparation process, and the output of NucleotideSequencing is a DataObject which has the FASTQ sequence file URLs embedded in them. - :param omics_processing_id: Minted id for an OmicsProcessing process. 
+ :param nucleotide_sequencing_id: Minted id for a NucleotideSequencing process. :param processed_sample_id: ProcessedSample that is the output of LibraryPreparation. :param raw_data_file_data: R1/R2 DataObjects which have links to workflow processed output files embedded in them. - :param omics_processing_row: DataFrame with metadata for an OmicsProcessing workflow + :param nucleotide_sequencing_row: DataFrame with metadata for a NucleotideSequencing workflow process/run. - :return: OmicsProcessing object that models a Bioinformatics workflow process/run. + :return: NucleotideSequencing object that models a Bioinformatics workflow process/run. """ processing_institution = None sequencing_facility = _get_value_or_none( - omics_processing_row, "sequencingFacilityID" + nucleotide_sequencing_row, "sequencingFacilityID" ) if sequencing_facility is not None: if re.search("Battelle", sequencing_facility, re.IGNORECASE): @@ -256,19 +277,21 @@ elif re.search("Argonne", sequencing_facility, re.IGNORECASE): processing_institution = "ANL" - return nmdc.OmicsProcessing( - id=omics_processing_id, + return nmdc.NucleotideSequencing( + id=nucleotide_sequencing_id, has_input=processed_sample_id, has_output=raw_data_file_data, processing_institution=processing_institution, - ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"), - omics_type=_create_controlled_term_value( - omics_processing_row["investigation_type"].values[0] + ncbi_project_name=_get_value_or_none( + nucleotide_sequencing_row, "ncbiProjectID" + ), + instrument_used=self._get_instrument_id( + _get_value_or_none(nucleotide_sequencing_row, "instrument_model") ), - instrument_name=f"{_get_value_or_none(omics_processing_row, 'sequencingMethod')} {_get_value_or_none(omics_processing_row, 'instrument_model')}", - part_of="nmdc:sty-11-34xj1150", - name=f"Terrestrial soil microbial communities - {_get_value_or_none(omics_processing_row, 'dnaSampleID')}", - type="nmdc:OmicsProcessing", + name=f"Benthic microbial communities - {_get_value_or_none(nucleotide_sequencing_row, 'dnaSampleID')}", + type="nmdc:NucleotideSequencing", + associated_studies=["nmdc:sty-11-pzmd0x14"], + analyte_category="metagenome", ) def _translate_processed_sample( @@ -285,12 +308,14 @@ :param sample_id: Value from `genomicsSampleID` or `dnaSampleID` column. :return: ProcessedSample objects to be stored in `processed_sample_set`. """ - return nmdc.ProcessedSample(id=processed_sample_id, name=sample_id) + return nmdc.ProcessedSample( + id=processed_sample_id, name=sample_id, type="nmdc:ProcessedSample" + ) def _translate_data_object( self, do_id: str, url: str, do_type: str, checksum: str ) -> nmdc.DataObject: - """Create nmdc DataObject which is the output of an OmicsProcessing process. This + """Create nmdc DataObject which is the output of a NucleotideSequencing process. This object mainly contains information about the sequencing file that was generated as the result of running a Bioinformatics workflow on a certain ProcessedSample, which is the result of a LibraryPreparation process. 
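Note the contrast with the GOLD translator: here the mapping lookup is a case-insensitive substring match (`str.contains`) against the NEON sequencingMethod column rather than an exact equality test. A standalone sketch with invented rows:

```python
import pandas as pd

# Invented stand-in for self.neon_nmdc_instrument_map_df.
df = pd.DataFrame(
    {
        "NEON sequencingMethod": ["Illumina NextSeq 550", "Illumina MiSeq"],
        "NMDC instrument_set id": ["nmdc:inst-00-000003", "nmdc:inst-00-000004"],
    }
)

instrument_model = "nextseq 550"  # matches despite the case difference
matching_row = df[
    df["NEON sequencingMethod"].str.contains(instrument_model, case=False)
]
print(matching_row["NMDC instrument_set id"].values[0])  # nmdc:inst-00-000003
```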
@@ -417,7 +442,9 @@ def get_database(self): ) neon_omprc_ids = benthic_samples["sampleID"] - nmdc_omprc_ids = self._id_minter("nmdc:OmicsProcessing", len(neon_omprc_ids)) + nmdc_omprc_ids = self._id_minter( + "nmdc:NucleotideSequencing", len(neon_omprc_ids) + ) neon_to_nmdc_omprc_ids = dict(zip(neon_omprc_ids, nmdc_omprc_ids)) neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df @@ -443,7 +470,7 @@ def get_database(self): processed_sample_id = neon_to_nmdc_extraction_processed_ids.get(neon_id) if extraction_input is not None and processed_sample_id is not None: - database.extraction_set.append( + database.material_processing_set.append( self._translate_extraction_process( nmdc_id, extraction_input, @@ -487,7 +514,7 @@ def get_database(self): processed_sample_id = neon_to_nmdc_lib_prep_processed_ids.get(neon_id) if lib_prep_input is not None and processed_sample_id is not None: - database.library_preparation_set.append( + database.material_processing_set.append( self._translate_library_preparation( nmdc_id, lib_prep_input, @@ -534,8 +561,8 @@ def get_database(self): ) ) - database.omics_processing_set.append( - self._translate_omics_processing( + database.data_generation_set.append( + self._translate_nucleotide_sequencing( neon_to_nmdc_omprc_ids.get(neon_id), processed_sample_id, has_output_do_ids, diff --git a/nmdc_runtime/site/translation/neon_soil_translator.py b/nmdc_runtime/site/translation/neon_soil_translator.py index a634e2d3..adf1132d 100644 --- a/nmdc_runtime/site/translation/neon_soil_translator.py +++ b/nmdc_runtime/site/translation/neon_soil_translator.py @@ -1,6 +1,6 @@ import re import sqlite3 -from typing import List +from typing import List, Union import pandas as pd @@ -26,6 +26,7 @@ def __init__( sls_data: dict, neon_envo_mappings_file: pd.DataFrame, neon_raw_data_file_mappings_file: pd.DataFrame, + neon_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(), *args, **kwargs, ) -> None: @@ -99,6 +100,23 @@ def __init__( "neonRawDataFile", self.conn, if_exists="replace", index=False ) + self.neon_nmdc_instrument_map_df = neon_nmdc_instrument_map_df + + def _get_instrument_id(self, instrument_model: Union[str, None]) -> str: + if not instrument_model: + raise ValueError( + "instrument_model is missing, so it cannot be looked up in the NEON-NMDC instrument mapping TSV file." 
+ ) + + df = self.neon_nmdc_instrument_map_df + matching_row = df[ + df["NEON sequencingMethod"].str.contains(instrument_model, case=False) + ] + + if matching_row.empty: + raise ValueError( + f"instrument_model '{instrument_model}' could not be found in the NEON-NMDC instrument mapping TSV file." + ) + + return matching_row["NMDC instrument_set id"].values[0] + def _translate_biosample( self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame ) -> nmdc.Biosample: @@ -116,7 +134,6 @@ """ return nmdc.Biosample( id=nmdc_id, - part_of="nmdc:sty-11-34xj1150", env_broad_scale=_create_controlled_identified_term_value( "ENVO:00000446", "terrestrial biome" ), @@ -145,6 +162,7 @@ biosample_row, "sampleBottomDepth" ), has_unit="m", + type="nmdc:QuantityValue", ), samp_collec_device=_get_value_or_none(biosample_row, "soilSamplingDevice"), soil_horizon=_get_value_or_none(biosample_row, "horizon"), @@ -172,6 +190,7 @@ biosample_row["kclNitrateNitriteNConc"].values[0], "mg/L" ), type="nmdc:Biosample", + associated_studies=["nmdc:sty-11-34xj1150"], ) def _translate_pooling_process( @@ -198,6 +217,7 @@ has_input=bsm_input_values_list, start_date=_get_value_or_none(pooling_row, "startDate"), end_date=_get_value_or_none(pooling_row, "collectDate"), + type="nmdc:Pooling", ) def _translate_processed_sample( @@ -214,12 +234,14 @@ :param sample_id: Value from `genomicsSampleID` or `dnaSampleID` column. :return: ProcessedSample objects to be stored in `processed_sample_set`. """ - return nmdc.ProcessedSample(id=processed_sample_id, name=sample_id) + return nmdc.ProcessedSample( + id=processed_sample_id, name=sample_id, type="nmdc:ProcessedSample" + ) def _translate_data_object( self, do_id: str, url: str, do_type: str, checksum: str ) -> nmdc.DataObject: - """Create nmdc DataObject which is the output of an OmicsProcessing process. This + """Create nmdc DataObject which is the output of a NucleotideSequencing process. This object mainly contains information about the sequencing file that was generated as the result of running a Bioinformatics workflow on a certain ProcessedSample, which is the result of a LibraryPreparation process. @@ -282,6 +304,7 @@ def _translate_extraction_process( ), qc_status=_get_value_or_none(extraction_row, "qaqcStatus"), processing_institution=processing_institution, + type="nmdc:Extraction", ) def _translate_library_preparation( @@ -294,13 +317,13 @@ """ Create LibraryPreparation process object. The input to LibraryPreparation process is the output ProcessedSample from an Extraction process. The output of LibraryPreparation - process is fed as input to an OmicsProcessing object. + process is fed as input to a NucleotideSequencing object. :param library_preparation_id: Minted id for LibraryPreparation process. :param library_preparation_input: Input to LibraryPreparation process is output from Extraction process. :param processed_sample_id: Minted ProcessedSample id which is output of LibraryPreparation - is also input to OmicsProcessing. + is also input to NucleotideSequencing. :param library_preparation_row: Metadata required to populate LibraryPreparation. :return: Object that using LibraryPreparation process model. """
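After this refactor the soil translator no longer maintains separate `pooling_set`, `extraction_set`, and `library_preparation_set` collections; those records all land in `material_processing_set`, with sequencing runs in `data_generation_set`, and the records stay chained through `has_input`/`has_output`. A schematic of that chain, using plain dicts and invented ids (real records carry many more slots):

```python
# Extraction consumes a Biosample and produces a ProcessedSample (extracted DNA).
extraction = {
    "id": "nmdc:extrp-00-000001", "type": "nmdc:Extraction",
    "has_input": ["nmdc:bsm-00-000001"],
    "has_output": ["nmdc:procsm-00-000001"],
}
# LibraryPreparation consumes that ProcessedSample and produces the prepared library.
library_preparation = {
    "id": "nmdc:libprp-00-000001", "type": "nmdc:LibraryPreparation",
    "has_input": ["nmdc:procsm-00-000001"],
    "has_output": ["nmdc:procsm-00-000002"],
}
# NucleotideSequencing consumes the library and outputs raw-read DataObjects.
nucleotide_sequencing = {
    "id": "nmdc:dgns-00-000001", "type": "nmdc:NucleotideSequencing",
    "has_input": ["nmdc:procsm-00-000002"],
    "has_output": ["nmdc:dobj-00-000001"],
}
# The first two belong in material_processing_set; the last in data_generation_set.
```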
""" @@ -319,31 +342,32 @@ def _translate_library_preparation( start_date=_get_value_or_none(library_preparation_row, "collectDate"), end_date=_get_value_or_none(library_preparation_row, "processedDate"), processing_institution=processing_institution, + type="nmdc:LibraryPreparation", ) - def _translate_omics_processing( + def _translate_nucleotide_sequencing( self, - omics_processing_id: str, + nucleotide_sequencing_id: str, processed_sample_id: str, raw_data_file_data: str, - omics_processing_row: pd.DataFrame, - ) -> nmdc.OmicsProcessing: - """Create nmdc OmicsProcessing object. This class typically models the run of a - Bioinformatics workflow on sequence data from a biosample. The input to an OmicsProcessing - process is the output from a LibraryPreparation process, and the output of OmicsProcessing + nucleotide_sequencing_row: pd.DataFrame, + ): + """Create nmdc NucleotideSequencing object. This class typically models the run of a + Bioinformatics workflow on sequence data from a biosample. The input to an NucleotideSequencing + process is the output from a LibraryPreparation process, and the output of NucleotideSequencing is a DataObject which has the FASTQ sequence file URLs embedded in them. - :param omics_processing_id: Minted id for an OmicsProcessing process. + :param nucleotide_sequencing_id: Minted id for an NucleotideSequencing process. :param processed_sample_id: ProcessedSample that is the output of LibraryPreparation. :param raw_data_file_data: R1/R2 DataObjects which have links to workflow processed output files embedded in them. - :param omics_processing_row: DataFrame with metadata for an OmicsProcessing workflow + :param nucleotide_sequencing_row: DataFrame with metadata for an NucleotideSequencing workflow process/run. - :return: OmicsProcessing object that models a Bioinformatics workflow process/run. + :return: NucleotideSequencing object that models a Bioinformatics workflow process/run. 
""" processing_institution = None sequencing_facility = _get_value_or_none( - omics_processing_row, "sequencingFacilityID" + nucleotide_sequencing_row, "sequencingFacilityID" ) if sequencing_facility is not None: if re.search("Battelle", sequencing_facility, re.IGNORECASE): @@ -351,19 +375,21 @@ def _translate_omics_processing( elif re.search("Argonne", sequencing_facility, re.IGNORECASE): processing_institution = "ANL" - return nmdc.OmicsProcessing( - id=omics_processing_id, + return nmdc.NucleotideSequencing( + id=nucleotide_sequencing_id, has_input=processed_sample_id, has_output=raw_data_file_data, processing_institution=processing_institution, - ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"), - omics_type=_create_controlled_term_value( - omics_processing_row["investigation_type"].values[0] + ncbi_project_name=_get_value_or_none( + nucleotide_sequencing_row, "ncbiProjectID" + ), + instrument_used=self._get_instrument_id( + _get_value_or_none(nucleotide_sequencing_row, "instrument_model") ), - instrument_name=f"{_get_value_or_none(omics_processing_row, 'sequencingMethod')} {_get_value_or_none(omics_processing_row, 'instrument_model')}", - part_of="nmdc:sty-11-34xj1150", - name=f"Terrestrial soil microbial communities - {_get_value_or_none(omics_processing_row, 'dnaSampleID')}", - type="nmdc:OmicsProcessing", + name=f"Terrestrial soil microbial communities - {_get_value_or_none(nucleotide_sequencing_row, 'dnaSampleID')}", + type="nmdc:NucleotideSequencing", + associated_studies=["nmdc:sty-11-34xj1150"], + analyte_category="metagenome", ) def get_database(self) -> nmdc.Database: @@ -371,10 +397,9 @@ def get_database(self) -> nmdc.Database: nmdc object creation methods as well as the nmdc type (QuantityValue, GeolocationValue, etc.) creation methods, to make an nmdc Database object. It populates multiple sets in the Mongo database - * `biosample_set`: uses `_translate_biosample()` - * `pooling_set`: uses `_translate_pooling_process()` - * `extraction_set`: uses `_translate_extraction_process()` - * `library_preparation_set`: uses `_translate_library_preparation()` - * `omics_processing_set`: uses `_translate_omics_processing()` + * `material_processing_set`: uses `_translate_pooling_process()`, `_translate_extraction_process()`, + `_translate_library_preparation()` + * `data_generation_set`: uses `_translate_nucleotide_sequencing()` * `processed_sample_set`: uses `_translate_processed_sample()` * `data_object_set`: uses `_translate_data_object()` The core Biosample information is in the `sls_soilCoreCollection` table. 
However, we @@ -605,14 +630,13 @@ def get_database(self) -> nmdc.Database: mms_metagenomeDnaExtraction.processedDate, mms_metagenomeSequencing.sequencingFacilityID, mms_metagenomeSequencing.ncbiProjectID, - mms_metagenomeSequencing.investigation_type, mms_metagenomeSequencing.sequencingMethod, mms_metagenomeSequencing.instrument_model FROM mms_metagenomeSequencing LEFT JOIN mms_metagenomeDnaExtraction ON mms_metagenomeDnaExtraction.dnaSampleID = mms_metagenomeSequencing.dnaSampleID """ library_preparation_table = pd.read_sql_query(query, self.conn) - omics_processing_table = pd.read_sql_query(query, self.conn) + nucleotide_sequencing_table = pd.read_sql_query(query, self.conn) nmdc_pooling_ids = self._id_minter("nmdc:Pooling", len(pooling_ids_dict)) neon_to_nmdc_pooling_ids = dict( @@ -651,12 +675,12 @@ zip(library_prepration_ids, nmdc_library_preparation_processed_sample_ids) ) - omics_processing_ids = omics_processing_table["dnaSampleID"] - nmdc_omics_processing_ids = self._id_minter( - "nmdc:OmicsProcessing", len(omics_processing_ids) + nucleotide_sequencing_ids = nucleotide_sequencing_table["dnaSampleID"] + nmdc_nucleotide_sequencing_ids = self._id_minter( + "nmdc:NucleotideSequencing", len(nucleotide_sequencing_ids) ) - neon_to_nmdc_omics_processing_ids = dict( - zip(omics_processing_ids, nmdc_omics_processing_ids) + neon_to_nmdc_nucleotide_sequencing_ids = dict( + zip(nucleotide_sequencing_ids, nmdc_nucleotide_sequencing_ids) ) neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df @@ -699,7 +723,7 @@ # if the number of biosamples that are input to a pooling process # is one or less, then ignore it and go straight to extraction if len(bsm_values_list) > 1: - database.pooling_set.append( + database.material_processing_set.append( self._translate_pooling_process( pooling_process_id, processed_sample_id, @@ -732,7 +756,7 @@ # handler for creating extraction process records # for both pooled and non-pooled samples if "|" in genomics_pooled_id_list: - database.extraction_set.append( + database.material_processing_set.append( self._translate_extraction_process( extraction_id, extraction_input, @@ -753,7 +777,7 @@ extraction_input = neon_to_nmdc_biosample_ids[neon_biosample_id] - database.extraction_set.append( + database.material_processing_set.append( self._translate_extraction_process( extraction_id, extraction_input, @@ -770,7 +794,9 @@ dna_sample_id ] - omics_processing_id = neon_to_nmdc_omics_processing_ids[dna_sample_id] + nucleotide_sequencing_id = neon_to_nmdc_nucleotide_sequencing_ids[ + dna_sample_id + ] genomics_sample_id = library_preparation_table[ library_preparation_table["dnaSampleID"] == dna_sample_id @@ -785,7 +811,7 @@ library_preparation_table["dnaSampleID"] == dna_sample_id ] - database.library_preparation_set.append( + database.material_processing_set.append( self._translate_library_preparation( library_preparation_id, library_preparation_input, @@ -807,9 +833,9 @@ if item in neon_to_nmdc_data_object_ids: has_output_do_ids.append(neon_to_nmdc_data_object_ids[item]) - database.omics_processing_set.append( - self._translate_omics_processing( - omics_processing_id, + database.data_generation_set.append( + self._translate_nucleotide_sequencing( + nucleotide_sequencing_id, processed_sample_id, has_output_do_ids, library_preparation_row,
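Both the `library_preparation_table` and the `nucleotide_sequencing_table` above come from the same LEFT JOIN keyed on `dnaSampleID`, and the translator then mints one NMDC id per `dnaSampleID` and keeps a dict from NEON keys to minted ids. A compact sketch of that mint-and-zip pattern, with an invented stub in place of the Runtime's real minting API:

```python
def id_minter(type_name: str, how_many: int) -> list[str]:
    # Invented stub; the translator's real _id_minter calls the Runtime API.
    return [f"{type_name}-example-{i}" for i in range(how_many)]

dna_sample_ids = ["BMI_Plot1-DNA1", "BMI_Plot2-DNA1"]  # invented dnaSampleID values
nmdc_ids = id_minter("nmdc:NucleotideSequencing", len(dna_sample_ids))

# One stable mapping from NEON keys to freshly minted NMDC ids:
neon_to_nmdc_ids = dict(zip(dna_sample_ids, nmdc_ids))
assert neon_to_nmdc_ids["BMI_Plot2-DNA1"] == "nmdc:NucleotideSequencing-example-1"
```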
diff --git a/nmdc_runtime/site/translation/neon_surface_water_translator.py b/nmdc_runtime/site/translation/neon_surface_water_translator.py index bf5d8539..2e05c6eb 100644 --- a/nmdc_runtime/site/translation/neon_surface_water_translator.py +++ b/nmdc_runtime/site/translation/neon_surface_water_translator.py @@ -1,6 +1,6 @@ import re import sqlite3 -from typing import Dict, Optional +from typing import Dict, Optional, Union import pandas as pd import requests @@ -36,6 +36,7 @@ "term_id": "ENVO:01000409", "term_name": "freshwater littoral zone", }, + "inflow": {"term_id": "ENVO:00000476", "term_name": "lake inlet"}, }, "river": {"term_id": "ENVO:01000297", "term_name": "freshwater river"}, "stream": {"term_id": "ENVO:03605007", "term_name": "freshwater stream"}, @@ -58,6 +59,7 @@ def __init__( site_code_mapping: dict, neon_envo_mappings_file: pd.DataFrame, neon_raw_data_file_mappings_file: pd.DataFrame, + neon_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(), *args, **kwargs, ) -> None: @@ -108,6 +110,8 @@ def __init__( self.site_code_mapping = site_code_mapping + self.neon_nmdc_instrument_map_df = neon_nmdc_instrument_map_df + def _translate_biosample( self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame ) -> nmdc.Biosample: @@ -136,16 +140,17 @@ def map_local_scale( has_minimum_numeric_value=nmdc.Float(minimum_depth), has_maximum_numeric_value=nmdc.Float(maximum_depth), has_unit="m", + type="nmdc:QuantityValue", ) else: depth = nmdc.QuantityValue( has_numeric_value=nmdc.Float(minimum_depth), has_unit="m", + type="nmdc:QuantityValue", ) return nmdc.Biosample( id=nmdc_id, - part_of="nmdc:sty-11-hht5sb92", env_broad_scale=_create_controlled_identified_term_value( SURFACE_WATER_BROAD_SCALE_MAPPINGS.get( biosample_row["aquaticSiteType"].values[0] @@ -201,7 +206,8 @@ map_local_scale( samp_size=_create_quantity_value( biosample_row["geneticFilteredSampleVolume"].values[0], "mL" ), - env_package=nmdc.TextValue(has_raw_value="water"), + env_package=nmdc.TextValue(has_raw_value="water", type="nmdc:TextValue"), + associated_studies=["nmdc:sty-11-hht5sb92"], ) def _translate_extraction_process( @@ -243,6 +249,7 @@ _get_value_or_none(extraction_row, "extrQaqcStatus") ), processing_institution=processing_institution, + type="nmdc:Extraction", ) def _translate_library_preparation( @@ -255,13 +262,13 @@ """ Create LibraryPreparation process object. The input to LibraryPreparation process is the output ProcessedSample from an Extraction process. The output of LibraryPreparation - process is fed as input to an OmicsProcessing object. + process is fed as input to a NucleotideSequencing object. :param library_preparation_id: Minted id for LibraryPreparation process. :param library_preparation_input: Input to LibraryPreparation process is output from Extraction process. :param processed_sample_id: Minted ProcessedSample id which is output of LibraryPreparation - is also input to OmicsProcessing. + is also input to NucleotideSequencing. :param library_preparation_row: Metadata required to populate LibraryPreparation. :return: Object that using LibraryPreparation process model. """
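The depth handling above branches on whether a maximum depth is available, and every QuantityValue now carries an explicit `type`. The same branching, extracted as a runnable sketch (the numeric values are invented):

```python
from nmdc_schema import nmdc

minimum_depth, maximum_depth = 0.0, 0.5  # invented values, in meters

if maximum_depth is not None:
    # A depth range becomes a min/max pair on one QuantityValue.
    depth = nmdc.QuantityValue(
        has_minimum_numeric_value=nmdc.Float(minimum_depth),
        has_maximum_numeric_value=nmdc.Float(maximum_depth),
        has_unit="m",
        type="nmdc:QuantityValue",
    )
else:
    # A single measurement becomes a plain numeric value.
    depth = nmdc.QuantityValue(
        has_numeric_value=nmdc.Float(minimum_depth),
        has_unit="m",
        type="nmdc:QuantityValue",
    )
```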
""" @@ -280,31 +287,47 @@ def _translate_library_preparation( start_date=_get_value_or_none(library_preparation_row, "seqCollectDate"), end_date=_get_value_or_none(library_preparation_row, "seqProcessedDate"), processing_institution=processing_institution, + type="nmdc:LibraryPreparation", ) - def _translate_omics_processing( + def _get_instrument_id(self, instrument_model: Union[str | None]) -> str: + if not instrument_model: + raise ValueError( + f"instrument_model '{instrument_model}' could not be found in the NEON-NMDC instrument mapping TSV file." + ) + + df = self.neon_nmdc_instrument_map_df + matching_row = df[ + df["NEON sequencingMethod"].str.contains(instrument_model, case=False) + ] + + if not matching_row.empty: + nmdc_instrument_id = matching_row["NMDC instrument_set id"].values[0] + return nmdc_instrument_id + + def _translate_nucleotide_sequencing( self, - omics_processing_id: str, + nucleotide_sequencing_id: str, processed_sample_id: str, raw_data_file_data: str, - omics_processing_row: pd.DataFrame, - ) -> nmdc.OmicsProcessing: - """Create nmdc OmicsProcessing object. This class typically models the run of a - Bioinformatics workflow on sequence data from a biosample. The input to an OmicsProcessing - process is the output from a LibraryPreparation process, and the output of OmicsProcessing + nucleotide_sequencing_row: pd.DataFrame, + ): + """Create nmdc NucleotideSequencing object. This class typically models the run of a + Bioinformatics workflow on sequence data from a biosample. The input to an NucleotideSequencing + process is the output from a LibraryPreparation process, and the output of NucleotideSequencing is a DataObject which has the FASTQ sequence file URLs embedded in them. - :param omics_processing_id: Minted id for an OmicsProcessing process. + :param nucleotide_sequencing_id: Minted id for an NucleotideSequencing process. :param processed_sample_id: ProcessedSample that is the output of LibraryPreparation. :param raw_data_file_data: R1/R2 DataObjects which have links to workflow processed output files embedded in them. - :param omics_processing_row: DataFrame with metadata for an OmicsProcessing workflow + :param nucleotide_sequencing_row: DataFrame with metadata for an NucleotideSequencing workflow process/run. - :return: OmicsProcessing object that models a Bioinformatics workflow process/run. + :return: NucleotideSequencing object that models a Bioinformatics workflow process/run. 
""" processing_institution = None sequencing_facility = _get_value_or_none( - omics_processing_row, "sequencingFacilityID" + nucleotide_sequencing_row, "sequencingFacilityID" ) if sequencing_facility is not None: if re.search("Battelle", sequencing_facility, re.IGNORECASE): @@ -312,19 +335,21 @@ def _translate_omics_processing( elif re.search("Argonne", sequencing_facility, re.IGNORECASE): processing_institution = "ANL" - return nmdc.OmicsProcessing( - id=omics_processing_id, + return nmdc.NucleotideSequencing( + id=nucleotide_sequencing_id, has_input=processed_sample_id, has_output=raw_data_file_data, processing_institution=processing_institution, - ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"), - omics_type=_create_controlled_term_value( - omics_processing_row["investigation_type"].values[0] + ncbi_project_name=_get_value_or_none( + nucleotide_sequencing_row, "ncbiProjectID" + ), + instrument_used=self._get_instrument_id( + _get_value_or_none(nucleotide_sequencing_row, "instrument_model") ), - instrument_name=f"{_get_value_or_none(omics_processing_row, 'sequencingMethod')} {_get_value_or_none(omics_processing_row, 'instrument_model')}", - part_of="nmdc:sty-11-hht5sb92", - name=f"Surface water microbial communities - {_get_value_or_none(omics_processing_row, 'dnaSampleID')}", - type="nmdc:OmicsProcessing", + name=f"Surface water microbial communities - {_get_value_or_none(nucleotide_sequencing_row, 'dnaSampleID')}", + type="nmdc:NucleotideSequencing", + associated_studies=["nmdc:sty-11-hht5sb92"], + analyte_category="metagenome", ) def _translate_processed_sample( @@ -341,12 +366,14 @@ def _translate_processed_sample( :param sample_id: Value from `genomicsSampleID` or `dnaSampleID` column. :return: ProcessedSample objects to be stored in `processed_sample_set`. """ - return nmdc.ProcessedSample(id=processed_sample_id, name=sample_id) + return nmdc.ProcessedSample( + id=processed_sample_id, name=sample_id, type="nmdc:ProcessedSample" + ) def _translate_data_object( self, do_id: str, url: str, do_type: str, checksum: str ) -> nmdc.DataObject: - """Create nmdc DataObject which is the output of an OmicsProcessing process. This + """Create nmdc DataObject which is the output of a NucleotideSequencing process. This object mainly contains information about the sequencing file that was generated as the result of running a Bioinformatics workflow on a certain ProcessedSample, which is the result of a LibraryPreparation process. 
@@ -485,7 +512,9 @@ def get_database(self): ) neon_omprc_ids = surface_water_samples["parentSampleID"] - nmdc_omprc_ids = self._id_minter("nmdc:OmicsProcessing", len(neon_omprc_ids)) + nmdc_omprc_ids = self._id_minter( + "nmdc:NucleotideSequencing", len(neon_omprc_ids) + ) neon_to_nmdc_omprc_ids = dict(zip(neon_omprc_ids, nmdc_omprc_ids)) neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df @@ -515,7 +544,7 @@ def get_database(self): processed_sample_id = neon_to_nmdc_extraction_processed_ids.get(neon_id) if extraction_input is not None and processed_sample_id is not None: - database.extraction_set.append( + database.material_processing_set.append( self._translate_extraction_process( nmdc_id, extraction_input, @@ -561,7 +590,7 @@ def get_database(self): processed_sample_id = neon_to_nmdc_lib_prep_processed_ids.get(neon_id) if lib_prep_input is not None and processed_sample_id is not None: - database.library_preparation_set.append( + database.material_processing_set.append( self._translate_library_preparation( nmdc_id, lib_prep_input, @@ -608,8 +637,8 @@ def get_database(self): ) ) - database.omics_processing_set.append( - self._translate_omics_processing( + database.data_generation_set.append( + self._translate_nucleotide_sequencing( neon_to_nmdc_omprc_ids.get(neon_id), processed_sample_id, has_output_do_ids, diff --git a/nmdc_runtime/site/translation/neon_utils.py b/nmdc_runtime/site/translation/neon_utils.py index 75183960..000707f8 100644 --- a/nmdc_runtime/site/translation/neon_utils.py +++ b/nmdc_runtime/site/translation/neon_utils.py @@ -50,7 +50,14 @@ def _create_controlled_identified_term_value( """ if id is None or name is None: return None - return nmdc.ControlledIdentifiedTermValue(term=nmdc.OntologyClass(id=id, name=name)) + return nmdc.ControlledIdentifiedTermValue( + term=nmdc.OntologyClass( + id=id, + name=name, + type="nmdc:OntologyClass", + ), + type="nmdc:ControlledIdentifiedTermValue", + ) def _create_controlled_term_value(name: str = None) -> nmdc.ControlledTermValue: @@ -64,7 +71,10 @@ def _create_controlled_term_value(name: str = None) -> nmdc.ControlledTermValue: """ if name is None: return None - return nmdc.ControlledTermValue(has_raw_value=name) + return nmdc.ControlledTermValue( + has_raw_value=name, + type="nmdc:ControlledTermValue", + ) def _create_timestamp_value(value: str = None) -> nmdc.TimestampValue: @@ -77,7 +87,7 @@ def _create_timestamp_value(value: str = None) -> nmdc.TimestampValue: """ if value is None: return None - return nmdc.TimestampValue(has_raw_value=value) + return nmdc.TimestampValue(has_raw_value=value, type="nmdc:TimestampValue") def _create_quantity_value( @@ -94,7 +104,9 @@ def _create_quantity_value( """ if numeric_value is None or math.isnan(numeric_value): return None - return nmdc.QuantityValue(has_numeric_value=float(numeric_value), has_unit=unit) + return nmdc.QuantityValue( + has_numeric_value=float(numeric_value), has_unit=unit, type="nmdc:QuantityValue" + ) def _create_text_value(value: str = None) -> nmdc.TextValue: @@ -106,7 +118,7 @@ def _create_text_value(value: str = None) -> nmdc.TextValue: """ if value is None: return None - return nmdc.TextValue(has_raw_value=value) + return nmdc.TextValue(has_raw_value=value, type="nmdc:TextValue") def _create_double_value(value: str = None) -> nmdc.Double: @@ -119,7 +131,7 @@ def _create_double_value(value: str = None) -> nmdc.Double: """ if value is None or math.isnan(value): return None - return nmdc.Double(value) + return nmdc.Double(value, type="nmdc:Double") def 
_create_geolocation_value( @@ -147,4 +159,5 @@ def _create_geolocation_value( return nmdc.GeolocationValue( latitude=nmdc.DecimalDegree(latitude), longitude=nmdc.DecimalDegree(longitude), + type="nmdc:GeolocationValue", ) diff --git a/nmdc_runtime/site/translation/submission_portal_translator.py b/nmdc_runtime/site/translation/submission_portal_translator.py index fff4648b..dc36ebf0 100644 --- a/nmdc_runtime/site/translation/submission_portal_translator.py +++ b/nmdc_runtime/site/translation/submission_portal_translator.py @@ -64,9 +64,9 @@ class SubmissionPortalTranslator(Translator): def __init__( self, metadata_submission: JSON_OBJECT = {}, - omics_processing_mapping: Optional[list] = None, - data_object_mapping: Optional[list] = None, *args, + nucleotide_sequencing_mapping: Optional[list] = None, + data_object_mapping: Optional[list] = None, # Additional study-level metadata not captured by the submission portal currently # See: https://github.com/microbiomedata/submission-schema/issues/162 study_doi_category: Optional[str] = None, @@ -84,7 +84,7 @@ def __init__( super().__init__(*args, **kwargs) self.metadata_submission = metadata_submission - self.omics_processing_mapping = omics_processing_mapping + self.nucleotide_sequencing_mapping = nucleotide_sequencing_mapping self.data_object_mapping = data_object_mapping self.study_doi_category = ( @@ -127,6 +127,7 @@ def _get_pi( email=study_form.get("piEmail"), orcid=study_form.get("piOrcid"), profile_image_url=self.study_pi_image_url, + type=nmdc.PersonValue.class_class_curie, ) def _get_doi(self, metadata_submission: JSON_OBJECT) -> Union[List[nmdc.Doi], None]: @@ -147,6 +148,7 @@ def _get_doi(self, metadata_submission: JSON_OBJECT) -> Union[List[nmdc.Doi], No doi_value=dataset_doi, doi_provider=self.study_doi_provider, doi_category=self.study_doi_category, + type="nmdc:Doi", ) ] @@ -167,8 +169,10 @@ def _get_has_credit_associations( applies_to_person=nmdc.PersonValue( name=contributor.get("name"), orcid=contributor.get("orcid"), + type="nmdc:PersonValue", ), applied_roles=contributor.get("roles"), + type="nmdc:CreditAssociation", ) for contributor in contributors ] @@ -217,7 +221,10 @@ def _get_quantity_value( if not match: return None - qv = nmdc.QuantityValue(has_raw_value=raw_value) + qv = nmdc.QuantityValue( + has_raw_value=raw_value, + type="nmdc:QuantityValue", + ) if match.group(2): # having group 2 means the value is a range like "0 - 1". 
Either # group 1 or group 2 might be the minimum especially when handling @@ -264,6 +271,7 @@ _get_ontology_class( return nmdc.OntologyClass( name=match.group(1).strip(), id=match.group(2).strip(), + type="nmdc:OntologyClass", ) def _get_controlled_identified_term_value( @@ -285,7 +293,9 @@ return None return nmdc.ControlledIdentifiedTermValue( - has_raw_value=raw_value, term=ontology_class + has_raw_value=raw_value, + term=ontology_class, + type="nmdc:ControlledIdentifiedTermValue", ) def _get_controlled_term_value( @@ -302,7 +312,10 @@ if not raw_value: return None - value = nmdc.ControlledTermValue(has_raw_value=raw_value) + value = nmdc.ControlledTermValue( + has_raw_value=raw_value, + type="nmdc:ControlledTermValue", + ) ontology_class = self._get_ontology_class(raw_value) if ontology_class is not None: value.term = ontology_class @@ -332,7 +345,10 @@ _get_geolocation_value( return None return nmdc.GeolocationValue( - has_raw_value=raw_value, latitude=match.group(1), longitude=match.group(2) + has_raw_value=raw_value, + latitude=match.group(1), + longitude=match.group(2), + type="nmdc:GeolocationValue", ) def _get_float(self, raw_value: Optional[str]) -> Union[float, None]: @@ -425,6 +441,7 @@ _translate_study( principal_investigator=self._get_pi(metadata_submission), study_category=self.study_category, title=self._get_from(metadata_submission, ["studyForm", "studyName"]), + type="nmdc:Study", websites=self._get_from( metadata_submission, ["studyForm", "linkOutWebpage"] ), @@ -435,15 +452,24 @@ _transform_value_for_slot( ): transformed_value = None if slot.range == "TextValue": - transformed_value = nmdc.TextValue(has_raw_value=value) + transformed_value = nmdc.TextValue( + has_raw_value=value, + type="nmdc:TextValue", + ) elif slot.range == "QuantityValue": - transformed_value = self._get_quantity_value(value, unit=unit) + transformed_value = self._get_quantity_value( + value, + unit=unit, + ) elif slot.range == "ControlledIdentifiedTermValue": transformed_value = self._get_controlled_identified_term_value(value) elif slot.range == "ControlledTermValue": transformed_value = self._get_controlled_term_value(value) elif slot.range == "TimestampValue": - transformed_value = nmdc.TimestampValue(has_raw_value=value) + transformed_value = nmdc.TimestampValue( + has_raw_value=value, + type="nmdc:TimestampValue", + ) elif slot.range == "GeolocationValue": transformed_value = self._get_geolocation_value(value) elif slot.range == "float": @@ -531,9 +557,12 @@ _translate_biosample( biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip() slots = { "id": nmdc_biosample_id, - "part_of": nmdc_study_id, + "associated_studies": [nmdc_study_id], + "type": "nmdc:Biosample", "name": sample_data[0].get("samp_name", "").strip(), - "env_package": nmdc.TextValue(has_raw_value=default_env_package), + "env_package": nmdc.TextValue( + has_raw_value=default_env_package, type="nmdc:TextValue" + ), } for tab in sample_data: transformed_tab = self._transform_dict_for_class(tab, "Biosample") @@ -590,18 +619,18 @@ get_database(self) -> nmdc.Database: if sample_data ] - if self.omics_processing_mapping: - # If there is data from an OmicsProcessing mapping file, process it now. This part + if self.nucleotide_sequencing_mapping: + # If there is data from a NucleotideSequencing mapping file, process it now.
This part # assumes that there is a column in that file with the header __biosample_samp_name # that can be used to join with the sample data from the submission portal. The # biosample identified by that `samp_name` will be referenced in the `has_input` - # slot of the OmicsProcessing object. If a DataObject mapping file was also provided, - # those objects will also be generated and referenced in the `has_output` slot of the - # OmicsProcessing object. By keying off of the `samp_name` slot of the submission's - # sample data there is an implicit 1:1 relationship between Biosample objects and - # OmicsProcessing objects generated here. + # slot of the NucleotideSequencing object. If a DataObject mapping file was also + # provided, those objects will also be generated and referenced in the `has_output` slot + # of the NucleotideSequencing object. By keying off of the `samp_name` slot of the + # submission's sample data there is an implicit 1:1 relationship between Biosample + # objects and NucleotideSequencing objects generated here. join_key = f"__biosample_{BIOSAMPLE_UNIQUE_KEY_SLOT}" - database.omics_processing_set = [] + database.data_generation_set = [] database.data_object_set = [] data_objects_by_sample_data_id = {} today = datetime.now().strftime("%Y-%m-%d") @@ -617,10 +646,10 @@ def get_database(self) -> nmdc.Database: grouped, ) - for omics_processing_row in self.omics_processing_mapping: - # For each row in the OmicsProcessing mapping file, first grab the minted Biosample - # id that corresponds to the sample ID from the submission - sample_data_id = omics_processing_row.pop(join_key) + for nucleotide_sequencing_row in self.nucleotide_sequencing_mapping: + # For each row in the NucleotideSequencing mapping file, first grab the minted + # Biosample id that corresponds to the sample ID from the submission + sample_data_id = nucleotide_sequencing_row.pop(join_key) if ( not sample_data_id or sample_data_id not in sample_data_to_nmdc_biosample_ids @@ -631,31 +660,33 @@ def get_database(self) -> nmdc.Database: continue nmdc_biosample_id = sample_data_to_nmdc_biosample_ids[sample_data_id] - # Transform the raw row data according to the OmicsProcessing class's slots, and - # generate an instance. A few key slots do not come from the mapping file, but + # Transform the raw row data according to the NucleotideSequencing class's slots, + # and generate an instance. A few key slots do not come from the mapping file, but # instead are defined here. 
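+                # Illustrative example (hypothetical values): a mapping-file row such as
+                #     {"__biosample_samp_name": "SAMP-001", "name": "SAMP-001 sequencing"}
+                # joins to the Biosample minted for the submission sample whose
+                # samp_name is "SAMP-001"; its remaining columns become
+                # NucleotideSequencing slots below.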
- omics_processing_slots = { - "id": self._id_minter("nmdc:OmicsProcessing", 1)[0], + nucleotide_sequencing_slots = { + "id": self._id_minter("nmdc:NucleotideSequencing", 1)[0], "has_input": [nmdc_biosample_id], "has_output": [], - "part_of": nmdc_study_id, + "associated_studies": [nmdc_study_id], "add_date": today, "mod_date": today, - "type": "nmdc:OmicsProcessing", + "type": "nmdc:NucleotideSequencing", } - omics_processing_slots.update( + nucleotide_sequencing_slots.update( self._transform_dict_for_class( - omics_processing_row, "OmicsProcessing" + nucleotide_sequencing_row, "NucleotideSequencing" ) ) - omics_processing = nmdc.OmicsProcessing(**omics_processing_slots) + nucleotide_sequencing = nmdc.NucleotideSequencing( + **nucleotide_sequencing_slots + ) for data_object_row in data_objects_by_sample_data_id.get( sample_data_id, [] ): # For each row in the DataObject mapping file that corresponds to the sample ID, # transform the raw row data according to the DataObject class's slots, generate - # an instance, and connect that instance's minted ID to the OmicsProcessing + # an instance, and connect that instance's minted ID to the NucleotideSequencing # instance data_object_id = self._id_minter("nmdc:DataObject", 1)[0] data_object_slots = { @@ -667,10 +698,10 @@ def get_database(self) -> nmdc.Database: ) data_object = nmdc.DataObject(**data_object_slots) - omics_processing.has_output.append(data_object_id) + nucleotide_sequencing.has_output.append(data_object_id) database.data_object_set.append(data_object) - database.omics_processing_set.append(omics_processing) + database.data_generation_set.append(nucleotide_sequencing) return database diff --git a/nmdc_runtime/test.Dockerfile b/nmdc_runtime/test.Dockerfile index 6edce923..1bb2464a 100644 --- a/nmdc_runtime/test.Dockerfile +++ b/nmdc_runtime/test.Dockerfile @@ -40,4 +40,4 @@ ENV PYTHONFAULTHANDLER=1 # uncomment line below to stop after first test failure: # https://docs.pytest.org/en/6.2.x/usage.html#stopping-after-the-first-or-n-failures -ENTRYPOINT [ "./wait-for-it.sh", "fastapi:8000" , "--strict" , "--timeout=300" , "--" , "pytest", "-x"] \ No newline at end of file +ENTRYPOINT [ "./wait-for-it.sh", "fastapi:8000" , "--strict" , "--timeout=300" , "--" , "pytest"] diff --git a/requirements/dev.txt b/requirements/dev.txt index cd0e6420..2cd84421 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -4,22 +4,22 @@ # # pip-compile --allow-unsafe --output-file=requirements/dev.txt --strip-extras requirements/dev.in # -attrs==23.2.0 +attrs==24.2.0 # via # -c requirements/main.txt # cattrs # requests-cache backports-tarfile==1.2.0 # via jaraco-context -black==24.4.2 +black==24.10.0 # via -r requirements/dev.in -build==1.2.1 +build==1.2.2.post1 # via pip-tools -cattrs==23.2.3 +cattrs==24.1.2 # via # -c requirements/main.txt # requests-cache -certifi==2024.7.4 +certifi==2024.8.30 # via # -c requirements/main.txt # requests @@ -32,24 +32,24 @@ click==8.1.7 # -c requirements/main.txt # black # pip-tools -coverage==7.5.4 +coverage==7.6.1 # via # -r requirements/dev.in # pytest-cov docutils==0.21.2 # via readme-renderer -exceptiongroup==1.2.1 +exceptiongroup==1.2.2 # via # -c requirements/main.txt # cattrs # pytest -flake8==7.1.0 +flake8==7.1.1 # via -r requirements/dev.in -idna==3.7 +idna==3.10 # via # -c requirements/main.txt # requests -importlib-metadata==8.0.0 +importlib-metadata==8.5.0 # via # keyring # twine @@ -61,11 +61,11 @@ invoke==2.2.0 # via -r requirements/dev.in jaraco-classes==3.4.0 # via keyring 
-jaraco-context==5.3.0 +jaraco-context==6.0.1 # via keyring -jaraco-functools==4.0.1 +jaraco-functools==4.1.0 # via keyring -keyring==25.2.1 +keyring==25.4.1 # via twine markdown-it-py==3.0.0 # via @@ -77,7 +77,7 @@ mdurl==0.1.2 # via # -c requirements/main.txt # markdown-it-py -more-itertools==10.3.0 +more-itertools==10.5.0 # via # jaraco-classes # jaraco-functools @@ -99,7 +99,7 @@ pip-tools==7.4.1 # via -r requirements/dev.in pkginfo==1.10.0 # via twine -platformdirs==4.2.2 +platformdirs==4.3.6 # via # -c requirements/main.txt # black @@ -108,7 +108,7 @@ pluggy==1.5.0 # via # -c requirements/main.txt # pytest -pycodestyle==2.12.0 +pycodestyle==2.12.1 # via flake8 pyflakes==3.2.0 # via flake8 @@ -117,18 +117,18 @@ pygments==2.18.0 # -c requirements/main.txt # readme-renderer # rich -pyproject-hooks==1.1.0 +pyproject-hooks==1.2.0 # via # build # pip-tools -pytest==8.2.2 +pytest==8.3.3 # via # -c requirements/main.txt # -r requirements/dev.in # pytest-asyncio # pytest-cov # pytest-mock -pytest-asyncio==0.23.7 +pytest-asyncio==0.24.0 # via -r requirements/dev.in pytest-cov==5.0.0 # via -r requirements/dev.in @@ -155,7 +155,7 @@ requests-toolbelt==1.0.0 # twine rfc3986==2.0.0 # via twine -rich==13.7.1 +rich==13.9.2 # via # -c requirements/main.txt # twine @@ -163,7 +163,7 @@ six==1.16.0 # via # -c requirements/main.txt # url-normalize -tomli==2.0.1 +tomli==2.0.2 # via # -c requirements/main.txt # black @@ -178,27 +178,28 @@ typing-extensions==4.12.2 # -c requirements/main.txt # black # cattrs + # rich url-normalize==1.4.3 # via # -c requirements/main.txt # requests-cache -urllib3==2.2.2 +urllib3==2.2.3 # via # -c requirements/main.txt # requests # requests-cache # twine -wheel==0.43.0 +wheel==0.44.0 # via pip-tools -zipp==3.19.2 +zipp==3.20.2 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -pip==24.1.2 +pip==24.2 # via # -r requirements/dev.in # pip-tools -setuptools==70.3.0 +setuptools==75.1.0 # via # -c requirements/main.txt # -r requirements/dev.in diff --git a/requirements/main.in b/requirements/main.in index 9ee62b39..ebc5312b 100644 --- a/requirements/main.in +++ b/requirements/main.in @@ -25,7 +25,7 @@ mkdocs-jupyter mkdocs-material mkdocs-mermaid2-plugin motor -nmdc-schema==10.8.0 +nmdc-schema==11.0.0 openpyxl pandas passlib[bcrypt] diff --git a/requirements/main.txt b/requirements/main.txt index 94cd5051..c18f62d3 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1,10 +1,10 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --allow-unsafe --output-file=requirements/main.txt --strip-extras requirements/main.in # -alembic==1.13.2 +alembic==1.13.3 # via dagster aniso8601==9.0.1 # via graphene @@ -15,7 +15,7 @@ antlr4-python3-runtime==4.9.3 # linkml # pyjsg # pyshexc -anyio==4.4.0 +anyio==4.6.0 # via # gql # httpx @@ -34,13 +34,13 @@ asttokens==2.4.1 # via stack-data async-lru==2.0.4 # via jupyterlab -attrs==23.2.0 +attrs==24.2.0 # via # cattrs # jsonschema # referencing # requests-cache -babel==2.15.0 +babel==2.16.0 # via # jupyterlab-server # mkdocs-material @@ -48,9 +48,9 @@ backoff==2.2.1 # via gql base32-lib==1.0.2 # via -r requirements/main.in -bcrypt==4.1.3 +bcrypt==4.2.0 # via passlib -beanie==1.26.0 +beanie==1.27.0 # via -r requirements/main.in beautifulsoup4==4.12.3 # via @@ -59,20 +59,20 @@ beautifulsoup4==4.12.3 # nbconvert bleach==6.1.0 # via nbconvert -boto3==1.34.142 +boto3==1.35.35 
# via -r requirements/main.in -botocore==1.34.142 +botocore==1.35.35 # via # boto3 # s3transfer -cattrs==23.2.3 +cattrs==24.1.2 # via requests-cache -certifi==2024.7.4 +certifi==2024.8.30 # via # httpcore # httpx # requests -cffi==1.16.0 +cffi==1.17.1 # via # argon2-cffi-bindings # cryptography @@ -95,7 +95,6 @@ click==8.1.7 # linkml-runtime # mkdocs # prefixcommons - # typer # uvicorn colorama==0.4.6 # via mkdocs-material @@ -105,43 +104,43 @@ comm==0.2.2 # via # ipykernel # ipywidgets -croniter==2.0.5 +croniter==3.0.3 # via dagster -cryptography==42.0.8 +cryptography==43.0.1 # via python-jose curies==0.7.10 # via # linkml-runtime # prefixmaps -dagit==1.7.12 +dagit==1.8.10 # via -r requirements/main.in -dagster==1.7.12 +dagster==1.8.10 # via # -r requirements/main.in # dagster-graphql # dagster-postgres # dagster-webserver -dagster-graphql==1.7.12 +dagster-graphql==1.8.10 # via # -r requirements/main.in # dagster-webserver -dagster-pipes==1.7.12 +dagster-pipes==1.8.10 # via dagster -dagster-postgres==0.23.12 +dagster-postgres==0.24.10 # via -r requirements/main.in -dagster-webserver==1.7.12 +dagster-webserver==1.8.10 # via dagit -debugpy==1.8.2 +debugpy==1.8.6 # via ipykernel decorator==5.1.1 # via ipython defusedxml==0.7.1 # via nbconvert -dependency-injector==4.41.0 +dependency-injector==4.42.0 # via -r requirements/main.in deprecated==1.2.14 # via linkml-runtime -dnspython==2.6.1 +dnspython==2.7.0 # via # email-validator # pymongo @@ -154,30 +153,32 @@ ecdsa==0.19.0 editorconfig==0.12.4 # via jsbeautifier email-validator==2.2.0 - # via - # fastapi - # pydantic + # via pydantic et-xmlfile==1.1.0 # via openpyxl -executing==2.0.1 +exceptiongroup==1.2.2 + # via + # anyio + # cattrs + # ipython + # pytest +executing==2.1.0 # via stack-data -fastapi==0.111.0 +fastapi==0.115.0 # via -r requirements/main.in -fastapi-cli==0.0.4 - # via fastapi fastjsonschema==2.20.0 # via # -r requirements/main.in # nbformat -filelock==3.15.4 +filelock==3.16.1 # via dagster fnc==0.5.3 # via -r requirements/main.in fqdn==1.5.1 # via jsonschema -frozendict==2.4.4 +frozendict==2.4.5 # via -r requirements/main.in -fsspec==2024.6.1 +fsspec==2024.9.0 # via universal-pathlib ghp-import==2.1.0 # via mkdocs @@ -187,7 +188,7 @@ gql==3.5.0 # via dagster-graphql graphene==3.3 # via dagster-graphql -graphql-core==3.2.3 +graphql-core==3.2.4 # via # gql # graphene @@ -196,13 +197,11 @@ graphql-relay==3.2.0 # via graphene graphviz==0.20.3 # via linkml -greenlet==3.0.3 - # via sqlalchemy -grpcio==1.64.1 +grpcio==1.66.2 # via # dagster # grpcio-health-checking -grpcio-health-checking==1.62.2 +grpcio-health-checking==1.62.3 # via dagster h11==0.14.0 # via @@ -213,17 +212,15 @@ hbreader==0.9.1 # jsonasobj2 # linkml # linkml-runtime -httpcore==1.0.5 +httpcore==1.0.6 # via httpx httptools==0.6.1 # via uvicorn -httpx==0.27.0 - # via - # fastapi - # jupyterlab +httpx==0.27.2 + # via jupyterlab humanfriendly==10.0 # via coloredlogs -idna==3.7 +idna==3.10 # via # anyio # email-validator @@ -239,13 +236,12 @@ ipykernel==6.29.5 # jupyter-console # jupyterlab # mkdocs-jupyter - # qtconsole -ipython==8.26.0 +ipython==8.28.0 # via # ipykernel # ipywidgets # jupyter-console -ipywidgets==8.1.3 +ipywidgets==8.1.5 # via jupyter isodate==0.6.1 # via @@ -258,7 +254,6 @@ jedi==0.19.1 jinja2==3.1.4 # via # dagster - # fastapi # jupyter-server # jupyterlab # jupyterlab-server @@ -271,7 +266,7 @@ jmespath==1.0.1 # via # boto3 # botocore -jq==1.7.0 +jq==1.8.0 # via -r requirements/main.in jsbeautifier==1.15.1 # via mkdocs-mermaid2-plugin @@ -305,15 
+300,14 @@ jsonschema==4.23.0 # nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter==1.0.0 +jupyter==1.1.1 # via -r requirements/main.in -jupyter-client==8.6.2 +jupyter-client==8.6.3 # via # ipykernel # jupyter-console # jupyter-server # nbclient - # qtconsole jupyter-console==6.6.3 # via jupyter jupyter-core==5.7.2 @@ -326,12 +320,11 @@ jupyter-core==5.7.2 # nbclient # nbconvert # nbformat - # qtconsole jupyter-events==0.10.0 # via jupyter-server jupyter-lsp==2.2.5 # via jupyterlab -jupyter-server==2.14.1 +jupyter-server==2.14.2 # via # jupyter-lsp # jupyterlab @@ -340,39 +333,40 @@ jupyter-server==2.14.1 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.2.3 +jupyterlab==4.2.5 # via # -r requirements/main.in + # jupyter # notebook jupyterlab-pygments==0.3.0 # via nbconvert -jupyterlab-server==2.27.2 +jupyterlab-server==2.27.3 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.11 +jupyterlab-widgets==3.0.13 # via ipywidgets -jupytext==1.16.2 +jupytext==1.16.4 # via mkdocs-jupyter lazy-model==0.2.0 # via beanie -linkml==1.8.1 +linkml==1.8.4 # via # -r requirements/main.in # nmdc-schema linkml-dataops==0.1.0 # via linkml -linkml-runtime==1.8.0 +linkml-runtime==1.8.3 # via # -r requirements/main.in # linkml # linkml-dataops # nmdc-schema -lxml==5.2.2 +lxml==5.3.0 # via -r requirements/main.in mako==1.3.5 # via alembic -markdown==3.6 +markdown==3.7 # via # mkdocs # mkdocs-material @@ -382,7 +376,7 @@ markdown-it-py==3.0.0 # jupytext # mdit-py-plugins # rich -markupsafe==2.1.5 +markupsafe==3.0.0 # via # jinja2 # mako @@ -392,7 +386,7 @@ matplotlib-inline==0.1.7 # via # ipykernel # ipython -mdit-py-plugins==0.4.1 +mdit-py-plugins==0.4.2 # via jupytext mdurl==0.1.2 # via markdown-it-py @@ -402,7 +396,7 @@ mergedeep==1.3.4 # mkdocs-get-deps mistune==3.0.2 # via nbconvert -mkdocs==1.6.0 +mkdocs==1.6.1 # via # mkdocs-jupyter # mkdocs-material @@ -411,9 +405,9 @@ mkdocs==1.6.0 # nmdc-schema mkdocs-get-deps==0.2.0 # via mkdocs -mkdocs-jupyter==0.24.8 +mkdocs-jupyter==0.25.0 # via -r requirements/main.in -mkdocs-material==9.5.28 +mkdocs-material==9.5.39 # via # -r requirements/main.in # mkdocs-jupyter @@ -426,11 +420,11 @@ mkdocs-mermaid2-plugin==0.6.0 # nmdc-schema mkdocs-redirects==1.2.1 # via nmdc-schema -motor==3.5.0 +motor==3.6.0 # via # -r requirements/main.in # beanie -multidict==6.0.5 +multidict==6.1.0 # via yarl nbclient==0.10.0 # via nbconvert @@ -447,22 +441,20 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -nmdc-schema==10.8.0 +nmdc-schema==11.0.0 # via -r requirements/main.in -notebook==7.2.1 +notebook==7.2.2 # via jupyter notebook-shim==0.2.4 # via # jupyterlab # notebook -numpy==2.0.0 +numpy==2.1.2 # via pandas openpyxl==3.1.5 # via # -r requirements/main.in # linkml -orjson==3.10.6 - # via fastapi overrides==7.7.0 # via jupyter-server packaging==24.1 @@ -476,12 +468,10 @@ packaging==24.1 # mkdocs # nbconvert # pytest - # qtconsole - # qtpy # setuptools-scm -paginate==0.5.6 +paginate==0.5.7 # via mkdocs-material -pandas==2.2.2 +pandas==2.2.3 # via -r requirements/main.in pandocfilters==1.5.1 # via nbconvert @@ -493,11 +483,9 @@ passlib==1.7.4 # via -r requirements/main.in pathspec==0.12.1 # via mkdocs -pendulum==3.0.0 - # via dagster pexpect==4.9.0 # via ipython -platformdirs==4.2.2 +platformdirs==4.3.6 # via # jupyter-core # mkdocs-get-deps @@ -510,17 +498,17 @@ prefixcommons==0.1.12 # via # linkml # linkml-runtime -prefixmaps==0.2.4 +prefixmaps==0.2.5 # via # linkml # linkml-runtime 
-prometheus-client==0.20.0 +prometheus-client==0.21.0 # via jupyter-server -prompt-toolkit==3.0.47 +prompt-toolkit==3.0.48 # via # ipython # jupyter-console -protobuf==4.25.3 +protobuf==4.25.5 # via # dagster # grpcio-health-checking @@ -532,15 +520,15 @@ ptyprocess==0.7.0 # via # pexpect # terminado -pure-eval==0.2.2 +pure-eval==0.2.3 # via stack-data -pyasn1==0.6.0 +pyasn1==0.6.1 # via # python-jose # rsa pycparser==2.22 # via cffi -pydantic==2.8.2 +pydantic==2.9.2 # via # -r requirements/main.in # beanie @@ -550,7 +538,7 @@ pydantic==2.8.2 # lazy-model # linkml # linkml-runtime -pydantic-core==2.20.1 +pydantic-core==2.23.4 # via pydantic pygments==2.18.0 # via @@ -559,23 +547,22 @@ pygments==2.18.0 # mkdocs-jupyter # mkdocs-material # nbconvert - # qtconsole # rich pyjsg==0.11.10 # via # linkml # pyshexc # shexjsg -pymdown-extensions==10.8.1 +pymdown-extensions==10.11.2 # via # mkdocs-material # mkdocs-mermaid2-plugin -pymongo==4.8.0 +pymongo==4.9.2 # via # -r requirements/main.in # motor # nmdc-schema -pyparsing==3.1.2 +pyparsing==3.1.4 # via rdflib pyshex==0.8.1 # via linkml @@ -583,7 +570,7 @@ pyshexc==0.9.1 # via # linkml # pyshex -pytest==8.2.2 +pytest==8.3.3 # via pytest-logging pytest-logging==2015.11.4 # via prefixcommons @@ -592,13 +579,10 @@ python-dateutil==2.9.0.post0 # arrow # botocore # croniter - # dagster # ghp-import # jupyter-client # linkml # pandas - # pendulum - # time-machine python-dotenv==1.0.1 # via # -r requirements/main.in @@ -608,18 +592,16 @@ python-jose==3.3.0 # via -r requirements/main.in python-json-logger==2.0.7 # via jupyter-events -python-multipart==0.0.9 - # via - # -r requirements/main.in - # fastapi +python-multipart==0.0.12 + # via -r requirements/main.in pytrie==0.4.0 # via curies -pytz==2024.1 +pytz==2024.2 # via # croniter # dagster # pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -r requirements/main.in # dagster @@ -638,17 +620,12 @@ pyyaml==6.0.1 # uvicorn pyyaml-env-tag==0.1 # via mkdocs -pyzmq==26.0.3 +pyzmq==26.2.0 # via # ipykernel # jupyter-client # jupyter-console # jupyter-server - # qtconsole -qtconsole==5.5.2 - # via jupyter -qtpy==2.4.1 - # via qtconsole rdflib==7.0.0 # via # cfgraph @@ -670,7 +647,7 @@ referencing==0.35.1 # jsonschema # jsonschema-specifications # jupyter-events -regex==2024.5.15 +regex==2024.9.11 # via mkdocs-material requests==2.32.3 # via @@ -702,18 +679,18 @@ rfc3986-validator==0.1.1 # jupyter-events rfc3987==1.3.8 # via jsonschema -rich==13.7.1 - # via - # dagster - # typer -rpds-py==0.19.0 +rich==13.9.2 + # via dagster +rpds-py==0.20.0 # via # jsonschema # referencing rsa==4.9 # via python-jose ruamel-yaml==0.18.6 - # via linkml-dataops + # via + # linkml-dataops + # nmdc-schema ruamel-yaml-clib==0.2.8 # via ruamel-yaml s3transfer==0.10.2 @@ -724,8 +701,6 @@ send2trash==1.8.3 # via jupyter-server setuptools-scm==8.1.0 # via -r requirements/main.in -shellingham==1.5.4 - # via typer shexjsg==0.8.2 # via # pyshex @@ -748,7 +723,7 @@ sniffio==1.3.1 # httpx sortedcontainers==2.4.0 # via pytrie -soupsieve==2.5 +soupsieve==2.6 # via beautifulsoup4 sparqlslurper==0.5.1 # via pyshex @@ -756,37 +731,40 @@ sparqlwrapper==2.0.0 # via # pyshex # sparqlslurper -sqlalchemy==2.0.31 +sqlalchemy==2.0.35 # via # alembic # dagster # linkml stack-data==0.6.3 # via ipython -starlette==0.37.2 +starlette==0.38.6 # via # dagster-graphql # dagster-webserver # fastapi -structlog==24.2.0 +structlog==24.4.0 # via dagster tabulate==0.9.0 # via dagster -tenacity==8.5.0 +tenacity==9.0.0 # via -r requirements/main.in terminado==0.18.1 # via # 
jupyter-server # jupyter-server-terminals -time-machine==2.14.2 - # via pendulum tinycss2==1.3.0 # via nbconvert toml==0.10.2 # via beanie -tomli==2.0.1 - # via dagster -toolz==0.12.1 +tomli==2.0.2 + # via + # dagster + # jupyterlab + # jupytext + # pytest + # setuptools-scm +toolz==1.0.0 # via -r requirements/main.in toposort==1.10 # via dagster @@ -798,7 +776,7 @@ tornado==6.4.1 # jupyterlab # notebook # terminado -tqdm==4.66.4 +tqdm==4.66.5 # via # -r requirements/main.in # dagster @@ -818,55 +796,54 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat - # qtconsole -typer==0.12.3 - # via fastapi-cli -types-python-dateutil==2.9.0.20240316 +types-python-dateutil==2.9.0.20241003 # via arrow typing-extensions==4.12.2 # via # alembic + # anyio + # async-lru + # beanie + # cattrs # dagster # fastapi + # ipython + # multidict # pydantic # pydantic-core + # rich # sqlalchemy - # typer -tzdata==2024.1 - # via - # pandas - # pendulum -ujson==5.10.0 - # via fastapi -universal-pathlib==0.2.2 + # uvicorn +tzdata==2024.2 + # via pandas +universal-pathlib==0.2.5 # via dagster uri-template==1.3.0 # via jsonschema url-normalize==1.4.3 # via requests-cache -urllib3==2.2.2 +urllib3==2.2.3 # via # botocore # pyshex # requests # requests-cache -uvicorn==0.30.1 +uvicorn==0.31.0 # via # -r requirements/main.in # dagster-webserver - # fastapi -uvloop==0.19.0 +uvloop==0.20.0 # via uvicorn -watchdog==4.0.1 +watchdog==5.0.3 # via # dagster # linkml # mkdocs -watchfiles==0.22.0 +watchfiles==0.24.0 # via uvicorn wcwidth==0.2.13 # via prompt-toolkit -webcolors==24.6.0 +webcolors==24.8.0 # via jsonschema webencodings==0.5.1 # via @@ -874,9 +851,9 @@ webencodings==0.5.1 # tinycss2 websocket-client==1.8.0 # via jupyter-server -websockets==12.0 +websockets==13.1 # via uvicorn -widgetsnbextension==4.0.11 +widgetsnbextension==4.0.13 # via ipywidgets wrapt==1.16.0 # via deprecated @@ -884,11 +861,11 @@ xlrd==2.0.1 # via -r requirements/main.in xlsxwriter==3.2.0 # via -r requirements/main.in -yarl==1.9.4 +yarl==1.13.1 # via gql # The following packages are considered to be unsafe in a requirements file: -setuptools==70.3.0 +setuptools==75.1.0 # via # dagster # jupyterlab diff --git a/tests/files/nmdc_bsm-12-7mysck21.json b/tests/files/nmdc_bsm-12-7mysck21.json index d0571f47..16631400 100644 --- a/tests/files/nmdc_bsm-12-7mysck21.json +++ b/tests/files/nmdc_bsm-12-7mysck21.json @@ -6,42 +6,52 @@ "NEON" ], "collection_date": { - "has_raw_value": "2014-07-15T18:00Z" + "has_raw_value": "2014-07-15T18:00Z", + "type": "nmdc:TimestampValue" }, "depth": { "has_maximum_numeric_value": 1, "has_minimum_numeric_value": 0, - "has_unit": "meters" + "has_unit": "meters", + "type": "nmdc:QuantityValue" }, "elev": 1179.5, "env_broad_scale": { + "type": "nmdc:ControlledIdentifiedTermValue", "term": { "id": "ENVO:01000253", - "name": "freshwater river biome" + "name": "freshwater river biome", + "type": "nmdc:OntologyClass" } }, "env_local_scale": { + "type": "nmdc:ControlledIdentifiedTermValue", "term": { "id": "ENVO:03600095", - "name": "stream run" + "name": "stream run", + "type": "nmdc:OntologyClass" } }, "env_medium": { + "type": "nmdc:ControlledIdentifiedTermValue", "term": { "id": "ENVO:01001057", - "name": "environment associated with a plant part or small plant" + "name": "environment associated with a plant part or small plant", + "type": "nmdc:OntologyClass" } }, "geo_loc_name": { - "has_raw_value": "USA: Colorado, Arikaree River" + "has_raw_value": "USA: Colorado, Arikaree River", + "type": "nmdc:TextValue" }, "id": 
"nmdc:bsm-12-7mysck21", "lat_lon": { "latitude": 39.758206, - "longitude": -102.447148 + "longitude": -102.447148, + "type": "nmdc:GeolocationValue" }, "name": "ARIK.20140715.AMC.EPIPHYTON.5", - "part_of": [ + "associated_studies": [ "nmdc:sty-11-34xj1150" ], "type": "nmdc:Biosample" diff --git a/tests/files/nmdc_sty-11-pzmd0x14.json b/tests/files/nmdc_sty-11-pzmd0x14.json index 114437c0..a4eb9c58 100644 --- a/tests/files/nmdc_sty-11-pzmd0x14.json +++ b/tests/files/nmdc_sty-11-pzmd0x14.json @@ -16,45 +16,53 @@ "name": "Kate Thibault", "email": "kthibault@battelleecology.org", "orcid": "orcid:0000-0003-3477-6424", - "has_raw_value": "Kate Thibault" + "has_raw_value": "Kate Thibault", + "type": "nmdc:PersonValue" }, "has_credit_associations": [ { "applies_to_person": { "name": "Hugh Cross", "email": "crossh@battelleecology.org", - "orcid": "orcid:0000-0002-6745-9479" + "orcid": "orcid:0000-0002-6745-9479", + "type": "nmdc:PersonValue" }, "applied_roles": [ "Methodology", "Data curation" - ] + ], + "type": "prov:Association" }, { "applies_to_person": { "name": "Kate Thibault", "email": "kthibault@battelleecology.org", - "orcid": "orcid:0000-0003-3477-6424" + "orcid": "orcid:0000-0003-3477-6424", + "type": "nmdc:PersonValue" }, "applied_roles": [ "Principal Investigator" - ] + ], + "type": "prov:Association" }, { "applies_to_person": { "name": "Stephanie Parker", "email": "sparker@battelleecology.org", - "orcid": "0000-0002-7180-7245" + "orcid": "0000-0002-7180-7245", + "type": "nmdc:PersonValue" }, "applied_roles": [ "Methodology", "Data curation" - ] + ], + "type": "prov:Association" } ], "study_image": [ { - "url": "https://portal.nersc.gov/project/m3408/profile_images/nmdc_sty-11-34xj1150.jpg" + "url": "https://portal.nersc.gov/project/m3408/profile_images/nmdc_sty-11-34xj1150.jpg", + "type": "nmdc:ImageValue" } ], "gold_study_identifiers": [], diff --git a/tests/files/planned_processes.json b/tests/files/planned_processes.json new file mode 100644 index 00000000..086d9ce1 --- /dev/null +++ b/tests/files/planned_processes.json @@ -0,0 +1,240 @@ +{ + "data_generation_set": [ + { + "id": "nmdc:omprc-11-0003fm52", + "name": "1000S_WLUP_FTMS_SPE_BTM_1_run2_Fir_22Apr22_300SA_p01_149_1_3506", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-jht0ty76" + ], + "has_output": [ + "nmdc:dobj-11-cp4p5602" + ], + "processing_institution": "EMSL", + "type": "nmdc:MassSpectrometry", + "analyte_category": "nom", + "associated_studies": [ + "nmdc:sty-11-28tm5d36" + ], + "instrument_used": [ + "nmdc:inst-14-mwrrj632" + ] + }, + { + "id": "nmdc:omprc-11-0011q207", + "name": "Root microbial communities from poplar common garden site in Clatskanie, Oregon, USA - BESC-847-CL1_28_5 endosphere", + "has_input": [ + "nmdc:bsm-11-ta8dt754" + ], + "add_date": "2021-08-20T00:00:00", + "mod_date": "2021-08-20T00:00:00", + "ncbi_project_name": "Root microbial communities from poplar common garden site in Clatskanie, Oregon, USA - BESC-847-CL1_28_5 endosphere", + "principal_investigator": { + "has_raw_value": "Mitchel Doktycz", + "email": "doktyczmj@ornl.gov", + "name": "Mitchel Doktycz", + "type": "nmdc:PersonValue" + }, + "processing_institution": "JGI", + "type": "nmdc:NucleotideSequencing", + "gold_sequencing_project_identifiers": [ + "gold:Gp0587799" + ], + "analyte_category": "metagenome", + "associated_studies": [ + "nmdc:sty-11-r2h77870" + ], + "instrument_used": [ + "nmdc:inst-14-mr4r2w09" + ] + }, + { + "id": "nmdc:omprc-11-00383810", + "name": 
"Brodie_185_H2O_14Mar19_R2_HESI_Neg", + "description": "High resolution MS spectra only", + "has_input": [ + "nmdc:bsm-11-4sw8dr23" + ], + "has_output": [ + "nmdc:dobj-13-gc7yqf33" + ], + "processing_institution": "EMSL", + "type": "nmdc:MassSpectrometry", + "alternative_identifiers": [ + "emsl:738758" + ], + "analyte_category": "nom", + "associated_studies": [ + "nmdc:sty-11-dcqce727" + ], + "instrument_used": [ + "nmdc:inst-14-nstrhv39" + ] + } + ], + "material_processing_set": [ + { + "end_date": "2021-08-19", + "has_input": [ + "nmdc:procsm-11-9gjxns61" + ], + "has_output": [ + "nmdc:procsm-11-0wxpzf07" + ], + "id": "nmdc:extrp-11-00r2pk65", + "processing_institution": "Battelle", + "start_date": "2020-06-24T22:06Z", + "input_mass": { + "has_numeric_value": 0.25, + "has_unit": "g", + "type": "nmdc:QuantityValue" + }, + "qc_status": "pass", + "type": "nmdc:Extraction", + "extraction_targets": [ + "DNA" + ] + }, + { + "end_date": "2020-09-01", + "has_input": [ + "nmdc:procsm-11-rd048144" + ], + "has_output": [ + "nmdc:procsm-11-fbbgm243" + ], + "id": "nmdc:extrp-11-00ykcp41", + "processing_institution": "Battelle", + "start_date": "2019-08-20T16:21Z", + "input_mass": { + "has_numeric_value": 0.25, + "has_unit": "g", + "type": "nmdc:QuantityValue" + }, + "qc_status": "pass", + "type": "nmdc:Extraction", + "extraction_targets": [ + "DNA" + ] + }, + { + "end_date": "2017-11-29", + "has_input": [ + "nmdc:procsm-11-0eq9fn67" + ], + "has_output": [ + "nmdc:procsm-11-avhg4c03" + ], + "id": "nmdc:extrp-11-01hngb04", + "processing_institution": "Battelle", + "start_date": "2016-08-09T18:27Z", + "input_mass": { + "has_numeric_value": 0.25, + "has_unit": "g", + "type": "nmdc:QuantityValue" + }, + "qc_status": "pass", + "type": "nmdc:Extraction", + "extraction_targets": [ + "DNA" + ] + } + ], + "workflow_execution_set": [ + { + "id": "nmdc:wfmag-11-00jn7876.1", + "name": "Metagenome Assembled Genomes Analysis Activity for nmdc:wfmag-11-00jn7876.1", + "started_at_time": "2023-07-30T21:31:56.387227+00:00", + "ended_at_time": "2023-07-30T21:34:32.750008+00:00", + "was_informed_by": "nmdc:omprc-11-7yj0jg57", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/metaMAGs", + "has_input": [ + "nmdc:dobj-11-yjp1xw52", + "nmdc:dobj-11-3av14y79", + "nmdc:dobj-11-wa5pnq42", + "nmdc:dobj-11-nexa9703", + "nmdc:dobj-11-j13n8739", + "nmdc:dobj-11-116fa706", + "nmdc:dobj-11-60d0na51", + "nmdc:dobj-11-2vbz7538", + "nmdc:dobj-11-1t48mn65", + "nmdc:dobj-11-1cvwk224", + "nmdc:dobj-11-cdna6f90", + "nmdc:dobj-11-4vb3ww76", + "nmdc:dobj-11-xv4qd072", + "nmdc:dobj-11-m7p3sb10", + "nmdc:dobj-11-j0t1rv33" + ], + "has_output": [ + "nmdc:dobj-11-k5ad4209", + "nmdc:dobj-11-bw8nqt30", + "nmdc:dobj-11-199t2777", + "nmdc:dobj-11-2qfh8476", + "nmdc:dobj-11-fcsvq172" + ], + "type": "nmdc:MagsAnalysis", + "version": "v1.0.6", + "mags_list": [] + }, + { + "id": "nmdc:wfmag-11-00jn7876.2", + "name": "Metagenome Assembled Genomes Analysis Activity for nmdc:wfmag-11-00jn7876.2", + "started_at_time": "2024-03-24T16:04:04.936972+00:00", + "ended_at_time": "2024-03-24T17:49:34.756540+00:00", + "was_informed_by": "nmdc:omprc-11-7yj0jg57", + "execution_resource": "NERSC-Perlmutter", + "git_url": "https://github.com/microbiomedata/metaMAGs", + "has_input": [ + "nmdc:dobj-11-yjp1xw52", + "nmdc:dobj-11-3av14y79", + "nmdc:dobj-11-wa5pnq42", + "nmdc:dobj-11-nexa9703", + "nmdc:dobj-11-j13n8739", + "nmdc:dobj-11-116fa706", + "nmdc:dobj-11-60d0na51", + "nmdc:dobj-11-2vbz7538", + "nmdc:dobj-11-1t48mn65", + 
"nmdc:dobj-11-1cvwk224", + "nmdc:dobj-11-cdna6f90", + "nmdc:dobj-11-4vb3ww76", + "nmdc:dobj-11-xv4qd072", + "nmdc:dobj-11-m7p3sb10", + "nmdc:dobj-11-j0t1rv33" + ], + "type": "nmdc:MagsAnalysis", + "has_output": [ + "nmdc:dobj-11-dsh5da11", + "nmdc:dobj-11-xgj4wc09", + "nmdc:dobj-11-dsfytf22", + "nmdc:dobj-11-y87nta16", + "nmdc:dobj-11-24xgzf65", + "nmdc:dobj-11-3ewrw426", + "nmdc:dobj-11-yaqmm448", + "nmdc:dobj-11-mkszjm42", + "nmdc:dobj-11-net1d451" + ], + "version": "v1.1.0" + }, + { + "id": "nmdc:wfmag-11-0133pz73.1", + "name": "MAGs Activity for nmdc:wfmag-11-0133pz73.1", + "started_at_time": "2023-03-08T19:46:26.128394+00:00", + "ended_at_time": "2023-03-08T19:46:26.128414+00:00", + "was_informed_by": "nmdc:omprc-11-7c4mb403", + "execution_resource": "JGI", + "git_url": "https://github.com/microbiomedata/metaMAGs", + "has_input": [ + "nmdc:dobj-11-49j1ct25", + "nmdc:dobj-11-wfagh677", + "nmdc:dobj-11-grtefb44" + ], + "has_output": [ + "nmdc:dobj-11-y5hatt16" + ], + "type": "nmdc:MagsAnalysis", + "version": "v1.0.5-beta", + "mags_list": [] + } + ] +} \ No newline at end of file diff --git a/tests/files/study_no_credit_associations.json b/tests/files/study_no_credit_associations.json new file mode 100644 index 00000000..cb257971 --- /dev/null +++ b/tests/files/study_no_credit_associations.json @@ -0,0 +1,7 @@ +{ + "id": "nmdc:sty-11-r2h77870", + "name": "study_1", + "description": "blah", + "type": "nmdc:Study", + "study_category": "research_study" +} diff --git a/tests/files/test_changesheet_insert_study_doi.tsv b/tests/files/test_changesheet_insert_study_doi.tsv index 631facf1..52112575 100644 --- a/tests/files/test_changesheet_insert_study_doi.tsv +++ b/tests/files/test_changesheet_insert_study_doi.tsv @@ -1,5 +1,6 @@ id action attribute value nmdc:sty-11-pzmd0x14 insert associated_dois d1 -d1 update doi_value doi:10.25345/C5CG8S -d1 update doi_category dataset_doi -d1 update doi_provider massive +d1 insert doi_value doi:10.25345/C5CG8S +d1 insert doi_category dataset_doi +d1 insert doi_provider massive +d1 insert type nmdc:Doi diff --git a/tests/test_api/test_endpoints.py b/tests/test_api/test_endpoints.py index 0bfae359..90f9d2d2 100644 --- a/tests/test_api/test_endpoints.py +++ b/tests/test_api/test_endpoints.py @@ -232,7 +232,7 @@ def test_submit_changesheet(): sheet_in = ChangesheetIn( name="sheet", content_type="text/tab-separated-values", - text="id\taction\tattribute\tvalue\nnmdc:bsm-12-7mysck21\tupdate\tpart_of\tnmdc:sty-11-pzmd0x14\n", + text="id\taction\tattribute\tvalue\nnmdc:bsm-12-7mysck21\tupdate\tassociated_studies\tnmdc:sty-11-pzmd0x14\n", ) mdb = get_mongo_db() rs = ensure_test_resources(mdb) @@ -270,12 +270,9 @@ def test_submit_changesheet(): assert True -@pytest.mark.skip( - reason="Skipping because race condition causes http://fastapi:8000/nmdcschema/ids/nmdc:wfrqc-11-t0tvnp52.2 to 404?" 
-) def test_submit_workflow_activities(api_site_client): test_collection, test_id = ( - "read_qc_analysis_activity_set", + "workflow_execution_set", "nmdc:wfrqc-11-t0tvnp52.2", ) test_payload = { @@ -292,11 +289,10 @@ def test_submit_workflow_activities(api_site_client): "has_output": [ "nmdc:dobj-11-w5dak635", "nmdc:dobj-11-g6d71n77", - "nmdc:dobj-11-bds7qq03", + "nmdc:dobj-11-bds7qq03" ], - "type": "nmdc:ReadQcAnalysisActivity", - "part_of": ["nmdc:omprc-11-9mvz7z22"], - "version": "v1.0.8", + "type": "nmdc:ReadQcAnalysis", + "version": "v1.0.8" } ] } @@ -305,7 +301,7 @@ def test_submit_workflow_activities(api_site_client): mdb[test_collection].delete_one({"id": test_id}) rv = api_site_client.request( "POST", - "/v1/workflows/activities", + "/workflows/workflow_executions", test_payload, ) assert rv.json() == {"message": "jobs accepted"} @@ -322,10 +318,11 @@ def test_get_class_name_and_collection_names_by_doc_id(): # Seed the database. mdb = get_mongo_db() study_set_collection = mdb.get_collection(name="study_set") - study_set_collection.insert_one(dict(id="nmdc:sty-1-foobar")) + my_study = {"id": "nmdc:sty-1-foobar", "type": "nmdc:Study"} + study_set_collection.replace_one(my_study, my_study, upsert=True) # Valid `id`, and the document exists in database. - id_ = "nmdc:sty-1-foobar" + id_ = my_study["id"] response = requests.request( "GET", f"{base_url}/nmdcschema/ids/{id_}/collection-name" ) @@ -365,3 +362,59 @@ def test_find_data_objects_for_nonexistent_study(api_site_client): "GET", "/data_objects/study/nmdc:sty-11-hdd4bf83", ) + + +def test_find_planned_processes(api_site_client): + mdb = get_mongo_db() + database_dict = json.loads( + (REPO_ROOT_DIR / "tests" / "files" / "planned_processes.json").read_text() + ) + for collection_name, docs in database_dict.items(): + for doc in docs: + mdb[collection_name].replace_one({"id": doc["id"]}, doc, upsert=True) + + rv = api_site_client.request( + "GET", + "/planned_processes", + ) + assert rv.json()["meta"]["count"] >= 9 + +def test_find_planned_process_by_id(api_site_client): + # Seed the database with documents that represent instances of the `PlannedProcess` class or any of its subclasses. + mdb = get_mongo_db() + database_dict = json.loads( + (REPO_ROOT_DIR / "tests" / "files" / "planned_processes.json").read_text() + ) + for collection_name, docs in database_dict.items(): + for doc in docs: + mdb[collection_name].replace_one({"id": doc["id"]}, doc, upsert=True) + + # Also, include a document that represents a `Study` (which is not a subclass of `PlannedProcess`), + # so we can check whether the endpoint-under-test only searches collections that we expect it to. + my_study = {"id": "nmdc:sty-1-foobar", "type": "nmdc:Study"} + mdb.get_collection(name="study_set").replace_one(my_study, my_study, upsert=True) + + # Test case: The `id` belongs to a document that represents an instance of + # the `PlannedProcess` class or one of its subclasses. + rv = api_site_client.request( + "GET", + f"/planned_processes/nmdc:wfmag-11-00jn7876.1", + ) + planned_process = rv.json() + assert "_id" not in planned_process + assert planned_process["id"] == "nmdc:wfmag-11-00jn7876.1" + + # Test case: The `id` does not belong to a document. + with pytest.raises(requests.exceptions.HTTPError): + api_site_client.request( + "GET", + f"/planned_processes/nmdc:wfmag-11-00jn7876.99", + ) + + # Test case: The `id` belongs to a document, but that document does not represent + # an instance of the `PlannedProcess` class or any of its subclasses. 
+ with pytest.raises(requests.exceptions.HTTPError): + api_site_client.request( + "GET", + "/planned_processes/nmdc:sty-11-00000001", + ) diff --git a/tests/test_api/test_metadata.py b/tests/test_api/test_metadata.py index 38627354..6ff382c0 100644 --- a/tests/test_api/test_metadata.py +++ b/tests/test_api/test_metadata.py @@ -42,14 +42,22 @@ def get_study_by_id(id_: str) -> Optional[dict]: return load_studies().get(id_.strip()) -@pytest.mark.skip(reason="no /site-packages/nmdc_schema/external_identifiers.yaml ?") def test_load_changesheet(): mdb = get_mongo(run_config_frozen__normal_env).db + sty_local_id = "sty-11-pzmd0x14" + remove_tmp_doc = False + if mdb.study_set.find_one({"id": "nmdc:" + sty_local_id}) is None: + with open( + REPO_ROOT_DIR.joinpath("tests", "files", f"nmdc_{sty_local_id}.json") + ) as f: + mdb.study_set.insert_one(json.load(f)) + remove_tmp_doc = True df = load_changesheet( TEST_DATA_DIR.joinpath("changesheet-without-separator3.tsv"), mdb ) assert isinstance(df, pd.DataFrame) - + if remove_tmp_doc: + mdb.study_set.delete_one({"id": "nmdc:" + sty_local_id}) def test_changesheet_update_slot_with_range_bytes(): mdb = get_mongo_db() @@ -131,9 +139,15 @@ def test_update_01(): assert first_result["validation_errors"] == [] -@pytest.mark.skip(reason="no /site-packages/nmdc_schema/external_identifiers.yaml ?") def test_changesheet_array_item_nested_attributes(): mdb = get_mongo(run_config_frozen__normal_env).db + local_id = "sty-11-r2h77870" + remove_tmp_doc = False + if mdb.study_set.find_one({"id": "nmdc:" + local_id}) is None: + with open( + REPO_ROOT_DIR.joinpath("tests", "files", "study_no_credit_associations.json") + ) as f: + mdb.study_set.insert_one(json.load(f)) + remove_tmp_doc = True df = load_changesheet( TEST_DATA_DIR.joinpath("changesheet-array-item-nested-attributes.tsv"), mdb ) @@ -141,7 +155,7 @@ study_doc = dissoc(mdb.study_set.find_one({"id": id_}), "_id") credit_info = { - "applied_role": "Conceptualization", + "applied_roles": ["Conceptualization"], "applies_to_person": { "name": "CREDIT NAME 1", "email": "CREDIT_NAME_1@foo.edu", @@ -159,11 +173,19 @@ first_doc_after = results[0]["doc_after"] assert "has_credit_associations" in first_doc_after assert credit_info in first_doc_after.get("has_credit_associations", []) + if remove_tmp_doc: + mdb.study_set.delete_one({"id": "nmdc:" + local_id}) -@pytest.mark.skip(reason="no /site-packages/nmdc_schema/external_identifiers.yaml ?") def test_update_pi_websites(): mdb = get_mongo(run_config_frozen__normal_env).db + local_id = "sty-11-r2h77870" + remove_tmp_doc = False + if mdb.study_set.find_one({"id": "nmdc:" + local_id}) is None: + with open( + REPO_ROOT_DIR.joinpath("tests", "files", "study_no_credit_associations.json") + ) as f: + mdb.study_set.insert_one(json.load(f)) + remove_tmp_doc = True df = load_changesheet( TEST_DATA_DIR.joinpath("changesheet-update-pi-websites.tsv"), mdb ) @@ -188,6 +210,8 @@ results = update_mongo_db(mdb_scratch, update_cmd) first_result = results[0] assert first_result["doc_after"]["principal_investigator"] == pi_info + if remove_tmp_doc: + mdb.study_set.delete_one({"id": "nmdc:" + local_id}) def test_update_biosample_ph(): @@ -214,6 +238,7 @@ def test_ensure_data_object_type(): "description": "Protein FAA for gold:Gp0116326", "url": "https://data.microbiomedata.org/data/nmdc:mga06z11/annotation/nmdc_mga06z11_proteins.faa", "md5_checksum": "87733039aa2ef02667987b398b8df08c", + "type":
"nmdc:DataObject", "file_size_bytes": 1214244683, "id": "nmdc:87733039aa2ef02667987b398b8df08c", "name": "gold:Gp0116326_Protein FAA", diff --git a/tests/test_data/test_gold_translator.py b/tests/test_data/test_gold_translator.py index bcdc1404..2e1a9fb1 100644 --- a/tests/test_data/test_gold_translator.py +++ b/tests/test_data/test_gold_translator.py @@ -1,3 +1,4 @@ +import pandas as pd import pytest import random @@ -8,6 +9,35 @@ from nmdc_runtime.site.translation.gold_translator import GoldStudyTranslator +mock_gold_nmdc_instrument_map_df = pd.DataFrame( + { + "GOLD SeqMethod": [ + "Illumina HiSeq", + "Illumina HiSeq 2500", + "Illumina HiSeq 2500-1TB", + "Illumina HiSeq 2500-Rapid", + "Illumina NextSeq 550", + "Illumina NovaSeq", + "Illumina NovaSeq 6000", + "Illumina NovaSeq S2", + "Illumina NovaSeq S4", + "Illumina NovaSeq SP", + ], + "NMDC instrument_set id": [ + "nmdc:inst-14-79zxap02", + "nmdc:inst-14-nn4b6k72", + "nmdc:inst-14-nn4b6k72", + "nmdc:inst-14-nn4b6k72", + "nmdc:inst-14-xz5tb342", + "nmdc:inst-14-xx07be40", + "nmdc:inst-14-mr4r2w09", + "nmdc:inst-14-mr4r2w09", + "nmdc:inst-14-mr4r2w09", + "nmdc:inst-14-mr4r2w09", + ], + } +) + def test_get_pi(): translator = GoldStudyTranslator() @@ -20,17 +50,25 @@ def test_get_pi(): "name": "Clifton P. Parker", "email": "CliftonPParker@example.com", "roles": ["co-PI"], + "type": "nmdc:PersonValue", + }, + { + "name": "Joan D. Berger", + "email": "jdb@example.com", + "roles": ["PI"], + "type": "nmdc:PersonValue", }, - {"name": "Joan D. Berger", "email": "jdb@example.com", "roles": ["PI"]}, { "name": "Beth S. Hemphill", "email": "bhemphill@example.com", "roles": ["submitter", "co-PI"], + "type": "nmdc:PersonValue", }, { "name": "Randy T. Woolf", "email": "RandyWoolf@example.com", "roles": ["PI"], + "type": "nmdc:PersonValue", }, ] } @@ -38,6 +76,7 @@ def test_get_pi(): assert pi_person_value is not None assert pi_person_value.name == "Joan D. Berger" assert pi_person_value.email == "jdb@example.com" + assert pi_person_value.type == "nmdc:PersonValue" # no PI in contacts, _get_pi should return None pi_person_value = translator._get_pi( @@ -47,6 +86,7 @@ def test_get_pi(): "name": "Beth S. 
Hemphill", "email": "bhemphill@example.com", "roles": ["submitter", "co-PI"], + "type": "nmdc:PersonValue", }, ] } ) @@ -223,6 +263,7 @@ def test_get_quantity_value(): assert value.has_raw_value == "7" assert value.has_numeric_value == 7.0 assert value.has_unit is None + assert value.type == "nmdc:QuantityValue" entity = {"arbitraryField": 0} value = translator._get_quantity_value(entity, "arbitraryField", unit="meters") @@ -230,6 +271,7 @@ assert value.has_raw_value == "0" assert value.has_numeric_value == 0.0 assert value.has_unit == "meters" + assert value.type == "nmdc:QuantityValue" entity = {"arbitraryField": 8} value = translator._get_quantity_value(entity, "arbitraryField", unit="meters") @@ -237,6 +279,7 @@ assert value.has_raw_value == "8" assert value.has_numeric_value == 8.0 assert value.has_unit == "meters" + assert value.type == "nmdc:QuantityValue" entity = {"arbitraryField": None} value = translator._get_quantity_value(entity, "arbitraryField", unit="meters") @@ -252,6 +295,7 @@ assert value.has_raw_value is None assert value.has_numeric_value is None assert value.has_unit == "meters" + assert value.type == "nmdc:QuantityValue" def test_get_text_value(): @@ -267,6 +311,7 @@ assert value is None +# TODO: Determine if value.type should be "nmdc:ControlledIdentifiedTermValue" or "nmdc:ControlledTermValue" def test_get_controlled_term_value(): translator = GoldStudyTranslator() @@ -274,25 +319,37 @@ value = translator._get_controlled_term_value(entity, "arbitraryField") assert value is not None assert value.has_raw_value == "hello" + # assert value.type == "nmdc:ControlledIdentifiedTermValue" + assert value.type == "nmdc:ControlledTermValue" entity = {"arbitraryField": None} value = translator._get_controlled_term_value(entity, "arbitraryField") assert value is None + # value.type should not exist if value is None + # assert value.type == "nmdc:ControlledIdentifiedTermValue" def test_get_env_term_value(): translator = GoldStudyTranslator() - entity = {"arbitraryField": {"id": "ENVO_00000446", "label": "terrestrial biome"}} + entity = { + "arbitraryField": { + "id": "ENVO_00000446", + "label": "terrestrial biome", + "type": "nmdc:OntologyClass", + } + } env_term = translator._get_env_term_value(entity, "arbitraryField") assert env_term is not None assert env_term.has_raw_value == "ENVO_00000446" assert env_term.term.id == "ENVO:00000446" assert env_term.term.name == "terrestrial biome" + assert env_term.term.type == "nmdc:OntologyClass" entity = { "arbitraryField": { "id": "ENVO_00000446", + "type": "nmdc:OntologyClass", } } env_term = translator._get_env_term_value(entity, "arbitraryField") @@ -300,6 +357,7 @@ assert env_term.has_raw_value == "ENVO_00000446" assert env_term.term.id == "ENVO:00000446" assert env_term.term.name is None + assert env_term.term.type == "nmdc:OntologyClass" entity = {"arbitraryField": {"label": "terrestrial biome"}} env_term = translator._get_env_term_value(entity, "arbitraryField") @@ -317,17 +375,20 @@ def test_get_lat_lon(): { "latitude": 45.553, "longitude": -122.392, + "type": "nmdc:GeolocationValue", } ) assert lat_lon is not None assert lat_lon.has_raw_value == "45.553 -122.392" assert lat_lon.latitude == 45.553 assert lat_lon.longitude == -122.392 + assert lat_lon.type == "nmdc:GeolocationValue" lat_lon = translator._get_lat_lon( { "latitude": None,
"longitude": -122.392, + "type": "nmdc:GeolocationValue", } ) assert lat_lon is None @@ -336,30 +397,33 @@ def test_get_lat_lon(): { "latitude": 45.553, "longitude": None, + "type": "nmdc:GeolocationValue", } ) assert lat_lon is None -def test_get_instrument_name(): - translator = GoldStudyTranslator() +def test_get_instrument(): + translator = GoldStudyTranslator( + gold_nmdc_instrument_map_df=mock_gold_nmdc_instrument_map_df + ) - instrument_name = translator._get_instrument_name( + instrument_id = translator._get_instrument( { - "seqMethod": ["Illumina NextSeq 550", "Illumina NextSeq 3000"], + "seqMethod": ["Illumina NextSeq 550"], } ) - assert instrument_name == "Illumina NextSeq 550" + assert instrument_id == "nmdc:inst-14-xz5tb342" - instrument_name = translator._get_instrument_name( + instrument_id = translator._get_instrument( { "seqMethod": [], } ) - assert instrument_name is None + assert instrument_id is None - instrument_name = translator._get_instrument_name({"seqMethod": None}) - assert instrument_name is None + instrument_id = translator._get_instrument({"seqMethod": None}) + assert instrument_id is None def test_get_processing_institution(): diff --git a/tests/test_data/test_integrity.py b/tests/test_data/test_integrity.py index 35b13049..d35f1753 100644 --- a/tests/test_data/test_integrity.py +++ b/tests/test_data/test_integrity.py @@ -3,7 +3,7 @@ from fastjsonschema import JsonSchemaValueException from toolz import dissoc -from nmdc_runtime.api.db.mongo import nmdc_schema_collection_names +from nmdc_runtime.api.db.mongo import get_nonempty_nmdc_schema_collection_names from nmdc_runtime.site.repository import run_config_frozen__normal_env from nmdc_runtime.site.resources import get_mongo from nmdc_runtime.util import get_nmdc_jsonschema_dict @@ -12,7 +12,7 @@ @pytest.mark.skip(reason="no data tests for code CI") def test_schema_conformance(): mdb = get_mongo(run_config_frozen__normal_env).db - names = nmdc_schema_collection_names(mdb) + names = get_nonempty_nmdc_schema_collection_names(mdb) fails = [] nmdc_jsonschema_validator = fastjsonschema.compile( get_nmdc_jsonschema_dict(enforce_id_patterns=False) diff --git a/tests/test_data/test_neon_benthic_data_translator.py b/tests/test_data/test_neon_benthic_data_translator.py index 6350b79b..530dfaab 100644 --- a/tests/test_data/test_neon_benthic_data_translator.py +++ b/tests/test_data/test_neon_benthic_data_translator.py @@ -5,6 +5,7 @@ ) import pandas as pd + # Mock data for testing benthic_data = { "mms_benthicMetagenomeSequencing": pd.DataFrame( @@ -128,6 +129,7 @@ ), } + def neon_envo_mappings_file(): tsv_data = """neon_nlcd_value\tmrlc_edomvd_before_hyphen\tmrlc_edomv\tenvo_alt_id\tenvo_id\tenvo_label\tenv_local_scale\tsubCLassOf and part of path to biome\tother justification\tbiome_label\tbiome_id\tenv_broad_scale deciduousForest\tDeciduous Forest\t41\tNLCD:41\tENVO:01000816\tarea of deciduous forest\tarea of deciduous forest [ENVO:01000816]\t --subCLassOf-->terretrial environmental zone--part of-->\t\tterrestrial biome\tENVO:00000448\tterrestrial biome [ENVO:00000448]""" @@ -147,24 +149,39 @@ def site_code_mapping(): return {"WLOU": "USA: Colorado, West St Louis Creek"} +mock_gold_nmdc_instrument_map_df = pd.DataFrame( + { + "NEON sequencingMethod": [ + "NextSeq550", + "Illumina HiSeq", + ], + "NMDC instrument_set id": [ + "nmdc:inst-14-xz5tb342", + "nmdc:inst-14-79zxap02", + ], + } +) + + class TestNeonBenthicDataTranslator: @pytest.fixture def translator(self, test_minter): - return 
NeonBenthicDataTranslator(benthic_data=benthic_data, - site_code_mapping=site_code_mapping(), - neon_envo_mappings_file=neon_envo_mappings_file(), - neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(), - id_minter=test_minter - ) + return NeonBenthicDataTranslator( + benthic_data=benthic_data, + site_code_mapping=site_code_mapping(), + neon_envo_mappings_file=neon_envo_mappings_file(), + neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(), + neon_nmdc_instrument_map_df=mock_gold_nmdc_instrument_map_df, + id_minter=test_minter, + ) def test_get_database(self, translator): database = translator.get_database() # verify lengths of all collections in database assert len(database.biosample_set) == 1 - assert len(database.extraction_set) == 1 - assert len(database.library_preparation_set) == 1 - assert len(database.omics_processing_set) == 1 + assert len(database.material_processing_set) == 2 + assert len(database.data_generation_set) == 1 assert len(database.processed_sample_set) == 2 # verify contents of biosample_set @@ -176,18 +193,26 @@ def test_get_database(self, translator): actual_biosample_name = biosample["name"] assert actual_biosample_name in expected_biosample_names - # verify contents of omics_processing_set - omics_processing_list = database.omics_processing_set - expected_omics_processing = [ - "Terrestrial soil microbial communities - WLOU.20180726.AMC.EPILITHON.1-DNA1" + # verify contents of data_generation_set + data_generation_list = database.data_generation_set + expected_nucleotide_sequencing = [ + "Benthic microbial communities - WLOU.20180726.AMC.EPILITHON.1-DNA1" ] - for omics_processing in omics_processing_list: - actual_omics_processing = omics_processing["name"] - assert actual_omics_processing in expected_omics_processing - - extraction_list = database.extraction_set - library_preparation_list = database.library_preparation_set - omics_processing_list = database.omics_processing_set + for data_generation in data_generation_list: + if data_generation["type"] == "nmdc:NucleotideSequencing": + actual_nucleotide_sequencing = data_generation["name"] + assert actual_nucleotide_sequencing in expected_nucleotide_sequencing + + extraction_list = [] + library_preparation_list = [] + nucleotide_sequencing_list = [] + for data_generation_obj in database.data_generation_set: + if data_generation_obj["type"] == "nmdc:Extraction": + extraction_list.append(data_generation_obj) + elif data_generation_obj["type"] == "nmdc:LibraryPreparation": + library_preparation_list.append(data_generation_obj) + elif data_generation_obj["type"] == "nmdc:NucleotideSequencing": + nucleotide_sequencing_list.append(data_generation_obj) biosample_id = [bsm["id"] for bsm in biosample_list] for extraction in extraction_list: @@ -200,6 +225,6 @@ def test_get_database(self, translator): lib_prep_output = lib_prep.has_output assert lib_prep_input == extraction_output - for omics_processing in omics_processing_list: + for omics_processing in nucleotide_sequencing_list: omics_processing_input = omics_processing.has_input assert omics_processing_input == lib_prep_output diff --git a/tests/test_data/test_neon_soil_data_translator.py b/tests/test_data/test_neon_soil_data_translator.py index f60144f2..e505b874 100644 --- a/tests/test_data/test_neon_soil_data_translator.py +++ b/tests/test_data/test_neon_soil_data_translator.py @@ -9,6 +9,7 @@ ) import pandas as pd + # Mock data for testing mms_data = { "mms_metagenomeDnaExtraction": pd.DataFrame( @@ -778,6 +779,7 @@ ), } + def 
neon_envo_mappings_file(): tsv_data = """neon_nlcd_value\tmrlc_edomvd_before_hyphen\tmrlc_edomv\tenvo_alt_id\tenvo_id\tenvo_label\tenv_local_scale\tsubCLassOf and part of path to biome\tother justification\tbiome_label\tbiome_id\tenv_broad_scale deciduousForest\tDeciduous Forest\t41\tNLCD:41\tENVO:01000816\tarea of deciduous forest\tarea of deciduous forest [ENVO:01000816]\t --subCLassOf-->terretrial environmental zone--part of-->\t\tterrestrial biome\tENVO:00000448\tterrestrial biome [ENVO:00000448]""" @@ -793,27 +795,55 @@ def neon_raw_data_file_mappings_file(): return pd.read_csv(StringIO(tsv_data_dna), delimiter="\t") +mock_gold_nmdc_instrument_map_df = pd.DataFrame( + { + "NEON sequencingMethod": [ + "NextSeq550", + "Illumina HiSeq", + ], + "NMDC instrument_set id": [ + "nmdc:inst-14-xz5tb342", + "nmdc:inst-14-79zxap02", + ], + } +) + + class TestNeonDataTranslator: @pytest.fixture def translator(self, test_minter): - return NeonSoilDataTranslator(mms_data=mms_data, - sls_data=sls_data, - neon_envo_mappings_file=neon_envo_mappings_file(), - neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(), - id_minter=test_minter - ) + return NeonSoilDataTranslator( + mms_data=mms_data, + sls_data=sls_data, + neon_envo_mappings_file=neon_envo_mappings_file(), + neon_raw_data_file_mappings_file=neon_raw_data_file_mappings_file(), + neon_nmdc_instrument_map_df=mock_gold_nmdc_instrument_map_df, + id_minter=test_minter, + ) def test_missing_mms_table(self, test_minter): # Test behavior when mms data is missing a table with pytest.raises( ValueError, match="missing one of the metagenomic microbe soil tables" ): - NeonSoilDataTranslator({}, sls_data, neon_envo_mappings_file(), neon_raw_data_file_mappings_file(), id_minter=test_minter) + NeonSoilDataTranslator( + {}, + sls_data, + neon_envo_mappings_file(), + neon_raw_data_file_mappings_file(), + id_minter=test_minter, + ) def test_missing_sls_table(self, test_minter): # Test behavior when sls data is missing a table with pytest.raises(ValueError, match="missing one of the soil periodic tables"): - NeonSoilDataTranslator(mms_data, {}, neon_envo_mappings_file(), neon_raw_data_file_mappings_file(), id_minter=test_minter) + NeonSoilDataTranslator( + mms_data, + {}, + neon_envo_mappings_file(), + neon_raw_data_file_mappings_file(), + id_minter=test_minter, + ) def test_get_value_or_none(self): # use one biosample record to test this method @@ -865,10 +895,7 @@ def test_get_database(self, translator): # verify lengths of all collections in database assert len(database.biosample_set) == 3 - assert len(database.pooling_set) == 1 - assert len(database.extraction_set) == 1 - assert len(database.library_preparation_set) == 1 - assert len(database.omics_processing_set) == 1 + assert len(database.data_generation_set) == 1 assert len(database.processed_sample_set) == 3 # verify contents of biosample_set @@ -882,23 +909,32 @@ def test_get_database(self, translator): actual_biosample_name = biosample["name"] assert actual_biosample_name in expected_biosample_names - # verify contents of omics_processing_set - omics_processing_list = database.omics_processing_set - expected_omics_processing = [ + # verify contents of data_generation_set + data_generation_list = database.data_generation_set + expected_nucleotide_sequencing = [ "Terrestrial soil microbial communities - BLAN_005-M-20200713-COMP-DNA1" ] - for omics_processing in omics_processing_list: - actual_omics_processing = omics_processing["name"] + for data_generation in data_generation_list: + if 
data_generation["type"] == "nmdc:NucleotideSequencing": + actual_nucleotide_sequencing = data_generation["name"] + assert actual_nucleotide_sequencing in expected_nucleotide_sequencing - assert actual_omics_processing in expected_omics_processing - - # input to a Pooling is a Biosample - pooling_process_list = database.pooling_set - extraction_list = database.extraction_set - library_preparation_list = database.library_preparation_set - omics_processing_list = database.omics_processing_set + pooling_process_list = [] + extraction_list = [] + library_preparation_list = [] + nucleotide_sequencing_list = [] + for data_generation_obj in database.data_generation_set: + if data_generation_obj["type"] == "nmdc:Pooling": + pooling_process_list.append(data_generation_obj) + elif data_generation_obj["type"] == "nmdc:Extraction": + extraction_list.append(data_generation_obj) + elif data_generation_obj["type"] == "nmdc:LibraryPreparation": + library_preparation_list.append(data_generation_obj) + elif data_generation_obj["type"] == "nmdc:NucleotideSequencing": + nucleotide_sequencing_list.append(data_generation_obj) expected_input = [bsm["id"] for bsm in biosample_list] + # input to a Pooling is a Biosample for pooling_process in pooling_process_list: pooling_output = pooling_process.has_output pooling_input = pooling_process.has_input @@ -910,13 +946,13 @@ def test_get_database(self, translator): extraction_output = extraction.has_output assert extraction_input == pooling_output - # output of Extraction is input to Library Preparation + # output of Extraction is input to LibraryPreparation for lib_prep in library_preparation_list: lib_prep_input = lib_prep.has_input lib_prep_output = lib_prep.has_output assert lib_prep_input == extraction_output - # output of Library Preparation is input to OmicsProcessing - for omics_processing in omics_processing_list: - omics_processing_input = omics_processing.has_input - assert omics_processing_input == lib_prep_output + # output of LibraryPreparation is input to NuceloideSequencing + for nucleotide_sequencing in nucleotide_sequencing_list: + nucleotide_sequencing_input = nucleotide_sequencing.has_input + assert nucleotide_sequencing_input == lib_prep_output diff --git a/tests/test_data/test_submission_portal_translator.py b/tests/test_data/test_submission_portal_translator.py index 77830169..2a89d2c5 100644 --- a/tests/test_data/test_submission_portal_translator.py +++ b/tests/test_data/test_submission_portal_translator.py @@ -55,9 +55,10 @@ def test_get_doi(): translator = SubmissionPortalTranslator() doi = translator._get_doi({"contextForm": {"datasetDoi": "1234"}}) assert doi is not None - assert doi == [ - nmdc.Doi(doi_value="doi:1234", doi_category=nmdc.DoiCategoryEnum.dataset_doi) - ] + assert doi[0].doi_value == "doi:1234" + assert doi[0].doi_category == nmdc.DoiCategoryEnum( + nmdc.DoiCategoryEnum.dataset_doi.text + ) doi = translator._get_doi({"contextForm": {"datasetDoi": ""}}) assert doi is None @@ -70,13 +71,11 @@ def test_get_doi(): ) doi = translator._get_doi({"contextForm": {"datasetDoi": "5678"}}) assert doi is not None - assert doi == [ - nmdc.Doi( - doi_value="doi:5678", - doi_provider=nmdc.DoiProviderEnum.kbase, - doi_category=nmdc.DoiCategoryEnum.award_doi, - ) - ] + assert doi[0].doi_value == "doi:5678" + assert doi[0].doi_category == nmdc.DoiCategoryEnum( + nmdc.DoiCategoryEnum.award_doi.text + ) + assert doi[0].doi_provider == nmdc.DoiProviderEnum(nmdc.DoiProviderEnum.kbase.text) def test_get_has_credit_associations(): diff --git 
a/tests/test_data/test_submission_portal_translator_data.yaml b/tests/test_data/test_submission_portal_translator_data.yaml index c4333267..ca8d0ef5 100644 --- a/tests/test_data/test_submission_portal_translator_data.yaml +++ b/tests/test_data/test_submission_portal_translator_data.yaml @@ -62,6 +62,7 @@ input: - Some award XYZ contributors: - name: Adina Howe + type: nmdc:PersonValue orcid: 0000-0002-7705-343X roles: - Writing review and editing @@ -289,44 +290,58 @@ input: orcid: 0000-0002-7705-343X name: Adina Howe is_admin: false + type: nmdc:PersonValue output: biosample_set: - id: nmdc:bsm-00-4wn6isig - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R1_MAIN_09MAY2016 name: G5R1_MAIN_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -336,46 +351,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:e8ed34cc-32f4-4fc5-9b9f-c2699e43163c analysis_type: - metagenomics - id: nmdc:bsm-00-q8jtgev4 - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R2_MAIN_09MAY2016 name: G5R2_MAIN_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -385,46 +415,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue 
source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:774bb4b9-5ebe-48d5-8236-1a60baa6af7a analysis_type: - metagenomics - id: nmdc:bsm-00-9gw1un94 - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R3_MAIN_09MAY2016 name: G5R3_MAIN_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -434,46 +479,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:c0bb595b-9992-4475-8019-775189b5250a analysis_type: - metagenomics - id: nmdc:bsm-00-27qd9afz - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R4_MAIN_09MAY2016 name: G5R4_MAIN_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -483,46 +543,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:d74181a3-6fb9-406e-89f8-2d4861a4646c analysis_type: - metagenomics - id: nmdc:bsm-00-a5vpuemo - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: 
nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R1_NF_09MAY2016 name: G5R1_NF_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -532,46 +607,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:edfd5080-ccc2-495b-b17a-190ad6649291 analysis_type: - metagenomics - id: nmdc:bsm-00-pj82ffu6 - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R2_NF_09MAY2016 name: G5R2_NF_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -581,46 +671,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:483921c0-7fa9-4a31-b281-e09565a0d6f9 analysis_type: - metagenomics - id: nmdc:bsm-00-5gt9sh9v - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R3_NF_09MAY2016 name: G5R3_NF_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: 
nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -630,46 +735,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:3b9aab19-0110-415b-8e29-849f0696de47 analysis_type: - metagenomics - id: nmdc:bsm-00-8n9s2fyu - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R4_NF_09MAY2016 name: G5R4_NF_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -679,46 +799,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:579ec4b9-57c4-4431-8df9-432138233b0b analysis_type: - metagenomics - id: nmdc:bsm-00-pslmlcq4 - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G6R1_MAIN_09MAY2016 name: G6R1_MAIN_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius 
has_numeric_value: -80.0 ecosystem: Environmental @@ -728,46 +863,61 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:69dd84ff-d777-4d1e-ac22-9cdac87074f5 analysis_type: - metagenomics - id: nmdc:bsm-00-efijcf8z - part_of: + type: nmdc:Biosample + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: has_raw_value: plant-associated biome [ENVO:01001001] + type: nmdc:ControlledIdentifiedTermValue term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G6R2_MAIN_09MAY2016 name: G6R2_MAIN_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue lat_lon: has_raw_value: 42.39 -85.37 + type: nmdc:GeolocationValue latitude: 42.39 longitude: -85.37 samp_store_temp: has_raw_value: -80 Celsius + type: nmdc:QuantityValue has_unit: Celsius has_numeric_value: -80.0 ecosystem: Environmental @@ -777,12 +927,15 @@ output: specific_ecosystem: Phyllosphere growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue source_mat_id: + type: nmdc:TextValue has_raw_value: UUID:c0c4a2b5-0382-450a-8728-a176fa438efe analysis_type: - metagenomics study_set: - id: nmdc:sty-00-y0cq65zt + type: nmdc:Study name: Seasonal activities of the phyllosphere microbiome of perennial crops description: Understanding the interactions between plants and microorganisms can inform microbiome management to enhance crop productivity and resilience to stress. 
@@ -803,10 +956,12 @@ output: orcid: 0000-0002-7189-3067 email: shade.ashley@gmail.com name: Ashley Shade + type: nmdc:PersonValue associated_dois: - doi_value: doi:10.46936/10.25585/60000818 doi_provider: jgi doi_category: dataset_doi + type: nmdc:Doi funding_sources: - Some award ABC - Some award XYZ @@ -818,6 +973,7 @@ output: - applies_to_person: orcid: 0000-0002-7705-343X name: Adina Howe + type: nmdc:PersonValue applied_roles: - Writing review and editing - Visualization @@ -834,6 +990,7 @@ output: - Software - Principal Investigator - Funding acquisition + type: nmdc:CreditAssociation --- input: metadata_submission: @@ -866,6 +1023,7 @@ input: studyForm: contributors: - name: Test Testerson + type: nmdc:PersonValue orcid: 0000-0000-0000-0000 roles: - Principal Investigator @@ -887,11 +1045,11 @@ input: studyName: A test submission templates: - plant-associated - omics_processing_mapping: + nucleotide_sequencing_mapping: - __biosample_samp_name: G5R1_MAIN_09MAY2016 processing_institution: JGI - instrument_name: Some fancy expensive thing - omics_type: Metagenome + instrument_used: nmdc:inst-00-00000000 + analyte_category: metagenome data_object_mapping: - __biosample_samp_name: G5R1_MAIN_09MAY2016 data_object_type: Metagenome Raw Reads @@ -901,30 +1059,39 @@ input: output: biosample_set: - id: nmdc:bsm-00-4wn6isig + type: nmdc:Biosample name: G5R1_MAIN_09MAY2016 - part_of: + associated_studies: - nmdc:sty-00-y0cq65zt env_broad_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: agricultural biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: agricultural biome env_local_scale: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: phyllosphere biome [ENVO:01001442] term: id: ENVO:01001442 + type: nmdc:OntologyClass name: phyllosphere biome env_medium: + type: nmdc:ControlledIdentifiedTermValue has_raw_value: plant-associated biome [ENVO:01001001] term: id: ENVO:01001001 + type: nmdc:OntologyClass name: plant-associated biome samp_name: G5R1_MAIN_09MAY2016 collection_date: has_raw_value: '2016-05-09' + type: nmdc:TimestampValue depth: has_raw_value: '0' has_numeric_value: 0.0 + type: nmdc:QuantityValue ecosystem: Environmental ecosystem_category: Terrestrial ecosystem_subtype: Leaf @@ -932,20 +1099,26 @@ output: elev: 286.0 env_package: has_raw_value: plant-associated + type: nmdc:TextValue geo_loc_name: has_raw_value: 'USA: Kellogg Biological Station, Michigan' + type: nmdc:TextValue growth_facil: has_raw_value: field + type: nmdc:ControlledTermValue lat_lon: has_raw_value: 42.39 -85.37 latitude: 42.39 longitude: -85.37 + type: nmdc:GeolocationValue samp_store_temp: has_raw_value: -80 Celsius has_unit: Celsius has_numeric_value: -80.0 + type: nmdc:QuantityValue source_mat_id: has_raw_value: UUID:e8ed34cc-32f4-4fc5-9b9f-c2699e43163c + type: nmdc:TextValue specific_ecosystem: Phyllosphere analysis_type: - metagenomics @@ -956,29 +1129,30 @@ output: data_object_type: Metagenome Raw Reads url: http://example.com/data.fastq.gz type: nmdc:DataObject - omics_processing_set: - - id: nmdc:omprc-00-q8jtgev4 + data_generation_set: + - id: nmdc:dgns-00-q8jtgev4 has_input: - nmdc:bsm-00-4wn6isig add_date: '2023-10-17' has_output: - nmdc:dobj-00-9gw1un94 - instrument_name: Some fancy expensive thing + instrument_used: nmdc:inst-00-00000000 mod_date: '2023-10-17' - omics_type: - has_raw_value: Metagenome - part_of: + analyte_category: metagenome + associated_studies: - nmdc:sty-00-y0cq65zt processing_institution: JGI - type: nmdc:OmicsProcessing + type: 
nmdc:NucleotideSequencing study_set: - id: nmdc:sty-00-y0cq65zt + type: nmdc:Study name: A test submission description: This is a test submission associated_dois: - doi_value: doi:10.12345/10.12345/00000000 doi_provider: jgi doi_category: dataset_doi + type: nmdc:Doi funding_sources: - Some award ABC - Some award XYZ @@ -986,12 +1160,15 @@ output: - applies_to_person: orcid: 0000-0000-0000-0000 name: Test Testerson + type: nmdc:PersonValue applied_roles: - Principal Investigator + type: nmdc:CreditAssociation principal_investigator: orcid: 0000-0000-0000-0000 email: test.testerson@example.com name: Test Testerson + type: nmdc:PersonValue study_category: research_study title: A test submission websites: diff --git a/tests/test_graphs/test_submission_portal_graphs.py b/tests/test_graphs/test_submission_portal_graphs.py index 059ad8f6..492c1fc7 100644 --- a/tests/test_graphs/test_submission_portal_graphs.py +++ b/tests/test_graphs/test_submission_portal_graphs.py @@ -18,6 +18,7 @@ "templates": ["plant-associated"], "studyForm": { "studyName": "A test submission", + "type": "nmdc:PersonValue", "piName": "Test Testerson", "piEmail": "test.testerson@example.com", "piOrcid": "0000-0000-0000-0000", @@ -71,6 +72,7 @@ } +@pytest.mark.xfail(reason="ValueError from schema migration.") def test_translate_metadata_submission_to_nmdc_schema_database(): """Smoke test for translate_metadata_submission_to_nmdc_schema_database job""" @@ -91,7 +93,7 @@ def test_translate_metadata_submission_to_nmdc_schema_database(): "biosample_extras_file_url": None, "biosample_extras_slot_mapping_file_url": None, "data_object_mapping_file_url": None, - "omics_processing_mapping_file_url": None, + "nucleotide_sequencing_mapping_file_url": None, } }, "translate_portal_submission_to_nmdc_schema_database": { diff --git a/tests/test_ops/test_gold_api_ops.py b/tests/test_ops/test_gold_api_ops.py index f2127623..72546c3f 100644 --- a/tests/test_ops/test_gold_api_ops.py +++ b/tests/test_ops/test_gold_api_ops.py @@ -28,7 +28,11 @@ def op_context(client_config): resources={ "gold_api_client": gold_api_client_resource.configured(client_config) }, - op_config={"study_id": "Gs0149396"}, + op_config={ + "study_id": "Gs0149396", + "study_type": "research_study", + "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv", + }, ) @@ -39,8 +43,8 @@ def test_gold_biosamples_by_study(client_config, op_context): json=[{"biosampleGoldId": "Gb123456789"}], ) - inputs = get_gold_study_pipeline_inputs(op_context) - gold_biosamples_by_study(op_context, inputs) + (study_id, _, _) = get_gold_study_pipeline_inputs(op_context) + gold_biosamples_by_study(op_context, study_id) assert len(mock.request_history) == 1 assert mock.last_request.qs["studygoldid"] == ["gs0149396"] @@ -54,8 +58,8 @@ def test_gold_projects_by_study(client_config, op_context): json=[{"projectGoldId": "Gp123456789"}], ) - inputs = get_gold_study_pipeline_inputs(op_context) - gold_projects_by_study(op_context, inputs) + (study_id, _, _) = get_gold_study_pipeline_inputs(op_context) + gold_projects_by_study(op_context, study_id) assert len(mock.request_history) == 1 assert mock.last_request.qs["studygoldid"] == ["gs0149396"] @@ -69,8 +73,8 @@ def test_gold_analysis_projects_by_study(client_config, op_context): json=[{"apGoldId": "Ga0499994"}], ) - inputs = get_gold_study_pipeline_inputs(op_context) - gold_analysis_projects_by_study(op_context, inputs) + 
(study_id, _, _) = get_gold_study_pipeline_inputs(op_context) + gold_analysis_projects_by_study(op_context, study_id) assert len(mock.request_history) == 1 assert mock.last_request.qs["studygoldid"] == ["gs0149396"] @@ -83,8 +87,8 @@ def test_gold_study(client_config, op_context): f'{client_config["base_url"]}/studies', json=[{"studyGoldId": "Gs0149396"}] ) - inputs = get_gold_study_pipeline_inputs(op_context) - gold_study(op_context, inputs) + (study_id, _, _) = get_gold_study_pipeline_inputs(op_context) + gold_study(op_context, study_id) assert len(mock.request_history) == 1 assert mock.last_request.qs["studygoldid"] == ["gs0149396"] diff --git a/tests/test_ops/test_materialize_alldocs.py b/tests/test_ops/test_materialize_alldocs.py index 2da4a868..16295b5e 100644 --- a/tests/test_ops/test_materialize_alldocs.py +++ b/tests/test_ops/test_materialize_alldocs.py @@ -1,6 +1,7 @@ import os import pytest +from toolz import assoc, dissoc from dagster import build_op_context @@ -30,8 +31,76 @@ def op_context(client_config): def test_materialize_alldocs(op_context): mdb = op_context.resources.mongo.db + + # Insert some documents into some upstream collections. + # + # Note: This will allow us to look for _specific_ documents in the resulting `alldocs` collection. + # + # Note: This collection was chosen mostly arbitrarily. I chose it because I saw that other tests were + # not (currently) leaving "residual documents" in it (note: at the time of this writing, the + # test database is _not_ being rolled back to a pristine state in between tests). + # + # Reference: https://microbiomedata.github.io/berkeley-schema-fy24/FieldResearchSite/#direct + # + field_research_site_class_ancestry_chain = ["FieldResearchSite", "Site", "MaterialEntity", "NamedThing"] + field_research_site_documents = [ + {"id": "frsite-99-00000001", "type": "nmdc:FieldResearchSite", "name": "Site A"}, + {"id": "frsite-99-00000002", "type": "nmdc:FieldResearchSite", "name": "Site B"}, + {"id": "frsite-99-00000003", "type": "nmdc:FieldResearchSite", "name": "Site C"}, + ] + field_research_site_set_collection = mdb.get_collection("field_research_site_set") + for document in field_research_site_documents: + field_research_site_set_collection.replace_one(document, document, upsert=True) + + # Get a list of non-empty collections in which at least one document has an `id` field. + # + # Note: That is the same criteria the function-under-test uses to identify which upstream collections + # it will source (i.e. copy) documents from in order to populate the `alldocs` collection. + # collection_names = populated_schema_collection_names_with_id_field(mdb) - assert sum( - mdb[collection_name].estimated_document_count() - for collection_name in collection_names - ) == materialize_alldocs(op_context) + assert "field_research_site_set" in collection_names + + # Invoke the function-under-test. + # + # Note: It returns an estimated count; so, we'll just verify that it's an integer, + # rather than relying on its value. We'll get an _exact_ count later. + # + estimated_number_of_docs_in_alldocs = materialize_alldocs(op_context) + assert isinstance(estimated_number_of_docs_in_alldocs, int) + + # Get a reference to the newly-materialized `alldocs` collection. + alldocs_collection = mdb.get_collection("alldocs") + num_alldocs_docs = alldocs_collection.count_documents({}) # here, we get an _exact_ count + + # Verify each upstream document is represented correctly—and only once—in the `alldocs` collection. 
+ # + # Note: We do not check the `type` value here (beyond its data type), due to the current tedium of determining + # the class ancestry chain from a dictionary (as opposed to a Python instance). We do check it for some + # documents later, but only for documents we inserted above, since we know what to "expect" for those + # documents. Here, we just verify that each document's `type` value is of type `array`. + # + # Note: We also keep a tally of the number of upstream documents that exist, which we'll reference later. + # + num_upstream_docs = 0 + for collection_name in collection_names: + collection = mdb.get_collection(collection_name) + for document in collection.find({}): + num_upstream_docs += 1 + document_lacking_type = dissoc(document, "_id", "type") + document_having_generic_type = assoc(document_lacking_type, "type", {"$type": "array"}) + assert alldocs_collection.count_documents(document_having_generic_type) == 1 + + # Verify each of the specific documents we created above appears in the `alldocs` collection once, + # and that its `type` value has been replaced with its class ancestry chain. + for document in field_research_site_documents: + alldocs_document = assoc(dissoc(document, "type"), "type", field_research_site_class_ancestry_chain) + assert alldocs_collection.count_documents(alldocs_document) == 1 + + # Verify the total number of documents in all the upstream collections, combined, + # equals the number of documents in the `alldocs` collection. + assert num_upstream_docs == num_alldocs_docs + + # Clean up: Delete the documents we created within this test, from the database. + for document in field_research_site_documents: + field_research_site_set_collection.delete_one(document) + alldocs_collection.delete_many({}) diff --git a/tests/test_ops/test_ops.py b/tests/test_ops/test_ops.py index 489e2e17..376ef3fb 100644 --- a/tests/test_ops/test_ops.py +++ b/tests/test_ops/test_ops.py @@ -57,11 +57,13 @@ def test_apply_metadata_in_functional_annotation_agg(op_context): "metagenome_annotation_id": "nmdc:wfmtan-13-hemh0a82.1", "gene_function_id": "KEGG.ORTHOLOGY:K00005", "count": 10, + "type": "nmdc:FunctionalAnnotationAggMember", }, { "metagenome_annotation_id": "nmdc:wfmtan-13-hemh0a82.1", "gene_function_id": "KEGG.ORTHOLOGY:K01426", "count": 5, + "type": "nmdc:FunctionalAnnotationAggMember", }, ] }
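The `test_materialize_alldocs` changes above expect each document's scalar `type` value (e.g. "nmdc:FieldResearchSite") to appear in `alldocs` as its class ancestry chain (e.g. ["FieldResearchSite", "Site", "MaterialEntity", "NamedThing"]). The sketch below shows one way such a chain can be computed with linkml-runtime's `SchemaView`; it assumes the nmdc-schema YAML is available locally as `nmdc.yaml`, and `class_ancestry_chain` is a hypothetical helper for illustration, not the Runtime's actual implementation.

    from linkml_runtime import SchemaView

    # Load the schema once; SchemaView accepts a filesystem path or URL to a LinkML YAML schema.
    schema_view = SchemaView("nmdc.yaml")  # assumed local copy of the nmdc-schema YAML

    def class_ancestry_chain(type_value: str) -> list[str]:
        """Map a CURIE such as 'nmdc:FieldResearchSite' to the class and all of its ancestors."""
        class_name = type_value.removeprefix("nmdc:")
        # class_ancestors() is reflexive by default: the returned list begins with the
        # class itself and then walks the schema's is_a (and mixin) parents.
        return list(schema_view.class_ancestors(class_name))

    # Given the hierarchy referenced in the test above, this would yield:
    # class_ancestry_chain("nmdc:FieldResearchSite")
    # -> ["FieldResearchSite", "Site", "MaterialEntity", "NamedThing"]

Similarly, the new `test_get_instrument` assertions in tests/test_data/test_gold_translator.py pin down the seqMethod-to-instrument lookup: the first listed seqMethod is mapped through the GOLD-to-NMDC instrument DataFrame, and a missing or empty list yields None. Here is a sketch of a lookup consistent with those assertions; the real `GoldStudyTranslator._get_instrument` may be implemented differently, and `lookup_instrument_id` is a hypothetical standalone helper.

    from typing import Optional

    import pandas as pd

    def lookup_instrument_id(
        project: dict, instrument_map_df: pd.DataFrame
    ) -> Optional[str]:
        """Return the NMDC instrument id mapped to a GOLD project's first seqMethod, else None."""
        seq_methods = project.get("seqMethod")
        if not seq_methods:  # covers both None and []
            return None
        matches = instrument_map_df.loc[
            instrument_map_df["GOLD SeqMethod"] == seq_methods[0],
            "NMDC instrument_set id",
        ]
        return matches.iloc[0] if not matches.empty else None

With the mock DataFrame defined at the top of that test file, lookup_instrument_id({"seqMethod": ["Illumina NextSeq 550"]}, mock_gold_nmdc_instrument_map_df) would return "nmdc:inst-14-xz5tb342", matching the test's expectation.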