Refactor benchmarks' tests #32

Closed: sfmig wants to merge 32 commits into main from smg/tests-refactor-benchmarks.

Commits (32), changes from all commits:
86967fb  fix benchmarks to run with new workflow struct (sfmig, Dec 8, 2023)
0df06c4  edit asv config to use this branch (sfmig, Dec 11, 2023)
3eb0ede  change project name. remove dvcs (sfmig, Dec 11, 2023)
1603425  split tests with and without CLI inputs (sfmig, Dec 20, 2023)
5ffcbd0  pass all parameters to cellfinder_run (sfmig, Dec 20, 2023)
023f1a9  changed required, optional and internal fields. read_config passes. a… (sfmig, Dec 20, 2023)
e0502ed  add signal and background data from local (sfmig, Dec 20, 2023)
cbc1bb3  add missing data config fixtures (sfmig, Dec 20, 2023)
8fe1295  mark GIN download data test as slow (sfmig, Dec 20, 2023)
e278593  all tests passing with default config (sfmig, Dec 20, 2023)
d99cb02  all tests passing with default option (sfmig, Dec 20, 2023)
f83d4e7  make all paths input strings. move methods to class. make some fixtur… (sfmig, Dec 20, 2023)
2f284ba  refactor setup_workflow (sfmig, Dec 21, 2023)
bbc2274  cosmetic changes to fixtures (sfmig, Dec 21, 2023)
50c537e  remove spurious monkeypatched cwd from merge (sfmig, Dec 21, 2023)
1cc0de2  add skips that were removed in merge (sfmig, Dec 21, 2023)
d5f437c  move fixtures to code where they are used (sfmig, Dec 21, 2023)
c1cceea  bring back default_input_config_cellfinder fixture (sfmig, Dec 21, 2023)
7a015a9  finalise adding local config as parameter in test_read_cellfinder_config (sfmig, Dec 21, 2023)
6c2cf40  add local config to remaining unit tests (sfmig, Dec 21, 2023)
724ef94  add config for GIN as parameter in unit tests (sfmig, Dec 21, 2023)
3100457  add local config and GIN config to test main (sfmig, Dec 21, 2023)
48a8264  monkeypatched home in integration tests (only works in main) (sfmig, Dec 21, 2023)
fd3b110  skip tests that use subprocess for now (sfmig, Dec 21, 2023)
2ada202  monkeypatch pooch.retrieve when forcing download (sfmig, Dec 21, 2023)
826b55c  make config local fixture copy downloaded GIN data (rather than re-do… (sfmig, Dec 21, 2023)
d3ac64c  Merge branch 'smg/workflow-tests-caching' into smg/tests-refactor-ben… (sfmig, Dec 21, 2023)
50e1277  fix benchmarks dir name (sfmig, Dec 21, 2023)
c8545b5  fix benchmarks to run with workflow changes (sfmig, Dec 21, 2023)
90b5120  Merge branch 'main' into smg/tests-refactor-benchmarks (sfmig, Jan 12, 2024)
93bfbd7  add pointer to benchmark in asv monkeypatched config and make simpler… (sfmig, Jan 12, 2024)
1fcb6f2  add machine command to test_asv_run (sfmig, Jan 16, 2024)
MANIFEST.in: 2 changes (2 additions, 0 deletions)

@@ -9,8 +9,10 @@ exclude *.ini

recursive-include brainglobe_workflows *.py
recursive-include brainglobe_workflows/configs *.json

recursive-include benchmarks *.py
recursive-exclude benchmarks/results *
recursive-exclude .asv/ *
include asv.conf.json

recursive-exclude * __pycache__
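As a side check (not part of this PR), the effect of these MANIFEST.in rules can be verified by building an sdist and listing its contents. A minimal sketch, assuming the project was built with python -m build; the archive name below is hypothetical and should be replaced with the real one under dist/:

import tarfile

# List the sdist contents and check the MANIFEST.in rules took effect:
# benchmark code and asv.conf.json are shipped, results and .asv are not.
with tarfile.open("dist/brainglobe_workflows-x.y.z.tar.gz") as sdist:  # hypothetical name
    names = sdist.getnames()

assert any(name.endswith("asv.conf.json") for name in names)
assert any("/benchmarks/" in name and name.endswith(".py") for name in names)
assert not any("/benchmarks/results/" in name for name in names)
assert not any("/.asv/" in name for name in names)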
asv.conf.json: 14 changes (7 additions, 7 deletions)

@@ -4,15 +4,15 @@
"version": 1,

// The name of the project being benchmarked
"project": "brainglobe_workflows",
"project": "brainglobe-workflows",

// The project's homepage
"project_url": "https://github.com/brainglobe/brainglobe-workflows",

// The URL or local path of the source code repository for the
// project being benchmarked
// "repo": ".",
"repo": "https://github.com/brainglobe/brainglobe-workflows",
"repo": "https://github.com/brainglobe/brainglobe-workflows.git",

// The Python project's subdirectory in your repo. If missing or
// the empty string, the project is assumed to be located at the root
@@ -40,14 +40,14 @@

// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["smg/tests-refactor"], // for git
"branches": ["smg/tests-refactor-benchmarks"], // for git
// "branches": ["default"], // for mercurial

// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
// ".git" (if local).
"dvcs": "git",
// "dvcs": "git",

// The tool to use to create environments. May be "conda",
// "virtualenv", "mamba" (above 3.8)
@@ -147,19 +147,19 @@

// The directory (relative to the current directory) that benchmarks are
// stored in. If not provided, defaults to "benchmarks"
"benchmark_dir": "brainglobe_benchmarks",
"benchmark_dir": "benchmarks",

// The directory (relative to the current directory) to cache the Python
// environments in. If not provided, defaults to "env"
"env_dir": ".asv/env",

// The directory (relative to the current directory) that raw benchmark
// results are stored in. If not provided, defaults to "results".
"results_dir": "brainglobe_benchmarks/results",
"results_dir": "benchmarks/results",

// The directory (relative to the current directory) that the html tree
// should be written to. If not provided, defaults to "html".
"html_dir": "brainglobe_benchmarks/html",
"html_dir": "benchmarks/html",

// The number of characters to retain in the commit hashes.
// "hash_length": 8,
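A side note on the dvcs change: with "dvcs" commented out, asv determines the version-control system from the "repo" value, which is why the URL now ends in .git. A minimal sketch of checking the updated config, using the same asv.util.load_json helper the tests below use (it tolerates the // comments):

from asv import util

# Load the commented JSON config from the repo root and verify the
# renamed project/directories and the URL-based dvcs detection setup.
config = util.load_json("asv.conf.json", js_comments=True)

assert config["project"] == "brainglobe-workflows"
assert config["repo"].endswith(".git")  # lets asv infer dvcs = git
assert "dvcs" not in config  # commented out, so auto-detected
assert config["benchmark_dir"] == "benchmarks"
print(config["results_dir"], config["html_dir"])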
benchmarks/cellfinder_core.py: 99 changes (71 additions, 28 deletions)

@@ -2,7 +2,6 @@
import shutil
from pathlib import Path

- import pooch
from brainglobe_utils.IO.cells import save_cells
from cellfinder.core.main import main as cellfinder_run
from cellfinder.core.tools.IO import read_with_dask
@@ -81,9 +80,10 @@
# Custom attributes
input_config_path = str(DEFAULT_JSON_CONFIG_PATH_CELLFINDER)

- def setup_cache(
- self,
- ):
+ def setup_cache(self):
+ # here ensure all the data for all possible configs we sweep
+ # thru is available?
+ # but in a benchmark directory? (can I monkeypatch home? :S)
"""
Download the input data from the GIN repository to the local
directory specified in the default_config.json
@@ -108,14 +108,14 @@
config_dict = json.load(cfg)
config = CellfinderConfig(**config_dict)

- # Download data with pooch
- _ = pooch.retrieve(
- url=config.data_url,
- known_hash=config.data_hash,
- path=config._install_path,
- progressbar=True,
- processor=pooch.Unzip(extract_dir=config.data_dir_relative),
- )
+ # # Download data with pooch
+ # _ = pooch.retrieve(
+ # url=config.data_url,
+ # known_hash=config.data_hash,
+ # path=config._install_path,
+ # progressbar=True,
+ # processor=pooch.Unzip(extract_dir=config.data_dir_relative),
+ # )

# Check paths to input data should now exist in config
assert Path(config._signal_dir_path).exists()
@@ -129,12 +129,7 @@
"""

# Run setup
- cfg = setup_cellfinder_workflow(
- [
- "--config",
- self.input_config_path,
- ]
- )
+ cfg = setup_cellfinder_workflow(self.input_config_path)

# Save configuration as attribute
self.cfg = cfg
@@ -162,7 +157,7 @@
A base class for timing benchmarks for the cellfinder workflow.
"""

- def time_workflow_from_cellfinder_run(self):
+ def time_workflow(self):
run_workflow_from_cellfinder_run(self.cfg)


@@ -177,10 +172,10 @@
"""

def time_read_signal_with_dask(self):
- read_with_dask(self.cfg._signal_dir_path)
+ read_with_dask(str(self.cfg._signal_dir_path))

def time_read_background_with_dask(self):
- read_with_dask(self.cfg._background_dir_path)
+ read_with_dask(str(self.cfg._background_dir_path))


class TimeDetectCells(TimeBenchmarkPrepGIN):
@@ -198,13 +193,37 @@
# basic setup
TimeBenchmarkPrepGIN.setup(self)

- # add input data as arrays to config
- self.signal_array = read_with_dask(self.cfg._signal_dir_path)
- self.background_array = read_with_dask(self.cfg._background_dir_path)
+ # add input data as arrays to the config
+ self.signal_array = read_with_dask(str(self.cfg._signal_dir_path))
+ self.background_array = read_with_dask(
+ str(self.cfg._background_dir_path)
+ )

def time_cellfinder_run(self):
cellfinder_run(
- self.signal_array, self.background_array, self.cfg.voxel_sizes
+ self.signal_array,
+ self.background_array,
+ self.cfg.voxel_sizes,
+ self.cfg.start_plane,
+ self.cfg.end_plane,
+ self.cfg.trained_model,
+ self.cfg.model_weights,
+ self.cfg.model,
+ self.cfg.batch_size,
+ self.cfg.n_free_cpus,
+ self.cfg.network_voxel_sizes,
+ self.cfg.soma_diameter,
+ self.cfg.ball_xy_size,
+ self.cfg.ball_z_size,
+ self.cfg.ball_overlap_fraction,
+ self.cfg.log_sigma_size,
+ self.cfg.n_sds_above_mean_thresh,
+ self.cfg.soma_spread_factor,
+ self.cfg.max_cluster_size,
+ self.cfg.cube_width,
+ self.cfg.cube_height,
+ self.cfg.cube_depth,
+ self.cfg.network_depth,
)


@@ -215,12 +234,36 @@
TimeBenchmarkPrepGIN.setup(self)

# add input data as arrays to config
- self.signal_array = read_with_dask(self.cfg._signal_dir_path)
- self.background_array = read_with_dask(self.cfg._background_dir_path)
+ self.signal_array = read_with_dask(str(self.cfg._signal_dir_path))
+ self.background_array = read_with_dask(
+ str(self.cfg._background_dir_path)
+ )

# detect cells
self.detected_cells = cellfinder_run(
- self.signal_array, self.background_array, self.cfg.voxel_sizes
+ self.signal_array,
+ self.background_array,
+ self.cfg.voxel_sizes,
+ self.cfg.start_plane,
+ self.cfg.end_plane,
+ self.cfg.trained_model,
+ self.cfg.model_weights,
+ self.cfg.model,
+ self.cfg.batch_size,
+ self.cfg.n_free_cpus,
+ self.cfg.network_voxel_sizes,
+ self.cfg.soma_diameter,
+ self.cfg.ball_xy_size,
+ self.cfg.ball_z_size,
+ self.cfg.ball_overlap_fraction,
+ self.cfg.log_sigma_size,
+ self.cfg.n_sds_above_mean_thresh,
+ self.cfg.soma_spread_factor,
+ self.cfg.max_cluster_size,
+ self.cfg.cube_width,
+ self.cfg.cube_height,
+ self.cfg.cube_depth,
+ self.cfg.network_depth,
)

def time_save_cells(self):
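The two expanded calls above pass the same 23 positional arguments. A possible follow-up (not in this PR; a sketch assuming the cfg attribute names shown in the diff) is to build the argument tuple once so the detection and saving benchmarks cannot drift apart:

# Hypothetical helper for benchmarks/cellfinder_core.py: one place to
# assemble the cellfinder_run arguments from a CellfinderConfig.
def cellfinder_run_args(cfg, signal_array, background_array):
    return (
        signal_array,
        background_array,
        cfg.voxel_sizes,
        cfg.start_plane,
        cfg.end_plane,
        cfg.trained_model,
        cfg.model_weights,
        cfg.model,
        cfg.batch_size,
        cfg.n_free_cpus,
        cfg.network_voxel_sizes,
        cfg.soma_diameter,
        cfg.ball_xy_size,
        cfg.ball_z_size,
        cfg.ball_overlap_fraction,
        cfg.log_sigma_size,
        cfg.n_sds_above_mean_thresh,
        cfg.soma_spread_factor,
        cfg.max_cluster_size,
        cfg.cube_width,
        cfg.cube_height,
        cfg.cube_depth,
        cfg.network_depth,
    )

# Usage in both benchmark classes, e.g.:
# self.detected_cells = cellfinder_run(
#     *cellfinder_run_args(self.cfg, self.signal_array, self.background_array)
# )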
tests/benchmarks/test_cellfinder.py: 77 changes (46 additions, 31 deletions)

@@ -7,7 +7,7 @@


@pytest.fixture()
- def asv_config_monkeypatched_path(tmp_path: Path) -> str:
+ def asv_config_monkeypatched_path(tmp_path: Path) -> Path:
"""
Create a monkeypatched asv.conf.json file
in a Pytest-generated temporary directory
@@ -20,22 +20,27 @@ def asv_config_monkeypatched_path(tmp_path: Path) -> str:

Returns
-------
- str
+ Path
Path to monkeypatched asv config file
"""
# read reference asv config
- asv_original_path = Path(__file__).resolve().parents[3] / "asv.conf.json"
+ asv_original_path = Path(__file__).resolve().parents[2] / "asv.conf.json"
asv_monkeypatched_dict = util.load_json(
asv_original_path, js_comments=True
)

- # change directories
+ # point to benchmarks directory in config
+ asv_monkeypatched_dict["benchmark_dir"] = str(
+ Path(__file__).resolve().parents[2] / "benchmarks"
+ )
+
+ # change env, results and html directories
for ky in ["env_dir", "results_dir", "html_dir"]:
asv_monkeypatched_dict[ky] = str(
Path(tmp_path) / asv_monkeypatched_dict[ky]
)

- # change repo to URL rather than local
+ # ensure repo points to URL
asv_monkeypatched_dict[
"repo"
] = "https://github.com/brainglobe/brainglobe-workflows.git"
@@ -50,51 +55,61 @@ def asv_config_monkeypatched_path(tmp_path: Path) -> str:
# check json file exists
assert asv_monkeypatched_path.is_file()

- return str(asv_monkeypatched_path)
+ return asv_monkeypatched_path

@pytest.mark.skip(reason="focus of PR32")
def test_run_benchmarks(asv_config_monkeypatched_path):
# --- ideally monkeypatch an asv config so that results are in tmp_dir?

# set up machine (env_dir, results_dir, html_dir)
def test_asv_run(asv_config_monkeypatched_path: Path):
asv_machine_output = subprocess.run(
[
"asv",
"machine",
"--yes",
"--config",
- asv_config_monkeypatched_path,
+ str(asv_config_monkeypatched_path), # use monkeypatched config
]
)
assert asv_machine_output.returncode == 0

# run benchmarks
asv_benchmark_output = subprocess.run(
[
"asv",
"run",
"--quick", # each benchmark function is run only once
"--config",
- asv_config_monkeypatched_path,
- # "--dry-run"
- # # Do not save any results to disk? not truly testing then
+ str(asv_config_monkeypatched_path),
],
- cwd=str(
- Path(asv_config_monkeypatched_path).parent
- ), # run from where asv config is
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT,
- text=True,
- encoding="utf-8",
)
- # STDOUT: "· Cloning project\n· Fetching recent changes\n·
- # Creating environments\n· No __init__.py file in 'benchmarks'\n"

# check returncode
assert asv_benchmark_output.returncode == 0

- # check logs?
-
- # delete directories?
- # check teardown after yield:
- # https://docs.pytest.org/en/6.2.x/fixture.html#yield-fixtures-recommended
+ def test_asv_run_machine_specific(
+ asv_config_monkeypatched_path: Path,
+ ):
+ # setup machine
+ asv_specific_machine_name = "CURRENT_MACHINE"
+ asv_machine_output = subprocess.run(
+ [
+ "asv",
+ "machine",
+ "--machine",
+ asv_specific_machine_name, # name of the current machine
+ "--yes",
+ "--config",
+ str(asv_config_monkeypatched_path), # use monkeypatched config
+ ]
+ )
+ assert asv_machine_output.returncode == 0
+
+ # run benchmarks on machine
+ asv_benchmark_output = subprocess.run(
+ [
+ "asv",
+ "run",
+ "--quick", # each benchmark function is run only once
+ "--config",
+ str(asv_config_monkeypatched_path),
+ "--machine",
+ asv_specific_machine_name,
+ ],
+ )
+ assert asv_benchmark_output.returncode == 0
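On the removed "delete directories?" question: because the fixture writes the env, results and html directories under pytest's tmp_path, cleanup happens automatically between sessions. If explicit teardown were ever wanted, a yield fixture would be the natural place for it; a hypothetical sketch (not part of this PR) following the pytest docs link above:

import shutil

import pytest


@pytest.fixture()
def asv_dirs_with_teardown(tmp_path):
    # Code before `yield` is setup; code after it runs at teardown.
    dirs = {key: tmp_path / key for key in ("env", "results", "html")}
    yield dirs
    for path in dirs.values():
        shutil.rmtree(path, ignore_errors=True)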