Skip to content

Commit

Permalink
Merge pull request #406 from msk-mind/1629-optimize-dask
Browse files Browse the repository at this point in the history
fix: #1629 improve API for use with dask
  • Loading branch information
raylim authored Nov 20, 2023
2 parents 27154e2 + 965dd6c commit 106fece
Show file tree
Hide file tree
Showing 19 changed files with 1,808 additions and 954 deletions.
9 changes: 4 additions & 5 deletions src/luna/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pandera as pa
from pandera.engines.pandas_engine import PydanticModel
from pandera.typing import Series
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict


class Slide(BaseModel):
Expand All @@ -19,9 +19,9 @@ class Slide(BaseModel):
channel1_R: Optional[float] = None
channel1_G: Optional[float] = None
channel1_B: Optional[float] = None
properties: Optional[dict] = None

class Config:
extra = "allow"
model_config = ConfigDict(extra="allow")


class ShapeFeaturesSchema(pa.DataFrameModel):
Expand All @@ -47,8 +47,7 @@ class Tile(BaseModel):
tile_size: int
tile_units: str

class Config:
extra = "allow"
model_config = ConfigDict(extra="allow")


class StoredTile(Tile):
Expand Down
15 changes: 13 additions & 2 deletions src/luna/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import json
import os
import re
import shutil
import subprocess
import tempfile
import time
import urllib
import warnings
from contextlib import ExitStack
from contextlib import ExitStack, contextmanager
from functools import wraps
from importlib import import_module
from io import BytesIO
Expand Down Expand Up @@ -77,8 +78,18 @@ def wrapper(*args, **kwargs):
return wrapper


@contextmanager
def make_temp_directory():
    """Context manager yielding the path of a new temporary directory.

    The directory is created with ``tempfile.mkdtemp`` on entry and removed,
    along with everything inside it, when the ``with`` block exits.

    Yields:
        str: path of the temporary directory
    """
    scratch_path = tempfile.mkdtemp()
    try:
        yield scratch_path
    finally:
        # The finally clause runs on normal exit and when the with-body raises.
        shutil.rmtree(scratch_path)


def local_cache_urlpath(
file_key_write_mode: dict[str, str] = {}, dir_key_write_mode: dict[str, str] = {}
file_key_write_mode: dict[str, str] = {},
dir_key_write_mode: dict[str, str] = {},
):
"""Decorator for caching url/paths locally"""

Expand Down
52 changes: 51 additions & 1 deletion src/luna/pathology/cli/dsa_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import requests
from fsspec import open
from loguru import logger
from pandera.typing import DataFrame

from luna.common.models import SlideSchema
from luna.common.utils import get_config, save_metadata, timed
from luna.pathology.dsa.dsa_api_handler import (
get_item_uuid,
Expand Down Expand Up @@ -85,7 +87,7 @@ def cli(
f"Unable to infer image_filename from {annotation_file_urlpath}"
)
logger.info(f"Image filename inferred as {image_filename}")
dsa_uuid = upload_annotation_to_dsa(
dsa_uuid = _upload_annotation_to_dsa(
config["dsa_endpoint_url"],
annotation_file_urlpath,
config["collection_name"],
Expand All @@ -104,6 +106,54 @@ def cli(


def upload_annotation_to_dsa(
    dsa_endpoint_url: str,
    slide_manifest: DataFrame[SlideSchema],
    annotation_column: str,
    collection_name: str,
    image_filename: str,
    username: str,
    password: str,
    force: bool = False,
    insecure: bool = False,
    storage_options: dict = {},
):
    """Upload the annotation of every slide in a manifest to DSA

    For each row of ``slide_manifest`` the value in ``annotation_column`` is
    passed to ``_upload_annotation_to_dsa``, and the first element of what
    that call returns is recorded for the row.

    Args:
        dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1
        slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
        annotation_column (string): column of slide_manifest holding the
            annotation file urlpath to upload for each slide
        collection_name (string): name of the collection in DSA
        image_filename (string): name of the image file in DSA e.g. 123.svs
        username (string): DSA username
        password (string): DSA password
        force (bool): upload even if annotation with same name exists for the slide
        insecure (bool): insecure ssl
        storage_options (dict): options to pass to reading functions

    Returns:
        DataFrame[SlideSchema]: copy of the slide manifest in which
            ``annotation_column`` holds the value recorded for each row
            (presumably the DSA annotation uuid; confirm against
            ``_upload_annotation_to_dsa``)
    """
    # NOTE(review): the same image_filename is sent for every row; confirm
    # that this is intended when the manifest contains more than one slide.
    uuids = []
    for _, slide in slide_manifest.iterrows():
        # Bind the per-slide result to its own name. The previous code
        # assigned it to ``uuids``, discarding the values accumulated so far.
        slide_uuids = _upload_annotation_to_dsa(
            dsa_endpoint_url,
            slide[annotation_column],
            collection_name,
            image_filename,
            username,
            password,
            force,
            insecure,
            storage_options,
        )
        uuids.append(slide_uuids[0])
    return slide_manifest.assign(**{annotation_column: uuids})


def _upload_annotation_to_dsa(
dsa_endpoint_url: str,
annotation_file_urlpaths: Union[str, List[str]],
collection_name: str,
Expand Down
Loading

0 comments on commit 106fece

Please sign in to comment.