exists() method
johnkerl committed Jul 23, 2022
1 parent 6c59cd2 commit 1bd3fd1
Showing 10 changed files with 111 additions and 78 deletions.
59 changes: 19 additions & 40 deletions apis/python/src/tiledbsc/v1/soma_collection.py
@@ -55,49 +55,10 @@ def create(self) -> None:
tiledb.group_create(uri=self._uri, ctx=self._ctx)
self._common_create() # object-type metadata etc

def _create_unless_exists(self) -> None:
"""
Auxiliary method for `_add_object`.
"""
# Pre-checking if the group exists by calling tiledb.object_type is simple, however, for
# tiledb-cloud URIs that occurs a penalty of two HTTP requests to the REST server, even
# before a third, successful HTTP request for group-open. Instead, we directly attempt the
# group-create request, checking for an exception.

try:
self.create()
except tiledb.cc.TileDBError as e:
stre = str(e)
# Local-disk/S3/tiledb-cloud exceptions all three say 'already exists'
if "already exists" in stre:
pass
else:
raise e

# TODO
# delete(uri)
# Delete the SOMACollection specified with the URI.

# exists(uri) -> bool
# Return true if object exists and is a SOMACollection.

# # TODO: static/class method?
# # def exists(uri: str) -> bool
# # """
# # Return true if object exists and is a SOMADataFrame.
# # """

# def exists(self) -> bool:
# """
# Tells whether or not there is storage for the group. This might be in case a SOMA
# object has not yet been populated, e.g. before calling `from_anndata` -- or, if the
# SOMA has been populated but doesn't have this member (e.g. not all SOMAs have a `varp`).
# """
# # For tiledb:// URIs this is a REST-server request which we'd like to cache.
# # However, remove-and-replace use-cases are possible and common in notebooks
# # and it turns out caching the existence-check isn't a robust approach.
# return bool(tiledb.object_type(self._uri, ctx=self._ctx) == "group")

def __len__(self) -> int:
"""
Returns the number of members in the collection. Implements Python's `len(collection)`.
@@ -109,7 +70,7 @@ def __contains__(self, member_name: str) -> bool:
Tests for the existence of key in collection.
Implements the `in` operator.
"""
return member_name in self._get_child_uris()
return member_name in self._get_member_names_to_uris()

def get(self, member_name: str) -> TileDBObject:
"""
@@ -186,6 +147,24 @@ def _tiledb_open(self, mode: str = "r") -> tiledb.Group:
# This works in with-open-as contexts because tiledb.Group has __enter__ and __exit__ methods.
return tiledb.Group(self._uri, mode=mode, ctx=self._ctx)

def _create_unless_exists(self) -> None:
"""
Auxiliary method for `_add_object`.
"""
# Pre-checking whether the group exists by calling tiledb.object_type is simple; however, for
# tiledb-cloud URIs it incurs a penalty of two HTTP requests to the REST server, even
# before a third, successful HTTP request for group-open. Instead, we directly attempt the
# group-create request and check for an exception.
try:
self.create()
except tiledb.cc.TileDBError as e:
stre = str(e)
# Local-disk/S3/tiledb-cloud exceptions all three say 'already exists'
if "already exists" in stre:
pass
else:
raise e

def _get_child_uris(self, member_names: Sequence[str]) -> Dict[str, str]:
"""
Computes the URIs for one or more children of the given object. For local disk, S3, and
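
The method above prefers attempt-and-catch over check-then-create to avoid extra REST round trips. As a minimal standalone sketch of the same idempotent-create pattern (the helper name and local path are hypothetical, not part of this changeset):

import tiledb

def create_group_if_absent(uri: str) -> None:
    # Attempt the create directly; treat "already exists" as success instead of
    # paying for a separate existence check before every create.
    try:
        tiledb.group_create(uri)
    except tiledb.TileDBError as e:
        if "already exists" not in str(e):
            raise

create_group_if_absent("/tmp/example_soma_group")
create_group_if_absent("/tmp/example_soma_group")  # second call is a no-op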
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/v1/soma_dataframe.py
@@ -216,7 +216,7 @@ def _create_empty_non_indexed(
tiledb.Array.create(self._uri, sch, ctx=self._ctx)

# TODO
# def get_shape() -> Tuple[int]:
# def get_shape() -> NTuple:
# """
# Return length of each dimension, always a list of length ``ndims``
# """
19 changes: 13 additions & 6 deletions apis/python/src/tiledbsc/v1/soma_dense_nd_array.py
@@ -6,10 +6,10 @@

from .soma_collection import SOMACollection
from .tiledb_array import TileDBArray
from .types import NTuple
from .util import tiledb_type_from_arrow_type


# TODO: rethink parenting -- add a middle layer
class SOMADenseNdArray(TileDBArray):
"""
Represents ``X`` and others.
@@ -32,7 +32,7 @@ def __init__(
def create(
self,
type: pa.DataType,
shape: Union[Tuple, List[int]],
shape: Union[NTuple, List[int]],
) -> None:
"""
Create a SOMADenseNdArray named with the URI.
@@ -92,21 +92,25 @@ def create(

self._common_create() # object-type metadata etc

def get_shape(self) -> Tuple[int]:
def get_shape(self) -> NTuple:
"""
Return length of each dimension, always a list of length ``ndims``
"""
# TODO: cache read
# return self._shape
with self._tiledb_open() as A:
return A.schema.domain.shape
# mypy says:
# error: Returning Any from function declared to return "Tuple[int]" [no-any-return]
return A.schema.domain.shape # type: ignore

def get_ndims(self) -> int:
"""
Return number of index columns
"""
with self._tiledb_open() as A:
return A.schema.domain.ndim
# mypy says:
# Returning Any from function declared to return "int" [no-any-return]
return A.schema.domain.ndim # type: ignore

# TODO
# def get_schema(self) -> Arrow.Schema:
@@ -121,6 +125,7 @@ def get_is_sparse(self) -> bool:
return False

def read(
self,
slice: Any, # TODO: scalar, slice/range, or list of any of the above
# TODO: partitions: Optional[SOMAReadPartitions] = None,,
# TODO: result_order: one of 'row-major' or 'column-major'
@@ -147,7 +152,9 @@ def read(

def write(
self,
coords: Union[Tuple, List[int]],
# TODO: rework callsites with regard to the very latest spec rev
# coords: Union[tuple, tuple[slice], NTuple, List[int]],
coords: Any,
values: pa.Tensor,
) -> None:
"""
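
A short usage sketch for the reworked shape accessors, assuming the package is importable as tiledbsc.v1 and that the constructor takes a URI the way the other classes in this changeset do (the path is hypothetical):

import pyarrow as pa
import tiledbsc.v1 as t

# create() takes an Arrow value type and a shape; get_shape() and get_ndims()
# then read the dimensions back from the stored TileDB schema.
nda = t.SOMADenseNdArray("/tmp/dense_example")
nda.create(type=pa.float64(), shape=(100, 200))
assert nda.get_ndims() == 2
assert nda.get_shape() == (100, 200)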
27 changes: 15 additions & 12 deletions apis/python/src/tiledbsc/v1/soma_sparse_nd_array.py
@@ -1,11 +1,12 @@
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Union

import numpy as np
import pyarrow as pa
import tiledb

from .soma_collection import SOMACollection
from .tiledb_array import TileDBArray
from .types import NTuple
from .util import tiledb_type_from_arrow_type


@@ -33,7 +34,7 @@ def __init__(
def create(
self,
type: pa.DataType,
shape: Union[Tuple, List[int]],
shape: Union[NTuple, List[int]],
) -> None:
"""
Create a SOMASparseNdArray named with the URI.
@@ -95,17 +96,19 @@ def create(

self._common_create() # object-type metadata etc

def get_shape(self) -> Tuple:
"""
Return length of each dimension, always a list of length ``ndims``
"""
return self._shape
# TODO
# def get_shape(self) -> NTuple:
# """
# Return length of each dimension, always a list of length ``ndims``
# """
# return self._shape

def get_ndims(self) -> int:
"""
Return number of index columns
"""
return len(self._shape)
# TODO
# def get_ndims(self) -> int:
# """
# Return number of index columns
# """
# return len(self._shape)

# TODO
# def get_schema(self) -> Arrow.Schema:
9 changes: 0 additions & 9 deletions apis/python/src/tiledbsc/v1/tiledb_array.py
@@ -26,15 +26,6 @@ def __init__(
"""
super().__init__(uri, name=name, parent=parent)

# TODO
# def exists(self) -> bool:
# """
# Tells whether or not there is storage for the array. This might be in case a SOMA
# object has not yet been populated, e.g. before calling `from_anndata` -- or, if the
# SOMA has been populated but doesn't have this member (e.g. not all SOMAs have a `varp`).
# """
# return bool(tiledb.array_exists(self._uri))

# TODO
# def delete(uri: str) -> None
# """
36 changes: 30 additions & 6 deletions apis/python/src/tiledbsc/v1/tiledb_object.py
@@ -69,10 +69,6 @@ def __repr__(self) -> str:
"""
return f"name={self._name},uri={self._uri}"

@abstractmethod
def _tiledb_open(self, mode: str = "r") -> Union[tiledb.Array, tiledb.Group]:
"""Open the underlying TileDB array or Group"""

def get_name(self) -> str:
return self._name

@@ -82,6 +78,32 @@ def get_uri(self) -> str:
def get_type(self) -> str:
return type(self).__name__

def exists(self) -> bool:
"""
Returns true if the object exists and has the desired class name.
This can be false if the object has not yet been populated, or if a containing object has
been populated but doesn't have a particular member (e.g. not all `SOMAMeasurement` objects
have a `varp`).
For tiledb:// URIs this is a REST-server request which we'd like to cache. However,
remove-and-replace use-cases are possible and common in notebooks, and it turns out
caching the existence-check isn't a robust approach.
"""

# Pre-checking whether the group exists by calling tiledb.object_type is simple; however, for
# tiledb-cloud URIs it incurs a penalty of two HTTP requests to the REST server, even
# before a third, successful HTTP request for group-open. Instead, we directly attempt the
# group-open request and check for an exception.
try:
return self._get_object_type_from_metadata() == self.get_type()
except tiledb.cc.TileDBError:
return False

@abstractmethod
def _tiledb_open(self, mode: str = "r") -> Union[tiledb.Array, tiledb.Group]:
"""Open the underlying TileDB array or Group"""

def _common_create(self) -> None:
"""
Utility method for various constructors.
@@ -97,7 +119,7 @@ def _set_object_type_metadata(self) -> None:
with self._tiledb_open("w") as obj:
obj.meta.update(
{
util.SOMA_OBJECT_TYPE_METADATA_KEY: self.__class__.__name__,
util.SOMA_OBJECT_TYPE_METADATA_KEY: self.get_type(),
util.SOMA_ENCODING_VERSION_METADATA_KEY: util.SOMA_ENCODING_VERSION,
}
)
@@ -106,4 +128,6 @@ def _get_object_type_from_metadata(self) -> str:
"""
Returns the class name associated with the group/array.
"""
return self.metadata.get(util.SOMA_OBJECT_TYPE_METADATA_KEY)
# mypy says:
# error: Returning Any from function declared to return "str" [no-any-return]
return self.metadata.get(util.SOMA_OBJECT_TYPE_METADATA_KEY) # type: ignore
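
For reference, a small sketch of how the new exists() reads in practice, consistent with the test updates below (the import alias and path are assumptions):

import tiledbsc.v1 as t

# False when there is no storage at the URI at all; also False when storage
# exists but was written by a different SOMA class, since exists() compares
# the stored class-name metadata against get_type().
assert not t.SOMADataFrame("/nonesuch/never/created").exists()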
4 changes: 3 additions & 1 deletion apis/python/src/tiledbsc/v1/types.py
@@ -1,5 +1,5 @@
import pathlib
from typing import List, Sequence, Union
from typing import List, Sequence, Tuple, Union

import numpy as np
import pandas as pd
@@ -12,3 +12,5 @@
Labels = Union[Sequence[str], pd.Index]

Matrix = Union[np.ndarray, sp.csr_matrix, sp.csc_matrix]

NTuple = Union[Tuple[int], Tuple[int, int]]
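
The new alias simply names the 1-D and 2-D shape tuples used by the ND-array classes; for illustration:

shape_1d: NTuple = (100,)       # shape of a 1-D array
shape_2d: NTuple = (100, 200)   # shape of a 2-D array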
4 changes: 3 additions & 1 deletion apis/python/src/tiledbsc/v1/util.py
@@ -16,7 +16,9 @@ def tiledb_type_from_arrow_type(t: pa.DataType) -> type:
# isn't acceptable to tiledb -- we must say str.
return str
else:
return t.to_pandas_dtype()
# mypy says:
# Returning Any from function declared to return "type" [no-any-return]
return t.to_pandas_dtype() # type: ignore


def is_tiledb_creation_uri(uri: str) -> bool:
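
A quick illustration of the Arrow-to-TileDB type mapping, assuming the module is importable as tiledbsc.v1.util:

import pyarrow as pa
from tiledbsc.v1.util import tiledb_type_from_arrow_type

# Arrow strings map to Python str (TileDB rejects NumPy's object dtype);
# other Arrow types fall through to pyarrow's to_pandas_dtype(), giving a
# NumPy type such as numpy.float64.
assert tiledb_type_from_arrow_type(pa.string()) is str
assert tiledb_type_from_arrow_type(pa.float64()) is not str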
4 changes: 2 additions & 2 deletions apis/python/tests/test_v1_soma_dataframe.py
@@ -18,7 +18,7 @@ def test_soma_dataframe_non_indexed(tmp_path):
sdf.create(schema=asch, indexed=False)

# Write
for i in range(3):
for _i in range(3):
pydict = {}
pydict["soma_rowid"] = [0, 1, 2, 3, 4]
pydict["foo"] = [10, 20, 30, 40, 50]
@@ -115,7 +115,7 @@ def test_soma_dataframe_indexed(tmp_path):
sdf.create(schema=asch, indexed=True, index_column_names=["foo"])

# Write
for i in range(3):
for _ in range(3):
pydict = {}
pydict["foo"] = [10, 20, 30, 40, 50]
pydict["bar"] = [4.1, 5.2, 6.3, 7.4, 8.5]
25 changes: 25 additions & 0 deletions apis/python/tests/test_v1_soma_experiment_basic.py
@@ -102,15 +102,22 @@ def test_soma_experiment_basic(tmp_path):
assert len(experiment) == 2
assert isinstance(experiment.obs, t.SOMADataFrame)
assert isinstance(experiment.ms, t.SOMACollection)
assert "obs" in experiment
assert "ms" in experiment
assert "nonesuch" not in experiment

assert len(experiment.ms) == 1
assert isinstance(experiment.ms["meas1"], t.SOMAMeasurement)

assert len(experiment.ms["meas1"]) == 2
assert "meas1" in experiment.ms
assert "meas2" not in experiment.ms
assert isinstance(experiment.ms["meas1"].var, t.SOMADataFrame)
assert isinstance(experiment.ms["meas1"].X, t.SOMACollection)

assert len(experiment.ms["meas1"].X) == 1
assert "data" in experiment.ms["meas1"].X
assert "nonesuch" not in experiment.ms["meas1"].X
assert isinstance(experiment.ms["meas1"].X["data"], t.SOMASparseNdArray)

# >>> experiment.ms.meas1.X.data._tiledb_open().df[:]
@@ -119,4 +126,22 @@
# 1 3 1 8
# 2 4 2 9

# ----------------------------------------------------------------
# Paths exist and are of the right type
assert experiment.exists()
assert experiment.obs.exists()
assert experiment.ms.exists()
assert experiment.ms["meas1"].exists()
assert experiment.ms["meas1"].X.exists()
assert experiment.ms["meas1"].X["data"].exists()

# Paths exist but are not of the right type
assert not t.SOMADataFrame(experiment.get_uri()).exists()
assert not t.SOMACollection(experiment.obs.get_uri()).exists()

# Paths do not exist
assert not t.SOMAExperiment("/nonesuch/no/nope/nope/never").exists()
assert not t.SOMADataFrame("/nonesuch/no/nope/nope/never").exists()

# ----------------------------------------------------------------
# TODO: check more things
