Skip to content

Commit

Permalink
exists() method
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Jul 23, 2022
1 parent 6c59cd2 commit a0d61ab
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 54 deletions.
57 changes: 18 additions & 39 deletions apis/python/src/tiledbsc/v1/soma_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,49 +55,10 @@ def create(self) -> None:
tiledb.group_create(uri=self._uri, ctx=self._ctx)
self._common_create() # object-type metadata etc

def _create_unless_exists(self) -> None:
"""
Auxiliary method for `_add_object`.
"""
# Pre-checking if the group exists by calling tiledb.object_type is simple, however, for
# tiledb-cloud URIs that occurs a penalty of two HTTP requests to the REST server, even
# before a third, successful HTTP request for group-open. Instead, we directly attempt the
# group-create request, checking for an exception.

try:
self.create()
except tiledb.cc.TileDBError as e:
stre = str(e)
# Local-disk/S3/tiledb-cloud exceptions all three say 'already exists'
if "already exists" in stre:
pass
else:
raise e

# TODO
# delete(uri)
# Delete the SOMACollection specified with the URI.

# exists(uri) -> bool
# Return true if object exists and is a SOMACollection.

# # TODO: static/class method?
# # def exists(uri: str) -> bool
# # """
# # Return true if object exists and is a SOMADataFrame.
# # """

# def exists(self) -> bool:
# """
# Tells whether or not there is storage for the group. Storage may be absent when a SOMA
# object has not yet been populated, e.g. before calling `from_anndata` -- or when the
# SOMA has been populated but doesn't have this member (e.g. not all SOMAs have a `varp`).
# """
# # For tiledb:// URIs this is a REST-server request which we'd like to cache.
# # However, remove-and-replace use-cases are possible and common in notebooks
# # and it turns out caching the existence-check isn't a robust approach.
# return bool(tiledb.object_type(self._uri, ctx=self._ctx) == "group")

def __len__(self) -> int:
"""
Returns the number of members in the collection. Implements Python's `len(collection)`.
Expand Down Expand Up @@ -186,6 +147,24 @@ def _tiledb_open(self, mode: str = "r") -> tiledb.Group:
# This works in with-open-as contexts because tiledb.Group has __enter__ and __exit__ methods.
return tiledb.Group(self._uri, mode=mode, ctx=self._ctx)

def _create_unless_exists(self) -> None:
"""
Auxiliary method for `_add_object`.
"""
# Pre-checking if the group exists by calling tiledb.object_type is simple, however, for
# tiledb-cloud URIs that occurs a penalty of two HTTP requests to the REST server, even
# before a third, successful HTTP request for group-open. Instead, we directly attempt the
# group-create request, checking for an exception.
try:
self.create()
except tiledb.cc.TileDBError as e:
stre = str(e)
# Local-disk/S3/tiledb-cloud exceptions all three say 'already exists'
if "already exists" in stre:
pass
else:
raise e

def _get_child_uris(self, member_names: Sequence[str]) -> Dict[str, str]:
"""
Computes the URIs for one or more children of the given object. For local disk, S3, and
Expand Down
9 changes: 0 additions & 9 deletions apis/python/src/tiledbsc/v1/tiledb_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,6 @@ def __init__(
"""
super().__init__(uri, name=name, parent=parent)

# TODO
# def exists(self) -> bool:
# """
# Tells whether or not there is storage for the array. Storage may be absent when a SOMA
# object has not yet been populated, e.g. before calling `from_anndata` -- or when the
# SOMA has been populated but doesn't have this member (e.g. not all SOMAs have a `varp`).
# """
# return bool(tiledb.array_exists(self._uri))

# TODO
# def delete(uri: str) -> None
# """
Expand Down
32 changes: 27 additions & 5 deletions apis/python/src/tiledbsc/v1/tiledb_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,6 @@ def __repr__(self) -> str:
"""
return f"name={self._name},uri={self._uri}"

@abstractmethod
def _tiledb_open(self, mode: str = "r") -> Union[tiledb.Array, tiledb.Group]:
"""
Open the underlying TileDB array or group and return the resulting handle.

Abstract: each concrete subclass supplies the implementation. `mode` defaults to "r";
callers in this file also pass "w" when writing metadata, and use the returned handle
in a `with` statement.
"""

def get_name(self) -> str:
    """Return the name stored on this object (`self._name`)."""
    name = self._name
    return name

Expand All @@ -82,6 +78,32 @@ def get_uri(self) -> str:
def get_type(self) -> str:
    """Return the name of this object's concrete class, e.g. for type-metadata checks."""
    concrete_class = type(self)
    return concrete_class.__name__

def exists(self) -> bool:
    """
    Report whether this object is present in storage and was written as this class.

    The object type recorded in the stored metadata is compared with `self.get_type()`.
    The result is false when the recorded type differs from this class, and also when
    reading the metadata raises `tiledb.cc.TileDBError` -- for example when an object has
    not yet been populated, or when a containing object has been populated but lacks a
    particular member (not every `SOMAMeasurement` has a `varp`).

    For tiledb:// URIs each call is a REST-server request. Caching the answer would be
    attractive, but remove-and-replace use-cases are possible and common in notebooks, so
    caching the existence check is not a robust approach and nothing is cached here.
    """
    # A tiledb.object_type pre-check is deliberately avoided: for tiledb-cloud URIs it incurs
    # a penalty of two HTTP requests to the REST server before the real open request. The
    # metadata is read directly instead (presumably opening the object -- the helper is
    # defined elsewhere), and a TileDB error is treated as "does not exist".
    try:
        recorded_type = self._get_object_type_from_metadata()
        is_match = recorded_type == self.get_type()
    except tiledb.cc.TileDBError:
        is_match = False
    return is_match

@abstractmethod
def _tiledb_open(self, mode: str = "r") -> Union[tiledb.Array, tiledb.Group]:
"""
Open the underlying TileDB array or group and return the resulting handle.

Abstract: each concrete subclass supplies the implementation. `mode` defaults to "r";
callers in this file also pass "w" when writing metadata, and use the returned handle
in a `with` statement.
"""

def _common_create(self) -> None:
"""
Utility method for various constructors.
Expand All @@ -97,7 +119,7 @@ def _set_object_type_metadata(self) -> None:
with self._tiledb_open("w") as obj:
obj.meta.update(
{
util.SOMA_OBJECT_TYPE_METADATA_KEY: self.__class__.__name__,
util.SOMA_OBJECT_TYPE_METADATA_KEY: self.get_type(),
util.SOMA_ENCODING_VERSION_METADATA_KEY: util.SOMA_ENCODING_VERSION,
}
)
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_v1_soma_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_soma_dataframe_non_indexed(tmp_path):
sdf.create(schema=asch, indexed=False)

# Write
for i in range(3):
for _i in range(3):
pydict = {}
pydict["soma_rowid"] = [0, 1, 2, 3, 4]
pydict["foo"] = [10, 20, 30, 40, 50]
Expand Down
18 changes: 18 additions & 0 deletions apis/python/tests/test_v1_soma_experiment_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,22 @@ def test_soma_experiment_basic(tmp_path):
# 1 3 1 8
# 2 4 2 9

# ----------------------------------------------------------------
# Paths exist and are of the right type
assert experiment.exists()
assert experiment.obs.exists()
assert experiment.ms.exists()
assert experiment.ms["meas1"].exists()
assert experiment.ms["meas1"].X.exists()
assert experiment.ms["meas1"].X["data"].exists()

# Paths exist but are not of the right type
assert not t.SOMADataFrame(experiment.get_uri()).exists()
assert not t.SOMACollection(experiment.obs.get_uri()).exists()

# Paths do not exist
assert not t.SOMAExperiment("/nonesuch/no/nope/nope/never").exists()
assert not t.SOMADataFrame("/nonesuch/no/nope/nope/never").exists()

# ----------------------------------------------------------------
# TODO: check more things

0 comments on commit a0d61ab

Please sign in to comment.