csiro-coasts · david-sh-csiro · Jan 21, 2025 · Dec 12, 2024 · Dec 12, 2024 · Dec 12, 2024
diff --git a/docs/releases/development.rst b/docs/releases/development.rst
@@ -2,4 +2,5 @@
 Next release (in development)
 =============================
 
-* ...
+* Fix dataset hash_key generation when the encoding of a geometry variable
+  is missing a dtype (:pr:`166`).
diff --git a/src/emsarray/conventions/_base.py b/src/emsarray/conventions/_base.py
@@ -1973,7 +1973,7 @@ def hash_geometry(self, hash: "hashlib._Hash") -> None:
             # Include the dtype of the data array.
             # A float array and an int array mean very different things,
             # but could have identical byte patterns.
-            hash_string(hash, data_array.encoding['dtype'].name)
+            hash_string(hash, data_array.encoding.get('dtype', data_array.values.dtype).name)
 
             # Include the size and shape of the data.
             # 1D coordinate arrays are very different to 2D coordinate arrays,

diff --git a/tests/operations/test_cache.py b/tests/operations/test_cache.py
@@ -200,3 +200,25 @@ def test_cache_key_cfgrid1d_sha1(datasets: pathlib.Path):
     assert result_cache_key_cf is not None
 
     assert result_cache_key_cf == cache_key_hash_cf1d_sha1
+
+
+def test_cache_key_with_missing_data_array_encoding_type(datasets: pathlib.Path):
+    dataset_ugrid = emsarray.open_dataset(datasets / 'ugrid_mesh2d.nc')
+    # Copy the face-node connectivity variable, then pop 'dtype' from the copy's encoding.
+    data_array = dataset_ugrid.ems.topology.face_node_connectivity
+    data_array_dtype_dropped = data_array.copy()
+    data_array_dtype_dropped.encoding.pop('dtype', None)
+    # One SHA-1 hasher per variant so the two geometry hashes can be compared.
+    with_dtype_hash = hashlib.sha1()
+    without_dtype_hash = hashlib.sha1()
+    # Order matters: hash the dataset as opened, swap in the copy, then hash again.
+    dataset_ugrid.ems.hash_geometry(with_dtype_hash)
+    dataset_ugrid['mesh_face_node'] = data_array_dtype_dropped  # NOTE(review): assumes this is the connectivity variable's name - confirm
+    dataset_ugrid.ems.hash_geometry(without_dtype_hash)
+
+    with_dtype_digest = with_dtype_hash.hexdigest()
+    without_dtype_digest = without_dtype_hash.hexdigest()
+    # Hashing must not raise without an encoding dtype; the digest is expected to change.
+    assert with_dtype_digest != without_dtype_digest
+    # Dropping the encoding entry must leave the copy equal to the original (DataArray.equals).
+    assert data_array_dtype_dropped.equals(data_array)