Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: preserve list meanings #575

Merged
merged 13 commits into from
Dec 12, 2024
66 changes: 34 additions & 32 deletions google/cloud/datastore/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
"""

import datetime
import itertools

from google.protobuf import struct_pb2
from google.type import latlng_pb2
Expand All @@ -43,36 +42,29 @@ def _get_meaning(value_pb, is_list=False):
:param is_list: Boolean indicating if the ``value_pb`` contains
a list value.

:rtype: int
:rtype: int | Tuple[Optional[int], Optional[list[int | None]]] | None
:returns: The meaning for the ``value_pb`` if one is set, else
:data:`None`. For a list value, if there are disagreeing
means it just returns a list of meanings. If all the
list meanings agree, it just condenses them.
:data:`None`. For a list value, returns a tuple of
the root meaning of the list, and a list of meanings
of each sub-value. If subvalues are all empty, returns
:data:`None` instead of a list.
"""
if is_list:
root_meaning = value_pb.meaning or None
values = value_pb.array_value.values

# An empty list will have no values, hence no shared meaning
# set among them.
if len(values) == 0:
return None

# We check among all the meanings, some of which may be None,
# the rest which may be enum/int values.
all_meanings = [_get_meaning(sub_value_pb) for sub_value_pb in values]
unique_meanings = set(all_meanings)

if len(unique_meanings) == 1:
# If there is a unique meaning, we preserve it.
return unique_meanings.pop()
else: # We know len(value_pb.array_value.values) > 0.
# If the meaning is not unique, just return all of them.
return all_meanings

elif value_pb.meaning: # Simple field (int32).
return value_pb.meaning

return None
sub_meanings = [sub_value_pb.meaning or None for sub_value_pb in values]
if not any(meaning is not None for meaning in sub_meanings):
Copy link

@NickChittle NickChittle Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious, why wouldn't we just return a tuple of (None, []), instead of transforming the return type here to just a single None?

(To me it would be simpler to have a guaranteed format for List, and a guaranteed format for non-lists, or maybe this saves some RAM?)

Copy link
Contributor Author

@daniel-sanche daniel-sanche Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case it's not clear: the reason for this transformation is to flatten giant lists of [None, None, None, ...], so we don't have to iterate over the whole thing again later, when there are no meanings present (which is probably the most common situation)

You're right that we could return an empty list instead of None here to signal that there are no meanings present. But it's a trade-off. That would buy us type consistency, but then we'd lose the property that the sub-meaning list is always be the same size as the original array. We'd need to remember to do size comparisons before iterating, so we don't really win anything from that change. And size mismatches wouldn't be flagged by type checkers like missing None checks, so those bugs would be harder to notice.

TL;DR: This is a special case in our processing logic, and it's usually safer to use None to signal special cases, instead of returning lists with unpredictable sizes

sub_meanings = None
if root_meaning is None and sub_meanings is None:
# no meanings to save
return None
else:
return root_meaning, sub_meanings
else:
return value_pb.meaning or None


def _new_value_pb(entity_pb, name):
Expand Down Expand Up @@ -156,6 +148,10 @@ def entity_from_protobuf(pb):
def _set_pb_meaning_from_entity(entity, name, value, value_pb, is_list=False):
"""Add meaning information (from an entity) to a protobuf.

value_pb is assumed to have no `meaning` data currently present.
This means if the entity's meaning data is None, this function will do nothing,
rather than removing any existing data.

:type entity: :class:`google.cloud.datastore.entity.Entity`
:param entity: The entity to be turned into a protobuf.

Expand All @@ -181,14 +177,20 @@ def _set_pb_meaning_from_entity(entity, name, value, value_pb, is_list=False):
if orig_value is not value:
return

# For lists, we set meaning on each sub-element.
if is_list:
if not isinstance(meaning, list):
meaning = itertools.repeat(meaning)
val_iter = zip(value_pb.array_value.values, meaning)
for sub_value_pb, sub_meaning in val_iter:
if sub_meaning is not None:
sub_value_pb.meaning = sub_meaning
if meaning is None:
# no meaning data to set
return
elif is_list:
# for lists, set meaning on the root pb and on each sub-element
root_meaning, sub_meaning_list = meaning
if root_meaning is not None:
value_pb.meaning = root_meaning
if sub_meaning_list:
for sub_value_pb, sub_meaning in zip(
value_pb.array_value.values, sub_meaning_list
):
if sub_meaning is not None:
sub_value_pb.meaning = sub_meaning
else:
value_pb.meaning = meaning

Expand Down
148 changes: 142 additions & 6 deletions tests/unit/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,19 +361,21 @@ def test_entity_to_protobuf_w_variable_meanings():
entity = Entity()
name = "quux"
entity[name] = values = [1, 20, 300]
meaning = 9
entity._meanings[name] = ([None, meaning, None], values)
root_meaning = 31
sub_meaning = 9
entity._meanings[name] = ((root_meaning, [None, sub_meaning, None]), values)
entity_pb = entity_to_protobuf(entity)

# Construct the expected protobuf.
expected_pb = entity_pb2.Entity()
value_pb = _new_value_pb(expected_pb, name)
value_pb.meaning = root_meaning
value0 = value_pb.array_value.values.add()
value0.integer_value = values[0]
# The only array entry with a meaning is the middle one.
value1 = value_pb.array_value.values.add()
value1.integer_value = values[1]
value1.meaning = meaning
value1.meaning = sub_meaning
value2 = value_pb.array_value.values.add()
value2.integer_value = values[2]

Expand Down Expand Up @@ -1179,7 +1181,46 @@ def test__get_meaning_w_array_value():
sub_value_pb2.string_value = "bye"

result = _get_meaning(value_pb, is_list=True)
assert meaning == result
# should preserve sub-value meanings as list
assert (None, [meaning, meaning]) == result


def test__get_meaning_w_array_value_root_meaning():
from google.cloud.datastore_v1.types import entity as entity_pb2
from google.cloud.datastore.helpers import _get_meaning

value_pb = entity_pb2.Value()
meaning = 9
value_pb.meaning = meaning
sub_value_pb1 = value_pb._pb.array_value.values.add()
sub_value_pb2 = value_pb._pb.array_value.values.add()

sub_value_pb1.string_value = "hi"
sub_value_pb2.string_value = "bye"

result = _get_meaning(value_pb, is_list=True)
# should preserve sub-value meanings as list
assert (meaning, None) == result


def test__get_meaning_w_array_value_root_and_sub_meanings():
from google.cloud.datastore_v1.types import entity as entity_pb2
from google.cloud.datastore.helpers import _get_meaning

value_pb = entity_pb2.Value()
root_meaning = 9
sub_meaning = 3
value_pb.meaning = root_meaning
sub_value_pb1 = value_pb._pb.array_value.values.add()
sub_value_pb2 = value_pb._pb.array_value.values.add()

sub_value_pb1.meaning = sub_value_pb2.meaning = sub_meaning
sub_value_pb1.string_value = "hi"
sub_value_pb2.string_value = "bye"

result = _get_meaning(value_pb, is_list=True)
# should preserve sub-value meanings as list
assert (root_meaning, [sub_meaning, sub_meaning]) == result


def test__get_meaning_w_array_value_multiple_meanings():
Expand All @@ -1198,7 +1239,7 @@ def test__get_meaning_w_array_value_multiple_meanings():
sub_value_pb2.string_value = "bye"

result = _get_meaning(value_pb, is_list=True)
assert result == [meaning1, meaning2]
assert result == (None, [meaning1, meaning2])


def test__get_meaning_w_array_value_meaning_partially_unset():
Expand All @@ -1215,7 +1256,102 @@ def test__get_meaning_w_array_value_meaning_partially_unset():
sub_value_pb2.string_value = "bye"

result = _get_meaning(value_pb, is_list=True)
assert result == [meaning1, None]
assert result == (None, [meaning1, None])


def test__get_meaning_w_array_value_meaning_fully_unset():
from google.cloud.datastore_v1.types import entity as entity_pb2
from google.cloud.datastore.helpers import _get_meaning

value_pb = entity_pb2.Value()
sub_value_pb1 = value_pb._pb.array_value.values.add()
sub_value_pb2 = value_pb._pb.array_value.values.add()

sub_value_pb1.string_value = "hi"
sub_value_pb2.string_value = "bye"

result = _get_meaning(value_pb, is_list=True)
assert result is None


@pytest.mark.parametrize("orig_root_meaning", [0, 1])
@pytest.mark.parametrize("orig_sub_meaning", [0, 1])
def test__set_pb_meaning_w_array_value_fully_unset(orig_root_meaning, orig_sub_meaning):
"""
call _set_pb_meaning_from_entity with meaning=None data.
Should not touch proto's meaning field
"""
from google.cloud.datastore_v1.types import entity as entity_pb2
from google.cloud.datastore.helpers import _set_pb_meaning_from_entity
from google.cloud.datastore.entity import Entity

orig_pb = entity_pb2.Entity()
value_pb = orig_pb._pb.properties.get_or_create("value")
value_pb.meaning = orig_root_meaning
sub_value_pb1 = value_pb.array_value.values.add()
sub_value_pb1.meaning = orig_sub_meaning

entity = Entity(key="key")
entity._meanings = {"value": ((None, None), None)}
_set_pb_meaning_from_entity(entity, "value", None, value_pb, is_list=True)
assert value_pb.meaning == orig_root_meaning
assert value_pb.array_value.values[0].meaning == orig_sub_meaning


@pytest.mark.parametrize("orig_meaning", [0, 1])
def test__set_pb_meaning_w_value_unset(orig_meaning):
"""
call _set_pb_meaning_from_entity with meaning=None data.
Should not touch proto's meaning field
"""
from google.cloud.datastore_v1.types import entity as entity_pb2
from google.cloud.datastore.helpers import _set_pb_meaning_from_entity
from google.cloud.datastore.entity import Entity

orig_pb = entity_pb2.Entity()
value_pb = orig_pb._pb.properties.get_or_create("value")
value_pb.meaning = orig_meaning

entity = Entity(key="key")
entity._meanings = {"value": (None, None)}
_set_pb_meaning_from_entity(entity, "value", None, value_pb, is_list=False)
assert value_pb.meaning == orig_meaning


def test__array_w_meaning_end_to_end():
"""
Test proto->entity->proto with an array with a meaning field
"""
from google.cloud.datastore_v1.types import entity as entity_pb2
from google.cloud.datastore.helpers import entity_from_protobuf
from google.cloud.datastore.helpers import entity_to_protobuf

orig_pb = entity_pb2.Entity()
value_pb = orig_pb._pb.properties.get_or_create("value")
value_pb.meaning = 31
sub_value_pb1 = value_pb.array_value.values.add()
sub_value_pb1.double_value = 1
sub_value_pb1.meaning = 1
sub_value_pb2 = value_pb.array_value.values.add()
sub_value_pb2.double_value = 2
sub_value_pb3 = value_pb.array_value.values.add()
sub_value_pb3.double_value = 3
sub_value_pb3.meaning = 3
# convert to entity
entity = entity_from_protobuf(orig_pb._pb)
assert entity._meanings["value"][0] == (31, [1, None, 3])
assert entity._meanings["value"][1] == [1, 2, 3]
# convert back to pb
output_entity_pb = entity_to_protobuf(entity)
final_pb = output_entity_pb._pb.properties["value"]
assert final_pb.meaning == 31
assert len(final_pb.array_value.values) == 3
assert final_pb.array_value.values[0].meaning == 1
assert final_pb.array_value.values[0].double_value == 1
assert final_pb.array_value.values[1].meaning == 0
assert final_pb.array_value.values[1].double_value == 2
assert final_pb.array_value.values[2].meaning == 3
assert final_pb.array_value.values[2].double_value == 3


def _make_geopoint(*args, **kwargs):
Expand Down
Loading