Skip to content

Commit

Permalink
fix: Resolve hbase hotspot issue when materializing
Browse files Browse the repository at this point in the history
Signed-off-by: Hai Nguyen <[email protected]>
  • Loading branch information
sudohainguyen committed Oct 20, 2023
1 parent 018d0ea commit cf5179e
Showing 1 changed file with 37 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from feast import Entity
from feast.feature_view import FeatureView
from feast.infra.key_encoding_utils import serialize_entity_key
from feast.infra.online_stores.helpers import compute_entity_id
from feast.infra.online_stores.online_store import OnlineStore
from feast.infra.utils.hbase_utils import HbaseConstants, HbaseUtils
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
Expand Down Expand Up @@ -108,10 +108,11 @@ def online_write_batch(

b = hbase.batch(table_name)
for entity_key, values, timestamp, created_ts in data:
row_key = serialize_entity_key(
row_key = self._hbase_row_key(
entity_key,
entity_key_serialization_version=config.entity_key_serialization_version,
).hex()
feature_view_name=table.name,
config=config,
)
values_dict = {}
for feature_name, val in values.items():
values_dict[
Expand Down Expand Up @@ -157,10 +158,11 @@ def online_read(
result: List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]] = []

row_keys = [
serialize_entity_key(
self._hbase_row_key(
entity_key,
entity_key_serialization_version=config.entity_key_serialization_version,
).hex()
feature_view_name=table.name,
config=config,
)
for entity_key in entity_keys
]
rows = hbase.rows(table_name, row_keys=row_keys)
Expand Down Expand Up @@ -234,6 +236,34 @@ def teardown(
table_name = _table_id(project, table)
hbase.delete_table(table_name)

def _hbase_row_key(
self,
entity_key: EntityKeyProto,
feature_view_name: str,
config: RepoConfig,
) -> bytes:
"""
Computes the HBase row key for a given entity key and feature view name.
Args:
entity_key (EntityKeyProto): The entity key to compute the row key for.
feature_view_name (str): The name of the feature view to compute the row key for.
config (RepoConfig): The configuration for the Feast repository.
Returns:
bytes: The HBase row key for the given entity key and feature view name.
"""
entity_id = compute_entity_id(
entity_key,
entity_key_serialization_version=config.entity_key_serialization_version,
)
# Even though `entity_id` uniquely identifies an entity, we use the same table
# for multiple feature_views with the same set of entities.
# To uniquely identify the row for a feature_view, we suffix the name of the feature_view itself.
# This also ensures that features for entities from various feature_views are
# colocated.
return f"{entity_id}#{feature_view_name}".encode()


def _table_id(project: str, table: FeatureView) -> str:
"""
Expand Down

0 comments on commit cf5179e

Please sign in to comment.