Minor tweaks to ens/_normalization.py for better readability
- also addresses comments on PR #3000
fselmo committed Jun 22, 2023
1 parent 2ce8651 commit 4b699e2
Showing 1 changed file with 26 additions and 21 deletions.
47 changes: 26 additions & 21 deletions ens/_normalization.py
@@ -36,6 +36,9 @@
 )


+# -- setup -- #
+
+
 def _json_list_mapping_to_dict(
     f: Dict[str, Any],
     list_mapped_key: str,
@@ -82,9 +85,9 @@ class Token:

     restricted: bool = False
 
-    def __init__(self, original_text: str, original_codepoints: List[int]) -> None:
-        self._raw_text = original_text
-        self._original_codepoints = original_codepoints
+    def __init__(self, codepoints: List[int]) -> None:
+        self._original_codepoints = codepoints
+        self._original_text = "".join(chr(cp) for cp in codepoints)
 
     @property
     def codepoints(self) -> List[int]:
@@ -96,7 +99,7 @@ def codepoints(self) -> List[int]:

     @property
     def text(self) -> str:
-        return _cps_to_text(self.codepoints)
+        return _codepoints_to_text(self.codepoints)
 
 
 class EmojiToken(Token):
@@ -220,16 +223,18 @@ def _is_fenced(cp: int) -> bool:
     return cp in [fenced[0] for fenced in NORMALIZATION_SPEC["fenced"]]
 
 
-def _cps_to_text(cps: Union[List[List[int]], List[int]]) -> str:
-    return "".join(chr(cp) if isinstance(cp, int) else _cps_to_text(cp) for cp in cps)
+def _codepoints_to_text(cps: Union[List[List[int]], List[int]]) -> str:
+    return "".join(
+        chr(cp) if isinstance(cp, int) else _codepoints_to_text(cp) for cp in cps
+    )
 
 
 def _validate_tokens_and_get_label_type(tokens: List[Token]) -> str:
     """
     Validate tokens and return the label type.
 
     :param List[Token] tokens: the tokens to validate
-    :raises InvalidName: if the tokens are invalid
+    :raises InvalidName: if any of the tokens are invalid
     """
 
     if all(token.type == TokenType.EMOJI for token in tokens):
@@ -411,6 +416,16 @@ def _build_and_validate_label_from_tokens(tokens: List[Token]) -> Label:
     return label
 
 
+def _buffer_codepoints_to_chars(buffer: Union[List[int], List[List[int]]]) -> str:
+    return "".join(
+        "".join(chr(c) for c in char) if isinstance(char, list) else chr(char)
+        for char in buffer
+    )
+
+
+# -----
+
+
 def normalize_name_ensip15(name: str) -> ENSNormalizedName:
     """
     Normalize an ENS name according to ENSIP-15
@@ -462,13 +477,11 @@ def normalize_name_ensip15(name: str) -> ENSNormalizedName:
             if emoji_codepoint:
                 if len(buffer) > 0:
                     # emit `Text` token with values in buffer
-                    chars = _buffer_codepoints_to_chars(buffer)
-                    tokens.append(TextToken(chars, buffer))
+                    tokens.append(TextToken(buffer))
                     buffer = []  # clear the buffer
 
                 # emit `Emoji` token with values in emoji_codepoint
-                emoji_text = "".join([chr(codepoint) for codepoint in emoji_codepoint])
-                tokens.append(EmojiToken(emoji_text, emoji_codepoint))
+                tokens.append(EmojiToken(emoji_codepoint))
                 _input = _input[len(emoji_codepoint) :]
 
             else:
@@ -492,21 +505,13 @@ def normalize_name_ensip15(name: str) -> ENSNormalizedName:
                     )
 
         if len(buffer) > 0 and len(_input) == 0:
-            chars = _buffer_codepoints_to_chars(buffer)
-            tokens.append(TextToken(chars, buffer))
+            tokens.append(TextToken(buffer))
 
         # create a `Label` instance from tokens
         # - Apply NFC to each `Text` token
         # - Run tokens through "Validation" section of ENSIP-15
         normalized_label = _build_and_validate_label_from_tokens(tokens)
         normalized_labels.append(normalized_label)
 
-    # - Join raw_labels back together
+    # - join labels back together after normalization
     return ENSNormalizedName(normalized_labels)
-
-
-def _buffer_codepoints_to_chars(buffer: Union[List[int], List[List[int]]]) -> str:
-    return "".join(
-        "".join(chr(c) for c in char) if isinstance(char, list) else chr(char)
-        for char in buffer
-    )
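
For context on the Token constructor change above: a minimal, standalone sketch (not web3.py code; the class name is hypothetical) of the pattern this commit adopts, where a token receives only its codepoints and derives its original text from them instead of requiring callers to pass both.

from typing import List


class SketchToken:
    def __init__(self, codepoints: List[int]) -> None:
        self._original_codepoints = codepoints
        # derive the text from the codepoints instead of accepting it as a
        # second constructor argument
        self._original_text = "".join(chr(cp) for cp in codepoints)


token = SketchToken([ord(c) for c in "nick"])
print(token._original_text)  # -> nick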

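
The two helpers touched in this diff, _codepoints_to_text and _buffer_codepoints_to_chars, both flatten a buffer that may hold plain codepoints or nested lists of codepoints into a single string. A self-contained illustration of that behavior, using a generic helper name rather than the library's:

from typing import List, Union


def codepoints_to_text(cps: Union[List[List[int]], List[int]]) -> str:
    # ints become characters; nested lists are flattened recursively
    return "".join(
        chr(cp) if isinstance(cp, int) else codepoints_to_text(cp) for cp in cps
    )


print(codepoints_to_text([104, 105]))          # -> hi
print(codepoints_to_text([[104], [105, 33]]))  # -> hi!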
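
The tokenization loop simplified in the last two hunks follows a buffer-and-flush pattern: an emoji sequence interrupts the running text buffer, which is flushed as a `Text` token before the `Emoji` token is emitted, and any remaining buffer is flushed once the input is exhausted. Below is a simplified, self-contained sketch of that flow; the toy emoji table and function names are illustrative stand-ins, not ENSIP-15's actual data or web3.py's API.

from typing import List, Tuple

EMOJI_SEQUENCES = {(0x1F642,)}  # toy "spec": only the slightly-smiling-face emoji


def match_emoji(cps: List[int]) -> List[int]:
    # return the emoji sequence prefixing `cps`, longest first, or [] if none matches
    for length in range(len(cps), 0, -1):
        if tuple(cps[:length]) in EMOJI_SEQUENCES:
            return cps[:length]
    return []


def tokenize(cps: List[int]) -> List[Tuple[str, List[int]]]:
    tokens: List[Tuple[str, List[int]]] = []
    buffer: List[int] = []
    while cps:
        emoji = match_emoji(cps)
        if emoji:
            if buffer:
                tokens.append(("text", buffer))  # flush pending text first
                buffer = []
            tokens.append(("emoji", emoji))
            cps = cps[len(emoji):]
        else:
            buffer.append(cps[0])
            cps = cps[1:]
    if buffer:
        tokens.append(("text", buffer))
    return tokens


print(tokenize([ord("h"), ord("i"), 0x1F642]))
# -> [('text', [104, 105]), ('emoji', [128578])]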