Skip to content

Commit

Permalink
update tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
generall committed May 22, 2024
1 parent 223352e commit 6e5eafc
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
2 changes: 1 addition & 1 deletion fastembed/sparse/bm42.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _rescore_vector(self, vector: Dict[str, float]) -> Dict[int, float]:

new_vector = {}

- for token, value in vector:
+ for token, value in vector.items():
token_id = abs(mmh3.hash(token))
# Examples:
# Num 0: Log(1/1 + 1) = 0.6931471805599453
Expand Down
24 changes: 21 additions & 3 deletions tests/test_attention_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ def test_attention_embeddings():
"I must not fear. Fear is the mind-killer.",
]))

assert len(output) == 1

for result in output:
print(result)
assert len(result.indices) == len(result.values)

output = list(model.embed([
quotes = [
"I must not fear. Fear is the mind-killer.",
"All animals are equal, but some animals are more equal than others.",
"It was a pleasure to burn.",
Expand All @@ -23,7 +25,23 @@ def test_attention_embeddings():
"We're not in Infinity; we're in the suburbs.",
"I was a thousand times more evil than thou!",
"History is merely a list of surprises... It can only prepare us to be surprised yet again.",
]

output = list(model.embed(quotes))

assert len(output) == len(quotes)

for result in output:
assert len(result.indices) == len(result.values)
assert len(result.indices) > 0

# Test support for unknown languages
output = list(model.query_embed([
"привет мир!",
]))

assert len(output) == 1

for result in output:
print(result)
assert len(result.indices) == len(result.values)
assert len(result.indices) == 2

0 comments on commit 6e5eafc

Please sign in to comment.