Skip to content

Commit

Permalink
Merge pull request #229 from togethercomputer/artem/fix-empty-messages
Browse files Browse the repository at this point in the history
Fix empty messages
  • Loading branch information
artek0chumak authored Jan 2, 2025
2 parents 298901e + ac770fb commit 5cd3742
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

[tool.poetry]
name = "together"
version = "1.3.10"
version = "1.3.11"
authors = [
"Together AI <[email protected]>"
]
Expand Down
8 changes: 8 additions & 0 deletions src/together/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,14 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
error_source="key_value",
)

if len(json_line[message_column]) == 0:
raise InvalidFileFormatError(
message=f"Invalid format on line {idx + 1} of the input file. "
f"Expected a non-empty list of messages. Found empty list",
line_number=idx + 1,
error_source="key_value",
)

for turn_id, turn in enumerate(json_line[message_column]):
if not isinstance(turn, dict):
raise InvalidFileFormatError(
Expand Down
13 changes: 13 additions & 0 deletions tests/unit/test_files_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,16 @@ def test_check_jsonl_extra_column(tmp_path: Path):
report = check_file(file)
assert not report["is_check_passed"]
assert "Found extra column" in report["message"]


def test_check_jsonl_empty_messages(tmp_path: Path):
    """A JSONL row whose ``messages`` list is empty must fail the file check."""
    rows = [{"messages": []}]
    jsonl_path = tmp_path / "empty_messages.jsonl"

    # One JSON document per line, joined without a trailing newline.
    serialized = "\n".join(json.dumps(row) for row in rows)
    with jsonl_path.open("w") as handle:
        handle.write(serialized)

    result = check_file(jsonl_path)
    expected_fragment = "Expected a non-empty list of messages. Found empty list"

    # The check must be reported as failed, with the empty-list explanation.
    assert not result["is_check_passed"]
    assert expected_fragment in result["message"]

0 comments on commit 5cd3742

Please sign in to comment.