Skip to content

Commit

Permalink
[Python] Scanner dataset and projection schema (#1089)
Browse files Browse the repository at this point in the history
  • Loading branch information
eddyxu authored Jul 24, 2023
1 parent 656ace2 commit 00f9f4e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
4 changes: 2 additions & 2 deletions python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,9 +860,9 @@ def from_batches(*args, **kwargs):
raise NotImplementedError("from batches")

@property
def dataset_schema(self):
def dataset_schema(self) -> Schema:
"""The schema with which batches will be read from fragments."""
raise NotImplementedError("")
return self._ds.schema

def scan_batches(self):
"""
Expand Down
11 changes: 11 additions & 0 deletions python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,3 +515,14 @@ def test_scan_with_batch_size(tmp_path: Path):
assert batch.num_rows == 16
df = batch.to_pandas()
assert df["a"].iloc[0] == idx * 16


def test_scanner_schemas(tmp_path: Path):
    """Check the two schemas a scanner exposes when a projection is applied.

    A two-column dataset ("a" and "s") is written, then scanned with only
    column "a" selected. The scanner's ``dataset_schema`` must equal the
    schema of the dataset it was created from, and its ``projected_schema``
    must contain just the selected int64 column.
    """
    dataset_uri = tmp_path / "dataset"
    frame = pd.DataFrame(
        {
            "a": range(50),
            "s": [f"s-{i}" for i in range(50)],
        }
    )
    dataset = lance.write_dataset(frame, dataset_uri)

    projected = dataset.scanner(columns=["a"])
    expected_projection = pa.schema([pa.field("a", pa.int64())])

    # dataset_schema is compared against the source dataset's schema, not
    # against the projection requested above.
    assert projected.dataset_schema == dataset.schema
    assert projected.projected_schema == expected_projection

0 comments on commit 00f9f4e

Please sign in to comment.