From 58d6b33babcca3bc9af9998b93a8e88f9184a2a0 Mon Sep 17 00:00:00 2001 From: coufon Date: Sun, 3 Mar 2024 02:01:56 +0000 Subject: [PATCH 1/2] Update docs for branches --- README.md | 20 +++++++++++++++----- python/pyproject.toml | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6c87b0e..d9358c3 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ print(catalog.datasets()) ### Write and Read -Append, delete some data. Each mutation generates a new version of data, represented by an increasing integer ID. We expect to support the [Iceberg](https://iceberg.apache.org/docs/latest/branching/) style tags and branches for better version management. +Append, delete some data. Each mutation generates a new version of data, represented by an increasing integer ID. Users can add tags to version IDs as aliases. ```py import pyarrow.compute as pc from space import RayOptions @@ -170,12 +170,23 @@ runner.read_all( ) # Read the changes between version 0 and 2. -for change_type, data in runner.diff(0, "after_delete"): - print(change_type) - print(data) +for change in runner.diff(0, "after_delete"): + print(change.change_type) + print(change.data) print("===============") ``` +Create a new branch and make changes in the new branch: + +```py +# The default branch is "main" +ds.add_branch("dev") +ds.set_current_branch("dev") +# Make changes in the new branch; the main branch is not updated. +# Switch back to the main branch. +ds.set_current_branch("main") +``` + ### Transform and Materialized Views Space supports transforming a dataset to a view, and materializing the view to files. The transforms include: @@ -285,7 +296,6 @@ ds.storage.record_manifest() # Accept filter and snapshot_id Space is a new project under active development. :construction: Ongoing tasks: -- Iceberg style version branches. - Performance benchmark and improvement. 
## Disclaimer diff --git a/python/pyproject.toml b/python/pyproject.toml index 61cdac0..1700297 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "space-datasets" -version = "0.0.10" +version = "0.0.11" authors = [{ name = "Space team", email = "no-reply@google.com" }] description = "Unified storage framework for machine learning datasets" readme = "README.md" From 86be26474c3a26362fc6b9590f995e04bb9621e9 Mon Sep 17 00:00:00 2001 From: coufon Date: Sun, 3 Mar 2024 02:08:18 +0000 Subject: [PATCH 2/2] Use yield from --- python/src/space/core/ops/change_data.py | 3 +-- python/src/space/ray/ops/change_data.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/python/src/space/core/ops/change_data.py b/python/src/space/core/ops/change_data.py index 890d0a4..3f4c34d 100644 --- a/python/src/space/core/ops/change_data.py +++ b/python/src/space/core/ops/change_data.py @@ -93,8 +93,7 @@ def read_change_data(storage: Storage, start_snapshot_id: int, """ for snapshot_id in ordered_snapshot_ids(storage, start_snapshot_id, end_snapshot_id): - for change in LocalChangeDataReadOp(storage, snapshot_id, read_options): - yield change + yield from LocalChangeDataReadOp(storage, snapshot_id, read_options) class LocalChangeDataReadOp(StoragePathsMixin): diff --git a/python/src/space/ray/ops/change_data.py b/python/src/space/ray/ops/change_data.py index 3cfb02a..b8e272e 100644 --- a/python/src/space/ray/ops/change_data.py +++ b/python/src/space/ray/ops/change_data.py @@ -38,9 +38,8 @@ def read_change_data(storage: Storage, start_snapshot_id: int, """ for snapshot_id in ordered_snapshot_ids(storage, start_snapshot_id, end_snapshot_id): - for change in _RayChangeDataReadOp(storage, snapshot_id, ray_options, - read_options): - yield change + yield from _RayChangeDataReadOp(storage, snapshot_id, ray_options, + read_options) class _RayChangeDataReadOp(LocalChangeDataReadOp):