Skip to content

Commit

Permalink
feat: allow replacement of entire datafile when the schema lines up c…
Browse files Browse the repository at this point in the history
…orrectly
  • Loading branch information
chebbyChefNEQ committed Feb 2, 2025
1 parent c73d717 commit d757a62
Show file tree
Hide file tree
Showing 4 changed files with 663 additions and 16 deletions.
9 changes: 8 additions & 1 deletion protos/transaction.proto
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ message Transaction {
}
}

// An operation that replaces the data in a region of the table with new data.
//
// NOTE(review): presumably new_datafiles[i] supplies the replacement data for
// the fragment identified by old_fragment_ids[i], which would require both
// lists to have the same length -- confirm against the implementation.
message DataReplacement {
// IDs of the existing fragments whose data is being replaced.
repeated uint64 old_fragment_ids = 1;
// The data files that take the place of the replaced data.
repeated DataFile new_datafiles = 2;
}

// The operation of this transaction.
oneof operation {
Append append = 100;
Expand All @@ -186,11 +192,12 @@ message Transaction {
Update update = 108;
Project project = 109;
UpdateConfig update_config = 110;
DataReplacement data_replacement = 111;
}

// An operation to apply to the blob dataset
// NOTE(review): field numbers 200 and 202 are assigned below while 201 is
// not -- confirm whether 201 is intentionally skipped or should be reserved.
oneof blob_operation {
Append blob_append = 200;
Overwrite blob_overwrite = 202;
}
}
}
10 changes: 9 additions & 1 deletion python/python/lance/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
)
from .dependencies import numpy as np
from .dependencies import pandas as pd
from .fragment import FragmentMetadata, LanceFragment
from .fragment import DataFile, FragmentMetadata, LanceFragment
from .lance import (
CleanupStats,
Compaction,
Expand Down Expand Up @@ -2935,6 +2935,14 @@ class CreateIndex(BaseOperation):
dataset_version: int
fragment_ids: Set[int]

@dataclass
class DataReplacement(BaseOperation):
    """
    Operation that replaces the data files of existing fragments.

    Attributes
    ----------
    fragment_ids: List[int]
        IDs of the fragments whose data is to be replaced.
    new_files: List[DataFile]
        The data files that provide the replacement data.

    Notes
    -----
    NOTE(review): presumably ``new_files[i]`` replaces the data of
    ``fragment_ids[i]``, so both lists are expected to have the same
    length, and the schema of each new file must line up with the data it
    replaces -- confirm against the core implementation.
    """

    fragment_ids: List[int]
    new_files: List[DataFile]


class ScannerBuilder:
def __init__(self, ds: LanceDataset):
Expand Down
Loading

0 comments on commit d757a62

Please sign in to comment.