Skip to content

Commit

Permalink
fix: offset MB processed by size of RECORD envelope
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronsteers committed Sep 17, 2024
1 parent f10933f commit 566889d
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion airbyte/_connector_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,13 +401,20 @@ def _execute(
# Fail early if the connector is not installed.
self.executor.ensure_installation(auto_fix=False)

# When calculating MB read, we need to account for the envelope size.
envelope_size = (
len('{"type": "RECORD", "record": }')
+ len('{"stream": "", "data": {}, "emitted_at": 1234567890}')
# + len('"namespace": "", ') # We're knowingly omitting this to keep perf impact low.
)

try:
for line in self.executor.execute(args, stdin=stdin):
try:
message: AirbyteMessage = AirbyteMessage.model_validate_json(json_data=line)
if progress_tracker and message.record:
progress_tracker.tally_bytes_read(
len(line),
bytes_read=len(line) - envelope_size - len(message.record.stream),
stream_name=message.record.stream,
)
self._peek_airbyte_message(message)
Expand Down

0 comments on commit 566889d

Please sign in to comment.