Skip to content

Commit

Permalink
Merge pull request #625 from Aiven-Open/sebinsunny-reduce-the-snapshot-stalled-log
Browse files Browse the repository at this point in the history

fix: Reduce the stalled logging for snapshot
  • Loading branch information
facetoe authored Jun 25, 2024
2 parents d28945a + e1ac0bb commit 021be69
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 13 deletions.
12 changes: 1 addition & 11 deletions pghoard/basebackup/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ class HasReadAndSeek(HasRead, HasSeek, Protocol):
FilesChunk = Set[Tuple]
SnapshotFiles = Dict[str, SnapshotFile]
PROGRESS_CHECK_INTERVAL = 10
STALLED_PROGRESS_THRESHOLD = 600

EMPTY_FILE_HASH = hashlib.blake2s().hexdigest()

Expand Down Expand Up @@ -88,25 +87,16 @@ def progress_callback(progress_step: ProgressStep, progress_data: ProgressMetric
persisted_progress = PersistedProgress.read(self.metrics)
progress_info = persisted_progress.get(key)
tags: dict = {"phase": progress_step.value}
self.last_flush_time = time.monotonic()

if progress_data["handled"] > progress_info.current_progress:
progress_info.update(progress_data["handled"])
persisted_progress.write(self.metrics)
self.last_flush_time = time.monotonic()
self.metrics.gauge("pghoard.seconds_since_backup_progress_stalled", 0, tags=tags)
self.log.info(
"Updated snapshot progress for %s to %d files; elapsed time since last check: %.2f seconds.",
progress_step.value, progress_data["handled"], elapsed
)
else:
stalled_age = progress_info.age
self.metrics.gauge("pghoard.seconds_since_backup_progress_stalled", stalled_age, tags=tags)

if stalled_age >= STALLED_PROGRESS_THRESHOLD:
self.log.warning(
"Snapshot progress for %s has been stalled for %s seconds.", progress_step, stalled_age
)

self.last_flush_time = time.monotonic()
snapshotter.snapshot(reuse_old_snapshotfiles=False, progress_callback=progress_callback)
snapshot_result = SnapshotResult(end=None, state=None, hashes=None)
Expand Down
3 changes: 1 addition & 2 deletions pghoard/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,9 +412,8 @@ def run_safe(self):
if file_to_transfer.callback_queue:
file_to_transfer.callback_queue.put(result)

operation_type = file_to_transfer.operation
status = "FAILED" if not result.success else "successfully"
log_msg = f"{operation_type.capitalize()} of key: {key}, " \
log_msg = f"{oper.capitalize()} of key: {key}, " \
f"size: {oper_size}, {status} in {time.monotonic() - start_time:.3f}s"
self.log.info(log_msg)

Expand Down

0 comments on commit 021be69

Please sign in to comment.