Skip to content

Commit

Permalink
Minor logging updates
Browse files: browse the repository at this point in the history
  • Loading branch information
john-b-yang committed Jul 8, 2024
1 parent 7c95d6d commit 4356cd7
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 21 deletions.
12 changes: 5 additions & 7 deletions swebench/collect/build_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def load_repo(repo_name):
completed += 1
if has_test_patch(pr):
with_tests += 1
logger.info(f"{len(seen_prs)} instance_ids previously recorded")
logger.info(f"Will skip {len(seen_prs)} pull requests that have already been inspected")

# Write to .all file for all PRs
write_mode_all = "w" if not os.path.exists(all_output) else "a"
Expand All @@ -147,7 +147,8 @@ def load_repo(repo_name):
pull = json.loads(line)
if ix % 100 == 0:
logger.info(
f"[{pull['base']['repo']['full_name']}] ( Up to {ix} checked ) {completed} valid, {with_tests} with tests."
f"[{pull['base']['repo']['full_name']}] (Up to {ix} checked) "
f"{completed} valid, {with_tests} with tests."
)
# Construct instance fields
instance_id = (
Expand Down Expand Up @@ -176,11 +177,8 @@ def load_repo(repo_name):
# If has test suite, write to output file
print(json.dumps(instance), end="\n", flush=True, file=output)
with_tests += 1
logger.info(
f"Total instances: {total_instances}, completed: {completed}, with tests: {with_tests}"
)
logger.info(f"Didn't see {len(seen_prs)} instances previously recorded")
logger.info("\n".join(sorted(seen_prs)))
logger.info(f"[{", ".join(repos.keys())}] Total instances: {total_instances}, completed: {completed}, with tests: {with_tests}")
logger.info(f"[{", ".join(repos.keys())}] Skipped {len(seen_prs)} pull requests that have already been inspected")


if __name__ == "__main__":
Expand Down
14 changes: 4 additions & 10 deletions swebench/collect/get_tasks_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,17 @@ def construct_data_files(data: dict):
max_pulls=max_pulls,
cutoff_date=cutoff_date
)
print(f"Successfully saved PR data for {repo} to {path_pr}")
print(f"Successfully saved PR data for {repo} to {path_pr}")
else:
print(
f"Pull request data for {repo} already exists at {path_pr}, skipping..."
)
print(f"📁 Pull request data for {repo} already exists at {path_pr}, skipping...")

path_task = os.path.join(path_tasks, f"{repo_name}-task-instances.jsonl")
if not os.path.exists(path_task):
print(f"Task instance data for {repo} not found, creating...")
build_dataset(path_pr, path_task, token)
print(
f"Successfully saved task instance data for {repo} to {path_task}"
)
print(f"✅ Successfully saved task instance data for {repo} to {path_task}")
else:
print(
f"Task instance data for {repo} already exists at {path_task}, skipping..."
)
print(f"📁 Task instance data for {repo} already exists at {path_task}, skipping...")
except Exception as e:
print("-"*80)
print(f"Something went wrong for {repo}, skipping: {e}")
Expand Down
14 changes: 10 additions & 4 deletions swebench/collect/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def call_api(self, func: Callable, **kwargs) -> dict|None:
while True:
rl = self.api.rate_limit.get()
logger.info(
f"[{self.owner}/{self.name}] Rate limit exceeded, waiting for 5 minutes, remaining: {rl.resources.core.remaining}"
f"[{self.owner}/{self.name}] Rate limit exceeded for token {self.token[:10]}, "
f"waiting for 5 minutes, remaining calls: {rl.resources.core.remaining}"
)
if rl.resources.core.remaining > 0:
break
Expand Down Expand Up @@ -140,20 +141,25 @@ def get_all_loop(
if not quiet:
rl = self.api.rate_limit.get()
logger.info(
f"[{self.owner}/{self.name}] Processed page {page} ({per_page} values per page). Remaining calls: {rl.resources.core.remaining}"
f"[{self.owner}/{self.name}] Processed page {page} ({per_page} values per page). "
f"Remaining calls: {rl.resources.core.remaining}"
)
if num_pages is not None and page >= num_pages:
break
page += 1
except Exception as e:
# Rate limit handling
logger.error(f"Error processing page {page}: {e}")
logger.error(
f"[{self.owner}/{self.name}] Error processing page {page} "
f"w/ token {self.token[:10]} - {e}"
)
while True:
rl = self.api.rate_limit.get()
if rl.resources.core.remaining > 0:
break
logger.info(
f"[{self.owner}/{self.name}] Waiting for rate limit reset, checking again in 5 minutes"
f"[{self.owner}/{self.name}] Waiting for rate limit reset "
f"for token {self.token[:10]}, checking again in 5 minutes"
)
time.sleep(60 * 5)
if not quiet:
Expand Down

0 comments on commit 4356cd7

Please sign in to comment.