Skip to content

Commit

Permalink
Merge pull request #4757 from freelawproject/add-independent-sweep-indexer-wait-setting
Browse files Browse the repository at this point in the history

Added a separate setting for waiting between sweep indexer chunks
  • Loading branch information
albertisfu authored Nov 29, 2024
2 parents 8c15b92 + ceb049e commit 61e5660
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
4 changes: 4 additions & 0 deletions cl/corpus_importer/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ def update_latest_case_id_and_schedule_iquery_sweep(docket: Docket) -> None:
countdown=task_scheduled_countdown,
queue=settings.CELERY_IQUERY_QUEUE,
)
logger.info(
f"Enqueued iquery docket case ID: {iquery_pacer_case_id_current} "
f"for court {court_id} with countdown {task_scheduled_countdown}"
)

# Update the iquery_pacer_case_id_current in Redis
r.hset(
Expand Down
4 changes: 2 additions & 2 deletions cl/search/management/commands/sweep_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,13 +406,13 @@ def process_queryset(

accumulated_chunk += len(chunk)
if not testing_mode:
# Wait for 1/ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL
# Wait for 1/ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS
# before processing the next chunk.
# e.g.: With ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS set to 10 and a chunk size of 10,
# it will wait for 0.1 seconds for every 10 documents processed,
# maintaining an index rate of 100 documents per second.
time.sleep(
1 / settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL # type: ignore
1 / settings.ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS # type: ignore
)
self.stdout.write(
"\rProcessed {}/{}, ({:.0%}), last {} ID indexed: {},".format(
Expand Down
5 changes: 4 additions & 1 deletion cl/settings/third_party/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@
"ELASTICSEARCH_SWEEP_INDEXER_HEADS_RATE", default=60
)
ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL = env(
"ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL", default=5
"ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL", default=10
)
ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS = env(
"ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS", default=3
)
ELASTICSEARCH_SWEEP_INDEXER_MODELS = env(
"ELASTICSEARCH_SWEEP_INDEXER_MODELS",
Expand Down

0 comments on commit 61e5660

Please sign in to comment.