From dcb3d4b8d4ef6aec9fe11dd00e53143d5f9f29b7 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg
Date: Tue, 21 May 2024 15:47:48 +0100
Subject: [PATCH] Added Response.search_after() method (#1829)

* Added Response.search_after() method

* add match clause to pytest.raises

(cherry picked from commit 891ba7c6a790975d4fc43b47ef1f7137c9e03d13)
---
 elasticsearch_dsl/response/__init__.py       | 32 ++++++++++++
 elasticsearch_dsl/search_base.py             | 30 +++++++++++
 tests/test_integration/_async/test_search.py | 54 ++++++++++++++++++++
 tests/test_integration/_sync/test_search.py  | 54 ++++++++++++++++++++
 4 files changed, 170 insertions(+)

diff --git a/elasticsearch_dsl/response/__init__.py b/elasticsearch_dsl/response/__init__.py
index 7af054b5b..482400a4d 100644
--- a/elasticsearch_dsl/response/__init__.py
+++ b/elasticsearch_dsl/response/__init__.py
@@ -90,6 +90,38 @@ def aggs(self):
             super(AttrDict, self).__setattr__("_aggs", aggs)
         return self._aggs
 
+    def search_after(self):
+        """
+        Return a ``Search`` instance that retrieves the next page of results.
+
+        This method provides an easy way to paginate a long list of results using
+        the ``search_after`` option. For example::
+
+            page_size = 20
+            s = Search()[:page_size].sort("date")
+
+            while True:
+                # get a page of results
+                r = await s.execute()
+
+                # do something with this page of results
+
+                # exit the loop if we reached the end
+                if len(r.hits) < page_size:
+                    break
+
+                # get a search object with the next page of results
+                s = r.search_after()
+
+        Note that the ``search_after`` option requires the search to have an
+        explicit ``sort`` order.
+        """
+        if len(self.hits) == 0:
+            raise ValueError("Cannot use search_after when there are no search results")
+        if not hasattr(self.hits[-1].meta, "sort"):
+            raise ValueError("Cannot use search_after when results are not sorted")
+        return self._search.extra(search_after=self.hits[-1].meta.sort)
+
 
 class AggResponse(AttrDict):
     def __init__(self, aggs, search, data):
diff --git a/elasticsearch_dsl/search_base.py b/elasticsearch_dsl/search_base.py
index d54b6b925..5680778cb 100644
--- a/elasticsearch_dsl/search_base.py
+++ b/elasticsearch_dsl/search_base.py
@@ -760,6 +760,36 @@ def suggest(self, name, text, **kwargs):
         s._suggest[name].update(kwargs)
         return s
 
+    def search_after(self):
+        """
+        Return a ``Search`` instance that retrieves the next page of results.
+
+        This method provides an easy way to paginate a long list of results using
+        the ``search_after`` option. For example::
+
+            page_size = 20
+            s = Search()[:page_size].sort("date")
+
+            while True:
+                # get a page of results
+                r = await s.execute()
+
+                # do something with this page of results
+
+                # exit the loop if we reached the end
+                if len(r.hits) < page_size:
+                    break
+
+                # get a search object with the next page of results
+                s = s.search_after()
+
+        Note that the ``search_after`` option requires the search to have an
+        explicit ``sort`` order.
+        """
+        if not hasattr(self, "_response"):
+            raise ValueError("A search must be executed before using search_after")
+        return self._response.search_after()
+
     def to_dict(self, count=False, **kwargs):
         """
         Serialize the search into the dictionary that will be sent over as the
diff --git a/tests/test_integration/_async/test_search.py b/tests/test_integration/_async/test_search.py
index 7fc56c870..6d6a5ab98 100644
--- a/tests/test_integration/_async/test_search.py
+++ b/tests/test_integration/_async/test_search.py
@@ -125,6 +125,60 @@ async def test_scan_iterates_through_all_docs(async_data_client):
     assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits}
 
 
+@pytest.mark.asyncio
+async def test_search_after(async_data_client):
+    page_size = 7
+    s = AsyncSearch(index="flat-git")[:page_size].sort("authored_date")
+    commits = []
+    while True:
+        r = await s.execute()
+        commits += r.hits
+        if len(r.hits) < page_size:
+            break
+        s = r.search_after()
+
+    assert 52 == len(commits)
+    assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits}
+
+
+@pytest.mark.asyncio
+async def test_search_after_no_search(async_data_client):
+    s = AsyncSearch(index="flat-git")
+    with raises(
+        ValueError, match="A search must be executed before using search_after"
+    ):
+        await s.search_after()
+    await s.count()
+    with raises(
+        ValueError, match="A search must be executed before using search_after"
+    ):
+        await s.search_after()
+
+
+@pytest.mark.asyncio
+async def test_search_after_no_sort(async_data_client):
+    s = AsyncSearch(index="flat-git")
+    r = await s.execute()
+    with raises(
+        ValueError, match="Cannot use search_after when results are not sorted"
+    ):
+        await r.search_after()
+
+
+@pytest.mark.asyncio
+async def test_search_after_no_results(async_data_client):
+    s = AsyncSearch(index="flat-git")[:100].sort("authored_date")
+    r = await s.execute()
+    assert 52 == len(r.hits)
+    s = r.search_after()
+    r = await s.execute()
+    assert 0 == len(r.hits)
+    with raises(
+        ValueError, match="Cannot use search_after when there are no search results"
+    ):
+        await r.search_after()
+
+
 @pytest.mark.asyncio
 async def test_response_is_cached(async_data_client):
     s = Repository.search()
diff --git a/tests/test_integration/_sync/test_search.py b/tests/test_integration/_sync/test_search.py
index b31ef8b3d..09c318369 100644
--- a/tests/test_integration/_sync/test_search.py
+++ b/tests/test_integration/_sync/test_search.py
@@ -117,6 +117,60 @@ def test_scan_iterates_through_all_docs(data_client):
     assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits}
 
 
+@pytest.mark.sync
+def test_search_after(data_client):
+    page_size = 7
+    s = Search(index="flat-git")[:page_size].sort("authored_date")
+    commits = []
+    while True:
+        r = s.execute()
+        commits += r.hits
+        if len(r.hits) < page_size:
+            break
+        s = r.search_after()
+
+    assert 52 == len(commits)
+    assert {d["_id"] for d in FLAT_DATA} == {c.meta.id for c in commits}
+
+
+@pytest.mark.sync
+def test_search_after_no_search(data_client):
+    s = Search(index="flat-git")
+    with raises(
+        ValueError, match="A search must be executed before using search_after"
+    ):
+        s.search_after()
+    s.count()
+    with raises(
+        ValueError, match="A search must be executed before using search_after"
+    ):
+        s.search_after()
+
+
+@pytest.mark.sync
+def test_search_after_no_sort(data_client):
+    s = Search(index="flat-git")
+    r = s.execute()
+    with raises(
+        ValueError, match="Cannot use search_after when results are not sorted"
+    ):
+        r.search_after()
+
+
+@pytest.mark.sync
+def test_search_after_no_results(data_client):
+    s = Search(index="flat-git")[:100].sort("authored_date")
+    r = s.execute()
+    assert 52 == len(r.hits)
+    s = r.search_after()
+    r = s.execute()
+    assert 0 == len(r.hits)
+    with raises(
+        ValueError, match="Cannot use search_after when there are no search results"
+    ):
+        r.search_after()
+
+
 @pytest.mark.sync
 def test_response_is_cached(data_client):
     s = Repository.search()
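
For anyone who wants to exercise the new API by hand, a minimal sketch follows. It is not part of the patch: it reuses the ``flat-git`` index and ``authored_date`` sort field from the integration-test fixtures, the connection URL is an assumption to adjust for your cluster, and the ``AsyncSearch`` variant differs only in awaiting ``execute()``::

    from elasticsearch_dsl import Search, connections

    # assumed connection details; point this at any reachable cluster
    connections.create_connection(hosts=["http://localhost:9200"])

    page_size = 20
    # search_after() requires an explicit sort, hence .sort(...)
    s = Search(index="flat-git")[:page_size].sort("authored_date")

    while True:
        r = s.execute()
        for commit in r:
            print(commit.meta.id)
        # a short page means we have reached the end of the results
        if len(r.hits) < page_size:
            break
        # build a search for the next page from the last hit's sort values
        s = r.search_after()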