From fd2c9cb8610796540eecfe16c705232ff86a7c3e Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Tue, 9 Apr 2024 19:30:27 +0100 Subject: [PATCH 1/7] Added support for slicing multiple times in Search class Fixes #799 --- elasticsearch_dsl/search_base.py | 27 +++++++++++++++++---------- tests/_async/test_search.py | 7 +++++++ tests/_sync/test_search.py | 7 +++++++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/elasticsearch_dsl/search_base.py b/elasticsearch_dsl/search_base.py index 096c2855..c8d601ce 100644 --- a/elasticsearch_dsl/search_base.py +++ b/elasticsearch_dsl/search_base.py @@ -352,20 +352,27 @@ def __getitem__(self, n): # If negative slicing, abort. if n.start and n.start < 0 or n.stop and n.stop < 0: raise ValueError("Search does not support negative slicing.") - # Elasticsearch won't get all results so we default to size: 10 if - # stop not given. - s._extra["from"] = n.start or 0 - s._extra["size"] = max( - 0, n.stop - (n.start or 0) if n.stop is not None else 10 - ) - return s + slice_start = n.start + slice_stop = n.stop else: # This is an index lookup, equivalent to slicing by [n:n+1]. # If negative index, abort. if n < 0: raise ValueError("Search does not support negative indexing.") - s._extra["from"] = n - s._extra["size"] = 1 - return s + slice_start = n + slice_stop = n + 1 + # Elasticsearch won't get all results so we default to size: 10 if + # stop not given. + old_from = s._extra.get("from", 0) + old_to = old_from + s._extra.get( + "size", slice_stop or (slice_start or old_from) + 10 + ) + new_from = old_from + (slice_start or 0) + new_to = ( + min(old_to, old_from + slice_stop) if slice_stop is not None else old_to + ) + s._extra["from"] = new_from + s._extra["size"] = max(0, new_to - new_from) + return s @classmethod def from_dict(cls, d): diff --git a/tests/_async/test_search.py b/tests/_async/test_search.py index 013eebfb..1631c56b 100644 --- a/tests/_async/test_search.py +++ b/tests/_async/test_search.py @@ -367,11 +367,18 @@ def test_slice(): assert {"from": 3, "size": 10} == s[3:].to_dict() assert {"from": 0, "size": 0} == s[0:0].to_dict() assert {"from": 20, "size": 0} == s[20:0].to_dict() + assert {"from": 10, "size": 5} == s[10:][:5].to_dict() + assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() + assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() def test_index(): s = AsyncSearch() assert {"from": 3, "size": 1} == s[3].to_dict() + assert {"from": 3, "size": 1} == s[3][0].to_dict() + assert {"from": 8, "size": 0} == s[3][5].to_dict() + assert {"from": 4, "size": 1} == s[3:10][1].to_dict() def test_search_to_dict(): diff --git a/tests/_sync/test_search.py b/tests/_sync/test_search.py index d786ef0f..36bd9150 100644 --- a/tests/_sync/test_search.py +++ b/tests/_sync/test_search.py @@ -367,11 +367,18 @@ def test_slice(): assert {"from": 3, "size": 10} == s[3:].to_dict() assert {"from": 0, "size": 0} == s[0:0].to_dict() assert {"from": 20, "size": 0} == s[20:0].to_dict() + assert {"from": 10, "size": 5} == s[10:][:5].to_dict() + assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() + assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() def test_index(): s = Search() assert {"from": 3, "size": 1} == s[3].to_dict() + assert {"from": 3, "size": 1} == s[3][0].to_dict() + assert {"from": 8, "size": 0} == s[3][5].to_dict() + assert {"from": 4, "size": 1} == s[3:10][1].to_dict() def test_search_to_dict(): From a9a450d4e93472113eaf0e6dc7db880c289f38eb Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Wed, 10 Apr 2024 11:27:40 +0100 Subject: [PATCH 2/7] simplify slicing logic --- elasticsearch_dsl/search_base.py | 21 +++++++++++++++------ tests/_async/test_search.py | 9 +++++++++ tests/_sync/test_search.py | 8 ++++++++ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/elasticsearch_dsl/search_base.py b/elasticsearch_dsl/search_base.py index c8d601ce..91460a83 100644 --- a/elasticsearch_dsl/search_base.py +++ b/elasticsearch_dsl/search_base.py @@ -363,13 +363,22 @@ def __getitem__(self, n): # Elasticsearch won't get all results so we default to size: 10 if # stop not given. old_from = s._extra.get("from", 0) - old_to = old_from + s._extra.get( - "size", slice_stop or (slice_start or old_from) + 10 - ) + if "size" in s._extra: + old_to = old_from + s._extra["size"] + elif slice_stop is not None: + # inherit a size from the given slice + old_to = old_from + slice_stop + elif slice_start is not None: + # assume a default size of 10 from the given slice start + old_to = old_from + slice_start + 10 + else: + # with no other information, the default size is 10 + old_to = old_from + 10 new_from = old_from + (slice_start or 0) - new_to = ( - min(old_to, old_from + slice_stop) if slice_stop is not None else old_to - ) + if slice_stop is not None: + new_to = min(old_to, old_from + slice_stop) + else: + new_to = old_to s._extra["from"] = new_from s._extra["size"] = max(0, new_to - new_from) return s diff --git a/tests/_async/test_search.py b/tests/_async/test_search.py index 1631c56b..45c61b90 100644 --- a/tests/_async/test_search.py +++ b/tests/_async/test_search.py @@ -371,6 +371,13 @@ def test_slice(): assert {"from": 10, "size": 0} == s[:5][10:].to_dict() assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() + assert {"from": 0, "size": 10} == s[:].to_dict() + with raises(ValueError): + s[-1:] + with raises(ValueError): + s[4:-1] + with raises(ValueError): + s[-3:-2] def test_index(): @@ -379,6 +386,8 @@ def test_index(): assert {"from": 3, "size": 1} == s[3][0].to_dict() assert {"from": 8, "size": 0} == s[3][5].to_dict() assert {"from": 4, "size": 1} == s[3:10][1].to_dict() + with raises(ValueError): + s[-3] def test_search_to_dict(): diff --git a/tests/_sync/test_search.py b/tests/_sync/test_search.py index 36bd9150..2ba45a35 100644 --- a/tests/_sync/test_search.py +++ b/tests/_sync/test_search.py @@ -371,6 +371,12 @@ def test_slice(): assert {"from": 10, "size": 0} == s[:5][10:].to_dict() assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() + with raises(ValueError): + s[-1:] + with raises(ValueError): + s[4:-1] + with raises(ValueError): + s[-3:-2] def test_index(): @@ -379,6 +385,8 @@ def test_index(): assert {"from": 3, "size": 1} == s[3][0].to_dict() assert {"from": 8, "size": 0} == s[3][5].to_dict() assert {"from": 4, "size": 1} == s[3:10][1].to_dict() + with raises(ValueError): + s[-3] def test_search_to_dict(): From adb6addf42ff2270c9162c6d46c2f090c1465eba Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Wed, 10 Apr 2024 12:00:48 +0100 Subject: [PATCH 3/7] A few more slicing unit tests --- tests/_async/test_search.py | 3 +++ tests/_sync/test_search.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/tests/_async/test_search.py b/tests/_async/test_search.py index 45c61b90..04b99143 100644 --- a/tests/_async/test_search.py +++ b/tests/_async/test_search.py @@ -369,6 +369,9 @@ def test_slice(): assert {"from": 20, "size": 0} == s[20:0].to_dict() assert {"from": 10, "size": 5} == s[10:][:5].to_dict() assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"from": 0, "size": 10} == s[:10][:40].to_dict() + assert {"from": 0, "size": 10} == s[:40][:10].to_dict() + assert {"from": 0, "size": 40} == s[:40][:80].to_dict() assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() assert {"from": 0, "size": 10} == s[:].to_dict() diff --git a/tests/_sync/test_search.py b/tests/_sync/test_search.py index 2ba45a35..a74c627c 100644 --- a/tests/_sync/test_search.py +++ b/tests/_sync/test_search.py @@ -369,8 +369,12 @@ def test_slice(): assert {"from": 20, "size": 0} == s[20:0].to_dict() assert {"from": 10, "size": 5} == s[10:][:5].to_dict() assert {"from": 10, "size": 0} == s[:5][10:].to_dict() + assert {"from": 0, "size": 10} == s[:10][:40].to_dict() + assert {"from": 0, "size": 10} == s[:40][:10].to_dict() + assert {"from": 0, "size": 40} == s[:40][:80].to_dict() assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() + assert {"from": 0, "size": 10} == s[:].to_dict() with raises(ValueError): s[-1:] with raises(ValueError): From 431372215b117eeb6d1b37a1d88471e6184e4b11 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Wed, 10 Apr 2024 12:11:40 +0100 Subject: [PATCH 4/7] Removed unnecessary comment --- elasticsearch_dsl/search_base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/elasticsearch_dsl/search_base.py b/elasticsearch_dsl/search_base.py index 91460a83..d81e9f6e 100644 --- a/elasticsearch_dsl/search_base.py +++ b/elasticsearch_dsl/search_base.py @@ -360,8 +360,6 @@ def __getitem__(self, n): raise ValueError("Search does not support negative indexing.") slice_start = n slice_stop = n + 1 - # Elasticsearch won't get all results so we default to size: 10 if - # stop not given. old_from = s._extra.get("from", 0) if "size" in s._extra: old_to = old_from + s._extra["size"] From 4f2b9d87ebc631fc58a3006eed18388eb3884701 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Wed, 10 Apr 2024 16:45:09 +0100 Subject: [PATCH 5/7] simplified slicing logic even more --- elasticsearch_dsl/search_base.py | 30 +++++++++++++++--------------- tests/_async/test_search.py | 12 ++++++------ tests/_sync/test_search.py | 12 ++++++------ 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/elasticsearch_dsl/search_base.py b/elasticsearch_dsl/search_base.py index d81e9f6e..65dc92eb 100644 --- a/elasticsearch_dsl/search_base.py +++ b/elasticsearch_dsl/search_base.py @@ -360,25 +360,25 @@ def __getitem__(self, n): raise ValueError("Search does not support negative indexing.") slice_start = n slice_stop = n + 1 - old_from = s._extra.get("from", 0) - if "size" in s._extra: - old_to = old_from + s._extra["size"] - elif slice_stop is not None: - # inherit a size from the given slice - old_to = old_from + slice_stop - elif slice_start is not None: - # assume a default size of 10 from the given slice start - old_to = old_from + slice_start + 10 + + old_from = s._extra.get("from") + old_to = (old_from or 0) + s._extra["size"] if "size" in s._extra else None + + if slice_start is not None: + new_from = (old_from or 0) + slice_start else: - # with no other information, the default size is 10 - old_to = old_from + 10 - new_from = old_from + (slice_start or 0) + new_from = old_from if slice_stop is not None: - new_to = min(old_to, old_from + slice_stop) + new_to = (old_from or 0) + slice_stop + if old_to is not None and old_to < new_to: + new_to = old_to else: new_to = old_to - s._extra["from"] = new_from - s._extra["size"] = max(0, new_to - new_from) + + if new_from is not None: + s._extra["from"] = new_from + if new_to is not None: + s._extra["size"] = max(0, new_to - (new_from or 0)) return s @classmethod diff --git a/tests/_async/test_search.py b/tests/_async/test_search.py index 04b99143..1fc1ac98 100644 --- a/tests/_async/test_search.py +++ b/tests/_async/test_search.py @@ -363,18 +363,18 @@ def test_collapse(): def test_slice(): s = AsyncSearch() assert {"from": 3, "size": 7} == s[3:10].to_dict() - assert {"from": 0, "size": 5} == s[:5].to_dict() - assert {"from": 3, "size": 10} == s[3:].to_dict() + assert {"size": 5} == s[:5].to_dict() + assert {"from": 3} == s[3:].to_dict() assert {"from": 0, "size": 0} == s[0:0].to_dict() assert {"from": 20, "size": 0} == s[20:0].to_dict() assert {"from": 10, "size": 5} == s[10:][:5].to_dict() assert {"from": 10, "size": 0} == s[:5][10:].to_dict() - assert {"from": 0, "size": 10} == s[:10][:40].to_dict() - assert {"from": 0, "size": 10} == s[:40][:10].to_dict() - assert {"from": 0, "size": 40} == s[:40][:80].to_dict() + assert {"size": 10} == s[:10][:40].to_dict() + assert {"size": 10} == s[:40][:10].to_dict() + assert {"size": 40} == s[:40][:80].to_dict() assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() - assert {"from": 0, "size": 10} == s[:].to_dict() + assert {} == s[:].to_dict() with raises(ValueError): s[-1:] with raises(ValueError): diff --git a/tests/_sync/test_search.py b/tests/_sync/test_search.py index a74c627c..9a32ee13 100644 --- a/tests/_sync/test_search.py +++ b/tests/_sync/test_search.py @@ -363,18 +363,18 @@ def test_collapse(): def test_slice(): s = Search() assert {"from": 3, "size": 7} == s[3:10].to_dict() - assert {"from": 0, "size": 5} == s[:5].to_dict() - assert {"from": 3, "size": 10} == s[3:].to_dict() + assert {"size": 5} == s[:5].to_dict() + assert {"from": 3} == s[3:].to_dict() assert {"from": 0, "size": 0} == s[0:0].to_dict() assert {"from": 20, "size": 0} == s[20:0].to_dict() assert {"from": 10, "size": 5} == s[10:][:5].to_dict() assert {"from": 10, "size": 0} == s[:5][10:].to_dict() - assert {"from": 0, "size": 10} == s[:10][:40].to_dict() - assert {"from": 0, "size": 10} == s[:40][:10].to_dict() - assert {"from": 0, "size": 40} == s[:40][:80].to_dict() + assert {"size": 10} == s[:10][:40].to_dict() + assert {"size": 10} == s[:40][:10].to_dict() + assert {"size": 40} == s[:40][:80].to_dict() assert {"from": 12, "size": 0} == s[:5][10:][2:].to_dict() assert {"from": 15, "size": 0} == s[10:][:5][5:].to_dict() - assert {"from": 0, "size": 10} == s[:].to_dict() + assert {} == s[:].to_dict() with raises(ValueError): s[-1:] with raises(ValueError): From 908cd40e5f3a9dd6c8eaa5864d8dce10fa3ad117 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 11 Apr 2024 10:32:42 +0100 Subject: [PATCH 6/7] added suggested changes --- elasticsearch_dsl/search_base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/elasticsearch_dsl/search_base.py b/elasticsearch_dsl/search_base.py index 65dc92eb..d54b6b92 100644 --- a/elasticsearch_dsl/search_base.py +++ b/elasticsearch_dsl/search_base.py @@ -362,18 +362,18 @@ def __getitem__(self, n): slice_stop = n + 1 old_from = s._extra.get("from") - old_to = (old_from or 0) + s._extra["size"] if "size" in s._extra else None + old_to = None + if "size" in s._extra: + old_to = (old_from or 0) + s._extra["size"] + new_from = old_from if slice_start is not None: new_from = (old_from or 0) + slice_start - else: - new_from = old_from + new_to = old_to if slice_stop is not None: new_to = (old_from or 0) + slice_stop if old_to is not None and old_to < new_to: new_to = old_to - else: - new_to = old_to if new_from is not None: s._extra["from"] = new_from From 863f033cc8bc63c2f641684e97f74294bad5e4b7 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 11 Apr 2024 10:38:56 +0100 Subject: [PATCH 7/7] add more slicing examples to documentation --- docs/search_dsl.rst | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/search_dsl.rst b/docs/search_dsl.rst index 2663fdf8..83756f97 100644 --- a/docs/search_dsl.rst +++ b/docs/search_dsl.rst @@ -410,16 +410,25 @@ To specify the from/size parameters, use the Python slicing API: .. code:: python - s = s[10:20] - # {"from": 10, "size": 10} + s = s[10:20] + # {"from": 10, "size": 10} + + s = s[:20] + # {"size": 20} + + s = s[10:] + # {"from": 10} + + s = s[10:20][2:] + # {"from": 12, "size": 8} If you want to access all the documents matched by your query you can use the ``scan`` method which uses the scan/scroll elasticsearch API: .. code:: python - for hit in s.scan(): - print(hit.title) + for hit in s.scan(): + print(hit.title) Note that in this case the results won't be sorted.