Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Elasticsearch 8 #1664

Merged
merged 1 commit into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
"3.10",
"3.11",
]
es-version: [7.0.0, 7.10.0]
es-version: [8.0.0, 8.9.0]

steps:
- name: Checkout Repository
Expand All @@ -76,7 +76,7 @@ jobs:
run: |
mkdir /tmp/elasticsearch
wget -O - https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${{ matrix.es-version }}-linux-x86_64.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1
/tmp/elasticsearch/bin/elasticsearch -d
/tmp/elasticsearch/bin/elasticsearch -E xpack.security.enabled=false -E discovery.type=single-node -d
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think disabling security is OK here as HTTPS is not a concern of this library: this is handled at the elasticsearch-py/elastic-transport-python level.

- name: Setup Python - ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
Expand Down
5 changes: 3 additions & 2 deletions elasticsearch_dsl/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ class Connections:
singleton in this module.
"""

def __init__(self):
def __init__(self, *, elasticsearch_class=Elasticsearch):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change allows test_connections.py to use a dummy Elasticsearch class for most of the tests.

self._kwargs = {}
self._conns = {}
self.elasticsearch_class = elasticsearch_class

def configure(self, **kwargs):
"""
Expand Down Expand Up @@ -80,7 +81,7 @@ def create_connection(self, alias="default", **kwargs):
it under given alias.
"""
kwargs.setdefault("serializer", serializer)
conn = self._conns[alias] = Elasticsearch(**kwargs)
conn = self._conns[alias] = self.elasticsearch_class(**kwargs)
return conn

def get_connection(self, alias="default"):
Expand Down
10 changes: 6 additions & 4 deletions elasticsearch_dsl/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import collections.abc
import copy

from elasticsearch.exceptions import TransportError
from elasticsearch.exceptions import ApiError
from elasticsearch.helpers import scan

from .aggs import A, AggBase
Expand Down Expand Up @@ -693,7 +693,8 @@ def count(self):

d = self.to_dict(count=True)
# TODO: failed shards detection
return es.count(index=self._index, body=d, **self._params)["count"]
resp = es.count(index=self._index, query=d.get("query", None), **self._params)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not keep body because the tests used mocker.spy and since body is dynamically added in elasticsearch-py the mock did not see it. But I could also use a different testing strategy if you prefer.

return resp["count"]

def execute(self, ignore_cache=False):
"""
Expand All @@ -707,7 +708,8 @@ def execute(self, ignore_cache=False):
es = get_connection(self._using)

self._response = self._response_class(
self, es.search(index=self._index, body=self.to_dict(), **self._params)
self,
es.search(index=self._index, body=self.to_dict(), **self._params).body,
)
return self._response

Expand Down Expand Up @@ -799,7 +801,7 @@ def execute(self, ignore_cache=False, raise_on_error=True):
for s, r in zip(self._searches, responses["responses"]):
if r.get("error", False):
if raise_on_error:
raise TransportError("N/A", r["error"]["type"], r["error"])
raise ApiError("N/A", meta=responses.meta, body=r)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

elasticsearch-py 8.x has two types of errors now, ApiError being used for errors reported by Elasticsearch itself and TransportError being used for errors at the transport layer (such as connection issues).

r = None
else:
r = Response(s, r)
Expand Down
5 changes: 2 additions & 3 deletions examples/alias_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,8 @@ def migrate(move_data=True, update_alias=True):

if move_data:
# move data from current alias to the new index
es.reindex(
body={"source": {"index": ALIAS}, "dest": {"index": next_index}},
request_timeout=3600,
es.options(request_timeout=3600).reindex(
body={"source": {"index": ALIAS}, "dest": {"index": next_index}}
)
# refresh the index to make the changes visible
es.indices.refresh(index=next_index)
Expand Down
4 changes: 1 addition & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,4 @@ filterwarnings =
# The body parameter is no longer deprecated, see
# https://github.com/elastic/elasticsearch-py/issues/2181#issuecomment-1490932964
ignore:The 'body' parameter is deprecated .*:DeprecationWarning
# calendar_interval was only added in Elasticsearch 7.2 and we still support Elasticsearch 7.0
# using `default` instead of `ignore` to show it in the output as a reminder to remove it for Elasticsearch 8
default:\[interval\] on \[date_histogram\] is deprecated, use \[fixed_interval\] or \[calendar_interval\] in the future.:elasticsearch.exceptions.ElasticsearchWarning
ignore:Legacy index templates are deprecated in favor of composable templates.:elasticsearch.exceptions.ElasticsearchWarning
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

install_requires = [
"python-dateutil",
"elasticsearch>=7.0.0,<8.0.0",
"elasticsearch>=8.0.0,<9.0.0",
]

develop_requires = [
Expand Down
103 changes: 54 additions & 49 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from unittest import SkipTest, TestCase
from unittest.mock import Mock

from elastic_transport import ObjectApiResponse
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import ConnectionError
from elasticsearch.helpers import bulk
Expand All @@ -47,7 +48,7 @@

def get_test_client(wait=True, **kwargs):
# construct kwargs from the environment
kw = {"timeout": 30}
kw = {"request_timeout": 30}

if "PYTHON_CONNECTION_CLASS" in os.environ:
from elasticsearch import connection
Expand Down Expand Up @@ -131,8 +132,9 @@ def es_version(client):
@fixture
def write_client(client):
yield client
client.indices.delete(index="test-*", ignore=404)
client.indices.delete_template(name="test-template", ignore=404)
for index_name in client.indices.get(index="test-*", expand_wildcards="all"):
client.indices.delete(index=index_name)
Comment on lines +135 to +136
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deleting indices with a wildcard require a cluster setting in Elasticsearch 8. I had to specify expand_wildcards as some tests are closing indices.

client.options(ignore_status=404).indices.delete_template(name="test-template")


@fixture
Expand Down Expand Up @@ -160,55 +162,58 @@ def data_client(client):

@fixture
def dummy_response():
return {
"_shards": {"failed": 0, "successful": 10, "total": 10},
"hits": {
"hits": [
{
"_index": "test-index",
"_type": "company",
"_id": "elasticsearch",
"_score": 12.0,
"_source": {"city": "Amsterdam", "name": "Elasticsearch"},
},
{
"_index": "test-index",
"_type": "employee",
"_id": "42",
"_score": 11.123,
"_routing": "elasticsearch",
"_source": {
"name": {"first": "Shay", "last": "Bannon"},
"lang": "java",
"twitter": "kimchy",
return ObjectApiResponse(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In elasticsearch-py 8.x, the response pretends it is a dictionary to be backwards-compatible, but it does not support __setitem__ that is needed for faceted search to work. This is why we explicitly request .body in Search.execute: this is an actual dictionary that supports __setitem__.

The alternative is to add __setitem__ support in elastic-transport-python.

meta=None,
body={
"_shards": {"failed": 0, "successful": 10, "total": 10},
"hits": {
"hits": [
{
"_index": "test-index",
"_type": "company",
"_id": "elasticsearch",
"_score": 12.0,
"_source": {"city": "Amsterdam", "name": "Elasticsearch"},
},
{
"_index": "test-index",
"_type": "employee",
"_id": "42",
"_score": 11.123,
"_routing": "elasticsearch",
"_source": {
"name": {"first": "Shay", "last": "Bannon"},
"lang": "java",
"twitter": "kimchy",
},
},
{
"_index": "test-index",
"_type": "employee",
"_id": "47",
"_score": 1,
"_routing": "elasticsearch",
"_source": {
"name": {"first": "Honza", "last": "Král"},
"lang": "python",
"twitter": "honzakral",
},
},
},
{
"_index": "test-index",
"_type": "employee",
"_id": "47",
"_score": 1,
"_routing": "elasticsearch",
"_source": {
"name": {"first": "Honza", "last": "Král"},
"lang": "python",
"twitter": "honzakral",
{
"_index": "test-index",
"_type": "employee",
"_id": "53",
"_score": 16.0,
"_routing": "elasticsearch",
},
},
{
"_index": "test-index",
"_type": "employee",
"_id": "53",
"_score": 16.0,
"_routing": "elasticsearch",
},
],
"max_score": 12.0,
"total": 123,
],
"max_score": 12.0,
"total": 123,
},
"timed_out": False,
"took": 123,
},
"timed_out": False,
"took": 123,
}
)


@fixture
Expand Down
52 changes: 35 additions & 17 deletions tests/test_connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
from elasticsearch_dsl import connections, serializer


class DummyElasticsearch:
def __init__(self, *args, hosts, **kwargs):
self.hosts = hosts


def test_default_connection_is_returned_by_default():
c = connections.Connections()

Expand All @@ -33,27 +38,36 @@ def test_default_connection_is_returned_by_default():


def test_get_connection_created_connection_if_needed():
c = connections.Connections()
c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]})
c = connections.Connections(elasticsearch_class=DummyElasticsearch)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those tests are about elasticsearch_dsl.connections, not about how we store hosts in Elasticsearch. In elasticsearch-py 8.x, the hosts are technically available, but they're inside a deep hierarchy of classes, so using a dummy class avoids depending on those internals.

c.configure(
default={"hosts": ["https://es.com:9200"]},
local={"hosts": ["https://localhost:9200"]},
)

default = c.get_connection()
local = c.get_connection("local")

assert isinstance(default, Elasticsearch)
assert isinstance(local, Elasticsearch)
assert isinstance(default, DummyElasticsearch)
assert isinstance(local, DummyElasticsearch)

assert [{"host": "es.com"}] == default.transport.hosts
assert [{"host": "localhost"}] == local.transport.hosts
assert default.hosts == ["https://es.com:9200"]
assert local.hosts == ["https://localhost:9200"]


def test_configure_preserves_unchanged_connections():
c = connections.Connections()
c = connections.Connections(elasticsearch_class=DummyElasticsearch)

c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]})
c.configure(
default={"hosts": ["https://es.com:9200"]},
local={"hosts": ["https://localhost:9200"]},
)
default = c.get_connection()
local = c.get_connection("local")

c.configure(default={"hosts": ["not-es.com"]}, local={"hosts": ["localhost"]})
c.configure(
default={"hosts": ["https://not-es.com:9200"]},
local={"hosts": ["https://localhost:9200"]},
)
new_default = c.get_connection()
new_local = c.get_connection("local")

Expand All @@ -62,9 +76,12 @@ def test_configure_preserves_unchanged_connections():


def test_remove_connection_removes_both_conn_and_conf():
c = connections.Connections()
c = connections.Connections(elasticsearch_class=DummyElasticsearch)

c.configure(default={"hosts": ["es.com"]}, local={"hosts": ["localhost"]})
c.configure(
default={"hosts": ["https://es.com:9200"]},
local={"hosts": ["https://localhost:9200"]},
)
c.add_connection("local2", object())

c.remove_connection("default")
Expand All @@ -77,15 +94,16 @@ def test_remove_connection_removes_both_conn_and_conf():


def test_create_connection_constructs_client():
c = connections.Connections()
c.create_connection("testing", hosts=["es.com"])
c = connections.Connections(elasticsearch_class=DummyElasticsearch)
c.create_connection("testing", hosts=["https://es.com:9200"])

con = c.get_connection("testing")
assert [{"host": "es.com"}] == con.transport.hosts
assert con.hosts == ["https://es.com:9200"]


def test_create_connection_adds_our_serializer():
c = connections.Connections()
c.create_connection("testing", hosts=["es.com"])
c = connections.Connections(elasticsearch_class=Elasticsearch)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test however uses the real client, for what it's worth.

c.create_connection("testing", hosts=["https://es.com:9200"])

assert c.get_connection("testing").transport.serializer is serializer.serializer
c_serializers = c.get_connection("testing").transport.serializers
assert c_serializers.serializers["application/json"] is serializer.serializer
10 changes: 6 additions & 4 deletions tests/test_integration/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,6 @@ def test_save_and_update_return_doc_meta(write_client):
"_primary_term",
"_seq_no",
"_shards",
"_type",
"_version",
"result",
}
Expand All @@ -295,7 +294,6 @@ def test_save_and_update_return_doc_meta(write_client):
"_primary_term",
"_seq_no",
"_shards",
"_type",
"_version",
"result",
}
Expand All @@ -318,11 +316,15 @@ def test_get_raises_404_on_non_existent_id(data_client):


def test_get_returns_none_if_404_ignored(data_client):
assert None is Repository.get("elasticsearch-dsl-php", ignore=404)
assert None is Repository.get(
"elasticsearch-dsl-php", using=data_client.options(ignore_status=404)
)


def test_get_returns_none_if_404_ignored_and_index_doesnt_exist(data_client):
assert None is Repository.get("42", index="not-there", ignore=404)
assert None is Repository.get(
"42", index="not-there", using=data_client.options(ignore_status=404)
)


def test_get(data_client):
Expand Down
5 changes: 4 additions & 1 deletion tests/test_integration/test_examples/test_composite_aggs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ def test_scan_aggs_with_multiple_aggs(data_client):
{"files": A("terms", field="files")},
{
"months": {
"date_histogram": {"field": "committed_date", "interval": "month"}
"date_histogram": {
"field": "committed_date",
"calendar_interval": "month",
}
}
},
]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_integration/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.


from elasticsearch import TransportError
from elasticsearch import ApiError
from pytest import raises

from elasticsearch_dsl import Date, Document, Keyword, MultiSearch, Q, Search, Text
Expand Down Expand Up @@ -143,7 +143,7 @@ def test_multi_missing(data_client):
ms = MultiSearch()
ms = ms.add(s1).add(s2).add(s3)

with raises(TransportError):
with raises(ApiError):
ms.execute()

r1, r2, r3 = ms.execute(raise_on_error=False)
Expand Down
Loading