Skip to content

Commit

Permalink
Add index creation to aggregations example
Browse files Browse the repository at this point in the history
  • Loading branch information
miguelgrinberg committed Jul 9, 2024
1 parent 663d6f4 commit 0d49272
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 88 deletions.
14 changes: 13 additions & 1 deletion examples/async/composite_agg.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
import os
from typing import Any, AsyncIterator, Dict, List, Optional, Union

from elasticsearch.helpers import async_bulk

from elasticsearch_dsl import A, Agg, AsyncSearch, Response, async_connections
from tests.test_integration.test_data import DATA, GIT_INDEX


async def scan_aggs(
Expand Down Expand Up @@ -56,8 +59,17 @@ async def run_search(**kwargs: Any) -> Response:

async def main() -> None:
# initiate the default connection to elasticsearch
async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
client = async_connections.create_connection(
hosts=[os.environ["ELASTICSEARCH_URL"]]
)

# create the index and populate it with some data
# note that the dataset is imported from the library's test suite
await client.indices.delete(index="git", ignore_unavailable=True)
await client.indices.create(index="git", **GIT_INDEX)
await async_bulk(client, DATA, raise_on_error=True, refresh=True)

# run some aggregations on the data
async for b in scan_aggs(
AsyncSearch(index="git"),
{"files": A("terms", field="files")},
Expand Down
12 changes: 11 additions & 1 deletion examples/composite_agg.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
import os
from typing import Any, Dict, Iterator, List, Optional, Union

from elasticsearch.helpers import bulk

from elasticsearch_dsl import A, Agg, Response, Search, connections
from tests.test_integration.test_data import DATA, GIT_INDEX


def scan_aggs(
Expand Down Expand Up @@ -55,8 +58,15 @@ def run_search(**kwargs: Any) -> Response:

def main() -> None:
# initiate the default connection to elasticsearch
connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
client = connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])

# create the index and populate it with some data
# note that the dataset is imported from the library's test suite
client.indices.delete(index="git", ignore_unavailable=True)
client.indices.create(index="git", **GIT_INDEX)
bulk(client, DATA, raise_on_error=True, refresh=True)

# run some aggregations on the data
for b in scan_aggs(
Search(index="git"),
{"files": A("terms", field="files")},
Expand Down
164 changes: 78 additions & 86 deletions tests/test_integration/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,99 +19,91 @@

from elasticsearch import Elasticsearch

user_mapping = {
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
}

def create_flat_git_index(client: Elasticsearch, index: str) -> None:
# we will use user on several places
user_mapping = {
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
}

client.indices.create(
index=index,
body={
"settings": {
# just one shard, no replicas for testing
"number_of_shards": 1,
"number_of_replicas": 0,
# custom analyzer for analyzing file paths
"analysis": {
"analyzer": {
"file_path": {
"type": "custom",
"tokenizer": "path_hierarchy",
"filter": ["lowercase"],
}
}
},
},
"mappings": {
"properties": {
"description": {"type": "text", "analyzer": "snowball"},
"author": user_mapping,
"authored_date": {"type": "date"},
"committer": user_mapping,
"committed_date": {"type": "date"},
"parent_shas": {"type": "keyword"},
"files": {
"type": "text",
"analyzer": "file_path",
"fielddata": True,
},
FLAT_GIT_INDEX: Dict[str, Any] = {
"settings": {
# just one shard, no replicas for testing
"number_of_shards": 1,
"number_of_replicas": 0,
# custom analyzer for analyzing file paths
"analysis": {
"analyzer": {
"file_path": {
"type": "custom",
"tokenizer": "path_hierarchy",
"filter": ["lowercase"],
}
}
},
},
"mappings": {
"properties": {
"description": {"type": "text", "analyzer": "snowball"},
"author": user_mapping,
"authored_date": {"type": "date"},
"committer": user_mapping,
"committed_date": {"type": "date"},
"parent_shas": {"type": "keyword"},
"files": {
"type": "text",
"analyzer": "file_path",
"fielddata": True,
},
}
},
}

GIT_INDEX: Dict[str, Any] = {
"settings": {
# just one shard, no replicas for testing
"number_of_shards": 1,
"number_of_replicas": 0,
# custom analyzer for analyzing file paths
"analysis": {
"analyzer": {
"file_path": {
"type": "custom",
"tokenizer": "path_hierarchy",
"filter": ["lowercase"],
}
}
},
)
},
"mappings": {
"properties": {
# common fields
"description": {"type": "text", "analyzer": "snowball"},
"commit_repo": {"type": "join", "relations": {"repo": "commit"}},
# COMMIT mappings
"author": user_mapping,
"authored_date": {"type": "date"},
"committer": user_mapping,
"committed_date": {"type": "date"},
"parent_shas": {"type": "keyword"},
"files": {
"type": "text",
"analyzer": "file_path",
"fielddata": True,
},
# REPO mappings
"is_public": {"type": "boolean"},
"owner": user_mapping,
"created_at": {"type": "date"},
"tags": {"type": "keyword"},
}
},
}


def create_git_index(client: Elasticsearch, index: str) -> None:
# we will use user on several places
user_mapping = {
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
}
def create_flat_git_index(client: Elasticsearch, index: str) -> None:
client.indices.create(index=index, body=FLAT_GIT_INDEX)

client.indices.create(
index=index,
body={
"settings": {
# just one shard, no replicas for testing
"number_of_shards": 1,
"number_of_replicas": 0,
# custom analyzer for analyzing file paths
"analysis": {
"analyzer": {
"file_path": {
"type": "custom",
"tokenizer": "path_hierarchy",
"filter": ["lowercase"],
}
}
},
},
"mappings": {
"properties": {
# common fields
"description": {"type": "text", "analyzer": "snowball"},
"commit_repo": {"type": "join", "relations": {"repo": "commit"}},
# COMMIT mappings
"author": user_mapping,
"authored_date": {"type": "date"},
"committer": user_mapping,
"committed_date": {"type": "date"},
"parent_shas": {"type": "keyword"},
"files": {
"type": "text",
"analyzer": "file_path",
"fielddata": True,
},
# REPO mappings
"is_public": {"type": "boolean"},
"owner": user_mapping,
"created_at": {"type": "date"},
"tags": {"type": "keyword"},
}
},
},
)

def create_git_index(client: Elasticsearch, index: str) -> None:
client.indices.create(index=index, body=GIT_INDEX)


DATA = [
Expand Down
1 change: 1 addition & 0 deletions utils/run-unasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def main(check=False):
"async_connections": "connections",
"async_scan": "scan",
"async_simulate": "simulate",
"async_bulk": "bulk",
"async_mock_client": "mock_client",
"async_client": "client",
"async_data_client": "data_client",
Expand Down

0 comments on commit 0d49272

Please sign in to comment.