Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(cli/lite): fix datahub lite serve command #7089

Merged
merged 3 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/datahub_lite.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ The following features are **NOT** supported:

## Prerequisites

There are no prerequisites for DataHub Lite other than a Python 3.7+ environment and [`acryl-datahub`](https://pypi.org/project/acryl-datahub/) > 0.9.6. Install the `datahub` Python CLI using the [instructions](./cli.md#using-pip).
To use `datahub lite` commands, you need to install [`acryl-datahub`](https://pypi.org/project/acryl-datahub/) > 0.9.6 ([install instructions](./cli.md#using-pip)) and the `datahub-lite` plugin.

```shell
pip install acryl-datahub
pip install acryl-datahub[datahub-lite]
```

## Importing Metadata
Expand Down
5 changes: 4 additions & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def get_long_description():
# Sink plugins.
"datahub-kafka": kafka_common,
"datahub-rest": rest_common,
"datahub-lite": set(),
"datahub-lite": {
"fastapi",
"uvicorn",
},
# Integrations.
"airflow": {
"apache-airflow >= 2.0.2",
Expand Down
5 changes: 2 additions & 3 deletions metadata-ingestion/src/datahub/cli/lite_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,10 @@ def serve(port: int) -> None:
uvicorn.run(app, port=port)


@lite.command(context_settings=dict(allow_extra_args=True))
@lite.command()
@click.argument("path", required=False, type=CompleteablePath())
@click.pass_context
@telemetry.with_telemetry
def ls(ctx: click.Context, path: Optional[str]) -> None:
def ls(path: Optional[str]) -> None:
"""List at a path"""

start_time = time.time()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase):
)
profile_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="Regex patterns to filter tables for profiling during ingestion. Allowed by the `table_pattern`.",
description="Regex patterns to filter tables (or specific columns) for profiling during ingestion. Note that only tables allowed by the `table_pattern` will be considered.",
)
domain: Dict[str, AllowDenyPattern] = Field(
default=dict(),
Expand Down
3 changes: 3 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,10 @@ def _populate_usage_stat_registry(self):
if self.server is None:
return

view: TSC.ViewItem
for view in TSC.Pager(self.server.views, usage=True):
if not view.id:
continue
self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views)
logger.debug("Tableau stats %s", self.tableau_stat_registry)

Expand Down
44 changes: 22 additions & 22 deletions metadata-ingestion/src/datahub/lite/lite_server.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import Dict, Iterable, List, Optional, Union
from typing import Dict, List, Optional, Union

from fastapi import Depends, FastAPI, Query
from fastapi import Depends, FastAPI, HTTPException, Query
from fastapi.responses import RedirectResponse

from datahub.lite.lite_local import (
Expand All @@ -10,7 +10,6 @@
Searchable,
SearchFlavor,
)
from datahub.metadata.schema_classes import SystemMetadataClass, _Aspect

app = FastAPI()
logger = logging.getLogger(__name__)
Expand All @@ -22,8 +21,8 @@ def redirect_to_docs():
return RedirectResponse(app.docs_url)


@app.get("/ping") # type: ignore
def ping() -> dict: # type: ignore
@app.get("/ping")
def ping() -> dict:
return {"ping": "pong"}


Expand All @@ -34,44 +33,45 @@ def lite() -> DataHubLiteLocal:
return lite


@app.get("/entities") # type: ignore
def entities_list(lite: DataHubLiteLocal = Depends(lite)) -> Iterable[str]: # type: ignore
@app.get("/entities")
def entities_list(lite: DataHubLiteLocal = Depends(lite)) -> List[str]:
# TODO add some filtering capabilities
return lite.list_ids()
return list(lite.list_ids())


@app.get("/entities/{id}") # type: ignore
def entities_get( # type: ignore
@app.get("/entities/{id}")
def entities_get(
id: str,
aspects: Optional[List[str]] = Query(None),
lite: DataHubLiteLocal = Depends(lite),
) -> Optional[
Dict[str, Union[str, Dict[str, Union[dict, _Aspect, SystemMetadataClass]]]]
]:
) -> Dict[str, Union[str, Dict[str, dict]]]:
# Queried as GET /entities/<url-encoded urn>?aspects=aspect1&aspects=aspect2&...
logger.warning(f"get {id} aspects={aspects}")
return lite.get(id, aspects=aspects)
entities = lite.get(id, aspects=aspects, typed=False)
if not entities:
raise HTTPException(status_code=404, detail="Entity not found")
return entities # type: ignore


@app.get("/browse") # type: ignore
def browse( # type: ignore
@app.get("/browse")
def browse(
path: str = Query("/"),
catalog: DataHubLiteLocal = Depends(lite),
) -> Iterable[Browseable]:
) -> List[Browseable]:
# Queried as GET /browse/?path=<url-encoded-path>
logger.info(f"browse {path}")
return catalog.ls(path)
return list(catalog.ls(path))


@app.get("/search") # type: ignore
def search( # type: ignore
@app.get("/search")
def search(
query: str = Query("*"),
flavor: SearchFlavor = Query(SearchFlavor.FREE_TEXT),
lite: DataHubLiteLocal = Depends(lite),
) -> Iterable[Searchable]:
) -> List[Searchable]:
# Queried as GET /search/?query=<url-encoded-query>
logger.info(f"search {query}")
return lite.search(query=query, flavor=flavor)
return list(lite.search(query=query, flavor=flavor))


# TODO put command