-
Notifications
You must be signed in to change notification settings - Fork 386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[#2113] feat(pyClient): initial Gravitino Python client module #2676
Changes from 10 commits
ae36106
1c916da
ba4abfb
0be1a6e
f31225e
11665e8
18db11d
33d0ae6
e14faf1
ee4403d
a9a327d
655bb61
c39e26a
834c6f3
ff6850c
c19f12a
036655e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
.PHONY: mkvenv install test clean | ||
|
||
mkvenv: | ||
python -m venv venv && source ./venv/bin/activate && pip install -U pip | ||
|
||
install: | ||
source ./venv/bin/activate && pip install -e . | ||
|
||
test: | ||
source ./venv/bin/activate && python -m unittest | ||
|
||
clean: | ||
rm -rf venv |
xunliu marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
""" | ||
Copyright 2024 Datastrato Pvt Ltd. | ||
This software is licensed under the Apache License version 2. | ||
""" | ||
|
||
from gravitino.gravitino_client import ( | ||
GravitinoClient, | ||
gravitino_metalake, | ||
MetaLake, | ||
Catalog, | ||
Schema, | ||
Table, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
""" | ||
Copyright 2024 Datastrato Pvt Ltd. | ||
This software is licensed under the Apache License version 2. | ||
""" | ||
""" | ||
|
||
TIMEOUT = 10 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
""" | ||
Copyright 2024 Datastrato Pvt Ltd. | ||
This software is licensed under the Apache License version 2. | ||
""" | ||
""" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
""" | ||
Copyright 2024 Datastrato Pvt Ltd. | ||
This software is licensed under the Apache License version 2. | ||
""" | ||
|
||
import re | ||
|
||
from gravitino.constants import TIMEOUT | ||
from gravitino.service import initialize_service, service | ||
from gravitino.typing import JSON_ro | ||
|
||
|
||
class MetaLake: | ||
def __init__(self, payload: JSON_ro): | ||
self.name = payload.get("name") | ||
self.payload = payload | ||
self.service = service["service"] | ||
self.catalogs = self.service.get_catalogs(self.name) | ||
xunliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def __repr__(self): | ||
return f"MetaLake<{self.name}>" | ||
|
||
def __getattr__(self, catalog_name): | ||
if catalog_name in dir(self): | ||
return Catalog(self.name, catalog_name) | ||
|
||
def __dir__(self): | ||
return [catalog["name"] for catalog in self.catalogs] | ||
|
||
|
||
class Catalog: | ||
def __init__(self, metalake_name: str, catalog_name: str): | ||
self.metalake_name = metalake_name | ||
self.catalog_name = catalog_name | ||
self.name = catalog_name | ||
self.service = service["service"] | ||
self.schemas = self.service.get_schemas(metalake_name, catalog_name) | ||
xunliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def __repr__(self): | ||
return f"Catalog<{self.name}>" | ||
|
||
def __getattr__(self, schema_name): | ||
if schema_name in dir(self): | ||
return Schema(self.metalake_name, self.catalog_name, schema_name) | ||
|
||
def __dir__(self): | ||
return [schema["name"] for schema in self.schemas] | ||
|
||
|
||
class Schema: | ||
xunliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def __init__(self, metalake_name: str, catalog_name: str, schema_name: str): | ||
self.metalake_name = metalake_name | ||
self.catalog_name = catalog_name | ||
self.schema_name = schema_name | ||
self.name = schema_name | ||
self.service = service["service"] | ||
self.tables = self.service.get_tables(metalake_name, catalog_name, schema_name) | ||
|
||
def __repr__(self): | ||
return f"Schema<{self.name}>" | ||
|
||
def __getattr__(self, table_name): | ||
if table_name in dir(self): | ||
return Table( | ||
self.metalake_name, self.catalog_name, self.schema_name, table_name | ||
) | ||
|
||
def __dir__(self): | ||
return [table["name"] for table in self.tables] | ||
|
||
|
||
class Table: | ||
def __init__( | ||
self, metalake_name: str, catalog_name: str, schema_name: str, table_name: str | ||
): | ||
self.metalake_name = metalake_name | ||
self.catalog_name = catalog_name | ||
self.schema_name = schema_name | ||
self.table_name = table_name | ||
self.name = schema_name | ||
self.service = service["service"] | ||
xunliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def __repr__(self): | ||
return f"Table<{self.name}>" | ||
|
||
def info(self): | ||
return self.service.get_table( | ||
self.metalake_name, self.catalog_name, self.schema_name, self.table_name | ||
) | ||
|
||
|
||
class GravitinoClient: | ||
def __init__( | ||
self, | ||
host: str, | ||
*, | ||
protocol: str = "http", | ||
port: int = 8090, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think maybe we didn't need these parameter There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Depending on how the Client class is initialized, e.g.:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hi @zhaoyongjie thank you for your reply. |
||
prefix: str = "/api", | ||
timeout: int = TIMEOUT, | ||
debug: bool = False, | ||
) -> None: | ||
if re.search(r"^https?:\/\/", host): | ||
_host = host.rstrip("/") | ||
else: | ||
_host = f"{protocol}://{host.rstrip('/')}" | ||
|
||
if not re.search(r"[0-9]{2,5}$", _host): | ||
_host = f"{_host}:{port}" | ||
|
||
_base_url = f"{_host}/{prefix.strip('/')}" | ||
initialize_service(_base_url, timeout) | ||
self.service = service["service"] | ||
self.debug = debug | ||
|
||
@classmethod | ||
def initialize_metalake( | ||
cls, | ||
host: str, | ||
metalake_name: str, | ||
*, | ||
protocol: str = "http", | ||
port: int = 8090, | ||
zhaoyongjie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
prefix: str = "/api", | ||
timeout: int = TIMEOUT, | ||
debug: bool = False, | ||
) -> MetaLake: | ||
# keep in mind, all constructors should include same interface as __init__ function | ||
client = cls( | ||
host, | ||
protocol=protocol, | ||
port=port, | ||
prefix=prefix, | ||
timeout=timeout, | ||
debug=debug, | ||
) | ||
return client.get_metalake(metalake_name) | ||
|
||
@property | ||
def version(self): | ||
return self.service.get_version() | ||
|
||
def get_metalakes(self) -> [MetaLake]: | ||
return [MetaLake(metalake) for metalake in self.service.get_metalakes()] | ||
|
||
def get_metalake(self, metalake: str) -> MetaLake: | ||
return MetaLake(self.service.get_metalake(metalake)) | ||
|
||
|
||
def gravitino_metalake( | ||
host: str, | ||
metalake_name: str, | ||
*, | ||
protocol: str = "http", | ||
port: int = 8090, | ||
prefix: str = "/api", | ||
timeout: int = TIMEOUT, | ||
debug: bool = False, | ||
) -> MetaLake: | ||
return GravitinoClient.initialize_metalake( | ||
host, | ||
metalake_name, | ||
protocol=protocol, | ||
port=port, | ||
prefix=prefix, | ||
timeout=timeout, | ||
debug=debug, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
""" | ||
Copyright 2024 Datastrato Pvt Ltd. | ||
This software is licensed under the Apache License version 2. | ||
""" | ||
|
||
from gravitino.utils import HTTPClient, unpack, Response | ||
from gravitino.constants import TIMEOUT | ||
|
||
|
||
xunliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
class _Service: | ||
zhaoyongjie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def __init__( | ||
self, | ||
url: str, | ||
timeout: int = TIMEOUT, | ||
) -> None: | ||
self.http_client = HTTPClient(url, timeout=timeout) | ||
|
||
@unpack("version") | ||
def get_version(self) -> Response: | ||
return self.http_client.get("/version") | ||
|
||
@unpack("metalakes") | ||
def get_metalakes(self) -> Response: | ||
zhaoyongjie marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return self.http_client.get("/metalakes") | ||
|
||
@unpack("metalake") | ||
def get_metalake(self, metalake: str) -> Response: | ||
return self.http_client.get(f"/metalakes/{metalake}") | ||
|
||
@unpack("identifiers") | ||
def get_catalogs(self, metalake: str) -> Response: | ||
return self.http_client.get(f"/metalakes/{metalake}/catalogs/") | ||
|
||
@unpack("catalog") | ||
def get_catalog(self, metalake: str, catalog: str) -> Response: | ||
return self.http_client.get(f"/metalakes/{metalake}/catalogs/{catalog}") | ||
|
||
@unpack("identifiers") | ||
def get_schemas(self, metalake: str, catalog: str) -> Response: | ||
return self.http_client.get(f"/metalakes/{metalake}/catalogs/{catalog}/schemas") | ||
|
||
@unpack("schema") | ||
def get_schema(self, metalake: str, catalog: str, schema: str) -> Response: | ||
return self.http_client.get( | ||
f"/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}" | ||
) | ||
|
||
@unpack("identifiers") | ||
def get_tables(self, metalake: str, catalog: str, schema: str) -> Response: | ||
return self.http_client.get( | ||
f"/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}/tables" | ||
) | ||
|
||
@unpack("table") | ||
def get_table( | ||
self, metalake: str, catalog: str, schema: str, table: str | ||
) -> Response: | ||
return self.http_client.get( | ||
f"/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}/tables/{table}" | ||
) | ||
|
||
@unpack("names") | ||
def get_partitions( | ||
self, metalake: str, catalog: str, schema: str, table: str | ||
) -> Response: | ||
return self.http_client.get( | ||
f"/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}/tables/{table}/partitions" | ||
) | ||
|
||
|
||
service = {} | ||
|
||
|
||
def initialize_service(url: str, timeout: int = TIMEOUT): | ||
global service | ||
if not service: | ||
service["service"] = _Service(url, timeout) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
""" | ||
Copyright 2024 Datastrato Pvt Ltd. | ||
This software is licensed under the Apache License version 2. | ||
""" | ||
|
||
from typing import Mapping, Sequence, Union | ||
|
||
# https://github.com/python/typing/issues/182#issuecomment-1320974824 | ||
JSON_ro = Union[ | ||
xunliu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Mapping[str, "JSON_ro"], Sequence["JSON_ro"], str, int, float, bool, None | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gravitino use Gradle to compile, so we didn't need this file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's for client test and build. Gradle might run it as well if you want.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I commit a PR #2753 to support Gradle to build and test Python client module, This way we can use the Gradle command in a unified way.
./gradlew build
./gradlew clean
./gradlew test
So, I sure we didn't need use this Makefile file.