Skip to content

Commit

Permalink
sdk/python: Refactor module structure
Browse files Browse the repository at this point in the history
Signed-off-by: Aaron Wilson <[email protected]>
  • Loading branch information
aaronnw committed Sep 25, 2024
1 parent b550c0b commit 0fc6e98
Show file tree
Hide file tree
Showing 44 changed files with 242 additions and 219 deletions.
2 changes: 1 addition & 1 deletion python/aistore/pytorch/base_iter_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import List, Union, Iterable, Dict, Iterator, Tuple
import torch.utils.data as torch_utils
from abc import ABC, abstractmethod
from aistore.sdk.ais_source import AISSource
from aistore.sdk import AISSource


class AISBaseIterDataset(ABC, torch_utils.IterableDataset):
Expand Down
3 changes: 1 addition & 2 deletions python/aistore/pytorch/base_map_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from typing import List, Union, Dict
from abc import ABC, abstractmethod
from torch.utils.data import Dataset
from aistore.sdk.ais_source import AISSource
from aistore.sdk.object import Object
from aistore.sdk import AISSource, Object


class AISBaseMapDataset(ABC, Dataset):
Expand Down
2 changes: 1 addition & 1 deletion python/aistore/pytorch/iter_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from aistore.pytorch.base_iter_dataset import AISBaseIterDataset
from typing import List, Union, Dict
from aistore.sdk.ais_source import AISSource
from aistore.sdk import AISSource
from alive_progress import alive_it


Expand Down
2 changes: 1 addition & 1 deletion python/aistore/pytorch/map_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from typing import List, Union, Dict

from aistore.sdk.ais_source import AISSource
from aistore.sdk import AISSource
from aistore.pytorch.base_map_dataset import AISBaseMapDataset


Expand Down
6 changes: 2 additions & 4 deletions python/aistore/pytorch/multishard_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
"""

from aistore.sdk.dataset.data_shard import DataShard
from aistore.sdk import ArchiveConfig, DataShard, ListObjectFlag
from aistore.sdk import Bucket
from typing import Iterator, List, Iterable
from aistore.sdk.list_object_flag import ListObjectFlag
from aistore.sdk.types import ArchiveSettings
from torch.utils.data import IterableDataset


Expand Down Expand Up @@ -62,5 +60,5 @@ def _get_shard_objects_iterator(
obj_name = obj.name.replace(f"{path}/", "", 1)
yield bucket.object(path).get(
etl_name=etl_name,
archive_settings=ArchiveSettings(archpath=obj_name),
archive_config=ArchiveConfig(archpath=obj_name),
).read_all()
3 changes: 1 addition & 2 deletions python/aistore/pytorch/shard_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
"""

from aistore.sdk.bucket import Bucket
from aistore.sdk import Bucket, ListObjectFlag
from typing import Dict, Iterator, List, Union
from aistore.pytorch.utils import get_basename, get_extension
from aistore.pytorch.base_iter_dataset import AISBaseIterDataset
from alive_progress import alive_it
from io import BytesIO
from tarfile import open, TarError
from aistore.sdk.list_object_flag import ListObjectFlag


class AISShardReader(AISBaseIterDataset):
Expand Down
25 changes: 22 additions & 3 deletions python/aistore/sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
"""
Import client-accessible components here to provide consistent imports via `from aistore.sdk import *`
Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
"""

# Clients
from aistore.sdk.client import Client
from aistore.sdk.list_object_flag import ListObjectFlag
from aistore.sdk.bucket import Bucket
from aistore.sdk.namespace import Namespace
from aistore.sdk.authn.authn_client import AuthNClient

# Core components
from aistore.sdk.cluster import Cluster
from aistore.sdk.namespace import Namespace
from aistore.sdk.ais_source import AISSource
from aistore.sdk.bucket import Bucket
from aistore.sdk.obj.object import Object
from aistore.sdk.job import Job
from aistore.sdk.etl.etl import Etl

# Config objects, types and dataclasses
from aistore.sdk.archive_config import ArchiveConfig
from aistore.sdk.blob_download_config import BlobDownloadConfig
from aistore.sdk.dataset.data_shard import DataShard
from aistore.sdk.list_object_flag import ListObjectFlag
2 changes: 1 addition & 1 deletion python/aistore/sdk/ais_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
from abc import ABC, abstractmethod
from typing import Iterable
from aistore.sdk.object import Object
from aistore.sdk.obj.object import Object
from aistore.sdk.request_client import RequestClient


Expand Down
58 changes: 58 additions & 0 deletions python/aistore/sdk/archive_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
from dataclasses import dataclass
from enum import Enum


class ArchiveMode(Enum):
    """
    Matching modes used when selecting files/objects from an archive in a bucket.

    Each member's value is the plain string form of the mode.
    See the `MatchMode` enum in cmn/archive/read.go.
    """

    # General-purpose regular expression
    REGEXP = "regexp"
    # Match on filename prefix
    PREFIX = "prefix"
    # Match on filename suffix
    SUFFIX = "suffix"
    # Match on a substring of the filename
    SUBSTR = "substr"
    # Match on WebDataset key
    WDSKEY = "wdskey"


@dataclass
class ArchiveConfig:
    """
    Options describing which file(s) to extract from an archive object.

    Either a single file is addressed with `archpath`, or several files are
    selected with `regex` together with `mode`; the two approaches are
    mutually exclusive.

    Attributes:
        archpath (str, optional): If the object is an archive, use `archpath` to extract a
            single file from the archive
        regex (str, optional): A prefix, suffix, WebDataset key, or general-purpose regular
            expression used to match filenames within the archive and select possibly
            multiple files
        mode (ArchiveMode, optional): Specifies the mode of archive extraction when using `regex`

    Raises:
        ValueError: If `mode` is given without `regex`, `regex` is given without `mode`,
            or `regex` and `archpath` are both given

    Example:
        # Extract a single file from an archive
        single_file_config = ArchiveConfig(
            archpath="path/to/your/file.txt"
        )

        # Extract multiple files from an archive
        multi_file_config = ArchiveConfig(
            regex="log",  # Retrieve all log files from the archive
            mode=ArchiveMode.SUFFIX,
        )
    """

    archpath: str = ""
    regex: str = ""
    mode: ArchiveMode = None

    def __post_init__(self):
        """Validate that the provided combination of fields is consistent."""
        has_mode = bool(self.mode)
        has_regex = bool(self.regex)

        # `regex` and `mode` only make sense as a pair; report whichever one is missing
        if has_mode != has_regex:
            if has_mode:
                raise ValueError("Archive mode requires archive regex")
            raise ValueError("Archive regex requires archive mode")

        # Multi-file selection (regex) and single-file selection (archpath) cannot be combined
        if has_regex and self.archpath:
            raise ValueError("Cannot use both Archive regex and Archive path")
18 changes: 0 additions & 18 deletions python/aistore/sdk/archive_mode.py

This file was deleted.

25 changes: 25 additions & 0 deletions python/aistore/sdk/blob_download_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
from dataclasses import dataclass


@dataclass
class BlobDownloadConfig:
    """
    Settings for fetching objects through a blob downloader.

    Both fields are optional and default to None.

    Attributes:
        chunk_size (str, optional): Chunk size for the blob downloader. It can be specified
            in IEC or SI units, or as raw bytes (e.g., "4mb", "1MiB", "1048576", "128k")
        num_workers (str, optional): Number of concurrent workers for the blob downloader

    Example:
        blob_config = BlobDownloadConfig(
            chunk_size="1MiB",  # 1 MiB per chunk
            num_workers="5",  # 5 concurrent download workers
        )
    """

    chunk_size: str = None
    num_workers: str = None
num_workers: str = None
8 changes: 4 additions & 4 deletions python/aistore/sdk/bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import requests

from aistore.sdk.ais_source import AISSource
from aistore.sdk.etl_const import DEFAULT_ETL_TIMEOUT
from aistore.sdk.object_iterator import ObjectIterator
from aistore.sdk.etl.etl_const import DEFAULT_ETL_TIMEOUT
from aistore.sdk.obj.object_iterator import ObjectIterator
from aistore.sdk.const import (
ACT_COPY_BCK,
ACT_CREATE_BCK,
Expand Down Expand Up @@ -58,7 +58,7 @@
)
from aistore.sdk.multiobj import ObjectGroup, ObjectRange
from aistore.sdk.request_client import RequestClient
from aistore.sdk.object import Object
from aistore.sdk.obj.object import Object
from aistore.sdk.types import (
ActionMsg,
BucketEntry,
Expand All @@ -73,7 +73,7 @@
)
from aistore.sdk.list_object_flag import ListObjectFlag
from aistore.sdk.utils import validate_directory, get_file_size
from aistore.sdk.object_props import ObjectProps
from aistore.sdk.obj.object_props import ObjectProps

Header = NewType("Header", requests.structures.CaseInsensitiveDict)

Expand Down
4 changes: 2 additions & 2 deletions python/aistore/sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from aistore.sdk.session_manager import SessionManager
from aistore.sdk.types import Namespace
from aistore.sdk.job import Job
from aistore.sdk.etl import Etl
from aistore.sdk.etl.etl import Etl
from aistore.sdk.utils import parse_url
from aistore.sdk.object import Object
from aistore.sdk.obj.object import Object
from aistore.sdk.errors import InvalidURLException


Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
URL_PATH_ETL,
UTF_ENCODING,
)
from aistore.sdk.etl_const import (
from aistore.sdk.etl.etl_const import (
ETL_SUPPORTED_PYTHON_VERSIONS,
DEFAULT_ETL_RUNTIME,
DEFAULT_ETL_COMM,
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 4 additions & 0 deletions python/aistore/sdk/list_object_flag.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
#
# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved.
#

from __future__ import annotations
from enum import Enum

Expand Down
4 changes: 2 additions & 2 deletions python/aistore/sdk/multiobj/object_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
ACT_TRANSFORM_OBJECTS,
ACT_ARCHIVE_OBJECTS,
)
from aistore.sdk.etl_const import DEFAULT_ETL_TIMEOUT
from aistore.sdk.object import Object
from aistore.sdk.etl.etl_const import DEFAULT_ETL_TIMEOUT
from aistore.sdk.obj.object import Object
from aistore.sdk.multiobj.object_names import ObjectNames
from aistore.sdk.multiobj.object_range import ObjectRange
from aistore.sdk.multiobj.object_template import ObjectTemplate
Expand Down
Empty file.
Loading

0 comments on commit 0fc6e98

Please sign in to comment.