Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refactor] Refactor fileio but without breaking bc #533

Merged
merged 19 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions docs/en/api/fileio.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mmengine.fileio

.. currentmodule:: mmengine.fileio

File Client
File Backend
----------------

.. autosummary::
Expand All @@ -22,11 +22,18 @@ File Client
BaseStorageBackend
FileClient
HardDiskBackend
LocalBackend
HTTPBackend
LmdbBackend
MemcachedBackend
PetrelBackend

.. autosummary::
:toctree: generated
:nosignatures:

register_backend

File Handler
----------------

Expand All @@ -40,6 +47,12 @@ File Handler
PickleHandler
YamlHandler

.. autosummary::
:toctree: generated
:nosignatures:

register_handler

File IO
----------------

Expand All @@ -49,7 +62,27 @@ File IO

dump
load
register_handler
copy_if_symlink_fails
copyfile
copyfile_from_local
copyfile_to_local
copytree
copytree_from_local
copytree_to_local
exists
generate_presigned_url
get
get_file_backend
get_local_path
get_text
isdir
isfile
join_path
list_dir_or_file
put
put_text
remove
rmtree

Parse File
----------------
Expand Down
37 changes: 35 additions & 2 deletions docs/zh_cn/api/fileio.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mmengine.fileio

.. currentmodule:: mmengine.fileio

File Client
File Backend
----------------

.. autosummary::
Expand All @@ -22,11 +22,18 @@ File Client
BaseStorageBackend
FileClient
HardDiskBackend
LocalBackend
HTTPBackend
LmdbBackend
MemcachedBackend
PetrelBackend

.. autosummary::
:toctree: generated
:nosignatures:

register_backend

File Handler
----------------

Expand All @@ -40,6 +47,12 @@ File Handler
PickleHandler
YamlHandler

.. autosummary::
:toctree: generated
:nosignatures:

register_handler

File IO
----------------

Expand All @@ -49,7 +62,27 @@ File IO

dump
load
register_handler
copy_if_symlink_fails
copyfile
copyfile_from_local
copyfile_to_local
copytree
copytree_from_local
copytree_to_local
exists
generate_presigned_url
get
get_file_backend
get_local_path
get_text
isdir
isfile
join_path
list_dir_or_file
put
put_text
remove
rmtree

Parse File
----------------
Expand Down
29 changes: 21 additions & 8 deletions mmengine/fileio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .file_client import (BaseStorageBackend, FileClient, HardDiskBackend,
HTTPBackend, LmdbBackend, MemcachedBackend,
PetrelBackend)
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
from .io import dump, load, register_handler
from .backends import (BaseStorageBackend, HTTPBackend, LmdbBackend,
LocalBackend, MemcachedBackend, PetrelBackend,
register_backend)
from .file_client import FileClient, HardDiskBackend
from .handlers import (BaseFileHandler, JsonHandler, PickleHandler,
YamlHandler, register_handler)
from .io import (copy_if_symlink_fails, copyfile, copyfile_from_local,
copyfile_to_local, copytree, copytree_from_local,
copytree_to_local, dump, exists, generate_presigned_url, get,
get_file_backend, get_local_path, get_text, isdir, isfile,
join_path, list_dir_or_file, load, put, put_text, remove,
rmtree)
from .parse import dict_from_file, list_from_file

__all__ = [
'BaseStorageBackend', 'FileClient', 'PetrelBackend', 'MemcachedBackend',
'LmdbBackend', 'HardDiskBackend', 'HTTPBackend', 'load', 'dump',
'register_handler', 'BaseFileHandler', 'JsonHandler', 'PickleHandler',
'YamlHandler', 'list_from_file', 'dict_from_file'
'LmdbBackend', 'HardDiskBackend', 'LocalBackend', 'HTTPBackend',
'copy_if_symlink_fails', 'copyfile', 'copyfile_from_local',
'copyfile_to_local', 'copytree', 'copytree_from_local',
'copytree_to_local', 'exists', 'generate_presigned_url', 'get',
'get_file_backend', 'get_local_path', 'get_text', 'isdir', 'isfile',
'join_path', 'list_dir_or_file', 'put', 'put_text', 'remove', 'rmtree',
'load', 'dump', 'register_handler', 'BaseFileHandler', 'JsonHandler',
'PickleHandler', 'YamlHandler', 'list_from_file', 'dict_from_file',
'register_backend'
]
14 changes: 14 additions & 0 deletions mmengine/fileio/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseStorageBackend
from .http_backend import HTTPBackend
from .lmdb_backend import LmdbBackend
from .local_backend import LocalBackend
from .memcached_backend import MemcachedBackend
from .petrel_backend import PetrelBackend
from .registry_utils import backends, prefix_to_backends, register_backend

# Public API of the ``backends`` subpackage: the backend classes imported
# above plus the registry helpers from ``registry_utils``.
__all__ = [
'BaseStorageBackend', 'LocalBackend', 'HTTPBackend', 'LmdbBackend',
'MemcachedBackend', 'PetrelBackend', 'register_backend', 'backends',
'prefix_to_backends'
]
36 changes: 36 additions & 0 deletions mmengine/fileio/backends/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from abc import ABCMeta, abstractmethod


class BaseStorageBackend(metaclass=ABCMeta):
    """Abstract class of storage backends.

    All backends need to implement two apis: :meth:`get()` and
    :meth:`get_text()`.

    - :meth:`get()` reads the file as a byte stream.
    - :meth:`get_text()` reads the file as texts.
    """

    # a flag to indicate whether the backend can create a symlink for a file
    # This attribute will be deprecated in future.
    _allow_symlink = False

    @property
    def allow_symlink(self):
        """bool: Value of ``_allow_symlink``.

        Reading this property emits a ``DeprecationWarning``.
        """
        # stacklevel=2 attributes the warning to the code that reads the
        # property instead of to this module, so users can locate the call
        # site they need to migrate.
        warnings.warn(
            'allow_symlink will be deprecated in future',
            DeprecationWarning,
            stacklevel=2)
        return self._allow_symlink

    @property
    def name(self):
        """str: Class name of the concrete backend."""
        return self.__class__.__name__

    @abstractmethod
    def get(self, filepath):
        """Read the content of ``filepath``; must be overridden."""
        pass

    @abstractmethod
    def get_text(self, filepath):
        """Read the content of ``filepath`` as text; must be overridden."""
        pass
78 changes: 78 additions & 0 deletions mmengine/fileio/backends/http_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Generator, Union
from urllib.request import urlopen

from .base import BaseStorageBackend


class HTTPBackend(BaseStorageBackend):
    """HTTP and HTTPS storage backend."""

    def get(self, filepath: str) -> bytes:
        """Read bytes from a given ``filepath``.

        Args:
            filepath (str): Path to read data.

        Returns:
            bytes: Expected bytes object.

        Examples:
            >>> backend = HTTPBackend()
            >>> backend.get('http://path/of/file')
            b'hello world'
        """
        # Close the response explicitly instead of relying on garbage
        # collection to release the underlying connection.
        with urlopen(filepath) as response:
            return response.read()

    def get_text(self, filepath: str, encoding: str = 'utf-8') -> str:
        """Read text from a given ``filepath``.

        Args:
            filepath (str): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Defaults to 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.

        Examples:
            >>> backend = HTTPBackend()
            >>> backend.get_text('http://path/of/file')
            'hello world'
        """
        with urlopen(filepath) as response:
            return response.read().decode(encoding)

    @contextmanager
    def get_local_path(
            self, filepath: str) -> Generator[Union[str, Path], None, None]:
        """Download a file from ``filepath`` to a local temporary directory,
        and return the temporary path.

        ``get_local_path`` is decorated by
        :meth:`contextlib.contextmanager`. It can be called with ``with``
        statement, and when exiting from the ``with`` statement, the
        temporary path will be released.

        Args:
            filepath (str): Download a file from ``filepath``.

        Yields:
            str: Only yield one temporary path.

        Examples:
            >>> backend = HTTPBackend()
            >>> # After exiting from the ``with`` clause,
            >>> # the path will be removed
            >>> with backend.get_local_path('http://path/of/file') as path:
            ...     # do something here
        """
        # Create the file before entering ``try`` so that ``f`` is always
        # bound when the ``finally`` clause runs.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            # Close before yielding so the caller can reopen the path.
            f.close()
            yield f.name
        finally:
            # Closing again is a no-op; it only matters when the download
            # failed and the handle is still open while the file is removed.
            f.close()
            os.remove(f.name)
81 changes: 81 additions & 0 deletions mmengine/fileio/backends/lmdb_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (c) OpenMMLab. All rights reserved.
from pathlib import Path
from typing import Union

from .base import BaseStorageBackend


class LmdbBackend(BaseStorageBackend):
    """Lmdb storage backend.

    Args:
        db_path (str): Lmdb database path.
        readonly (bool): Lmdb environment parameter. If True, disallow any
            write operations. Defaults to True.
        lock (bool): Lmdb environment parameter. If False, when concurrent
            access occurs, do not lock the database. Defaults to False.
        readahead (bool): Lmdb environment parameter. If False, disable the OS
            filesystem readahead mechanism, which may improve random read
            performance when a database is larger than RAM. Defaults to False.
        **kwargs: Extra keyword arguments forwarded to ``lmdb.open``.

    Attributes:
        db_path (str): Lmdb database path.
    """

    def __init__(self,
                 db_path,
                 readonly=True,
                 lock=False,
                 readahead=False,
                 **kwargs):
        # Assign first: ``__del__`` still runs when the import check below
        # raises, and it must be able to find this attribute.
        self._client = None

        try:
            import lmdb  # noqa: F401
        except ImportError:
            raise ImportError(
                'Please run "pip install lmdb" to enable LmdbBackend.')

        self.db_path = str(db_path)
        self.readonly = readonly
        self.lock = lock
        self.readahead = readahead
        self.kwargs = kwargs

    def get(self, filepath: Union[str, Path]) -> bytes:
        """Get values according to the filepath.

        The lmdb environment is opened lazily on the first call and reused
        afterwards.

        Args:
            filepath (str or Path): Here, filepath is the lmdb key. It is
                encoded as ASCII before the lookup.

        Returns:
            bytes: Expected bytes object.

        Examples:
            >>> backend = LmdbBackend('path/to/lmdb')
            >>> backend.get('key')
            b'hello world'
        """
        if self._client is None:
            self._client = self._get_client()

        filepath = str(filepath)
        with self._client.begin(write=False) as txn:
            value_buf = txn.get(filepath.encode('ascii'))
        return value_buf

    def get_text(self, filepath, encoding=None):
        """Not supported by this backend.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def _get_client(self):
        """Open the lmdb environment with the options given at init."""
        import lmdb

        return lmdb.open(
            self.db_path,
            readonly=self.readonly,
            lock=self.lock,
            readahead=self.readahead,
            **self.kwargs)

    def __del__(self):
        # ``getattr`` guards against partially initialised instances.
        client = getattr(self, '_client', None)
        if client is not None:
            client.close()
Loading