Skip to content

Commit

Permalink
[Refactor] Refactor fileio without breaking back compatibility (#533)
Browse files Browse the repository at this point in the history
* [Refactor] Refactor fileio but without breaking bc

* handle compatibility

* fix format

* modify io functions

* fix ut

* fix ut

* rename method names

* refine

* refine docstring

* fix ut in windows

* update ut

* minor fix

* ensure client is not None when closing it

* add more examples for list_dir_or_file interface

* refine docstring

* refine deprecated info

* fix ut

* add a description for lmdb docstring
  • Loading branch information
zhouzaida authored Sep 26, 2022
1 parent 8d14eb6 commit ed84dfd
Show file tree
Hide file tree
Showing 33 changed files with 5,139 additions and 859 deletions.
37 changes: 35 additions & 2 deletions docs/en/api/fileio.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mmengine.fileio

.. currentmodule:: mmengine.fileio

File Client
File Backend
----------------

.. autosummary::
Expand All @@ -22,11 +22,18 @@ File Client
BaseStorageBackend
FileClient
HardDiskBackend
LocalBackend
HTTPBackend
LmdbBackend
MemcachedBackend
PetrelBackend

.. autosummary::
:toctree: generated
:nosignatures:

register_backend

File Handler
----------------

Expand All @@ -40,6 +47,12 @@ File Handler
PickleHandler
YamlHandler

.. autosummary::
:toctree: generated
:nosignatures:

register_handler

File IO
----------------

Expand All @@ -49,7 +62,27 @@ File IO

dump
load
register_handler
copy_if_symlink_fails
copyfile
copyfile_from_local
copyfile_to_local
copytree
copytree_from_local
copytree_to_local
exists
generate_presigned_url
get
get_file_backend
get_local_path
get_text
isdir
isfile
join_path
list_dir_or_file
put
put_text
remove
rmtree

Parse File
----------------
Expand Down
37 changes: 35 additions & 2 deletions docs/zh_cn/api/fileio.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mmengine.fileio

.. currentmodule:: mmengine.fileio

File Client
File Backend
----------------

.. autosummary::
Expand All @@ -22,11 +22,18 @@ File Client
BaseStorageBackend
FileClient
HardDiskBackend
LocalBackend
HTTPBackend
LmdbBackend
MemcachedBackend
PetrelBackend

.. autosummary::
:toctree: generated
:nosignatures:

register_backend

File Handler
----------------

Expand All @@ -40,6 +47,12 @@ File Handler
PickleHandler
YamlHandler

.. autosummary::
:toctree: generated
:nosignatures:

register_handler

File IO
----------------

Expand All @@ -49,7 +62,27 @@ File IO

dump
load
register_handler
copy_if_symlink_fails
copyfile
copyfile_from_local
copyfile_to_local
copytree
copytree_from_local
copytree_to_local
exists
generate_presigned_url
get
get_file_backend
get_local_path
get_text
isdir
isfile
join_path
list_dir_or_file
put
put_text
remove
rmtree

Parse File
----------------
Expand Down
29 changes: 21 additions & 8 deletions mmengine/fileio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .file_client import (BaseStorageBackend, FileClient, HardDiskBackend,
HTTPBackend, LmdbBackend, MemcachedBackend,
PetrelBackend)
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
from .io import dump, load, register_handler
from .backends import (BaseStorageBackend, HTTPBackend, LmdbBackend,
LocalBackend, MemcachedBackend, PetrelBackend,
register_backend)
from .file_client import FileClient, HardDiskBackend
from .handlers import (BaseFileHandler, JsonHandler, PickleHandler,
YamlHandler, register_handler)
from .io import (copy_if_symlink_fails, copyfile, copyfile_from_local,
copyfile_to_local, copytree, copytree_from_local,
copytree_to_local, dump, exists, generate_presigned_url, get,
get_file_backend, get_local_path, get_text, isdir, isfile,
join_path, list_dir_or_file, load, put, put_text, remove,
rmtree)
from .parse import dict_from_file, list_from_file

__all__ = [
'BaseStorageBackend', 'FileClient', 'PetrelBackend', 'MemcachedBackend',
'LmdbBackend', 'HardDiskBackend', 'HTTPBackend', 'load', 'dump',
'register_handler', 'BaseFileHandler', 'JsonHandler', 'PickleHandler',
'YamlHandler', 'list_from_file', 'dict_from_file'
'LmdbBackend', 'HardDiskBackend', 'LocalBackend', 'HTTPBackend',
'copy_if_symlink_fails', 'copyfile', 'copyfile_from_local',
'copyfile_to_local', 'copytree', 'copytree_from_local',
'copytree_to_local', 'exists', 'generate_presigned_url', 'get',
'get_file_backend', 'get_local_path', 'get_text', 'isdir', 'isfile',
'join_path', 'list_dir_or_file', 'put', 'put_text', 'remove', 'rmtree',
'load', 'dump', 'register_handler', 'BaseFileHandler', 'JsonHandler',
'PickleHandler', 'YamlHandler', 'list_from_file', 'dict_from_file',
'register_backend'
]
14 changes: 14 additions & 0 deletions mmengine/fileio/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseStorageBackend
from .http_backend import HTTPBackend
from .lmdb_backend import LmdbBackend
from .local_backend import LocalBackend
from .memcached_backend import MemcachedBackend
from .petrel_backend import PetrelBackend
from .registry_utils import backends, prefix_to_backends, register_backend

# Names exported as the public interface of this package.
__all__ = [
    'BaseStorageBackend',
    'LocalBackend',
    'HTTPBackend',
    'LmdbBackend',
    'MemcachedBackend',
    'PetrelBackend',
    'register_backend',
    'backends',
    'prefix_to_backends',
]
36 changes: 36 additions & 0 deletions mmengine/fileio/backends/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from abc import ABCMeta, abstractmethod


class BaseStorageBackend(metaclass=ABCMeta):
    """Abstract class of storage backends.

    All backends need to implement two apis: :meth:`get()` and
    :meth:`get_text()`.

    - :meth:`get()` reads the file as a byte stream.
    - :meth:`get_text()` reads the file as texts.
    """

    # a flag to indicate whether the backend can create a symlink for a file
    # This attribute will be deprecated in future.
    _allow_symlink = False

    @property
    def allow_symlink(self):
        """Return ``_allow_symlink`` and emit a ``DeprecationWarning``."""
        # stacklevel=2 attributes the warning to the code that reads the
        # property instead of to this module, so the report points at the
        # caller that actually needs to be updated.
        warnings.warn(
            'allow_symlink will be deprecated in future',
            DeprecationWarning,
            stacklevel=2)
        return self._allow_symlink

    @property
    def name(self):
        """Return the class name of the concrete backend."""
        return self.__class__.__name__

    @abstractmethod
    def get(self, filepath):
        """Read ``filepath`` as a byte stream (implemented by subclasses)."""
        pass

    @abstractmethod
    def get_text(self, filepath):
        """Read ``filepath`` as texts (implemented by subclasses)."""
        pass
78 changes: 78 additions & 0 deletions mmengine/fileio/backends/http_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Generator, Union
from urllib.request import urlopen

from .base import BaseStorageBackend


class HTTPBackend(BaseStorageBackend):
    """HTTP and HTTPS storage backend."""

    def get(self, filepath: str) -> bytes:
        """Read bytes from a given ``filepath``.

        Args:
            filepath (str): Path to read data.

        Returns:
            bytes: Expected bytes object.

        Examples:
            >>> backend = HTTPBackend()
            >>> backend.get('http://path/of/file')
            b'hello world'
        """
        # Close the response deterministically instead of leaving the
        # underlying connection open until garbage collection.
        with urlopen(filepath) as response:
            return response.read()

    def get_text(self, filepath: str, encoding: str = 'utf-8') -> str:
        """Read text from a given ``filepath``.

        Args:
            filepath (str): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Defaults to 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.

        Examples:
            >>> backend = HTTPBackend()
            >>> backend.get_text('http://path/of/file')
            'hello world'
        """
        with urlopen(filepath) as response:
            return response.read().decode(encoding)

    @contextmanager
    def get_local_path(
            self, filepath: str) -> Generator[Union[str, Path], None, None]:
        """Download a file from ``filepath`` to a local temporary file, and
        yield the temporary path.

        ``get_local_path`` is decorated by
        :meth:`contextlib.contextmanager`. It can be called with ``with``
        statement, and when exiting from the ``with`` statement, the
        temporary file will be removed.

        Args:
            filepath (str): Download a file from ``filepath``.

        Yields:
            Iterable[str]: Only yield one temporary path.

        Examples:
            >>> backend = HTTPBackend()
            >>> # After exiting from the ``with`` clause,
            >>> # the path will be removed
            >>> with backend.get_local_path('http://path/of/file') as path:
            ...     # do something here
        """
        # Create the file before entering ``try`` so that ``f`` is always
        # bound when the ``finally`` clause runs.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            # Flush and release the handle so the caller can reopen the path.
            f.close()
            yield f.name
        finally:
            # ``close`` is a no-op when already closed; it matters when the
            # download failed, because an open file cannot be removed on
            # some platforms.
            f.close()
            os.remove(f.name)
82 changes: 82 additions & 0 deletions mmengine/fileio/backends/lmdb_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (c) OpenMMLab. All rights reserved.
from pathlib import Path
from typing import Union

from .base import BaseStorageBackend


class LmdbBackend(BaseStorageBackend):
    """Lmdb storage backend.

    Args:
        db_path (str): Lmdb database path.
        readonly (bool): Lmdb environment parameter. If True, disallow any
            write operations. Defaults to True.
        lock (bool): Lmdb environment parameter. If False, when concurrent
            access occurs, do not lock the database. Defaults to False.
        readahead (bool): Lmdb environment parameter. If False, disable the OS
            filesystem readahead mechanism, which may improve random read
            performance when a database is larger than RAM. Defaults to False.
        **kwargs: Keyword arguments passed to `lmdb.open`.

    Attributes:
        db_path (str): Lmdb database path.
    """

    def __init__(self,
                 db_path,
                 readonly=True,
                 lock=False,
                 readahead=False,
                 **kwargs):
        # Assign first so that ``__del__`` finds the attribute even when the
        # import check below raises and leaves the object half-initialised.
        self._client = None

        try:
            import lmdb  # noqa: F401
        except ImportError:
            raise ImportError(
                'Please run "pip install lmdb" to enable LmdbBackend.')

        self.db_path = str(db_path)
        self.readonly = readonly
        self.lock = lock
        self.readahead = readahead
        self.kwargs = kwargs

    def get(self, filepath: Union[str, Path]) -> bytes:
        """Get values according to the filepath.

        Args:
            filepath (str or Path): Here, filepath is the lmdb key.

        Returns:
            bytes: Expected bytes object.

        Examples:
            >>> backend = LmdbBackend('path/to/lmdb')
            >>> backend.get('key')
            b'hello world'
        """
        # The environment is opened lazily on the first read.
        if self._client is None:
            self._client = self._get_client()

        filepath = str(filepath)
        with self._client.begin(write=False) as txn:
            value_buf = txn.get(filepath.encode('ascii'))
        return value_buf

    def get_text(self, filepath, encoding=None):
        """Not supported by this backend.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def _get_client(self):
        """Open the lmdb environment with the options given at init."""
        import lmdb

        return lmdb.open(
            self.db_path,
            readonly=self.readonly,
            lock=self.lock,
            readahead=self.readahead,
            **self.kwargs)

    def __del__(self):
        # ``getattr`` guards against instances whose ``__init__`` did not
        # get as far as creating the attribute.
        client = getattr(self, '_client', None)
        if client is not None:
            client.close()
Loading

0 comments on commit ed84dfd

Please sign in to comment.