Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add zstandard compression feature #801

Merged
merged 4 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ def read(fname):
azure_deps = ['azure-storage-blob', 'azure-common', 'azure-core']
http_deps = ['requests']
ssh_deps = ['paramiko']
zst_deps = ['zstandard']

all_deps = aws_deps + gcs_deps + azure_deps + http_deps + ssh_deps
all_deps = aws_deps + gcs_deps + azure_deps + http_deps + ssh_deps + zst_deps
tests_require = all_deps + [
'moto[server]<5.0',
'responses',
Expand Down Expand Up @@ -80,6 +81,7 @@ def read(fname):
'http': http_deps,
'webhdfs': http_deps,
'ssh': ssh_deps,
'zst': zst_deps,
},
python_requires=">=3.6,<4.0",

Expand Down
7 changes: 7 additions & 0 deletions smart_open/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ def _handle_gzip(file_obj, mode):
return result


def _handle_zstd(file_obj, mode):
    """
    Wrap `file_obj` with a zstandard compression or decompression stream.

    :param file_obj: The underlying binary file-like object holding (or
        receiving) the zstd-compressed bytes.
    :param str mode: The mode the stream is being opened with. Presumably a
        binary mode such as 'rb' or 'wb', as forwarded by
        `compression_wrapper` -- confirm against callers.
    :returns: A zstandard stream writer when `mode` requests writing
        ('w', 'a' or 'x'), otherwise a zstandard stream reader.
    """
    # Imported lazily: zstandard is an optional dependency (the 'zst' extra),
    # so it must only be required when a .zst stream is actually opened.
    import zstandard as zstd

    # Writing needs a compressor. Previously the mode was ignored and a
    # decompressing reader was returned unconditionally, so .zst files
    # could not be opened for writing.
    if any(flag in mode for flag in 'wax'):
        # closefd=True: closing the wrapper also closes file_obj.
        return zstd.ZstdCompressor().stream_writer(file_obj, closefd=True)

    return zstd.ZstdDecompressor().stream_reader(file_obj, closefd=True)


def compression_wrapper(file_obj, mode, compression=INFER_FROM_EXTENSION, filename=None):
"""
Wrap `file_obj` with an appropriate [de]compression mechanism based on its file extension.
Expand Down Expand Up @@ -145,3 +151,4 @@ def compression_wrapper(file_obj, mode, compression=INFER_FROM_EXTENSION, filena
#
register_compressor('.bz2', _handle_bz2)
register_compressor('.gz', _handle_gzip)
register_compressor('.zst', _handle_zstd)
9 changes: 7 additions & 2 deletions smart_open/tests/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
# This code is distributed under the terms and conditions
# from the MIT License (MIT).
#
import io
import gzip
import io

import pytest
import zstandard as zstd

import smart_open.compression


plain = 'доброе утро планета!'.encode()


Expand All @@ -32,6 +33,10 @@ def label(thing, name):
(io.BytesIO(gzip.compress(plain)), 'infer_from_extension', 'file.GZ'),
(label(io.BytesIO(gzip.compress(plain)), 'file.gz'), 'infer_from_extension', ''),
(io.BytesIO(gzip.compress(plain)), '.gz', 'file.gz'),
(io.BytesIO(zstd.ZstdCompressor().compress(plain)), 'infer_from_extension', 'file.zst'),
(io.BytesIO(zstd.ZstdCompressor().compress(plain)), 'infer_from_extension', 'file.ZST'),
(label(io.BytesIO(zstd.ZstdCompressor().compress(plain)), 'file.zst'), 'infer_from_extension', ''),
(io.BytesIO(zstd.ZstdCompressor().compress(plain)), '.zst', 'file.zst'),
]
)
def test_compression_wrapper_read(fileobj, compression, filename):
Expand Down
Loading