Skip to content

Commit

Permalink
Merge pull request #67 from emontnemery/store_plaintext_size
Browse files (browse the repository at this point in the history)
Store size of plaintext as custom header
  • Loading branch information
agners authored Jan 13, 2025
2 parents dcf6bb6 + 29f1a73 commit 1afc048
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 8 deletions.
33 changes: 26 additions & 7 deletions securetar/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,8 +25,11 @@

BLOCK_SIZE = 16
BLOCK_SIZE_BITS = 128
IV_SIZE = BLOCK_SIZE
DEFAULT_BUFSIZE = 10240

PLAINTEXT_SIZE_HEADER = "_securetar.plaintext_size"

MOD_READ = "r"
MOD_WRITE = "w"

Expand Down Expand Up @@ -71,6 +74,7 @@ def __init__(
self._decrypt: CipherContext | None = None
self._encrypt: CipherContext | None = None
self._padder: padding.PaddingContext | None = None
self._padding = bytearray()

def create_inner_tar(
self, name: str, key: bytes | None = None, gzip: bool = True
Expand Down Expand Up @@ -129,9 +133,9 @@ def _open_file(self) -> None:
def _setup_cipher(self) -> None:
# Extract IV for CBC
if self._mode == MOD_READ:
cbc_rand = self._file.read(16)
cbc_rand = self._file.read(IV_SIZE)
else:
cbc_rand = os.urandom(16)
cbc_rand = os.urandom(IV_SIZE)
self._file.write(cbc_rand)

# Create Cipher
Expand All @@ -156,7 +160,8 @@ def _close_file(self) -> None:
"""Close file."""
if self._file:
if not self._mode.startswith("r"):
self._file.write(self._encrypt.update(self._padder.finalize()))
self._padding += self._padder.finalize()
self._file.write(self._encrypt.update(self._padding))
if not self._fileobj:
self._file.close()
self._file = None
Expand Down Expand Up @@ -188,7 +193,7 @@ def read(self, size: int = 0) -> bytes:

data = self._parent.read(size)
self._pos += len(data)
if not data or self._size - self._pos > 16:
if not data or self._size - self._pos > BLOCK_SIZE:
return data

# Last block, read tail and discard padding
Expand Down Expand Up @@ -266,7 +271,7 @@ def __enter__(self) -> tarfile.TarFile:
tar_info.mtime = time.time()
else:
tar_info.mtime = int(time.time())
self.stream = _add_stream(self.outer_tar, tar_info)
self.stream = _add_stream(self.outer_tar, tar_info, self._padding)
self.stream.__enter__()
return super().__enter__()

Expand All @@ -278,7 +283,7 @@ def __exit__(self, exc_type, exc_value, traceback) -> None:

@contextmanager
def _add_stream(
tar: tarfile.TarFile, tar_info: tarfile.TarInfo
tar: tarfile.TarFile, tar_info: tarfile.TarInfo, padding: bytearray
) -> Generator[BinaryIO, None, None]:
"""Add a stream to the tarfile.
Expand All @@ -292,6 +297,13 @@ def _add_stream(
It is critical that the tar_info is not modified
inside the context manager, as the tar file header
size may change.
:param tar: The outer tar file to add the stream to.
:param tar_info: TarInfo for the added stream.
:param padding: PKCS7 padding added at the end of the stream. If non-empty,
the inner tar is encrypted, and we calculate the plaintext size from the padding
and add a pax header with the plaintext size. If empty, the inner tar is not
encrypted and we don't add a plaintext size pax header.
"""
fileobj = tar.fileobj
tell_before_adding_inner_file_header = fileobj.tell()
Expand Down Expand Up @@ -320,6 +332,13 @@ def _add_stream(
tar.offset += size_of_inner_tar + padding_size

tar_info.size = size_of_inner_tar
if padding:
tar_info.pax_headers = {
**tar_info.pax_headers,
# The plaintext size is the size of the written ciphertext
# minus the size of the padding and the IV
PLAINTEXT_SIZE_HEADER: str(size_of_inner_tar - len(padding) - IV_SIZE),
}
# Now that we know the size of the inner tar, we seek back
# to where we started and re-add the member with the correct size
fileobj.seek(tell_before_adding_inner_file_header)
Expand All @@ -339,7 +358,7 @@ def _generate_iv(key: bytes, salt: bytes) -> bytes:
temp_iv = key + salt
for _ in range(100):
temp_iv = hashlib.sha256(temp_iv).digest()
return temp_iv[:16]
return temp_iv[:IV_SIZE]


def secure_path(tar: tarfile.TarFile) -> Generator[tarfile.TarInfo, None, None]:
Expand Down
18 changes: 17 additions & 1 deletion tests/test_tar.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -344,6 +344,19 @@ def test_encrypted_gzipped_tar_inside_tar(tmp_path: Path, bufsize: int) -> None:

assert main_tar.exists()

# Iterate over the tar file
file_sizes: dict[str, int] = {}
with SecureTarFile(main_tar, "r", gzip=False, bufsize=bufsize) as tar_file:
for tar_info in tar_file:
file_sizes[tar_info.name] = tar_info.pax_headers[
"_securetar.plaintext_size"
]
assert set(file_sizes) == {
"core.tar.gz",
"core2.tar.gz",
"core3.tar.gz",
}

# Decrypt the inner tar
temp_decrypted = tmp_path.joinpath("decrypted")
os.makedirs(temp_decrypted, exist_ok=True)
Expand All @@ -362,6 +375,9 @@ def test_encrypted_gzipped_tar_inside_tar(tmp_path: Path, bufsize: int) -> None:
while data := decrypted.read(bufsize):
file.write(data)

# Check the indicated size is correct
assert inner_tar_path.stat().st_size == int(file_sizes[tar_info.name])

# Check decrypted file is valid gzip, this fails if the padding is not
# discarded correctly
assert inner_tar_path.stat().st_size > 0
Expand Down Expand Up @@ -451,7 +467,7 @@ def test_tar_stream(tmp_path: Path, format: int) -> None:
with patch.object(tarfile, "DEFAULT_FORMAT", format):
with SecureTarFile(main_tar, "w", gzip=False) as tar_file:
tar_info = tarfile.TarInfo(name="test.txt")
with _add_stream(tar_file, tar_info) as stream:
with _add_stream(tar_file, tar_info, bytearray()) as stream:
stream.write(b"test")

# Restore
Expand Down

0 comments on commit 1afc048

Please sign in to comment.