From 7a4a2e57ab90400f295e513d89278a68c0f0a457 Mon Sep 17 00:00:00 2001
From: moonshadow565 <moonshadow565@gmail.com>
Date: Thu, 13 Oct 2022 02:18:26 +0200
Subject: [PATCH] advanced chunking

---
 CMakeLists.txt         |   8 +++
 lib/rlib/ar.cpp        |  81 ++++++++++++++++++++++++++
 lib/rlib/ar.hpp        |  28 +++++++++
 lib/rlib/ar/bnk.cpp    |  89 +++++++++++++++++++++++++++++
 lib/rlib/ar/bnk.hpp    |  21 +++++++
 lib/rlib/ar/wad.cpp    | 126 +++++++++++++++++++++++++++++++++++++++++
 lib/rlib/ar/wad.hpp    |  21 +++++++
 lib/rlib/ar/wpk.cpp    | 103 +++++++++++++++++++++++++++++++++
 lib/rlib/ar/wpk.hpp    |  20 +++++++
 lib/rlib/rmanifest.cpp |  51 ++++++++---------
 lib/rlib/rmanifest.hpp |   4 +-
 src/rman_make.cpp      |  62 ++++++++++----------
 12 files changed, 554 insertions(+), 60 deletions(-)
 create mode 100644 lib/rlib/ar.cpp
 create mode 100644 lib/rlib/ar.hpp
 create mode 100644 lib/rlib/ar/bnk.cpp
 create mode 100644 lib/rlib/ar/bnk.hpp
 create mode 100644 lib/rlib/ar/wad.cpp
 create mode 100644 lib/rlib/ar/wad.hpp
 create mode 100644 lib/rlib/ar/wpk.cpp
 create mode 100644 lib/rlib/ar/wpk.hpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d6cd3cc..5806978 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,14 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 add_subdirectory(dep)
 
 add_library(rlib STATIC
+    lib/rlib/ar.hpp
+    lib/rlib/ar.cpp
+    lib/rlib/ar/bnk.hpp
+    lib/rlib/ar/bnk.cpp
+    lib/rlib/ar/wad.hpp
+    lib/rlib/ar/wad.cpp
+    lib/rlib/ar/wpk.hpp
+    lib/rlib/ar/wpk.cpp
     lib/rlib/common.hpp
     lib/rlib/common.cpp
     lib/rlib/iofile.cpp
diff --git a/lib/rlib/ar.cpp b/lib/rlib/ar.cpp
new file mode 100644
index 0000000..7cde88d
--- /dev/null
+++ b/lib/rlib/ar.cpp
@@ -0,0 +1,98 @@
+#include "ar.hpp"
+
+#include <algorithm>
+
+#include "ar/bnk.hpp"
+#include "ar/wad.hpp"
+#include "ar/wpk.hpp"
+
+using namespace rlib;
+using namespace rlib::ar;
+
+// Splits the whole of `io` and reports every resulting chunk to `cb`.
+auto ArSplit::operator()(IO const& io, offset_cb cb) const -> void {
+    process(io, cb, 0, {.offset = 0, .size = io.size()});
+}
+
+// Reads the archive of type T located at `top_entry` and splits it along its entry boundaries.
+// Bytes that no entry claims (gaps between entries, trailing data) are chunked as plain data.
+// A failed archive read is reported through rlib_error.
+template <typename T>
+auto ArSplit::process_ar(IO const& io, offset_cb cb, Entry top_entry) const -> void {
+    auto archive = T{};
+    if (auto error = archive.read(io, top_entry.offset, top_entry.size)) rlib_error(error);
+
+    // ensure offsets are processed in order, biggest entry first when offsets are equal
+    std::sort(archive.entries.begin(), archive.entries.end(), [](auto const& lhs, auto const& rhs) {
+        if (lhs.offset < rhs.offset) return true;
+        if (lhs.offset == rhs.offset && lhs.size > rhs.size) return true;
+        return false;
+    });
+
+    auto const end = top_entry.offset + top_entry.size;
+    auto cur = top_entry.offset;
+    for (auto const& entry : archive.entries) {
+        // skip empty entries
+        if (!entry.size) continue;
+
+        // skip duplicate or overlapping entries
+        if (entry.offset < cur) {
+            continue;
+        }
+
+        // an entry reaching past the region would make the size arithmetic below wrap around
+        rlib_assert(entry.offset <= end && entry.size <= end - entry.offset);
+
+        // process any skipped data
+        if (auto leftover = entry.offset - cur) {
+            process(io, cb, -1, {.offset = cur, .size = leftover, .compressed = top_entry.compressed});
+        }
+
+        // process current entry, looking for nested archives only inside of plain entries
+        process(io,
+                cb,
+                T::can_nest && !no_nest && !entry.compressed ? 1 : -1,
+                {
+                    .offset = entry.offset,
+                    .size = entry.size,
+                    .compressed = entry.compressed,
+                });
+
+        // go to next entry
+        cur = entry.offset + entry.size;
+    }
+
+    // process any remaining data
+    if (auto remain = end - cur) {
+        process(io, cb, -1, {.offset = cur, .size = remain, .compressed = top_entry.compressed});
+    }
+}
+
+// Splits the region `top_entry`. With depth >= 0 and at least 64 bytes available the region is
+// probed for a known archive magic and handed to process_ar on a match (WAD only at depth 0).
+// Otherwise it is cut into pieces of at most chunk_size bytes, each reported to `cb`.
+auto ArSplit::process(IO const& io, offset_cb cb, int depth, Entry top_entry) const -> void {
+    if (depth >= 0 && top_entry.size >= 64) {
+        char buffer[8] = {};
+        rlib_assert(io.read(top_entry.offset, buffer));
+        if (!no_bnk && BNK::check_magic(buffer)) {
+            return process_ar<BNK>(io, cb, top_entry);
+        }
+        if (!no_wad && depth < 1 && WAD::check_magic(buffer)) {
+            return process_ar<WAD>(io, cb, top_entry);
+        }
+        if (!no_wpk && WPK::check_magic(buffer)) {
+            return process_ar<WPK>(io, cb, top_entry);
+        }
+    }
+    // nothing to emit for an empty region
+    if (!top_entry.size) return;
+    // a chunk size of zero would never make progress in the loop below
+    rlib_assert(chunk_size != 0);
+    for (auto i = top_entry.offset, remain = top_entry.size; remain;) {
+        auto size = std::min(chunk_size, remain);
+        cb({.offset = i, .size = size, .compressed = top_entry.compressed});
+        i += size;
+        remain -= size;
+    }
+}
diff --git a/lib/rlib/ar.hpp b/lib/rlib/ar.hpp
new file mode 100644
index 0000000..92b0eda
--- /dev/null
+++ b/lib/rlib/ar.hpp
@@ -0,0 +1,28 @@
+#pragma once
+#include <rlib/common.hpp>
+#include <rlib/iofile.hpp>
+
+namespace rlib {
+    struct ArSplit {  // splits an IO into chunks, following BNK/WAD/WPK entry boundaries where found
+        struct Entry {           // one region handed to the callback
+            std::size_t offset;  // absolute start of the region within the IO
+            std::size_t size;    // length of the region in bytes
+            bool compressed;     // inherited from the archive entry the region was cut from
+        };
+        using offset_cb = function_ref<void(Entry)>;  // receives every produced region
+
+        std::size_t chunk_size;  // upper bound in bytes for a single region
+        bool no_bnk;             // true disables probing for BNK archives
+        bool no_wad;             // true disables probing for WAD archives
+        bool no_wpk;             // true disables probing for WPK archives
+        bool no_nest;            // true disables probing inside of archive entries
+
+        auto operator()(IO const& io, offset_cb cb) const -> void;  // splits all of io, feeding cb
+
+    private:
+        auto process(IO const& io, offset_cb cb, int depth, Entry top_entry) const -> void;  // splits one region
+
+        template <typename T>
+        auto process_ar(IO const& io, offset_cb cb, Entry top_entry) const -> void;  // splits an archive of type T
+    };
+}
diff --git a/lib/rlib/ar/bnk.cpp b/lib/rlib/ar/bnk.cpp
new file mode 100644
index 0000000..215b913
--- /dev/null
+++ b/lib/rlib/ar/bnk.cpp
@@ -0,0 +1,100 @@
+#include "bnk.hpp"
+
+#include <map>
+
+#define ar_assert(...)                                          \
+    do {                                                        \
+        if (!(__VA_ARGS__)) return " BNK::read: " #__VA_ARGS__; \
+    } while (false)
+
+using namespace rlib;
+using namespace rlib::ar;
+
+// On-disk header that starts every BNK section: four character tag, then the payload size.
+struct BNK::Entry::Raw {
+    std::array<char, 4> type;
+    std::uint32_t size;
+};
+
+// On-disk record of the DIDX section; offset and size locate one item inside of the DATA payload.
+struct BNK::Entry::DIDX {
+    std::uint32_t id;
+    std::uint32_t offset;
+    std::uint32_t size;
+};
+
+// Returns true when `data` starts with the "BKHD" section tag.
+auto BNK::check_magic(std::span<char const> data) noexcept -> bool {
+    return data.size() >= 4 && std::memcmp(data.data(), "BKHD", 4) == 0;
+}
+
+// Parses the BNK stored in [offset, offset + size) of `io` and rebuilds `entries` from it.
+// Every section tag yields one entry covering the section header and payload. When both DIDX and
+// DATA exist, each DIDX record becomes its own entry (flagged compressed) and the entries of those
+// two sections shrink to their 8 byte header. All entry offsets are absolute positions in `io`.
+// Returns nullptr on success, otherwise a static string naming the check that failed.
+auto BNK::read(IO const& io, std::size_t offset, std::size_t size) -> char const* {
+    using TYPE = std::array<char, 4>;
+    static constexpr auto BKHD = TYPE{'B', 'K', 'H', 'D'};
+    static constexpr auto DIDX = TYPE{'D', 'I', 'D', 'X'};
+    static constexpr auto DATA = TYPE{'D', 'A', 'T', 'A'};
+
+    auto magic = TYPE{};
+    ar_assert(size >= 8);
+    ar_assert(io.read(offset, magic));
+    ar_assert(magic == BKHD);
+
+    // the walk below uses absolute positions, so bounds are checked against the absolute end
+    auto const end = offset + size;
+    auto sections = std::map<TYPE, Entry>{};
+    for (std::size_t i = offset; i != end;) {
+        Entry::Raw raw = {};
+        ar_assert(end - i >= sizeof(raw));
+        ar_assert(io.read(i, {(char*)&raw, sizeof(raw)}));
+
+        i += sizeof(Entry::Raw);
+        ar_assert(end - i >= raw.size);
+
+        sections[raw.type] = Entry{.offset = i, .size = raw.size};
+
+        i += raw.size;
+    }
+
+    entries.clear();
+    entries.reserve(sections.size());
+
+    auto i_didx = sections.find(DIDX);
+    auto i_data = sections.find(DATA);
+    if (i_didx != sections.end() && i_data != sections.end()) {
+        auto didx_base = i_didx->second;
+        auto data_base = i_data->second;
+
+        ar_assert(didx_base.size % sizeof(Entry::DIDX) == 0);
+        auto didx_list = std::vector<Entry::DIDX>(didx_base.size / sizeof(Entry::DIDX));
+        ar_assert(didx_list.empty() || io.read(didx_base.offset, {(char*)didx_list.data(), didx_base.size}));
+
+        entries.reserve(sections.size() + didx_list.size());
+        for (auto const& didx : didx_list) {
+            ar_assert(data_base.size >= didx.offset);
+            ar_assert(data_base.size - didx.offset >= didx.size);
+            entries.push_back(Entry{
+                .offset = data_base.offset + didx.offset,
+                .size = didx.size,
+                .compressed = true,
+            });
+        }
+
+        // shrink both sections to their headers, DATA items are described by the entries above
+        i_didx->second.size = 0;
+        i_data->second.size = 0;
+    }
+
+    // widen every section entry so that it also covers its own header
+    for (auto [key, entry] : sections) {
+        entry.offset -= sizeof(Entry::Raw);
+        entry.size += sizeof(Entry::Raw);
+        entries.push_back(entry);
+    }
+
+    return nullptr;
+}
\ No newline at end of file
diff --git a/lib/rlib/ar/bnk.hpp b/lib/rlib/ar/bnk.hpp
new file mode 100644
index 0000000..b870256
--- /dev/null
+++ b/lib/rlib/ar/bnk.hpp
@@ -0,0 +1,21 @@
+#pragma once
+#include <rlib/common.hpp>
+#include <rlib/iofile.hpp>
+
+namespace rlib::ar {
+    struct BNK {  // entry table of a BNK archive ("BKHD" magic)
+        static constexpr bool can_nest = false;  // entries are never probed for nested archives
+
+        struct Entry {
+            struct Raw;          // on-disk section header, defined in bnk.cpp
+            struct DIDX;         // on-disk DIDX record, defined in bnk.cpp
+            std::size_t offset;  // absolute start within the IO
+            std::size_t size;    // length in bytes
+            bool compressed;     // set for entries created from DIDX records
+        };
+        std::vector<Entry> entries;  // rebuilt by every call to read
+
+        static auto check_magic(std::span<char const> data) noexcept -> bool;  // true if data starts with "BKHD"
+        auto read(IO const& io, std::size_t offset, std::size_t size) -> char const*;  // nullptr on success, else error text
+    };
+}
\ No newline at end of file
diff --git a/lib/rlib/ar/wad.cpp b/lib/rlib/ar/wad.cpp
new file mode 100644
index 0000000..6f1ff92
--- /dev/null
+++ b/lib/rlib/ar/wad.cpp
@@ -0,0 +1,144 @@
+#include "wad.hpp"
+
+#include <algorithm>
+
+using namespace rlib;
+using namespace rlib::ar;
+
+#define ar_assert(...)                                          \
+    do {                                                        \
+        if (!(__VA_ARGS__)) return " WAD::read: " #__VA_ARGS__; \
+    } while (false)
+
+// Version independent view of a WAD header as consumed by WAD::read.
+struct WAD::Header {
+    struct Base;
+    struct V1;
+    struct V2;
+    struct V3;
+
+    std::size_t entry_size;
+    std::size_t entry_count;
+    std::size_t toc_start;
+    std::size_t toc_size;
+};
+
+// Leading bytes shared by every header version: magic followed by two version bytes.
+struct WAD::Header::Base {
+    std::array<char, 2> magic;
+    std::uint8_t version[2];
+};
+
+// Header layout read when version[0] is 0 or 1.
+struct WAD::Header::V1 : Base {
+    std::uint16_t toc_start;
+    std::uint16_t entry_size;
+    std::uint32_t entry_count;
+};
+
+// Header layout read when version[0] is 2.
+struct WAD::Header::V2 : Base {
+    std::array<std::uint8_t, 84> signature;
+    std::array<std::uint8_t, 8> checksum;
+    std::uint16_t toc_start;
+    std::uint16_t entry_size;
+    std::uint32_t entry_count;
+};
+
+// Header layout read when version[0] is 3, TOC position and record size are fixed.
+struct WAD::Header::V3 : Base {
+    std::uint8_t signature[256];
+    std::array<std::uint8_t, 8> checksum;
+    static constexpr std::uint16_t toc_start = 272;
+    static constexpr std::uint16_t entry_size = 32;
+    std::uint32_t entry_count;
+};
+
+// Leading fields of an on-disk TOC record, offset is relative to the start of the archive.
+struct WAD::Entry::Raw {
+    std::uint64_t path;
+    std::uint32_t offset;
+    std::uint32_t size_compressed;
+    std::uint32_t size_uncompressed;
+    std::uint8_t type : 4;
+    std::uint8_t subchunks : 4;
+    std::uint8_t pad[3];
+};
+
+// Returns true when `data` starts with "RW" and its third byte is at most 10.
+auto WAD::check_magic(std::span<char const> data) noexcept -> bool {
+    return data.size() >= 4 && std::memcmp(data.data(), "RW", 2) == 0 && (uint8_t)data[2] <= 10;
+}
+
+// Parses the WAD stored in [offset, offset + size) of `io` and rebuilds `entries` from it.
+// The first entry covers the TOC, followed by one entry per TOC record; a record counts as
+// compressed when its type is not 0. All entry offsets are absolute positions within `io`.
+// Returns nullptr on success, otherwise a static string naming the check that failed.
+auto WAD::read(IO const& io, std::size_t offset, std::size_t size) -> char const* {
+    static constexpr auto MAGIC = std::array{'R', 'W'};
+
+    Header::Base header_base = {};
+    ar_assert(size >= sizeof(header_base));
+    ar_assert(io.read(offset, {(char*)&header_base, sizeof(header_base)}));
+    ar_assert(header_base.magic == MAGIC);
+
+    Header header = {};
+    switch (header_base.version[0]) {
+#define read_header($V)                                                   \
+    do {                                                                  \
+        Header::V##$V v_header = {};                                      \
+        ar_assert(size >= sizeof(v_header));                              \
+        ar_assert(io.read(offset, {(char*)&v_header, sizeof(v_header)})); \
+        header.entry_size = v_header.entry_size;                          \
+        header.entry_count = v_header.entry_count;                        \
+        header.toc_start = v_header.toc_start;                            \
+        header.toc_size = header.entry_size * header.entry_count;         \
+    } while (false)
+        case 0:
+        case 1:
+            read_header(1);
+            break;
+        case 2:
+            read_header(2);
+            break;
+        case 3:
+            read_header(3);
+            break;
+#undef read_header
+        default:
+            return "Unknown wad version";
+    }
+    ar_assert(size >= header.toc_start);
+    ar_assert(size - header.toc_start >= header.toc_size);
+    header.toc_start += offset;
+
+    entries.clear();
+    entries.reserve(header.entry_count + 1);
+
+    entries.push_back(Entry{
+        .offset = header.toc_start,
+        .size = header.toc_size,
+        .compressed = false,
+    });
+
+    // records can be larger than Entry::Raw (V3 declares 32 bytes), only read what Raw can hold
+    auto const raw_size = std::min(header.entry_size, sizeof(Entry::Raw));
+    for (std::size_t i = 0; i != header.entry_count; ++i) {
+        auto raw_entry = Entry::Raw{};
+        ar_assert(io.read(header.toc_start + i * header.entry_size, {(char*)&raw_entry, raw_size}));
+
+        // TOC offsets are relative to the archive, so they are bounded by its size
+        ar_assert(size >= raw_entry.offset);
+        ar_assert(size - raw_entry.offset >= raw_entry.size_compressed);
+
+        auto entry = Entry{
+            .offset = offset + raw_entry.offset,
+            .size = raw_entry.size_compressed,
+            .compressed = raw_entry.type != 0,
+        };
+        ar_assert(entry.offset >= header.toc_start + header.toc_size);
+        entries.push_back(entry);
+    }
+
+    return nullptr;
+}
\ No newline at end of file
diff --git a/lib/rlib/ar/wad.hpp b/lib/rlib/ar/wad.hpp
new file mode 100644
index 0000000..acbb3c4
--- /dev/null
+++ b/lib/rlib/ar/wad.hpp
@@ -0,0 +1,21 @@
+#pragma once
+#include <rlib/common.hpp>
+#include <rlib/iofile.hpp>
+
+namespace rlib::ar {
+    struct WAD {  // entry table of a WAD archive ("RW" magic)
+        static constexpr bool can_nest = true;  // uncompressed entries may be probed for nested archives
+
+        struct Header;  // defined in wad.cpp
+        struct Entry {
+            struct Raw;          // on-disk TOC record, defined in wad.cpp
+            std::size_t offset;  // absolute start within the IO
+            std::size_t size;    // length in bytes as stored in the archive
+            bool compressed;     // false for the TOC entry, otherwise whether the record type is not 0
+        };
+        std::vector<Entry> entries;  // rebuilt by every call to read, TOC entry comes first
+
+        static auto check_magic(std::span<char const> data) noexcept -> bool;  // true if data starts with a WAD magic
+        auto read(IO const& io, std::size_t offset, std::size_t size) -> char const*;  // nullptr on success, else error text
+    };
+}
\ No newline at end of file
diff --git a/lib/rlib/ar/wpk.cpp b/lib/rlib/ar/wpk.cpp
new file mode 100644
index 0000000..5977ba9
--- /dev/null
+++ b/lib/rlib/ar/wpk.cpp
@@ -0,0 +1,113 @@
+#include "wpk.hpp"
+
+using namespace rlib;
+using namespace rlib::ar;
+
+#define ar_assert(...)                                          \
+    do {                                                        \
+        if (!(__VA_ARGS__)) return " WPK::read: " #__VA_ARGS__; \
+    } while (false)
+
+// Version independent view of a WPK header as consumed by WPK::read.
+struct WPK::Header {
+    struct Base;
+    struct V1;
+    struct V2;
+    struct V3;
+
+    std::size_t entry_count;
+    std::size_t toc_start;
+    std::size_t toc_size;
+};
+
+// Leading bytes shared by every header version: magic followed by the version number.
+struct WPK::Header::Base {
+    std::array<char, 4> magic;
+    std::uint32_t version;
+};
+
+// Header layout read when version is 1, the TOC starts at byte 12.
+struct WPK::Header::V1 : Base {
+    std::uint32_t entry_count;
+    static constexpr std::size_t toc_start = 12;
+};
+
+// Leading fields of the record a TOC slot points at, offset is relative to the archive start.
+struct WPK::Entry::Raw {
+    std::uint32_t offset;
+    std::uint32_t size;
+};
+
+// Returns true when `data` starts with "r3d2" and its fifth byte is at most 10.
+auto WPK::check_magic(std::span<char const> data) noexcept -> bool {
+    return data.size() >= 6 && std::memcmp(data.data(), "r3d2", 4) == 0 && (uint8_t)data[4] <= 10;
+}
+
+// Parses the WPK stored in [offset, offset + size) of `io` and rebuilds `entries` from it.
+// The first entry covers the TOC (a list of 32 bit record offsets), followed by one entry per
+// record, always flagged compressed. All entry offsets are absolute positions within `io`.
+// Returns nullptr on success, otherwise a static string naming the check that failed.
+auto WPK::read(IO const& io, std::size_t offset, std::size_t size) -> char const* {
+    static constexpr auto MAGIC = std::array{'r', '3', 'd', '2'};
+
+    Header::Base header_base = {};
+    ar_assert(size >= sizeof(header_base));
+    ar_assert(io.read(offset, {(char*)&header_base, sizeof(header_base)}));
+    ar_assert(header_base.magic == MAGIC);
+
+    Header header = {};
+    switch (header_base.version) {
+#define read_header($V)                                                   \
+    case $V: {                                                            \
+        Header::V##$V v_header = {};                                      \
+        ar_assert(size >= sizeof(v_header));                              \
+        ar_assert(io.read(offset, {(char*)&v_header, sizeof(v_header)})); \
+        header.entry_count = v_header.entry_count;                        \
+        header.toc_start = v_header.toc_start;                            \
+        header.toc_size = 4 * header.entry_count;                         \
+    } break;
+        read_header(1);
+#undef read_header
+        default:
+            return "Unsuported WPK version!";
+    }
+
+    ar_assert(size >= header.toc_start);
+    ar_assert(size - header.toc_start >= header.toc_size);
+
+    header.toc_start += offset;
+
+    auto offsets = std::vector<std::uint32_t>(header.entry_count);
+    ar_assert(offsets.empty() || io.read(header.toc_start, {(char*)offsets.data(), header.toc_size}));
+
+    entries.clear();
+    entries.reserve(header.entry_count + 1);
+    entries.push_back(Entry{
+        .offset = header.toc_start,
+        .size = header.toc_size,
+        .compressed = false,
+    });
+
+    for (auto const toc_offset : offsets) {
+        auto raw_entry = Entry::Raw{};
+        ar_assert(size >= toc_offset);
+        ar_assert(size - toc_offset >= sizeof(raw_entry));
+
+        // widen before rebasing, adding `offset` to a 32 bit value could wrap around
+        auto const entry_offset = offset + std::size_t{toc_offset};
+        ar_assert(entry_offset >= header.toc_start + header.toc_size);
+        ar_assert(io.read(entry_offset, {(char*)&raw_entry, sizeof(raw_entry)}));
+
+        auto entry = Entry{
+            .offset = offset + raw_entry.offset,
+            .size = raw_entry.size,
+            .compressed = true,
+        };
+        ar_assert(entry.offset >= header.toc_start + header.toc_size);
+        ar_assert(size >= raw_entry.offset);
+        ar_assert(size - raw_entry.offset >= raw_entry.size);
+        entries.push_back(entry);
+    }
+
+    return nullptr;
+}
diff --git a/lib/rlib/ar/wpk.hpp b/lib/rlib/ar/wpk.hpp
new file mode 100644
index 0000000..24f97fa
--- /dev/null
+++ b/lib/rlib/ar/wpk.hpp
@@ -0,0 +1,20 @@
+#pragma once
+#include <rlib/common.hpp>
+#include <rlib/iofile.hpp>
+
+namespace rlib::ar {
+    struct WPK {  // entry table of a WPK archive ("r3d2" magic)
+        static constexpr bool can_nest = false;  // entries are never probed for nested archives
+        struct Header;  // defined in wpk.cpp
+        struct Entry {
+            struct Raw;          // on-disk record header, defined in wpk.cpp
+            std::size_t offset;  // absolute start within the IO
+            std::size_t size;    // length in bytes
+            bool compressed;     // false for the TOC entry, true for every record
+        };
+        std::vector<Entry> entries;  // rebuilt by every call to read, TOC entry comes first
+
+        static auto check_magic(std::span<char const> data) noexcept -> bool;  // true if data starts with a WPK magic
+        auto read(IO const& io, std::size_t offset, std::size_t size) -> char const*;  // nullptr on success, else error text
+    };
+}
\ No newline at end of file
diff --git a/lib/rlib/rmanifest.cpp b/lib/rlib/rmanifest.cpp
index aefcbe5..77bb5a2 100644
--- a/lib/rlib/rmanifest.cpp
+++ b/lib/rlib/rmanifest.cpp
@@ -385,35 +385,32 @@ RMAN RMAN::read(std::span<char const> data) {
     };
 }
 
-auto RMAN::dump() const -> std::string {
-    auto jfiles = json::array();
-    for (auto const& file : files) {
-        auto& jfile = jfiles.emplace_back();
-        jfile = json::object();
-        auto& jparams = jfile["params"];
-        jparams = json::object();
-        jparams["max_uncompressed"] = file.params.max_uncompressed;
-        jfile["permissions"] = file.permissions;
-        jfile["fileId"] = fmt::format("{}", file.fileId);
-        jfile["path"] = file.path;
-        jfile["link"] = file.link;
-        jfile["langs"] = file.langs;
-        jfile["size"] = file.size;
-        auto& jchunks = jfile["chunks"];
-        jchunks = json::array();
-        for (auto const& chunk : file.chunks) {
-            auto& jchunk = jchunks.emplace_back();
-            jchunk = json::object();
-            jchunk["chunkId"] = fmt::format("{}", chunk.chunkId);
-            jchunk["uncompressed_size"] = chunk.uncompressed_size;
-            if (jparams.contains("hash_type")) {
-                rlib_assert(jparams["hash_type"] == chunk.hash_type);
-            } else {
-                jparams["hash_type"] = chunk.hash_type;
-            }
+auto RMAN::File::dump() const -> std::string {  // serializes this single file entry as a JSON object
+    auto const& file = *this;
+    auto jfile = json::object();
+    auto& jparams = jfile["params"];  // shared chunking parameters live in a nested object
+    jparams = json::object();
+    jparams["max_uncompressed"] = file.params.max_uncompressed;
+    jfile["permissions"] = file.permissions;
+    jfile["fileId"] = fmt::format("{}", file.fileId);  // ids are written through their fmt formatter, as strings
+    jfile["path"] = file.path;
+    jfile["link"] = file.link;
+    jfile["langs"] = file.langs;
+    jfile["size"] = file.size;
+    auto& jchunks = jfile["chunks"];
+    jchunks = json::array();
+    for (auto const& chunk : file.chunks) {
+        auto& jchunk = jchunks.emplace_back();
+        jchunk = json::object();
+        jchunk["chunkId"] = fmt::format("{}", chunk.chunkId);
+        jchunk["uncompressed_size"] = chunk.uncompressed_size;
+        if (jparams.contains("hash_type")) {  // hash_type is stored once, so all chunks must agree on it
+            rlib_assert(jparams["hash_type"] == chunk.hash_type);
+        } else {
+            jparams["hash_type"] = chunk.hash_type;
         }
     }
-    return jfiles.dump(2);
+    return jfile.dump();
 }
 
 auto RMAN::File::matches(Filter const& filter) const noexcept -> bool {
diff --git a/lib/rlib/rmanifest.hpp b/lib/rlib/rmanifest.hpp
index 5eae08c..eae0c4e 100644
--- a/lib/rlib/rmanifest.hpp
+++ b/lib/rlib/rmanifest.hpp
@@ -42,6 +42,8 @@ namespace rlib {
             std::string langs;
             std::vector<RChunk::Dst> chunks;
 
+            auto dump() const -> std::string;  // serializes this file entry as a JSON object string
+
             auto matches(Filter const& filter) const noexcept -> bool;
 
             auto verify(fs::path const& path, RChunk::Dst::data_cb on_good) const -> std::vector<RChunk::Dst>;
@@ -53,8 +55,6 @@ namespace rlib {
 
         static RMAN read(std::span<char const> data);
 
-        auto dump() const -> std::string;
-
     private:
         struct Raw;
     };
diff --git a/src/rman_make.cpp b/src/rman_make.cpp
index e1f56eb..04c4f9f 100644
--- a/src/rman_make.cpp
+++ b/src/rman_make.cpp
@@ -4,6 +4,7 @@
 
 #include <argparse.hpp>
 #include <iostream>
+#include <rlib/ar.hpp>
 #include <rlib/common.hpp>
 #include <rlib/iofile.hpp>
 #include <rlib/rcache.hpp>
@@ -22,6 +23,7 @@ struct Main {
         std::size_t chunk_size = 0;
         std::uint32_t level = 0;
         std::uint32_t buffer = {};
+        ArSplit ar = {};
     } cli = {};
 
     auto parse_args(int argc, char** argv) -> void {
@@ -31,8 +33,12 @@ struct Main {
         program.add_argument("outbundle").help("Bundle file to write into.").required();
         program.add_argument("rootfolder").help("Root folder to rebase from.").required();
         program.add_argument("input").help("Files or folders for manifest.").remaining().required();
-        program.add_argument("-a", "--append").help("Do not print progress.").default_value(false).implicit_value(true);
         program.add_argument("--no-progress").help("Do not print progress.").default_value(false).implicit_value(true);
+        program.add_argument("--no-ar-bnk").help("Disable bnk spliting.").default_value(false).implicit_value(true);
+        program.add_argument("--no-ar-wad").help("Disable wad spliting.").default_value(false).implicit_value(true);
+        program.add_argument("--no-ar-wpk").help("Disable wpk spliting.").default_value(false).implicit_value(true);
+        program.add_argument("--no-ar-nest").help("Disable nested spliting.").default_value(false).implicit_value(true);
+
         program.add_argument("--chunk-size")
             .default_value(std::uint32_t{256})
             .help("Chunk size in kilobytes.")
@@ -58,10 +64,15 @@ struct Main {
         cli.outbundle = program.get<std::string>("outbundle");
         cli.rootfolder = program.get<std::string>("rootfolder");
         cli.inputs = program.get<std::vector<std::string>>("input");
-        cli.append = program.get<bool>("--append");
         cli.no_progress = program.get<bool>("--no-progress");
-        cli.chunk_size = program.get<std::uint32_t>("--chunk-size") * 1024u;
         cli.level = program.get<std::uint32_t>("--level");
+        cli.ar = ArSplit{  // chunking options, every --no-ar-* switch maps onto one field
+            .chunk_size = std::size_t{program.get<std::uint32_t>("--chunk-size")} * 1024u,
+            .no_bnk = program.get<bool>("--no-ar-bnk"),
+            .no_wad = program.get<bool>("--no-ar-wad"),
+            .no_wpk = program.get<bool>("--no-ar-wpk"),
+            .no_nest = program.get<bool>("--no-ar-nest"),
+        };
     }
 
     auto run() -> void {
@@ -71,35 +82,24 @@ struct Main {
             [](fs::path const& p) { return true; },
             true);
 
-        std::cerr << "Create output manifest..." << std::endl;
-        auto outmanifest = RMAN{};
-        auto lookup = std::unordered_map<std::string, std::size_t>{};
-        auto outfile = IO::File(cli.outmanifest, IO::WRITE);
-        if (outfile.size() && cli.append) {
-            outmanifest = RMAN::read(outfile.copy(0, outfile.size()));
-            for (std::size_t i = 0; auto const& file : outmanifest.files) {
-                lookup[file.path] = i;
-                ++i;
-            }
-        }
-
         std::cerr << "Processing output bundle ... " << std::endl;
         auto outbundle = RCache(RCache::Options{.path = cli.outbundle, .readonly = false, .flush_size = cli.buffer});
 
+        std::cerr << "Create output manifest..." << std::endl;
+        auto outfile = IO::File(cli.outmanifest, IO::WRITE);
+        outfile.resize(0, 0);
+        outfile.write(0, {"[", 1});  // opens the JSON array, one object per input file is appended below
+        std::string_view separator = "\n";  // no comma in front of the first object
+
         std::cerr << "Processing input files ... " << std::endl;
         for (std::uint32_t index = paths.size(); auto const& path : paths) {
             auto file = add_file(path, outbundle, index--);
-            if (lookup.contains(file.path)) {
-                outmanifest.files[index] = std::move(file);
-            } else {
-                outmanifest.files.push_back(std::move(file));
-            }
+            auto outjson = std::string(separator) + file.dump();
+            outfile.write(outfile.size(), outjson);  // append at the current end of the manifest
+            separator = ",\n";  // every following object is preceded by a comma
         }
 
-        std::cerr << "Writing output manifest ... " << std::endl;
-        auto outjson = outmanifest.dump();
-        outfile.resize(0, 0);
-        outfile.write(0, outjson);
+        outfile.write(outfile.size(), {"\n]\n", 3});  // closes the JSON array
     }
 
     auto add_file(fs::path const& path, RCache& outbundle, std::uint32_t index) -> RMAN::File {
@@ -111,15 +111,15 @@ struct Main {
         rfile.langs = "none";
         rfile.path = fs::relative(fs::absolute(path), fs::absolute(cli.rootfolder)).generic_string();
         progress_bar p("PROCESSED", cli.no_progress, index, 0, infile.size());
-        for (std::uint64_t offset = 0; offset < infile.size();) {
-            auto src = infile.copy(offset, std::min(cli.chunk_size, infile.size() - offset));
-            RChunk::Dst chunk = {outbundle.add_uncompressed(src, cli.level)};
+        cli.ar(infile, [&](ArSplit::Entry entry) {  // called once per region produced by the splitter
+            auto src = infile.copy(entry.offset, entry.size);
+            auto level = entry.compressed ? 0 : cli.level;  // regions flagged compressed are added with level 0
+            RChunk::Dst chunk = {outbundle.add_uncompressed(src, level)};
             chunk.hash_type = HashType::RITO_HKDF;
-            rfile.chunks.push_back(chunk);
-            chunk.uncompressed_offset = offset;
-            offset += chunk.uncompressed_size;
-            p.update(offset);
-        }
+            chunk.uncompressed_offset = entry.offset;  // set before the copy below so the stored chunk keeps it
+            rfile.chunks.push_back(chunk);
+            p.update(entry.offset + entry.size);
+        });
         auto xxstate = XXH64_createState();
         rlib_assert(xxstate);
         XXH64_reset(xxstate, 0);