Skip to content

Commit

Permalink
advanced chunking
Browse files Browse the repository at this point in the history
  • Loading branch information
moonshadow565 committed Oct 13, 2022
1 parent 5fb623d commit 7a4a2e5
Show file tree
Hide file tree
Showing 12 changed files with 554 additions and 60 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
add_subdirectory(dep)

add_library(rlib STATIC
lib/rlib/ar.hpp
lib/rlib/ar.cpp
lib/rlib/ar/bnk.hpp
lib/rlib/ar/bnk.cpp
lib/rlib/ar/wad.hpp
lib/rlib/ar/wad.cpp
lib/rlib/ar/wpk.hpp
lib/rlib/ar/wpk.cpp
lib/rlib/common.hpp
lib/rlib/common.cpp
lib/rlib/iofile.cpp
Expand Down
81 changes: 81 additions & 0 deletions lib/rlib/ar.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#include "ar.hpp"

#include "ar/bnk.hpp"
#include "ar/wad.hpp"
#include "ar/wpk.hpp"

using namespace rlib;
using namespace rlib::ar;

// Entry point: treats the whole IO object as a single top-level region
// (offset 0, length io.size(), not compressed) and splits it at depth 0.
auto ArSplit::operator()(IO const& io, offset_cb cb) const -> void {
    auto const whole_file = Entry{.offset = 0, .size = io.size(), .compressed = false};
    process(io, cb, 0, whole_file);
}

// Parses the region `top_entry` as an archive of type T and forwards every
// byte of it to process(), in ascending offset order:
//   - gaps between entries (and the tail after the last entry) are forwarded
//     with depth -1 and inherit top_entry's `compressed` flag;
//   - each entry is forwarded with depth 1 when T allows nesting, nesting is
//     not disabled and the entry is not compressed; otherwise with depth -1.
// A non-null result from T::read is reported through rlib_error.
template <typename T>
auto ArSplit::process_ar(IO const& io, offset_cb cb, Entry top_entry) const -> void {
    auto parsed = T{};
    if (auto failure = parsed.read(io, top_entry.offset, top_entry.size)) rlib_error(failure);

    // Ascending offset; on equal offsets the larger entry comes first so that
    // the smaller duplicates are the ones skipped below.
    auto const by_position = [](auto const& a, auto const& b) {
        if (a.offset != b.offset) return a.offset < b.offset;
        return a.size > b.size;
    };
    std::sort(parsed.entries.begin(), parsed.entries.end(), by_position);

    auto const may_descend = T::can_nest && !no_nest;
    auto cursor = top_entry.offset;
    for (auto const& item : parsed.entries) {
        // Empty entries carry no data; entries starting before the cursor are
        // duplicates of, or overlap with, something already emitted.
        if (item.size == 0 || item.offset < cursor) continue;

        // Bytes between the previous entry and this one.
        if (item.offset != cursor) {
            process(io,
                    cb,
                    -1,
                    {.offset = cursor, .size = item.offset - cursor, .compressed = top_entry.compressed});
        }

        // The entry itself.
        auto const child_depth = (may_descend && !item.compressed) ? 1 : -1;
        process(io, cb, child_depth, {.offset = item.offset, .size = item.size, .compressed = item.compressed});

        cursor = item.offset + item.size;
    }

    // Bytes after the last entry.
    auto const region_end = top_entry.offset + top_entry.size;
    if (region_end != cursor) {
        process(io,
                cb,
                -1,
                {.offset = cursor, .size = region_end - cursor, .compressed = top_entry.compressed});
    }
}

// Emits the region `top_entry` through `cb`.
//
// When `depth` is non-negative and the region is at least 64 bytes long, its
// first 8 bytes are probed for a known archive magic (BNK, WAD, WPK - each can
// be disabled through the matching no_* flag; WAD is only recognised while
// depth < 1). On a match the region is handed to process_ar for that format.
//
// Otherwise the region is cut into consecutive pieces of at most `chunk_size`
// bytes, each reported to `cb` with top_entry's `compressed` flag.
// A `chunk_size` of zero is treated as "no limit" (the region is reported as a
// single piece); previously it made this loop spin forever because the cursor
// never advanced.
auto ArSplit::process(IO const& io, offset_cb cb, int depth, Entry top_entry) const -> void {
    if (depth >= 0 && top_entry.size >= 64) {
        char buffer[8] = {};
        rlib_assert(io.read(top_entry.offset, buffer));
        if (!no_bnk && BNK::check_magic(buffer)) {
            process_ar<BNK>(io, cb, top_entry);
            return;
        }
        if (!no_wad && depth < 1 && WAD::check_magic(buffer)) {
            process_ar<WAD>(io, cb, top_entry);
            return;
        }
        if (!no_wpk && WPK::check_magic(buffer)) {
            process_ar<WPK>(io, cb, top_entry);
            return;
        }
    }

    // Guard against a zero step, which would never consume `remain`.
    auto const step = chunk_size != 0 ? chunk_size : top_entry.size;
    for (auto i = top_entry.offset, remain = top_entry.size; remain != 0;) {
        auto const size = std::min(step, remain);
        cb({.offset = i, .size = size, .compressed = top_entry.compressed});
        i += size;
        remain -= size;
    }
}
28 changes: 28 additions & 0 deletions lib/rlib/ar.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once
#include <rlib/common.hpp>
#include <rlib/iofile.hpp>

namespace rlib {
// Walks an IO object and reports it to a callback as a sequence of byte
// ranges, optionally looking inside recognised archive formats (BNK/WAD/WPK).
struct ArSplit {
// One reported byte range.
struct Entry {
// Absolute position of the range inside the IO object.
std::size_t offset;
// Length of the range in bytes.
std::size_t size;
// Flag copied from the archive entry (or inherited from the enclosing region).
bool compressed;
};
// Callback invoked once per emitted range.
using offset_cb = function_ref<void(Entry)>;

// Upper bound, in bytes, for each range passed to the callback.
std::size_t chunk_size;
// When set, BNK magic is not probed.
bool no_bnk;
// When set, WAD magic is not probed.
bool no_wad;
// When set, WPK magic is not probed.
bool no_wpk;
// When set, archive entries are never probed for nested archives.
bool no_nest;

// Processes the whole IO object, from offset 0 to io.size().
auto operator()(IO const& io, offset_cb cb) const -> void;

private:
// Emits one region; a negative depth disables archive detection for it.
auto process(IO const& io, offset_cb cb, int depth, Entry top_entry) const -> void;

// Parses one region as archive type T and emits its entries and the gaps between them.
template <typename T>
auto process_ar(IO const& io, offset_cb cb, Entry top_entry) const -> void;
};
}
89 changes: 89 additions & 0 deletions lib/rlib/ar/bnk.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include "bnk.hpp"

#include <map>

// Validation helper for BNK::read: when the condition is false, returns from
// the enclosing function with a string literal made of the " BNK::read: "
// prefix followed by the stringified condition.
#define ar_assert(...) \
do { \
if (!(__VA_ARGS__)) return " BNK::read: " #__VA_ARGS__; \
} while (false)

using namespace rlib;
using namespace rlib::ar;

// Section header as read directly from the file by BNK::read; the section
// payload of `size` bytes follows immediately after it.
struct BNK::Entry::Raw {
// Four-character section tag (e.g. "BKHD", "DIDX", "DATA").
std::array<char, 4> type;
// Payload length in bytes, not counting this header.
std::uint32_t size;
};

// One record of the DIDX section as read directly from the file.
struct BNK::Entry::DIDX {
// Record identifier; not interpreted by BNK::read.
std::uint32_t id;
// Position of the record's data, relative to the start of the DATA payload.
std::uint32_t offset;
// Length of the record's data in bytes.
std::uint32_t size;
};

// Returns true when `data` holds at least four bytes and starts with "BKHD".
auto BNK::check_magic(std::span<char const> data) noexcept -> bool {
    if (data.size() < 4) return false;
    return std::memcmp(data.data(), "BKHD", 4) == 0;
}

// Parses the BNK archive occupying [offset, offset + size) of `io` and fills
// `entries` with absolute byte ranges.
//
// The region is a sequence of sections, each an Entry::Raw header followed by
// its payload. When both a DIDX and a DATA section exist, every DIDX record
// becomes its own entry (flagged compressed) and the DIDX/DATA sections are
// reduced to their 8-byte headers; every section is then appended as an entry
// that covers its header plus whatever payload is left.
//
// Returns nullptr on success, or a static message naming the failed check.
//
// Fixes over the previous revision:
//   - section bounds are validated against the absolute end of the region
//     (offset + size); comparing the absolute cursor with the relative `size`
//     rejected valid banks located at a non-zero offset;
//   - failed reads are reported instead of being silently ignored.
auto BNK::read(IO const& io, std::size_t offset, std::size_t size) -> char const* {
    using TYPE = std::array<char, 4>;
    static constexpr auto BKHD = TYPE{'B', 'K', 'H', 'D'};
    static constexpr auto DIDX = TYPE{'D', 'I', 'D', 'X'};
    static constexpr auto DATA = TYPE{'D', 'A', 'T', 'A'};

    auto const end = offset + size;
    ar_assert(end >= offset);  // offset + size must not wrap around

    auto magic = TYPE{};
    ar_assert(size >= 8);
    ar_assert(io.read(offset, magic));
    ar_assert(magic == BKHD);

    // Walk the section list. Invariant: offset <= i <= end.
    auto sections = std::map<TYPE, Entry>{};
    for (std::size_t i = offset; i != end;) {
        Entry::Raw raw = {};
        ar_assert(end - i >= sizeof(raw));
        ar_assert(io.read(i, {(char*)&raw, sizeof(raw)}));

        i += sizeof(Entry::Raw);
        ar_assert(end - i >= raw.size);

        // A repeated tag overwrites the earlier section of the same type.
        sections[raw.type] = Entry{.offset = i, .size = raw.size};

        i += raw.size;
    }

    entries.clear();
    entries.reserve(sections.size());

    auto const i_didx = sections.find(DIDX);
    auto const i_data = sections.find(DATA);
    if (i_didx != sections.end() && i_data != sections.end()) {
        auto const didx_base = i_didx->second;
        auto const data_base = i_data->second;

        ar_assert(didx_base.size % sizeof(Entry::DIDX) == 0);
        auto didx_list = std::vector<Entry::DIDX>(didx_base.size / sizeof(Entry::DIDX));
        ar_assert(didx_list.empty() || io.read(didx_base.offset, {(char*)didx_list.data(), didx_base.size}));

        entries.reserve(sections.size() + didx_list.size());
        for (auto const& didx : didx_list) {
            // Each record must lie entirely inside the DATA payload.
            ar_assert(data_base.size >= didx.offset);
            ar_assert(data_base.size - didx.offset >= didx.size);
            entries.push_back(Entry{
                .offset = data_base.offset + didx.offset,
                .size = didx.size,
                .compressed = true,
            });
        }

        // The payloads are now covered by the per-record entries above; keep
        // only the section headers.
        i_didx->second.size = 0;
        i_data->second.size = 0;
    }

    // Emit every section, widened to include its header.
    for (auto const& [type, section] : sections) {
        entries.push_back(Entry{
            .offset = section.offset - sizeof(Entry::Raw),
            .size = section.size + sizeof(Entry::Raw),
            .compressed = section.compressed,
        });
    }

    return nullptr;
}
21 changes: 21 additions & 0 deletions lib/rlib/ar/bnk.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once
#include <rlib/common.hpp>
#include <rlib/iofile.hpp>

namespace rlib::ar {
// Reader for BNK archives (files starting with the "BKHD" tag).
struct BNK {
// Entries of this format are never probed for nested archives.
static constexpr bool can_nest = false;

// One byte range discovered inside the archive.
struct Entry {
// On-disk section header (defined in bnk.cpp).
struct Raw;
// On-disk DIDX record (defined in bnk.cpp).
struct DIDX;
// Absolute position of the range inside the IO object.
std::size_t offset;
// Length of the range in bytes.
std::size_t size;
// Set for ranges produced from DIDX records.
bool compressed;
};
// Filled by read(); cleared at the start of the entry-building step.
std::vector<Entry> entries;

// True when `data` begins with the BNK magic.
static auto check_magic(std::span<char const> data) noexcept -> bool;
// Parses the archive located at [offset, offset + size) of `io`.
// Returns nullptr on success, otherwise a static error message.
auto read(IO const& io, std::size_t offset, std::size_t size) -> char const*;
};
}
126 changes: 126 additions & 0 deletions lib/rlib/ar/wad.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include "wad.hpp"

using namespace rlib;
using namespace rlib::ar;

// Validation helper for WAD::read: when the condition is false, returns from
// the enclosing function with a string literal made of the " WAD::read: "
// prefix followed by the stringified condition.
#define ar_assert(...) \
do { \
if (!(__VA_ARGS__)) return " WAD::read: " #__VA_ARGS__; \
} while (false)

// Version-independent view of the WAD header, filled by WAD::read from one of
// the versioned on-disk layouts declared below.
struct WAD::Header {
// On-disk layouts.
struct Base;
struct V1;
struct V2;
struct V3;

// Size in bytes of one table-of-contents record.
std::size_t entry_size;
// Number of table-of-contents records.
std::size_t entry_count;
// Position of the table of contents (made absolute by WAD::read).
std::size_t toc_start;
// entry_size * entry_count.
std::size_t toc_size;
};

// Leading bytes shared by every header version.
struct WAD::Header::Base {
// Expected to be 'R', 'W'.
std::array<char, 2> magic;
// version[0] selects the header layout (V1/V2/V3) in WAD::read.
std::uint8_t version[2];
};

// On-disk header used when version[0] is 0 or 1.
struct WAD::Header::V1 : Base {
// Position of the table of contents, relative to the start of the archive.
std::uint16_t toc_start;
// Size in bytes of one table-of-contents record.
std::uint16_t entry_size;
// Number of table-of-contents records.
std::uint32_t entry_count;
};

// On-disk header used when version[0] is 2.
struct WAD::Header::V2 : Base {
// Not interpreted by WAD::read.
std::array<std::uint8_t, 84> signature;
// Not interpreted by WAD::read.
std::array<std::uint8_t, 8> checksum;
// Position of the table of contents, relative to the start of the archive.
std::uint16_t toc_start;
// Size in bytes of one table-of-contents record.
std::uint16_t entry_size;
// Number of table-of-contents records.
std::uint32_t entry_count;
};

// On-disk header used when version[0] is 3. The table-of-contents position
// and record size are fixed constants rather than stored fields.
struct WAD::Header::V3 : Base {
// Not interpreted by WAD::read.
std::uint8_t signature[256];
// Not interpreted by WAD::read.
std::array<std::uint8_t, 8> checksum;
// Fixed position of the table of contents, relative to the start of the archive.
static constexpr std::uint16_t toc_start = 272;
// Fixed size in bytes of one table-of-contents record.
static constexpr std::uint16_t entry_size = 32;
// Number of table-of-contents records.
std::uint32_t entry_count;
};

// Leading fields of one table-of-contents record as read from the file.
struct WAD::Entry::Raw {
// Not interpreted by WAD::read (presumably a path hash - TODO confirm).
std::uint64_t path;
// Position of the entry data, relative to the start of the archive.
std::uint32_t offset;
// Stored length of the entry data; used as the entry size.
std::uint32_t size_compressed;
// Not interpreted by WAD::read.
std::uint32_t size_uncompressed;
// Any non-zero value marks the entry as compressed.
std::uint8_t type : 4;
// Not interpreted by WAD::read.
std::uint8_t subchunks : 4;
std::uint8_t pad[3];
};

// Returns true when `data` holds at least four bytes, starts with "RW" and
// its third byte (read as unsigned) is at most 10.
auto WAD::check_magic(std::span<char const> data) noexcept -> bool {
    if (data.size() < 4) return false;
    if (std::memcmp(data.data(), "RW", 2) != 0) return false;
    return (uint8_t)data[2] <= 10;
}

// Parses the WAD archive occupying [offset, offset + size) of `io` and fills
// `entries` with absolute byte ranges: first the table of contents itself
// (never compressed), then one range per table-of-contents record.
//
// Returns nullptr on success, or a static message naming the failed check
// ("Unknown wad version" when version[0] is greater than 3).
//
// Fixes over the previous revision:
//   - the versioned header is size-checked against its own size, not against
//     the size of the normalized Header struct;
//   - each record is read with exactly sizeof(Entry::Raw) bytes; reading
//     `entry_size` bytes (32 for V3) overflowed the 24-byte Raw object.
//     Records shorter than Raw are now rejected;
//   - entry bounds are validated against the absolute end of the region
//     (offset + size) instead of comparing absolute offsets with `size`;
//   - failed reads are reported instead of being silently ignored.
auto WAD::read(IO const& io, std::size_t offset, std::size_t size) -> char const* {
    static constexpr auto MAGIC = std::array{'R', 'W'};

    auto const end = offset + size;
    ar_assert(end >= offset);  // offset + size must not wrap around

    Header::Base header_base = {};
    ar_assert(size >= sizeof(header_base));
    ar_assert(io.read(offset, {(char*)&header_base, sizeof(header_base)}));
    ar_assert(header_base.magic == MAGIC);

    // Loads one versioned on-disk header and normalizes it into `header`.
    Header header = {};
    auto const read_header = [&](auto v_header) -> char const* {
        ar_assert(size >= sizeof(v_header));
        ar_assert(io.read(offset, {(char*)&v_header, sizeof(v_header)}));
        header.entry_size = v_header.entry_size;
        header.entry_count = v_header.entry_count;
        header.toc_start = v_header.toc_start;
        header.toc_size = header.entry_size * header.entry_count;
        return nullptr;
    };

    char const* header_error = nullptr;
    switch (header_base.version[0]) {
        case 0:
        case 1:
            header_error = read_header(Header::V1{});
            break;
        case 2:
            header_error = read_header(Header::V2{});
            break;
        case 3:
            header_error = read_header(Header::V3{});
            break;
        default:
            return "Unknown wad version";
    }
    if (header_error) return header_error;

    // Every record must be large enough to hold the fields parsed below.
    ar_assert(header.entry_size >= sizeof(Entry::Raw));
    ar_assert(size >= header.toc_start);
    ar_assert(size - header.toc_start >= header.toc_size);
    header.toc_start += offset;  // absolute from here on
    auto const toc_end = header.toc_start + header.toc_size;

    entries.clear();
    entries.reserve(header.entry_count + 1);

    entries.push_back(Entry{
        .offset = header.toc_start,
        .size = header.toc_size,
        .compressed = false,
    });
    for (std::size_t i = 0; i != header.entry_count; ++i) {
        auto raw_entry = Entry::Raw{};
        // Only the leading sizeof(Raw) bytes of each record are needed; any
        // trailing bytes of a larger record are skipped.
        ar_assert(io.read(header.toc_start + i * header.entry_size, {(char*)&raw_entry, sizeof(raw_entry)}));

        auto const entry = Entry{
            .offset = offset + raw_entry.offset,
            .size = raw_entry.size_compressed,
            .compressed = raw_entry.type != 0,
        };
        // Entry data must sit after the table of contents and inside the region.
        ar_assert(entry.offset >= toc_end);
        ar_assert(end >= entry.offset);
        ar_assert(end - entry.offset >= entry.size);
        entries.push_back(entry);
    }

    return nullptr;
}
21 changes: 21 additions & 0 deletions lib/rlib/ar/wad.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once
#include <rlib/common.hpp>
#include <rlib/iofile.hpp>

namespace rlib::ar {
// Reader for WAD archives (files starting with "RW" and a version byte).
struct WAD {
// Uncompressed entries of this format may be probed for nested archives.
static constexpr bool can_nest = true;

// Version-independent header (defined in wad.cpp).
struct Header;
// One byte range discovered inside the archive.
struct Entry {
// On-disk table-of-contents record (defined in wad.cpp).
struct Raw;
// Absolute position of the range inside the IO object.
std::size_t offset;
// Length of the range in bytes.
std::size_t size;
// Set when the record's type field is non-zero.
bool compressed;
};
// Filled by read(): the table of contents first, then one entry per record.
std::vector<Entry> entries;

// True when `data` begins with the WAD magic and a plausible version byte.
static auto check_magic(std::span<char const> data) noexcept -> bool;
// Parses the archive located at [offset, offset + size) of `io`.
// Returns nullptr on success, otherwise a static error message.
auto read(IO const& io, std::size_t offset, std::size_t size) -> char const*;
};
}
Loading

0 comments on commit 7a4a2e5

Please sign in to comment.