Skip to content

Commit

Permalink
WIP: Add blob_file class and update related classes as part of BLOB support
Browse files Browse the repository at this point in the history

- Added `blob_file` class to manage persistent BLOB data.
- Updated existing classes to integrate with the new `blob_file` class.
- Added `blob_pool` class as a placeholder with a dummy implementation.
- Commit represents an intermediate step in implementing complete BLOB support.
  • Loading branch information
umegane committed Jan 23, 2025
1 parent 2192348 commit fa35e2c
Show file tree
Hide file tree
Showing 10 changed files with 448 additions and 6 deletions.
25 changes: 20 additions & 5 deletions include/limestone/api/blob_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,41 @@
*/
#pragma once

#include <boost/filesystem.hpp>

namespace limestone::api {

/**
 * @brief Represents a BLOB file that can provide persistent BLOB data.
 *
 * An instance couples the path of the BLOB file with a flag telling whether
 * the file is currently available.
 */
class blob_file {
public:
    /**
     * @brief Creates a BLOB file instance.
     * @param path Path to the BLOB file.
     * @param available Initial availability status of the BLOB file (default: false).
     */
    explicit blob_file(boost::filesystem::path const& path, bool available = false);

    /**
     * @brief Retrieves the path to the BLOB file.
     * @return BLOB file path
     */
    [[nodiscard]] boost::filesystem::path const& path() const noexcept;

    /**
     * @brief Returns whether this BLOB file is available.
     * @return true if this is available
     * @return false otherwise
     */
    [[nodiscard]] explicit operator bool() const noexcept;

    /**
     * @brief Sets the availability status of the BLOB file.
     * @param available New availability status.
     */
    void set_availability(bool available) noexcept;

private:
    // Location of the BLOB file.
    boost::filesystem::path blob_path_;

    // Availability flag reported by operator bool(); false unless set otherwise.
    bool available_{false};
};

} // namespace limestone::api
7 changes: 6 additions & 1 deletion include/limestone/api/datastore.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
#include <limestone/api/restore_progress.h>

// Forward declarations of internal types that the datastore class only
// refers to through pointers; their full definitions live in src/limestone.
namespace limestone::internal {
    class compaction_catalog;
    class blob_file_resolver;
}
namespace limestone::api {

Expand Down Expand Up @@ -450,6 +451,10 @@ class datastore {
virtual void write_epoch_to_file(epoch_id_type epoch_id);

int epoch_write_counter = 0;

std::unique_ptr<limestone::internal::blob_file_resolver> blob_file_resolver_;

std::atomic<std::uint64_t> next_blob_id_{0};
};

} // namespace limestone::api
37 changes: 37 additions & 0 deletions src/limestone/blob_file.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright 2022-2025 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <limestone/api/blob_file.h>
#include <boost/filesystem.hpp>

namespace limestone::api {

/**
 * @brief Constructs a blob_file from a path and an availability flag.
 *
 * The given path is copied into the instance.
 *
 * @param path Path to the BLOB file.
 * @param available Initial availability status of the BLOB file.
 */
// NOTE(review): clang-tidy (modernize-pass-by-value) suggests taking `path`
// by value and moving it into blob_path_. That requires changing the
// declaration in include/limestone/api/blob_file.h at the same time, so the
// signature is left as declared here.
blob_file::blob_file(boost::filesystem::path const& path, bool available)
    : blob_path_(path), available_(available) {}

/**
 * @brief Retrieves the path to the BLOB file.
 * @return Reference to the path stored in this instance.
 */
boost::filesystem::path const& blob_file::path() const noexcept
{
    return this->blob_path_;
}

/**
 * @brief Reports the availability flag of this BLOB file.
 * @return true if the file is marked as available, false otherwise.
 */
blob_file::operator bool() const noexcept
{
    return this->available_;
}

void blob_file::set_availability(bool available) noexcept {
available_ = available;
}

} // namespace limestone::api
112 changes: 112 additions & 0 deletions src/limestone/blob_file_resolver.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright 2022-2025 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <functional>
#include <iomanip>
#include <sstream>
#include <stdexcept>
#include <unordered_map>
#include <vector>

#include <boost/filesystem.hpp>

#include <limestone/api/blob_file.h>
#include <limestone/api/blob_pool.h>

namespace limestone::internal {

using limestone::api::blob_id_type;
using limestone::api::blob_file;

/**
 * @brief Resolves file paths for given BLOB IDs with precomputed directory caching.
 *
 * BLOB files live under `<base_directory>/blob/`, spread over
 * `directory_count` subdirectories. The subdirectory paths are built once in
 * the constructor and reused by every lookup.
 */
class blob_file_resolver {
public:
    /**
     * @brief Constructs a blob_file_resolver with the given base directory.
     *
     * The BLOB files are assumed to be stored under `<base_directory>/blob/`.
     *
     * @param base_directory The base directory for storing BLOB files.
     * @param directory_count The number of subdirectories to distribute files into.
     *                        Must be greater than zero.
     * @param hash_function The function used to map `blob_id` to a directory index.
     *                      Its result is reduced modulo `directory_count`.
     * @throws std::invalid_argument if `directory_count` is zero.
     */
    explicit blob_file_resolver(
        boost::filesystem::path base_directory,
        std::size_t directory_count = 100,
        std::function<std::size_t(blob_id_type)> hash_function = [](blob_id_type id) { return id; })
        : blob_directory_(std::move(base_directory) / "blob"),
          directory_count_(directory_count),
          hash_function_(std::move(hash_function)) {
        // A zero count would leave the cache empty and make resolve_path()
        // evaluate `% 0`, which is undefined behaviour; reject it up front.
        if (directory_count_ == 0) {
            throw std::invalid_argument("blob_file_resolver: directory_count must be greater than zero");
        }

        // Precompute and cache all directory paths
        precompute_directory_cache();
    }

    /**
     * @brief Resolves the file path for the given BLOB ID.
     *
     * The file name is the BLOB ID written in hexadecimal, zero-padded to a
     * width of 16, followed by `.blob`
     * (assumes blob_id_type streams as an integer - TODO confirm).
     *
     * @param blob_id The ID of the BLOB.
     * @return The resolved file path.
     */
    [[nodiscard]] boost::filesystem::path resolve_path(blob_id_type blob_id) const {
        // Calculate directory index; the modulo keeps it inside the cache.
        std::size_t directory_index = hash_function_(blob_id) % directory_count_;

        // Retrieve precomputed directory path
        const boost::filesystem::path& subdirectory = directory_cache_[directory_index];

        // Generate the file name
        std::ostringstream file_name;
        file_name << std::hex << std::setw(16) << std::setfill('0') << blob_id << ".blob";

        return subdirectory / file_name.str();
    }

    /**
     * @brief Resolves the BLOB file for the given BLOB ID.
     *
     * @param blob_id The ID of the BLOB.
     * @param available Initial availability status of the BLOB file (default: false).
     * @return A blob_file instance corresponding to the BLOB ID.
     */
    [[nodiscard]] blob_file resolve_blob_file(blob_id_type blob_id, bool available = false) const {
        boost::filesystem::path file_path = resolve_path(blob_id);
        return blob_file(file_path, available);
    }

private:
    /**
     * @brief Precomputes all directory paths and stores them in the cache.
     *
     * Entry `i` is `<blob directory>/dir_<i>`, where `<i>` is written in
     * decimal and zero-padded to at least two digits.
     */
    void precompute_directory_cache() {
        directory_cache_.reserve(directory_count_);
        for (std::size_t i = 0; i < directory_count_; ++i) {
            std::ostringstream dir_name;
            dir_name << "dir_" << std::setw(2) << std::setfill('0') << i;
            directory_cache_.emplace_back(blob_directory_ / dir_name.str());
        }
    }

    boost::filesystem::path blob_directory_;                    // Full path to the `blob` directory
    std::size_t directory_count_;                               // Number of directories for distribution (never zero)
    std::function<std::size_t(blob_id_type)> hash_function_;    // Hash function to map blob_id to directory index

    std::vector<boost::filesystem::path> directory_cache_;      // Precomputed cache for directory paths
};

} // namespace limestone::internal

44 changes: 44 additions & 0 deletions src/limestone/blob_pool_impl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright 2022-2025 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blob_pool_impl.h"

namespace limestone::internal {

blob_pool_impl::blob_pool_impl(std::function<blob_id_type()> id_generator,
limestone::internal::blob_file_resolver& resolver)
: id_generator_(std::move(id_generator)), resolver_(resolver) {}

/**
 * @brief Obtains a BLOB ID by invoking the generator given at construction.
 * @return The value produced by the ID generator.
 */
blob_id_type blob_pool_impl::generate_blob_id()
{
    const blob_id_type generated_id = id_generator_();
    return generated_id;
}

// Releases the pool. This is a placeholder: the body is intentionally empty
// in this work-in-progress implementation.
void blob_pool_impl::release() {
// Empty implementation
}

/**
 * @brief Placeholder for registering a BLOB file.
 *
 * Both arguments are currently ignored; the call only produces a fresh ID.
 *
 * @return A newly generated BLOB ID.
 */
blob_id_type blob_pool_impl::register_file(
    boost::filesystem::path const& /*file*/,
    bool /*is_temporary_file*/)
{
    // Dummy behaviour: hand back a new ID.
    const blob_id_type dummy_id = generate_blob_id();
    return dummy_id;
}

/**
 * @brief Placeholder for registering BLOB data.
 *
 * The data argument is currently ignored; the call only produces a fresh ID.
 *
 * @return A newly generated BLOB ID.
 */
blob_id_type blob_pool_impl::register_data(std::string_view /*data*/)
{
    // Dummy behaviour: hand back a new ID.
    const blob_id_type dummy_id = generate_blob_id();
    return dummy_id;
}

/**
 * @brief Placeholder for duplicating an existing BLOB.
 *
 * The reference argument is currently ignored; the call only produces a fresh ID.
 *
 * @return A newly generated BLOB ID.
 */
blob_id_type blob_pool_impl::duplicate_data(blob_id_type /*reference*/)
{
    // Dummy behaviour: hand back a new ID.
    const blob_id_type dummy_id = generate_blob_id();
    return dummy_id;
}

} // namespace limestone::internal
64 changes: 64 additions & 0 deletions src/limestone/blob_pool_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2022-2025 Project Tsurugi.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <limestone/api/blob_pool.h>
#include <functional>
#include "blob_file_resolver.h"

namespace limestone::internal {

using namespace limestone::api;


/**
 * @brief Implementation of the blob_pool interface.
 *
 * BLOB IDs are obtained from an ID generator supplied at construction time.
 */
class blob_pool_impl : public blob_pool {
public:
    /**
     * @brief Constructs a blob_pool_impl instance with the given ID generator and blob_file_resolver.
     * @param id_generator A callable object that generates unique IDs of type blob_id_type.
     * @param resolver Reference to a blob_file_resolver instance. Only the
     *                 reference is stored, so the resolver has to stay alive
     *                 for as long as this pool is used.
     */
    explicit blob_pool_impl(std::function<blob_id_type()> id_generator,
                            limestone::internal::blob_file_resolver& resolver);

    /// @brief Overrides blob_pool::release.
    void release() override;

    /// @brief Overrides blob_pool::register_file.
    [[nodiscard]] blob_id_type register_file(boost::filesystem::path const& file,
                                             bool is_temporary_file) override;

    /// @brief Overrides blob_pool::register_data.
    [[nodiscard]] blob_id_type register_data(std::string_view data) override;

    /// @brief Overrides blob_pool::duplicate_data.
    [[nodiscard]] blob_id_type duplicate_data(blob_id_type reference) override;

private:
    /**
     * @brief Generates a unique ID for a BLOB.
     *
     * @return A unique ID of type blob_id_type.
     */
    [[nodiscard]] blob_id_type generate_blob_id();

    std::function<blob_id_type()> id_generator_;  // Callable object for ID generation

    // Reference to a blob_file_resolver instance. The member functions do not
    // read it yet, so it is marked [[maybe_unused]] to keep clang's
    // unused-private-field diagnostic quiet until they do.
    [[maybe_unused]] blob_file_resolver& resolver_;
};

} // namespace limestone::internal
19 changes: 19 additions & 0 deletions src/limestone/datastore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include "log_entry.h"
#include "online_compaction.h"
#include "compaction_catalog.h"
#include "blob_file_resolver.h"
#include "blob_pool_impl.h"

namespace limestone::api {
using namespace limestone::internal;
Expand Down Expand Up @@ -109,6 +111,8 @@ datastore::datastore(configuration const& conf) : location_(conf.data_locations_
recover_max_parallelism_ = conf.recover_max_parallelism_;
LOG(INFO) << "/:limestone:config:datastore setting the number of recover process thread = " << recover_max_parallelism_;

blob_file_resolver_ = std::make_unique<blob_file_resolver>(location_);

VLOG_LP(log_debug) << "datastore is created, location = " << location_.string();
} catch (...) {
HANDLE_EXCEPTION_AND_ABORT();
Expand Down Expand Up @@ -697,5 +701,20 @@ void datastore::compact_with_online() {
TRACE_END;
}

/**
 * @brief Creates a new BLOB pool backed by this datastore.
 *
 * The pool receives an ID generator that returns the current value of
 * next_blob_id_ and advances it by one, together with a reference to the
 * datastore's blob_file_resolver.
 *
 * NOTE(review): the generator captures `this` and the resolver is passed by
 * reference, so the returned pool presumably must not outlive this
 * datastore - confirm against callers.
 *
 * @return The newly created pool.
 */
std::unique_ptr<blob_pool> datastore::acquire_blob_pool()
{
    std::unique_ptr<blob_pool> pool = std::make_unique<limestone::internal::blob_pool_impl>(
        [this]() { return next_blob_id_.fetch_add(1, std::memory_order_relaxed); },
        *blob_file_resolver_);
    return pool;
}

/**
 * @brief Returns the BLOB file for the given BLOB reference.
 *
 * Calls check_after_ready() with this function's name first, then asks the
 * resolver for the file and marks it as available.
 *
 * @param reference ID of the BLOB to look up.
 * @return A blob_file for the resolved path with its availability set to true.
 */
blob_file datastore::get_blob_file(blob_id_type reference)
{
    check_after_ready(static_cast<const char*>(__func__));

    const bool available = true;
    blob_file resolved = blob_file_resolver_->resolve_blob_file(reference, available);
    return resolved;
}

} // namespace limestone::api

1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ endfunction (add_test_executable)

file(GLOB SRCS
"limestone/api/*.cpp"
"limestone/blob/*.cpp"
"limestone/log/*.cpp"
"limestone/epoch/*.cpp"
"limestone/utils/*.cpp"
Expand Down
Loading

0 comments on commit fa35e2c

Please sign in to comment.