Skip to content

Commit

Permalink
Implement BLOB-related methods in datastore class
Browse files Browse the repository at this point in the history
- Implement datastore::get_blob_file to handle BLOB file access
  - Add file existence check with error handling
  - Add BLOB ID range validation
  - Add file availability status handling

- Implement datastore::acquire_blob_pool for BLOB pool management
  - Add thread-safe BLOB ID generation
  - Add overflow handling for BLOB IDs
  - Add integration with blob_file_resolver

- Add and update test cases
  - Add datastore_blob_test for BLOB-related functionality
  - Add test cases for normal and error conditions
  - Add boundary condition tests for BLOB IDs
  • Loading branch information
umegane committed Jan 24, 2025
1 parent 7f68845 commit 91e1789
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 79 deletions.
7 changes: 1 addition & 6 deletions include/limestone/api/blob_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class blob_file {
* @param path Path to the BLOB file.
* @param available Initial availability status of the BLOB file (default: false).
*/
explicit blob_file(boost::filesystem::path path, bool available = false);
explicit blob_file(boost::filesystem::path path, bool available = false) noexcept;

/**
* @brief retrieves the path to the BLOB file.
Expand All @@ -48,11 +48,6 @@ class blob_file {
* Otherwise, if this is NOT available, the path() may return invalid path.
*/
[[nodiscard]] explicit operator bool() const noexcept;
/**
* @brief Sets the availability status of the BLOB file.
* @param available New availability status.
*/
void set_availability(bool available) noexcept;
};

} // namespace limestone::api
1 change: 1 addition & 0 deletions include/limestone/api/datastore.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ class datastore {
auto epoch_id_switched_for_tests() const noexcept { return epoch_id_switched_.load(); }
auto& files_for_tests() const noexcept { return files_; }
void rotate_epoch_file_for_tests() { rotate_epoch_file(); }
void set_next_blob_id_for_tests(blob_id_type next_blob_id) noexcept { next_blob_id_ = next_blob_id; }

// These virtual methods are hooks for testing thread synchronization.
// They allow derived classes to inject custom behavior or notifications
Expand Down
6 changes: 1 addition & 5 deletions src/limestone/blob_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

namespace limestone::api {

blob_file::blob_file(boost::filesystem::path path, bool available)
blob_file::blob_file(boost::filesystem::path path, bool available) noexcept
: blob_path_(std::move(path)), available_(available) {}

boost::filesystem::path const& blob_file::path() const noexcept {
Expand All @@ -30,8 +30,4 @@ blob_file::operator bool() const noexcept {
return available_;
}

void blob_file::set_availability(bool available) noexcept {
available_ = available;
}

} // namespace limestone::api
18 changes: 3 additions & 15 deletions src/limestone/blob_file_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class blob_file_resolver {
explicit blob_file_resolver(
boost::filesystem::path base_directory,
std::size_t directory_count = 100,
std::function<std::size_t(blob_id_type)> hash_function = [](blob_id_type id) { return id; })
std::function<std::size_t(blob_id_type)> hash_function = [](blob_id_type id) { return id; }) noexcept
: blob_directory_(std::move(base_directory) / "blob"),
directory_count_(directory_count),
hash_function_(std::move(hash_function)) {
Expand All @@ -62,7 +62,7 @@ class blob_file_resolver {
* @param blob_id The ID of the BLOB.
* @return The resolved file path.
*/
[[nodiscard]] boost::filesystem::path resolve_path(blob_id_type blob_id) const {
[[nodiscard]] boost::filesystem::path resolve_path(blob_id_type blob_id) const noexcept {
// Calculate directory index
std::size_t directory_index = hash_function_(blob_id) % directory_count_;

Expand All @@ -76,23 +76,11 @@ class blob_file_resolver {
return subdirectory / file_name.str();
}

/**
* @brief Resolves the BLOB file for the given BLOB ID.
*
* @param blob_id The ID of the BLOB.
* @param available Initial availability status of the BLOB file (default: false).
* @return A blob_file instance corresponding to the BLOB ID.
*/
[[nodiscard]] blob_file resolve_blob_file(blob_id_type blob_id, bool available = false) const {
boost::filesystem::path file_path = resolve_path(blob_id);
return blob_file(file_path, available);
}

private:
/**
* @brief Precomputes all directory paths and stores them in the cache.
*/
void precompute_directory_cache() {
void precompute_directory_cache() noexcept {
directory_cache_.reserve(directory_count_);
for (std::size_t i = 0; i < directory_count_; ++i) {
std::ostringstream dir_name;
Expand Down
40 changes: 35 additions & 5 deletions src/limestone/datastore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,18 +702,48 @@ void datastore::compact_with_online() {
}

std::unique_ptr<blob_pool> datastore::acquire_blob_pool() {
// Store the ID generation logic as a lambda function in a variable
TRACE_START;

// Define a lambda function for generating unique blob IDs in a thread-safe manner.
// This function uses a CAS (Compare-And-Swap) loop to ensure atomic updates to the ID.
// If the maximum value for blob IDs is reached, the function returns the max value, signaling an overflow condition.
auto id_generator = [this]() {
return next_blob_id_.fetch_add(1, std::memory_order_relaxed);
blob_id_type current = 0;
do {
current = next_blob_id_.load(std::memory_order_acquire); // Load the current ID atomically.
if (current == std::numeric_limits<blob_id_type>::max()) {
return current; // Return max value to indicate overflow.
}
} while (!next_blob_id_.compare_exchange_weak(
current,
current + 1,
std::memory_order_acq_rel, // Ensure atomicity of the update with acquire-release semantics.
std::memory_order_acquire));
return current; // Return the successfully updated ID.
};

// Pass the lambda function as a constructor argument to create blob_pool_impl
return std::make_unique<limestone::internal::blob_pool_impl>(id_generator, *blob_file_resolver_);
// Create a blob_pool_impl instance by passing the ID generator lambda and blob_file_resolver.
// This approach allows flexible configuration and dependency injection for the blob pool.
auto pool = std::make_unique<limestone::internal::blob_pool_impl>(id_generator, *blob_file_resolver_);
TRACE_END;
return pool; // Return the constructed blob pool.
}

blob_file datastore::get_blob_file(blob_id_type reference) {
TRACE_START << "reference=" << reference;
check_after_ready(static_cast<const char*>(__func__));
return blob_file_resolver_->resolve_blob_file(reference, true);
auto path = blob_file_resolver_->resolve_path(reference);
bool available = reference < next_blob_id_.load(std::memory_order_acquire);
if (available) {
try {
available = boost::filesystem::exists(path);
} catch (const boost::filesystem::filesystem_error& e) {
LOG_LP(ERROR) << "Failed to check blob file existence: " << e.what();
available = false;
}
}
TRACE_END;
return blob_file(path, available);
}

} // namespace limestone::api
Expand Down
35 changes: 0 additions & 35 deletions test/limestone/blob/blob_file_resolver_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,39 +64,4 @@ TEST_F(blob_file_resolver_test, handles_multiple_blob_ids) {
}
}

TEST_F(blob_file_resolver_test, resolves_blob_file) {
// Test that blob_file is resolved correctly
blob_id_type blob_id = 123456;
bool initial_availability = true;

auto blob = resolver_->resolve_blob_file(blob_id, initial_availability);

// Expected path
std::ostringstream dir_name;
dir_name << "dir_" << std::setw(2) << std::setfill('0') << (blob_id % 10); // Mod 10 for directory count
boost::filesystem::path expected_path = boost::filesystem::path(base_directory) / "blob" / dir_name.str();
expected_path /= "000000000001e240.blob"; // Blob ID in hex: 123456 = 1e240

ASSERT_EQ(blob.path(), expected_path);
ASSERT_EQ(static_cast<bool>(blob), initial_availability);
}

TEST_F(blob_file_resolver_test, resolves_multiple_blob_files) {
// Test multiple blob IDs resolve to correct blob_file instances
for (blob_id_type blob_id = 0; blob_id < 100; ++blob_id) {
bool initial_availability = (blob_id % 2 == 0); // Alternate availability
auto blob = resolver_->resolve_blob_file(blob_id, initial_availability);

std::ostringstream dir_name;
dir_name << "dir_" << std::setw(2) << std::setfill('0') << (blob_id % 10); // Mod 10 for directory count
boost::filesystem::path expected_path = boost::filesystem::path(base_directory) / "blob" / dir_name.str();
std::ostringstream file_name;
file_name << std::hex << std::setw(16) << std::setfill('0') << blob_id << ".blob";
expected_path /= file_name.str();

ASSERT_EQ(blob.path(), expected_path);
ASSERT_EQ(static_cast<bool>(blob), initial_availability);
}
}

} // namespace limestone::testing
13 changes: 0 additions & 13 deletions test/limestone/blob/blob_file_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,6 @@ TEST(blob_file_test, constructor_with_availability) {
EXPECT_TRUE(static_cast<bool>(blob));
}

TEST(blob_file_test, set_availability) {
boost::filesystem::path test_path("/path/to/blob");
blob_file blob(test_path);

EXPECT_FALSE(static_cast<bool>(blob));

blob.set_availability(true);
EXPECT_TRUE(static_cast<bool>(blob));

blob.set_availability(false);
EXPECT_FALSE(static_cast<bool>(blob));
}

TEST(blob_file_test, path_returns_correct_value) {
boost::filesystem::path test_path("/path/to/blob");
blob_file blob(test_path);
Expand Down
102 changes: 102 additions & 0 deletions test/limestone/blob/datastore_blob_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#include <gtest/gtest.h>
#include <limestone/api/datastore.h>
#include <limestone/api/blob_pool.h>
#include <limestone/api/blob_file.h>
#include <limits>
#include <string>
#include <memory>
#include <boost/filesystem.hpp>
#include "test_root.h"

namespace limestone::testing {

constexpr const char* data_location = "/tmp/datastore_blob_test/data_location";
constexpr const char* metadata_location = "/tmp/datastore_blob_test/metadata_location";

class datastore_blob_test : public ::testing::Test {
protected:
std::unique_ptr<api::datastore_test> datastore_;
boost::filesystem::path location_;

void SetUp() override {
if (system("chmod -R a+rwx /tmp/datastore_blob_test") != 0) {
std::cerr << "cannot change permission" << std::endl;
}

if (system("rm -rf /tmp/datastore_blob_test") != 0) {
std::cerr << "cannot remove directory" << std::endl;
}
if (system("mkdir -p /tmp/datastore_blob_test/data_location /tmp/datastore_blob_test/metadata_location") != 0) {
std::cerr << "cannot make directory" << std::endl;
}

std::vector<boost::filesystem::path> data_locations{};
data_locations.emplace_back(data_location);
boost::filesystem::path metadata_location_path{metadata_location};
limestone::api::configuration conf(data_locations, metadata_location_path);

datastore_ = std::make_unique<limestone::api::datastore_test>(conf);
}

void TearDown() override {
if (datastore_) {
datastore_->shutdown();
}
boost::filesystem::remove_all(location_);
}

void create_dummy_file(api::blob_id_type existing_blob_id) {
auto file = datastore_->get_blob_file(existing_blob_id);
auto path = file.path();
boost::filesystem::create_directories(path.parent_path());
boost::filesystem::ofstream dummy_file(path);
dummy_file << "test data";
dummy_file.close();
}
};

TEST_F(datastore_blob_test, acquire_blob_pool_basic) {
auto pool = datastore_->acquire_blob_pool();
ASSERT_NE(pool, nullptr);
}


TEST_F(datastore_blob_test, get_blob_file_basic) {
// Setup: Initialize BLOB IDs
int next_blob_id = 12345;
int existing_blob_id = 12344;
datastore_->set_next_blob_id(next_blob_id);

// Prepare test files
create_dummy_file(existing_blob_id);
create_dummy_file(next_blob_id);

// Case 1: Normal case - file exists and is accessible
auto file = datastore_->get_blob_file(existing_blob_id);
EXPECT_TRUE(static_cast<bool>(file));

// Case 2: File exists but directory permissions prevent access check
// This tests get_blob_file's behavior when exists() throws an exception
boost::filesystem::permissions(file.path().parent_path(), boost::filesystem::perms::no_perms);
EXPECT_THROW(boost::filesystem::exists(file.path()), boost::filesystem::filesystem_error);

Check failure on line 81 in test/limestone/blob/datastore_blob_test.cpp

View workflow job for this annotation

GitHub Actions / CTest (ubuntu-22.04)

datastore_blob_test.get_blob_file_basic

Expected: boost::filesystem::exists(file.path()) throws an exception of type boost::filesystem::filesystem_error. Actual: it throws nothing.

Check failure on line 81 in test/limestone/blob/datastore_blob_test.cpp

View workflow job for this annotation

GitHub Actions / CTest (ubuntu-24.04)

datastore_blob_test.get_blob_file_basic

Expected: boost::filesystem::exists(file.path()) throws an exception of type boost::filesystem::filesystem_error. Actual: it throws nothing.
auto file_excption_throws = datastore_->get_blob_file(existing_blob_id);
EXPECT_FALSE(static_cast<bool>(file_excption_throws));

Check failure on line 83 in test/limestone/blob/datastore_blob_test.cpp

View workflow job for this annotation

GitHub Actions / CTest (ubuntu-22.04)

datastore_blob_test.get_blob_file_basic

Value of: static_cast<bool>(file_excption_throws) Actual: true Expected: false

Check failure on line 83 in test/limestone/blob/datastore_blob_test.cpp

View workflow job for this annotation

GitHub Actions / CTest (ubuntu-24.04)

datastore_blob_test.get_blob_file_basic

Value of: static_cast<bool>(file_excption_throws) Actual: true Expected: false

// Cleanup: Restore permissions for subsequent tests
boost::filesystem::permissions(file.path().parent_path(), boost::filesystem::perms::all_all);
auto file_permission_restored = datastore_->get_blob_file(existing_blob_id);
EXPECT_TRUE(static_cast<bool>(file_permission_restored));

// Case 3: File is removed after being confirmed to exist
boost::filesystem::remove(file.path());
auto file_removed = datastore_->get_blob_file(existing_blob_id);
EXPECT_FALSE(static_cast<bool>(file_removed));

// Case 4: Boundary condition - ID equal to next_blob_id
auto file_next_blob_id = datastore_->get_blob_file(next_blob_id);
EXPECT_TRUE(boost::filesystem::exists(file_next_blob_id.path()));
EXPECT_FALSE(static_cast<bool>(file_removed));
}


} // namespace limestone::testing
1 change: 1 addition & 0 deletions test/test_root.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class datastore_test : public datastore {
auto epoch_id_switched() const noexcept { return epoch_id_switched_for_tests(); }
auto& files() const noexcept { return files_for_tests(); }
void rotate_epoch_file() { rotate_epoch_file_for_tests(); }
void set_next_blob_id(blob_id_type next_blob_id) noexcept { set_next_blob_id_for_tests(next_blob_id); }

protected:
inline void execute_callback(const std::function<void()>& callback) noexcept {
Expand Down

0 comments on commit 91e1789

Please sign in to comment.