Skip to content

Commit

Permalink
Cleanup.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Jun 13, 2023
1 parent bcf4cdb commit c195db1
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 7 deletions.
2 changes: 0 additions & 2 deletions src/common/io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,6 @@ char* PrivateMmapStream::Open(std::string path, bool read_only, std::size_t offs
char* ptr{nullptr};
auto view_start = offset / GetPageSize() * GetPageSize();
auto view_size = length + (offset - view_start);
std::cout << "offset:" << offset << ", length:" << length << ", start:" << view_start << ", size:" << view_size << std::endl;
// std::cout << view_start << " size: " << view_size << std::endl;
#if defined(__linux__) || defined(__GLIBC__)
int prot{PROT_READ};
if (!read_only) {
Expand Down
7 changes: 3 additions & 4 deletions src/data/sparse_page_source.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S> {
}
// A heuristic for the number of pre-fetched batches. We can make it part of BatchParam
// to let the user adjust the number of pre-fetched batches when needed.
uint32_t constexpr kPreFetch = 1;
uint32_t constexpr kPreFetch = 4;

size_t n_prefetch_batches = std::min(kPreFetch, n_batches_);
CHECK_GT(n_prefetch_batches, 0) << "total batches:" << n_batches_;
Expand All @@ -132,7 +132,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S> {
CHECK_LT(fetch_it, cache_info_->offset.size());
ring_->at(fetch_it) = std::async(std::launch::async, [fetch_it, self, this]() {
auto page = std::make_shared<S>();
// this->exec_.Run([&] {
this->exec_.Run([&] {
common::Timer timer;
timer.Start();
std::unique_ptr<SparsePageFormat<S>> fmt{CreatePageFormat<S>("raw")};
Expand All @@ -144,7 +144,7 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S> {
auto fi = std::make_unique<common::PrivateMmapStream>(n, true, offset, length);
CHECK(fmt->Read(page.get(), fi.get()));
LOG(INFO) << "Read a page in " << timer.ElapsedSeconds() << " seconds.";
// });
});
return page;
});
}
Expand Down Expand Up @@ -174,7 +174,6 @@ class SparsePageSourceImpl : public BatchIteratorImpl<S> {
}

auto bytes = fmt->Write(*page_, fo.get());
std::cout << "wrote: " << bytes << std::endl;

timer.Stop();
LOG(INFO) << static_cast<double>(bytes) / 1024.0 / 1024.0 << " MB written in "
Expand Down
35 changes: 35 additions & 0 deletions tests/cpp/data/test_sparse_page_raw_format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,40 @@ TEST(SparsePageRawFormat, CSCPage) {
// Instantiates the TestSparsePageRawFormat helper for the SortedCSCPage page type.
TEST(SparsePageRawFormat, SortedCSCPage) {
TestSparsePageRawFormat<SortedCSCPage>();
}

// Debugging aid for the "raw" sparse page format.
//
// Writes every SparsePage batch of a generated DMatrix to `testfile` twice -- first through a
// stream opened with mode "w", then through one opened with mode "ab" -- and afterwards reads
// two pages back from the beginning of the file. The last row offset of every page written and
// read is printed so the values can be compared by eye.
//
// Unlike a pure print-only probe, each read is asserted to succeed and to yield a non-empty
// offset vector, so a broken round-trip fails the test instead of invoking `back()` on an
// empty vector.
TEST(Debug, WritePage) {
  std::string path{"testfile"};
  auto Xy = RandomDataGenerator{8192 * 8, 12, 0.0}.GenerateDMatrix();

  {
    std::unique_ptr<SparsePageFormat<SparsePage>> writer{CreatePageFormat<SparsePage>("raw")};
    // Two passes over the same data with different open modes. The stream is destroyed at the
    // end of each pass, before the file is opened again.
    for (char const* mode : {"w", "ab"}) {
      std::unique_ptr<dmlc::Stream> fo{dmlc::Stream::Create(path.c_str(), mode)};
      for (auto const& page : Xy->GetBatches<SparsePage>()) {
        std::cout << "back:" << page.offset.HostVector().back() << std::endl;
        writer->Write(page, fo.get());
      }
    }
  }

  {
    std::unique_ptr<SparsePageFormat<SparsePage>> reader{CreatePageFormat<SparsePage>("raw")};
    std::unique_ptr<dmlc::SeekStream> fi{dmlc::SeekStream::CreateForRead(path.c_str())};
    // Read two consecutive pages from the same stream, one per write pass above.
    for (int i = 0; i < 2; ++i) {
      SparsePage page;
      // Read reports success as a boolean; do not touch the page if it failed.
      ASSERT_TRUE(reader->Read(&page, fi.get()));
      ASSERT_FALSE(page.offset.HostVector().empty());
      std::cout << "back:" << page.offset.HostVector().back() << std::endl;
    }
  }
}
} // namespace data
} // namespace xgboost
1 change: 1 addition & 0 deletions tests/cpp/test_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
int main(int argc, char ** argv) {
xgboost::Args args {{"verbosity", "2"}};
xgboost::ConsoleLogger::Configure(args);

testing::InitGoogleTest(&argc, argv);
testing::FLAGS_gtest_death_test_style = "threadsafe";
auto rmm_alloc = xgboost::SetUpRMMResourceForCppTests(argc, argv);
Expand Down
9 changes: 8 additions & 1 deletion tests/cpp/tree/hist/test_histogram.cc
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,15 @@ TEST(CPUHistogram, ExternalMemory) {
int32_t constexpr kBins = 256;
Context ctx;

std::cout << "l:" << __LINE__ << std::endl;
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, false);
TestHistogramExternalMemory(&ctx, BatchParam{kBins, common::Span<float>{}, false}, true, true);

float sparse_thresh{0.5};
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
sparse_thresh = std::numeric_limits<float>::quiet_NaN();
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, false);
TestHistogramExternalMemory(&ctx, {kBins, sparse_thresh}, false, true);
}
} // namespace tree
} // namespace xgboost

0 comments on commit c195db1

Please sign in to comment.