Skip to content

Commit

Permalink
fixup! GPU weights not in RAM, direct loading with cuFile
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesGaessler committed May 19, 2023
1 parent 1bfe5a9 commit 24d5ddf
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
6 changes: 3 additions & 3 deletions llama-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ struct llama_mmap {
#ifdef _POSIX_MAPPED_FILES
static constexpr bool SUPPORTED = true;

llama_mmap(struct llama_file * file, bool prefetch = true) {
llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
size = file->size;
int fd = fileno(file->fp);
int flags = MAP_SHARED;
Expand All @@ -184,9 +184,9 @@ struct llama_mmap {
throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
}

if (prefetch) {
if (prefetch > 0) {
// Advise the kernel to preload the mapped memory
if (madvise(addr, file->size, MADV_WILLNEED)) {
if (madvise(addr, std::min(file->size, prefetch), MADV_WILLNEED)) {
fprintf(stderr, "warning: madvise(.., MADV_WILLNEED) failed: %s\n",
strerror(errno));
}
Expand Down
8 changes: 6 additions & 2 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,12 +679,16 @@ struct llama_model_loader {

void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
size_t data_size = 0;
size_t prefetch_size = 0;
for (const llama_load_tensor & lt : tensors_map.tensors) {
data_size += lt.size;
if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
prefetch_size += lt.size;
}
}

if (use_mmap) {
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, false));
mapping.reset(new llama_mmap(&file_loaders.at(0)->file, prefetch_size));
if (!lmlock) {
// Don't call the callback since the actual loading will be lazy
// and we can't measure it.
Expand Down Expand Up @@ -2317,7 +2321,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *

// maybe this should in llama_model_loader
if (model_loader->use_mmap) {
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ false));
model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, /* prefetch */ 0));
}
}

Expand Down

0 comments on commit 24d5ddf

Please sign in to comment.