Skip to content
This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

fix 65B model #66

Merged
merged 5 commits into from Mar 23, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llama-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,9 @@ impl Model {
f16_: _,
} = self.hparams;

- let mut buf_size = 512 * 1024 * 1024;
+ // For the first run, we need to guess a maximum buffer size so we can measure
+ // the actual memory consumption of the temporary ggml context.
+ let mut buf_size = 1024 * 1024 * 1024;
if session.mem_per_token > 0 && session.mem_per_token * n > buf_size {
// add 10% to account for ggml object overhead
buf_size = (1.1f64 * session.mem_per_token as f64 * n as f64) as usize;
Expand Down