Skip to content
This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

fix 65B model #66

Merged
merged 5 commits on Mar 23, 2023
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llama-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ impl Model {
f16_: _,
} = self.hparams;

let mut buf_size = 512 * 1024 * 1024;
let mut buf_size = 1024 * 1024 * 1024;
if session.mem_per_token > 0 && session.mem_per_token * n > buf_size {
// add 10% to account for ggml object overhead
buf_size = (1.1f64 * session.mem_per_token as f64 * n as f64) as usize;
Expand Down