Commit e36f75a: minimize diff

ochafik committed Feb 19, 2025
1 parent 83aa5bf · commit e36f75a

Showing 4 changed files with 1 addition and 9 deletions.
common/sampling.cpp (0 additions, 3 deletions)

@@ -165,9 +165,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
         GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
 #endif // LLAMA_USE_LLGUIDANCE
     } else {
-        // ^(a|b|(?:[\s\S]*?)(c|d))(?:.*)
-        // ^[\s\S]*?(c|d)[\s\S]*
-        // ^(a|b)[\s\S]*
         std::vector<std::string> escaped_triggers_at_start;
         std::vector<std::string> escaped_triggers_anywhere;
         for (const auto & trigger : params.grammar_trigger_words) {
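For context, the comments deleted above documented the shapes of the regular expressions the sampler builds from grammar_trigger_words: one pattern for triggers that must open the output, one for triggers allowed anywhere. A rough Python illustration of those two shapes (a sketch with made-up trigger words, not the C++ implementation):

    import re

    triggers_at_start = ["<tool_call>"]   # hypothetical trigger word
    triggers_anywhere = ["```json"]       # hypothetical trigger word

    # ^(a|b)[\s\S]* : the trigger must open the output
    at_start = r"^(" + "|".join(map(re.escape, triggers_at_start)) + r")[\s\S]*"
    # ^[\s\S]*?(c|d)[\s\S]* : lazily scan forward to a trigger anywhere
    anywhere = r"^[\s\S]*?(" + "|".join(map(re.escape, triggers_anywhere)) + r")[\s\S]*"

    assert re.match(at_start, '<tool_call>{"name": "get_weather"}')
    assert re.match(anywhere, "Sure thing: ```json {}")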
examples/server/server.cpp (1 addition, 1 deletion)

@@ -2046,8 +2046,8 @@ struct server_context {

         if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {
             // Might be better to reject the request with a 400 ?
-            SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d\n", slot.params.n_predict, slot.n_predict);
             slot.params.n_predict = slot.n_predict;
+            SLT_WRN(slot, "n_predict = %d exceeds server configuration, setting to %d", slot.n_predict, slot.n_predict);
         }

         if (slot.params.ignore_eos && has_eos_token) {
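The hunk above reverts a local tweak to the warning line, restoring the upstream ordering: clamp the request's n_predict to the server's configured maximum, then log. A minimal Python sketch of that clamp-and-warn behavior (illustrative only, not the server code):

    def clamp_n_predict(requested: int, server_cap: int) -> int:
        # If the request asks for more tokens than the server allows, cap it.
        # (The comment in the diff notes that rejecting with HTTP 400 is an alternative.)
        if server_cap > 0 and requested > server_cap:
            print(f"warn: n_predict = {requested} exceeds server configuration, setting to {server_cap}")
            return server_cap
        return requested

    assert clamp_n_predict(4096, 512) == 512
    assert clamp_n_predict(128, 512) == 128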
examples/server/tests/utils.py (0 additions, 3 deletions)

@@ -51,7 +51,6 @@ class ServerProcess:
     model_url: str | None = None
     model_file: str | None = None
     model_draft: str | None = None
-    model_draft_hf_repo: str | None = None
     n_threads: int | None = None
     n_gpu_layer: int | None = None
     n_batch: int | None = None

@@ -119,8 +118,6 @@ def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None:
             server_args.extend(["--model-draft", self.model_draft])
         if self.model_hf_repo:
             server_args.extend(["--hf-repo", self.model_hf_repo])
-        if self.model_draft_hf_repo:
-            server_args.extend(["--hf-repo-draft", self.model_draft_hf_repo])
         if self.model_hf_file:
             server_args.extend(["--hf-file", self.model_hf_file])
         if self.n_batch:
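ServerProcess.start() follows a uniform pattern: each optional field set on the dataclass is translated into a llama-server CLI flag plus its value, and unset fields are skipped. A condensed sketch of that mapping idea, assuming a reduced field set (not the actual test harness; the model_file-to---model pairing is an assumption):

    from __future__ import annotations
    from dataclasses import dataclass

    @dataclass
    class MiniServerProcess:
        model_file: str | None = None
        model_hf_repo: str | None = None
        model_hf_file: str | None = None

        def build_args(self) -> list[str]:
            args: list[str] = []
            # append "--flag value" only for fields that were set
            for value, flag in [
                (self.model_file, "--model"),        # assumed pairing
                (self.model_hf_repo, "--hf-repo"),
                (self.model_hf_file, "--hf-file"),
            ]:
                if value:
                    args.extend([flag, value])
            return args

    print(MiniServerProcess(model_hf_repo="org/model-GGUF").build_args())
    # ['--hf-repo', 'org/model-GGUF']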
scripts/tool_bench.py (0 additions, 2 deletions)

@@ -67,7 +67,6 @@ def stop():
     parser = argparse.ArgumentParser(description='Run tests for the chat server.')
     parser.add_argument('--model', type=str, help='Name of the model to test (implementation agnostic)', required=True)
     parser.add_argument('--hf', type=str, help='GGUF huggingface model repo id (+ optional quant) to test w/ llama-server')
-    parser.add_argument('--hfd', type=str, help='GGUF huggingface draft model repo id (+ optional quant) to test w/ llama-server')
     parser.add_argument('--chat-template', type=str, help='Chat template override for llama-server')
     parser.add_argument('--ollama', type=str, help='Ollama model tag to test')
     parser.add_argument('--n', type=int, help='Number of times to run each test', default=30)

@@ -162,7 +161,6 @@ def elapsed():
         server.n_predict = 512 # High because of DeepSeek R1
         server.model_hf_repo = args.hf
         server.model_hf_file = None
-        server.model_draft_hf_repo = args.hfd
         server.chat_template = args.chat_template
         if args.port is not None:
             server.server_port = args.port
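Given the argparse flags above, an invocation of the benchmark looks roughly like this (model name, repo id, and run count are hypothetical placeholders):

    python scripts/tool_bench.py --model "Some Model 1B Q4_K_M" --hf someorg/SomeModel-GGUF:Q4_K_M --n 10

With this commit the --hfd draft-model flag is gone, so a speculative-decoding draft model can no longer be configured from this script.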
