diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 966f79f8e..30e0de3ba 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -24,6 +24,7 @@
from . import llama_cpp
from .llama_types import *
from .llama_grammar import LlamaGrammar
+from . import llama_chat_format
import numpy as np
import numpy.typing as npt
@@ -243,6 +244,8 @@ def __init__(
lora_path: Optional[str] = None,
# Backend Params
numa: bool = False,
+ # Chat Format Params
+ chat_format: str = "llama-2",
# Misc
verbose: bool = True,
# Extra Params
@@ -273,6 +276,7 @@ def __init__(
lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
lora_path: Path to a LoRA file to apply to the model.
numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
+ chat_format: String specifying the chat format to use when calling create_chat_completion (e.g. "llama-2", "alpaca", "vicuna"; see llama_chat_format.py for the full registry).
verbose: Print verbose output to stderr.
kwargs: Unused keyword arguments (for additional backwards compatibility).
@@ -388,6 +392,8 @@ def __init__(
if self.verbose:
print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
+
+ self.chat_format = chat_format
self._n_vocab = self.n_vocab()
self._n_ctx = self.n_ctx()
@@ -1565,9 +1571,21 @@ def _convert_text_completion_chunks_to_chat(
],
}
+ def _convert_completion_to_chat(
+ self,
+ completion_or_chunks: Union[Completion, Iterator[CompletionChunk]],
+ stream: bool = False,
+ ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+ if stream:
+ chunks: Iterator[CompletionChunk] = completion_or_chunks # type: ignore
+ return self._convert_text_completion_chunks_to_chat(chunks)
+ else:
+ completion: Completion = completion_or_chunks # type: ignore
+ return self._convert_text_completion_to_chat(completion)
+
def create_chat_completion(
self,
- messages: List[ChatCompletionMessage],
+ messages: List[ChatCompletionRequestMessage],
functions: Optional[List[ChatCompletionFunction]] = None,
function_call: Optional[Union[str, ChatCompletionFunctionCall]] = None,
temperature: float = 0.2,
@@ -1602,26 +1620,28 @@ def create_chat_completion(
Returns:
Generated chat completion or a stream of chat completion chunks.
"""
- stop = (
- stop if isinstance(stop, list) else [stop] if isinstance(stop, str) else []
- )
- chat_history = "".join(
- f'### {"Human" if message["role"] == "user" else "Assistant"}:{message["content"]}'
- for message in messages
+
+ format = llama_chat_format.get_chat_format(self.chat_format)
+ result = format(
+ messages=messages,
)
- PROMPT = chat_history + "### Assistant:"
- PROMPT_STOP = ["### Assistant:", "### Human:"]
- completion_or_chunks = self(
- prompt=PROMPT,
- stop=PROMPT_STOP + stop,
+ prompt = result.prompt
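+ # Merge any caller-supplied stop sequence(s) with the stop sequence(s)
+ # required by the selected chat format.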
+ if result.stop is not None:
+ stop = [] if stop is None else [stop] if isinstance(stop, str) else stop
+ rstop = result.stop if isinstance(result.stop, list) else [result.stop]
+ stop = stop + rstop
+
+ completion_or_chunks = self.create_completion(
+ prompt=prompt,
temperature=temperature,
top_p=top_p,
top_k=top_k,
stream=stream,
+ stop=stop,
max_tokens=max_tokens,
- repeat_penalty=repeat_penalty,
presence_penalty=presence_penalty,
frequency_penalty=frequency_penalty,
+ repeat_penalty=repeat_penalty,
tfs_z=tfs_z,
mirostat_mode=mirostat_mode,
mirostat_tau=mirostat_tau,
@@ -1630,12 +1650,7 @@ def create_chat_completion(
logits_processor=logits_processor,
grammar=grammar,
)
- if stream:
- chunks: Iterator[CompletionChunk] = completion_or_chunks # type: ignore
- return self._convert_text_completion_chunks_to_chat(chunks)
- else:
- completion: Completion = completion_or_chunks # type: ignore
- return self._convert_text_completion_to_chat(completion)
+ return self._convert_completion_to_chat(completion_or_chunks, stream=stream) # type: ignore
def __del__(self):
if hasattr(self, "model") and self.model is not None:
@@ -1675,6 +1690,8 @@ def __getstate__(self):
lora_path=self.lora_path,
# Backend Params
numa=self.numa,
+ # Chat Format Params
+ chat_format=self.chat_format,
# Misc
verbose=self.verbose,
)
@@ -1708,6 +1725,8 @@ def __setstate__(self, state):
lora_path=state["lora_path"],
# Backend Params
numa=state["numa"],
+ # Chat Format Params
+ chat_format=state["chat_format"],
# Misc
verbose=state["verbose"],
)
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
new file mode 100644
index 000000000..bd8110f2c
--- /dev/null
+++ b/llama_cpp/llama_chat_format.py
@@ -0,0 +1,292 @@
+import dataclasses
+from typing import Any, Dict, List, Optional, Tuple, Union, Protocol
+from . import llama_types
+
+
+def _get_system_message(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+) -> str:
+ """Get the first system message."""
+ for message in messages:
+ if message["role"] == "system":
+ return message["content"] or ""
+ return ""
+
+
+def _map_roles(
+ messages: List[llama_types.ChatCompletionRequestMessage], role_map: Dict[str, str]
+) -> List[Tuple[str, Optional[str]]]:
+ """Map the message roles."""
+ output: List[Tuple[str, Optional[str]]] = []
+ for message in messages:
+ role = message["role"]
+ if role in role_map:
+ output.append((role_map[role], message["content"]))
+ return output
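+# Illustrative sketch (comment only): _map_roles([{"role": "user", "content": "Hi"}],
+# {"user": "USER"}) yields [("USER", "Hi")]; roles absent from role_map
+# (e.g. "system" here) are dropped.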
+
+
+def _format_llama2(
+ system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+) -> str:
+ """Format the prompt with the llama2 style."""
+ ret = system_message + sep
+ for role, message in messages:
+ if message:
+ ret += message + " "
+ else:
+ ret += role + " "
+ return ret
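+# Note: _format_llama2 emits a role tag only for the trailing (role, None)
+# entry that cues the assistant; turns that carry a message contribute the
+# message text alone. Sketch: [("[INST]", "Hi"), ("[/INST]", None)] with
+# sep "\n\n" and an empty system message renders as "\n\nHi [/INST] ".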
+
+
+def _format_add_colon_single(
+ system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+) -> str:
+ """Format the prompt with the add-colon-single style."""
+ ret = system_message + sep
+ for role, message in messages:
+ if message:
+ ret += role + ": " + message + sep
+ else:
+ ret += role + ":"
+ return ret
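+# Sketch (comment only): with sep "\n", the turns [("User", "Hi"), ("Assistant", None)]
+# after a system message "sys" render as "sys\nUser: Hi\nAssistant:"; the open
+# "Assistant:" cue is what the model completes.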
+
+
+def _format_add_colon_two(
+ system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str, sep2: str
+) -> str:
+ """Format the prompt with the add-colon-two style."""
+ seps = [sep, sep2]
+ ret = system_message + seps[0]
+ for i, (role, message) in enumerate(messages):
+ if message:
+ ret += role + ": " + message + seps[i % 2]
+ else:
+ ret += role + ":"
+ return ret
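+# The two separators alternate per turn: even-indexed (user) turns end with
+# sep, odd-indexed (assistant) turns with sep2. Sketch (vicuna-style, sep " ",
+# sep2 "</s>"): [("USER", "Hi"), ("ASSISTANT", None)] after system message
+# "sys" renders as "sys USER: Hi ASSISTANT:".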
+
+
+def _format_no_colon_single(
+ system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+) -> str:
+ """Format the prompt with the no-colon-single style."""
+ ret = system_message
+ for role, message in messages:
+ if message:
+ ret += role + message + sep
+ else:
+ ret += role
+ return ret
+
+
+def _format_add_colon_space_single(
+ system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str
+) -> str:
+ """Format the prompt with the add-colon-space-single style."""
+ ret = system_message + sep
+ for role, message in messages:
+ if message:
+ ret += role + ": " + message + sep
+ else:
+ ret += role + ": " # must be end with a space
+ return ret
+
+
+@dataclasses.dataclass
+class ChatFormatterResponse:
+ prompt: str
+ stop: Optional[Union[str, List[str]]] = None
+
+
+class ChatFormatter(Protocol):
+ def __call__(
+ self,
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+ ) -> ChatFormatterResponse:
+ ...
+
+
+_CHAT_FORMATS: Dict[str, ChatFormatter] = {}
+
+
+def register_chat_format(name: str):
+ def decorator(f: ChatFormatter):
+ _CHAT_FORMATS[name] = f
+ return f
+
+ return decorator
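+
+# A minimal sketch of the extension point (the "raw" name and formatter below
+# are hypothetical, for illustration only):
+#
+# @register_chat_format("raw")
+# def format_raw(messages, **kwargs) -> ChatFormatterResponse:
+#     prompt = "".join(message["content"] or "" for message in messages)
+#     return ChatFormatterResponse(prompt=prompt)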
+
+
+def get_chat_format(name: str):
+ try:
+ return _CHAT_FORMATS[name]
+ except KeyError:
+ raise ValueError(
+ f"Invalid chat format: {name} (valid formats: {list(_CHAT_FORMATS.keys())})"
+ )
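+
+# Usage sketch: get_chat_format("llama-2") returns the registered ChatFormatter;
+# calling it with messages=[...] yields a ChatFormatterResponse, and an
+# unregistered name raises ValueError listing the valid formats.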
+
+
+@register_chat_format("llama-2")
+def format_llama2(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ _system_template = "[INST] <>\n{system_message}\n<>\n\n"
+ _roles = dict(user="[INST]", assistant="[/INST]")
+ _sep = "\n\n"
+ system_message = _get_system_message(messages)
+ system_message = _system_template.format(system_message=system_message)
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_llama2(system_message, _messages, _sep)
+ return ChatFormatterResponse(prompt=_prompt)
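+
+# Illustrative trace (comment only): messages [{"role": "system", "content": "S"},
+# {"role": "user", "content": "Hi"}] produce the prompt
+# "[INST] <<SYS>>\nS\n<</SYS>>\n\n\n\nHi [/INST] ".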
+
+
+@register_chat_format("alpaca")
+def format_alpaca(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ _roles = dict(user="### Instruction", assistant="### Response")
+ _sep = "\n\n"
+ _sep2 = ""
+ system_message = _get_system_message(messages)
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_add_colon_two(system_message, _messages, _sep, _sep2)
+ return ChatFormatterResponse(prompt=_prompt)
+
+
+@register_chat_format("vicuna")
+def format_vicuna(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ _system_message = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."
+ _roles = dict(user="USER", assistant="ASSISTANT")
+ _sep = " "
+ _sep2 = ""
+ system_message = _system_message
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_add_colon_two(system_message, _messages, _sep, _sep2)
+ return ChatFormatterResponse(prompt=_prompt)
+
+
+@register_chat_format("oasst_llama")
+def format_oasst_llama(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ _system_template = "[INST] <>\n{system_message}\n<>\n\n"
+ _roles = dict(user="<|prompter|>", assistant="<|assistant|>")
+ _sep = ""
+ system_message = _get_system_message(messages)
+ system_message = _system_template.format(system_message=system_message)
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_no_colon_single(system_message, _messages, _sep)
+ return ChatFormatterResponse(prompt=_prompt)
+
+
+@register_chat_format("openbuddy")
+def format_openbuddy(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ _system_message = """Consider a conversation between User (a human) and Assistant (named Buddy).
+Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team. GitHub: https://github.com/OpenBuddy/OpenBuddy
+Buddy cannot access the Internet.
+Buddy can fluently speak the user's language (e.g. English, Chinese).
+Buddy can generate poems, stories, code, essays, songs, parodies, and more.
+Buddy possesses vast knowledge about the world, history, and culture.
+Buddy's responses are always safe, creative, high-quality, human-like, and interesting.
+Buddy strictly refuses to discuss political, NSFW, or other unsafe topics.
+
+User: Hi.
+Assistant: Hi, I'm Buddy, your AI assistant. How can I help you today?"""
+ _roles = dict(user="User", assistant="Assistant")
+ _sep = "\n"
+ system_message = _system_message
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_add_colon_single(system_message, _messages, _sep)
+ return ChatFormatterResponse(prompt=_prompt)
+
+
+@register_chat_format("redpajama-incite")
+def format_redpajama_incite(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ _system_message = _get_system_message(messages)
+ _roles = dict(user="", assistant="")
+ _sep = "\n"
+ _stop = ""
+ system_message = _system_message
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_add_colon_single(system_message, _messages, _sep)
+ return ChatFormatterResponse(prompt=_prompt, stop=_stop)
+
+
+@register_chat_format("snoozy")
+def format_snoozy(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ system_template = "### Instruction:\n{system_message}"
+ default_system_message = "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response."
+ _system_message = _get_system_message(messages)
+ _system_message = (
+ _system_message if _system_message != "" else default_system_message
+ )
+ system_message = system_template.format(system_message=_system_message)
+ _roles = dict(user="### Prompt", assistant="### Response")
+ _sep = "\n"
+ _stop = "###"
+ system_message = _system_message
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_add_colon_single(system_message, _messages, _sep)
+ return ChatFormatterResponse(prompt=_prompt, stop=_stop)
+
+
+@register_chat_format("phind")
+def format_phind(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ _roles = dict(user="### User Message", assistant="### Assistant")
+ _sep = "\n\n"
+ _system_message = "### System Prompt\nYou are an intelligent programming assistant."
+ _messages = _map_roles(messages, _roles)
+ _messages.append((_roles["assistant"], None))
+ _prompt = _format_add_colon_single(_system_message, _messages, _sep)
+ return ChatFormatterResponse(prompt=_prompt)
+
+
+@register_chat_format("open-orca")
+def format_open_orca(
+ messages: List[llama_types.ChatCompletionRequestMessage],
+ **kwargs: Any,
+) -> ChatFormatterResponse:
+ system_template = "{system_message}"
+ system_message = (
+ "You are a helpful assistant. Please answer truthfully and write out your "
+ "thinking step by step to be sure you get the right answer. If you make a mistake or encounter "
+ "an error in your thinking, say so out loud and attempt to correct it. If you don't know or "
+ "aren't sure about something, say so clearly. You will act as a professional logician, mathematician, "
+ "and physicist. You will also act as the most appropriate type of expert to answer any particular "
+ "question or solve the relevant problem; state which expert type you are, if so. Also think of "
+ "any particular named expert that would be ideal to answer the relevant question or solve the "
+ "relevant problem; name and act as them, if appropriate."
+ )
+ roles = ("User", "Assistant")
+ sep = "<|end_of_turn|>\n"
+ # stop_token_ids=[32000, 32001], # "<|end_of_turn|>"
+ stop_str = "User"
+ system_message = system_template.format(system_message=system_message)
+ _messages = _map_roles(messages, dict(zip(roles, roles)))
+ _messages.append((roles[1], None))
+ _prompt = _format_add_colon_space_single(system_message, _messages, sep)
+ return ChatFormatterResponse(prompt=_prompt, stop=stop_str)
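+
+
+# Illustrative end-to-end usage (comment only; "model.gguf" is a placeholder
+# path, not a file shipped with this patch):
+#
+# from llama_cpp import Llama
+#
+# llm = Llama(model_path="model.gguf", chat_format="vicuna")
+# llm.create_chat_completion(messages=[{"role": "user", "content": "Hello"}])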