# llm_ollama.py (forked from taketwo/llm-ollama)
import contextlib
from collections import defaultdict
from typing import List, Optional, Tuple

import click
import llm
import ollama
from pydantic import Field

@llm.hookimpl
def register_commands(cli):
    @cli.group(name="ollama")
    def ollama_group():
        "Commands for working with models hosted on Ollama"

    @ollama_group.command(name="list-models")
    def list_models():
        """List models that are available locally on the Ollama server."""
        for model in _get_ollama_models():
            click.echo(model["name"])
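
# Usage sketch (assumed `llm` CLI plugin behavior, not part of the original file):
# once the plugin is installed, the group and command registered above should be
# reachable as:
#
#   llm ollama list-models
#
# which prints one locally available model name per line.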

@llm.hookimpl
def register_models(register):
    models = defaultdict(list)
    for model in _get_ollama_models():
        models[model["digest"]].append(model["name"])
        if model["name"].endswith(":latest"):
            models[model["digest"]].append(model["name"][: -len(":latest")])
    for names in models.values():
        name, aliases = _pick_primary_name(names)
        register(Ollama(name), aliases=aliases)
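
# Illustration (hypothetical data, not part of the original file): if the Ollama
# server reports "llama2:latest" and "llama2:7b" with the same digest, the grouping
# above collects ["llama2:latest", "llama2", "llama2:7b"] under that digest, and
# _pick_primary_name() then registers "llama2:7b" as the model ID with
# "llama2:latest" and "llama2" as aliases.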

class Ollama(llm.Model):
    can_stream: bool = True

    class Options(llm.Options):
        """Parameters that can be set when the model is run by Ollama.

        See: https://github.com/ollama/ollama/blob/main/docs/modelfile.md#parameter
        """

        mirostat: Optional[int] = Field(
            default=None,
            description="Enable Mirostat sampling for controlling perplexity.",
        )
        mirostat_eta: Optional[float] = Field(
            default=None,
            description=(
                "Influences how quickly the algorithm responds to feedback from the generated text."
            ),
        )
        mirostat_tau: Optional[float] = Field(
            default=None,
            description=(
                "Controls the balance between coherence and diversity of the output."
            ),
        )
        num_ctx: Optional[int] = Field(
            default=None,
            description="The size of the context window used to generate the next token.",
        )
        temperature: Optional[float] = Field(
            default=None,
            description=(
                "The temperature of the model. Increasing the temperature will make the model answer more creatively."
            ),
        )
        seed: Optional[int] = Field(
            default=None,
            description=(
                "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt."
            ),
        )
        stop: Optional[List[str]] = Field(
            default=None,
            description=(
                "Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return."
            ),
        )
        tfs_z: Optional[float] = Field(
            default=None,
            description=(
                "Tail free sampling is used to reduce the impact of less probable tokens from the output."
            ),
        )
        num_predict: Optional[int] = Field(
            default=None,
            description="Maximum number of tokens to predict when generating text.",
        )
        top_k: Optional[int] = Field(
            default=None,
            description="Reduces the probability of generating nonsense.",
        )
        top_p: Optional[float] = Field(
            default=None,
            description=(
                "Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text."
            ),
        )
        json_object: Optional[bool] = Field(
            default=None,
            description="Output a valid JSON object {...}. Prompt must mention JSON.",
        )
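
    # Example (assumed `llm` CLI option syntax, not part of the original file): these
    # fields map onto Ollama's generation parameters and can be set per prompt, e.g.
    #
    #   llm -m llama2 -o temperature 0.2 -o num_ctx 4096 "Summarize this file"
    #   llm -m llama2 -o json_object true "Reply as JSON: list three colors"
    #
    # Note that json_object is handled separately in execute() below and becomes the
    # chat API's format="json" argument rather than an Ollama option.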

    def __init__(
        self,
        model_id: str,
    ) -> None:
        self.model_id = model_id

    def __str__(self) -> str:
        return f"Ollama: {self.model_id}"

    def execute(
        self,
        prompt: llm.Prompt,
        stream: bool,
        response: llm.Response,
        conversation=None,
    ):
        messages = self.build_messages(prompt, conversation)
        response._prompt_json = {"messages": messages}
        options = prompt.options.model_dump(exclude_none=True)
        json_object = options.pop("json_object", None)
        kwargs = {}
        if json_object:
            kwargs["format"] = "json"
        if stream:
            response_stream = ollama.chat(
                model=self.model_id,
                messages=messages,
                stream=True,
                options=options,
                **kwargs,
            )
            for chunk in response_stream:
                with contextlib.suppress(KeyError):
                    yield chunk["message"]["content"]
        else:
            response.response_json = ollama.chat(
                model=self.model_id,
                messages=messages,
                options=options,
                **kwargs,
            )
            yield response.response_json["message"]["content"]

    def build_messages(self, prompt, conversation):
        messages = []
        if not conversation:
            if prompt.system:
                messages.append({"role": "system", "content": prompt.system})
            messages.append({"role": "user", "content": prompt.prompt})
            return messages
        current_system = None
        for prev_response in conversation.responses:
            if (
                prev_response.prompt.system
                and prev_response.prompt.system != current_system
            ):
                messages.append(
                    {"role": "system", "content": prev_response.prompt.system},
                )
                current_system = prev_response.prompt.system
            messages.append({"role": "user", "content": prev_response.prompt.prompt})
            messages.append({"role": "assistant", "content": prev_response.text()})
        if prompt.system and prompt.system != current_system:
            messages.append({"role": "system", "content": prompt.system})
        messages.append({"role": "user", "content": prompt.prompt})
        return messages
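
# Usage sketch via the `llm` Python API (assumed API surface, not part of the
# original file):
#
#   import llm
#   model = llm.get_model("llama2:7b")   # any name or alias registered above
#   response = model.prompt("Why is the sky blue?", temperature=0.2)
#   print(response.text())
#
# Whether execute() streams is decided by the caller; both paths are handled above.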

def _pick_primary_name(names: List[str]) -> Tuple[str, Tuple[str, ...]]:
    """Pick the primary model name from a list of names.

    The picking algorithm prefers names with the most specific tag, e.g. "llama2:7b"
    over "llama2:latest" over "llama2".

    Parameters
    ----------
    names : list[str]
        A non-empty list of model names.

    Returns
    -------
    tuple[str, tuple[str, ...]]
        The primary model name and a tuple with the secondary names.

    """
    if len(names) == 1:
        return names[0], ()
    sorted_names = sorted(
        names,
        key=lambda name: (
            ":" not in name,
            name.endswith(":latest"),
            name,
        ),
    )
    return sorted_names[0], tuple(sorted_names[1:])
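
# Example of the ordering (not part of the original file):
#
#   _pick_primary_name(["llama2", "llama2:latest", "llama2:7b"])
#   # -> ("llama2:7b", ("llama2:latest", "llama2"))
#
# Bare names sort last, ":latest" tags sort after more specific tags, and ties are
# broken alphabetically.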

def _get_ollama_models() -> List[dict]:
    """Get a list of models available on Ollama.

    Returns
    -------
    list[dict]
        A list of models available on Ollama. If the Ollama server is down, an empty
        list is returned.

    """
    try:
        return ollama.list()["models"]
    except Exception:
        # Ollama server is unreachable or returned an error.
        return []
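
# The returned dictionaries are consumed elsewhere in this module via their "name"
# and "digest" keys, so each entry is expected to look roughly like (illustrative
# values only):
#
#   {"name": "llama2:latest", "digest": "fe938a131f40...", ...}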