-
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathdocumentation.lisp
363 lines (254 loc) · 8.87 KB
/
documentation.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
(in-package #:org.shirakumo.maiden.agents.markov)
;; generator.lisp
(docs:define-docs
  ;; Docstrings for the Markov sentence generator. Every entry has the
  ;; shape (KIND NAME DOCSTRING). Docstring continuation lines are kept at
  ;; column zero so no extra whitespace ends up inside the strings.

  ;; --- Chain boundary markers ---
  (variable *start*
    "Predefined index character that marks the beginning of a chain.")

  (variable *end*
    "Predefined index character that marks the end of a chain.")

  ;; --- The generator type ---
  (type generator
    "This contains all the state to allow you to generate new sentences using Markov chains.
The chains are stored in an efficient indexed format
in order to take up as little memory as possible.
See WORDS
See WORD-MAP
See CHAINS
See VISUALIZE
See WORD
See WORD-INDEX
See CHAIN
See ENSURE-CHAIN
See ADD-CHAIN
See NEXT-WORD-INDEX
See RANDOM-TOKEN
See MAKE-SENTENCE
See FIND-SENTENCE
See TOKEN-SCORE
See FIND-TOPIC
See LEARN-SENTENCE
See LEARN
See LEARN-FROM-FILE")

  ;; --- Dictionary and chain storage ---
  (function words
    "Accessor to the vector of word strings.
The order of the words is vitally important. Shuffling words
around will corrupt the chains irrevocably. You should not
manage this vector manually.
See WORD
See GENERATOR")

  (function word-map
    "This is a hash table mapping word strings to their respective index.
See WORD
See GENERATOR")

  (function chains
    "This is a table mapping a word index to an inner table that maps the second word index to an array of possible follow-up words.
See GENERATOR
See CHAIN")

  (function visualize
    "Attempt to visualise the generator's chains.
Invoking this on generators with a large dictionary is not a
good idea.
See GENERATOR")

  ;; --- Word and chain lookup ---
  (function word
    "Accessor to a word in the generator.
THING can be a number, symbol, or string, and returned is
the word's index.
If you set this place with a string, the string is added
to the generator and gets its own index, which is returned.
See GENERATOR
See WORD-MAP
See WORDS")

  (function word-index
    "Return the index of the word.
This adds the word to the generator if it doesn't exist in
its dictionary yet.
See WORD
See GENERATOR")

  (function chain
    "This accesses a vector of possible follow-up words for the given first and second words.
The chains tables are automatically extended as necessary.
See WORD-INDEX
See CHAINS
See GENERATOR")

  (function ensure-chain
    "This ensures that a chain for the given first and second words exists.
See CHAIN
See GENERATOR")

  (function add-chain
    "Add a new chain for the given first and second words and follow-up possibilities.
See ENSURE-CHAIN
See WORD-INDEX
See GENERATOR")

  (function next-word-index
    "Return the index of a follow-up word for the given first and second words.
This index is chosen randomly from the possible indexes
of follow-up words in the corresponding chain. If no
follow-up can be found, NIL is returned.
See CHAIN
See GENERATOR")

  ;; --- Sentence generation ---
  (function random-token
    "Find a random starting token to start a chain with.
This searches through all possible starting chains in
a hopefully somewhat random manner.
See CHAINS
See GENERATOR")

  (function make-sentence
    "Build a new sentence generated by markov chains in the generator.
START can be a token to begin the chain with. If not
given, a random starting token is picked.
Chains are generated by using two words, selecting
the chain the two words designate, and picking a random
follow-up word from that chain. The second and new word
then build the input for the next iteration until an END
token is encountered as the next token.
See *START*
See *END*
See WORD-INDEX
See NEXT-WORD-INDEX
See WORD
See RANDOM-TOKEN
See GENERATOR")

  (function find-sentence
    "Attempt to find a sentence that somehow relates to the words given.
This proceeds by generating completely random sentences
until one is found that contains one of the given words.
Since this is not always guaranteed to work --luck may not
be on your side-- it might also return NIL even though it
is theoretically possible to build a chain. If nothing has
been found after MAX-ATTEMPTS generations, it simply gives
up.
See MAKE-SENTENCE
See GENERATOR")

  ;; --- Text preprocessing ---
  (function remove-artefacts
    "Strip URLs and such artefacts from the string as those can be confusing to the chains.")

  (function find-sentences
    "Split the string into individual sentences.
This is not perfect and just splits by frequent sentence delimiting characters.")

  (function find-tokens
    "Split the string into individual tokens.
This is not perfect and just splits by frequent word delimiting characters.")

  (function token-score
    "Get the score of the token.
This is equal to the number of possible second words
for the token, if any.")

  (function find-topic
    "Attempt to find a token that best communicates the topic this string is about.
See TOKEN-SCORE
See FIND-SENTENCES
See FIND-TOKENS")

  ;; --- Learning ---
  (function learn-sentence
    "Learn the sentence.
This splits the sentence into tokens and builds Markov
chains from the tokens, thus \"learning\" the sentence.
See FIND-TOKENS
See ADD-CHAIN")

  (function learn
    "Learn the string.
The string may contain multiple sentences at once.
See FIND-SENTENCES
See LEARN-SENTENCE")

  (function learn-from-file
    "This reads a file line-by-line and feeds each line to LEARN.
See LEARN"))
;; interface.lisp
(docs:define-docs
  ;; Docstrings for the markov agent and its chat commands. Every entry
  ;; has the shape (KIND NAME DOCSTRING). Docstring continuation lines are
  ;; kept at column zero so no extra whitespace ends up inside the strings.

  ;; --- The agent type and its accessors ---
  (type markov
    "This agent implements a markov-chain based chat bot for amusement. It can learn from the actual chat messages of a channel.")

  (function save-counter
    "Accessor to the counter responsible to see whether it is time to save the registry yet.
See MARKOV")

  (function file
    "Returns the file suitable for the dictionary storage of the client.
See MARKOV
See MAIDEN-STORAGE:WITH-STORAGE")

  (function save-frequency
    "Returns the frequency in which the markov dictionary should be saved to disk.
See MARKOV
See MAIDEN-STORAGE:WITH-STORAGE")

  (function ramble-chance
    "Accessor to the chance (in percent) that the bot might reply to a message with random rambling.
See MARKOV
See MAIDEN-STORAGE:WITH-STORAGE")

  (function generator
    "Returns the generator of the markov client.
If the client does not have a generator yet, a new one is
created and set into the client's slot.
See MARKOV
See GENERATOR")

  (function maybe-save
    "Might save the registry to disk.
The saving only happens if the save-counter exceeds the
save-frequency. You should call this after learning a
sentence.
See SAVE-COUNTER
See SAVE-FREQUENCY
See WRITE-GENERATOR
See GENERATOR")

  (function count-uniques
    "Count the number of unique words in the sequence.
Uniqueness is counted by EQL.")

  ;; --- Chat commands ---
  (command ramble
    "Generates a random sentence.")

  (command ramble-about
    "Generates a random sentence that should be related to the given topic.")

  ;; NOTE(review): the RAMBLE-CHANCE function docstring says the chance is
  ;; "in percent", while the two command docstrings below say "[0,1]".
  ;; One of them is presumably wrong -- confirm against the implementation.
  (command ramble-chance
    "Display the chance [0,1] for the bot to reply to a message.")

  (command set-ramble-chance
    "Update the chance [0,1] for the bot to reply to a message.")

  (command stats
    "Display some statistics about the dictionary of markov chains."))
;; storage.lisp
(docs:define-docs
  ;; Docstrings for the binary (de)serialisation of generators. Every
  ;; entry has the shape (KIND NAME DOCSTRING). Docstring continuation lines
  ;; are kept at column zero so no extra whitespace ends up inside the strings.

  ;; --- Primitive helpers ---
  (function read-vec-using
    "Reads a 32be number from the stream and constructs a vector of that size, which is then filled using READER.")

  ;; --- Words ---
  (function read-word
    "Read a word string from the stream.
Words are encoded as 0-terminated UTF8 byte arrays.")

  (function write-word
    "Write a word string to the stream.
Words are encoded as 0-terminated UTF8 byte arrays.")

  (function read-words
    "Read an array of words and return both the array and the word-index map generated by it.
See READ-VEC-USING")

  (function write-words
    "Write an array of words to the stream.
First writes the number of words as a 32be number,
then writes each word.
See WRITE-WORD")

  ;; --- Word references ---
  (function read-ref
    "Read a word reference from stream.
A word reference is a 32be number.")

  (function write-ref
    "Write a word reference to stream.
A word reference is a 32be number.")

  ;; --- Chains ---
  (function read-chain
    "Read a chain from the stream.
Returns three values: the first word, the second
word, and the vector of follow-up words. All words
are returned as indexes.
See READ-REF")

  (function write-chain
    "Writes a chain to the stream.
Writes the first two words as references to the
stream, then writes the number of follow-up words,
and finally writes all follow-up words as references.
See WRITE-REF")

  (function read-chains
    "Read all chains from the stream into a chain table.
This reads a counter for how many chains there are
and then reads each chain into a map, taking care
to properly process consecutive chains of the same
word.
See READ-CHAIN")

  (function write-chains
    "Writes all chains from the chain table to the stream.
Writes the number of entries in the table as a 32be
number to the stream, then goes through each chain and
writes it out.
See WRITE-CHAIN")

  ;; --- Whole generators ---
  (function read-generator
    "Reads a complete Markov generator instance from the source.
Source may be a pathname-designator, a stream, or a
fast-io::input-buffer.")

  (function write-generator
    "Writes a complete Markov generator instance to the source.
Source may be a pathname-designator, a stream, or a
fast-io::output-buffer."))