fix: improve the Token Counter & correct the counting
- support all OpenAI chat models (GPT-3.5 & 4)
- split into 3 counts: system messages, middle & last 2
- hide the counter until it has a minimal count to display
AurelienStebe committed Mar 7, 2024
1 parent 101fe19 commit d06e725
Showing 3 changed files with 57 additions and 20 deletions.
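
As a rough illustration of the three-part counter, this is what the rebuilt callout looks like once appended to the note, generated from the new getTokenModal in src/utils/loggers.ts below with made-up counts (about 700 system, 2,600 middle and 450 prompt tokens against the 8,192-token gpt-4 limit):

> [!NOTE]- [__@@@@__@@@@@@@@@@@@__@@__-------------------]
> Using **3,750** tokens out of **8,192** max tokens.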
9 changes: 6 additions & 3 deletions src/gladdis.ts
@@ -51,6 +51,12 @@ export async function doGladdis(context: Context): Promise<void> {

void logGladdisCall(context)
void logGladdisChat(context)

+ let tokenModal = getTokenModal(context)
+ if (tokenModal.contains('__@__@__@__')) tokenModal = ''

+ const message = `${tokenModal}\n\n---\n\n__${context.user.label}:__ `
+ await context.file.disk.appendFile(context.file.path, message)
}

export async function callGladdis(context: Context): Promise<Context> {
@@ -93,8 +99,5 @@ export async function callGladdis(context: Context): Promise<Context> {

context.user.history.push({ role: 'assistant', content: response.join('') })

- await disk.appendFile(context.file.path, getTokenModal(context))
- await disk.appendFile(context.file.path, `\n\n---\n\n__${context.user.label}:__ `)

return context
}
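
The '__@__@__@__' test above is the "hide the counter until it has a minimal count to display" part of this commit: every segment of the new graph is padded to at least one '@' (see the Math.max(getTokenRatio(...), 1) calls in src/utils/loggers.ts below), so a graph that still reads '__@__@__@__' means each of the three counts is at most roughly 1/36 of the model limit. A minimal standalone sketch of that rule, with hypothetical names (the plugin uses Obsidian's String contains helper; the sketch uses the standard includes):

// Hypothetical sketch: what doGladdis now appends at the end of the note.
function buildNoteTail(tokenModal: string, userLabel: string): string {
    // '__@__@__@__' means the system, middle and prompt bars are all at their
    // one-'@' minimum, so the counter is not worth showing yet.
    const modal = tokenModal.includes('__@__@__@__') ? '' : tokenModal
    return `${modal}\n\n---\n\n__${userLabel}:__ `
}

buildNoteTail('\n\n> [!NOTE]- [__@__@__@__...]\n> ...', 'User')
// => '\n\n---\n\n__User:__ ' — the callout is suppressed, only the next prompt line is written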
6 changes: 3 additions & 3 deletions src/obsidian.ts
@@ -373,12 +373,12 @@ class GladdisSettingTab extends PluginSettingTab {
.addDropdown((dropdown) =>
dropdown
.addOptions({
- 'gpt-4-1106-preview': 'GPT-4 Preview (128k)',
+ 'gpt-4-turbo-preview': 'GPT-4 Preview (128k)',
'gpt-4-32k': 'GPT-4 (32k)',
'gpt-4': 'GPT-4 (8k)',
- 'gpt-3.5-turbo-1106': 'GPT-3.5 Updated (16k)',
+ 'gpt-3.5-turbo': 'GPT-3.5 Updated (16k)',
'gpt-3.5-turbo-16k': 'GPT-3.5 (16k)',
- 'gpt-3.5-turbo': 'GPT-3.5 (4k)',
+ 'gpt-3.5-turbo-0613': 'GPT-3.5 (4k)',
})
.setValue(this.plugin.settings.GLADDIS_DEFAULT_MODEL)
.onChange(async (value) => {
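The ids selected here end up in GLADDIS_DEFAULT_MODEL and are later looked up in the new modelLimit table in src/utils/loggers.ts; ids missing from the table fall back to 8,192 tokens for gpt-4 variants and 4,096 otherwise, so any OpenAI chat model name still gets a workable limit. A small sketch of that lookup with a trimmed-down table (limitFor is a made-up name for the example):

// Sketch of the fallback behaviour behind the new modelLimit table (subset shown).
const modelLimit: Record<string, number> = {
    'gpt-4-turbo-preview': 128000,
    'gpt-3.5-turbo': 16385,
}

function limitFor(model: string): number {
    const lowerLimit = model.startsWith('gpt-4') ? 8192 : 4096
    return modelLimit[model] ?? lowerLimit
}

limitFor('gpt-3.5-turbo') // 16385, found in the table
limitFor('gpt-4-0613')    // 8192, not listed, falls back on the gpt-4 default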
62 changes: 48 additions & 14 deletions src/utils/loggers.ts
@@ -8,6 +8,21 @@ import { writeHistory } from './history.js'

import type { Context, ChatMessage } from '../types/context.js'

+ const tiktoken = new Tiktoken(cl100k_base)

+ const modelLimit: Record<string, number> = {
+ 'gpt-3.5-turbo': 16385,
+ 'gpt-3.5-turbo-0125': 16385,
+ 'gpt-3.5-turbo-1106': 16385,
+ 'gpt-3.5-turbo-16k': 16385,
+ 'gpt-3.5-turbo-16k-0613': 16385,
+ 'gpt-4-turbo-preview': 128000,
+ 'gpt-4-0125-preview': 128000,
+ 'gpt-4-1106-preview': 128000,
+ 'gpt-4-32k': 32768,
+ 'gpt-4-32k-0613': 32768,
+ }

export async function logGladdisCall(context: Context): Promise<void> {
const disk = context.file.disk

@@ -34,9 +49,13 @@ export async function logGladdisChat(context: Context): Promise<void> {

await disk.pathEnsure(logPath)

- const history = deepmerge({}, context.user.history.slice(-2))
- history[0].content = `[${context.file.date.toISOString().split('T')[1]}] ${history[0].content}`
- history[1].content = `[${new Date().toISOString().split('T')[1]}] ${history[1].content}`
+ const userIdx = context.user.history.findLastIndex((message) => message.role === 'user')
+ const history = deepmerge({}, context.user.history.slice(userIdx))

+ history.forEach((message, index) => {
+ const date = index === 0 ? context.file.date : new Date()
+ message.content = `[${date.toISOString().split('T')[1]}] ${message.content}`
+ })

const logContext = deepmerge({}, context)
logContext.user.history = history
Expand All @@ -45,25 +64,40 @@ export async function logGladdisChat(context: Context): Promise<void> {
}

export function getTokenModal(context: Context): string {
- const tokenLength = getTokenCount(context.user.history)
+ const lowerLimit = context.gladdis.model.startsWith('gpt-4') ? 8192 : 4096
+ const tokenLimit = modelLimit[context.gladdis.model] ?? lowerLimit

+ const getTokenRatio = (count: number): number => Math.min(Math.ceil((count / tokenLimit) * 36), 36)

+ const systemIndex = context.user.history.findIndex((message) => message.role !== 'system')
+ const promptIndex = context.user.history.findLastIndex((message) => message.role === 'user')

+ const systemCount = getTokenCount(context.user.history.slice(0, systemIndex))
+ const systemGraph = '@'.repeat(Math.max(getTokenRatio(systemCount), 1))

+ const middleCount = getTokenCount(context.user.history.slice(systemIndex, promptIndex))
+ const middleGraph = '@'.repeat(Math.max(getTokenRatio(middleCount), 1))

+ const promptCount = getTokenCount(context.user.history.slice(promptIndex))
+ const promptGraph = '@'.repeat(Math.max(getTokenRatio(promptCount), 1))

- let tokenLimit = context.gladdis.model.startsWith('gpt-4') ? 8192 : 4096
- if (context.gladdis.model.startsWith('gpt-4-32k')) tokenLimit = 32768
- if (context.gladdis.model.startsWith('gpt-3.5-turbo-16k')) tokenLimit = 16384
+ const tokenCount = systemCount + middleCount + promptCount
+ const tokenRatio = Math.max(getTokenRatio(tokenCount), 3)

- const tokenRatio = Math.min(Math.ceil((tokenLength / tokenLimit) * 36), 36)
- const tokenGraph = `[**${'#'.repeat(tokenRatio)}**${'-'.repeat(36 - tokenRatio)}]`
- const tokenCount = `**${tokenLength.toLocaleString()}** tokens out of **${tokenLimit.toLocaleString()}**`
+ const tokenGraph = `[__${systemGraph}__${middleGraph}__${promptGraph}__${'-'.repeat(36 - tokenRatio)}]`
+ const tokenUsage = `**${tokenCount.toLocaleString()}** tokens out of **${tokenLimit.toLocaleString()}**`

const label = tokenRatio > 33 ? 'DANGER' : tokenRatio > 22 ? 'WARNING' : 'NOTE'
- return `\n\n> [!${label}]- ${tokenGraph}\n> Using ${tokenCount} max tokens.`
+ return `\n\n> [!${label}]- ${tokenGraph}\n> Using ${tokenUsage} max tokens.`
}

export function getTokenCount(messages: ChatMessage[]): number {
- const tiktoken = new Tiktoken(cl100k_base)
+ let tokenLength = messages.length * 4

- const fullHistory = messages.map((message) => `${message.name ?? message.role}\n${message.content}`)
- const tokenLength = tiktoken.encode(fullHistory.join('\n')).length + messages.length * 3
+ for (const message of messages) {
+ const prefix = message.name !== undefined ? message.name + '\n' : ''
+ tokenLength += tiktoken.encode(prefix + message.content).length
+ }

return tokenLength
}
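
For reference, the rewritten getTokenCount charges a flat overhead of 4 tokens per message plus the tiktoken-encoded optional name prefix and the content, instead of encoding the whole history as a single string. A self-contained sketch of the same estimate, assuming the js-tiktoken lite entry point and cl100k_base ranks that the identifiers above suggest (the ChatMessage shape is simplified for the example):

import { Tiktoken } from 'js-tiktoken/lite'
import cl100k_base from 'js-tiktoken/ranks/cl100k_base'

// Simplified message shape, just enough for the sketch.
interface ChatMessage { role: string; name?: string; content: string }

const tiktoken = new Tiktoken(cl100k_base)

// ~4 tokens of per-message overhead, plus the encoded name prefix (if any) and content,
// mirroring the counting in the diff above.
function estimateTokens(messages: ChatMessage[]): number {
    let tokenLength = messages.length * 4
    for (const message of messages) {
        const prefix = message.name !== undefined ? message.name + '\n' : ''
        tokenLength += tiktoken.encode(prefix + message.content).length
    }
    return tokenLength
}

estimateTokens([
    { role: 'system', content: 'You are Gladdis.' },
    { role: 'user', name: 'User', content: 'Hello!' },
]) // small two-message histories land in the 15-20 token range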
