Skip to content

Commit

Permalink
feat: add the Web Browsing feature for static websites
Browse files Browse the repository at this point in the history
  • Loading branch information
AurelienStebe committed Mar 8, 2024
1 parent dd8c50c commit 2357149
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 5 deletions.
7 changes: 5 additions & 2 deletions src/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { gfm } from 'turndown-plugin-gfm'
import { doGladdis } from './gladdis.js'
import { parseLinks } from './utils/scanner.js'
import { transcribe } from './utils/whisper.js'
import { webBrowser } from './utils/browser.js'
import { getTokenModal } from './utils/loggers.js'
import { loadContext, loadContent } from './utils/loaders.js'

Expand Down Expand Up @@ -69,8 +70,9 @@ export async function processContent(context: Context): Promise<void> {
message.content = await parseLinks(message.content, context)
}

context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await parseLinks(context.user.prompt, context)
context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await webBrowser(context.user.prompt, context)

context.user.history.push({
role: 'user',
Expand All @@ -89,8 +91,9 @@ export async function processPrompt(context: Context): Promise<void> {
context.whisper.echoOutput = true
context.whisper.deleteFile = false

context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await parseLinks(context.user.prompt, context)
context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await webBrowser(context.user.prompt, context)

context.user.history = [
{
Expand Down
4 changes: 3 additions & 1 deletion src/gladdis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { deepmerge } from 'deepmerge-ts'

import { parseLinks } from './utils/scanner.js'
import { transcribe } from './utils/whisper.js'
import { webBrowser } from './utils/browser.js'
import { logGladdisCall, logGladdisChat, getTokenModal } from './utils/loggers.js'

import type { Context } from './types/context.js'
Expand Down Expand Up @@ -38,8 +39,9 @@ export async function doGladdis(context: Context): Promise<void> {

context.user.history.unshift({ role: 'system', content: corePrompt })

context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await parseLinks(context.user.prompt, context)
context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await webBrowser(context.user.prompt, context)

context.user.history.push({
role: 'user',
Expand Down
7 changes: 5 additions & 2 deletions src/obsidian.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { deepmerge } from 'deepmerge-ts'
import { doGladdis } from './gladdis.js'
import { parseLinks } from './utils/scanner.js'
import { transcribe } from './utils/whisper.js'
import { webBrowser } from './utils/browser.js'
import { getTokenModal } from './utils/loggers.js'
import { loadContext, loadContent } from './utils/loaders.js'

Expand Down Expand Up @@ -79,8 +80,9 @@ export default class GladdisPlugin extends Plugin {
message.content = await parseLinks(message.content, context)
}

context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await parseLinks(context.user.prompt, context)
context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await webBrowser(context.user.prompt, context)

context.user.history.push({
role: 'user',
Expand All @@ -107,8 +109,9 @@ export default class GladdisPlugin extends Plugin {
context.whisper.echoOutput = true
context.whisper.deleteFile = false

context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await parseLinks(context.user.prompt, context)
context.user.prompt = await transcribe(context.user.prompt, context)
context.user.prompt = await webBrowser(context.user.prompt, context)

context.user.history = [
{
Expand Down
54 changes: 54 additions & 0 deletions src/utils/browser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { Readability } from '@mozilla/readability'

import { processText } from './history.js'
import { parseDOM, request, turndown } from '../commands.js'

import type { Context } from '../types/context.js'

// Matches `<https://…>` autolinks and captures the bare URL in group 1.
// The lookbehind rejects a link when `<%` occurs anywhere earlier in the text,
// and the lookahead rejects it when `%>` occurs anywhere later (the `s` flag
// lets `.` cross newlines) — presumably to leave links inside `<% … %>`
// template tags untouched. `g` is required by `matchAll`; `i` makes the
// scheme match case-insensitively.
const linkRegex = /(?<!<%.*)<(https:\/\/[^>]+?)>(?!.*%>)/gis

/**
 * Expands every `<https://…>` link found in `content` (see `linkRegex`) into
 * the readable text of the linked page.
 *
 * The work is handed to `processText` as a processor callback. For each link:
 *  - the page is fetched with `request` and parsed with `parseDOM`;
 *  - a `<base>` tag pointing at the page URL is ensured before Readability
 *    runs (presumably so relative links resolve against the page URL);
 *  - the extracted article is converted with `turndown` and, when a title is
 *    available, prefixed with it as a level-1 heading.
 *
 * Side effects: a callout is appended to `context.file.path` for every link,
 * `[!BUG]+` on an exception, `[!ERROR]-` when no content was extracted, and
 * `[!EXAMPLE]-` holding the quoted page text on success.
 *
 * @param content - Text that may contain `<https://…>` links.
 * @param context - Execution context; `context.file.disk` and
 * `context.file.path` are used to append the callouts.
 * @returns Whatever `processText` resolves to. The callback itself returns the
 * text with each successfully browsed link replaced by
 * `@URL` followed by the page text wrapped in `"""` fences.
 */
export async function webBrowser(content: string, context: Context): Promise<string> {
    return await processText(content, context, async (content, context) => {
        // `matchAll` iterates over a snapshot of the string, so reassigning
        // `content` inside the loop does not disturb the iteration.
        for (const [fullMatch, pageURL] of content.matchAll(linkRegex)) {
            const disk = context.file.disk
            let webPage: string | undefined

            try {
                const pageDoc = parseDOM(await request(pageURL))

                // Make sure a <base> element exists, then point it at the page URL.
                const baseTag = pageDoc.head.getElementsByTagName('base')[0]
                if (baseTag === undefined) pageDoc.head.appendChild(pageDoc.createElement('base'))

                pageDoc.head.getElementsByTagName('base')[0].href = pageURL
                const article = new Readability(pageDoc).parse()

                webPage = turndown(article?.content ?? 'No Content Found.')
                if (article?.title !== undefined) webPage = `# ${article.title}\n\n${webPage}`
            } catch (error: any) {
                // Report the failure inside the note itself; `webPage` stays
                // undefined so the link is left untouched below.
                const errorName: string = error?.message ?? 'Web Page Browsing Error'
                const errorJSON: string = '```json\n> ' + JSON.stringify(error) + '\n> ```'

                const errorFull = `\n\n> [!BUG]+ **${errorName}**\n> ${errorJSON}`
                await disk.appendFile(context.file.path, errorFull)
            }

            if (webPage === undefined) continue

            if (webPage === 'No Content Found.') {
                const webPageError = `\n\n> [!ERROR]- No Content from "${pageURL}"\n> `
                await disk.appendFile(context.file.path, webPageError + webPage)

                continue
            }

            // Insert U+FEFF after every `<` that starts a tag-like sequence so the
            // quoted page text is not interpreted as HTML inside the note.
            const webPageEsc = webPage.replace(/<(\/?[!a-z])/gi, '<\uFEFF$1')
            const webPageLabel = `\n\n> [!EXAMPLE]- Content from "${pageURL}"`
            const webPageQuote = '\n> ' + webPageEsc.split('\n').join('\n> ')

            await disk.appendFile(context.file.path, webPageLabel + webPageQuote)

            // Use a replacer function: a replacement *string* would expand `$&`,
            // `$1`, `$$`, etc. occurring in the fetched page text and corrupt it.
            const expanded = `@${pageURL}\n"""\n${webPage}\n"""\n\n`
            content = content.replace(fullMatch, () => expanded)
        }

        return content
    })
}
9 changes: 9 additions & 0 deletions src/utils/history.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { Context, ChatMessage, ChatRoleEnum } from '../types/context.js'
export type Processor = (content: string, context: Context) => Promise<string>

// Header line of a transcript callout: `[!QUOTE]`, an optional `+`/`-` fold
// marker, then `Transcript from "<name>"`. Group 1 is the quoted name, which
// parsePrompt uses as the `![[…]]` embed target. Case-insensitive.
const transcriptRegex = /^\[!QUOTE\][+-]? Transcript from "(.+?)"$/i
// Header line of a web content callout: `[!EXAMPLE]`, an optional `+`/`-` fold
// marker, then `Content from "<url>"`. Group 1 is the quoted URL, which
// parsePrompt uses as the `<…>` link target. Case-insensitive.
const webContentRegex = /^\[!EXAMPLE\][+-]? Content from "(.+?)"$/i

export function parseHistory(context: Context): ChatMessage[] {
const lines = (context.file.text + '\n---\n').split('\n')
Expand Down Expand Up @@ -67,13 +68,21 @@ export function parsePrompt(label: string, prompt: string[], quotes: string[][],

for (const lines of quotes) {
if (lines[0] === undefined) continue

const transcriptMatch = transcriptRegex.exec(lines[0])

if (transcriptMatch !== null) {
const transcript = lines.slice(1).join('\n').trim()
if (context.whisper.deleteFile) void context.file.disk.deleteFile(transcriptMatch[1])
content = content.replace(`![[${transcriptMatch[1]}]]`, `"${transcript}" (${context.whisper.readSuffix})`)
}

const webContentMatch = webContentRegex.exec(lines[0])

if (webContentMatch !== null) {
const webContent = lines.slice(1).join('\n').trim().replaceAll('<\uFEFF', '<')
content = content.replace(`<${webContentMatch[1]}>`, `@${webContentMatch[1]}\n"""\n${webContent}\n"""\n\n`)
}
}

let role: ChatRoleEnum = 'user'
Expand Down

0 comments on commit 2357149

Please sign in to comment.