Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ai-help): index short_title #10579

Merged
merged 1 commit into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 43 additions & 18 deletions scripts/ai-help-macros.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ interface IndexedDoc {
id: number;
mdn_url: string;
title: string;
title_short: string;
token_count: number | null;
hash: string;
text_hash: string;
Expand All @@ -36,6 +37,7 @@ interface IndexedDoc {
interface Doc {
mdn_url: string;
title: string;
title_short: string;
hash: string;
html: string;
markdown: string;
Expand Down Expand Up @@ -108,9 +110,15 @@ export async function updateEmbeddings(
const updates: Doc[] = [];
const formattingUpdates: Doc[] = [];

for await (const { mdn_url, title, hash, html, markdown, text } of builtDocs(
directory
)) {
for await (const {
mdn_url,
title,
title_short,
hash,
html,
markdown,
text,
} of builtDocs(directory)) {
seenUrls.add(mdn_url);

// Check for existing document in DB and compare checksums.
Expand All @@ -122,6 +130,7 @@ export async function updateEmbeddings(
updates.push({
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -132,6 +141,7 @@ export async function updateEmbeddings(
formattingUpdates.push({
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -154,6 +164,7 @@ export async function updateEmbeddings(
for (const {
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -173,27 +184,30 @@ export async function updateEmbeddings(
INSERT INTO mdn_doc_macro(
mdn_url,
title,
title_short,
hash,
html,
markdown,
token_count,
embedding,
text_hash
)
VALUES($1, $2, $3, $4, $5, $6, $7, $8) ON CONFLICT (mdn_url) DO
VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9) ON CONFLICT (mdn_url) DO
UPDATE
SET mdn_url = $1,
title = $2,
hash = $3,
html = $4,
markdown = $5,
token_count = $6,
embedding = $7,
text_hash = $8
title_short = $3,
hash = $4,
html = $5,
markdown = $6,
token_count = $7,
embedding = $8,
text_hash = $9
`,
values: [
mdn_url,
title,
title_short,
hash,
html,
markdown,
Expand All @@ -211,7 +225,14 @@ export async function updateEmbeddings(
console.error(context);
}
}
for (const { mdn_url, title, hash, html, markdown } of formattingUpdates) {
for (const {
mdn_url,
title,
title_short,
hash,
html,
markdown,
} of formattingUpdates) {
try {
console.log(
`-> [${mdn_url}] Updating document without generating new embedding...`
Expand All @@ -221,16 +242,17 @@ export async function updateEmbeddings(
const query = {
name: "upsert-doc",
text: `
INSERT INTO mdn_doc_macro(mdn_url, title, hash, html, markdown)
VALUES($1, $2, $3, $4, $5) ON CONFLICT (mdn_url) DO
INSERT INTO mdn_doc_macro(mdn_url, title, title_short, hash, html, markdown)
VALUES($1, $2, $3, $4, $5, $6) ON CONFLICT (mdn_url) DO
UPDATE
SET mdn_url = $1,
title = $2,
hash = $3,
html = $4,
markdown = $5
title_short = $3,
hash = $4,
html = $5,
markdown = $6
`,
values: [mdn_url, title, hash, html, markdown],
values: [mdn_url, title, title_short, hash, html, markdown],
rowMode: "array",
};

Expand Down Expand Up @@ -286,7 +308,9 @@ async function* builtDocs(directory: string) {
for await (const metadataPath of builtPaths(directory)) {
try {
const raw = await readFile(metadataPath, "utf-8");
const { title, mdn_url, hash } = JSON.parse(raw) as DocMetadata;
const { title, short_title, mdn_url, hash } = JSON.parse(
raw
) as DocMetadata;

const plainPath = path.join(path.dirname(metadataPath), "plain.html");
const plainHTML = await readFile(plainPath, "utf-8");
Expand Down Expand Up @@ -314,6 +338,7 @@ async function* builtDocs(directory: string) {
yield {
mdn_url,
title,
title_short: short_title || title,
hash,
html,
markdown,
Expand Down
1 change: 1 addition & 0 deletions scripts/ai-help.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ create table
id bigserial,
hash text null,
title text not null,
title_short text not null,
mdn_url text not null,
html text null,
markdown text null,
Expand Down
Loading