Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🧮 Frontmatter in include files removed, math/abbreviations maintained #1156

Merged
merged 5 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/red-wolves-nail.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'myst-transforms': patch
'myst-cli': patch
---

Included files remove frontmatter and update math/abbr
12 changes: 7 additions & 5 deletions packages/myst-cli/src/frontmatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,25 @@ export function frontmatterValidationOpts(
};
}
/**
* Get page frontmatter from mdast tree and fill in missing info from project frontmatter
* Get page frontmatter from mdast tree
*
* @param session
* @param path - project path for loading project config/frontmatter
* @param tree - mdast tree already loaded from 'file'
* @param file - file source for mdast 'tree' - this is only used for logging; tree is not reloaded
* @param removeNode - if true, mdast tree will be mutated to remove frontmatter once read
* @param tree - mdast tree already loaded
* @param vfile - vfile used for logging
* @param preFrontmatter - incoming frontmatter for the page that is not from the project or in the tree
* @param keepTitleNode - do not remove leading H1 even if it is lifted as title
*/
export function getPageFrontmatter(
session: ISession,
tree: GenericParent,
vfile: VFile,
preFrontmatter?: Record<string, any>,
keepTitleNode?: boolean,
): { frontmatter: PageFrontmatter; identifiers: string[] } {
const { frontmatter: rawPageFrontmatter, identifiers } = getFrontmatter(vfile, tree, {
propagateTargets: true,
preFrontmatter,
keepTitleNode,
});
unnestKernelSpec(rawPageFrontmatter);
const pageFrontmatter = validatePageFrontmatter(
Expand Down
159 changes: 96 additions & 63 deletions packages/myst-cli/src/process/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import { createHash } from 'node:crypto';
import { tic } from 'myst-cli-utils';
import { TexParser } from 'tex-to-myst';
import { VFile } from 'vfile';
import type { GenericParent } from 'myst-common';
import { RuleId, toText } from 'myst-common';
import type { PageFrontmatter } from 'myst-frontmatter';
import { validatePageFrontmatter } from 'myst-frontmatter';
import { SourceFileKind } from 'myst-spec-ext';
import { frontmatterValidationOpts, getPageFrontmatter } from '../frontmatter.js';
Expand All @@ -19,6 +21,15 @@ import { parseMyst } from './myst.js';
import { processNotebook } from './notebook.js';
import { selectors } from '../store/index.js';

/**
 * Options shared by `loadFile` and the per-format loaders
 * (`loadMdFile`, `loadNotebookFile`, `loadTexFile`).
 *
 * - `preFrontmatter`: incoming frontmatter for the page that is not from the
 *   project or in the tree
 * - `keepTitleNode`: do not remove the leading H1 even if it is lifted as title
 *   (`loadTexFile` does not read this option)
 */
type LoadFileOptions = { preFrontmatter?: Record<string, any>; keepTitleNode?: boolean };

/**
 * Value returned by the per-format loaders (`loadMdFile`, `loadNotebookFile`,
 * `loadTexFile`) for a single parsed source file.
 */
export type LoadFileResult = {
// `Article` for markdown and TeX sources, `Notebook` for notebook sources
kind: SourceFileKind;
// Parsed mdast tree for the file content
mdast: GenericParent;
// Page frontmatter, when the loader produces it
frontmatter?: PageFrontmatter;
// Identifiers returned by `getPageFrontmatter`; `loadTexFile` does not set this
identifiers?: string[];
};

function checkCache(cache: ISessionWithCache, content: string, file: string) {
const sha256 = createHash('sha256').update(content).digest('hex');
cache.store.dispatch(watch.actions.markFileChanged({ path: file, sha256 }));
Expand All @@ -27,6 +38,70 @@ function checkCache(cache: ISessionWithCache, content: string, file: string) {
return { useCache, sha256 };
}

/**
 * Parse MyST markdown text into an mdast tree and read its page frontmatter.
 *
 * The content is passed in by the caller; nothing is read from disk here.
 *
 * @param session
 * @param content - markdown source text to parse
 * @param file - path of the source; passed to the parser and set as the vfile path
 * @param opts - optional `preFrontmatter` / `keepTitleNode`, forwarded to `getPageFrontmatter`
 * @returns an `Article` result holding the tree, frontmatter and identifiers
 */
export function loadMdFile(
  session: ISession,
  content: string,
  file: string,
  opts?: LoadFileOptions,
): LoadFileResult {
  const vfile = new VFile();
  vfile.path = file;
  const tree = parseMyst(session, content, file);
  const page = getPageFrontmatter(
    session,
    tree,
    vfile,
    opts?.preFrontmatter,
    opts?.keepTitleNode,
  );
  return {
    kind: SourceFileKind.Article,
    mdast: tree,
    frontmatter: page.frontmatter,
    identifiers: page.identifiers,
  };
}

/**
 * Convert notebook text into an mdast tree and read its page frontmatter.
 *
 * The content is passed in by the caller; nothing is read from disk here.
 *
 * @param session
 * @param content - raw notebook source text
 * @param file - path of the source; passed to `processNotebook` and set as the vfile path
 * @param opts - optional `preFrontmatter` / `keepTitleNode`, forwarded to `getPageFrontmatter`
 * @returns a promise of a `Notebook` result holding the tree, frontmatter and identifiers
 */
export async function loadNotebookFile(
  session: ISession,
  content: string,
  file: string,
  opts?: LoadFileOptions,
): Promise<LoadFileResult> {
  const vfile = new VFile();
  vfile.path = file;
  const tree = await processNotebook(session, file, content);
  const page = getPageFrontmatter(
    session,
    tree,
    vfile,
    opts?.preFrontmatter,
    opts?.keepTitleNode,
  );
  return {
    kind: SourceFileKind.Notebook,
    mdast: tree,
    frontmatter: page.frontmatter,
    identifiers: page.identifiers,
  };
}

/**
 * Parse TeX text and build validated page frontmatter from the parser's data.
 *
 * Title, short title, authors, keywords, macros (as `math`) and bibliography
 * are taken from the parsed TeX data. Any keys in `opts.preFrontmatter` are
 * spread last, so they override the values extracted from the TeX source.
 *
 * @param session
 * @param content - TeX source text to parse
 * @param file - path of the source; set as the vfile path
 * @param opts - only `preFrontmatter` is read here
 * @returns an `Article` result holding the TeX ast and frontmatter (no identifiers)
 */
export function loadTexFile(
  session: ISession,
  content: string,
  file: string,
  opts?: LoadFileOptions,
): LoadFileResult {
  const vfile = new VFile();
  vfile.path = file;
  const parser = new TexParser(content, vfile);
  const texMeta = parser.data.frontmatter;
  const rawFrontmatter = {
    title: toText(texMeta.title as any),
    short_title: toText(texMeta.short_title as any),
    authors: texMeta.authors,
    // TODO: affiliations: tex.data.frontmatter.affiliations,
    keywords: texMeta.keywords,
    math: parser.data.macros,
    bibliography: parser.data.bibliography,
    ...(opts?.preFrontmatter ?? {}),
  };
  const frontmatter = validatePageFrontmatter(rawFrontmatter, frontmatterValidationOpts(vfile));
  // Log the vfile's messages only after validation has run against it
  logMessagesFromVFile(session, vfile);
  return { kind: SourceFileKind.Article, mdast: parser.ast as GenericParent, frontmatter };
}

/**
* Attempt to load a file into the current session. Unsupported files will
* issue a warning
Expand All @@ -48,8 +123,8 @@ export async function loadFile(
session: ISession,
file: string,
projectPath?: string,
extension?: '.md' | '.ipynb' | '.bib',
opts?: { preFrontmatter?: Record<string, any> },
extension?: '.md' | '.ipynb' | '.tex' | '.bib',
opts?: LoadFileOptions,
): Promise<PreRendererData | undefined> {
await session.loadPlugins();
const toc = tic();
Expand All @@ -63,82 +138,34 @@ export async function loadFile(
}
// ensure forward slashes and not windows backslashes
location = location.replaceAll('\\', '/');
const vfile = new VFile();
vfile.path = file;

try {
const content = fs.readFileSync(file).toString();
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error early if the file does not exist.

const { sha256, useCache } = checkCache(cache, content, file);
if (useCache) {
session.log.debug(toc(`loadFile: ${file} already loaded.`));
return cache.$getMdast(file)?.pre;
}
const ext = extension || path.extname(file).toLowerCase();
let loadResult: LoadFileResult | undefined;
switch (ext) {
case '.md': {
const content = fs.readFileSync(file).toString();
const { sha256, useCache } = checkCache(cache, content, file);
if (useCache) break;
const mdast = parseMyst(session, content, file);
const { frontmatter, identifiers } = getPageFrontmatter(
session,
mdast,
vfile,
opts?.preFrontmatter,
);
cache.$setMdast(file, {
sha256,
pre: { kind: SourceFileKind.Article, file, location, mdast, frontmatter, identifiers },
});
loadResult = loadMdFile(session, content, file, opts);
break;
}
case '.ipynb': {
const content = fs.readFileSync(file).toString();
const { sha256, useCache } = checkCache(cache, content, file);
if (useCache) break;
const mdast = await processNotebook(cache, file, content);
const { frontmatter, identifiers } = getPageFrontmatter(
session,
mdast,
vfile,
opts?.preFrontmatter,
);
cache.$setMdast(file, {
sha256,
pre: { kind: SourceFileKind.Notebook, file, location, mdast, frontmatter, identifiers },
});
loadResult = await loadNotebookFile(session, content, file, opts);
break;
}
case '.tex': {
loadResult = loadTexFile(session, content, file, opts);
break;
}
case '.bib': {
const renderer = await loadBibTeXCitationRenderers(session, file);
cache.$citationRenderers[file] = renderer;
break;
}
case '.tex': {
const content = fs.readFileSync(file).toString();
const { sha256, useCache } = checkCache(cache, content, file);
if (useCache) break;
const tex = new TexParser(content, vfile);
const frontmatter = validatePageFrontmatter(
{
title: toText(tex.data.frontmatter.title as any),
short_title: toText(tex.data.frontmatter.short_title as any),
authors: tex.data.frontmatter.authors,
// TODO: affiliations: tex.data.frontmatter.affiliations,
keywords: tex.data.frontmatter.keywords,
math: tex.data.macros,
bibliography: tex.data.bibliography,
...(opts?.preFrontmatter ?? {}),
},
frontmatterValidationOpts(vfile),
);
logMessagesFromVFile(session, vfile);
cache.$setMdast(file, {
sha256,
pre: {
kind: SourceFileKind.Article,
file,
mdast: tex.ast as any,
location,
frontmatter,
},
});
break;
}
default:
addWarningForFile(session, file, 'Unrecognized extension', 'error', {
ruleId: RuleId.mystFileLoads,
Expand All @@ -148,6 +175,12 @@ export async function loadFile(
);
success = false;
}
if (loadResult) {
cache.$setMdast(file, {
sha256,
pre: { file, location, ...loadResult },
});
}
} catch (error) {
session.log.debug(`\n\n${(error as Error)?.stack}\n\n`);
addWarningForFile(session, file, `Error reading file: ${error}`, 'error', {
Expand Down
2 changes: 1 addition & 1 deletion packages/myst-cli/src/process/mdast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ export async function transformMdast(
// Import additional content from mdast or other files
frontmatterPartsTransform(session, file, mdast, frontmatter);
importMdastFromJson(session, file, mdast);
await includeFilesTransform(session, file, mdast, vfile);
await includeFilesTransform(session, file, mdast, frontmatter, vfile);
rawDirectiveTransform(mdast, vfile);
// This needs to come before basic transformations since it may add labels to blocks
liftCodeMetadataToBlock(session, vfile, mdast);
Expand Down
27 changes: 15 additions & 12 deletions packages/myst-cli/src/transforms/include.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ import path from 'node:path';
import fs from 'node:fs';
import type { GenericParent } from 'myst-common';
import { RuleId, fileError } from 'myst-common';
import type { PageFrontmatter } from 'myst-frontmatter';
import { SourceFileKind } from 'myst-spec-ext';
import { includeDirectiveTransform } from 'myst-transforms';
import type { VFile } from 'vfile';
import { parseMyst } from '../process/myst.js';
import type { ISession } from '../session/types.js';
import { watch } from '../store/reducers.js';
import { TexParser } from 'tex-to-myst';
import { processNotebook } from '../process/notebook.js';
import type { LoadFileResult } from '../process/file.js';
import { loadMdFile, loadNotebookFile, loadTexFile } from '../process/file.js';

/**
* Return resolveFile function
Expand Down Expand Up @@ -57,31 +58,33 @@ export const makeFileLoader = (session: ISession, baseFile: string) => (fullFile
* Handles html and tex files separately; all other files are treated as MyST md.
*/
export const makeContentParser =
(session: ISession) => async (filename: string, content: string, vfile: VFile) => {
(session: ISession, file: string) =>
async (filename: string, content: string): Promise<LoadFileResult> => {
if (filename.toLowerCase().endsWith('.html')) {
return [{ type: 'html', value: content }];
const mdast = { type: 'root', children: [{ type: 'html', value: content }] };
return { mdast, kind: SourceFileKind.Article };
}
const opts = { keepTitleNode: true };
if (filename.toLowerCase().endsWith('.tex')) {
const subTex = new TexParser(content, vfile);
return subTex.ast.children ?? [];
return loadTexFile(session, content, file, opts);
}
if (filename.toLowerCase().endsWith('.ipynb')) {
const mdast = await processNotebook(session, filename, content);
return mdast.children;
return loadNotebookFile(session, content, file, opts);
}
return parseMyst(session, content, filename).children;
return loadMdFile(session, content, file, opts);
};

export async function includeFilesTransform(
session: ISession,
baseFile: string,
tree: GenericParent,
frontmatter: PageFrontmatter,
vfile: VFile,
) {
const parseContent = makeContentParser(session);
const parseContent = makeContentParser(session, baseFile);
const loadFile = makeFileLoader(session, baseFile);
const resolveFile = makeFileResolver(baseFile);
await includeDirectiveTransform(tree, vfile, {
await includeDirectiveTransform(tree, frontmatter, vfile, {
resolveFile,
loadFile,
parseContent,
Expand Down
9 changes: 8 additions & 1 deletion packages/myst-transforms/src/frontmatter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ type Options = {
* is defined.
*/
preFrontmatter?: Record<string, any>;
/**
* By default, if the page starts with an H1 heading and has no title in the
* frontmatter, the heading will become the title and be removed.
* If `keepTitleNode` is true, the heading will still become the title
* but the node will not be removed.
*/
keepTitleNode?: boolean;
};

export function getFrontmatter(
Expand Down Expand Up @@ -70,7 +77,7 @@ export function getFrontmatter(
if (nextNodeIsH1 && !titleNull) {
const title = toText(nextNode.children);
// Only remove the title if it is the same
if (frontmatter.title && frontmatter.title === title) {
if (frontmatter.title && frontmatter.title === title && !opts.keepTitleNode) {
Copy link
Collaborator Author

@fwkoch fwkoch Apr 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think an option like this existed before, and I got rid of it a little hastily... Now, we can once again leave the title node in the tree. Useful for include files where we almost certainly do not want to remove headings.

(nextNode as any).type = '__delete__';
frontmatter.content_includes_title = false;
// If this has a label add it to the page identifiers for reference resolution
Expand Down
Loading
Loading