Just imported MathWorks JupyterLab plugin as is. The plan is to extend it in the future.
alberti42 · Nov 2, 2024 · 472dfe8 · 472dfe8
1 parent c17cf26
commit 472dfe8
Show file tree

Hide file tree

Showing 12 changed files with 665 additions and 0 deletions.
diff --git a/src/codemirror-lang-matlab/codemirror-lang-matlab.ts b/src/codemirror-lang-matlab/codemirror-lang-matlab.ts
@@ -0,0 +1,19 @@
+// Copyright 2024 The MathWorks, Inc.
+
+import { parser } from '../lezer-matlab/dist/index';
+import { LRLanguage, LanguageSupport } from '@codemirror/language';
+
+// Define a CodeMirror language from the Lezer parser.
+// https://codemirror.net/docs/ref/#language.LRLanguage
+// `parser` is the MATLAB parser imported from the lezer-matlab build output.
+export const matlabLanguage = LRLanguage.define({
+    name: 'matlab',
+    parser,
+    languageData: {
+        // '%' introduces a line comment in MATLAB; exposed here as language
+        // data so editor features can look up the comment token.
+        commentTokens: { line: '%' }
+    }
+});
+
+/**
+ * Build the CodeMirror language support extension for MATLAB.
+ *
+ * @returns A LanguageSupport instance wrapping `matlabLanguage`.
+ */
+export function matlab () {
+    const support = new LanguageSupport(matlabLanguage);
+    return support;
+}
diff --git a/src/lezer-matlab/package.json b/src/lezer-matlab/package.json
@@ -0,0 +1,36 @@
+{
+  "name": "@lezer/matlab",
+  "version": "1.0.0",
+  "description": "Lezer-based MATLAB grammar",
+  "main": "dist/index.cjs",
+  "type": "module",
+  "exports": {
+    "import": "./dist/index.js",
+    "require": "./dist/index.cjs"
+  },
+  "module": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "author": {
+    "name": "The MathWorks Inc.",
+    "email": "[email protected]"
+  },
+  "license": "SEE LICENSE IN LICENSE.md",
+  "devDependencies": {
+    "@lezer/generator": "^1.0.0",
+    "@rollup/plugin-node-resolve": "^15.3.0",
+    "mocha": "^10.2.0",
+    "rollup": "^4.22.4"
+  },
+  "dependencies": {
+    "@lezer/common": "^1.2.0",
+    "@lezer/highlight": "^1.0.0",
+    "@lezer/lr": "^1.0.0"
+  },
+  "scripts": {
+    "build": "lezer-generator src/matlab.grammar -o src/parser && rollup -c && npm run copy-lezer-files-to-build-on-windows",
+    "build-debug": "lezer-generator src/matlab.grammar --names -o src/parser && rollup -c && npm run copy-lezer-files-to-build-on-windows",
+    "copy-lezer-files-to-build-on-windows": "copy src\\*.js dist\\ || true",
+    "prepare": "npm run build",
+    "test": "mocha test/test-*.js"
+  }
+}
diff --git a/src/lezer-matlab/rollup.config.js b/src/lezer-matlab/rollup.config.js
@@ -0,0 +1,26 @@
+// Copyright 2024 The MathWorks, Inc.
+
+import { nodeResolve } from '@rollup/plugin-node-resolve';
+import path from 'path';
+
+// Entry point for the bundle. The "build" script in package.json runs
+// lezer-generator with "-o src/parser" before invoking rollup.
+const entryModule = './src/parser.js';
+
+// Rollup configuration: bundle the parser once and write it out in two
+// module formats.
+export default {
+  input: entryModule,
+  output: [{
+    // CommonJS build.
+    format: "cjs",
+    file: "./dist/index.cjs"
+  }, {
+    // ES module build.
+    format: "es",
+    file: "./dist/index.js"
+  }],
+  // Decide whether an imported id is left out of the bundle.
+  external(id) {
+    // The entry module itself is never external.
+    if (id === path.resolve(entryModule)) {
+      return false;
+    }
+    // Ids starting with "." or "/" (relative or absolute paths) are bundled;
+    // every other id (bare package names) is treated as external.
+    return !/^[\.\/]/.test(id);
+  },
+  plugins: [
+    nodeResolve()
+  ]
+}
diff --git a/src/lezer-matlab/src/highlight.js b/src/lezer-matlab/src/highlight.js
@@ -0,0 +1,18 @@
+// Copyright 2024 The MathWorks, Inc.
+
+import { styleTags, tags as t } from '@lezer/highlight';
+
+// Associate nodes in the Lezer tree with styles.
+// https://lezer.codemirror.net/docs/ref/#highlight.styleTags
+// Keys are node names from matlab.grammar; a key holding several
+// space-separated names applies the same tag to each of them.
+export const matlabHighlighting = styleTags({
+    Keyword: t.keyword,
+    Identifier: t.variableName,
+    // Both comment forms share one tag.
+    LineComment: t.comment,
+    MultilineComment: t.comment,
+    SystemCommand: t.meta,
+    String: t.string,
+    Magic: t.monospace,
+    // Bracket pairs, one tag per bracket kind.
+    '( )': t.paren,
+    '[ ]': t.squareBracket,
+    '{ }': t.brace
+});
diff --git a/src/lezer-matlab/src/matlab.grammar b/src/lezer-matlab/src/matlab.grammar
@@ -0,0 +1,47 @@
+// Copyright 2024 The MathWorks, Inc.
+
+// Top-level node: a script is a flat sequence of zero or more expressions.
+@top Script { expression* }
+
+// Any single token-level construct the grammar recognizes. The rule name is
+// lowercase, so (per Lezer's naming convention) it adds no node of its own.
+expression {
+  Identifier |
+  String |
+  MultilineComment |
+  keyword |
+  Symbol |
+  SystemCommand |
+  Magic
+}
+
+@external propSource matlabHighlighting from "./highlight.js"
+
+// Call out to comment parser. Since this is above the tokens block in this grammar, it takes precedence.
+@external tokens parseComments from "./parse_comments.js" { MultilineComment, LineComment, Magic }
+
+// See https://lezer.codemirror.net/docs/guide/ for documentation on syntax
+// specific to @tokens blocks, and how it differs from regular expression syntax.
+@tokens {
+  // One or more letters, digits or underscores, followed by any mix of those
+  // and apostrophes. Digits may lead, so numeric literals also match here.
+  Identifier { $[a-zA-Z0-9_]+ $[a-zA-Z0-9_']* }
+  // Double-quoted text with no embedded double quote or newline.
+  // NOTE(review): in MATLAB terminology double quotes create strings and
+  // single quotes create character vectors, so the names charVector and
+  // stringArray look swapped. Both only feed the String rule below.
+  charVector { '"' (!["\n])* '"' }
+  // Single-quoted text with no embedded single quote or newline.
+  stringArray { "'" (!['\n])* "'" }
+  // "!" and everything after it up to the end of the line.
+  SystemCommand { "!" (![\n])* }
+  // Single-character operators, separators and brackets.
+  Symbol { "+" | "-" | "*" | "=" | ";" | ":" | "(" | ")" | "{" | "}" | "[" | "]" }
+  space { @whitespace+ }
+  // In each pair, the token listed first wins when both could match.
+  @precedence { SystemCommand, Identifier }
+  @precedence { SystemCommand, space }
+  @precedence { Identifier, charVector }
+  @precedence { Identifier, stringArray }
+}
+
+// A String node is either of the two quoted token forms.
+String { charVector | stringArray }
+
+// Once a piece of text has been tokenized as an Identifier, it is then
+// checked against this specialize table to see whether it is a keyword.
+// Matching identifiers produce a node named "Keyword".
+// https://lezer.codemirror.net/docs/guide/#token-specialization
+keyword {
+  @specialize[@name=Keyword]<Identifier, "break" | "case" | "classdef" | "continue" | "global" | "otherwise" | "persistent" | "return" | "spmd" | "arguments" | "enumeration" | "events" | "for" | "function" | "if" | "methods" | "parfor" | "properties" | "try" | "while" | "elseif" | "else" | "end" | "switch" | "catch">
+}
+
+@skip { space | LineComment }
+
+@detectDelim
diff --git a/src/lezer-matlab/src/parse_comments.js b/src/lezer-matlab/src/parse_comments.js
@@ -0,0 +1,160 @@
+// Copyright 2024 The MathWorks, Inc.
+
+import { ExternalTokenizer } from '@lezer/lr';
+// This file is created by lezer-generator during the build.
+import { MultilineComment, LineComment, Magic } from './parser.terms.js';
+
+// Character codes compared against the values returned by input.peek().
+const percent = '%'.charCodeAt(0);
+const openBrace = '{'.charCodeAt(0);
+const closeBrace = '}'.charCodeAt(0);
+// Both input boundaries use -1. NOTE(review): Lezer documents -1 for reads at
+// the end of the input; confirm the same value is reported before the start.
+const fileStart = -1;
+const fileEnd = -1;
+const newline = '\n'.charCodeAt(0);
+const carriageReturn = '\r'.charCodeAt(0);
+
+// True when the character code is a single ASCII letter (a-z or A-Z).
+const isAlphabetical = (char) => /^[a-zA-Z]$/.test(String.fromCharCode(char));
+
+// Codes that stop a line scan: newline, carriage return, or an input boundary.
+const lineEndArray = [newline, carriageReturn, fileEnd, fileStart];
+
+const isWhitespace = (char) => /\s/.test(char);
+
+const precededByWhitespaceOnly = (input) => {
+    // Scan from current position to start of line.
+    // Return False if non-whitespace found.
+    // Always return input back to where it started.
+    const startPos = input.pos;
+    let onlyWhitespace = true;
+    while (!lineEndArray.includes(input.peek(-1))) {
+        if (isWhitespace(input.peek(-1))) {
+            input.advance(-1);
+        } else {
+            onlyWhitespace = false;
+            break;
+        }
+    }
+    while (input.pos < startPos) { input.advance(1); }
+    return onlyWhitespace;
+};
+
+const followedByWhitespaceOnly = (input) => {
+    // Scan from current position to end of line.
+    // Return False if non-whitespace found.
+    // Always return input back to where it started.
+    const startPos = input.pos;
+    let onlyWhitespace = true;
+    while (!lineEndArray.includes(input.peek(0))) {
+        if (isWhitespace(input.peek(0))) {
+            input.advance(1);
+        } else {
+            onlyWhitespace = false;
+            break;
+        }
+    }
+    while (input.pos > startPos) { input.advance(-1); }
+    return onlyWhitespace;
+};
+
+const validMultiLineCommentStart = (input) => {
+    if (input.peek(0) !== percent || input.peek(1) !== openBrace) {
+        return false;
+    }
+    if (!precededByWhitespaceOnly(input)) {
+        return false;
+    }
+    // Consume the %{
+    input.advance(2);
+    if (!followedByWhitespaceOnly(input)) {
+        return false;
+    }
+    input.advance(-2);
+    return true;
+};
+
+const validMultiLineCommentEnd = (input) => {
+    if (input.peek(0) !== percent || input.peek(1) !== closeBrace) {
+        return false;
+    }
+    if (!precededByWhitespaceOnly(input)) {
+        return false;
+    }
+    // Consume the %}
+    input.advance(2);
+    if (!followedByWhitespaceOnly(input)) {
+        return false;
+    }
+    input.advance(-2);
+    return true;
+};
+
+const validMagic = (input) => {
+    if (input.notMagic !== undefined) {
+        return false;
+    }
+    var isMagic = false;
+    if (
+        input.peek(0) === percent &&
+        input.peek(1) === percent &&
+        isAlphabetical(input.peek(2))
+    ) {
+        isMagic = true;
+    }
+    return isMagic;
+};
+
+// External tokenizer for everything that starts with '%': magics ("%%name"),
+// multiline comments ("%{" ... "%}") and ordinary line comments.
+// It emits at most one of Magic, MultilineComment or LineComment per call and
+// emits nothing when the cursor is not on a '%'.
+// State is kept in an ad hoc `notMagic` property on the input object: once it
+// is set, validMagic() rejects every later "%%" line.
+export const parseComments = new ExternalTokenizer((input) => {
+    // Tokenize only if the line is a comment, multiline comment
+    // or a magic and starts with a percentage.
+    if (input.peek(0) !== percent) {
+        // If the line starts with anything other than a percentage then it is MATLAB Code.
+        // If input.input.string exists then check its length; otherwise treat the
+        // condition as satisfied.
+        // NOTE(review): input.input.string looks like an internal of the stream
+        // object rather than public API - confirm it is stable.
+        if (
+            !lineEndArray.includes(input.peek(0)) &&
+            (
+                !('input' in input) ||
+                !('string' in input.input) ||
+                input.input.string.length !== 0
+            )
+        ) {
+            // Code has been seen, so magics are no longer accepted.
+            input.notMagic = true;
+        }
+        // No token accepted here.
+        return;
+    } else if (validMagic(input)) {
+        // A magic runs to the end of its line.
+        while (!lineEndArray.includes(input.peek(0))) {
+            input.advance(1);
+        }
+        input.acceptToken(Magic);
+        return;
+    } else if (validMultiLineCommentStart(input)) {
+        // Consume the %{
+        input.advance(2);
+        // Multiline comments are treated as MATLAB Code.
+        input.notMagic = true;
+        // Now we know we've started a multiline comment, so
+        // continue until the end of the input or until the comment is closed.
+        // We need to keep track of the depth of nested multiline comments.
+        let depth = 1;
+        while (input.peek(0) !== fileEnd) {
+            if (validMultiLineCommentEnd(input)) {
+                // Consume the %}
+                input.advance(2);
+                depth--;
+                if (depth === 0) {
+                    // Outermost comment closed; the token ends right after "%}".
+                    break;
+                }
+            } else if (validMultiLineCommentStart(input)) {
+                depth++;
+            }
+            input.advance(1);
+        }
+
+        // Emit the token for the entire multiline comment.
+        // An unclosed comment extends to the end of the input.
+        input.acceptToken(MultilineComment);
+    } else {
+        // Comments are also treated as MATLAB Code.
+        input.notMagic = true;
+        // A line comment runs to the end of its line.
+        while (!lineEndArray.includes(input.peek(0))) {
+            input.advance(1);
+        }
+        input.acceptToken(LineComment);
+        return;
+    }
+});