Skip to content

Commit

Permalink
Just imported Mathworks jupyterlab plugin as is. The plan is to exten…
Browse files Browse the repository at this point in the history
…d it in the future.
  • Loading branch information
alberti42 committed Nov 2, 2024
1 parent c17cf26 commit 472dfe8
Show file tree
Hide file tree
Showing 12 changed files with 665 additions and 0 deletions.
19 changes: 19 additions & 0 deletions src/codemirror-lang-matlab/codemirror-lang-matlab.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright 2024 The MathWorks, Inc.

import { parser } from '../lezer-matlab/dist/index';
import { LRLanguage, LanguageSupport } from '@codemirror/language';

// Language-level metadata attached to the MATLAB language:
// '%' is declared as the line-comment token.
const matlabLanguageData = {
    commentTokens: { line: '%' }
};

// CodeMirror language definition for MATLAB, built on top of the
// generated Lezer LR parser.
// https://codemirror.net/docs/ref/#language.LRLanguage
export const matlabLanguage = LRLanguage.define({
    name: 'matlab',
    parser: parser,
    languageData: matlabLanguageData
});

/**
 * MATLAB language support.
 *
 * @returns A new LanguageSupport instance wrapping `matlabLanguage`.
 */
export function matlab () {
    const support = new LanguageSupport(matlabLanguage);
    return support;
}
36 changes: 36 additions & 0 deletions src/lezer-matlab/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"name": "@lezer/matlab",
"version": "1.0.0",
"description": "Lezer-based MATLAB grammar",
"main": "dist/index.cjs",
"type": "module",
"exports": {
"import": "./dist/index.js",
"require": "./dist/index.cjs"
},
"module": "dist/index.js",
"types": "dist/index.d.ts",
"author": {
"name": "The MathWorks Inc.",
"email": "[email protected]"
},
"license": "SEE LICENSE IN LICENSE.md",
"devDependencies": {
"@lezer/generator": "^1.0.0",
"@rollup/plugin-node-resolve": "^15.3.0",
"mocha": "^10.2.0",
"rollup": "^4.22.4"
},
"dependencies": {
"@lezer/common": "^1.2.0",
"@lezer/highlight": "^1.0.0",
"@lezer/lr": "^1.0.0"
},
"scripts": {
"build": "lezer-generator src/matlab.grammar -o src/parser && rollup -c && npm run copy-lezer-files-to-build-on-windows",
"build-debug": "lezer-generator src/matlab.grammar --names -o src/parser && rollup -c && npm run copy-lezer-files-to-build-on-windows",
"copy-lezer-files-to-build-on-windows": "copy src\\*.js dist\\ || true",
"prepare": "npm run build",
"test": "mocha test/test-*.js"
}
}
26 changes: 26 additions & 0 deletions src/lezer-matlab/rollup.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright 2024 The MathWorks, Inc.

import { nodeResolve } from '@rollup/plugin-node-resolve';
import path from 'path';

// Generated parser module that serves as the single bundle entry point.
const entryModule = './src/parser.js';

/**
 * Rollup configuration: bundles the generated parser into a CommonJS build
 * (dist/index.cjs) and an ES module build (dist/index.js).
 */
export default {
    input: entryModule,
    output: [
        { format: 'cjs', file: './dist/index.cjs' },
        { format: 'es', file: './dist/index.js' }
    ],
    /**
     * Decide whether an import is left out of the bundle.
     *
     * Local modules are bundled; everything else (bare package specifiers
     * such as '@lezer/lr') stays external.
     *
     * @param {string} id - Import specifier, or an already resolved path.
     * @returns {boolean} True when the module must remain external.
     */
    external(id) {
        if (id === path.resolve(entryModule)) {
            return false;
        }
        // A local module is either a relative specifier or an absolute path.
        // path.isAbsolute() also recognises Windows drive-letter paths
        // ("C:\..."), which a plain "starts with '/'" check would wrongly
        // classify as external packages.
        const isLocal = id.startsWith('.') || path.isAbsolute(id);
        return !isLocal;
    },
    plugins: [
        nodeResolve()
    ]
};
18 changes: 18 additions & 0 deletions src/lezer-matlab/src/highlight.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright 2024 The MathWorks, Inc.

import { styleTags, tags as t } from '@lezer/highlight';

// Mapping from Lezer node names (a key may list several names separated by
// spaces) to the highlight tag used to style them.
const matlabStyleSpec = {
    Keyword: t.keyword,
    Identifier: t.variableName,
    LineComment: t.comment,
    MultilineComment: t.comment,
    SystemCommand: t.meta,
    String: t.string,
    Magic: t.monospace,
    '( )': t.paren,
    '[ ]': t.squareBracket,
    '{ }': t.brace
};

// Associate nodes in the Lezer tree with styles.
// https://lezer.codemirror.net/docs/ref/#highlight.styleTags
export const matlabHighlighting = styleTags(matlabStyleSpec);
47 changes: 47 additions & 0 deletions src/lezer-matlab/src/matlab.grammar
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2024 The MathWorks, Inc.

// Entry point: a Script is a sequence of zero or more expressions.
@top Script { expression* }

// Every construct this grammar recognises. The alternatives are not nested
// inside one another, so the parse is a flat list of these items under Script.
// LineComment is absent here because it is listed in the @skip rule instead.
expression {
Identifier |
String |
MultilineComment |
keyword |
Symbol |
SystemCommand |
Magic
}

// Pull in the node-name -> highlight-tag mapping exported by highlight.js.
@external propSource matlabHighlighting from "./highlight.js"

// Call out to comment parser. Since this is above the tokens block in this grammar, it takes precedence.
// The external tokenizer is declared as the source of the three tokens listed in braces.
@external tokens parseComments from "./parse_comments.js" { MultilineComment, LineComment, Magic }

// See https://lezer.codemirror.net/docs/guide/ for documentation on syntax
// specific to @tokens blocks, and how it differs from regular expression syntax.
@tokens {
// One or more letters, digits or underscores, optionally followed by more of
// the same or single quotes. Because a leading digit is allowed, numeric
// literals are also tokenized as Identifier.
// NOTE(review): the trailing quote presumably keeps a transpose such as x'
// from opening a string - confirm.
Identifier { $[a-zA-Z0-9_]+ $[a-zA-Z0-9_']* }
// Double-quoted text on a single line, with no embedded double quote.
charVector { '"' (!["\n])* '"' }
// Single-quoted text on a single line, with no embedded single quote.
// NOTE(review): the two names look swapped relative to MATLAB terminology
// (double quotes create strings, single quotes create char vectors). Both
// only feed the String rule below, so the tree is unaffected.
stringArray { "'" (!['\n])* "'" }
// "!" followed by everything up to the end of the line.
SystemCommand { "!" (![\n])* }
// Single-character operators, separators and brackets.
Symbol { "+" | "-" | "*" | "=" | ";" | ":" | "(" | ")" | "{" | "}" | "[" | "]" }
space { @whitespace+ }
// Conflict resolution between overlapping tokens: within each list the
// earlier token is preferred over the later one.
@precedence { SystemCommand, Identifier }
@precedence { SystemCommand, space }
@precedence { Identifier, charVector }
@precedence { Identifier, stringArray }
}

// A String node is produced for either quoting style.
String { charVector | stringArray }

// Once a piece of text has been tokenized as an Identifier, it is then
// checked against this specialize table to test whether it is a keyword.
// The keyword node name is "Keyword".
// https://lezer.codemirror.net/docs/guide/#token-specialization
keyword {
@specialize[@name=Keyword]<Identifier, "break" | "case" | "classdef" | "continue" | "global" | "otherwise" | "persistent" | "return" | "spmd" | "arguments" | "enumeration" | "events" | "for" | "function" | "if" | "methods" | "parfor" | "properties" | "try" | "while" | "elseif" | "else" | "end" | "switch" | "catch">
}

// Whitespace and line comments may appear between any two tokens and are
// not part of any expression.
@skip { space | LineComment }

// Ask the generator to detect delimiter tokens automatically
// (see the Lezer guide for the exact rules).
@detectDelim
160 changes: 160 additions & 0 deletions src/lezer-matlab/src/parse_comments.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// Copyright 2024 The MathWorks, Inc.

import { ExternalTokenizer } from '@lezer/lr';
// This file is created by lezer-generator during the build.
import { MultilineComment, LineComment, Magic } from './parser.terms.js';

// Character codes compared against the values returned by input.peek().
const percent = 37; // '%'
const openBrace = 123; // '{'
const closeBrace = 125; // '}'
const newline = 10; // '\n'
const carriageReturn = 13; // '\r'
// Sentinels for "before the first character" and "after the last character".
// Both are -1.
const fileStart = -1;
const fileEnd = -1;

/**
 * Report whether a character code is an ASCII letter (A-Z or a-z).
 *
 * @param {number} char - Character code, as returned by input.peek().
 * @returns {boolean} True for an ASCII letter, false otherwise.
 */
const isAlphabetical = (char) => {
    const symbol = String.fromCharCode(char);
    const isLower = symbol >= 'a' && symbol <= 'z';
    const isUpper = symbol >= 'A' && symbol <= 'Z';
    return isLower || isUpper;
};

// Character codes at which a line scan stops: LF, CR and the two -1 sentinels
// (fileEnd and fileStart are both -1, so that value is listed twice).
const lineEndArray = [newline, carriageReturn, fileEnd, fileStart];

/**
 * Report whether a character code denotes a whitespace character.
 *
 * The code is converted to its character before matching. Testing the number
 * directly would match the regex against its decimal digits (e.g. "32"),
 * which never contain whitespace, so the check could never succeed.
 *
 * @param {number} char - Character code, as returned by input.peek().
 * @returns {boolean} True when the corresponding character matches /\s/.
 */
const isWhitespace = (char) => /\s/.test(String.fromCharCode(char));

/**
 * Check that only whitespace lies between the start of the current line and
 * the current stream position.
 *
 * The scan uses peek() with negative offsets, so the stream position is never
 * changed. (Lezer documents advance() as moving the stream forward, while
 * peek() explicitly supports looking behind.)
 *
 * @param {object} input - Tokenizer input stream; only peek(offset) is used.
 * @returns {boolean} False as soon as a non-whitespace character is found
 *   before a line end (or the start of the file) is reached, true otherwise.
 */
const precededByWhitespaceOnly = (input) => {
    for (let offset = -1; !lineEndArray.includes(input.peek(offset)); offset--) {
        if (!isWhitespace(input.peek(offset))) {
            return false;
        }
    }
    return true;
};

/**
 * Check that only whitespace lies between the current stream position and
 * the end of the current line.
 *
 * The scan uses peek() with increasing offsets, so the stream position is
 * never changed and no backward advance() is needed to restore it.
 *
 * @param {object} input - Tokenizer input stream; only peek(offset) is used.
 * @returns {boolean} False as soon as a non-whitespace character is found
 *   before a line end (or the end of the file) is reached, true otherwise.
 */
const followedByWhitespaceOnly = (input) => {
    for (let offset = 0; !lineEndArray.includes(input.peek(offset)); offset++) {
        if (!isWhitespace(input.peek(offset))) {
            return false;
        }
    }
    return true;
};

/**
 * Check whether the stream is positioned at a block-comment opener: "%{"
 * with nothing but whitespace before it and after it on the same line.
 *
 * The stream position is the same on return as on entry, whatever the result.
 *
 * @param {object} input - Tokenizer input stream (peek / advance).
 * @returns {boolean} True when a valid "%{" line starts at the current position.
 */
const validMultiLineCommentStart = (input) => {
    if (input.peek(0) !== percent || input.peek(1) !== openBrace) {
        return false;
    }
    if (!precededByWhitespaceOnly(input)) {
        return false;
    }
    // Step over the "%{" so the rest of the line can be inspected.
    input.advance(2);
    const restOfLineIsBlank = followedByWhitespaceOnly(input);
    // Rewind on success AND on failure, so a rejected candidate does not
    // leave the stream two characters further on for the caller.
    input.advance(-2);
    return restOfLineIsBlank;
};

/**
 * Check whether the stream is positioned at a block-comment closer: "%}"
 * with nothing but whitespace before it and after it on the same line.
 *
 * The stream position is the same on return as on entry, whatever the result.
 *
 * @param {object} input - Tokenizer input stream (peek / advance).
 * @returns {boolean} True when a valid "%}" line starts at the current position.
 */
const validMultiLineCommentEnd = (input) => {
    if (input.peek(0) !== percent || input.peek(1) !== closeBrace) {
        return false;
    }
    if (!precededByWhitespaceOnly(input)) {
        return false;
    }
    // Step over the "%}" so the rest of the line can be inspected.
    input.advance(2);
    const restOfLineIsBlank = followedByWhitespaceOnly(input);
    // Rewind on success AND on failure, so a rejected candidate does not
    // leave the stream two characters further on for the caller.
    input.advance(-2);
    return restOfLineIsBlank;
};

/**
 * Check whether the stream is positioned at a magic command: two percent
 * signs immediately followed by an ASCII letter.
 *
 * Magics are rejected outright once `input.notMagic` has been set to anything
 * other than undefined.
 *
 * @param {object} input - Tokenizer input stream; reads `notMagic` and peek().
 * @returns {boolean} True when a magic starts at the current position.
 */
const validMagic = (input) => {
    if (input.notMagic !== undefined) {
        return false;
    }
    return (
        input.peek(0) === percent &&
        input.peek(1) === percent &&
        isAlphabetical(input.peek(2))
    );
};

/**
 * External tokenizer for everything that starts with a percent sign.
 *
 * Emits one of three tokens: Magic, MultilineComment or LineComment.
 * Anything not starting with '%' is left to the grammar's own tokens.
 *
 * As a side effect it sets `input.notMagic` once MATLAB code or a comment has
 * been seen; validMagic() refuses magics while that flag is set.
 */
export const parseComments = new ExternalTokenizer((input) => {
    // Move the stream up to (not past) the next line end or the end of input.
    const skipToLineEnd = () => {
        while (!lineEndArray.includes(input.peek(0))) {
            input.advance(1);
        }
    };

    // Not a percent sign: this is MATLAB code, nothing to tokenize here.
    if (input.peek(0) !== percent) {
        if (lineEndArray.includes(input.peek(0))) {
            return;
        }
        // If input.input.string exists and is empty, leave the flag alone;
        // in every other case record that code has been seen.
        const sourceIsEmptyString =
            'input' in input &&
            'string' in input.input &&
            input.input.string.length === 0;
        if (!sourceIsEmptyString) {
            input.notMagic = true;
        }
        return;
    }

    // Magic: the token runs to the end of the line.
    if (validMagic(input)) {
        skipToLineEnd();
        input.acceptToken(Magic);
        return;
    }

    // Plain line comment. Comments are also treated as MATLAB code.
    if (!validMultiLineCommentStart(input)) {
        input.notMagic = true;
        skipToLineEnd();
        input.acceptToken(LineComment);
        return;
    }

    // Block comment: consume the opening %{
    input.advance(2);
    // Multiline comments are treated as MATLAB code.
    input.notMagic = true;
    // Continue until the end of the input or until the comment is closed,
    // tracking how deeply block comments are nested.
    let depth = 1;
    while (input.peek(0) !== fileEnd) {
        if (validMultiLineCommentEnd(input)) {
            input.advance(2);
            depth -= 1;
            if (depth === 0) {
                break;
            }
        } else if (validMultiLineCommentStart(input)) {
            depth += 1;
        }
        input.advance(1);
    }

    // Emit the token for the entire multiline comment.
    input.acceptToken(MultilineComment);
});
Loading

0 comments on commit 472dfe8

Please sign in to comment.