From 305c20932e90f0e0dfbacbdd78b5f2fe62234070 Mon Sep 17 00:00:00 2001 From: guillermooo Date: Mon, 16 Nov 2015 21:04:04 +0100 Subject: [PATCH 1/3] refactor cmd line lexer + parser --- src/cmd_line/command_node.ts | 5 +- src/cmd_line/lexer.ts | 304 ++++++++++---------- src/cmd_line/main.ts | 2 +- src/cmd_line/parser.ts | 4 +- src/cmd_line/{lexer_state.ts => scanner.ts} | 147 +++++----- src/cmd_line/token.ts | 125 ++------ test/lexer.test.ts | 105 +++++++ test/lexer_state.test.ts | 67 ----- test/scanner.test.ts | 170 +++++------ 9 files changed, 428 insertions(+), 501 deletions(-) rename src/cmd_line/{lexer_state.ts => scanner.ts} (81%) create mode 100644 test/lexer.test.ts delete mode 100644 test/lexer_state.test.ts diff --git a/src/cmd_line/command_node.ts b/src/cmd_line/command_node.ts index 2d8ad3d9eb4..53b5fd54438 100644 --- a/src/cmd_line/command_node.ts +++ b/src/cmd_line/command_node.ts @@ -17,7 +17,10 @@ export class WriteCommand implements node.CommandBase { } runOn(textEditor : vscode.TextEditor) : void { - if (this.args || !textEditor.document.fileName) util.showInfo("Not implemented."); + if (this.args || !textEditor.document.fileName) { + util.showInfo("Not implemented."); + return; + } textEditor.document.save(); } } \ No newline at end of file diff --git a/src/cmd_line/lexer.ts b/src/cmd_line/lexer.ts index 8bc111a9ed6..b15491fc8ef 100644 --- a/src/cmd_line/lexer.ts +++ b/src/cmd_line/lexer.ts @@ -1,171 +1,175 @@ -import {State} from './lexer_state'; -import * as token from './token'; +import {Scanner} from './scanner'; +import {Token, TokenType} from './token'; -interface ScanFunction { - (state: State, tokens: token.Token[]) : ScanFunction; +// Describes a function that can lex part of a Vim command line. +interface LexFunction { + (state: Scanner, tokens: Token[]) : LexFunction; } -export function scan(input : string) : token.Token[] { - var state = new State(input); - var tokens : token.Token[] = []; - var f : ScanFunction = scanRange; // first scanning function +export function lex(input : string) : Token[] { + // we use a character scanner as state for the lexer + var state = new Scanner(input); + var tokens : Token[] = []; + var f : LexFunction = LexFunctions.lexRange; // first lexing function while (f) { - // Each scanning function returns the next scanning function or null. + // Each lexing function returns the next lexing function or null. f = f(state, tokens); } return tokens; } -function scanRange(state : State, tokens : token.Token[]): ScanFunction { - while (true) { - if (state.isAtEof) { - break; - } - var c = state.next(); - switch (c) { - case ',': - tokens.push(new token.TokenComma()); - state.ignore(); - continue; - case '%': - tokens.push(new token.TokenPercent()); - state.ignore(); - continue; - case '$': - tokens.push(new token.TokenDollar()); - state.ignore(); - continue; - case '.': - tokens.push(new token.TokenDot()); - state.ignore(); - continue; - case '/': - return scanForwardSearch; - case '?': - return scanReverseSearch - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return scanLineRef; - case '+': - tokens.push(new token.TokenPlus()); - state.ignore(); - continue; - case '-': - tokens.push(new token.TokenMinus()); - state.ignore(); - continue; - default: - state.backup(); - return scanCommand; - } - } - return null; +function emitToken(type : TokenType, state : Scanner) : Token { + var content = state.emit(); + return (content.length > 0) ? new Token(type, content) : null; } -function scanLineRef(state : State, tokens : token.Token[]): ScanFunction { - while (true) { - if (state.isAtEof) { - var emitted = state.emit(); - if (emitted) tokens.push(new token.TokenLineNumber(emitted)); - return null; - } - var c = state.next(); - switch (c) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - continue; - default: - state.backup(); - var emitted = state.emit(); - if (emitted) tokens.push(new token.TokenLineNumber(emitted)); - return scanRange; +module LexFunctions { + // starts lexing a Vim command line and forwards later parts to other scanning functions. + export function lexRange(state : Scanner, tokens : Token[]): LexFunction { + while (true) { + if (state.isAtEof) { + break; + } + var c = state.next(); + switch (c) { + case ',': + tokens.push(emitToken(TokenType.Comma, state)); + continue; + case '%': + tokens.push(emitToken(TokenType.Percent, state)); + continue; + case '$': + tokens.push(emitToken(TokenType.Dollar, state)); + continue; + case '.': + tokens.push(emitToken(TokenType.Dot, state)); + continue; + case '/': + return lexForwardSearch; + case '?': + return lexReverseSearch + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return lexLineRef; + case '+': + tokens.push(emitToken(TokenType.Plus, state)); + continue; + case '-': + tokens.push(emitToken(TokenType.Minus, state)); + continue; + default: + state.backup(); + return lexCommand; + } } + return null; } - return null; -} - -function scanCommand(state : State, tokens : token.Token[]): ScanFunction { - state.skipWhiteSpace(); - while (true) { - if (state.isAtEof) { - var emitted = state.emit(); - if (emitted) tokens.push(new token.TokenCommandName(emitted)); - break; - } - var c = state.next(); - var lc = c.toLowerCase(); - if (lc >= 'a' && lc <= 'z') { - continue; + + function lexLineRef(state : Scanner, tokens : Token[]): LexFunction { + while (true) { + if (state.isAtEof) { + var emitted = emitToken(TokenType.LineNumber, state); + if (emitted) tokens.push(emitted); + return null; + } + var c = state.next(); + switch (c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + continue; + default: + state.backup(); + // we're guaranteed to have a valid token here; don't check for null. + tokens.push(emitToken(TokenType.LineNumber, state)); + return lexRange; + } } - else { - state.backup(); - tokens.push(new token.TokenCommandName(state.emit())); - state.skipWhiteSpace(); - while (!state.isAtEof) state.next(); - var args = state.emit(); - if (args) tokens.push(new token.TokenCommandArgs(args)); - break; - } + return null; } - return null; -} - -function scanForwardSearch(state : State, tokens : token.Token[]): ScanFunction { - state.skip('/'); - var escaping : boolean; - var searchTerm = ''; - while(!state.isAtEof) { - var c = state.next(); - if (c == '/' && !escaping) break; - if (c == '\\') { - escaping = true; - continue; - } - else { - escaping = false; + + function lexCommand(state : Scanner, tokens : Token[]): LexFunction { + state.skipWhiteSpace(); + while (true) { + if (state.isAtEof) { + var emitted = emitToken(TokenType.CommandName, state); + if (emitted) tokens.push(emitted); + break; + } + var c = state.next(); + var lc = c.toLowerCase(); + if (lc >= 'a' && lc <= 'z') { + continue; + } + else { + state.backup(); + tokens.push(emitToken(TokenType.CommandName, state)); + state.skipWhiteSpace(); + while (!state.isAtEof) state.next(); + var args = emitToken(TokenType.CommandArgs, state); + if (args) tokens.push(args); + break; + } } - searchTerm += c != '\\' ? c : '\\\\'; + return null; } - tokens.push(new token.TokenSlashSearch(searchTerm)); - state.ignore(); - if (!state.isAtEof) state.skip('/'); - return scanRange; -} - -function scanReverseSearch(state : State, tokens : token.Token[]): ScanFunction { - state.skip('?'); - var escaping : boolean; - var searchTerm = ''; - while(!state.isAtEof) { - var c = state.next(); - if (c == '?' && !escaping) break; - if (c == '\\') { - escaping = true; - continue; + + function lexForwardSearch(state : Scanner, tokens : Token[]): LexFunction { + state.skip('/'); + var escaping : boolean; + var searchTerm = ''; + while(!state.isAtEof) { + var c = state.next(); + if (c == '/' && !escaping) break; + if (c == '\\') { + escaping = true; + continue; + } + else { + escaping = false; + } + searchTerm += c != '\\' ? c : '\\\\'; } - else { - escaping = false; + tokens.push(new Token(TokenType.ForwardSearch, searchTerm)); + state.ignore(); + if (!state.isAtEof) state.skip('/'); + return lexRange; + } + + function lexReverseSearch(state : Scanner, tokens : Token[]): LexFunction { + state.skip('?'); + var escaping : boolean; + var searchTerm = ''; + while(!state.isAtEof) { + var c = state.next(); + if (c == '?' && !escaping) break; + if (c == '\\') { + escaping = true; + continue; + } + else { + escaping = false; + } + searchTerm += c != '\\' ? c : '\\\\'; } - searchTerm += c != '\\' ? c : '\\\\'; + tokens.push(new Token(TokenType.ReverseSearch, searchTerm)); + state.ignore(); + if (!state.isAtEof) state.skip('?'); + return lexRange; } - tokens.push(new token.TokenQuestionMarkSearch(searchTerm)); - state.ignore(); - if (!state.isAtEof) state.skip('?'); - return scanRange; } \ No newline at end of file diff --git a/src/cmd_line/main.ts b/src/cmd_line/main.ts index ce4e6da5aab..9667fcf4472 100644 --- a/src/cmd_line/main.ts +++ b/src/cmd_line/main.ts @@ -20,6 +20,7 @@ function runCmdLine(s : string) : void { } catch (e) { util.showInfo(e); + return; } if (cmd.isEmpty) { @@ -27,6 +28,5 @@ function runCmdLine(s : string) : void { } else { cmd.runOn(vscode.window.activeTextEditor); - // vscode.window.showInformationMessage(s); } } \ No newline at end of file diff --git a/src/cmd_line/parser.ts b/src/cmd_line/parser.ts index 1483517b6f4..abca6314bc1 100644 --- a/src/cmd_line/parser.ts +++ b/src/cmd_line/parser.ts @@ -84,13 +84,13 @@ class ParserState { } lex(input : string) { - this.tokens = lexer.scan(input); + this.tokens = lexer.lex(input); } next() : token.Token { if (this.pos >= this.tokens.length) { this.pos = this.tokens.length; - return new token.TokenEof(); + return new token.Token(token.TokenType.Eof, '__EOF__'); } let tok = this.tokens[this.pos]; this.pos++; diff --git a/src/cmd_line/lexer_state.ts b/src/cmd_line/scanner.ts similarity index 81% rename from src/cmd_line/lexer_state.ts rename to src/cmd_line/scanner.ts index 49bcd3d7fb6..de4fe9dd1c9 100644 --- a/src/cmd_line/lexer_state.ts +++ b/src/cmd_line/scanner.ts @@ -1,73 +1,76 @@ - -// Lexer state. -export class State { - static EOF : string = '__EOF__'; - start : number = 0; - pos : number = 0; - input : string; - - constructor(input : string) { - this.input = input; - } - - // Returns the next character in the input, or EOF. - next() : string { - if (this.isAtEof) { - this.pos = this.input.length; - return State.EOF; - } - let c = this.input[this.pos]; - this.pos++; - return c; - } - - // Returns whether we've reached EOF. - get isAtEof() : boolean { - return this.pos >= this.input.length; - } - - // Ignores the span of text between the current start and the current position. - ignore() : void { - this.start = this.pos; - } - - // Returns the span of text between the current start and the current position. - emit() : string { - let s = this.input.substring(this.start, this.pos); - this.ignore(); - return s; - } - - backup(): void { - this.pos--; - } - - skip(c : string) : void { - var s = this.next(); - while (!this.isAtEof) { - if (s !== c) break; - s = this.next(); - } - this.backup(); - this.ignore(); - } - - skipRun(...chars : string[]) : void { - while(!this.isAtEof) { - var c = this.next(); - if (chars.indexOf(c) == -1) break; - } - this.backup(); - this.ignore(); - } - - skipWhiteSpace(): void { - while (true) { - var c = this.next(); - if (c == ' ' || c == '\t') continue; - break; - } - this.backup(); - this.ignore(); - } + +// Provides state and behavior to scan an input string character by character. +export class Scanner { + static EOF : string = '__EOF__'; + start : number = 0; + pos : number = 0; + input : string; + + constructor(input : string) { + this.input = input; + } + + // Returns the next character in the input, or EOF. + next() : string { + if (this.isAtEof) { + this.pos = this.input.length; + return Scanner.EOF; + } + let c = this.input[this.pos]; + this.pos++; + return c; + } + + // Returns whether we've reached EOF. + get isAtEof() : boolean { + return this.pos >= this.input.length; + } + + // Ignores the span of text between the current start and the current position. + ignore() : void { + this.start = this.pos; + } + + // Returns the span of text between the current start and the current position. + emit() : string { + let s = this.input.substring(this.start, this.pos); + this.ignore(); + return s; + } + + backup(): void { + this.pos--; + } + + // skips over c and ignores the text span + skip(c : string) : void { + var s = this.next(); + while (!this.isAtEof) { + if (s !== c) break; + s = this.next(); + } + this.backup(); + this.ignore(); + } + + // skips text while any of chars matches and ignores the text span + skipRun(...chars : string[]) : void { + while(!this.isAtEof) { + var c = this.next(); + if (chars.indexOf(c) == -1) break; + } + this.backup(); + this.ignore(); + } + + // skips over whitespace (tab, space) and ignores the text span + skipWhiteSpace(): void { + while (true) { + var c = this.next(); + if (c == ' ' || c == '\t') continue; + break; + } + this.backup(); + this.ignore(); + } } \ No newline at end of file diff --git a/src/cmd_line/token.ts b/src/cmd_line/token.ts index 1dfbb44c9a3..3375ed4bae7 100644 --- a/src/cmd_line/token.ts +++ b/src/cmd_line/token.ts @@ -1,111 +1,28 @@ // Tokens for the Vim command line. export enum TokenType { - Eof = 0, - LineNumber = 1, - Dot = 2, - Dollar = 3, - Percent = 4, - Comma = 5, - Plus = 6, - Minus = 7, - CommandName = 8, - CommandArgs = 9, - SlashSearch = 10, - QuestionMarkSearch = 11, - Offset = 12 -} - -export interface Token { - content : string; + Unknown, + Eof, + LineNumber, + Dot, + Dollar, + Percent, + Comma, + Plus, + Minus, + CommandName, + CommandArgs, + ForwardSearch, + ReverseSearch, + Offset +} + +export class Token { type : TokenType; -} - -// TODO: test and implement tokenization for this. -export class TokenOffset implements Token { - content : string; - type : TokenType = TokenType.Offset; - tokens : Token[]; - constructor() { - this.tokens = []; - this.content = this.tokens.join(' '); - } -} - -export class TokenEof implements Token { - type : TokenType = TokenType.Eof; - get content() : string { return '__EOF__' }; -} - -// Line referece. -export class TokenLineNumber implements Token { - type : TokenType = TokenType.LineNumber; content : string; - constructor(content : string) { + + constructor(type : TokenType, content : string) { + this.type = type; this.content = content; - } -} - -// Line referece. -export class TokenDot implements Token { - type : TokenType = TokenType.Dot; - get content() : string { return '.' }; -} - -// Line referece. -export class TokenDollar implements Token { - type = TokenType.Dollar; - get content() : string { return '$' }; -} - -export class TokenPercent implements Token { - type = TokenType.Percent; - get content() : string { return '%' }; -} - -export class TokenComma implements Token { - type = TokenType.Comma; - get content() : string { return ',' }; -} - -export class TokenPlus implements Token { - type = TokenType.Plus; - get content() : string { return '+' }; -} - -export class TokenMinus implements Token { - type = TokenType.Minus; - get content() : string { return '-' }; -} - -export class TokenCommandName implements Token { - type = TokenType.CommandName; - content : string; - constructor(content : string) { - this.content = content; - } -} - -export class TokenCommandArgs implements Token { - type = TokenType.CommandArgs; - content : string; - constructor(content : string) { - this.content = content; - } -} - -export class TokenSlashSearch implements Token { - type = TokenType.SlashSearch; - content : string; - constructor(content : string) { - this.content = content; - } + } } - -export class TokenQuestionMarkSearch implements Token { - type = TokenType.QuestionMarkSearch; - content : string; - constructor(content : string) { - this.content = content; - } -} \ No newline at end of file diff --git a/test/lexer.test.ts b/test/lexer.test.ts new file mode 100644 index 00000000000..8e9ed0f55fc --- /dev/null +++ b/test/lexer.test.ts @@ -0,0 +1,105 @@ +// +// Note: This example test is leveraging the Mocha test framework. +// Please refer to their documentation on https://mochajs.org/ for help. +// + +// The module 'assert' provides assertion methods from node +import * as assert from 'assert'; + +// You can import and use all API from the 'vscode' module +// as well as import your extension to test it +import * as vscode from 'vscode'; +import * as lexer from '../src/cmd_line/lexer' +import {Token, TokenType} from '../src/cmd_line/token' + +suite("Cmd line tests - lexing", () => { + + test("can lex empty string", () => { + var tokens = lexer.lex(""); + assert.equal(tokens.length, 0); + }); + + test("can lex comma", () => { + var tokens = lexer.lex(","); + assert.equal(tokens[0].content, new Token(TokenType.Comma, ',').content); + }); + + test("can lex percent", () => { + var tokens = lexer.lex("%"); + assert.equal(tokens[0].content, new Token(TokenType.Percent, '%').content); + }); + + test("can lex dollar", () => { + var tokens = lexer.lex("$"); + assert.equal(tokens[0].content, new Token(TokenType.Dollar, '$').content); + }); + + test("can lex dot", () => { + var tokens = lexer.lex("."); + assert.equal(tokens[0].content, new Token(TokenType.Dot, '.').content); + }); + + test("can lex one number", () => { + var tokens = lexer.lex("1"); + assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "1").content); + }); + + test("can lex longer number", () => { + var tokens = lexer.lex("100"); + assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "100").content); + }); + + test("can lex plus", () => { + var tokens = lexer.lex("+"); + assert.equal(tokens[0].content, new Token(TokenType.Plus, '+').content); + }); + + test("can lex minus", () => { + var tokens = lexer.lex("-"); + assert.equal(tokens[0].content, new Token(TokenType.Minus, '-').content); + }); + + test("can lex forward search", () => { + var tokens = lexer.lex("/horses/"); + assert.equal(tokens[0].content, new Token(TokenType.ForwardSearch, "horses").content); + }); + + test("can lex forward search escaping", () => { + var tokens = lexer.lex("/hor\\/ses/"); + assert.equal(tokens[0].content, new Token(TokenType.ForwardSearch, "hor/ses").content); + }); + + test("can lex reverse search", () => { + var tokens = lexer.lex("?worms?"); + assert.equal(tokens[0].content, new Token(TokenType.ReverseSearch, "worms").content); + }); + + test("can lex reverse search escaping", () => { + var tokens = lexer.lex("?wor\\?ms?"); + assert.equal(tokens[0].content, new Token(TokenType.ReverseSearch, "wor?ms").content); + }); + + test("can lex command name", () => { + var tokens = lexer.lex("w"); + assert.equal(tokens[0].content, new Token(TokenType.CommandName, "w").content); + }); + + test("can lex command args", () => { + var tokens = lexer.lex("w something"); + assert.equal(tokens[0].content, new Token(TokenType.CommandName, "w").content); + assert.equal(tokens[1].content, new Token(TokenType.CommandArgs, "something").content); + }); + + test("can lex long command name and args", () => { + var tokens = lexer.lex("write12 something here"); + assert.equal(tokens[0].content, new Token(TokenType.CommandName, "write").content); + assert.equal(tokens[1].content, new Token(TokenType.CommandArgs, "12 something here").content); + }); + + test("can lex left and right line refs", () => { + var tokens = lexer.lex("20,30"); + assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "20").content); + assert.equal(tokens[1].content, new Token(TokenType.LineNumber, ",").content); + assert.equal(tokens[2].content, new Token(TokenType.LineNumber, "30").content); + }); +}); \ No newline at end of file diff --git a/test/lexer_state.test.ts b/test/lexer_state.test.ts deleted file mode 100644 index dfafda00255..00000000000 --- a/test/lexer_state.test.ts +++ /dev/null @@ -1,67 +0,0 @@ -// -// Note: This example test is leveraging the Mocha test framework. -// Please refer to their documentation on https://mochajs.org/ for help. -// - -// The module 'assert' provides assertion methods from node -import * as assert from 'assert'; - -// You can import and use all API from the 'vscode' module -// as well as import your extension to test it -import * as vscode from 'vscode'; -import * as myExtension from '../extension'; -import * as lexerState from '../src/cmd_line/lexer_state' - -suite("Cmd line tests - lexer state", () => { - - test("can init lexer state", () => { - var state = new lexerState.State("dog"); - assert.equal(state.input, "dog"); - }); - - test("can detect EOF with empty input", () => { - var state = new lexerState.State(""); - assert.ok(state.isAtEof); - }); - - test("next() returns EOF at EOF", () => { - var state = new lexerState.State(""); - assert.equal(state.next(), lexerState.State.EOF); - assert.equal(state.next(), lexerState.State.EOF); - assert.equal(state.next(), lexerState.State.EOF); - }); - - test("next() can scan", () => { - var state = new lexerState.State("dog"); - assert.equal(state.next(), "d"); - assert.equal(state.next(), "o"); - assert.equal(state.next(), "g") - assert.equal(state.next(), lexerState.State.EOF); - }); - - test("can emit", () => { - var state = new lexerState.State("dog cat"); - state.next(); - state.next(); - state.next(); - assert.equal(state.emit(), "dog"); - state.next(); - state.next(); - state.next(); - state.next(); - assert.equal(state.emit(), " cat"); - }); - - test("can ignore", () => { - var state = new lexerState.State("dog cat"); - state.next(); - state.next(); - state.next(); - state.next(); - state.ignore(); - state.next(); - state.next(); - state.next(); - assert.equal(state.emit(), "cat"); - }); -}); \ No newline at end of file diff --git a/test/scanner.test.ts b/test/scanner.test.ts index 40842822a4c..4123ed7e2e6 100644 --- a/test/scanner.test.ts +++ b/test/scanner.test.ts @@ -1,105 +1,67 @@ -// -// Note: This example test is leveraging the Mocha test framework. -// Please refer to their documentation on https://mochajs.org/ for help. -// - -// The module 'assert' provides assertion methods from node -import * as assert from 'assert'; - -// You can import and use all API from the 'vscode' module -// as well as import your extension to test it -import * as vscode from 'vscode'; -import * as lexer from '../src/cmd_line/lexer' -import * as token from '../src/cmd_line/token' - -suite("Cmd line tests - lexing", () => { - - test("can lex empty string", () => { - var tokens = lexer.scan(""); - assert.equal(tokens.length, 0); - }); - - test("can lex comma", () => { - var tokens = lexer.scan(","); - assert.equal(tokens[0].content, new token.TokenComma().content); - }); - - test("can lex percent", () => { - var tokens = lexer.scan("%"); - assert.equal(tokens[0].content, new token.TokenPercent().content); - }); - - test("can lex dollar", () => { - var tokens = lexer.scan("$"); - assert.equal(tokens[0].content, new token.TokenDollar().content); - }); - - test("can lex dot", () => { - var tokens = lexer.scan("."); - assert.equal(tokens[0].content, new token.TokenDot().content); - }); - - test("can lex one number", () => { - var tokens = lexer.scan("1"); - assert.equal(tokens[0].content, new token.TokenLineNumber("1").content); - }); - - test("can lex longer number", () => { - var tokens = lexer.scan("100"); - assert.equal(tokens[0].content, new token.TokenLineNumber("100").content); - }); - - test("can lex plus", () => { - var tokens = lexer.scan("+"); - assert.equal(tokens[0].content, new token.TokenPlus().content); - }); - - test("can lex minus", () => { - var tokens = lexer.scan("-"); - assert.equal(tokens[0].content, new token.TokenMinus().content); - }); - - test("can lex forward search", () => { - var tokens = lexer.scan("/horses/"); - assert.equal(tokens[0].content, new token.TokenSlashSearch("horses").content); - }); - - test("can lex forward search escaping", () => { - var tokens = lexer.scan("/hor\\/ses/"); - assert.equal(tokens[0].content, new token.TokenSlashSearch("hor/ses").content); - }); - - test("can lex reverse search", () => { - var tokens = lexer.scan("?worms?"); - assert.equal(tokens[0].content, new token.TokenQuestionMarkSearch("worms").content); - }); - - test("can lex reverse search escaping", () => { - var tokens = lexer.scan("?wor\\?ms?"); - assert.equal(tokens[0].content, new token.TokenQuestionMarkSearch("wor?ms").content); - }); - - test("can lex command name", () => { - var tokens = lexer.scan("w"); - assert.equal(tokens[0].content, new token.TokenCommandName("w").content); - }); - - test("can lex command args", () => { - var tokens = lexer.scan("w something"); - assert.equal(tokens[0].content, new token.TokenCommandName("w").content); - assert.equal(tokens[1].content, new token.TokenCommandArgs("something").content); - }); - - test("can lex long command name and args", () => { - var tokens = lexer.scan("write12 something here"); - assert.equal(tokens[0].content, new token.TokenCommandName("write").content); - assert.equal(tokens[1].content, new token.TokenCommandArgs("12 something here").content); - }); - - test("can lex left and right line refs", () => { - var tokens = lexer.scan("20,30"); - assert.equal(tokens[0].content, new token.TokenLineNumber("20").content); - assert.equal(tokens[1].content, new token.TokenLineNumber(",").content); - assert.equal(tokens[2].content, new token.TokenLineNumber("30").content); - }); +// +// Note: This example test is leveraging the Mocha test framework. +// Please refer to their documentation on https://mochajs.org/ for help. +// + +// The module 'assert' provides assertion methods from node +import * as assert from 'assert'; + +// You can import and use all API from the 'vscode' module +// as well as import your extension to test it +import * as vscode from 'vscode'; +import * as myExtension from '../extension'; +import * as lexerState from '../src/cmd_line/scanner' + +suite("Cmd line tests - lexer state", () => { + + test("can init lexer state", () => { + var state = new lexerState.Scanner("dog"); + assert.equal(state.input, "dog"); + }); + + test("can detect EOF with empty input", () => { + var state = new lexerState.Scanner(""); + assert.ok(state.isAtEof); + }); + + test("next() returns EOF at EOF", () => { + var state = new lexerState.Scanner(""); + assert.equal(state.next(), lexerState.Scanner.EOF); + assert.equal(state.next(), lexerState.Scanner.EOF); + assert.equal(state.next(), lexerState.Scanner.EOF); + }); + + test("next() can scan", () => { + var state = new lexerState.Scanner("dog"); + assert.equal(state.next(), "d"); + assert.equal(state.next(), "o"); + assert.equal(state.next(), "g") + assert.equal(state.next(), lexerState.Scanner.EOF); + }); + + test("can emit", () => { + var state = new lexerState.Scanner("dog cat"); + state.next(); + state.next(); + state.next(); + assert.equal(state.emit(), "dog"); + state.next(); + state.next(); + state.next(); + state.next(); + assert.equal(state.emit(), " cat"); + }); + + test("can ignore", () => { + var state = new lexerState.Scanner("dog cat"); + state.next(); + state.next(); + state.next(); + state.next(); + state.ignore(); + state.next(); + state.next(); + state.next(); + assert.equal(state.emit(), "cat"); + }); }); \ No newline at end of file From b7d8c3efe96221f3ec9850dae2de7b03e68c8596 Mon Sep 17 00:00:00 2001 From: guillermooo Date: Mon, 16 Nov 2015 23:32:15 +0100 Subject: [PATCH 2/3] tslint + whitespace fixes --- src/cmd_line/command_node.ts | 48 ++--- src/cmd_line/lexer.ts | 366 ++++++++++++++++++----------------- src/cmd_line/main.ts | 64 +++--- src/cmd_line/node.ts | 172 ++++++++-------- src/cmd_line/parser.ts | 182 +++++++++-------- src/cmd_line/scanner.ts | 152 ++++++++------- src/cmd_line/subparsers.ts | 14 +- src/cmd_line/token.ts | 56 +++--- test/extension.test.ts | 13 +- test/index.ts | 6 +- test/lexer.test.ts | 185 +++++++++--------- test/parser.test.ts | 71 ++++--- test/scanner.test.ts | 96 ++++----- 13 files changed, 716 insertions(+), 709 deletions(-) diff --git a/src/cmd_line/command_node.ts b/src/cmd_line/command_node.ts index 53b5fd54438..734d8fc0b5e 100644 --- a/src/cmd_line/command_node.ts +++ b/src/cmd_line/command_node.ts @@ -1,26 +1,26 @@ -import * as vscode from 'vscode'; -import * as token from './token'; -import * as node from './node'; -import * as lexer from './lexer'; -import * as util from '../util'; +import * as vscode from "vscode"; +import * as token from "./token"; +import * as node from "./node"; +import * as lexer from "./lexer"; +import * as util from "../util"; export class WriteCommand implements node.CommandBase { - name : string; - shortName : string; - args : Object; - - constructor(args : Object = null) { - // TODO: implement other arguments. - this.name = 'write'; - this.shortName = 'w'; - this.args = args; - } - - runOn(textEditor : vscode.TextEditor) : void { - if (this.args || !textEditor.document.fileName) { - util.showInfo("Not implemented."); - return; - } - textEditor.document.save(); - } -} \ No newline at end of file + name : string; + shortName : string; + args : Object; + + constructor(args : Object = null) { + // TODO: implement other arguments. + this.name = "write"; + this.shortName = "w"; + this.args = args; + } + + runOn(textEditor : vscode.TextEditor) : void { + if (this.args || !textEditor.document.fileName) { + util.showInfo("Not implemented."); + return; + } + textEditor.document.save(); + } +} diff --git a/src/cmd_line/lexer.ts b/src/cmd_line/lexer.ts index b15491fc8ef..5dcccd8190b 100644 --- a/src/cmd_line/lexer.ts +++ b/src/cmd_line/lexer.ts @@ -1,175 +1,191 @@ -import {Scanner} from './scanner'; -import {Token, TokenType} from './token'; - -// Describes a function that can lex part of a Vim command line. -interface LexFunction { - (state: Scanner, tokens: Token[]) : LexFunction; -} - -export function lex(input : string) : Token[] { - // we use a character scanner as state for the lexer - var state = new Scanner(input); - var tokens : Token[] = []; - var f : LexFunction = LexFunctions.lexRange; // first lexing function - while (f) { - // Each lexing function returns the next lexing function or null. - f = f(state, tokens); - } - return tokens; -} - -function emitToken(type : TokenType, state : Scanner) : Token { - var content = state.emit(); - return (content.length > 0) ? new Token(type, content) : null; -} - -module LexFunctions { - // starts lexing a Vim command line and forwards later parts to other scanning functions. - export function lexRange(state : Scanner, tokens : Token[]): LexFunction { - while (true) { - if (state.isAtEof) { - break; - } - var c = state.next(); - switch (c) { - case ',': - tokens.push(emitToken(TokenType.Comma, state)); - continue; - case '%': - tokens.push(emitToken(TokenType.Percent, state)); - continue; - case '$': - tokens.push(emitToken(TokenType.Dollar, state)); - continue; - case '.': - tokens.push(emitToken(TokenType.Dot, state)); - continue; - case '/': - return lexForwardSearch; - case '?': - return lexReverseSearch - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return lexLineRef; - case '+': - tokens.push(emitToken(TokenType.Plus, state)); - continue; - case '-': - tokens.push(emitToken(TokenType.Minus, state)); - continue; - default: - state.backup(); - return lexCommand; - } - } - return null; - } - - function lexLineRef(state : Scanner, tokens : Token[]): LexFunction { - while (true) { - if (state.isAtEof) { - var emitted = emitToken(TokenType.LineNumber, state); - if (emitted) tokens.push(emitted); - return null; - } - var c = state.next(); - switch (c) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - continue; - default: - state.backup(); - // we're guaranteed to have a valid token here; don't check for null. - tokens.push(emitToken(TokenType.LineNumber, state)); - return lexRange; - } - } - return null; - } - - function lexCommand(state : Scanner, tokens : Token[]): LexFunction { - state.skipWhiteSpace(); - while (true) { - if (state.isAtEof) { - var emitted = emitToken(TokenType.CommandName, state); - if (emitted) tokens.push(emitted); - break; - } - var c = state.next(); - var lc = c.toLowerCase(); - if (lc >= 'a' && lc <= 'z') { - continue; - } - else { - state.backup(); - tokens.push(emitToken(TokenType.CommandName, state)); - state.skipWhiteSpace(); - while (!state.isAtEof) state.next(); - var args = emitToken(TokenType.CommandArgs, state); - if (args) tokens.push(args); - break; - } - } - return null; - } - - function lexForwardSearch(state : Scanner, tokens : Token[]): LexFunction { - state.skip('/'); - var escaping : boolean; - var searchTerm = ''; - while(!state.isAtEof) { - var c = state.next(); - if (c == '/' && !escaping) break; - if (c == '\\') { - escaping = true; - continue; - } - else { - escaping = false; - } - searchTerm += c != '\\' ? c : '\\\\'; - } - tokens.push(new Token(TokenType.ForwardSearch, searchTerm)); - state.ignore(); - if (!state.isAtEof) state.skip('/'); - return lexRange; - } - - function lexReverseSearch(state : Scanner, tokens : Token[]): LexFunction { - state.skip('?'); - var escaping : boolean; - var searchTerm = ''; - while(!state.isAtEof) { - var c = state.next(); - if (c == '?' && !escaping) break; - if (c == '\\') { - escaping = true; - continue; - } - else { - escaping = false; - } - searchTerm += c != '\\' ? c : '\\\\'; - } - tokens.push(new Token(TokenType.ReverseSearch, searchTerm)); - state.ignore(); - if (!state.isAtEof) state.skip('?'); - return lexRange; - } -} \ No newline at end of file +import {Scanner} from "./scanner"; +import {Token, TokenType} from "./token"; + +// Describes a function that can lex part of a Vim command line. +interface LexFunction { + (state: Scanner, tokens: Token[]) : LexFunction; +} + +export function lex(input : string) : Token[] { + // We use a character scanner as state for the lexer. + var state = new Scanner(input); + var tokens : Token[] = []; + var f : LexFunction = LexerFunctions.lexRange; + while (f) { + // Each lexing function returns the next lexing function or null. + f = f(state, tokens); + } + return tokens; +} + +function emitToken(type : TokenType, state : Scanner) : Token { + var content = state.emit(); + return (content.length > 0) ? new Token(type, content) : null; +} + +module LexerFunctions { + // Starts lexing a Vim command line and delegates on other lexer functions as needed. + export function lexRange(state : Scanner, tokens : Token[]): LexFunction { + while (true) { + if (state.isAtEof) { + break; + } + var c = state.next(); + switch (c) { + case ",": + tokens.push(emitToken(TokenType.Comma, state)); + continue; + case "%": + tokens.push(emitToken(TokenType.Percent, state)); + continue; + case "$": + tokens.push(emitToken(TokenType.Dollar, state)); + continue; + case ".": + tokens.push(emitToken(TokenType.Dot, state)); + continue; + case "/": + return lexForwardSearch; + case "?": + return lexReverseSearch; + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + return lexLineRef; + case "+": + tokens.push(emitToken(TokenType.Plus, state)); + continue; + case "-": + tokens.push(emitToken(TokenType.Minus, state)); + continue; + default: + return lexCommand; + } + } + return null; + } + + function lexLineRef(state : Scanner, tokens : Token[]): LexFunction { + // The first digit has already been lexed. + while (true) { + if (state.isAtEof) { + var emitted = emitToken(TokenType.LineNumber, state); + if (emitted) { + tokens.push(emitted); + }; + return null; + } + var c = state.next(); + switch (c) { + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + continue; + default: + state.backup(); + tokens.push(emitToken(TokenType.LineNumber, state)); + return lexRange; + } + } + return null; + } + + function lexCommand(state : Scanner, tokens : Token[]): LexFunction { + // The first character of command"s name has already been lexed. + state.skipWhiteSpace(); + while (true) { + if (state.isAtEof) { + tokens.push(emitToken(TokenType.CommandName, state)); + break; + } + var c = state.next(); + var lc = c.toLowerCase(); + if (lc >= "a" && lc <= "z") { + continue; + } + else { + state.backup(); + tokens.push(emitToken(TokenType.CommandName, state)); + state.skipWhiteSpace(); + while (!state.isAtEof) { + state.next(); + } + // TODO(guillermooo): We need to parse multiple commands. + var args = emitToken(TokenType.CommandArgs, state); + if (args) { + tokens.push(args); + }; + break; + } + } + return null; + } + + function lexForwardSearch(state : Scanner, tokens : Token[]): LexFunction { + // The first slash has already been lexed. + state.skip("/"); // XXX: really? + var escaping : boolean; + var searchTerm = ""; + while(!state.isAtEof) { + var c = state.next(); + if (c == "/" && !escaping) { + break; + } + if (c == "\\") { + escaping = true; + continue; + } + else { + escaping = false; + } + searchTerm += c != "\\" ? c : "\\\\"; + } + tokens.push(new Token(TokenType.ForwardSearch, searchTerm)); + state.ignore(); + if (!state.isAtEof) { + state.skip("/"); + }; + return lexRange; + } + + function lexReverseSearch(state : Scanner, tokens : Token[]): LexFunction { + // The first question mark has already been lexed. + state.skip("?"); // XXX: really? + var escaping : boolean; + var searchTerm = ""; + while(!state.isAtEof) { + var c = state.next(); + if (c == "?" && !escaping) { + break; + } + if (c == "\\") { + escaping = true; + continue; + } + else { + escaping = false; + } + searchTerm += c != "\\" ? c : "\\\\"; + } + tokens.push(new Token(TokenType.ReverseSearch, searchTerm)); + state.ignore(); + if (!state.isAtEof) { + state.skip("?"); + } + return lexRange; + } +} diff --git a/src/cmd_line/main.ts b/src/cmd_line/main.ts index 9667fcf4472..21bb909e6dd 100644 --- a/src/cmd_line/main.ts +++ b/src/cmd_line/main.ts @@ -1,32 +1,32 @@ -import * as vscode from 'vscode'; -import * as parser from './parser'; -import * as util from '../util'; - -// Shows the vim command line. -export function showCmdLine(initialText = "") { - const options : vscode.InputBoxOptions = { - prompt: "Vim command line", - value: initialText - }; - vscode.window.showInputBox(options).then( - runCmdLine, - vscode.window.showErrorMessage - ); -} - -function runCmdLine(s : string) : void { - try { - var cmd = parser.parse(s); - } - catch (e) { - util.showInfo(e); - return; - } - - if (cmd.isEmpty) { - vscode.window.showInformationMessage("empty cmdline"); - } - else { - cmd.runOn(vscode.window.activeTextEditor); - } -} \ No newline at end of file +import * as vscode from "vscode"; +import * as parser from "./parser"; +import * as util from "../util"; + +// Shows the vim command line. +export function showCmdLine(initialText = "") { + const options : vscode.InputBoxOptions = { + prompt: "Vim command line", + value: initialText + }; + vscode.window.showInputBox(options).then( + runCmdLine, + vscode.window.showErrorMessage + ); +} + +function runCmdLine(s : string) : void { + try { + var cmd = parser.parse(s); + } + catch (e) { + util.showInfo(e); + return; + } + + if (cmd.isEmpty) { + vscode.window.showInformationMessage("empty cmdline"); + } + else { + cmd.runOn(vscode.window.activeTextEditor); + } +} diff --git a/src/cmd_line/node.ts b/src/cmd_line/node.ts index 75d245421c4..d893af7398f 100644 --- a/src/cmd_line/node.ts +++ b/src/cmd_line/node.ts @@ -1,106 +1,106 @@ -import * as vscode from 'vscode'; -import * as token from './token'; -import * as node from './node'; -import * as lexer from './lexer'; -export * from './command_node'; +import * as vscode from "vscode"; +import * as token from "./token"; +import * as node from "./node"; +import * as lexer from "./lexer"; +export * from "./command_node"; export class LineRange { - left : token.Token[]; - separator : token.Token; - right : token.Token[]; + left : token.Token[]; + separator : token.Token; + right : token.Token[]; - constructor() { - this.left = []; - this.right = []; - } + constructor() { + this.left = []; + this.right = []; + } - addToken(tok : token.Token) : void { - if (tok.type === token.TokenType.Comma) { - this.separator = tok; - return; - } + addToken(tok : token.Token) : void { + if (tok.type === token.TokenType.Comma) { + this.separator = tok; + return; + } - if (!this.separator) { - if (this.left.length > 0 && tok.type != token.TokenType.Offset) { - // XXX: is this always this error? - throw Error("not a Vim command"); - } - this.left.push(tok); - } - else { - if (this.right.length > 0 && tok.type != token.TokenType.Offset) { - // XXX: is this always this error? - throw Error("not a Vim command"); - } - this.right.push(tok); - } - } + if (!this.separator) { + if (this.left.length > 0 && tok.type != token.TokenType.Offset) { + // XXX: is this always this error? + throw Error("not a Vim command"); + } + this.left.push(tok); + } + else { + if (this.right.length > 0 && tok.type != token.TokenType.Offset) { + // XXX: is this always this error? + throw Error("not a Vim command"); + } + this.right.push(tok); + } + } - get isEmpty() : boolean { - return this.left.length === 0 && this.right.length === 0 && !this.separator; - } + get isEmpty() : boolean { + return this.left.length === 0 && this.right.length === 0 && !this.separator; + } - toString() : string { - return this.left.toString() + this.separator.content + this.right.toString(); - } + toString() : string { + return this.left.toString() + this.separator.content + this.right.toString(); + } - runOn(document : vscode.TextEditor) : void { - if (this.isEmpty) { - return; - } - var lineRef = this.right.length === 0 ? this.left : this.right; - var pos = this.lineRefToPosition(document, lineRef); - document.selection = new vscode.Selection(pos, pos); - } + runOn(document : vscode.TextEditor) : void { + if (this.isEmpty) { + return; + } + var lineRef = this.right.length === 0 ? this.left : this.right; + var pos = this.lineRefToPosition(document, lineRef); + document.selection = new vscode.Selection(pos, pos); + } - lineRefToPosition(doc : vscode.TextEditor, toks : token.Token[]) : vscode.Position { - var first = toks[0]; - switch (first.type) { - case token.TokenType.Dollar: - case token.TokenType.Percent: - return new vscode.Position(doc.document.lineCount, 0); - case token.TokenType.Dot: - return new vscode.Position(doc.selection.active.line, 0); - case token.TokenType.LineNumber: - var line = Number.parseInt(first.content); - line = Math.max(0, line - 1); - line = Math.min(doc.document.lineCount, line); - return new vscode.Position(line, 0); - default: - throw new Error("not implemented"); - } - } + lineRefToPosition(doc : vscode.TextEditor, toks : token.Token[]) : vscode.Position { + var first = toks[0]; + switch (first.type) { + case token.TokenType.Dollar: + case token.TokenType.Percent: + return new vscode.Position(doc.document.lineCount, 0); + case token.TokenType.Dot: + return new vscode.Position(doc.selection.active.line, 0); + case token.TokenType.LineNumber: + var line = Number.parseInt(first.content); + line = Math.max(0, line - 1); + line = Math.min(doc.document.lineCount, line); + return new vscode.Position(line, 0); + default: + throw new Error("not implemented"); + } + } } export class CommandLine { - range : LineRange; - command : CommandBase; + range : LineRange; + command : CommandBase; - constructor() { - this.range = new LineRange(); - } + constructor() { + this.range = new LineRange(); + } - get isEmpty() : boolean { - return this.range.isEmpty && !this.command; - } + get isEmpty() : boolean { + return this.range.isEmpty && !this.command; + } - toString() : string { - return ":" + this.range.toString() + " " + this.command.toString(); - } + toString() : string { + return ":" + this.range.toString() + " " + this.command.toString(); + } - runOn(document : vscode.TextEditor) : void { - if (!this.command) { - this.range.runOn(document); - return; - } + runOn(document : vscode.TextEditor) : void { + if (!this.command) { + this.range.runOn(document); + return; + } - // TODO: calc range - this.command.runOn(document); - } + // TODO: calc range + this.command.runOn(document); + } } export interface CommandBase { - name : string; - shortName : string; - runOn(textEditor : vscode.TextEditor) : void -} \ No newline at end of file + name : string; + shortName : string; + runOn(textEditor : vscode.TextEditor) : void; +} diff --git a/src/cmd_line/parser.ts b/src/cmd_line/parser.ts index abca6314bc1..b84738f651a 100644 --- a/src/cmd_line/parser.ts +++ b/src/cmd_line/parser.ts @@ -1,107 +1,105 @@ -// parse range -// parse command name -// command parses its own arguments - -import * as vscode from 'vscode'; -import * as token from './token'; -import * as node from './node'; -import * as lexer from './lexer'; -import {parseWriteCommandArgs, commandParsers} from './subparsers'; -import * as util from '../util'; +import * as vscode from "vscode"; +import * as token from "./token"; +import * as node from "./node"; +import * as lexer from "./lexer"; +import {parseWriteCommandArgs, commandParsers} from "./subparsers"; +import * as util from "../util"; interface ParseFunction { - (state : ParserState, command : node.CommandLine) : ParseFunction; + (state : ParserState, command : node.CommandLine) : ParseFunction; } export function parse(input : string) : node.CommandLine { - var cmd = new node.CommandLine(); - var f : ParseFunction = parseLineRange; - let state : ParserState = new ParserState(input); - while (f) f = f(state, cmd); - return cmd; + var cmd = new node.CommandLine(); + var f : ParseFunction = parseLineRange; + let state : ParserState = new ParserState(input); + while (f) { + f = f(state, cmd); + } + return cmd; } -function parseLineRange(state : ParserState, commandLine : node.CommandLine) : ParseFunction { - while (true) { - let tok = state.next(); - switch (tok.type) { - case token.TokenType.Eof: - return null; - case token.TokenType.Dot: - case token.TokenType.Dollar: - case token.TokenType.Percent: - case token.TokenType.Comma: - case token.TokenType.LineNumber: - commandLine.range.addToken(tok); - continue; - case token.TokenType.CommandName: - state.backup(); - return parseCommand; - // commandLine.command = new node.CommandLineCommand(tok.content, null); - // continue; - default: - console.warn("skipping token " + "Token(" + tok.type + ",{" + tok.content + "})"); - return null; - } - } +function parseLineRange(state : ParserState, commandLine : node.CommandLine) : ParseFunction { + while (true) { + let tok = state.next(); + switch (tok.type) { + case token.TokenType.Eof: + return null; + case token.TokenType.Dot: + case token.TokenType.Dollar: + case token.TokenType.Percent: + case token.TokenType.Comma: + case token.TokenType.LineNumber: + commandLine.range.addToken(tok); + continue; + case token.TokenType.CommandName: + state.backup(); + return parseCommand; + // commandLine.command = new node.CommandLineCommand(tok.content, null); + // continue; + default: + console.warn("skipping token " + "Token(" + tok.type + ",{" + tok.content + "})"); + return null; + } + } } function parseCommand(state : ParserState, commandLine : node.CommandLine) : ParseFunction { - while (!state.isAtEof) { - var tok = state.next(); - switch (tok.type) { - case token.TokenType.CommandName: - var commandParser = commandParsers[tok.content]; - if (!commandParser) { - throw new Error("not implemented or not a valid command"); - } - // TODO: Pass the args, but keep in mind there could be multiple - // commands, not just one. - var argsTok = state.next(); - var args = argsTok.type === token.TokenType.CommandArgs ? argsTok.content : null; - commandLine.command = commandParser(args); - return null; - default: - throw new Error("not implemented"); - } - } - if (!state.isAtEof) { - state.backup(); - return parseCommand; - } - else { - return null; - } + while (!state.isAtEof) { + var tok = state.next(); + switch (tok.type) { + case token.TokenType.CommandName: + var commandParser = commandParsers[tok.content]; + if (!commandParser) { + throw new Error("not implemented or not a valid command"); + } + // TODO: Pass the args, but keep in mind there could be multiple + // commands, not just one. + var argsTok = state.next(); + var args = argsTok.type === token.TokenType.CommandArgs ? argsTok.content : null; + commandLine.command = commandParser(args); + return null; + default: + throw new Error("not implemented"); + } + } + if (!state.isAtEof) { + state.backup(); + return parseCommand; + } + else { + return null; + } } // Keeps track of parsing state. class ParserState { - tokens : token.Token[] = []; - pos : number = 0; - - constructor(input : string) { - this.lex(input); - } - - lex(input : string) { - this.tokens = lexer.lex(input); - } - - next() : token.Token { - if (this.pos >= this.tokens.length) { - this.pos = this.tokens.length; - return new token.Token(token.TokenType.Eof, '__EOF__'); - } - let tok = this.tokens[this.pos]; - this.pos++; - return tok; - } - - backup() : void { - this.pos--; - } - - get isAtEof() { - return this.pos >= this.tokens.length; // XXX the last token is TokenEof; is this correct? - } + tokens : token.Token[] = []; + pos : number = 0; + + constructor(input : string) { + this.lex(input); + } + + lex(input : string) { + this.tokens = lexer.lex(input); + } + + next() : token.Token { + if (this.pos >= this.tokens.length) { + this.pos = this.tokens.length; + return new token.Token(token.TokenType.Eof, "__EOF__"); + } + let tok = this.tokens[this.pos]; + this.pos++; + return tok; + } + + backup() : void { + this.pos--; + } + + get isAtEof() { + return this.pos >= this.tokens.length; // XXX the last token is TokenEof; is this correct? + } } diff --git a/src/cmd_line/scanner.ts b/src/cmd_line/scanner.ts index de4fe9dd1c9..3862cdb65f2 100644 --- a/src/cmd_line/scanner.ts +++ b/src/cmd_line/scanner.ts @@ -1,76 +1,82 @@ // Provides state and behavior to scan an input string character by character. export class Scanner { - static EOF : string = '__EOF__'; - start : number = 0; - pos : number = 0; - input : string; - - constructor(input : string) { - this.input = input; - } - - // Returns the next character in the input, or EOF. - next() : string { - if (this.isAtEof) { - this.pos = this.input.length; - return Scanner.EOF; - } - let c = this.input[this.pos]; - this.pos++; - return c; - } - - // Returns whether we've reached EOF. - get isAtEof() : boolean { - return this.pos >= this.input.length; - } - - // Ignores the span of text between the current start and the current position. - ignore() : void { - this.start = this.pos; - } - - // Returns the span of text between the current start and the current position. - emit() : string { - let s = this.input.substring(this.start, this.pos); - this.ignore(); - return s; - } - - backup(): void { - this.pos--; - } - - // skips over c and ignores the text span - skip(c : string) : void { - var s = this.next(); - while (!this.isAtEof) { - if (s !== c) break; - s = this.next(); - } - this.backup(); - this.ignore(); - } - - // skips text while any of chars matches and ignores the text span - skipRun(...chars : string[]) : void { - while(!this.isAtEof) { - var c = this.next(); - if (chars.indexOf(c) == -1) break; - } - this.backup(); - this.ignore(); - } - - // skips over whitespace (tab, space) and ignores the text span - skipWhiteSpace(): void { - while (true) { - var c = this.next(); - if (c == ' ' || c == '\t') continue; - break; - } - this.backup(); - this.ignore(); - } -} \ No newline at end of file + static EOF : string = "__EOF__"; + start : number = 0; + pos : number = 0; + input : string; + + constructor(input : string) { + this.input = input; + } + + // Returns the next character in the input, or EOF. + next() : string { + if (this.isAtEof) { + this.pos = this.input.length; + return Scanner.EOF; + } + let c = this.input[this.pos]; + this.pos++; + return c; + } + + // Returns whether we've reached EOF. + get isAtEof() : boolean { + return this.pos >= this.input.length; + } + + // Ignores the span of text between the current start and the current position. + ignore() : void { + this.start = this.pos; + } + + // Returns the span of text between the current start and the current position. + emit() : string { + let s = this.input.substring(this.start, this.pos); + this.ignore(); + return s; + } + + backup(): void { + this.pos--; + } + + // skips over c and ignores the text span + skip(c : string) : void { + var s = this.next(); + while (!this.isAtEof) { + if (s !== c) { + break; + } + s = this.next(); + } + this.backup(); + this.ignore(); + } + + // skips text while any of chars matches and ignores the text span + skipRun(...chars : string[]) : void { + while(!this.isAtEof) { + var c = this.next(); + if (chars.indexOf(c) == -1) { + break; + } + } + this.backup(); + this.ignore(); + } + + // skips over whitespace (tab, space) and ignores the text span + skipWhiteSpace(): void { + while (true) { + var c = this.next(); + if (c == " " || c == "\t") { + continue; + } + break; + } + this.backup(); + this.ignore(); + } +} diff --git a/src/cmd_line/subparsers.ts b/src/cmd_line/subparsers.ts index ddc56e86ce8..08e4f63cb11 100644 --- a/src/cmd_line/subparsers.ts +++ b/src/cmd_line/subparsers.ts @@ -1,12 +1,12 @@ -import * as node from './node'; +import * as node from "./node"; // maps command names to parsers for said commands. export const commandParsers = { - w: parseWriteCommandArgs, - write: parseWriteCommandArgs -} + w: parseWriteCommandArgs, + write: parseWriteCommandArgs +}; export function parseWriteCommandArgs(args : string = null) { - // TODO: actually parse arguments. - return new node.WriteCommand(args ? args : null); -} \ No newline at end of file + // TODO: actually parse arguments. + return new node.WriteCommand(args ? args : null); +} diff --git a/src/cmd_line/token.ts b/src/cmd_line/token.ts index 3375ed4bae7..6df351fe9d9 100644 --- a/src/cmd_line/token.ts +++ b/src/cmd_line/token.ts @@ -1,28 +1,28 @@ -// Tokens for the Vim command line. - -export enum TokenType { - Unknown, - Eof, - LineNumber, - Dot, - Dollar, - Percent, - Comma, - Plus, - Minus, - CommandName, - CommandArgs, - ForwardSearch, - ReverseSearch, - Offset -} - -export class Token { - type : TokenType; - content : string; - - constructor(type : TokenType, content : string) { - this.type = type; - this.content = content; - } -} +// Tokens for the Vim command line. + +export enum TokenType { + Unknown, + Eof, + LineNumber, + Dot, + Dollar, + Percent, + Comma, + Plus, + Minus, + CommandName, + CommandArgs, + ForwardSearch, + ReverseSearch, + Offset +} + +export class Token { + type : TokenType; + content : string; + + constructor(type : TokenType, content : string) { + this.type = type; + this.content = content; + } +} diff --git a/test/extension.test.ts b/test/extension.test.ts index 94bcf0ef2f4..a8cee0dca15 100644 --- a/test/extension.test.ts +++ b/test/extension.test.ts @@ -1,7 +1,4 @@ -// -// Note: This example test is leveraging the Mocha test framework. -// Please refer to their documentation on https://mochajs.org/ for help. -// +// For documentation on the test framework see https://mochajs.org/. // The module 'assert' provides assertion methods from node import * as assert from 'assert'; @@ -14,7 +11,7 @@ import * as myExtension from '../extension'; suite("Extension tests", () => { - test("dummy", () => { - assert.equal(0, 0); - }); -}); \ No newline at end of file + test("dummy", () => { + assert.equal(0, 0); + }); +}); diff --git a/test/index.ts b/test/index.ts index e3cebd0d168..b6c9dadec2a 100644 --- a/test/index.ts +++ b/test/index.ts @@ -15,8 +15,8 @@ var testRunner = require('vscode/lib/testrunner'); // You can directly control Mocha options by uncommenting the following lines // See https://github.com/mochajs/mocha/wiki/Using-mocha-programmatically#set-options for more info testRunner.configure({ - ui: 'tdd', // the TDD UI is being used in extension.test.ts (suite, test, etc.) - useColors: true // colored output from test results + ui: 'tdd', // the TDD UI is being used in extension.test.ts (suite, test, etc.) + useColors: true // colored output from test results }); -module.exports = testRunner; \ No newline at end of file +module.exports = testRunner; diff --git a/test/lexer.test.ts b/test/lexer.test.ts index 8e9ed0f55fc..edb0b13048c 100644 --- a/test/lexer.test.ts +++ b/test/lexer.test.ts @@ -1,105 +1,100 @@ -// -// Note: This example test is leveraging the Mocha test framework. -// Please refer to their documentation on https://mochajs.org/ for help. -// +// For documentation on the test framework, see https://mochajs.org/. // The module 'assert' provides assertion methods from node import * as assert from 'assert'; -// You can import and use all API from the 'vscode' module -// as well as import your extension to test it import * as vscode from 'vscode'; import * as lexer from '../src/cmd_line/lexer' import {Token, TokenType} from '../src/cmd_line/token' suite("Cmd line tests - lexing", () => { - test("can lex empty string", () => { - var tokens = lexer.lex(""); - assert.equal(tokens.length, 0); - }); - - test("can lex comma", () => { - var tokens = lexer.lex(","); - assert.equal(tokens[0].content, new Token(TokenType.Comma, ',').content); - }); - - test("can lex percent", () => { - var tokens = lexer.lex("%"); - assert.equal(tokens[0].content, new Token(TokenType.Percent, '%').content); - }); - - test("can lex dollar", () => { - var tokens = lexer.lex("$"); - assert.equal(tokens[0].content, new Token(TokenType.Dollar, '$').content); - }); - - test("can lex dot", () => { - var tokens = lexer.lex("."); - assert.equal(tokens[0].content, new Token(TokenType.Dot, '.').content); - }); - - test("can lex one number", () => { - var tokens = lexer.lex("1"); - assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "1").content); - }); - - test("can lex longer number", () => { - var tokens = lexer.lex("100"); - assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "100").content); - }); - - test("can lex plus", () => { - var tokens = lexer.lex("+"); - assert.equal(tokens[0].content, new Token(TokenType.Plus, '+').content); - }); - - test("can lex minus", () => { - var tokens = lexer.lex("-"); - assert.equal(tokens[0].content, new Token(TokenType.Minus, '-').content); - }); - - test("can lex forward search", () => { - var tokens = lexer.lex("/horses/"); - assert.equal(tokens[0].content, new Token(TokenType.ForwardSearch, "horses").content); - }); - - test("can lex forward search escaping", () => { - var tokens = lexer.lex("/hor\\/ses/"); - assert.equal(tokens[0].content, new Token(TokenType.ForwardSearch, "hor/ses").content); - }); - - test("can lex reverse search", () => { - var tokens = lexer.lex("?worms?"); - assert.equal(tokens[0].content, new Token(TokenType.ReverseSearch, "worms").content); - }); - - test("can lex reverse search escaping", () => { - var tokens = lexer.lex("?wor\\?ms?"); - assert.equal(tokens[0].content, new Token(TokenType.ReverseSearch, "wor?ms").content); - }); - - test("can lex command name", () => { - var tokens = lexer.lex("w"); - assert.equal(tokens[0].content, new Token(TokenType.CommandName, "w").content); - }); - - test("can lex command args", () => { - var tokens = lexer.lex("w something"); - assert.equal(tokens[0].content, new Token(TokenType.CommandName, "w").content); - assert.equal(tokens[1].content, new Token(TokenType.CommandArgs, "something").content); - }); - - test("can lex long command name and args", () => { - var tokens = lexer.lex("write12 something here"); - assert.equal(tokens[0].content, new Token(TokenType.CommandName, "write").content); - assert.equal(tokens[1].content, new Token(TokenType.CommandArgs, "12 something here").content); - }); - - test("can lex left and right line refs", () => { - var tokens = lexer.lex("20,30"); - assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "20").content); - assert.equal(tokens[1].content, new Token(TokenType.LineNumber, ",").content); - assert.equal(tokens[2].content, new Token(TokenType.LineNumber, "30").content); - }); -}); \ No newline at end of file + test("can lex empty string", () => { + var tokens = lexer.lex(""); + assert.equal(tokens.length, 0); + }); + + test("can lex comma", () => { + var tokens = lexer.lex(","); + assert.equal(tokens[0].content, new Token(TokenType.Comma, ',').content); + }); + + test("can lex percent", () => { + var tokens = lexer.lex("%"); + assert.equal(tokens[0].content, new Token(TokenType.Percent, '%').content); + }); + + test("can lex dollar", () => { + var tokens = lexer.lex("$"); + assert.equal(tokens[0].content, new Token(TokenType.Dollar, '$').content); + }); + + test("can lex dot", () => { + var tokens = lexer.lex("."); + assert.equal(tokens[0].content, new Token(TokenType.Dot, '.').content); + }); + + test("can lex one number", () => { + var tokens = lexer.lex("1"); + assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "1").content); + }); + + test("can lex longer number", () => { + var tokens = lexer.lex("100"); + assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "100").content); + }); + + test("can lex plus", () => { + var tokens = lexer.lex("+"); + assert.equal(tokens[0].content, new Token(TokenType.Plus, '+').content); + }); + + test("can lex minus", () => { + var tokens = lexer.lex("-"); + assert.equal(tokens[0].content, new Token(TokenType.Minus, '-').content); + }); + + test("can lex forward search", () => { + var tokens = lexer.lex("/horses/"); + assert.equal(tokens[0].content, new Token(TokenType.ForwardSearch, "horses").content); + }); + + test("can lex forward search escaping", () => { + var tokens = lexer.lex("/hor\\/ses/"); + assert.equal(tokens[0].content, new Token(TokenType.ForwardSearch, "hor/ses").content); + }); + + test("can lex reverse search", () => { + var tokens = lexer.lex("?worms?"); + assert.equal(tokens[0].content, new Token(TokenType.ReverseSearch, "worms").content); + }); + + test("can lex reverse search escaping", () => { + var tokens = lexer.lex("?wor\\?ms?"); + assert.equal(tokens[0].content, new Token(TokenType.ReverseSearch, "wor?ms").content); + }); + + test("can lex command name", () => { + var tokens = lexer.lex("w"); + assert.equal(tokens[0].content, new Token(TokenType.CommandName, "w").content); + }); + + test("can lex command args", () => { + var tokens = lexer.lex("w something"); + assert.equal(tokens[0].content, new Token(TokenType.CommandName, "w").content); + assert.equal(tokens[1].content, new Token(TokenType.CommandArgs, "something").content); + }); + + test("can lex long command name and args", () => { + var tokens = lexer.lex("write12 something here"); + assert.equal(tokens[0].content, new Token(TokenType.CommandName, "write").content); + assert.equal(tokens[1].content, new Token(TokenType.CommandArgs, "12 something here").content); + }); + + test("can lex left and right line refs", () => { + var tokens = lexer.lex("20,30"); + assert.equal(tokens[0].content, new Token(TokenType.LineNumber, "20").content); + assert.equal(tokens[1].content, new Token(TokenType.LineNumber, ",").content); + assert.equal(tokens[2].content, new Token(TokenType.LineNumber, "30").content); + }); +}); diff --git a/test/parser.test.ts b/test/parser.test.ts index 89259e2053b..293db3a6f0a 100644 --- a/test/parser.test.ts +++ b/test/parser.test.ts @@ -1,13 +1,8 @@ -// -// Note: This example test is leveraging the Mocha test framework. -// Please refer to their documentation on https://mochajs.org/ for help. -// +// For documentation on the test framework, see https://mochajs.org/. // The module 'assert' provides assertion methods from node import * as assert from 'assert'; -// You can import and use all API from the 'vscode' module -// as well as import your extension to test it import * as vscode from 'vscode'; import * as myExtension from '../extension'; import * as parser from '../src/cmd_line/parser'; @@ -16,36 +11,36 @@ import * as token from '../src/cmd_line/token'; suite("Cmd line tests - parser", () => { - test("can parse empty string", () => { - var cmd = parser.parse(""); - assert.ok(cmd.isEmpty); - }); - - // TODO: Range tests follow -- should prolly create a suite for this - test("can parse left - dot", () => { - var cmd : node.CommandLine = parser.parse("."); - assert.equal(cmd.range.left[0].type, token.TokenType.Dot); - }); - - test("can parse left - dollar", () => { - var cmd : node.CommandLine = parser.parse("$"); - assert.equal(cmd.range.left[0].type, token.TokenType.Dollar); - }); - - test("can parse left - percent", () => { - var cmd : node.CommandLine = parser.parse("%"); - assert.equal(cmd.range.left[0].type, token.TokenType.Percent); - }); - - test("can parse separator - comma", () => { - var cmd : node.CommandLine = parser.parse(","); - assert.equal(cmd.range.separator.type, token.TokenType.Comma); - }); - - test("can parse right - dollar", () => { - var cmd : node.CommandLine = parser.parse(",$"); - assert.equal(cmd.range.left.length, 0); - assert.equal(cmd.range.right.length, 1); - assert.equal(cmd.range.right[0].type, token.TokenType.Dollar, "unexpected token"); - }); + test("can parse empty string", () => { + var cmd = parser.parse(""); + assert.ok(cmd.isEmpty); + }); + + // TODO: Range tests follow -- should prolly create a suite for this + test("can parse left - dot", () => { + var cmd : node.CommandLine = parser.parse("."); + assert.equal(cmd.range.left[0].type, token.TokenType.Dot); + }); + + test("can parse left - dollar", () => { + var cmd : node.CommandLine = parser.parse("$"); + assert.equal(cmd.range.left[0].type, token.TokenType.Dollar); + }); + + test("can parse left - percent", () => { + var cmd : node.CommandLine = parser.parse("%"); + assert.equal(cmd.range.left[0].type, token.TokenType.Percent); + }); + + test("can parse separator - comma", () => { + var cmd : node.CommandLine = parser.parse(","); + assert.equal(cmd.range.separator.type, token.TokenType.Comma); + }); + + test("can parse right - dollar", () => { + var cmd : node.CommandLine = parser.parse(",$"); + assert.equal(cmd.range.left.length, 0); + assert.equal(cmd.range.right.length, 1); + assert.equal(cmd.range.right[0].type, token.TokenType.Dollar, "unexpected token"); + }); }); diff --git a/test/scanner.test.ts b/test/scanner.test.ts index 4123ed7e2e6..cd9d04e8c53 100644 --- a/test/scanner.test.ts +++ b/test/scanner.test.ts @@ -14,54 +14,54 @@ import * as lexerState from '../src/cmd_line/scanner' suite("Cmd line tests - lexer state", () => { - test("can init lexer state", () => { - var state = new lexerState.Scanner("dog"); - assert.equal(state.input, "dog"); - }); - - test("can detect EOF with empty input", () => { - var state = new lexerState.Scanner(""); - assert.ok(state.isAtEof); - }); + test("can init lexer state", () => { + var state = new lexerState.Scanner("dog"); + assert.equal(state.input, "dog"); + }); + + test("can detect EOF with empty input", () => { + var state = new lexerState.Scanner(""); + assert.ok(state.isAtEof); + }); - test("next() returns EOF at EOF", () => { - var state = new lexerState.Scanner(""); - assert.equal(state.next(), lexerState.Scanner.EOF); - assert.equal(state.next(), lexerState.Scanner.EOF); - assert.equal(state.next(), lexerState.Scanner.EOF); - }); + test("next() returns EOF at EOF", () => { + var state = new lexerState.Scanner(""); + assert.equal(state.next(), lexerState.Scanner.EOF); + assert.equal(state.next(), lexerState.Scanner.EOF); + assert.equal(state.next(), lexerState.Scanner.EOF); + }); - test("next() can scan", () => { - var state = new lexerState.Scanner("dog"); - assert.equal(state.next(), "d"); - assert.equal(state.next(), "o"); - assert.equal(state.next(), "g") - assert.equal(state.next(), lexerState.Scanner.EOF); - }); - - test("can emit", () => { - var state = new lexerState.Scanner("dog cat"); - state.next(); - state.next(); - state.next(); - assert.equal(state.emit(), "dog"); - state.next(); - state.next(); - state.next(); - state.next(); - assert.equal(state.emit(), " cat"); - }); + test("next() can scan", () => { + var state = new lexerState.Scanner("dog"); + assert.equal(state.next(), "d"); + assert.equal(state.next(), "o"); + assert.equal(state.next(), "g") + assert.equal(state.next(), lexerState.Scanner.EOF); + }); + + test("can emit", () => { + var state = new lexerState.Scanner("dog cat"); + state.next(); + state.next(); + state.next(); + assert.equal(state.emit(), "dog"); + state.next(); + state.next(); + state.next(); + state.next(); + assert.equal(state.emit(), " cat"); + }); - test("can ignore", () => { - var state = new lexerState.Scanner("dog cat"); - state.next(); - state.next(); - state.next(); - state.next(); - state.ignore(); - state.next(); - state.next(); - state.next(); - assert.equal(state.emit(), "cat"); - }); -}); \ No newline at end of file + test("can ignore", () => { + var state = new lexerState.Scanner("dog cat"); + state.next(); + state.next(); + state.next(); + state.next(); + state.ignore(); + state.next(); + state.next(); + state.next(); + assert.equal(state.emit(), "cat"); + }); +}); From ef66cab2234f88dca6bc26caeb013b554b69a1d0 Mon Sep 17 00:00:00 2001 From: guillermooo Date: Tue, 17 Nov 2015 00:00:38 +0100 Subject: [PATCH 3/3] fixes --- src/cmd_line/lexer.ts | 3 +-- src/cmd_line/scanner.ts | 12 +++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/cmd_line/lexer.ts b/src/cmd_line/lexer.ts index 5dcccd8190b..a750237b9dc 100644 --- a/src/cmd_line/lexer.ts +++ b/src/cmd_line/lexer.ts @@ -105,8 +105,7 @@ module LexerFunctions { } function lexCommand(state : Scanner, tokens : Token[]): LexFunction { - // The first character of command"s name has already been lexed. - state.skipWhiteSpace(); + // The first character of the command's name has already been lexed. while (true) { if (state.isAtEof) { tokens.push(emitToken(TokenType.CommandName, state)); diff --git a/src/cmd_line/scanner.ts b/src/cmd_line/scanner.ts index 3862cdb65f2..bcb24d2fbe1 100644 --- a/src/cmd_line/scanner.ts +++ b/src/cmd_line/scanner.ts @@ -51,7 +51,9 @@ export class Scanner { } s = this.next(); } - this.backup(); + if (!this.isAtEof) { + this.backup(); + } this.ignore(); } @@ -63,7 +65,9 @@ export class Scanner { break; } } - this.backup(); + if (!this.isAtEof) { + this.backup(); + } this.ignore(); } @@ -76,7 +80,9 @@ export class Scanner { } break; } - this.backup(); + if (!this.isAtEof) { + this.backup(); + } this.ignore(); } }