Skip to content

Commit

Permalink
Merge pull request #44 from forkeith/syntest
Browse files Browse the repository at this point in the history
added syntest example to run ST syntax tests
  • Loading branch information
trishume authored Mar 17, 2017
2 parents b8c860b + 045dd0c commit f5e01f1
Show file tree
Hide file tree
Showing 6 changed files with 351 additions and 8 deletions.
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ bincode = "0.6"
flate2 = "^0.2"
fnv = "^1.0"

[dev-dependencies]
regex = "0.2.1"

[features]
static-onig = ["onig/static-libonig"]
assets = []
Expand Down
Binary file modified assets/default_newlines.packdump
Binary file not shown.
Binary file modified assets/default_nonewlines.packdump
Binary file not shown.
291 changes: 291 additions & 0 deletions examples/syntest.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
//! An example of using syntect for testing syntax definitions.
//! Basically exactly the same as what Sublime Text can do,
//! but without needing ST installed
extern crate syntect;
extern crate walkdir;
#[macro_use]
extern crate lazy_static;
extern crate regex;
//extern crate onig;
use syntect::parsing::{SyntaxSet, ParseState, ScopeStack, Scope};
use syntect::highlighting::ScopeSelectors;
use syntect::easy::{ScopeRegionIterator};

use std::path::Path;
use std::io::{BufRead, BufReader};
use std::fs::File;
use std::cmp::{min, max};
use walkdir::{DirEntry, WalkDir, WalkDirIterator};
use std::str::FromStr;
use regex::Regex;

/// Reasons `test_file` gives up on a syntax test file before evaluating any assertion.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestHeaderError {
/// The file is empty, or its first line does not match `SYNTAX_TEST_HEADER_PATTERN`.
MalformedHeader,
/// The syntax file named in the header could not be found in the loaded `SyntaxSet`.
SyntaxDefinitionNotFound,
}

/// Outcome of running every assertion in one syntax test file.
///
/// All counts are in asserted columns rather than assertion lines: each assertion
/// contributes the width of the range it covers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxTestFileResult {
/// At least one asserted column failed: `(failed columns, total asserted columns)`.
FailedAssertions(usize, usize),
/// Every asserted column matched; carries the total number of asserted columns.
Success(usize),
}

lazy_static! {
// Matches the header on the first line of a syntax test file:
//   <start token> SYNTAX TEST "<syntax file>" [<end token>]
// `testtoken_start` captures the comment token that introduces assertions (including any
// whitespace before it), `syntax_file` the quoted syntax definition path, and the optional
// `testtoken_end` the token that closes the comment. The `x` flag makes the regex engine
// ignore the whitespace/newlines used to lay the pattern out.
pub static ref SYNTAX_TEST_HEADER_PATTERN: Regex = Regex::new(r#"(?xm)
^(?P<testtoken_start>\s*\S+)
\s+SYNTAX\sTEST\s+
"(?P<syntax_file>[^"]+)"
\s*(?P<testtoken_end>\S+)?$
"#).unwrap();
// Matches the part of an assertion that follows the start token: optional whitespace, then
// either `<-` (group 1, `begin_of_token`) or a run of `^` (group 2, `range`), then the rest
// of the line (group 3, unnamed), which holds the scope selector text.
pub static ref SYNTAX_TEST_ASSERTION_PATTERN: Regex = Regex::new(r#"(?xm)
\s*(?:
(?P<begin_of_token><-)|(?P<range>\^+)
)(.+)$"#).unwrap();
}

/// One assertion found on a line of a syntax test file, as produced by
/// `get_line_assertion_details`.
#[derive(Debug)]
struct AssertionRange<'a> {
// column where the asserted range starts: the position of the test token for `<-`,
// or of the first `^` for a caret range
begin_char: usize,
// column just past the end of the asserted range (exclusive)
end_char: usize,
// the selector text following `<-` / `^^^`, cut off at the end token when one is present
scope_selector_text: &'a str,
// true when only whitespace surrounds the assertion on its line
is_pure_assertion_line: bool,
}

/// A non-empty piece of a parsed line together with the scope stack that applies to it.
#[derive(Debug)]
struct ScopedText {
// the scope stack in effect for this piece of text
scope: Vec<Scope>,
// column at which the text starts, counted in chars from the start of the line
char_start: usize,
// length of the text in chars
text_len: usize,
}

/// Result of checking an assertion's selector against one stretch of the line under test.
#[derive(Debug)]
struct RangeTestResult {
// first column of the stretch that was checked
column_begin: usize,
// column just past the end of the stretch (exclusive)
column_end: usize,
// whether the selector matched the scope of that stretch
success: bool,
}

/// Looks for a syntax test assertion on `line` and describes it if one is present.
///
/// An assertion is the test start token (`testtoken_start`, taken from the test file's header)
/// followed by either `<-` or a run of `^` and then the scope selector text. When the header
/// also defined an end token (`testtoken_end`) and it occurs in the selector text, the selector
/// text stops at it.
///
/// The returned `begin_char`/`end_char` are columns counted in chars (not bytes), so that they
/// can be compared directly with the char-based columns recorded for the line under test.
/// For `<-` the range is the single column where the start token begins; for `^^^` it is the
/// columns occupied by the carets.
///
/// Returns `None` when the start token is not on the line or is not followed by an assertion.
fn get_line_assertion_details<'a>(testtoken_start: &str, testtoken_end: Option<&str>, line: &'a str) -> Option<AssertionRange<'a>> {
    // the assertion must follow the test start token specified in the test file's header
    let index = match line.find(testtoken_start) {
        Some(index) => index,
        None => return None,
    };
    let (before_token_start, token_and_rest_of_line) = line.split_at(index);
    let after_token_start = &token_and_rest_of_line[testtoken_start.len()..];

    let captures = match SYNTAX_TEST_ASSERTION_PATTERN.captures(after_token_start) {
        Some(captures) => captures,
        None => return None,
    };

    let mut sst = captures.get(3).unwrap().as_str(); // group 3 always participates in a match: the scope selector text
    let mut only_whitespace_after_token_end = true;

    if let Some(token) = testtoken_end { // if there is an end token defined in the test file header
        if let Some(end_token_pos) = sst.find(token) { // and there is an end token in the line
            let (ss, after_token_end) = sst.split_at(end_token_pos); // the scope selector text ends at the end token
            sst = ss;
            only_whitespace_after_token_end = after_token_end.trim_right().is_empty();
        }
    }

    // `find` and the regex report byte offsets; convert them to char columns.
    // Every offset used here comes from a match boundary, so it is a valid char boundary.
    let char_column = |byte_offset: usize| line[..byte_offset].chars().count();
    let (begin_char, end_char) = match captures.get(2) {
        // `^^^`: the assertion covers exactly the columns under the carets
        Some(range) => {
            let range_base = index + testtoken_start.len();
            (char_column(range_base + range.start()), char_column(range_base + range.end()))
        }
        // `<-`: the assertion covers the single column where the start token begins
        None => {
            let token_column = char_column(index);
            (token_column, token_column + 1)
        }
    };

    Some(AssertionRange {
        begin_char: begin_char,
        end_char: end_char,
        scope_selector_text: sst,
        // if only whitespace surrounds the test tokens on the line, then it is a pure assertion line
        is_pure_assertion_line: before_token_start.trim_left().is_empty() && only_whitespace_after_token_end,
    })
}

/// Checks one assertion against the scoped text of the line it refers to.
///
/// Returns one `RangeTestResult` per stretch of `test_against_line_scopes` that overlaps the
/// asserted column range, clamped to that range. If the assertion extends past the end of the
/// line, the overhang is checked against the scope of the last piece of text on the line (i.e.
/// it is treated as asserting against the newline). If there is no scoped text at all, the whole
/// asserted range is reported as a single failure, since there is nothing it could match.
///
/// # Panics
///
/// Panics if the assertion's selector text cannot be parsed as scope selectors.
fn process_assertions(assertion: &AssertionRange, test_against_line_scopes: &[ScopedText]) -> Vec<RangeTestResult> {
    // format the scope selector to include a space at the beginning, because, currently, ScopeSelector expects excludes to begin with " -"
    // and they are sometimes in the syntax test as ^^^-comment, for example
    let selector = ScopeSelectors::from_str(&format!(" {}", &assertion.scope_selector_text)).unwrap();

    let last = match test_against_line_scopes.last() {
        Some(last) => last,
        None => {
            // nothing was recorded for the line under test, so the assertion cannot hold
            return vec![RangeTestResult {
                column_begin: assertion.begin_char,
                column_end: assertion.end_char,
                success: false,
            }];
        }
    };

    // find the scope at the specified start column, and match the selector against the rest of
    // the tokens on the line from there until the end column is reached
    let mut results: Vec<RangeTestResult> = test_against_line_scopes
        .iter()
        .skip_while(|s| s.char_start + s.text_len <= assertion.begin_char)
        .take_while(|s| s.char_start < assertion.end_char)
        .map(|scoped_text| RangeTestResult {
            column_begin: max(scoped_text.char_start, assertion.begin_char),
            column_end: min(scoped_text.char_start + scoped_text.text_len, assertion.end_char),
            success: selector.does_match(scoped_text.scope.as_slice()).is_some(),
        })
        .collect();

    // don't ignore assertions after the newline, they should be treated as though they are asserting against the newline
    let line_end = last.char_start + last.text_len;
    if line_end < assertion.end_char {
        results.push(RangeTestResult {
            column_begin: max(line_end, assertion.begin_char),
            column_end: assertion.end_char,
            success: selector.does_match(last.scope.as_slice()).is_some(),
        });
    }
    results
}

/// Runs the syntax test file at `path` against the syntax definition named in its header.
///
/// The first line must be a syntax test header (see `SYNTAX_TEST_HEADER_PATTERN`); it names the
/// comment token(s) that introduce assertions and the syntax definition to parse the file with.
/// Every following assertion is checked against the most recent line that carried no assertion,
/// and each failure is printed to stdout.
///
/// If `parse_test_lines` is `false` then lines that only contain assertions are not parsed
///
/// Returns the failed/total asserted column counts, or a `SyntaxTestHeaderError` when the header
/// is missing or malformed or the syntax definition it names is not in `ss`.
///
/// # Panics
///
/// Panics if the file cannot be opened or read.
fn test_file(ss: &SyntaxSet, path: &Path, parse_test_lines: bool) -> Result<SyntaxTestFileResult, SyntaxTestHeaderError> {
    let f = File::open(path).unwrap();
    let mut reader = BufReader::new(f);
    let mut line = String::new();

    // read the first line from the file - if we have reached EOF already, it's an invalid file
    if reader.read_line(&mut line).unwrap() == 0 {
        return Err(SyntaxTestHeaderError::MalformedHeader);
    }

    line = line.replace("\r", "");

    // parse the syntax test header in the first line of the file
    // (a copy is needed because the captures borrow from it while `line` is reused for later lines)
    let header_line = line.clone();
    let captures = SYNTAX_TEST_HEADER_PATTERN
        .captures(&header_line)
        .ok_or(SyntaxTestHeaderError::MalformedHeader)?;

    let testtoken_start = captures.name("testtoken_start").unwrap().as_str();
    let testtoken_end = captures.name("testtoken_end").map(|c| c.as_str());
    let syntax_file = captures.name("syntax_file").unwrap().as_str();

    // find the relevant syntax definition to parse the file with - case is important!
    println!("The test file references syntax definition file: {}", syntax_file);
    let syntax = ss
        .find_syntax_by_path(syntax_file)
        .ok_or(SyntaxTestHeaderError::SyntaxDefinitionNotFound)?;

    // iterate over the lines of the file, testing them
    let mut state = ParseState::new(syntax);
    let mut stack = ScopeStack::new();

    let mut current_line_number = 1;
    let mut test_against_line_number = 1;
    let mut scopes_on_line_being_tested = Vec::new();
    let mut previous_non_assertion_line = line.clone();

    let mut assertion_failures: usize = 0;
    let mut total_assertions: usize = 0;

    loop { // over lines of file, starting with the header line
        let mut line_only_has_assertion = false;
        let mut line_has_assertion = false;
        if let Some(assertion) = get_line_assertion_details(testtoken_start, testtoken_end, &line) {
            let result = process_assertions(&assertion, &scopes_on_line_being_tested);
            total_assertions += assertion.end_char - assertion.begin_char;
            for failure in result.iter().filter(|r| !r.success) {
                let failed_columns = failure.column_end - failure.column_begin;
                // Columns are counted in chars and may extend past the end of the line (an
                // assertion against the newline), so collect the text by chars instead of
                // slicing by byte offsets, which could panic.
                let chars: String = previous_non_assertion_line
                    .chars()
                    .skip(failure.column_begin)
                    .take(failed_columns)
                    .collect();
                println!(" Assertion selector {:?} \
                    from line {:?} failed against line {:?}, column range {:?}-{:?} \
                    (with text {:?}) \
                    has scope {:?}",
                    assertion.scope_selector_text.trim(),
                    current_line_number, test_against_line_number, failure.column_begin, failure.column_end,
                    chars,
                    // the scope of the text covering the first failed column, falling back to
                    // the last text on the line for failures beyond its end
                    scopes_on_line_being_tested.iter()
                        .find(|s| s.char_start + s.text_len > failure.column_begin)
                        .or_else(|| scopes_on_line_being_tested.last())
                        .map(|s| s.scope.as_slice())
                        .unwrap_or(&[])
                );
                assertion_failures += failed_columns;
            }
            line_only_has_assertion = assertion.is_pure_assertion_line;
            line_has_assertion = true;
        }
        if !line_only_has_assertion || parse_test_lines {
            if !line_has_assertion { // ST seems to ignore lines that have assertions when calculating which line the assertion tests against
                scopes_on_line_being_tested.clear();
                test_against_line_number = current_line_number;
                previous_non_assertion_line = line.clone();
            }
            let ops = state.parse_line(&line);
            let mut col: usize = 0;
            for (s, op) in ScopeRegionIterator::new(&ops, &line) {
                stack.apply(op);
                if s.is_empty() { // in this case we don't care about blank tokens
                    continue;
                }
                if !line_has_assertion {
                    // if the line has no assertions on it, remember the scopes on the line so we can test against them later
                    let len = s.chars().count();
                    scopes_on_line_being_tested.push(
                        ScopedText {
                            char_start: col,
                            text_len: len,
                            scope: stack.as_slice().to_vec()
                        }
                    );
                    // TODO: warn when there are duplicate adjacent (non-meta?) scopes, as it is almost always undesired
                    col += len;
                }
            }
        }

        line.clear();
        current_line_number += 1;
        if reader.read_line(&mut line).unwrap() == 0 {
            break;
        }
        line = line.replace("\r", "");
    }
    if assertion_failures > 0 {
        Ok(SyntaxTestFileResult::FailedAssertions(assertion_failures, total_assertions))
    } else {
        Ok(SyntaxTestFileResult::Success(total_assertions))
    }
}

/// Entry point: `syntest [tests_path [syntaxes_path]]`.
///
/// Walks `tests_path` (default: the current directory) for syntax test files and runs them,
/// then exits with the code returned by `recursive_walk`. The syntaxes path is only honoured
/// when exactly two arguments are given; otherwise the built-in syntax dump is used.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    let tests_path: &str = match args.get(1) {
        Some(given) => given.as_str(),
        None => ".",
    };
    let syntaxes_path: &str = if args.len() == 3 { args[2].as_str() } else { "" };

    // Syntaxes are loaded from disk when a path was given (as opposed to from the
    // binary dumps), so that no recompile is needed while developing a syntax.
    // Either way the variant that keeps newlines is the one loaded.
    let ss = if syntaxes_path.is_empty() {
        SyntaxSet::load_defaults_newlines()
    } else {
        println!("loading syntax definitions from {}", syntaxes_path);
        let mut from_disk = SyntaxSet::new();
        from_disk.load_syntaxes(syntaxes_path, true).unwrap();
        from_disk.link_syntaxes();
        from_disk
    };

    let exit_code = recursive_walk(&ss, tests_path);
    println!("exiting with code {}", exit_code);
    std::process::exit(exit_code);
}


/// Runs every syntax test file found under `path` and summarises the outcome as an exit code.
///
/// Returns 0 when every file passed, 1 when at least one file had failed assertions, and 2
/// when at least one file could not be tested at all (a header error); 2 takes precedence.
fn recursive_walk(ss: &SyntaxSet, path: &str) -> i32 {
    let mut exit_code: i32 = 0;
    // descend into every directory, but only yield files that look like syntax tests
    let candidates = WalkDir::new(path)
        .into_iter()
        .filter_entry(|e| e.file_type().is_dir() || is_a_syntax_test_file(e));
    for entry in candidates {
        let entry = entry.unwrap();
        if !entry.file_type().is_file() {
            continue;
        }
        println!("Testing file {}", entry.path().display());
        let result = test_file(ss, entry.path(), true);
        println!("{:?}", result);
        match result {
            // an error always wins
            Err(_) => exit_code = 2,
            // failures only downgrade the result when no error has been seen
            Ok(SyntaxTestFileResult::FailedAssertions(_, _)) if exit_code != 2 => exit_code = 1,
            Ok(_) => {}
        }
    }
    exit_code
}

/// Tells whether a directory entry's file name starts with `syntax_test_`.
///
/// Names that are not valid UTF-8 are never treated as syntax test files.
fn is_a_syntax_test_file(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        Some(name) => name.starts_with("syntax_test_"),
        None => false,
    }
}
39 changes: 34 additions & 5 deletions src/easy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,21 +148,27 @@ static NOOP_OP: ScopeStackOp = ScopeStackOp::Noop;
impl<'a> Iterator for ScopeRegionIterator<'a> {
type Item = (&'a str, &'a ScopeStackOp);
fn next(&mut self) -> Option<Self::Item> {
let next_str_i = if self.index >= self.ops.len() {
if self.last_str_index >= self.line.len() {
return None;
}
if self.index > self.ops.len() {
return None;
}

// region extends up to next operation (ops[index]) or string end if there is none
// note the next operation may be at, last_str_index, in which case the region is empty
let next_str_i = if self.index == self.ops.len() {
self.line.len()
} else {
self.ops[self.index].0
};
let substr = &self.line[self.last_str_index..next_str_i];
self.last_str_index = next_str_i;

// the first region covers everything before the first op, which may be empty
let op = if self.index == 0 {
&NOOP_OP
} else {
&self.ops[self.index-1].1
};

self.index += 1;
Some((substr, op))
}
Expand All @@ -175,7 +181,7 @@ mod tests {
use parsing::{SyntaxSet, ParseState, ScopeStack};
use highlighting::ThemeSet;
use std::str::FromStr;

#[test]
fn can_highlight_lines() {
let ps = SyntaxSet::load_defaults_nonewlines();
Expand Down Expand Up @@ -219,4 +225,27 @@ mod tests {
}
assert_eq!(token_count, 5);
}

/// With newline-terminated lines, `ScopeRegionIterator` must yield every parse operation
/// exactly once, plus the leading no-op region.
#[test]
fn can_find_regions_with_trailing_newline() {
    let syntax_set = SyntaxSet::load_defaults_newlines();
    let ruby = syntax_set.find_syntax_by_extension("rb").unwrap();
    let mut state = ParseState::new(ruby);
    let mut stack = ScopeStack::new();
    let lines = ["# hello world\n", "lol=5+2\n"];

    for &line in lines.iter() {
        let ops = state.parse_line(line);
        println!("{:?}", ops);

        let mut iterated_ops: Vec<&ScopeStackOp> = Vec::new();
        for (_, op) in ScopeRegionIterator::new(&ops, line) {
            stack.apply(op);
            iterated_ops.push(op);
            println!("{:?}", op);
        }

        // the iterator yields one extra item: the NOOP for the region before the first op
        let expected_op_count = ops.iter().map(|t| &t.1).count();
        assert_eq!(expected_op_count, iterated_ops.len() - 1);
    }
}
}
Loading

0 comments on commit f5e01f1

Please sign in to comment.